piermesh/src/Components/hopper.py

92 lines
2.5 KiB
Python
Raw Normal View History

2024-08-12 10:29:58 +00:00
from bs4 import BeautifulSoup
2024-07-28 11:21:15 +00:00
import requests
2024-08-12 10:29:58 +00:00
2024-07-28 11:21:15 +00:00
import msgpack
2024-08-12 10:29:58 +00:00
2024-07-28 11:21:15 +00:00
import lzma
2024-08-12 10:29:58 +00:00
import base64
import mimetypes
from Packets.Messages.Protocols.hopper.Response import HopperResponse
2024-07-28 11:21:15 +00:00
2024-08-12 10:29:58 +00:00
def downloadFile(url, text=True, mimeType=None):
    """
    Download a file and return it as text or as a base64 data url

    Parameters
    ----------
    url
        Address to download from

    text
        If True return the body decoded as utf-8, otherwise return a data url

    mimeType
        Mime type to put in the data url, guessed from the url when None

    Returns
    -------
    str
        The decoded text, or a ``data:<mimeType>;base64,<payload>`` url

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status (via raise_for_status)

    ValueError
        If text is False and no mime type was supplied or could be guessed
    """
    with requests.get(url, stream=True) as r:
        r.raise_for_status()
        # Join the chunks once; growing a bytes object with += copies the
        # whole buffer on every chunk
        fbytes = b"".join(r.iter_content(chunk_size=8192))
    if text:
        return fbytes.decode("utf-8")
    if mimeType is None:
        mimeType, _encoding = mimetypes.guess_type(url)
    if mimeType is None:
        # A data url needs a mime type, so there is nothing sensible to return
        raise ValueError(
            "Couldnt guess mime type and none was supplied, cant encode to data url"
        )
    b64str = base64.b64encode(fbytes).decode("utf-8")
    return "data:{0};base64,{1}".format(mimeType, b64str)
2024-07-28 11:21:15 +00:00
2024-08-12 10:29:58 +00:00
def get(url: str, params=None, followTags=None):
    """
    http/s get request

    Parameters
    ----------
    url: str

    params
        Requests (library) parameters

    followTags
        None or list of tags to download the src/href from
        ("img", "video", "link" and "script" are handled, others are ignored)

    Returns
    -------
    dict
        ``response`` is the body text (the rewritten html when followTags is
        given), ``code`` the status code and ``content-type`` the content-type
        header or None
    """
    from urllib.parse import urljoin

    resp = requests.get(url, params=params)
    result = {
        "response": resp.text,
        "code": resp.status_code,
        "content-type": resp.headers.get("content-type"),
    }
    # TODO: Reject followtags if content type is other then html
    if followTags is None:
        return result

    soup = BeautifulSoup(result["response"], "html.parser")
    # Relative src/href values are resolved against the final (post redirect)
    # url; urljoin leaves absolute urls untouched
    baseUrl = resp.url
    for tag in followTags:
        if tag in ["img", "video"]:
            for elem in soup.find_all(tag):
                src = elem.get("src")
                # Nothing to fetch for tags without a src or already inlined ones
                if not src or src.startswith("data:"):
                    continue
                elem["src"] = downloadFile(urljoin(baseUrl, src), text=False)
        elif tag == "link":
            for elem in soup.find_all(tag):
                # rel is a multi valued attribute, bs4 returns it as a list
                rel = elem.get("rel") or []
                if isinstance(rel, str):
                    rel = rel.split()
                href = elem.get("href")
                if not href or "stylesheet" not in [v.lower() for v in rel]:
                    continue
                styleTag = soup.new_tag("style")
                styleTag.string = downloadFile(urljoin(baseUrl, href))
                # Swap in place so the stylesheet keeps its position in the cascade
                elem.replace_with(styleTag)
        elif tag == "script":
            for elem in soup.find_all(tag):
                src = elem.get("src")
                # Inline scripts have no src and are left as they are
                if not src or src.startswith("data:"):
                    continue
                script = downloadFile(urljoin(baseUrl, src))
                # A script tag that still has a src attribute ignores its body
                del elem["src"]
                elem.string = script
    # Serialise the whole tree; soup.text would drop the markup and with it
    # everything that was just inlined
    result["response"] = str(soup)
    return result
2024-07-28 11:21:15 +00:00
def post(url: str, params=None):
    """
    Send an http/s post request and summarise the reply

    Parameters
    ----------
    url: str
        Address to post to

    params
        Payload handed to requests.post as its ``data`` argument

    Returns
    -------
    dict
        ``response`` holds the body text and ``code`` the status code
    """
    reply = requests.post(url, data=params)
    return dict(response=reply.text, code=reply.status_code)