rsstube/scripts/download_page.py

36 lines
1.0 KiB
Python
Raw Normal View History

#!/usr/bin/python3
import pycurl
from io import BytesIO
2021-07-24 20:00:00 -04:00
from utils import notify,debug,error
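# args should be a dictionary of arguments (recognised keys: "user_agent", "header")
# returns the page decoded as UTF-8 text (plus the response code when
# return_http_code is set), or None if curl reports an error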
2021-11-07 19:00:00 -05:00
def download (platform, url, args, verbosity, return_http_code=False, follow_location=True):
    """Fetch ``url`` with pycurl and return the response body as text.

    Args:
        platform: Passed through unchanged to ``notify``/``debug``/``error``.
        url: Address to download.
        args: Dictionary of options. ``"user_agent"`` is applied as the
            User-Agent and ``"header"`` is handed to ``pycurl.HTTPHEADER``
            (pycurl expects a list of header strings there).
        verbosity: Passed through unchanged to ``notify``/``debug``/``error``.
        return_http_code: When true, return ``(text, response_code)`` instead
            of just the text.
        follow_location: Value given to curl's ``FOLLOWLOCATION`` option.

    Returns:
        The body decoded as UTF-8, or a ``(text, response_code)`` tuple when
        ``return_http_code`` is true. Returns ``None`` (not a tuple) when curl
        reports an error, regardless of ``return_http_code``.

    Raises:
        UnicodeDecodeError: If the downloaded body is not valid UTF-8.
    """
    page_bytes = BytesIO()
    c = pycurl.Curl()
    # The finally clause guarantees the curl handle is released on every
    # path, including the early return taken when the transfer fails.
    try:
        c.setopt(c.URL, url)
        c.setopt(c.WRITEDATA, page_bytes)
        c.setopt(c.FOLLOWLOCATION, follow_location)

        # TODO: handle possible arguments
        if "user_agent" in args:
            c.setopt(pycurl.USERAGENT, args["user_agent"])
        if "header" in args:
            c.setopt(pycurl.HTTPHEADER, args["header"])

        notify ("Downloading " + url + "...", verbosity, platform)
        try:
            c.perform()
        except pycurl.error as e:
            error (str(e), verbosity, platform)
            return None

        # Must be read before the handle is closed.
        response_code = c.getinfo(c.RESPONSE_CODE)
    finally:
        c.close()

    debug (url + " downloaded!", verbosity, platform)
    debug ("Response code: " + str(response_code), verbosity, platform)

    page_text = page_bytes.getvalue().decode('utf8')
    if return_http_code:
        return page_text, response_code
    return page_text