diff --git a/scripts/download_page.py b/scripts/download_page.py index 01a39f4..a49ecc7 100644 --- a/scripts/download_page.py +++ b/scripts/download_page.py @@ -15,11 +15,10 @@ def download (platform, url, args, verbosity, return_http_code=False, follow_loc c.setopt(c.FOLLOWLOCATION, follow_location) # TODO: handle possible arguments -# if args["user_agent"]: -# c.setopt(pycurl.USERAGENT, args["user_agent"] -# if args["ciphers"]: -# c.setopt(pycurl.CIPHERS, args["ciphers"] - + if "user_agent" in args: + c.setopt(pycurl.USERAGENT, args["user_agent"]) + if "header" in args: + c.setopt(pycurl.HTTPHEADER, args["header"]) notify ("Downloading " + url + "...", verbosity, platform) try: c.perform() diff --git a/scripts/options.py b/scripts/options.py index ea736ce..60217d8 100644 --- a/scripts/options.py +++ b/scripts/options.py @@ -69,7 +69,6 @@ def options(params): opts, args = getopt.getopt(params,"A:c:H:hno:p:qtUVv", [ "user-agent=", "ciphers=", - "compressed", "header=", "help", "license", @@ -82,6 +81,7 @@ def options(params): "suppress-errors", "tls-max=", "tls13-ciphers=", + "unbreak", "verbose", "verbosity=", "version" @@ -96,9 +96,6 @@ def options(params): elif opt == "--ciphers": d["ciphers"] = arg arg_count += 2 - elif opt == "--compressed": - d["compressed"] = True - arg_count += 1 elif opt in ("-h", "--help"): print ("Usage: rsstube [OPTIONS] URL") # not available yet @@ -149,6 +146,22 @@ def options(params): elif opt == "--tls13-ciphers": d["tls13_ciphers"] = arg arg_count += 2 + elif opt == "--unbreak": + # attempt to unbreak hostile websites (e.g., Cloudflare) + # based on Tor Browser cURL request + d["user_agent"] = 'Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0' + header = [ + 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', + 'Accept-Language: en-US,en;q=0.5', + 'Connection: keep-alive', + 'Upgrade-Insecure-Requests: 1', + 'Sec-Fetch-Dest: document', + 'Sec-Fetch-Mode: navigate', + 'Sec-Fetch-Site: none', + 'Sec-Fetch-User: ?1', + 'Cache-Control: max-age=0' + ] + arg_count += 1 elif opt in ("-U", "--update"): update() arg_count += 1 diff --git a/tests/generic/wordpress.txt b/tests/generic/wordpress.txt index 6eb6737..5a4bbf6 100644 --- a/tests/generic/wordpress.txt +++ b/tests/generic/wordpress.txt @@ -1,4 +1,4 @@ https://usersnap.com/blog/hands-on-experience-with-hugo-static-site-generator/ https://usersnap.com/blog/feed/ # uses Cloudflare -#https://thenewstack.io/tutorial-use-hugo-to-generate-a-static-website/ https://thenewstack.io/feed/ +https://thenewstack.io/tutorial-use-hugo-to-generate-a-static-website/ https://thenewstack.io/feed/ diff --git a/tests/test.sh b/tests/test.sh index 26835dc..b206ed6 100755 --- a/tests/test.sh +++ b/tests/test.sh @@ -33,7 +33,7 @@ function test_site { echo "Goal:" echo "${links[1]}" - output=$(/usr/bin/python3 ../rsstube -o "" --output-format url "${links[0]}" | tail -1) + output=$(/usr/bin/python3 ../rsstube --unbreak -o "" --output-format url "${links[0]}" | tail -1) echo "Output:" echo "${output}"