From 5cc99dd432b83271184c5d0cfaa02182d0850b62 Mon Sep 17 00:00:00 2001 From: 0x80 <0x80@negativezero.link> Date: Thu, 30 Dec 2021 00:00:00 +0000 Subject: [PATCH] Big refactor. Reorganize, make the code cleaner. --- rsstube | 2 +- rsstube-gtk | 1 + scripts/rsstube.py | 173 ------------------ {scripts => src}/__init__.py | 0 {scripts => src}/determine_site.py | 0 {scripts => src}/determine_software.py | 0 {scripts => src}/download_page.py | 9 +- .../extractors/NEWSITE.py.template | 0 {scripts => src}/extractors/__init__.py | 0 {scripts => src}/extractors/apple_podcasts.py | 0 {scripts => src}/extractors/bibliogram.py | 0 {scripts => src}/extractors/castbox.py | 0 {scripts => src}/extractors/castro_fm.py | 0 {scripts => src}/extractors/chirbit.py | 0 {scripts => src}/extractors/deviantart.py | 0 {scripts => src}/extractors/fyyd.py | 0 {scripts => src}/extractors/generic.py | 0 {scripts => src}/extractors/github.py | 0 {scripts => src}/extractors/lbry.py | 0 {scripts => src}/extractors/peertube.py | 0 {scripts => src}/extractors/player_fm.py | 0 {scripts => src}/extractors/pocketcasts.py | 0 {scripts => src}/extractors/podbay.py | 0 {scripts => src}/extractors/radiopublic.py | 0 {scripts => src}/extractors/reddit.py | 0 {scripts => src}/extractors/soundcloud.py | 0 {scripts => src}/extractors/tumblr.py | 0 {scripts => src}/extractors/vimeo.py | 0 {scripts => src}/extractors/youtube.py | 0 {scripts => src}/opml.py | 0 scripts/options.py => src/parse_options.py | 43 +++-- rsstube-gtk.py => src/rsstube-gtk.py | 14 +- src/rsstube.py | 143 +++++++++++++++ {scripts => src}/utils.py | 0 34 files changed, 177 insertions(+), 208 deletions(-) create mode 120000 rsstube-gtk delete mode 100755 scripts/rsstube.py rename {scripts => src}/__init__.py (100%) rename {scripts => src}/determine_site.py (100%) rename {scripts => src}/determine_software.py (100%) rename {scripts => src}/download_page.py (84%) rename {scripts => src}/extractors/NEWSITE.py.template (100%) rename {scripts => src}/extractors/__init__.py (100%) rename {scripts => src}/extractors/apple_podcasts.py (100%) rename {scripts => src}/extractors/bibliogram.py (100%) rename {scripts => src}/extractors/castbox.py (100%) rename {scripts => src}/extractors/castro_fm.py (100%) rename {scripts => src}/extractors/chirbit.py (100%) rename {scripts => src}/extractors/deviantart.py (100%) rename {scripts => src}/extractors/fyyd.py (100%) rename {scripts => src}/extractors/generic.py (100%) rename {scripts => src}/extractors/github.py (100%) rename {scripts => src}/extractors/lbry.py (100%) rename {scripts => src}/extractors/peertube.py (100%) rename {scripts => src}/extractors/player_fm.py (100%) rename {scripts => src}/extractors/pocketcasts.py (100%) rename {scripts => src}/extractors/podbay.py (100%) rename {scripts => src}/extractors/radiopublic.py (100%) rename {scripts => src}/extractors/reddit.py (100%) rename {scripts => src}/extractors/soundcloud.py (100%) rename {scripts => src}/extractors/tumblr.py (100%) rename {scripts => src}/extractors/vimeo.py (100%) rename {scripts => src}/extractors/youtube.py (100%) rename {scripts => src}/opml.py (100%) rename scripts/options.py => src/parse_options.py (85%) rename rsstube-gtk.py => src/rsstube-gtk.py (80%) create mode 100755 src/rsstube.py rename {scripts => src}/utils.py (100%) diff --git a/rsstube b/rsstube index 9a1f582..4704e24 120000 --- a/rsstube +++ b/rsstube @@ -1 +1 @@ -scripts/rsstube.py \ No newline at end of file +src/rsstube.py \ No newline at end of file diff --git a/rsstube-gtk b/rsstube-gtk new file mode 120000 index 0000000..21f694d --- /dev/null +++ b/rsstube-gtk @@ -0,0 +1 @@ +src/rsstube-gtk.py \ No newline at end of file diff --git a/scripts/rsstube.py b/scripts/rsstube.py deleted file mode 100755 index 8f76eb2..0000000 --- a/scripts/rsstube.py +++ /dev/null @@ -1,173 +0,0 @@ -#!/usr/bin/python3 - -import sys,importlib - -from utils import debug,notify,warn,error,success -import opml - -network = True -verbosity = 3 -args = {} -arg_count = 0 -output_format = "url" -output_opml = None -output_filename = None -output = None - -config = None -try: - from pathlib import Path - home = str(Path.home()) - local_config_path = home + "/.config/rsstube/config" - config = open(local_config_path,"r") -except FileNotFoundError: - try: - global_config_path = "/etc/rsstube/config" - config = open(global_config_path,"r") - except FileNotFoundError: - # no change - config = None -file_params = "" -if not config is None: - for line in config: - line = line.strip() - # comment lines should begin with # after stripping - if line != "" and line[0] != "#": - file_params += " " + line -from options import options - -def process_args ( - network, - verbosity, - args, - arg_count, - output_format, - output_filename, - network_new, - verbosity_new, - args_new, - arg_count_new, - output_format_new, - output_filename_new -): - if network_new == False: - network = network_new - if not verbosity_new is None: - verbosity = verbosity_new - for i in args_new: - args[i] = args_new[i] - arg_count = arg_count_new - if output_format_new != "": - output_format = output_format_new - if not output_filename_new is None: - output_filename = output_filename_new - return network,verbosity,args,arg_count,output_format,output_filename - -# config file options -if not file_params == "": - network_new,verbosity_new,args_new,arg_count_new,output_format_new,output_filename_new = options(file_params.split()) - network,verbosity,args,arg_count,output_format,output_filename = process_args( - network, - verbosity, - args, - arg_count, - output_format, - output_filename, - network_new, - verbosity_new, - args_new, - arg_count_new, - output_format_new, - output_filename_new - ) - -# command-line options -network_new,verbosity_new,args_new,arg_count_new,output_format_new,output_filename_new = options(sys.argv[1:]) -network,verbosity,args,arg_count,output_format,output_filename = process_args( - network, - verbosity, - args, - arg_count, - output_format, - output_filename, - network_new, - verbosity_new, - args_new, - arg_count_new, - output_format_new, - output_filename_new -) - -if output_format == "opml": - debug ("Formatting output as OPML.", verbosity) - output_opml = opml.Opml("rsstube feeds") - -if not output_filename is None and output_filename != "": - debug ("Output will be saved in " + output_filename, verbosity) - output = open(output_filename, "w") - -if len(sys.argv) == arg_count+1: - error ("Please provide one or more URL.", verbosity) - -for url in sys.argv[arg_count+1:]: - from determine_site import determine_site - debug ("Attempting to determine site...", verbosity) - site = determine_site (url) - if not site is None: - debug ("Site identified as " + site, verbosity) - notify ("Trying " + site + " extractor...", verbosity) - - # get appropriate extractor - extractor = importlib.import_module("extractors." + site) - feed = extractor.extract(url, None, network, verbosity, args) - if feed is None: - error ("Unable to get RSS feed for " + url, verbosity, site) - else: - if not output_opml is None: - output_opml.add_feed (feed, site + ": " + url, url) - else: - success (feed, output) - elif network: - from download_page import download - page = download (None, url, args, verbosity) - if page is None: - error ("Failed to download " + url, verbosity) - continue - - # try to get feed for common software like PeerTube - debug ("Attempting to determine software from page...", verbosity) - from determine_software import determine_software - software = determine_software (page) - if not software is None: - debug ("Software identified as " + software, verbosity) - notify ("Trying " + software + " extractor...", verbosity) - extractor = importlib.import_module("extractors." + software) - feed = extractor.extract(url, page, network, verbosity, args) - if feed is None: - notify ("Unable to get RSS feed for " + url + " with " + software + " extractor", verbosity, software) - else: - if not output_opml is None: - output_opml.add_feed (feed, software + ": " + url, url) - else: - success (feed, output) - continue - - # try generic extractor even if software is known - debug ("Trying generic extractor...", verbosity) - extractor = importlib.import_module("extractors.generic") - feed = extractor.extract(url, page, network, verbosity, args) - if feed is None: - error ("Unable to get RSS feed for " + url, verbosity, "generic") - else: - if not output_opml is None: - output_opml.add_feed (feed, url, url) - else: - success (feed, output) - else: - error ("Unable to get RSS feed for " + url + " without downloading page", verbosity) - -if not output_opml is None: - success (output_opml.get_opml(), output) - -if not output is None: - output.close() diff --git a/scripts/__init__.py b/src/__init__.py similarity index 100% rename from scripts/__init__.py rename to src/__init__.py diff --git a/scripts/determine_site.py b/src/determine_site.py similarity index 100% rename from scripts/determine_site.py rename to src/determine_site.py diff --git a/scripts/determine_software.py b/src/determine_software.py similarity index 100% rename from scripts/determine_software.py rename to src/determine_software.py diff --git a/scripts/download_page.py b/src/download_page.py similarity index 84% rename from scripts/download_page.py rename to src/download_page.py index a49ecc7..11ce84c 100644 --- a/scripts/download_page.py +++ b/src/download_page.py @@ -15,10 +15,11 @@ def download (platform, url, args, verbosity, return_http_code=False, follow_loc c.setopt(c.FOLLOWLOCATION, follow_location) # TODO: handle possible arguments - if "user_agent" in args: - c.setopt(pycurl.USERAGENT, args["user_agent"]) - if "header" in args: - c.setopt(pycurl.HTTPHEADER, args["header"]) + if not args is None: + if "user_agent" in args: + c.setopt(pycurl.USERAGENT, args["user_agent"]) + if "header" in args: + c.setopt(pycurl.HTTPHEADER, args["header"]) notify ("Downloading " + url + "...", verbosity, platform) try: c.perform() diff --git a/scripts/extractors/NEWSITE.py.template b/src/extractors/NEWSITE.py.template similarity index 100% rename from scripts/extractors/NEWSITE.py.template rename to src/extractors/NEWSITE.py.template diff --git a/scripts/extractors/__init__.py b/src/extractors/__init__.py similarity index 100% rename from scripts/extractors/__init__.py rename to src/extractors/__init__.py diff --git a/scripts/extractors/apple_podcasts.py b/src/extractors/apple_podcasts.py similarity index 100% rename from scripts/extractors/apple_podcasts.py rename to src/extractors/apple_podcasts.py diff --git a/scripts/extractors/bibliogram.py b/src/extractors/bibliogram.py similarity index 100% rename from scripts/extractors/bibliogram.py rename to src/extractors/bibliogram.py diff --git a/scripts/extractors/castbox.py b/src/extractors/castbox.py similarity index 100% rename from scripts/extractors/castbox.py rename to src/extractors/castbox.py diff --git a/scripts/extractors/castro_fm.py b/src/extractors/castro_fm.py similarity index 100% rename from scripts/extractors/castro_fm.py rename to src/extractors/castro_fm.py diff --git a/scripts/extractors/chirbit.py b/src/extractors/chirbit.py similarity index 100% rename from scripts/extractors/chirbit.py rename to src/extractors/chirbit.py diff --git a/scripts/extractors/deviantart.py b/src/extractors/deviantart.py similarity index 100% rename from scripts/extractors/deviantart.py rename to src/extractors/deviantart.py diff --git a/scripts/extractors/fyyd.py b/src/extractors/fyyd.py similarity index 100% rename from scripts/extractors/fyyd.py rename to src/extractors/fyyd.py diff --git a/scripts/extractors/generic.py b/src/extractors/generic.py similarity index 100% rename from scripts/extractors/generic.py rename to src/extractors/generic.py diff --git a/scripts/extractors/github.py b/src/extractors/github.py similarity index 100% rename from scripts/extractors/github.py rename to src/extractors/github.py diff --git a/scripts/extractors/lbry.py b/src/extractors/lbry.py similarity index 100% rename from scripts/extractors/lbry.py rename to src/extractors/lbry.py diff --git a/scripts/extractors/peertube.py b/src/extractors/peertube.py similarity index 100% rename from scripts/extractors/peertube.py rename to src/extractors/peertube.py diff --git a/scripts/extractors/player_fm.py b/src/extractors/player_fm.py similarity index 100% rename from scripts/extractors/player_fm.py rename to src/extractors/player_fm.py diff --git a/scripts/extractors/pocketcasts.py b/src/extractors/pocketcasts.py similarity index 100% rename from scripts/extractors/pocketcasts.py rename to src/extractors/pocketcasts.py diff --git a/scripts/extractors/podbay.py b/src/extractors/podbay.py similarity index 100% rename from scripts/extractors/podbay.py rename to src/extractors/podbay.py diff --git a/scripts/extractors/radiopublic.py b/src/extractors/radiopublic.py similarity index 100% rename from scripts/extractors/radiopublic.py rename to src/extractors/radiopublic.py diff --git a/scripts/extractors/reddit.py b/src/extractors/reddit.py similarity index 100% rename from scripts/extractors/reddit.py rename to src/extractors/reddit.py diff --git a/scripts/extractors/soundcloud.py b/src/extractors/soundcloud.py similarity index 100% rename from scripts/extractors/soundcloud.py rename to src/extractors/soundcloud.py diff --git a/scripts/extractors/tumblr.py b/src/extractors/tumblr.py similarity index 100% rename from scripts/extractors/tumblr.py rename to src/extractors/tumblr.py diff --git a/scripts/extractors/vimeo.py b/src/extractors/vimeo.py similarity index 100% rename from scripts/extractors/vimeo.py rename to src/extractors/vimeo.py diff --git a/scripts/extractors/youtube.py b/src/extractors/youtube.py similarity index 100% rename from scripts/extractors/youtube.py rename to src/extractors/youtube.py diff --git a/scripts/opml.py b/src/opml.py similarity index 100% rename from scripts/opml.py rename to src/opml.py diff --git a/scripts/options.py b/src/parse_options.py similarity index 85% rename from scripts/options.py rename to src/parse_options.py index 60217d8..cdf2e51 100644 --- a/scripts/options.py +++ b/src/parse_options.py @@ -41,14 +41,11 @@ def update(): else: print("rsstube appears to have been manually downloaded or installed with a package manager. Use that same method to update.") -def options(params): +def parse_options(params): import sys,getopt,glob from utils import debug,notify,warn,error - # general settings - network = True - output_format = "" - output_filename = None + options = dict() ## verbosity: in addition to the feed, print... ## 0: no messages (suppress errors) @@ -56,10 +53,11 @@ def options(params): ## 2: error messages and warnings ## 3: [default] errors, warnings, and info ## 4: all messages, including debugging info - verbosity = None # pycurl args - d = dict() + options["curl_args"] = dict() + + # user may submit multiple HTTP headers, so they're stored as a list header = [] # count number of arguments @@ -91,10 +89,10 @@ def options(params): sys.exit(2) for opt, arg in opts: if opt in ("-A", "--user-agent"): - d["user_agent"] = arg + options["curl_args"]["user_agent"] = arg arg_count += 2 elif opt == "--ciphers": - d["ciphers"] = arg + options["curl_args"]["ciphers"] = arg arg_count += 2 elif opt in ("-h", "--help"): print ("Usage: rsstube [OPTIONS] URL") @@ -110,20 +108,20 @@ def options(params): arg_count += 1 sys.exit() elif opt in ("-n", "--non-network"): - network = False + options["network"] = False arg_count += 1 elif opt in ("-o", "--output"): - output_filename = arg + options["output_filename"] = arg arg_count += 2 elif opt == "--output-format": if str.lower(arg) in ("opml", "url"): - output_format = str.lower(arg) + options["output_format"] = str.lower(arg) arg_count += 2 elif opt in ("-p", "--proxy"): - d["proxy"] = arg + options["curl_args"]["proxy"] = arg arg_count += 2 elif opt in ("-q", "--quiet"): - verbosity = 1 + options["verbosity"] = 1 arg_count += 1 elif opt in ("--sites"): print ("Site-specific support:") @@ -138,18 +136,18 @@ def options(params): arg_count += 1 sys.exit() elif opt in ("--suppress-errors"): - verbosity = 0 + options["verbosity"] = 0 arg_count += 1 elif opt == "--tls-max": - d["tls_max"] = arg + options["curl_args"]["tls_max"] = arg arg_count += 2 elif opt == "--tls13-ciphers": - d["tls13_ciphers"] = arg + options["curl_args"]["tls13_ciphers"] = arg arg_count += 2 elif opt == "--unbreak": # attempt to unbreak hostile websites (e.g., Cloudflare) # based on Tor Browser cURL request - d["user_agent"] = 'Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0' + options["curl_args"]["user_agent"] = 'Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0' header = [ 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 'Accept-Language: en-US,en;q=0.5', @@ -167,12 +165,12 @@ def options(params): arg_count += 1 sys.exit() elif opt in ("-v", "--verbose"): - verbosity = 4 + options["verbosity"] = 4 arg_count += 1 elif opt == "--verbosity": v = int(arg) if v >= 0 and v <= 4: - verbosity = v + options["verbosity"] = v else: print ("Invalid verbosity: " + arg) arg_count += 2 @@ -184,5 +182,6 @@ def options(params): arg_count += 1 sys.exit() - d["header"] = header - return network,verbosity,d,arg_count,output_format,output_filename + if len(header) > 0: + options["curl_args"]["header"] = header + return options, arg_count diff --git a/rsstube-gtk.py b/src/rsstube-gtk.py similarity index 80% rename from rsstube-gtk.py rename to src/rsstube-gtk.py index c3e73e7..9a0f131 100755 --- a/rsstube-gtk.py +++ b/src/rsstube-gtk.py @@ -12,6 +12,7 @@ import gi,subprocess,sys,pycurl gi.require_version('Gtk', '3.0') from gi.repository import Gtk +from rsstube import get_feed class RsstubeGtk(Gtk.Window): def __init__(self): @@ -26,7 +27,7 @@ class RsstubeGtk(Gtk.Window): self.btn = Gtk.Button() self.btn.set_label("Get Feed") - self.btn.connect("clicked",self.get_feed) + self.btn.connect("clicked",self.display_feed) self.output_label = Gtk.Label() self.output_label.set_text("") @@ -45,17 +46,14 @@ class RsstubeGtk(Gtk.Window): self.connect("destroy", Gtk.main_quit) # runs rsstube - def get_feed(self,widget): + def display_feed(self,widget): error_color = "#B3589A" url = self.entry.get_text() - feed = str(subprocess.check_output([sys.executable, "scripts/rsstube.py", url])) - feed = feed[:feed.rindex("\\n")] - feed = feed[feed.rindex("\\n")+2:] + feed = get_feed(url)[0] # this color is used for error output - if "\\x1b[1;31m[" in feed: - errmsg = feed[feed.index(']')+2:] - errmsg = errmsg[:errmsg.rindex("\\x1b[0m")] + if feed is None: + errmsg = "Unable to get feed." self.output_label.set_markup('' + errmsg + '') else: self.output_label.set_markup('' + feed + '') diff --git a/src/rsstube.py b/src/rsstube.py new file mode 100755 index 0000000..63bff3c --- /dev/null +++ b/src/rsstube.py @@ -0,0 +1,143 @@ +#!/usr/bin/python3 + +import sys,importlib + +from utils import debug,notify,warn,error,success +import opml + +# enter a URL and attempt to return a feed URL +def get_feed (url, verbosity=3, network=True, curl_args=None): + from determine_site import determine_site + debug ("Attempting to determine site...", verbosity) + site = determine_site (url) + if not site is None: + debug ("Site identified as " + site, verbosity) + notify ("Trying " + site + " extractor...", verbosity) + # get appropriate extractor + extractor = importlib.import_module("extractors." + site) + feed = extractor.extract(url, None, network, verbosity, curl_args) + if feed is None: + error ("Unable to get RSS feed for " + url, verbosity, site) + else: + return feed,site + + elif network: + from download_page import download + page = download (None, url, curl_args, verbosity) + if page is None: + error ("Failed to download " + url, verbosity) + return None,None + + # try to get feed for common software like PeerTube + debug ("Attempting to determine software from page...", verbosity) + from determine_software import determine_software + software = determine_software (page) + if not software is None: + debug ("Software identified as " + software, verbosity) + notify ("Trying " + software + " extractor...", verbosity) + extractor = importlib.import_module("extractors." + software) + feed = extractor.extract(url, page, network, verbosity, curl_args) + if feed is None: + notify ("Unable to get RSS feed for " + url + " with " + software + " extractor", verbosity, software) + else: + return feed,software + + # try generic extractor even if software is known + debug ("Trying generic extractor...", verbosity) + extractor = importlib.import_module("extractors.generic") + feed = extractor.extract(url, page, network, verbosity, curl_args) + if feed is None: + error ("Unable to get RSS feed for " + url, verbosity, "generic") + else: + return feed,"generic" + else: + error ("Unable to get RSS feed for " + url + " without downloading page", verbosity) + return None,None + +def process_args (options, options_new): + for opt in options_new: + # curl_args is handled as a special case below + if opt != "curl_args": + options[opt] = options_new[opt] + + # may need to merge dictionaries from config file and command line + if "curl_args" in options_new: + for i in options_new["curl_args"]: + options["curl_args"][i] = options_new["curl_args"][i] + +if __name__ == "__main__": + options = dict() + + # set default options + options["network"] = True + options["verbosity"] = 3 + options["curl_args"] = dict() + options["output_format"] = "url" + options["output_filename"] = "" + + # count of command-line arguments + arg_count = 0 + + # object to output feeds as OPML + output_opml = None + + # output file + output_file = None + + config = None + try: + from pathlib import Path + home = str(Path.home()) + local_config_path = home + "/.config/rsstube/config" + config = open(local_config_path,"r") + except FileNotFoundError: + try: + global_config_path = "/etc/rsstube/config" + config = open(global_config_path,"r") + except FileNotFoundError: + # no change + config = None + file_params = "" + if not config is None: + for line in config: + line = line.strip() + # comment lines should begin with # after stripping + if line != "" and line[0] != "#": + file_params += " " + line + + from parse_options import parse_options + + # config file options + if file_params != "": + # throw away arg_count + config_file_options = parse_options (file_params.split())[0] + process_args (options, config_file_options) + + # command-line options + command_line_options, arg_count = parse_options (sys.argv[1:]) + process_args (options, command_line_options) + + if options["output_format"] == "opml": + debug ("Formatting output as OPML.", options["verbosity"]) + output_opml = opml.Opml("rsstube feeds") + + if options["output_filename"] != "": + debug ("Output will be saved in " + options["output_filename"], options["verbosity"]) + output_file = open(options["output_filename"], "w") + + if len(sys.argv) == arg_count+1: + error ("Please provide one or more URL.", options["verbosity"]) + + for url in sys.argv[arg_count+1:]: + feed,site = get_feed (url, options["verbosity"], options["network"], options["curl_args"]) + if not feed is None: + if not output_opml is None: + output_opml.add_feed (feed, site + ": " + url, url) + else: + success (feed, output_file) + + if not output_opml is None: + success (output_opml.get_opml(), output_file) + + if not output_file is None: + output_file.close() diff --git a/scripts/utils.py b/src/utils.py similarity index 100% rename from scripts/utils.py rename to src/utils.py