2021-12-29 19:00:00 -05:00
|
|
|
#!/usr/bin/python3
|
|
|
|
|
|
|
|
import sys,importlib
|
|
|
|
|
|
|
|
from utils import debug,notify,warn,error,success
|
|
|
|
import opml
|
|
|
|
|
|
|
|
# enter a URL and attempt to return a feed URL
|
|
|
|
def get_feed (url, verbosity=3, network=True, curl_args=None):
    """Try to find a feed for ``url``.

    The site is first identified from the URL alone; if that succeeds, only
    the matching site extractor is tried. Otherwise, when ``network`` is
    true, the page is downloaded and a software-specific extractor is tried,
    followed by the generic extractor.

    Parameters:
        url: the URL to find a feed for.
        verbosity: passed through to the logging helpers and extractors.
        network: whether downloading the page is allowed.
        curl_args: passed through to the downloader and extractors.

    Returns:
        A ``(feed, name)`` tuple, where ``name`` is the site name, software
        name, or ``"generic"`` depending on which extractor produced the
        feed; ``(None, None)`` when no feed was found.
    """
    feed = None

    from determine_site import determine_site
    debug ("Attempting to determine site...", verbosity)
    site = determine_site (url)

    if site is not None:
        debug ("Site identified as " + site, verbosity)
        notify ("Trying " + site + " extractor...", verbosity)
        # get appropriate extractor
        extractor = importlib.import_module("extractors." + site)
        feed = extractor.extract(url, None, network, verbosity, curl_args)
        if feed is not None:
            return feed,site
        # a recognised site does not fall back to the page-based extractors
        error ("Unable to get RSS feed for " + url, verbosity, site)
        return None,None

    if not network:
        error ("Unable to get RSS feed for " + url + " without downloading page", verbosity)
        return None,None

    from download_page import download
    page,response_code = download (None, url, curl_args, verbosity, True)
    if page is None:
        error ("Failed to download " + url, verbosity)
        return None,None

    # try to get feed for common software like PeerTube
    debug ("Attempting to determine software from page...", verbosity)
    from determine_software import determine_software
    software = determine_software (page)
    if software is not None:
        debug ("Software identified as " + software, verbosity)
        notify ("Trying " + software + " extractor...", verbosity)
        extractor = importlib.import_module("extractors." + software)
        feed = extractor.extract(url, page, network, verbosity, curl_args)
        if feed is not None:
            return feed,software
        notify ("Unable to get RSS feed for " + url + " with " + software + " extractor", verbosity, software)

    # try generic extractor even if software is known,
    # but don't try generic extractor if we got an error.
    # range(200, 300) covers the whole 2xx class, including 299.
    if response_code in range(200, 300):
        debug ("Trying generic extractor...", verbosity)
        extractor = importlib.import_module("extractors.generic")
        feed = extractor.extract(url, page, network, verbosity, curl_args)

    if feed is None:
        error ("Unable to get RSS feed for " + url, verbosity, "generic")
        return None,None
    return feed,"generic"
|
|
|
|
|
|
|
|
def process_args (options, options_new):
    """Merge ``options_new`` into ``options`` in place.

    Every key except ``"curl_args"`` overwrites the existing value.
    ``"curl_args"`` is merged entry by entry into ``options["curl_args"]``
    so that values from the config file and the command line can be
    combined.
    """
    for name, value in options_new.items():
        # curl_args is merged separately below rather than replaced
        if name == "curl_args":
            continue
        options[name] = value

    # may need to merge dictionaries from config file and command line
    if "curl_args" in options_new:
        extra_curl_args = options_new["curl_args"]
        for curl_key in extra_curl_args:
            options["curl_args"][curl_key] = extra_curl_args[curl_key]
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    from pathlib import Path

    # set default options
    options = {
        "network": True,
        "verbosity": 3,
        "curl_args": dict(),
        "output_format": "url",
        "output_filename": "",
    }

    # count of command-line arguments
    arg_count = 0

    # object to output feeds as OPML
    output_opml = None

    # output file
    output_file = None

    # The per-user config takes precedence; the system-wide config is only
    # read when the per-user one does not exist.
    local_config_path = str(Path.home()) + "/.config/rsstube/config"
    global_config_path = "/etc/rsstube/config"

    file_params = ""
    for config_path in (local_config_path, global_config_path):
        try:
            config = open(config_path, "r")
        except FileNotFoundError:
            continue
        # the with-block closes the config file once it has been read
        with config:
            for line in config:
                line = line.strip()
                # comment lines should begin with # after stripping
                if line and not line.startswith("#"):
                    file_params += " " + line
        break

    from parse_options import parse_options

    # config file options
    if file_params != "":
        # throw away arg_count
        config_file_options = parse_options (file_params.split())[0]
        process_args (options, config_file_options)

    # command-line options (applied second so they override the config file)
    command_line_options, arg_count = parse_options (sys.argv[1:])
    process_args (options, command_line_options)

    if options["output_format"] == "opml":
        debug ("Formatting output as OPML.", options["verbosity"])
        output_opml = opml.Opml("rsstube feeds")

    if options["output_filename"] != "":
        debug ("Output will be saved in " + options["output_filename"], options["verbosity"])
        output_file = open(options["output_filename"], "w")

    try:
        # everything after the parsed options is treated as a URL
        if len(sys.argv) == arg_count+1:
            error ("Please provide one or more URL.", options["verbosity"])

        for url in sys.argv[arg_count+1:]:
            feed,site = get_feed (url, options["verbosity"], options["network"], options["curl_args"])
            if feed is not None:
                if output_opml is not None:
                    # OPML output is collected and emitted once at the end
                    output_opml.add_feed (feed, site + ": " + url, url)
                else:
                    success (feed, output_file)

        if output_opml is not None:
            success (output_opml.get_opml(), output_file)
    finally:
        # close the output file even if an extractor raised
        if output_file is not None:
            output_file.close()
|