rsstube/src/rsstube.py

#!/usr/bin/python3
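"""rsstube: take page URLs and try to find corresponding RSS feed URLs."""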

import importlib
import sys

from utils import debug, notify, warn, error, success
import opml


# enter a URL and attempt to return a feed URL
def get_feed(url, verbosity=3, network=True, curl_args=None):
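    """Try to find a feed URL for the given page URL.

    Return a (feed_url, extractor_name) tuple on success,
    or (None, None) if no feed could be determined.
    """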
    feed = None

    from determine_site import determine_site
    debug("Attempting to determine site...", verbosity)
    site = determine_site(url)

    if site is not None:
        debug("Site identified as " + site, verbosity)
        notify("Trying " + site + " extractor...", verbosity)
        # get appropriate extractor
        extractor = importlib.import_module("extractors." + site)
        feed = extractor.extract(url, None, network, verbosity, curl_args)
        if feed is None:
            error("Unable to get RSS feed for " + url, verbosity, site)
        else:
            return feed, site
    elif network:
        from download_page import download
        page, response_code = download(None, url, curl_args, verbosity, True)
        if page is None:
            error("Failed to download " + url, verbosity)
            return None, None

        # try to get feed for common software like PeerTube
        debug("Attempting to determine software from page...", verbosity)
        from determine_software import determine_software
        software = determine_software(page)
        if software is not None:
            debug("Software identified as " + software, verbosity)
            notify("Trying " + software + " extractor...", verbosity)
            extractor = importlib.import_module("extractors." + software)
            feed = extractor.extract(url, page, network, verbosity, curl_args)
            if feed is None:
                notify("Unable to get RSS feed for " + url + " with " + software + " extractor", verbosity, software)
            else:
                return feed, software

        # try generic extractor even if software is known
        # don't try generic extractor if we got an error
        if response_code in range(200, 300):  # any 2xx status counts as success
            debug("Trying generic extractor...", verbosity)
            extractor = importlib.import_module("extractors.generic")
            feed = extractor.extract(url, page, network, verbosity, curl_args)
            if feed is None:
                error("Unable to get RSS feed for " + url, verbosity, "generic")
            else:
                return feed, "generic"
    else:
        error("Unable to get RSS feed for " + url + " without downloading page", verbosity)
    return None, None


def process_args(options, options_new):
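    """Apply options_new on top of options.

    The "curl_args" dictionary is merged rather than replaced.
    """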
    for opt in options_new:
        # curl_args is handled as a special case below
        if opt != "curl_args":
            options[opt] = options_new[opt]
    # may need to merge dictionaries from config file and command line
    if "curl_args" in options_new:
        for i in options_new["curl_args"]:
            options["curl_args"][i] = options_new["curl_args"][i]


if __name__ == "__main__":
    options = dict()

    # set default options
    options["network"] = True
    options["verbosity"] = 3
    options["curl_args"] = dict()
    options["output_format"] = "url"
    options["output_filename"] = ""

    # count of command-line arguments
    arg_count = 0
    # object to output feeds as OPML
    output_opml = None
    # output file
    output_file = None
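
    # look for a config file: prefer the per-user config,
    # fall back to the system-wide one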
    config = None
    try:
        from pathlib import Path
        home = str(Path.home())
        local_config_path = home + "/.config/rsstube/config"
        config = open(local_config_path, "r")
    except FileNotFoundError:
        try:
            global_config_path = "/etc/rsstube/config"
            config = open(global_config_path, "r")
        except FileNotFoundError:
            # no change
            config = None

    file_params = ""
    if config is not None:
        for line in config:
            line = line.strip()
            # comment lines should begin with # after stripping
            if line != "" and line[0] != "#":
                file_params += " " + line

    from parse_options import parse_options

    # config file options
    if file_params != "":
        # throw away arg_count
        config_file_options = parse_options(file_params.split())[0]
        process_args(options, config_file_options)

    # command-line options
    command_line_options, arg_count = parse_options(sys.argv[1:])
    process_args(options, command_line_options)

    if options["output_format"] == "opml":
        debug("Formatting output as OPML.", options["verbosity"])
        output_opml = opml.Opml("rsstube feeds")

    if options["output_filename"] != "":
        debug("Output will be saved in " + options["output_filename"], options["verbosity"])
        output_file = open(options["output_filename"], "w")

    if len(sys.argv) == arg_count + 1:
        error("Please provide one or more URLs.", options["verbosity"])
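
    # collect feeds into the OPML document, or emit each one as soon as it is found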
    for url in sys.argv[arg_count + 1:]:
        feed, site = get_feed(url, options["verbosity"], options["network"], options["curl_args"])
        if feed is not None:
            if output_opml is not None:
                output_opml.add_feed(feed, site + ": " + url, url)
            else:
                success(feed, output_file)

    if output_opml is not None:
        success(output_opml.get_opml(), output_file)

    if output_file is not None:
        output_file.close()