rsstube/src/rsstube.py

#!/usr/bin/python3
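"""rsstube: take page URLs and try to find corresponding RSS feed URLs."""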

import importlib
import sys

from utils import debug, notify, warn, error, success
import opml


# enter a URL and attempt to return a feed URL
def get_feed(url, verbosity=3, network=True, curl_args=None):
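    """Try to find a feed URL for the given page URL.

    Return a (feed_url, extractor_name) tuple on success,
    or (None, None) if no feed could be determined.
    """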
    feed = None

    from determine_site import determine_site
    debug("Attempting to determine site...", verbosity)
    site = determine_site(url)

    if site is not None:
        debug("Site identified as " + site, verbosity)
        notify("Trying " + site + " extractor...", verbosity)
        # get appropriate extractor
        extractor = importlib.import_module("extractors." + site)
        feed = extractor.extract(url, None, network, verbosity, curl_args)
        if feed is None:
            error("Unable to get RSS feed for " + url, verbosity, site)
        else:
            return feed, site
    elif network:
        from download_page import download
        page, response_code = download(None, url, curl_args, verbosity, True)
        if page is None:
            error("Failed to download " + url, verbosity)
            return None, None

        # try to get feed for common software like PeerTube
        debug("Attempting to determine software from page...", verbosity)
        from determine_software import determine_software
        software = determine_software(page)
        if software is not None:
            debug("Software identified as " + software, verbosity)
            notify("Trying " + software + " extractor...", verbosity)
            extractor = importlib.import_module("extractors." + software)
            feed = extractor.extract(url, page, network, verbosity, curl_args)
            if feed is None:
                notify("Unable to get RSS feed for " + url + " with " + software + " extractor", verbosity, software)
            else:
                return feed, software

        # try generic extractor even if software is known
        # don't try generic extractor if we got an error
        if response_code in range(200, 300):  # any 2xx status counts as success
            debug("Trying generic extractor...", verbosity)
            extractor = importlib.import_module("extractors.generic")
            feed = extractor.extract(url, page, network, verbosity, curl_args)
            if feed is None:
                error("Unable to get RSS feed for " + url, verbosity, "generic")
            else:
                return feed, "generic"
    else:
        error("Unable to get RSS feed for " + url + " without downloading page", verbosity)
    return None, None


def process_args(options, options_new):
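    """Apply options_new on top of options.

    The "curl_args" dictionary is merged rather than replaced.
    """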
    for opt in options_new:
        # curl_args is handled as a special case below
        if opt != "curl_args":
            options[opt] = options_new[opt]
    # may need to merge dictionaries from config file and command line
    if "curl_args" in options_new:
        for i in options_new["curl_args"]:
            options["curl_args"][i] = options_new["curl_args"][i]


if __name__ == "__main__":
    options = dict()

    # set default options
    options["network"] = True
    options["verbosity"] = 3
    options["curl_args"] = dict()
    options["output_format"] = "url"
    options["output_filename"] = ""

    # count of command-line arguments
    arg_count = 0
    # object to output feeds as OPML
    output_opml = None
    # output file
    output_file = None
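
    # look for a config file: prefer the per-user config,
    # fall back to the system-wide one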
    config = None
    try:
        from pathlib import Path
        home = str(Path.home())
        local_config_path = home + "/.config/rsstube/config"
        config = open(local_config_path, "r")
    except FileNotFoundError:
        try:
            global_config_path = "/etc/rsstube/config"
            config = open(global_config_path, "r")
        except FileNotFoundError:
            # no change
            config = None

    file_params = ""
    if config is not None:
        for line in config:
            line = line.strip()
            # comment lines should begin with # after stripping
            if line != "" and line[0] != "#":
                file_params += " " + line

    from parse_options import parse_options

    # config file options
    if file_params != "":
        # throw away arg_count
        config_file_options = parse_options(file_params.split())[0]
        process_args(options, config_file_options)

    # command-line options
    command_line_options, arg_count = parse_options(sys.argv[1:])
    process_args(options, command_line_options)

    if options["output_format"] == "opml":
        debug("Formatting output as OPML.", options["verbosity"])
        output_opml = opml.Opml("rsstube feeds")

    if options["output_filename"] != "":
        debug("Output will be saved in " + options["output_filename"], options["verbosity"])
        output_file = open(options["output_filename"], "w")

    if len(sys.argv) == arg_count + 1:
        error("Please provide one or more URLs.", options["verbosity"])
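
    # collect feeds into the OPML document, or emit each one as soon as it is found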
    for url in sys.argv[arg_count + 1:]:
        feed, site = get_feed(url, options["verbosity"], options["network"], options["curl_args"])
        if feed is not None:
            if output_opml is not None:
                output_opml.add_feed(feed, site + ": " + url, url)
            else:
                success(feed, output_file)

    if output_opml is not None:
        success(output_opml.get_opml(), output_file)

    if output_file is not None:
        output_file.close()