Initial code push.
This version of rsstube works but is not complete.
parent da1e5a2c5b
commit 3b89a5283e

README.md | 37

@ -6,6 +6,43 @@ This is a work-in-progress. It's missing some features I want like proxy support

That said, if you download this code and run it, it should work for the features and sites that have been implemented.

## Installing

rsstube is written in Python, an interpreted language. There's no need to compile it.

### Dependencies

- python3
- python3-pycurl

### Sample Installation

`git clone https://negativezero.link/code/lost/rsstube.git`

`sudo ln -s "$(pwd)/rsstube/rsstube" /usr/local/bin/rsstube`

### Updating

If you installed with git, just `cd` into the directory and `git pull`.

## Usage

`rsstube <link>`

There are some options, but most of them don't work yet.
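
For example, given a YouTube channel URL, rsstube should print that channel's feed URL (this pair is taken from the test suite):

`rsstube https://www.youtube.com/channel/UCqC_GY2ZiENFz2pwL0cSfAw`

`https://www.youtube.com/feeds/videos.xml?channel_id=UCqC_GY2ZiENFz2pwL0cSfAw`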

## Scope

rsstube is not a tool for generating new feeds where they don't already exist. It is a tool for locating official feed URLs provided by the site.

I will not register an API key with any service just to get a feed. rsstube only gets feeds which are publicly available.

Site-specific extractors should only be added to rsstube when the site in question requires special logic. If the logic is generally applicable to many sites, it should go in the generic extractor.

## License

GPLv3 or later

@ -0,0 +1 @@
2021-07-23 (pre-release)

@ -0,0 +1,46 @@
#!/usr/bin/python3

supported_sites = {
    "castro_fm" : ["castro.fm"],

    "chirbit" : [
        "chirbit.com",
        "chirb.it"
    ],

    "deviantart" : ["deviantart.com"],

    "fyyd" : ["fyyd.de/podcast/"],

    "github" : ["github.com"],

    "player_fm" : ["player.fm/series/"],

    "pocketcasts" : ["pca.st"],

    "reddit" : ["reddit.com"],

    "soundcloud" : ["soundcloud.com"],

    "tumblr" : ["tumblr.com"],

    "vimeo" : ["vimeo.com"],

    "youtube" : [
        "youtube.com",
        "youtu.be",
        "yt.be",
        "youtube-nocookie.com",
        "youtubeeducation.com",
        "youtubegaming.com",
        "ytimg.com"
    ]
}

def determine_site (url):
    # compare case-insensitively; lowercase the URL once, outside the loops
    url_lower = url.lower()
    for possible_site in supported_sites:
        for domain in supported_sites[possible_site]:
            if domain in url_lower:
                return possible_site
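
# A usage sketch (hypothetical inputs; the function falls through and
# returns None when no supported domain matches):
#   determine_site("https://youtu.be/XzIXc5CTC2M")  # -> "youtube"
#   determine_site("https://example.com/blog/")     # -> None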

@ -0,0 +1,15 @@
#!/usr/bin/python3

supported_software = {
    "peertube" : [
        '<meta property="og:platform" content="PeerTube"'
    ]
}

def determine_software (page):
    from utils import search
    for possible_software in supported_software:
        for pattern in supported_software[possible_software]:
            if search (page, pattern, "") is not None:
                return possible_software
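
# A usage sketch (hypothetical page snippet):
#   determine_software('<html>...<meta property="og:platform" content="PeerTube" />...')
#   # -> "peertube"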

@ -0,0 +1,32 @@
#!/usr/bin/python3

import pycurl
from io import BytesIO
from utils import notify,debug

# args should be a dictionary of arguments
# returns the page text, plus the HTTP response code if return_http_code is True
def download (platform, url, args, verbosity, return_http_code=False):
    page_bytes = BytesIO()
    c = pycurl.Curl()

    c.setopt(c.URL, url)
    c.setopt(c.WRITEDATA, page_bytes)
    c.setopt(c.FOLLOWLOCATION, True)

    # TODO: handle possible arguments
    # if args["user_agent"]:
    #     c.setopt(pycurl.USERAGENT, args["user_agent"])
    # if args["ciphers"]:
    #     c.setopt(pycurl.CIPHERS, args["ciphers"])

    notify ("Downloading " + url + "...", verbosity, platform)
    c.perform()
    response_code = c.getinfo(c.RESPONSE_CODE)
    c.close()
    debug (url + " downloaded!", verbosity, platform)
    debug ("Response code: " + str(response_code), verbosity, platform)
    if return_http_code:
        return page_bytes.getvalue().decode('utf8'), response_code
    else:
        return page_bytes.getvalue().decode('utf8')
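
# A usage sketch (hypothetical URL; assumes network access):
#   page = download ("generic", "https://example.com/", {}, 3)
#   page, code = download ("generic", "https://example.com/", {}, 3, return_http_code=True)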

@ -0,0 +1,28 @@
#!/usr/bin/python3

from utils import *
from download_page import download

# portable code to get filename
import os
platform = os.path.basename(__file__)
if platform.endswith(".py"):
    platform = platform[:(-3)]

def extract_from_url (url, verbosity):
    # site-specific logic goes here; return the feed URL or None
    # (placeholder body so this skeleton is valid Python)
    return None

def extract_from_page (page, verbosity):
    # site-specific logic goes here; return the feed URL or None
    return None

def extract (url, page=None, network=False, verbosity=3, args={}):
    feed = extract_from_url (url, verbosity)
    if feed is not None:
        return feed
    else:
        notify ("Unable to get feed from URL alone", verbosity, platform)
        if network == True:
            page = download (platform, url, args, verbosity)
            feed = extract_from_page (page, verbosity)
            if feed is not None:
                return feed

@ -0,0 +1,23 @@
#!/usr/bin/python3

from utils import *
from download_page import download

# portable code to get filename
import os
platform = os.path.basename(__file__)
if platform.endswith(".py"):
    platform = platform[:(-3)]

def extract_from_page (page, verbosity):
    return search (page, '<a href="', '"><img alt="Subscribe to RSS"')

def extract (url, page=None, network=False, verbosity=3, args={}):
    # cannot get feed from URL alone
    if not network:
        return None

    page = download (platform, url, args, verbosity)
    feed = extract_from_page (page, verbosity)
    if feed is not None:
        return feed

@ -0,0 +1,37 @@
#!/usr/bin/python3

from utils import *
from download_page import download

# portable code to get filename
import os
platform = os.path.basename(__file__)
if platform.endswith(".py"):
    platform = platform[:(-3)]

def extract_from_url (url, verbosity):
    subst = "chirbit.com/"
    if subst in url:
        username = url[url.index(subst) + len(subst):]
        if "/" in username:
            username = username[:username.index("/")]
        return "https://www.chirbit.com/" + username + "/rss"
    else:
        return None

def extract_from_page (page, verbosity):
    username = search (page, '<span id="chirbit-username">', '</span>')
    if username is not None:
        return "https://www.chirbit.com/" + username + "/rss"

def extract (url, page=None, network=False, verbosity=3, args={}):
    feed = extract_from_url (url, verbosity)
    if feed is not None:
        return feed
    else:
        notify ("Unable to get feed from URL alone", verbosity, platform)
        if network == True:
            page = download (platform, url, args, verbosity)
            feed = extract_from_page (page, verbosity)
            if feed is not None:
                return feed

@ -0,0 +1,30 @@
#!/usr/bin/python3

from utils import *
from download_page import download

# portable code to get filename
import os
platform = os.path.basename(__file__)
if platform.endswith(".py"):
    platform = platform[:(-3)]

def extract_from_url (url, verbosity):
    # split into domain and path
    index = url.find("/",url.find("//")+2)
    domain = url[:index]
    path = url[index:]

    index = path.find('/', 1)
    if index < 0:
        username = path[1:]
    else:
        username = path[1:index]

    if username:
        return "https://backend.deviantart.com/rss.xml?type=deviation&q=by%3A" + username + "+sort%3Atime+meta%3Aall"


def extract (url, page=None, network=False, verbosity=3, args={}):
    # I don't have a more robust way to do this.
    return extract_from_url (url, verbosity)

@ -0,0 +1,23 @@
#!/usr/bin/python3

from utils import *
from download_page import download

# portable code to get filename
import os
platform = os.path.basename(__file__)
if platform.endswith(".py"):
    platform = platform[:(-3)]

def extract_from_page (page, verbosity):
    return search (page, '<noscript><a href="', '">subscribe</a></noscript>')

def extract (url, page=None, network=False, verbosity=3, args={}):
    # cannot get feed from URL alone
    if not network:
        return None

    page = download (platform, url, args, verbosity)
    feed = extract_from_page (page, verbosity)
    if feed is not None:
        return feed

@ -0,0 +1,129 @@
#!/usr/bin/python3

from utils import *
from download_page import download

# portable code to get filename
import os
platform = os.path.basename(__file__)
if platform.endswith(".py"):
    platform = platform[:(-3)]

def try_common_paths (verbosity, url, args):
    debug ("Trying common paths for " + url + "...", verbosity, platform)

    # strip extra arguments at end of URL
    for symbol in ["?", "&", ";", "#"]:
        if symbol in url:
            url = url[:url.index(symbol)]

    # strip trailing slash (if applicable)
    if url.endswith("/"):
        url = url[:(-1)]

    common_paths = {
        "atom",
        "atom.xml",
        "feed",
        "feed.atom",
        "feed.rss",
        "feed.xml",
        "rss",
        "rss.xml"
    }

    for path in common_paths:
        page,response_code = download (platform, url + '/' + path, args, verbosity, True)
        if response_code == 200:
            # TODO: verify it is a valid RSS feed
            # Some pages serve response 200 for invalid pages

            # assume we found a feed
            return url + '/' + path

    # failed to find
    return None

def extract_from_page (page, verbosity, url, args):

    # Pages often include links like <link rel="alternate" type="application/rss+xml" title="My Blog's Feed" href="https://example.com/feed/" />
    # We start with the more specific patterns so we can be more confident in the results.
    # Sometimes these include multiple entries. We want the first one because it's usually the correct one. Other entries might be comment feeds, etc.
    delimeters = ['"', "'", '']
    feed_types = ["rss", "atom"]
    # some pages HTML-encode the plus sign in "application/rss+xml"
    plus_signs = ['+', "&#43;", "&#x2b;"]
    for delimeter in delimeters:
        for feed_type in feed_types:
            for plus_sign in plus_signs:
                link_format = "type=" + delimeter + "application/" + feed_type + plus_sign + "xml" + delimeter
                before = search (page, '<', link_format, reverse=True)
                after = search (page, link_format, '>')

                # if one is not None, we may get the feed
                if before is None and after is None:
                    continue

                # let us safely mess with these strings
                if before is None:
                    before = ""
                if after is None:
                    after = ""

                string_to_search = None
                if "href=" in before:
                    string_to_search = before
                elif "href=" in after:
                    string_to_search = after
                if string_to_search is not None:
                    for delimeter in delimeters:
                        result = search (string_to_search, 'href=' + delimeter, delimeter)
                        if result is not None:
                            return result

    result = try_common_paths (verbosity, url, args)
    if result is not None:
        return result

    debug ("Failed to find from page. Let's try higher-level pages.", verbosity, platform)

    # split into domain and path
    index = url.find("/",url.find("//")+2)
    domain = url[:index]
    path = url[index:]

    if path.startswith("/@") or path.startswith("/~"):
        offset = 3
    elif path.startswith("/user/"):
        offset = 7
    elif path.startswith("/users/"):
        offset = 8
    else:
        offset = 1

    # find first slash after offset (if present)
    index = path.find('/',offset)
    if index > -1:
        path = path[:index+1]
    else:
        path = '/'

    # we don't want to infinitely recurse
    if domain + path == url:
        return

    # try with higher level
    page = download (platform, domain + path, args, verbosity)
    notify ("Trying " + domain + path + " with generic extractor...", verbosity, platform)
    return extract_from_page(page, verbosity, domain + path, args)

def extract (url, page=None, network=False, verbosity=3, args={}):
    if network == True:
        if page is None:
            page = download (platform, url, args, verbosity)
        feed = extract_from_page (page, verbosity, url, args)
        if feed is not None:
            if feed.startswith("/"):
                domain = url[:url.find("/",url.find("//")+2)]
                feed = domain + feed
            return feed

@ -0,0 +1,83 @@
#!/usr/bin/python3

from utils import *
from download_page import download

# portable code to get filename
import os
platform = os.path.basename(__file__)
if platform.endswith(".py"):
    platform = platform[:(-3)]

def extract_from_url (url, verbosity):
    # split into domain and path
    index = url.find("/", url.find("//")+2)
    domain = url[:index]
    path = url[index:]

    # get owner/organization name
    index = path.find("/",1)
    if index < 0:
        # we could just return None here, as this means no project
        org = path[1:]
    else:
        org = path[1:index]
    if org == "":
        return None

    # get project name
    index2 = path.find("/",index+1)
    if index2 < 0:
        project = path[index+1:]
    else:
        project = path[index+1:index2]
    if project == "":
        return None

    # get page category
    index3 = path.find("/",index2+1)
    if index3 < 0:
        category = path[index2+1:]
    else:
        category = path[index2+1:index3]

    # return feed based on category
    urlbase = domain + "/" + org + "/" + project + "/"
    if category == "":
        # note, this is an alternate link to /org/project/commits/default-branch.atom
        return urlbase + "commits.atom"
    elif category == "releases":
        return urlbase + "releases.atom"
    elif category == "tags":
        return urlbase + "tags.atom"
    elif category == "tree":
        # get current branch
        index4 = path.find("/",index3+1)
        if index4 < 0:
            branch = path[index3+1:]
        else:
            branch = path[index3+1:index4]
        if branch != "":
            # return commit feed for that branch
            return urlbase + "commits/" + branch + ".atom"

def extract_from_page (page, verbosity):
    result = search (page, '<link href="', 'type="application/atom+xml">', reverse=True)
    if result is not None:
        # strip '" rel="alternate" title="Recent Commits to <project name>:<branch>" '
        index = result.find('"')
        result = result[:index]
        if result != "":
            return result

def extract (url, page=None, network=False, verbosity=3, args={}):
    feed = extract_from_url (url, verbosity)
    if feed is not None:
        return feed
    else:
        notify ("Unable to get feed from URL alone", verbosity, platform)
        if network == True:
            page = download (platform, url, args, verbosity)
            feed = extract_from_page (page, verbosity)
            if feed is not None:
                return feed
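
# A usage sketch, mirroring the mappings in tests/github.txt:
#   extract_from_url("https://github.com/ytdl-org/youtube-dl", 3)
#   # -> "https://github.com/ytdl-org/youtube-dl/commits.atom"
#   extract_from_url("https://github.com/ytdl-org/youtube-dl/releases", 3)
#   # -> "https://github.com/ytdl-org/youtube-dl/releases.atom"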

@ -0,0 +1,99 @@
#!/usr/bin/python3

from utils import *
from download_page import download

# portable code to get filename
import os
platform = os.path.basename(__file__)
if platform.endswith(".py"):
    platform = platform[:(-3)]

def extract_from_page (page, verbosity, url, args):
    # strip extra arguments at end of URL
    for symbol in ["?", "&", ";"]:
        if symbol in url:
            url = url[:url.index(symbol)]

    # split into domain and path
    index = url.find("/",url.find("//")+2)
    domain = url[:index]
    path = url[index:]

    # get page type
    index = path.find("/",1)
    page_type = path[1:index]

    # get item name
    if page_type == "accounts" or page_type == "video-channels":
        index2 = path.find("/",index+1)
    elif page_type == "videos":
        # assume UUID is last thing in URL after cleaning additional args
        # end index is -1 in case of trailing slash
        name = path[path.rindex("/",0,-1):]
        if path.startswith("/videos/watch/playlist/"):
            notify ("PeerTube playlists don't seem to have API pages", verbosity, platform)
            return
        elif path.startswith("/videos/watch/"):
            # format is like https://example.com/videos/watch/uuid
            index = path.find("/",index+1)
            index2 = path.find("/",index+2)
        else:
            notify ("Unrecognized URL format.", verbosity, platform)
            return
    else:
        notify ("Unrecognized URL format.", verbosity, platform)
        return None
    if index2 < 0:
        name = path[index+1:]
    else:
        name = path[index+1:index2]

    # account on other instance
    if '@' in name:
        # TODO: how do we handle protocol (http vs. https)?
        # for now, assume it's the same as url, or https if not specified
        if "//" in domain:
            protocol = domain[:domain.index("//")+2]
        else:
            debug ("Assuming HTTPS", verbosity, platform)
            protocol = "https://"
        index = name.index('@')
        domain = protocol + name[index+1:]
        name = name[:index]
        debug ("Translating " + url + " into " + domain + "/" + page_type + "/" + name, verbosity, platform)

    # get API page
    api_page_url = domain + "/api/v1/" + page_type + "/" + name
    api_page = download (platform, api_page_url, args, verbosity)

    if page_type == "videos":
        # TODO: This doesn't need two API calls if we just parse the JSON
        # query API for video-channels page

        # search from end, not from beginning, as "/video-channels/" is the significant part
        domain = search (api_page, '"url":"', '/video-channels/', reverse=True)
        if domain is not None:
            name = search (api_page, '"url":"' + domain + '/video-channels/', '"')
            if name is not None:
                page_type = "video-channels"
                api_page_url = domain + "/api/v1/" + page_type + "/" + name
                api_page = download (platform, api_page_url, args, verbosity)

    ident = search (api_page, '"id":', ",")

    if ident is not None:
        if page_type == "accounts":
            return domain + "/feeds/videos.xml?accountId=" + ident
        elif page_type == "video-channels":
            return domain + "/feeds/videos.xml?videoChannelId=" + ident

def extract (url, page=None, network=False, verbosity=3, args={}):
    # cannot get feed from URL alone
    if not network:
        return None

    # note: we need the URL for the domain
    feed = extract_from_page (page, verbosity, url, args)
    if feed is not None:
        return feed

@ -0,0 +1,23 @@
#!/usr/bin/python3

from utils import *
from download_page import download

# portable code to get filename
import os
platform = os.path.basename(__file__)
if platform.endswith(".py"):
    platform = platform[:(-3)]

def extract_from_page (page, verbosity):
    return search (page, '•<a class="blatant" href="', '">Feed</a></div>')

def extract (url, page=None, network=False, verbosity=3, args={}):
    # cannot get feed from URL alone
    if not network:
        return None

    page = download (platform, url, args, verbosity)
    feed = extract_from_page (page, verbosity)
    if feed is not None:
        return feed

@ -0,0 +1,23 @@
#!/usr/bin/python3

from utils import *
from download_page import download

# portable code to get filename
import os
platform = os.path.basename(__file__)
if platform.endswith(".py"):
    platform = platform[:(-3)]

def extract_from_page (page, verbosity):
    return search (page, '<div class="button rss_button"><a href="', '" target="_blank">RSS</a></div>')

def extract (url, page=None, network=False, verbosity=3, args={}):
    # cannot get feed from URL alone
    if not network:
        return None

    page = download (platform, url, args, verbosity)
    feed = extract_from_page (page, verbosity)
    if feed is not None:
        return feed

@ -0,0 +1,52 @@
#!/usr/bin/python3

from utils import *
from download_page import download

# portable code to get filename
import os
platform = os.path.basename(__file__)
if platform.endswith(".py"):
    platform = platform[:(-3)]

def extract_from_url (url, verbosity):
    # strip extra arguments at end of URL
    for symbol in ["?", "&", ";"]:
        if symbol in url:
            url = url[:url.index(symbol)]
    if not url.endswith("/"):
        url = url + "/"
    return url + ".rss"

def extract_from_page (page, verbosity):
    # this could be handled by the generic extractor
    # also, this method should never be reached, since extract_from_url always returns a feed
    # (note: search() escapes the + itself, so the pattern uses a plain +)
    return search (page, '<link rel="alternate" type="application/atom+xml" title="RSS" href="', '" />')

def extract (url, page=None, network=False, verbosity=3, args={}):
    feed = extract_from_url (url, verbosity)
    if feed is not None:
        return feed
    else:
        notify ("Unable to get feed from URL alone", verbosity, platform)
        notify ("Something must have gone wrong here because this point should be unreachable.", verbosity, platform)
        if network == True:
            # old Reddit interface is easier to deal with
            index = url.find("/",url.find("//")+2)
            domain = url[:index]
            path = url[index:]

            www = False

            if "www.reddit.com" in domain:
                domain = domain.replace("www.reddit.com", "old.reddit.com")
                url = domain + path
                www = True

            page = download (platform, url, args, verbosity)
            feed = extract_from_page (page, verbosity)
            if feed is not None:
                if www:
                    return feed.replace("old.reddit.com", "www.reddit.com")
                else:
                    return feed

@ -0,0 +1,25 @@
#!/usr/bin/python3

from utils import *
from download_page import download

# portable code to get filename
import os
platform = os.path.basename(__file__)
if platform.endswith(".py"):
    platform = platform[:(-3)]

def extract_from_page (page, verbosity):
    user_id = search (page, 'soundcloud:users:', '"')
    if user_id is not None:
        return "https://feeds.soundcloud.com/users/soundcloud:users:" + user_id + "/sounds.rss"

def extract (url, page=None, network=False, verbosity=3, args={}):
    # cannot get feed from URL alone
    if not network:
        return None

    page = download (platform, url, args, verbosity)
    feed = extract_from_page (page, verbosity)
    if feed is not None:
        return feed

@ -0,0 +1,60 @@
#!/usr/bin/python3

from utils import *
from download_page import download

# portable code to get filename
import os
platform = os.path.basename(__file__)
if platform.endswith(".py"):
    platform = platform[:(-3)]

def extract_from_url (url, verbosity):
    # split into domain and path
    index = url.find("/",url.find("//")+2)
    domain = url[:index]
    path = url[index:]

    primary_domain = {
        "https://tumblr.com",
        "http://tumblr.com",
        "https://www.tumblr.com",
        "http://www.tumblr.com"
    }
    if domain in primary_domain:
        # only handle blogs on other subdomains
        return None

    for page_type in ["tagged", "search"]:
        page_type_with_slashes = "/" + page_type + "/"
        if path.startswith(page_type_with_slashes):
            offset = len(page_type_with_slashes)
            tag_end = path.find('/', offset)

            if tag_end < 0:
                # no trailing slash, go to end
                tag = path[offset:]
            else:
                tag = path[offset:tag_end]

            if tag:
                return domain + page_type_with_slashes + tag + "/rss"

    # if we've reached this point, just return overall blog feed
    return domain + "/rss"

def extract_from_page (page, verbosity):
    # this method should not be called, since extract_from_url always returns a feed
    return search (page, '<link rel="alternate" type="application/rss+xml" href="', '">')

def extract (url, page=None, network=False, verbosity=3, args={}):
    feed = extract_from_url (url, verbosity)
    if feed is not None:
        return feed
    else:
        notify ("Unable to get feed from URL alone", verbosity, platform)
        if network == True:
            page = download (platform, url, args, verbosity)
            feed = extract_from_page (page, verbosity)
            if feed is not None:
                return feed

@ -0,0 +1,53 @@
#!/usr/bin/python3

from utils import *
from download_page import download

# portable code to get filename
import os
platform = os.path.basename(__file__)
if platform.endswith(".py"):
    platform = platform[:(-3)]

channel_url_start = "https://vimeo.com/user"
channel_url_end = "/videos/rss"

def extract_from_url (url, verbosity):
    # useful function for stripping ID out of URL
    def get_id (url, prefix):
        if prefix in url:
            ident = url[url.index(prefix) + len(prefix):]
            for symbol in ["/", "?", "&"]:
                if symbol in ident:
                    ident = ident[:ident.index(symbol)]
            return ident
        else:
            return None

    username = get_id (url, "vimeo.com/user")
    if username is not None:
        return channel_url_start + username + channel_url_end

def extract_from_page (page, verbosity):
    # We can get the username from a few places. We'll include multiple
    # in case Vimeo makes breaking changes.
    username = search (page, '"creator_id":', ",")
    if username is None:
        username = search (page, '"owner":{"id":', ',"display_name":"')
    if username is None:
        username = search (page, '"item":{"@id":"https://vimeo.com/user', '","name":')

    if username is not None:
        return channel_url_start + username + channel_url_end

def extract (url, page=None, network=False, verbosity=3, args={}):
    feed = extract_from_url (url, verbosity)
    if feed is not None:
        return feed
    else:
        notify ("Unable to get feed from URL alone", verbosity, platform)
        if network == True:
            page = download (platform, url, args, verbosity)
            feed = extract_from_page (page, verbosity)
            if feed is not None:
                return feed

@ -0,0 +1,68 @@
#!/usr/bin/python3

from utils import *
from download_page import download

# portable code to get filename
import os
platform = os.path.basename(__file__)
if platform.endswith(".py"):
    platform = platform[:(-3)]

channel_url = "https://www.youtube.com/feeds/videos.xml?channel_id="

def extract_from_url (url, verbosity):
    # useful function for stripping ID out of URL
    def get_id (url, prefix):
        if prefix in url:
            ident = url[url.index(prefix) + len(prefix):]
            for symbol in ["/", "?", "&"]:
                if symbol in ident:
                    ident = ident[:ident.index(symbol)]
            return ident
        else:
            return None

    # attempt to get feed for playlist
    ident = get_id (url, "youtube.com/playlist?list=")
    if ident is None:
        ident = get_id (url, "&list=")
    if ident is not None:
        return "https://www.youtube.com/feeds/videos.xml?playlist_id=" + ident

    # attempt to get feed based on channel ID
    ident = get_id (url, "youtube.com/channel/")
    if ident is not None:
        return channel_url + ident

    # attempt to get feed based on username
    ident = get_id (url, "youtube.com/user/")
    if ident is not None:
        return "https://www.youtube.com/feeds/videos.xml?user=" + ident

    return None

def extract_from_page (page, verbosity):
    ident = search (page, '<link rel="canonical" href="https://www.youtube.com/channel/', '">')
    if ident is not None:
        return channel_url + ident

    ident = search (page, '<meta itemprop="channelId" content="', '">')
    if ident is not None:
        return channel_url + ident

    ident = search (page, '"channelId":"', '"')
    if ident is not None:
        return channel_url + ident

def extract (url, page=None, network=False, verbosity=3, args={}):
    feed = extract_from_url (url, verbosity)
    if feed is not None:
        return feed
    else:
        notify ("Unable to get feed from URL alone", verbosity, platform)
        if network == True:
            page = download (platform, url, args, verbosity)
            feed = extract_from_page (page, verbosity)
            if feed is not None:
                return feed
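
# A usage sketch, mirroring tests/youtube.txt (channel URLs need no download):
#   extract_from_url("https://www.youtube.com/channel/UCqC_GY2ZiENFz2pwL0cSfAw", 3)
#   # -> "https://www.youtube.com/feeds/videos.xml?channel_id=UCqC_GY2ZiENFz2pwL0cSfAw"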

@ -0,0 +1,134 @@
#!/usr/bin/python3

# NOTE: Many options listed here are not implemented yet.

license = """
rsstube - get RSS feeds from supported sites
Copyright (C) 2021

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <https://www.gnu.org/licenses/>.
"""

def options(params):
    import sys,getopt,glob
    from utils import debug,notify,warn,error

    # get installation path
    import os.path
    path = os.path.realpath(os.path.abspath(__file__))
    path = path[0:path.rfind("/")]
    path = path[0:path.rfind("/")]

    # general settings
    network = True

    ## verbosity: in addition to the feed, print...
    ## 0: no messages (suppress errors)
    ## 1: only error messages
    ## 2: error messages and warnings
    ## 3: [default] errors, warnings, and info
    ## 4: all messages, including debugging info
    verbosity = None

    # pycurl args
    d = dict()
    header = []

    # count number of arguments
    arg_count = 0

    try:
        opts, args = getopt.getopt(params,"A:c:H:hnp:qtUVv", [
            "user-agent=",
            "ciphers=",
            "compressed",
            "header=",
            "help",
            "license",
            "non-network",
            "proxy",
            "quiet",
            "sites",
            "suppress-errors",
            "tls-max=",
            "tls13-ciphers=",
            "verbose",
            "verbosity=",
            "version"
        ])
    except getopt.GetoptError:
        error ("Invalid options. See the README or manual for legal rsstube flags.")
        sys.exit(2)
    for opt, arg in opts:
        if arg == "":
            arg_count += 1
        else:
            arg_count += 2

        if opt in ("-A", "--user-agent"):
            d["user_agent"] = arg
        elif opt == "--ciphers":
            d["ciphers"] = arg
        elif opt == "--compressed":
            d["compressed"] = True
        elif opt in ("-h", "--help"):
            print ("Usage: rsstube [OPTIONS] URL")
            # not available yet
            # print ("Use `man rsstube` to see the manual for rsstube.")
            sys.exit()
        elif opt in ("-H", "--header"):
            header.append(arg)
        elif opt == "--license":
            print(license)
            sys.exit()
        elif opt in ("-n", "--non-network"):
            network = False
        elif opt in ("-p", "--proxy"):
            d["proxy"] = arg
        elif opt in ("-q", "--quiet"):
            verbosity = 1
        elif opt == "--sites":
            print ("Site-specific support:")
            for test in sorted(glob.glob(path + "/tests/*.txt")):
                site = test[test.rfind("/")+1:(-4)]
                print ("- " + site)

            print ("\nGeneric support:")
            for test in sorted(glob.glob(path + "/tests/generic/*.txt")):
                site = test[test.rfind("/")+1:(-4)]
                print ("- " + site)
            sys.exit()
        elif opt == "--suppress-errors":
            verbosity = 0
        elif opt == "--tls-max":
            d["tls_max"] = arg
        elif opt == "--tls13-ciphers":
            d["tls13_ciphers"] = arg
        elif opt in ("-v", "--verbose"):
            verbosity = 4
        elif opt == "--verbosity":
            v = int(arg)
            if v >= 0 and v <= 4:
                verbosity = v
            else:
                print ("Invalid verbosity: " + arg)
        elif opt in ("-V", "--version"):
            version = open(path + "/docs/version","r")

            # only go to -1 to cut EOL character
            print (version.readline()[:-1])
            sys.exit()

    d["header"] = header
    return network,verbosity,d,arg_count
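
# A usage sketch (hypothetical argument list):
#   network, verbosity, args, arg_count = options(["-q", "--user-agent", "Mozilla/5.0"])
#   # -> network=True, verbosity=1, args={"user_agent": "Mozilla/5.0", "header": []}, arg_count=3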

@ -0,0 +1,101 @@
#!/usr/bin/python3

import sys,importlib

from utils import debug,notify,warn,error,success

network = True
verbosity = 3
args = {}
arg_count = 0

config = None
try:
    from pathlib import Path
    home = str(Path.home())
    local_config_path = home + "/.config/rsstube/config"
    config = open(local_config_path,"r")
except FileNotFoundError:
    try:
        global_config_path = "/etc/rsstube/config"
        config = open(global_config_path,"r")
    except FileNotFoundError:
        # no change
        config = None
file_params = ""
if config is not None:
    for line in config:
        line = line.strip()
        # skip empty lines; comment lines should begin with # after stripping
        if line and line[0] != "#":
            file_params += " " + line
from options import options

def process_args (network,verbosity,args,arg_count,network_new,verbosity_new,args_new,arg_count_new):
    if network_new == False:
        network = network_new
    if verbosity_new is not None:
        verbosity = verbosity_new
    for i in args_new:
        args[i] = args_new[i]
    arg_count = arg_count_new
    return network,verbosity,args,arg_count

# config file options
if file_params != "":
    network_new,verbosity_new,args_new,arg_count_new = options(file_params.split())
    network,verbosity,args,arg_count = process_args(network,verbosity,args,arg_count,network_new,verbosity_new,args_new,arg_count_new)

# command-line options
network_new,verbosity_new,args_new,arg_count_new = options(sys.argv[1:])
network,verbosity,args,arg_count = process_args(network,verbosity,args,arg_count,network_new,verbosity_new,args_new,arg_count_new)

if len(sys.argv) == arg_count+1:
    error ("Please provide one or more URLs.", verbosity)

for url in sys.argv[arg_count+1:]:
    from determine_site import determine_site
    debug ("Attempting to determine site...", verbosity)
    site = determine_site (url)
    if site is not None:
        debug ("Site identified as " + site, verbosity)
        notify ("Trying " + site + " extractor...", verbosity)

        # get appropriate extractor
        extractor = importlib.import_module("extractors." + site)
        feed = extractor.extract(url, None, network, verbosity, args)
        if feed is None:
            error ("Unable to get RSS feed for " + url, verbosity, site)
        else:
            success (feed)
    elif network:
        from download_page import download
        page = download (None, url, args, verbosity)

        # try to get feed for common software like PeerTube
        debug ("Attempting to determine software from page...", verbosity)
        from determine_software import determine_software
        software = determine_software (page)
        if software is not None:
            debug ("Software identified as " + software, verbosity)
            notify ("Trying " + software + " extractor...", verbosity)
            extractor = importlib.import_module("extractors." + software)
            feed = extractor.extract(url, page, network, verbosity, args)
            if feed is None:
                notify ("Unable to get RSS feed for " + url + " with " + software + " extractor", verbosity, software)
            else:
                success (feed)
                continue

        # try generic extractor even if software is known
        debug ("Trying generic extractor...", verbosity)
        extractor = importlib.import_module("extractors.generic")
        feed = extractor.extract(url, page, network, verbosity, args)
        if feed is None:
            error ("Unable to get RSS feed for " + url, verbosity, "generic")
        else:
            success (feed)
    else:
        error ("Unable to get RSS feed for " + url + " without downloading page", verbosity)

@ -0,0 +1,55 @@
#!/usr/bin/python3

import re

class color:
    ERR = '\033[1;31m'
    NC = '\033[0m'
    SUCCESS = '\033[92m'
    WARN = '\033[1;33m'

def debug (message, verbosity, platform=None):
    if verbosity >= 4:
        print ("[" + str(platform) + "] " + message)

def notify (message, verbosity, platform=None):
    if verbosity >= 3:
        print ("[" + str(platform) + "] " + message)

def warn (message, verbosity, platform=None):
    if verbosity >= 2:
        print (color.WARN + "[" + str(platform) + "] " + message + color.NC)

def error (message, verbosity=1, platform=None):
    if verbosity >= 1:
        print (color.ERR + "[" + str(platform) + "] " + message + color.NC)

def success (message):
    # colored output caused issues with piping output into other stuff
    # print (color.SUCCESS + message + color.NC)
    print (message)

def search (content, begins_with, ends_with, index=0, reverse=False):
    # hack to search based on ends_with being the significant part:
    # reverse everything, search, then reverse the result back
    if reverse:
        content = content[::-1]
        temp = begins_with
        begins_with = ends_with[::-1]
        ends_with = temp[::-1]

    # escape + signs as needed (raw strings so the backslash survives)
    begins_with = begins_with.replace('+', r"\+")
    ends_with = ends_with.replace('+', r"\+")

    # look for longest match, not shortest, if one delimiter is empty
    if begins_with == '' or ends_with == '':
        result = re.findall('(?<=' + begins_with + ')(.*)(?=' + ends_with + ')', content)
    else:
        result = re.findall('(?<=' + begins_with + ')(.*?)(?=' + ends_with + ')', content)
    if len(result) > 0:
        if reverse:
            # get index from end instead of from beginning
            # and reverse string
            return result[-1 * index - 1][::-1]
        else:
            return result[index]
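
# A usage sketch (hypothetical snippet):
#   search('<a href="https://example.com/feed/">RSS</a>', 'href="', '"')
#   # -> "https://example.com/feed/"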

@ -0,0 +1 @@
https://castro.fm/podcast/b3891201-4d76-4152-adeb-ae3e13d17f29 https://www.smashingsecurity.com/rss

@ -0,0 +1,5 @@
# user page
https://www.chirbit.com/Giannaa https://www.chirbit.com/Giannaa/rss

# track page
https://chirb.it/9qxaAG https://www.chirbit.com/Giannaa/rss

@ -0,0 +1,2 @@
https://www.deviantart.com/sashakhmel https://backend.deviantart.com/rss.xml?type=deviation&q=by%3Asashakhmel+sort%3Atime+meta%3Aall
https://www.deviantart.com/sashakhmel/art/lady-in-armor-825805802 https://backend.deviantart.com/rss.xml?type=deviation&q=by%3Asashakhmel+sort%3Atime+meta%3Aall

@ -0,0 +1 @@
https://fyyd.de/podcast/shades-of-brown/0 https://twoshadesofbrown.com/feed/podcast/

@ -0,0 +1,2 @@
# advertised with link rel="alternate"
https://opensource.com/article/18/3/start-blog-30-minutes-hugo https://opensource.com/rss.xml

@ -0,0 +1,2 @@
# advertised with link rel="alternate"
https://www.sitepoint.com/7-reasons-not-use-static-site-generator/ https://www.sitepoint.com/sitepoint.rss

@ -0,0 +1,2 @@
# advertised with link rel="alternate"
https://blog.privacytools.io/delisting-wire/ https://blog.privacytools.io/rss/

@ -0,0 +1,4 @@
https://gohugo.io/ https://gohugo.io/index.xml

# uses an HTML-encoded plus sign (&#43;) instead of a literal +
https://addy-dclxvi.github.io/post/hugo/ https://addy-dclxvi.github.io/index.xml

@ -0,0 +1,7 @@
https://mastodon.social/@Gargron https://mastodon.social/@Gargron.rss

# get user page from post
https://mastodon.social/@Gargron/105834541294913446 https://mastodon.social/@Gargron.rss

# get user page from /users/ and post page
https://mastodon.social/users/Gargron/105834541294913446 https://mastodon.social/@Gargron.rss

@ -0,0 +1,5 @@
https://nitter.mailstation.de/doctorow https://nitter.mailstation.de/doctorow/rss
https://tweet.lambda.dance/doctorow https://tweet.lambda.dance/doctorow/rss

# get user page from post
https://nitter.eu/ProBirdRights/status/1361088975658573826#m https://nitter.eu/ProBirdRights/rss

@ -0,0 +1,5 @@
https://blog.funkwhale.audio/@/funkwhale https://blog.funkwhale.audio/@/funkwhale/atom.xml
https://fediverse.blog/~/MusingsByRg/ https://fediverse.blog/~/MusingsByRg/atom.xml

# get user page from post
https://fediverse.blog/~/MusingsByRg/firefox-add-ons-for-the-active-user https://fediverse.blog/~/MusingsByRg/atom.xml

@ -0,0 +1,2 @@
https://usersnap.com/blog/hands-on-experience-with-hugo-static-site-generator/ https://usersnap.com/blog/feed/
https://thenewstack.io/tutorial-use-hugo-to-generate-a-static-website/ https://thenewstack.io/feed/

@ -0,0 +1,7 @@
# single-user instance
https://theo-andreou.org/ https://theo-andreou.org/feed/
https://theo-andreou.org/installing-a-writefreely-weblog https://theo-andreou.org/feed/

# multi-user instance
https://wordsmith.social/rgx/ https://wordsmith.social/rgx/feed/
https://wordsmith.social/rgx/is-it-important-which-instance-you-are-in https://wordsmith.social/rgx/feed/

@ -0,0 +1,17 @@
### main page -> commits feed
# Note: This is the canonical link:
#https://github.com/ytdl-org/youtube-dl https://github.com/ytdl-org/youtube-dl/commits/master.atom
# We're doing this instead because it's easier to generalize without having to
# know the primary branch name of every repo.
https://github.com/ytdl-org/youtube-dl https://github.com/ytdl-org/youtube-dl/commits.atom

### releases -> release notes feed
https://github.com/ytdl-org/youtube-dl/releases https://github.com/ytdl-org/youtube-dl/releases.atom

### tags -> tags feed
https://github.com/ytdl-org/youtube-dl/tags https://github.com/ytdl-org/youtube-dl/tags.atom
# GitHub used to declare the commits feed as the alternate link on the tags page:
#https://github.com/ytdl-org/youtube-dl/tags https://github.com/ytdl-org/youtube-dl/commits/master.atom

### commits for a specific branch
https://github.com/mastodon/mastodon/tree/feature-web-urls https://github.com/mastodon/mastodon/commits/feature-web-urls.atom

@ -0,0 +1,23 @@
# video page -> channel RSS
# https://tube.privacytools.io/videos/watch/4350417a-eae0-4ef4-9fe5-0dcdae643cba https://tube.privacytools.io/feeds/videos.xml?channelId=51

# account page -> account RSS
https://peertube.linuxrocks.online/accounts/art/video-channels https://peertube.linuxrocks.online/feeds/videos.xml?accountId=7266

# channel page -> channel RSS
https://peertube.linuxrocks.online/video-channels/arthur/videos https://peertube.linuxrocks.online/feeds/videos.xml?videoChannelId=3108

# video page -> channel RSS
https://peertube.linuxrocks.online/videos/watch/9238d983-41ec-4799-bacc-f24130bae2f3 https://peertube.linuxrocks.online/feeds/videos.xml?videoChannelId=3108

# test that it's properly stripping URL before processing
https://peertube.linuxrocks.online/videos/watch/9238d983-41ec-4799-bacc-f24130bae2f3;threadId=11471 https://peertube.linuxrocks.online/feeds/videos.xml?videoChannelId=3108

# account on another instance -> canonical instance account RSS
https://share.tube/accounts/blender@video.blender.org/video-channels https://video.blender.org/feeds/videos.xml?accountId=6

# channel on another instance -> canonical instance channel RSS
https://share.tube/video-channels/blender_open_movies@video.blender.org/videos https://video.blender.org/feeds/videos.xml?videoChannelId=9

# video on another instance -> canonical instance channel RSS
https://share.tube/videos/watch/3d95fb3d-c866-42c8-9db1-fe82f48ccb95 https://video.blender.org/feeds/videos.xml?videoChannelId=9

@ -0,0 +1 @@
https://player.fm/series/tea-with-queen-and-j https://feeds.soundcloud.com/users/soundcloud:users:83464467/sounds.rss

@ -0,0 +1 @@
https://pca.st/iHi8 https://feeds.simplecast.com/ZL7iUDiH

@ -0,0 +1,2 @@
https://soundcloud.com/greatnessgd https://feeds.soundcloud.com/users/soundcloud:users:2097827/sounds.rss
https://soundcloud.com/user-98066669 https://feeds.soundcloud.com/users/soundcloud:users:261098918/sounds.rss

@ -0,0 +1,92 @@
#!/bin/bash

succeeded=""
failed=""
run_generic_tests=false

function test_site {
    site="$1"

    if [[ $site == *.txt && -f $site ]];then
        site=${site%.txt}
    fi
    if [[ ! -f $site.txt ]];then
        echo "No test file for $site"
        echo ""
        failed="true"
        # "continue" is only valid inside a loop; bail out of the function instead
        return
    fi

    site_failed=""
    echo "Testing $site..."
    echo ""
    while read line;do
        if [[ ! -n "$line" || "$line" =~ ^[[:space:]]*# ]];then
            continue
        fi

        links=($line)

        echo "Input:"
        echo "${links[0]}"

        echo "Goal:"
        echo "${links[1]}"

        output=$(/usr/bin/python3 ../rsstube "${links[0]}" | tail -1)
        echo "Output:"
        echo "${output}"

        if [ "$output" == "${links[1]}" ];then
            echo "Success!"
        else
            echo "Failed!"
            site_failed="true"
        fi
        echo ""
    done < $site.txt
    if [[ $site_failed != "" ]];then
        echo "$site did not pass all its tests :("
        echo ""
        failed="$failed- $site\n"
    else
        echo "$site passed all its tests! :)"
        echo ""
        succeeded="$succeeded- $site\n"
    fi
}

if [[ ! -n "$1" || "$1" == "all" ]];then
    sites=$(ls *.txt | grep -Po '(?<=)(.*)(?=.txt)')
    run_generic_tests=true
else
    sites="$@"
fi
for site in $sites;do
    if [[ $site == "generic" || $site == "generic/" ]];then
        run_generic_tests=true
    else
        test_site $site
    fi
done

if [[ $run_generic_tests == "true" ]];then
    echo "Site-specific tests complete. Running generic tests now."
    sites=$(ls generic/*.txt | grep -Po '(?<=)(.*)(?=.txt)')
    for site in $sites;do
        test_site $site
    done
fi

if [[ -n $failed ]];then
    echo "Some checks failed :("
else
    echo "All checks passed! :)"
fi

echo ""
echo -e "Succeeded:\n$succeeded"
echo -e "Failed:\n$failed"

@ -0,0 +1,8 @@
# blog page
https://william-snekspeare.tumblr.com/ https://william-snekspeare.tumblr.com/rss

# tag
https://william-snekspeare.tumblr.com/tagged/comic https://william-snekspeare.tumblr.com/tagged/comic/rss

# search (apparently does provide a proper RSS feed, actually)
https://william-snekspeare.tumblr.com/search/comic/ https://william-snekspeare.tumblr.com/search/comic/rss

@ -0,0 +1,5 @@
# user page
https://vimeo.com/user51602410 https://vimeo.com/user51602410/videos/rss

# video page
https://vimeo.com/63502573 https://vimeo.com/user2617788/videos/rss

@ -0,0 +1,14 @@
# channel page (no need to download)
https://www.youtube.com/channel/UCqC_GY2ZiENFz2pwL0cSfAw https://www.youtube.com/feeds/videos.xml?channel_id=UCqC_GY2ZiENFz2pwL0cSfAw

# user page (no need to download)
https://www.youtube.com/user/ItsAllyHills https://www.youtube.com/feeds/videos.xml?user=ItsAllyHills

# playlist page (no need to download)
https://www.youtube.com/playlist?list=PL3KeV6Ui_4CayDGHw64OFXEPHgXLkrtJO https://www.youtube.com/feeds/videos.xml?playlist_id=PL3KeV6Ui_4CayDGHw64OFXEPHgXLkrtJO

# video page -> channel RSS
https://www.youtube.com/watch?v=XzIXc5CTC2M https://www.youtube.com/feeds/videos.xml?channel_id=UCexJsljKV3x4x8p0eyTrjVg

# video page with playlist arg -> playlist RSS (no need to download)
https://www.youtube.com/watch?v=fFlfxwZDFzY&list=PL8D8D4240EC972114 https://www.youtube.com/feeds/videos.xml?playlist_id=PL8D8D4240EC972114