rsstube/scripts/extractors/reddit.py

53 lines
1.5 KiB
Python

#!/usr/bin/python3
from utils import *
from download_page import download
# portable code to get filename
import os
# Name of this extractor, taken from the file name with a trailing ".py"
# removed; it is handed to notify() and download() further down.
platform = os.path.basename(__file__)
platform = platform[:-3] if platform.endswith(".py") else platform
def extract_from_url(url, verbosity):
    """Build the RSS feed URL for a Reddit page from its URL alone.

    Everything from the first "?", "&", ";" or "#" onwards is discarded,
    then "/.rss" is appended. If what remains already ends in ".rss" it is
    returned as it is, so an existing feed URL is not extended a second time.

    url: address of the Reddit page (string)
    verbosity: accepted but not used by this function

    Returns the feed URL as a string; this function never returns None.
    """
    # strip extra arguments (query string, fragment) at end of URL
    for symbol in ("?", "&", ";", "#"):
        # partition() yields the whole string when the symbol is absent
        url = url.partition(symbol)[0]
    # already a feed address: appending again would give ".../.rss/.rss"
    if url.endswith(".rss"):
        return url
    if not url.endswith("/"):
        url += "/"
    return url + ".rss"
def extract_from_page(page, verbosity):
    """Look for the feed link advertised in a downloaded Reddit page.

    page: page contents, passed straight to search()
    verbosity: accepted but not used by this function

    Returns whatever search() produces for the text between the
    '<link rel="alternate" ...' prefix and the closing '" />'. extract()
    treats a None result as "no feed found".
    """
    # this could be handled by the generic extractor
    # also, this method should never be reached
    # Raw string: "\+" is not a valid escape in an ordinary string literal and
    # triggers an invalid-escape warning; the value passed on is unchanged.
    # NOTE(review): the escaped "+" suggests search() treats the prefix as a
    # regular expression -- confirm against utils.search.
    return search(
        page,
        r'<link rel="alternate" type="application/atom\+xml" title="RSS" href="',
        '" />',
    )
def extract(url, page=None, network=False, verbosity=3, args=None):
    """Return the RSS feed URL for a Reddit URL, or None if none is found.

    The feed is first derived from the URL itself. Only if that yields None
    (not expected to happen) and `network` is set, the page is downloaded
    from the old Reddit interface and searched for its feed link.

    url: address of the Reddit page
    page: not read; it is replaced by a fresh download when the network
        fallback runs
    network: whether downloading the page is allowed
    verbosity: passed to notify(), download() and the helper extractors
    args: extra arguments forwarded to download(); defaults to an empty dict

    Returns the feed URL, or None when no feed could be determined.
    """
    # None stands in for "no extra arguments" so that no dict object is
    # shared between calls through a mutable default.
    if args is None:
        args = {}

    feed = extract_from_url(url, verbosity)
    if feed is not None:
        return feed

    notify("Unable to get feed from URL alone", verbosity, platform)
    notify("Something must have gone wrong here because this point should be unreachable.", verbosity, platform)
    if not network:
        return None

    # old Reddit interface is easier to deal with
    # Split at the first "/" after the scheme's "//" into host part and path.
    index = url.find("/", url.find("//") + 2)
    if index == -1:
        # No path at all: the whole URL is the host part. Without this,
        # slicing with -1 would cut off the last character of the host.
        index = len(url)
    domain = url[:index]
    path = url[index:]

    www = "www.reddit.com" in domain
    if www:
        url = domain.replace("www.reddit.com", "old.reddit.com") + path

    page = download(platform, url, args, verbosity)
    feed = extract_from_page(page, verbosity)
    if feed is not None and www:
        # Report the feed on the domain the caller originally asked about.
        return feed.replace("old.reddit.com", "www.reddit.com")
    return feed