#!/usr/bin/python3
|
|
|
|
from utils import *
|
|
from download_page import download
|
|
|
|
# Portable way to derive this extractor's platform name: the module's
# own filename with any trailing ".py" extension removed.
import os

platform = os.path.basename(__file__)
if platform.endswith(".py"):
    platform = platform[:-len(".py")]
|
|
|
|
def extract_from_url (url, verbosity):
    """Build the RSS feed address for a Reddit page from its URL alone.

    Anything from the first query/parameter separator onward is dropped,
    a trailing slash is guaranteed, and ".rss" is appended (Reddit serves
    the feed at <page>/.rss).  verbosity is accepted for interface
    consistency with the other extractors and is unused here.
    """
    # strip extra arguments at end of URL
    for separator in ("?", "&", ";"):
        position = url.find(separator)
        if position != -1:
            url = url[:position]
    if not url.endswith("/"):
        url += "/"
    return url + ".rss"
|
|
|
|
def extract_from_page (page, verbosity):
    """Scrape the Atom feed link out of downloaded page markup.

    This could be handled by the generic extractor; in practice this
    fallback should never be reached, since extract_from_url succeeds
    on its own.
    """
    link_prefix = '<link rel="alternate" type="application/atom\+xml" title="RSS" href="'
    link_suffix = '" />'
    return search (page, link_prefix, link_suffix)
|
|
|
|
def extract (url, page=None, network=False, verbosity=3, args=None):
    """Return the RSS feed URL for a Reddit page.

    url       -- address of the Reddit page
    page      -- pre-downloaded page contents (ignored; re-fetched on demand)
    network   -- whether downloading the page is allowed as a fallback
    verbosity -- log level forwarded to notify()/download()
    args      -- extra options forwarded to download(); None means no extras

    Returns the feed URL as a string, or None if every strategy fails.
    """
    # Use a None sentinel instead of a mutable default argument, which
    # would be shared (and mutable) across calls.
    if args is None:
        args = {}
    feed = extract_from_url (url, verbosity)
    if feed is not None:
        return feed
    else:
        notify ("Unable to get feed from URL alone", verbosity, platform)
    # extract_from_url always returns a string, so this fallback path
    # should be unreachable; log loudly if we ever get here.
    notify ("Something must have gone wrong here because this point should be unreachable.", verbosity, platform)
    if network:
        # old Reddit interface is easier to deal with
        index = url.find("/",url.find("//")+2)
        domain = url[:index]
        path = url[index:]

        www = False

        if "www.reddit.com" in domain:
            domain = domain.replace("www.reddit.com", "old.reddit.com")
            url = domain + path
            www = True

        page = download (platform, url, args, verbosity)
        feed = extract_from_page (page, verbosity)
        if feed is not None:
            # Hand back a feed on the domain the caller asked about.
            if www:
                return feed.replace("old.reddit.com", "www.reddit.com")
            else:
                return feed
|