#!/usr/bin/python3

# the star import supplies shared helpers such as search(), used below
from utils import *

from download_page import download

# portable way to derive the platform name from this file's own filename
import os

platform = os.path.basename(__file__)
if platform.endswith(".py"):
    platform = platform[:-3]


def extract_from_url(url, verbosity):
    # split the URL into domain and path
    index = url.find("/", url.find("//") + 2)
    path = url[index:]

    if not url.endswith('/'):
        url = url + '/'

    # Bibliogram exposes a ready-made feed at <profile URL>/rss.xml for /u/<username> pages
    if path.startswith("/u/"):
        return url + "rss.xml"


def extract_from_page(page, verbosity, url, args):
    # split the URL into domain and path
    index = url.find("/", url.find("//") + 2)
    domain = url[:index]
    path = url[index:]

    # look for the profile link in the page and build the feed URL from it
    username = search(page, '<a class="name" href="', '">')
    if username is not None:
        return extract_from_url(domain + username, verbosity)


def extract(url, page=None, network=False, verbosity=3, args={}):
    # first try to recognise the feed from the URL alone
    feed = extract_from_url(url, verbosity)
    if feed is not None:
        return feed

    if not network:
        return None

    # this should not happen, because we have no hardcoded Bibliogram domains
    if page is None:
        page = download(platform, url, args, verbosity)

    feed = extract_from_page(page, verbosity, url, args)
    if feed is not None:
        return feed
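

# Illustrative sketch of how extract() might be called by hand; the Bibliogram
# instance URL below is a made-up placeholder. Running the module directly
# still needs the project's utils and download_page modules to be importable.
if __name__ == "__main__":
    import sys

    # take a profile URL from the command line, or fall back to the placeholder
    test_url = sys.argv[1] if len(sys.argv) > 1 else "https://bibliogram.example/u/someuser"

    # network=False restricts extraction to URL inspection (no page download)
    print(extract(test_url, network=False))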