rsstube/scripts/extractors/bibliogram.py

48 lines
1.1 KiB
Python

#!/usr/bin/python3
from utils import *
from download_page import download
# Portable way to derive this module's name: its own filename without ".py".
import os
# `platform` is later handed to download() as its first argument.
platform = os.path.basename(__file__)
platform = platform[:-len(".py")] if platform.endswith(".py") else platform
def extract_from_url(url, verbosity):
    """Build the RSS feed URL for a URL whose path starts with "/u/".

    Args:
        url: Page URL to inspect. The path is taken to start at the first
            "/" found after the "//" separator (or after the first
            character when there is no "//").
        verbosity: Not used by this function.

    Returns:
        The URL (without query string or fragment) with "rss.xml" appended
        as a final path segment, or None when the path does not start with
        "/u/".
    """
    # A query string or fragment is not part of the path; appending
    # "rss.xml" after it would produce a URL that is not the feed.
    for separator in ("?", "#"):
        url = url.partition(separator)[0]

    # split into domain and path
    path_start = url.find("/", url.find("//") + 2)
    if path_start == -1:
        # No path at all, so it cannot start with "/u/".
        return None
    if not url[path_start:].startswith("/u/"):
        return None

    if not url.endswith("/"):
        url += "/"
    return url + "rss.xml"
def extract_from_page(page, verbosity, url, args):
    """Locate the ``<a class="name" href="...">`` link in a page and turn it
    into a feed URL.

    Args:
        page: Page content handed to ``search``.
        verbosity: Forwarded to ``extract_from_url``.
        url: URL the page belongs to; everything before its path is
            prepended to the link that was found.
        args: Not used by this function.

    Returns:
        The result of ``extract_from_url`` for the combined address, or None
        when ``search`` finds no link.
    """
    # split into domain and path
    path_start = url.find("/", url.find("//") + 2)
    # Without a path find() yields -1, and url[:-1] would silently drop the
    # last character of the host; the whole URL is the domain in that case.
    domain = url if path_start == -1 else url[:path_start]

    # NOTE(review): `search` comes from utils; presumably it returns the text
    # between the two markers, or None when they are absent -- confirm.
    profile_href = search(page, '<a class="name" href="', '">')
    if profile_href is None:
        return None
    return extract_from_url(domain + profile_href, verbosity)
def extract(url, page=None, network=False, verbosity=3, args=None):
    """Return the feed URL for ``url``, or None when none can be determined.

    The URL alone is tried first. Only when that fails and ``network`` is
    true is the page content consulted, downloading it if it was not
    supplied.

    Args:
        url: Address to find a feed for.
        page: Already-downloaded page content, or None.
        network: When false, nothing beyond the URL itself is examined.
        verbosity: Forwarded to the helper functions and to ``download``.
        args: Options forwarded to ``download`` and ``extract_from_page``;
            None is treated as an empty dict.

    Returns:
        The feed URL, or None.
    """
    # A literal {} default would be a single dict shared by every call.
    if args is None:
        args = {}

    feed = extract_from_url(url, verbosity)
    if feed is not None:
        return feed

    if not network:
        return None

    # this should not happen because we have no hardcoded Bibliogram domains
    if page is None:
        page = download(platform, url, args, verbosity)
    return extract_from_page(page, verbosity, url, args)