# rsstube/scripts/extractors/bibliogram.py

#!/usr/bin/python3

from utils import *
from download_page import download

# portable code to get filename
import os
# Name of this extractor: this file's own basename, minus a trailing ".py" if present.
platform = os.path.basename(__file__)
platform = platform[:-len(".py")] if platform.endswith(".py") else platform

def extract_from_url (url, verbosity):
	"""Build the RSS feed URL for a URL whose path starts with "/u/".

	url: the URL to inspect. Any query string or fragment is dropped first,
		so that "rss.xml" is appended to the path and not to the query.
	verbosity: not used by this function.

	Returns the URL (with a trailing slash ensured) followed by "rss.xml"
	when the path begins with "/u/", otherwise None.
	"""
	# drop "?query" and "#fragment"; whichever comes first ends the path
	for separator in ("?", "#"):
		url = url.split(separator, 1)[0]

	# split into domain and path: the path starts at the first "/" after "//"
	path_start = url.find("/", url.find("//") + 2)
	if path_start == -1:
		# no path at all, so this cannot be a "/u/" URL
		return None

	if not url[path_start:].startswith("/u/"):
		return None

	if not url.endswith("/"):
		url += "/"
	return url + "rss.xml"

def extract_from_page (page, verbosity, url, args):
	"""Look for a profile link in a downloaded page and turn it into a feed URL.

	page: page content handed to `search` (from utils), which is asked for
		the text between '<a class="name" href="' and '">'.
		NOTE(review): presumably `search` returns None when the markers are
		not found -- confirm against utils.
	verbosity: forwarded to `extract_from_url`.
	url: URL the page belongs to; only the part before its path is used.
	args: not used by this function.

	Returns the result of `extract_from_url` for domain + href, or None when
	no link was found.
	"""
	username = search (page, '<a class="name" href="', '">')
	if username is None:
		return None

	# the domain is everything before the first "/" that follows "//"
	index = url.find("/", url.find("//") + 2)
	# find() returns -1 when the URL has no path; slicing with -1 would
	# chop the last character off the host, so keep the whole URL instead
	domain = url if index == -1 else url[:index]

	return extract_from_url (domain + username, verbosity)

def extract (url, page=None, network=False, verbosity=3, args={}):
	"""Return a feed URL for `url`, or None if none could be determined.

	The URL alone is tried first via `extract_from_url`. Only when that
	yields nothing and `network` is true is the page consulted: `page` is
	used if supplied, otherwise it is fetched with `download`, and the
	result is passed to `extract_from_page`.

	url: the URL to resolve.
	page: already-downloaded page content, or None.
	network: whether falling back to the page (and downloading) is allowed.
	verbosity: forwarded to the helpers.
	args: forwarded to `download` and `extract_from_page`.
	"""
	result = extract_from_url (url, verbosity)

	if result is None and network:
		# this should not happen because we have no hardcoded Bibliogram domains
		if page is None:
			page = download (platform, url, args, verbosity)
		result = extract_from_page (page, verbosity, url, args)

	return result