rsstube/scripts/extractors/youtube.py

#!/usr/bin/python3

from utils import *
from download_page import download

# name of this extractor: the basename of this file, minus a trailing ".py"
import os
platform = os.path.basename(__file__)
platform = platform[:-3] if platform.endswith(".py") else platform

# feed URL prefix for channels; the channel ID gets appended to it
channel_url = "https://www.youtube.com/feeds/videos.xml?channel_id="

def extract_from_url (url, verbosity):
	"""Build a YouTube feed URL using only identifiers present in `url`.

	Tried in order: playlist ID, channel ID, username. The first one
	found decides the result.

	url -- the address to inspect (string)
	verbosity -- not used by this function

	Returns the feed URL as a string, or None when `url` carries no
	usable (non-empty) identifier.
	"""
	# useful function for stripping ID out of URL
	def get_id (url, prefix):
		start = url.find(prefix)
		if start == -1:
			return None
		ident = url[start + len(prefix):]
		# the ID stops at the next path, query or fragment delimiter
		for symbol in ("/", "?", "&", "#"):
			ident = ident.split(symbol, 1)[0]
		# an empty ID would produce a feed URL that points at nothing
		return ident or None

	# attempt to get feed for playlist; "?list=" covers URLs where the
	# playlist is the first query parameter (e.g. watch?list=...&v=...)
	for prefix in ("youtube.com/playlist?list=", "&list=", "?list="):
		ident = get_id (url, prefix)
		if ident is not None:
			return "https://www.youtube.com/feeds/videos.xml?playlist_id=" + ident

	# attempt to get feed based on channel ID
	ident = get_id (url, "youtube.com/channel/")
	if ident is not None:
		return channel_url + ident

	# attempt to get feed based on username
	ident = get_id (url, "youtube.com/user/")
	if ident is not None:
		return "https://www.youtube.com/feeds/videos.xml?user=" + ident

	return None

def extract_from_page (page, verbosity):
	"""Look for a channel ID in `page` and turn it into a channel feed URL.

	page -- page content handed to `search` (presumably the HTML source
	        as a string -- confirm against `download`)
	verbosity -- not used by this function

	Returns `channel_url` plus the first ID found, or None when none of
	the markers yields a result.
	"""
	# (opening marker, closing marker) pairs, tried in this order
	markers = (
		('<link rel="canonical" href="https://www.youtube.com/channel/', '">'),
		('<meta itemprop="channelId" content="', '">'),
		('"channelId":"', '"'),
	)
	for opening, closing in markers:
		channel_id = search (page, opening, closing)
		if channel_id is not None:
			return channel_url + channel_id
	return None

def extract (url, page=None, network=False, verbosity=3, args={}):
	"""Return a feed URL for `url`, or None if none could be determined.

	The URL itself is examined first. Only when that fails and `network`
	equals True is the page downloaded and searched for a channel ID.

	url -- address to find a feed for
	page -- accepted but never read; a fresh copy is always downloaded
	network -- must compare equal to True for the download to happen
	verbosity -- forwarded to the helpers called here
	args -- forwarded untouched to `download`
	"""
	url_feed = extract_from_url (url, verbosity)
	if url_feed is not None:
		return url_feed

	notify ("Unable to get feed from URL alone", verbosity, platform)
	if not network == True:
		return None

	fetched = download (platform, url, args, verbosity)
	# extract_from_page already yields None when nothing is found
	return extract_from_page (fetched, verbosity)