rsstube/scripts/extractors/youtube.py

69 lines
1.9 KiB
Python

#!/usr/bin/python3
from utils import *
from download_page import download
# Name this extractor after its own source file: the basename of this
# module with a trailing ".py" removed. Other extensions are left intact.
import os

platform = os.path.basename(__file__)
platform = platform[:-3] if platform.endswith(".py") else platform

# Feed URL prefix for channels; a channel ID is appended directly to it.
channel_url = "https://www.youtube.com/feeds/videos.xml?channel_id="
def extract_from_url (url, verbosity):
    """Build a YouTube feed URL from an identifier embedded in *url*.

    A playlist ID is tried first, then a channel ID, then a username.

    Args:
        url: Address to inspect.
        verbosity: Not used by this function.

    Returns:
        The feed URL as a string, or None when *url* contains no usable
        playlist, channel or user identifier.
    """
    def get_id (url, prefix):
        """Return the text that follows *prefix* in *url*, cut at the next delimiter.

        Returns None when *prefix* is absent or nothing follows it.
        """
        if prefix not in url:
            return None
        ident = url[url.index(prefix) + len(prefix):]
        # Stop at the first path, query or fragment delimiter so that
        # trailing segments such as "/videos", "&index=2" or "#top" are dropped.
        for symbol in ("/", "?", "&", "#"):
            ident = ident.split(symbol, 1)[0]
        # An empty ID would yield a feed URL with nothing after "=".
        return ident or None

    # attempt to get feed for playlist; "list" may be the first query
    # parameter ("?list=", which also covers "youtube.com/playlist?list=")
    # or a later one ("&list=")
    for prefix in ("?list=", "&list="):
        ident = get_id (url, prefix)
        if ident is not None:
            return "https://www.youtube.com/feeds/videos.xml?playlist_id=" + ident

    # attempt to get feed based on channel ID
    ident = get_id (url, "youtube.com/channel/")
    if ident is not None:
        return channel_url + ident

    # attempt to get feed based on username
    ident = get_id (url, "youtube.com/user/")
    if ident is not None:
        return "https://www.youtube.com/feeds/videos.xml?user=" + ident

    return None
def extract_from_page (page, verbosity):
    """Look for a channel ID in *page* and turn it into a channel feed URL.

    Args:
        page: Page content, handed unchanged to ``search`` as its first argument.
        verbosity: Not used by this function.

    Returns:
        ``channel_url`` with the first ID found appended, or None when
        none of the markers produces a result.
    """
    # Argument pairs for ``search``, tried in this order. Presumably the
    # text expected immediately before and after the ID -- ``search`` is
    # defined outside this file, confirm against utils.
    markers = (
        ('<link rel="canonical" href="https://www.youtube.com/channel/', '">'),
        ('<meta itemprop="channelId" content="', '">'),
        ('"channelId":"', '"'),
    )
    for before, after in markers:
        found = search (page, before, after)
        if found is not None:
            return channel_url + found
    return None
def extract (url, page=None, network=False, verbosity=3, args=None):
    """Return a YouTube feed URL for *url*, downloading the page if allowed.

    The URL itself is inspected first; the page is only downloaded when
    that fails and *network* is enabled.

    Args:
        url: Address to find a feed for.
        page: Accepted but never read; when the network path is taken it
            is overwritten by the freshly downloaded page.
        network: Enables the download fallback when equal to True.
        verbosity: Forwarded to ``notify``, ``download`` and the
            ``extract_from_*`` helpers.
        args: Options forwarded to ``download``. Defaults to a new empty
            dict on every call, so no dict is shared between calls.

    Returns:
        The feed URL, or None when no feed could be determined.
    """
    if args is None:
        args = {}

    feed = extract_from_url (url, verbosity)
    if feed is not None:
        return feed

    notify ("Unable to get feed from URL alone", verbosity, platform)
    # Strict comparison: truthy values other than True/1 do not enable the download.
    if network == True:
        page = download (platform, url, args, verbosity)
        return extract_from_page (page, verbosity)
    return None