#!/usr/bin/python3

from utils import *
from download_page import download

# portable code to get filename
import os

# Name of this module without the ".py" suffix; passed to notify/debug/download
# so they can tell which extractor a message or request came from.
platform = os.path.basename(__file__)
if platform.endswith(".py"):
    platform = platform[:-3]


def extract_from_page(page, verbosity, url, args):
    """Build a PeerTube feed URL for the account or channel behind *url*.

    Recognized URL shapes (after the domain):
      /accounts/<name>[/...]
      /video-channels/<name>[/...]
      /videos/watch/<uuid>[/...]
    A <name> of the form "user@host" is looked up on "host" instead of the
    domain in *url*.

    Parameters:
      page      -- unused here; kept for interface compatibility
      verbosity -- forwarded to notify/debug/download
      url       -- the page URL; the instance domain is taken from it
      args      -- forwarded unchanged to download

    Returns the feed URL string
    (<domain>/feeds/videos.xml?accountId=<id> or ...?videoChannelId=<id>),
    or None when the URL is not recognized or no id could be found.
    """
    # strip extra arguments at end of URL
    for symbol in ["?", "&", ";"]:
        if symbol in url:
            url = url[:url.index(symbol)]

    # split into domain and path
    index = url.find("/", url.find("//") + 2)
    if index < 0:
        # No path component at all. Without this guard the -1 would be used
        # as a slice bound and silently chop the last character of the URL.
        notify("Unrecognized URL format.", verbosity, platform)
        return None
    domain = url[:index]
    path = url[index:]

    # get page type
    index = path.find("/", 1)
    if index < 0:
        # Single path segment (e.g. "/accountsX"): there is no item name.
        # Slicing with -1 here would turn "/accountsX" into page type
        # "accounts" and go on to query the API with an empty name.
        notify("Unrecognized URL format.", verbosity, platform)
        return None
    page_type = path[1:index]

    # get item name
    if page_type in ("accounts", "video-channels"):
        index2 = path.find("/", index + 1)
    elif page_type == "videos":
        if path.startswith("/videos/watch/playlist/"):
            notify("PeerTube playlists don't seem to have API pages", verbosity, platform)
            return None
        if not path.startswith("/videos/watch/"):
            notify("Unrecognized URL format.", verbosity, platform)
            return None
        # format is like https://example.com/videos/watch/uuid
        # move index to the slash after "watch"; the UUID follows it
        index = path.find("/", index + 1)
        index2 = path.find("/", index + 1)
    else:
        notify("Unrecognized URL format.", verbosity, platform)
        return None

    if index2 < 0:
        name = path[index + 1:]
    else:
        name = path[index + 1:index2]

    if not name:
        # e.g. "/accounts/" -- querying the API with an empty name would hit
        # a different endpoint and pick up an unrelated "id".
        notify("Unrecognized URL format.", verbosity, platform)
        return None

    # account on other instance
    if "@" in name:
        # TODO: how do we handle protocol (http vs. https)?
        # for now, assume it's the same as url, or https if not specified
        if "//" in domain:
            protocol = domain[:domain.index("//") + 2]
        else:
            debug("Assuming HTTPS", verbosity, platform)
            protocol = "https://"
        # split at the first '@': local part before, remote host after
        name, _, remote_host = name.partition("@")
        domain = protocol + remote_host
        debug("Translating " + url + " into " + domain + "/" + page_type + "/" + name, verbosity, platform)

    # get API page
    api_page_url = domain + "/api/v1/" + page_type + "/" + name
    api_page = download(platform, api_page_url, args, verbosity)
    # NOTE(review): download's failure value is not visible from this file;
    # guard against None in case that is how it signals a failed request.
    if api_page is None:
        return None

    if page_type == "videos":
        # TODO: This doesn't need two API calls if we just parse the JSON
        # query API for video-channels page
        # search from end, not from beginning, as "/video-channels/" is significant part
        domain = search(api_page, '"url":"', '/video-channels/', reverse=True)
        if domain is not None:
            name = search(api_page, '"url":"' + domain + '/video-channels/', '"')
            if name is not None:
                page_type = "video-channels"
                api_page_url = domain + "/api/v1/" + page_type + "/" + name
                api_page = download(platform, api_page_url, args, verbosity)
                if api_page is None:
                    return None

    ident = search(api_page, '"id":', ",")
    if ident is not None:
        if page_type == "accounts":
            return domain + "/feeds/videos.xml?accountId=" + ident
        if page_type == "video-channels":
            return domain + "/feeds/videos.xml?videoChannelId=" + ident

    # Either no id was found, or a video could not be mapped to its channel.
    return None


def extract(url, page=None, network=False, verbosity=3, args=None):
    """Return the feed URL for a PeerTube *url*, or None.

    The feed cannot be derived from the URL alone, so nothing is attempted
    unless *network* is true. *args* is forwarded to the downloader; when
    omitted, a fresh empty dict is used for each call.
    """
    # cannot get feed from URL alone
    if not network:
        return None

    # avoid a shared mutable default: build a new dict per call
    if args is None:
        args = {}

    # note: we need the URL for the domain
    return extract_from_page(page, verbosity, url, args)