From 499c9e660a0fe1025a10c76928cce115374ee4a4 Mon Sep 17 00:00:00 2001
From: 0x80 <0x80@negativezero.link>
Date: Sat, 23 Apr 2022 00:00:00 +0000
Subject: [PATCH] Support funkwhale tracks if they come from a channel.

Tracks without an associated channel will not work properly.

Note: Unsupported funkwhale URLs will be false positives. I need to fix this.
---
 src/extractors/funkwhale.py | 83 +++++++++++++++++++++++++++++++++++++
 tests/funkwhale.txt         |  9 ++++-
 2 files changed, 91 insertions(+), 1 deletion(-)

diff --git a/src/extractors/funkwhale.py b/src/extractors/funkwhale.py
index 48c28f5..108cf49 100644
--- a/src/extractors/funkwhale.py
+++ b/src/extractors/funkwhale.py
@@ -1,6 +1,7 @@
 #!/usr/bin/python3
 
 from utils import *
+from download_page import download
 
 # portable code to get filename
 import os
@@ -44,7 +45,89 @@ def extract_from_url (url, verbosity):
     if channel_name != "":
         return domain + "/api/v1/channels/" + channel_name + "/rss"
 
+def extract_from_page (page, verbosity, url, args):
+    """Find the channel feed for a Funkwhale track page.
+
+    Only URLs whose path starts with /library/tracks/ are handled. The
+    track's artist is looked up, and if that artist has a channel the
+    channel URL is handed to extract_from_url.
+
+    Returns the feed URL, or None when the URL is not a track URL, the
+    artist has no channel, or a lookup along the way yields nothing.
+    """
+    # split into domain and path
+    slash1 = url.find("/",url.find("//")+2)
+    domain = url[:slash1]
+    path = url[slash1:]
+
+    if not path.startswith("/library/tracks/"):
+        return None
+
+    # get artist ID
+    # try this way first because it doesn't require downloading another page
+    # NOTE(review): the start marker passed to search is an empty string;
+    # confirm that this is intended
+    artist_url = None
+    if page is not None:
+        artist_url = search (page, '', reverse=True)
+    if artist_url is not None:
+        # note, it doesn't actually matter if artist has a trailing slash or not
+        artist_id = artist_url[artist_url.rfind("/",0,-1)+1:]
+    else:
+        # trailing slash doesn't matter
+        track_id = url[url.rfind("/",0,-1)+1:]
+        track_api_page = download (platform, domain + "/api/v1/tracks/" + track_id, args, verbosity)
+        # stop quietly rather than crash when any lookup step comes back empty
+        if track_api_page is None:
+            return None
+        track_artist_info = search (track_api_page, '"artist":{', '}')
+        if track_artist_info is None:
+            return None
+        artist_id = search (track_artist_info, '"id":', ',')
+        if artist_id is None:
+            return None
+
+    artist_api_page = download (platform, domain + "/api/v1/artists/" + artist_id, args, verbosity)
+
+    # note: no channel means no feed
+    if artist_api_page is None or '"channel":null' in artist_api_page:
+        return None
+    channel_info = search (artist_api_page, '"channel":{', '}}')
+    if channel_info is None:
+        return None
+
+    full_username = search (channel_info, '"full_username":"', '"')
+    preferred_username = search (channel_info, '"preferred_username":"', '"')
+    channel_domain = search (channel_info, '"domain":"', '"')
+
+    # TODO: how do we handle protocol (http vs. https)?
+    # for now, assume it's the same as url, or https if not specified
+    if "//" in domain:
+        protocol = domain[:domain.index("//")+2]
+    else:
+        protocol = "https://"
+        debug ("Assuming " + protocol, verbosity, platform)
+
+    # the channel URL cannot be built without these two values
+    if preferred_username is None or channel_domain is None:
+        return None
+
+    # some checks that the data look correct
+    if full_username is not None and full_username != preferred_username + "@" + channel_domain:
+        debug ("full_username did not match preferred_username and domain. Using preferred_username...", verbosity, platform)
+    if protocol + channel_domain != domain:
+        debug ("channel_domain did not match URL domain. Using channel_domain...", verbosity, platform)
+
+    channel_url = protocol + channel_domain + "/channels/" + preferred_username
+    return extract_from_url (channel_url, verbosity)
+
 def extract (url, page=None, network=False, verbosity=3, args={}):
     feed = extract_from_url (url, verbosity)
     if not feed is None:
         return feed
+
+    # the URL alone was not enough; fall back to inspecting the page
+    notify ("Unable to get feed from URL alone", verbosity, platform)
+    if page is None:
+        page = download (platform, url, args, verbosity)
+    return extract_from_page (page, verbosity, url, args)
diff --git a/tests/funkwhale.txt b/tests/funkwhale.txt
index 29f5ed9..84282ff 100644
--- a/tests/funkwhale.txt
+++ b/tests/funkwhale.txt
@@ -1,3 +1,10 @@
+# channel
 https://funkwhale.it/channels/ilpunto/ https://funkwhale.it/api/v1/channels/ilpunto/rss
-https://funkwhale.thurk.org/channels/radio_colibri@open.audio/ https://open.audio/api/v1/channels/radio_colibri/rss
 https://open.audio/channels/radio_colibri https://open.audio/api/v1/channels/radio_colibri/rss
+
+# remote channel
+https://funkwhale.thurk.org/channels/radio_colibri@open.audio/ https://open.audio/api/v1/channels/radio_colibri/rss
+
+# get channel from track
+https://funkwhale.it/library/tracks/3153/ https://funkwhale.it/api/v1/channels/ifattiindiretta/rss
+https://funkwhale.co.uk/library/tracks/30234/ https://funkwhale.co.uk/api/v1/channels/OtherworldEscape/rss