From 499c9e660a0fe1025a10c76928cce115374ee4a4 Mon Sep 17 00:00:00 2001
From: 0x80 <0x80@negativezero.link>
Date: Sat, 23 Apr 2022 00:00:00 +0000
Subject: [PATCH] Support funkwhale tracks if they come from a channel.

Tracks without an associated channel will not work, because only channels provide a feed.
Known issue: unsupported Funkwhale URLs still show up as false positives; this needs to be fixed.
---
 src/extractors/funkwhale.py | 57 +++++++++++++++++++++++++++++++++++++
 tests/funkwhale.txt         |  9 +++++-
 2 files changed, 65 insertions(+), 1 deletion(-)
diff --git a/src/extractors/funkwhale.py b/src/extractors/funkwhale.py
index 48c28f5..108cf49 100644
--- a/src/extractors/funkwhale.py
+++ b/src/extractors/funkwhale.py
@@ -1,6 +1,7 @@
 #!/usr/bin/python3
 
 from utils import *
+from download_page import download
 
 # portable code to get filename
 import os
@@ -44,7 +45,63 @@ def extract_from_url (url, verbosity):
 		if channel_name != "":
 			return domain + "/api/v1/channels/" + channel_name + "/rss"
 
+def extract_from_page (page, verbosity, url, args):
+	"""Return extract_from_url's feed for the channel behind a /library/tracks/ url, or
+	None when url is not a track page or no artist id / complete channel data is found."""
+	# split into domain and path
+	slash1 = url.find("/",url.find("//")+2)
+	domain = url[:slash1]
+	path = url[slash1:]
+	if not path.startswith("/library/tracks/"):
+		return None
+	# get artist ID
+	# try this way first because it doesn't require downloading another page
+	artist_url = search (page, '<meta content="', '" property="music:musician" />', reverse=True)
+	if artist_url is not None:
+		# note, it doesn't actually matter if the id keeps a trailing slash or not
+		artist_id = artist_url[artist_url.rfind("/",0,-1)+1:]
+	else:
+		track_id = url[url.rfind("/",0,-1)+1:]
+		track_api_page = download (platform, domain + "/api/v1/tracks/" + track_id, args, verbosity)
+		track_artist_info = search (track_api_page, '"artist":{', '}')
+		# a missing artist object means there is no id to look up
+		artist_id = None if track_artist_info is None else search (track_artist_info, '"id":', ',')
+	if artist_id is None:
+		return None
+	artist_api_page = download (platform, domain + "/api/v1/artists/" + artist_id, args, verbosity)
+
+	# note: no channel means no feed
+	if '"channel":null' in artist_api_page:
+		return None
+	channel_info = search (artist_api_page, '"channel":{', '}}')
+	if channel_info is None:
+		return None
+	full_username = search (channel_info, '"full_username":"', '"')
+	preferred_username = search (channel_info, '"preferred_username":"', '"')
+	channel_domain = search (channel_info, '"domain":"', '"')
+	# TODO: how do we handle protocol (http vs. https)?
+	# for now, assume it's the same as url, or https if not specified
+	protocol = domain[:domain.index("//")+2] if "//" in domain else "https://"
+	debug ("Assuming " + protocol, verbosity, platform)
+	# some checks that the data look correct
+	if full_username is None or preferred_username is None or channel_domain is None:
+		return None
+	if full_username != preferred_username + "@" + channel_domain:
+		debug ("full_username did not match preferred_username and domain. Using preferred_username...", verbosity, platform)
+	if protocol + channel_domain != domain:
+		debug ("channel_domain did not match URL domain. Using channel_domain...", verbosity, platform)
+	channel_url = protocol + channel_domain + "/channels/" + preferred_username
+	return extract_from_url (channel_url, verbosity)
+
 def extract (url, page=None, network=False, verbosity=3, args={}):
 	feed = extract_from_url (url, verbosity)
 	if not feed is None:
 		return feed
+
+	# the URL alone was not enough, so fall back to inspecting the page
+	notify ("Unable to get feed from URL alone", verbosity, platform)
+	if page is None:
+		# fetch the page only when the caller did not pass one in
+		page = download (platform, url, args, verbosity)
+	# None from extract_from_page means no feed was found
+	return extract_from_page (page, verbosity, url, args)
diff --git a/tests/funkwhale.txt b/tests/funkwhale.txt
index 29f5ed9..84282ff 100644
--- a/tests/funkwhale.txt
+++ b/tests/funkwhale.txt
@@ -1,3 +1,10 @@
+# channel
 https://funkwhale.it/channels/ilpunto/ https://funkwhale.it/api/v1/channels/ilpunto/rss
-https://funkwhale.thurk.org/channels/radio_colibri@open.audio/ https://open.audio/api/v1/channels/radio_colibri/rss
 https://open.audio/channels/radio_colibri https://open.audio/api/v1/channels/radio_colibri/rss
+
+# remote channel
+https://funkwhale.thurk.org/channels/radio_colibri@open.audio/ https://open.audio/api/v1/channels/radio_colibri/rss
+
+# get channel from track
+https://funkwhale.it/library/tracks/3153/ https://funkwhale.it/api/v1/channels/ifattiindiretta/rss
+https://funkwhale.co.uk/library/tracks/30234/ https://funkwhale.co.uk/api/v1/channels/OtherworldEscape/rss