Fix castbox extractor for channel pages.

2022-03-27 00:00:00 +00:00 · 2022-03-27 00:00:00 +00:00 · 51290a7203
parent 1d59b5ed91
commit 51290a7203
1 changed file with 32 additions and 7 deletions
--- a/src/extractors/castbox.py
+++ b/src/extractors/castbox.py
@@ -10,12 +10,37 @@ platform = os.path.basename(__file__)
 if platform.endswith(".py"):
 	platform = platform[:(-3)]
-def extract_from_page (page, verbosity):
+def extract_from_page (page, verbosity, url, args):
-	# it's like %22%2C%22rss_url%22%3A%22https%3A%2F%2Ffeeds.npr.org%2F510310%2Fpodcast.xml%22%2C%22
+	# if it's a channel page, try to get an episode page
-	# and we have to fix the codes
+	channel_string = "castbox.fm/channel/id"
-	feed = search (page, '%22%2C%22rss_url%22%3A%22', '%22%2C%22')
+	if channel_string in url:
-	if not feed is None:
+		# get channel ID
-		return unquote(feed)
+		channel_id = url[url.index(channel_string)+len(channel_string):]
 		# strip extra arguments at end of URL
 		for symbol in ["/", "?", "&", ";"]:
 			if symbol in channel_id:
 				channel_id = channel_id[:channel_id.index(symbol)]
 		# get the first episode ID
 		episode_id = search (page, 'eids%22%3A%5B', '%2C')
 		if episode_id is None:
 			episode_id = search (page, 'eid%22%3A', '%2C')
 		if not episode_id is None:
 			episode_url = "https://castbox.fm/episode/id" + channel_id + "-id" + episode_id
 			episode_page = download (platform, episode_url, args, verbosity)
 	# if the original URL is an episode URL, use that page in the next step
 	elif "castbox.fm/episode/" in url:
 		episode_page = page
 	# if it's an episode page, get the feed from the page
 	if not episode_page is None:
 		# it's like %22%2C%22rss_url%22%3A%22https%3A%2F%2Ffeeds.npr.org%2F510310%2Fpodcast.xml%22%2C%22
 		# and we have to fix the codes
 		feed = search (episode_page, '%22%2C%22rss_url%22%3A%22', '%22%2C%22')
 		if not feed is None:
 			return unquote(feed)
 def extract (url, page=None, network=False, verbosity=3, args={}):
 	# cannot get feed from URL alone
@@ -23,6 +48,6 @@ def extract (url, page=None, network=False, verbosity=3, args={}):
 		return None
 	page = download (platform, url, args, verbosity)
-	feed = extract_from_page (page, verbosity)
+	feed = extract_from_page (page, verbosity, url, args)
 	if not feed is None:
 		return feed