diff --git a/src/extractors/castbox.py b/src/extractors/castbox.py
index 7700f82..b4c4475 100644
--- a/src/extractors/castbox.py
+++ b/src/extractors/castbox.py
@@ -10,12 +10,48 @@ platform = os.path.basename(__file__)
 if platform.endswith(".py"):
     platform = platform[:(-3)]
 
-def extract_from_page (page, verbosity):
-    # it's like %22%2C%22rss_url%22%3A%22https%3A%2F%2Ffeeds.npr.org%2F510310%2Fpodcast.xml%22%2C%22
-    # and we have to fix the codes
-    feed = search (page, '%22%2C%22rss_url%22%3A%22', '%22%2C%22')
-    if not feed is None:
-        return unquote(feed)
+def extract_from_page (page, verbosity, url, args):
+    """Return the RSS feed URL advertised by a Castbox page, or None.
+
+    page is the downloaded content of url. For a channel URL the first
+    episode page is downloaded (args and verbosity are passed on to
+    download) and searched instead; for an episode URL page itself is
+    searched. Any other URL yields None.
+    """
+    # start unset so the final check is safe when no branch below finds a page
+    episode_page = None
+
+    # if it's a channel page, try to get an episode page
+    channel_string = "castbox.fm/channel/id"
+    if channel_string in url:
+        # get channel ID
+        channel_id = url[url.index(channel_string)+len(channel_string):]
+        # strip extra arguments at end of URL
+        for symbol in ["/", "?", "&", ";"]:
+            if symbol in channel_id:
+                channel_id = channel_id[:channel_id.index(symbol)]
+
+        # get the first episode ID
+        episode_id = search (page, 'eids%22%3A%5B', '%2C')
+        if episode_id is None:
+            episode_id = search (page, 'eid%22%3A', '%2C')
+
+        if episode_id is not None:
+            episode_url = "https://castbox.fm/episode/id" + channel_id + "-id" + episode_id
+            episode_page = download (platform, episode_url, args, verbosity)
+
+    # if the original URL is an episode URL, use that page in the next step
+    elif "castbox.fm/episode/" in url:
+        episode_page = page
+
+    # if it's an episode page, get the feed from the page
+    if episode_page is not None:
+        # it's like %22%2C%22rss_url%22%3A%22https%3A%2F%2Ffeeds.npr.org%2F510310%2Fpodcast.xml%22%2C%22
+        # and we have to fix the codes
+        feed = search (episode_page, '%22%2C%22rss_url%22%3A%22', '%22%2C%22')
+        if feed is not None:
+            return unquote(feed)
+    return None
 
 def extract (url, page=None, network=False, verbosity=3, args={}):
     # cannot get feed from URL alone
@@ -23,6 +59,6 @@ def extract (url, page=None, network=False, verbosity=3, args={}):
         return None
     page = download (platform, url, args, verbosity)
 
-    feed = extract_from_page (page, verbosity)
+    feed = extract_from_page (page, verbosity, url, args)
     if not feed is None:
         return feed