Fix castbox extractor for channel pages.
This commit is contained in:
parent
1d59b5ed91
commit
51290a7203
|
@ -10,12 +10,37 @@ platform = os.path.basename(__file__)
|
||||||
if platform.endswith(".py"):
|
if platform.endswith(".py"):
|
||||||
platform = platform[:(-3)]
|
platform = platform[:(-3)]
|
||||||
|
|
||||||
def extract_from_page (page, verbosity):
|
def extract_from_page (page, verbosity, url, args):
|
||||||
# it's like %22%2C%22rss_url%22%3A%22https%3A%2F%2Ffeeds.npr.org%2F510310%2Fpodcast.xml%22%2C%22
|
# if it's a channel page, try to get an episode page
|
||||||
# and we have to fix the codes
|
channel_string = "castbox.fm/channel/id"
|
||||||
feed = search (page, '%22%2C%22rss_url%22%3A%22', '%22%2C%22')
|
if channel_string in url:
|
||||||
if not feed is None:
|
# get channel ID
|
||||||
return unquote(feed)
|
channel_id = url[url.index(channel_string)+len(channel_string):]
|
||||||
|
# strip extra arguments at end of URL
|
||||||
|
for symbol in ["/", "?", "&", ";"]:
|
||||||
|
if symbol in channel_id:
|
||||||
|
channel_id = channel_id[:channel_id.index(symbol)]
|
||||||
|
|
||||||
|
# get the first episode ID
|
||||||
|
episode_id = search (page, 'eids%22%3A%5B', '%2C')
|
||||||
|
if episode_id is None:
|
||||||
|
episode_id = search (page, 'eid%22%3A', '%2C')
|
||||||
|
|
||||||
|
if not episode_id is None:
|
||||||
|
episode_url = "https://castbox.fm/episode/id" + channel_id + "-id" + episode_id
|
||||||
|
episode_page = download (platform, episode_url, args, verbosity)
|
||||||
|
|
||||||
|
# if the original URL is an episode URL, use that page in the next step
|
||||||
|
elif "castbox.fm/episode/" in url:
|
||||||
|
episode_page = page
|
||||||
|
|
||||||
|
# if it's an episode page, get the feed from the page
|
||||||
|
if not episode_page is None:
|
||||||
|
# it's like %22%2C%22rss_url%22%3A%22https%3A%2F%2Ffeeds.npr.org%2F510310%2Fpodcast.xml%22%2C%22
|
||||||
|
# and we have to fix the codes
|
||||||
|
feed = search (episode_page, '%22%2C%22rss_url%22%3A%22', '%22%2C%22')
|
||||||
|
if not feed is None:
|
||||||
|
return unquote(feed)
|
||||||
|
|
||||||
def extract (url, page=None, network=False, verbosity=3, args={}):
|
def extract (url, page=None, network=False, verbosity=3, args={}):
|
||||||
# cannot get feed from URL alone
|
# cannot get feed from URL alone
|
||||||
|
@ -23,6 +48,6 @@ def extract (url, page=None, network=False, verbosity=3, args={}):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
page = download (platform, url, args, verbosity)
|
page = download (platform, url, args, verbosity)
|
||||||
feed = extract_from_page (page, verbosity)
|
feed = extract_from_page (page, verbosity, url, args)
|
||||||
if not feed is None:
|
if not feed is None:
|
||||||
return feed
|
return feed
|
||||||
|
|
Loading…
Reference in New Issue