From cfb3e4a6be0872be8fc0a39b28041273b4ec4ed0 Mon Sep 17 00:00:00 2001 From: lost Date: Mon, 8 Nov 2021 00:00:00 +0000 Subject: [PATCH] Support more Player FM links. --- docs/version | 2 +- scripts/determine_site.py | 2 +- scripts/download_page.py | 4 ++-- scripts/extractors/player_fm.py | 23 ++++++++++++++++++++++- tests/player_fm.txt | 3 +++ 5 files changed, 29 insertions(+), 5 deletions(-) diff --git a/docs/version b/docs/version index 4e179cb..e9c3506 100644 --- a/docs/version +++ b/docs/version @@ -1 +1 @@ -2021-07-23 (pre-release) +2021-11-08 (pre-release) diff --git a/scripts/determine_site.py b/scripts/determine_site.py index 849cd2c..97eed1e 100644 --- a/scripts/determine_site.py +++ b/scripts/determine_site.py @@ -18,7 +18,7 @@ supported_sites = { "lbry" : ["odysee.com"], - "player_fm" : ["player.fm/series/"], + "player_fm" : ["player.fm"], "pocketcasts" : ["pca.st"], diff --git a/scripts/download_page.py b/scripts/download_page.py index 4b28693..01a39f4 100644 --- a/scripts/download_page.py +++ b/scripts/download_page.py @@ -6,13 +6,13 @@ from utils import notify,debug,error # args should be a dictionary of arguments # return page bytes, response code -def download (platform, url, args, verbosity, return_http_code=False): +def download (platform, url, args, verbosity, return_http_code=False, follow_location=True): page_bytes = BytesIO() c = pycurl.Curl() c.setopt(c.URL, url) c.setopt(c.WRITEDATA, page_bytes) - c.setopt(c.FOLLOWLOCATION, True) + c.setopt(c.FOLLOWLOCATION, follow_location) # TODO: handle possible arguments # if args["user_agent"]: diff --git a/scripts/extractors/player_fm.py b/scripts/extractors/player_fm.py index 594d55d..bd43115 100644 --- a/scripts/extractors/player_fm.py +++ b/scripts/extractors/player_fm.py @@ -17,7 +17,28 @@ def extract (url, page=None, network=False, verbosity=3, args={}): if not network: return None - page = download (platform, url, args, verbosity) + if not "player.fm/series/" in url: + # it might be a page that redirects to /series/something + page = download (platform, url, args, verbosity, follow_location=False) + redirect_link = search (page, 'You are being redirected.') + if not redirect_link is None: + url = redirect_link + else: + return None + + # if it's in the form player.fm/series/series-name then get that page + index = url.find("player.fm/series/") + series = url[index + len("player.fm/series/"):] + + # if it's player.fm/series/series-name/episode-name truncate to + # player.fm/series/series-name + index2 = series.find("/") + if index2 >= 0: + series = series[:index2] + + page_to_download = "https://player.fm/series/" + series + + page = download (platform, page_to_download, args, verbosity) feed = extract_from_page (page, verbosity) if not feed is None: return feed diff --git a/tests/player_fm.txt b/tests/player_fm.txt index f410d08..34161cc 100644 --- a/tests/player_fm.txt +++ b/tests/player_fm.txt @@ -1 +1,4 @@ https://player.fm/series/tea-with-queen-and-j https://feeds.soundcloud.com/users/soundcloud:users:83464467/sounds.rss +https://player.fm/series/full-body-chills https://feeds.megaphone.fm/ADL1824189476 +https://player.fm/series/full-body-chills/radio-hell https://feeds.megaphone.fm/ADL1824189476 +https://player.fm/1BCuWqe https://feeds.soundcloud.com/users/soundcloud:users:12999054/sounds.rss