From 09e606af22c50eb7b42b9e40acf2a644964e96de Mon Sep 17 00:00:00 2001
From: lost
Date: Fri, 23 Jul 2021 00:00:00 +0000
Subject: [PATCH] Add Apple Podcasts.

---
Reviewer note: this patch was rebuilt from a copy whose line breaks and
indentation had been lost. Four-space indentation is assumed, as is a
single space between the two columns of tests/apple_podcasts.txt. If the
determine_site.py hunk is rejected, retry with
`git apply --ignore-whitespace`. The extractor was revised during the
rebuild, so the blob "index" line for that new file is omitted.

 scripts/determine_site.py            |  2 +
 scripts/extractors/apple_podcasts.py | 64 ++++++++++++++++++++++++++++
 tests/apple_podcasts.txt             |  2 +
 3 files changed, 68 insertions(+)
 create mode 100644 scripts/extractors/apple_podcasts.py
 create mode 100644 tests/apple_podcasts.txt

diff --git a/scripts/determine_site.py b/scripts/determine_site.py
index 1f7eb97..83af2aa 100644
--- a/scripts/determine_site.py
+++ b/scripts/determine_site.py
@@ -1,6 +1,8 @@
 #!/usr/bin/python3
 
 supported_sites = {
+    "apple_podcasts" : [ "podcasts.apple.com" ],
+
     "castro_fm" : ["castro.fm"],
 
     "chirbit" : [
diff --git a/scripts/extractors/apple_podcasts.py b/scripts/extractors/apple_podcasts.py
new file mode 100644
--- /dev/null
+++ b/scripts/extractors/apple_podcasts.py
@@ -0,0 +1,64 @@
+#!/usr/bin/python3
+"""Find the RSS feed of a show from its Apple Podcasts URL.
+
+The feed address is not part of the show URL, so it is read from the
+response of Apple's lookup endpoint for the show's numeric ID.
+"""
+
+import os
+import re
+
+from download_page import download
+from utils import debug, notify, search
+
+# Name handed to notify/debug/download: this file's name without ".py".
+platform = os.path.basename(__file__)
+if platform.endswith(".py"):
+    platform = platform[:-3]
+
+# The show ID is the "/id<digits>" path segment, as in
+# https://podcasts.apple.com/us/podcast/the-api-academy-podcast/id1171911720
+# (here 1171911720). The lookahead keeps a show name such as "id10t"
+# from being mistaken for the ID.
+_PODCAST_ID_RE = re.compile(r"/id(\d+)(?=[/?&;#]|$)")
+
+
+def extract_from_page(page, verbosity):
+    """Return what utils.search finds for the "feedUrl" field of page.
+
+    verbosity is accepted but not used here.
+    """
+    return search(page, '"feedUrl":"', '"')
+
+
+def extract(url, page=None, network=False, verbosity=3, args=None):
+    """Look up the feed of the show at url through Apple's API.
+
+    url       -- show URL containing an "/id<digits>" path segment
+    page      -- not used by this extractor
+    network   -- the lookup request is only made when this is true
+    verbosity -- passed on to notify, debug and download
+    args      -- passed on to download; None means an empty dict
+
+    Returns None when network is false or url holds no show ID;
+    otherwise returns the result of extract_from_page on the response.
+    """
+    notify("Unable to get feed from URL alone", verbosity, platform)
+    if not network:
+        return None
+    if args is None:
+        # A fresh dict per call instead of one shared default object.
+        args = {}
+
+    match = _PODCAST_ID_RE.search(url)
+    if match is None:
+        # Without an ID there is nothing meaningful to look up.
+        notify("No podcast ID found in URL", verbosity, platform)
+        return None
+    podcast_id = match.group(1)
+
+    # Apple API query about podcast (returns JSON object)
+    query_url = "https://itunes.apple.com/lookup?entity=podcast&id=" + podcast_id
+    debug("Querying Apple API for " + podcast_id + "...", verbosity, platform)
+    response = download(platform, query_url, args, verbosity)
+    return extract_from_page(response, verbosity)
diff --git a/tests/apple_podcasts.txt b/tests/apple_podcasts.txt
new file mode 100644
index 0000000..ba9cd7a
--- /dev/null
+++ b/tests/apple_podcasts.txt
@@ -0,0 +1,2 @@
+https://podcasts.apple.com/us/podcast/the-api-academy-podcast/id1171911720 https://feeds.soundcloud.com/users/soundcloud:users:262261799/sounds.rss
+https://podcasts.apple.com/us/podcast/darknet-diaries/id1296350485 https://feeds.megaphone.fm/darknetdiaries