From 04e4bfa0ebb32b3f86d1128c242754f7929bdf1f Mon Sep 17 00:00:00 2001
From: lost
Date: Mon, 8 Nov 2021 00:00:00 +0000
Subject: [PATCH] Add Castbox support.

---
Review note: line breaks and indentation in this patch were restored from a
whitespace-collapsed copy, and the blank context line in the
determine_site.py hunk was inferred from the hunk counts. If that hunk does
not apply, retry with `git apply --ignore-whitespace`. The castbox.py blob
id on its index line predates the review edits to that file.

 scripts/determine_site.py     |  2 ++
 scripts/extractors/castbox.py | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 tests/castbox.txt             |  7 +++++++
 3 files changed, 58 insertions(+)
 create mode 100644 scripts/extractors/castbox.py
 create mode 100644 tests/castbox.txt

diff --git a/scripts/determine_site.py b/scripts/determine_site.py
index 97eed1e..c415fd5 100644
--- a/scripts/determine_site.py
+++ b/scripts/determine_site.py
@@ -3,6 +3,8 @@ supported_sites = {
     "apple_podcasts" : [
         "podcasts.apple.com"
     ],
+    "castbox" : ["castbox.fm"],
+
     "castro_fm" : ["castro.fm"],
 
     "chirbit" : [
diff --git a/scripts/extractors/castbox.py b/scripts/extractors/castbox.py
new file mode 100644
index 0000000..7700f82
--- /dev/null
+++ b/scripts/extractors/castbox.py
@@ -0,0 +1,49 @@
+#!/usr/bin/python3
+"""Extract a podcast's RSS feed URL from a castbox.fm page."""
+
+from utils import *
+from download_page import download
+from urllib.parse import unquote
+
+# portable code to get filename
+import os
+platform = os.path.basename(__file__)
+if platform.endswith(".py"):
+    platform = platform[:-3]
+
+def extract_from_page (page, verbosity):
+    """Return the RSS feed URL embedded in a Castbox page, or None.
+
+    page      -- downloaded text of a castbox.fm channel or episode page
+    verbosity -- unused here; kept so existing callers keep working
+    """
+    # nothing to scan, e.g. when the download produced no page
+    if not page:
+        return None
+
+    # it's like %22%2C%22rss_url%22%3A%22https%3A%2F%2Ffeeds.npr.org%2F510310%2Fpodcast.xml%22%2C%22
+    # and we have to fix the codes
+    feed = search (page, '%22%2C%22rss_url%22%3A%22', '%22%2C%22')
+    if feed is None:
+        return None
+    return unquote(feed)
+
+def extract (url, page=None, network=False, verbosity=3, args=None):
+    """Return the RSS feed URL for a castbox.fm URL, or None.
+
+    url       -- castbox.fm channel or episode URL
+    page      -- ignored: the page is always downloaded again
+    network   -- must be true, otherwise None is returned
+    verbosity -- passed through to download() and extract_from_page()
+    args      -- passed through to download(); a fresh dict when omitted
+    """
+    # cannot get feed from URL alone
+    if not network:
+        return None
+
+    # None instead of {} as default so calls never share one dict
+    if args is None:
+        args = {}
+
+    page = download (platform, url, args, verbosity)
+    return extract_from_page (page, verbosity)
diff --git a/tests/castbox.txt b/tests/castbox.txt
new file mode 100644
index 0000000..0aed193
--- /dev/null
+++ b/tests/castbox.txt
@@ -0,0 +1,7 @@
+# channel pages
+https://castbox.fm/channel/id2698771 https://feeds.npr.org/510310/podcast.xml
+https://castbox.fm/channel/id1238764 https://rss.art19.com/generation-why-podcast
+
+# episode pages
+https://castbox.fm/episode/Congress-Passes-Biden's-Trillion-Dollar-Transit%2C-Broadband%2C-And-Power-Bill-id2698771-id440269324 https://feeds.npr.org/510310/podcast.xml
+https://castbox.fm/episode/Introducing%3A-Operator-id1238764-id440095234 https://rss.art19.com/generation-why-podcast