diff --git a/src/determine_site.py b/src/determine_site.py
index 7f50289..106c0ca 100644
--- a/src/determine_site.py
+++ b/src/determine_site.py
@@ -18,6 +18,8 @@ supported_sites = {
 
     "github" : ["github.com"],
 
+    "google_podcasts" : ["podcasts.google.com"],
+
     "lbry" : ["odysee.com"],
 
     "player_fm" : ["player.fm"],
diff --git a/src/extractors/google_podcasts.py b/src/extractors/google_podcasts.py
new file mode 100644
index 0000000..cedf080
--- /dev/null
+++ b/src/extractors/google_podcasts.py
@@ -0,0 +1,62 @@
+#!/usr/bin/python3
+
+from utils import *
+from download_page import download
+
+# imported after the wildcard import so it cannot shadow these names
+import base64
+from urllib.parse import urlparse
+
+# portable code to get filename
+import os
+platform = os.path.basename(__file__)
+if platform.endswith(".py"):
+    platform = platform[:(-3)]
+
+def extract_from_url (url):
+    """Return the feed address encoded in a Google Podcasts URL, or None.
+
+    The path segment after "feed" is the feed address in URL-safe base64
+    without padding; the URL/feed pairs in tests/google_podcasts.txt all
+    decode this way, so neither the page nor the network is needed.
+    """
+    segments = urlparse (url).path.split ("/")
+    if "feed" not in segments[:-1]:
+        return None
+    token = segments[segments.index ("feed") + 1]
+    try:
+        # restore the "=" padding that the URLs leave out
+        padded = token + "=" * (-len (token) % 4)
+        feed = base64.urlsafe_b64decode (padded).decode ("utf-8")
+    except ValueError:
+        # binascii.Error or UnicodeDecodeError: not an encoded feed
+        return None
+    # only accept tokens that decode to a web address
+    if feed.startswith (("http://", "https://")):
+        return feed
+    return None
+
+def extract_from_page (page, verbosity):
+    """Return what search () finds for the feed pattern in page."""
+    # The feed URL seems to follow this pattern for some reason.
+    # This seems incredibly brittle, and I'd expect this extractor to
+    # break a lot...
+    return search (page, ',null,null,[null,"', '","')
+
+def extract (url, page=None, network=False, verbosity=3, args=None):
+    """Return the feed address for a Google Podcasts URL.
+
+    The URL is decoded first; scraping the page (given, or downloaded
+    when network is set) is only the fallback.
+    """
+    feed = extract_from_url (url)
+    if feed is not None:
+        return feed
+    notify ("Unable to get feed from URL alone", verbosity, platform)
+    if network:
+        # args defaults to None so calls never share one mutable dict
+        page = download (platform, url, {} if args is None else args, verbosity)
+    if page is None:
+        # no page was supplied or downloaded, so there is nothing to scrape
+        return None
+    return extract_from_page (page, verbosity)
diff --git a/src/utils.py b/src/utils.py
index 3c36036..91a8c9b 100644
--- a/src/utils.py
+++ b/src/utils.py
@@ -40,7 +40,7 @@ def search (content, begins_with, ends_with, index=0, reverse=False):
 
     # escape symbols as needed
     # note: backslashes MUST be escaped first.
-    for symbol in ['\\', '+', '?']:
+    for symbol in ['\\', '+', '?', '[', ']']:
         begins_with = begins_with.replace(symbol, '\\' + symbol)
         ends_with = ends_with.replace(symbol, '\\' + symbol)
 
diff --git a/tests/google_podcasts.txt b/tests/google_podcasts.txt
new file mode 100644
index 0000000..8999175
--- /dev/null
+++ b/tests/google_podcasts.txt
@@ -0,0 +1,7 @@
+# show page
+https://podcasts.google.com/feed/aHR0cHM6Ly9mZWVkcy5tZWdhcGhvbmUuZm0vZGFya25ldGRpYXJpZXM https://feeds.megaphone.fm/darknetdiaries
+https://podcasts.google.com/feed/aHR0cHM6Ly9icmVhY2hzZW5zZS5saWJzeW4uY29tL3Jzcw https://breachsense.libsyn.com/rss
+
+# episode page
+https://podcasts.google.com/feed/aHR0cHM6Ly9mZWVkcy5tZWdhcGhvbmUuZm0vZGFya25ldGRpYXJpZXM/episode/OWYzODE2MDgtODVmYS0xMWVjLWFlNGQtNjM0Mzk5MGVhMjFm https://feeds.megaphone.fm/darknetdiaries
+https://podcasts.google.com/feed/aHR0cHM6Ly9icmVhY2hzZW5zZS5saWJzeW4uY29tL3Jzcw/episode/MGNmNWRiMjktNjMyNy00MDFhLTkzNDMtOTE2ZGU4ZDZlNzcx https://breachsense.libsyn.com/rss