Add Radiopublic support.

2021-11-08 00:00:00 +00:00 · 2021-11-08 00:00:00 +00:00 · 6727ec4278
parent 04e4bfa0eb
commit 6727ec4278
4 changed files with 43 additions and 3 deletions
--- a/scripts/determine_site.py
+++ b/scripts/determine_site.py
@ -24,6 +24,8 @@ supported_sites = {
 	"pocketcasts" : ["pca.st"],
 	"radiopublic" : ["radiopublic.com"],
 	"reddit" : ["reddit.com"],
 	"soundcloud" : ["soundcloud.com"],
--- a/scripts/extractors/radiopublic.py
+++ b/scripts/extractors/radiopublic.py
@ -0,0 +1,34 @@
 #!/usr/bin/python3
 from utils import *
 from download_page import download
 from urllib.parse import unquote
 # Derive this extractor's platform name from its own file name, so the
 # module keeps working if the file is renamed; only a trailing ".py" is
 # removed, any other file name is used unchanged.
 import os
 platform = os.path.basename(__file__)
 if platform[-3:] == ".py":
 	platform = platform[:-3]
 def extract_from_page (page, verbosity):
 	"""Return the feed URL found in a RadioPublic page, or None if no marker matches."""
 	# Markers are tried in this order and the first hit wins.
 	# Each entry: (opening delimiter, closing delimiter, percent-decode the match?)
 	markers = (
 		# text shown next to the "RSS feed" icon
 		('" alt="RSS feed"/><div>', '</div>', False),
 		# feedUrl query parameter of the podcasters link; it is taken from
 		# a URL, so the match is passed through unquote
 		('<a href="https://podcasters.radiopublic.com?feedUrl=', '&amp;utm_campaign=landing-page', True),
 		# feedUrl field whose quotes are backslash-escaped in the page
 		('\\\"feedUrl\\\":\\\"', '\\\"', False),
 	)
 	# verbosity is accepted but not used by this function.
 	for opening, closing, encoded in markers:
 		found = search (page, opening, closing)
 		if found is None:
 			continue
 		return unquote(found) if encoded else found
 	return None
 def extract (url, page=None, network=False, verbosity=3, args={}):
 	"""Download the page at url and return the feed URL found in it, or None."""
 	# cannot get feed from URL alone, so without network access there is
 	# nothing to do
 	if network:
 		# any page passed in by the caller is replaced by a fresh download
 		page = download (platform, url, args, verbosity)
 		return extract_from_page (page, verbosity)
 	return None
--- a/scripts/utils.py
+++ b/scripts/utils.py
@ -37,9 +37,11 @@ def search (content, begins_with, ends_with, index=0, reverse=False):
 		begins_with = ends_with[::-1]
 		ends_with = temp[::-1]
-	# escape + signs as needed
+	# escape symbols as needed
-	begins_with = begins_with.replace('+', "\+")
+	# note: backslashes MUST be escaped first.
-	ends_with = ends_with.replace('+', "\+")
+	for symbol in ['\\', '+', '?']:
 		begins_with = begins_with.replace(symbol, '\\' + symbol)
 		ends_with = ends_with.replace(symbol, '\\' + symbol)
 	# look for longest match, not shortest, if one delimiter is empty
 	if begins_with == '' or ends_with == '':
--- a/tests/radiopublic.txt
+++ b/tests/radiopublic.txt
@ -0,0 +1,2 @@
 https://radiopublic.com/ThisAmerLife http://feed.thisamericanlife.org/talpodcast
 https://radiopublic.com/ThisAmerLife/s1!7460f http://feed.thisamericanlife.org/talpodcast
		`@ -0,0 +1,2 @@`
							`https://radiopublic.com/ThisAmerLife http://feed.thisamericanlife.org/talpodcast`
							`https://radiopublic.com/ThisAmerLife/s1!7460f http://feed.thisamericanlife.org/talpodcast`