rsstube/scripts/extractors/apple_podcasts.py

#!/usr/bin/python3

from utils import *
from download_page import download

# portable code to get filename
import os
# Extractor name used in log output: this script's file name, minus a
# trailing ".py" when there is one (any other name is kept unchanged).
platform = os.path.basename(__file__)
platform = platform[:-3] if platform.endswith(".py") else platform

def extract_from_page (page, verbosity):
	"""Look up the "feedUrl" string field in page and return its value.

	page      -- text to scan (e.g. the JSON returned by Apple's lookup API)
	verbosity -- accepted but not used by this function

	Returns whatever `search` yields for the text between '"feedUrl":"'
	and the following '"'. A None result is passed through unchanged
	(presumably meaning the marker was not found -- confirm in utils.search).
	"""
	return search (page, '"feedUrl":"', '"')

def _podcast_id_from_url (url):
	"""Return the numeric Apple podcast ID contained in url, or None."""
	# Imported locally so this helper does not depend on the file's import block.
	import re

	# The format is like
	# https://podcasts.apple.com/us/podcast/the-api-academy-podcast/id1171911720
	# and we want the digits of the "/id<digits>" path segment (here 1171911720).
	# The lookahead requires the digits to end the segment, so trailing
	# "/", "?", "&", ";" or "#" parts are ignored and a show slug that merely
	# starts with "id" (e.g. "id10t") is not mistaken for the ID.
	match = re.search (r"/id(\d+)(?=[/?&;#]|$)", url)
	if match is None:
		return None
	return match.group (1)

def extract (url, page=None, network=False, verbosity=3, args={}):
	"""Resolve an Apple Podcasts URL to its feed URL via Apple's lookup API.

	url       -- Apple Podcasts show URL containing an "/id<digits>" segment
	page      -- not read; it is overwritten by the API response when queried
	network   -- when falsy, no request is made and None is returned
	verbosity -- forwarded to notify/debug/download/extract_from_page
	args      -- forwarded unchanged to download (never modified here)

	Returns the result of extract_from_page on the API response, or None
	when network access is disabled or url contains no podcast ID.
	"""
	notify ("Unable to get feed from URL alone", verbosity, platform)
	if not network:
		return None

	podcast_id = _podcast_id_from_url (url)
	if podcast_id is None:
		# Without an ID the lookup query would be meaningless, so skip it
		# instead of sending a garbage value to the API.
		debug ("No podcast ID found in " + url, verbosity, platform)
		return None

	query_url = "https://itunes.apple.com/lookup?entity=podcast&id=" + podcast_id

	# Apple API query about podcast (returns JSON object)
	debug ("Querying Apple API for " + podcast_id + "...", verbosity, platform)
	page = download (platform, query_url, args, verbosity)
	return extract_from_page (page, verbosity)