Only run generic extractor if we get a 200-level response code.
It probably doesn't help to spam servers that give 403s.
parent d305dec5c1
commit bb9a4b10b0
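
In short: download() now returns the HTTP status code alongside the page, and get_feed() only falls back to the generic extractor when that status is in the 200 range. Below is a minimal sketch of the pattern, using hypothetical fetch() and extract_feed() stand-ins rather than the project's own download() and extractors.generic:

# Sketch of the gate this commit introduces; fetch() and extract_feed()
# are made-up stand-ins, not the project's helpers.
def fetch(url):
    # Pretend download: returns (page_body, http_response_code).
    # The real download() now casts the code to int before returning it.
    return "<html>...</html>", 403

def extract_feed(url, page):
    # Stand-in for the generic extractor; may hit the network again.
    return None

def get_feed_sketch(url):
    page, response_code = fetch(url)
    if page is None:
        return None
    feed = None
    # Only try the generic extractor on a 2xx response; a 403/404/5xx body
    # rarely holds a usable feed link, and retrying just spams the server.
    if response_code in range(200, 299):
        feed = extract_feed(url, page)
    return feed

print(get_feed_sketch("https://example.com"))  # prints None: the fake fetch returns 403
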
@@ -41,10 +41,10 @@ def download (platform, url, args, verbosity, return_http_code=False, follow_loc
     except pycurl.error as e:
         error (str(e), verbosity, platform)
         return None
-    response_code = c.getinfo(c.RESPONSE_CODE)
+    response_code = int(c.getinfo(c.RESPONSE_CODE))
     c.close()
     debug (url + " downloaded!", verbosity, platform)
-    if int(response_code) in range(400,599):
+    if response_code in range(400,599):
         error ("Server returned " + str(response_code), verbosity, platform)
     else:
         debug ("Server returned " + str(response_code), verbosity, platform)

@@ -7,6 +7,8 @@ import opml

 # enter a URL and attempt to return a feed URL
 def get_feed (url, verbosity=3, network=True, curl_args=None):
     feed = None

     from determine_site import determine_site
     debug ("Attempting to determine site...", verbosity)
     site = determine_site (url)

@@ -23,7 +25,7 @@ def get_feed (url, verbosity=3, network=True, curl_args=None):

     elif network:
         from download_page import download
-        page = download (None, url, curl_args, verbosity)
+        page,response_code = download (None, url, curl_args, verbosity, True)
         if page is None:
             error ("Failed to download " + url, verbosity)
             return None,None

@@ -43,9 +45,12 @@ def get_feed (url, verbosity=3, network=True, curl_args=None):
         return feed,software

-    # try generic extractor even if software is known
-    debug ("Trying generic extractor...", verbosity)
-    extractor = importlib.import_module("extractors.generic")
-    feed = extractor.extract(url, page, network, verbosity, curl_args)
+    # don't try generic extractor if we got an error
+    if response_code in range(200,299):
+        debug ("Trying generic extractor...", verbosity)
+        extractor = importlib.import_module("extractors.generic")
+        feed = extractor.extract(url, page, network, verbosity, curl_args)

     if feed is None:
         error ("Unable to get RSS feed for " + url, verbosity, "generic")
     else: