Only run the generic extractor if we get a 2xx (success) response code.
It probably doesn't help to keep sending requests to servers that respond with errors such as 403.
This commit is contained in:
parent
d305dec5c1
commit
bb9a4b10b0
|
@ -41,10 +41,10 @@ def download (platform, url, args, verbosity, return_http_code=False, follow_loc
|
||||||
except pycurl.error as e:
|
except pycurl.error as e:
|
||||||
error (str(e), verbosity, platform)
|
error (str(e), verbosity, platform)
|
||||||
return None
|
return None
|
||||||
response_code = c.getinfo(c.RESPONSE_CODE)
|
response_code = int(c.getinfo(c.RESPONSE_CODE))
|
||||||
c.close()
|
c.close()
|
||||||
debug (url + " downloaded!", verbosity, platform)
|
debug (url + " downloaded!", verbosity, platform)
|
||||||
if int(response_code) in range(400,599):
|
if response_code in range(400,599):
|
||||||
error ("Server returned " + str(response_code), verbosity, platform)
|
error ("Server returned " + str(response_code), verbosity, platform)
|
||||||
else:
|
else:
|
||||||
debug ("Server returned " + str(response_code), verbosity, platform)
|
debug ("Server returned " + str(response_code), verbosity, platform)
|
||||||
|
|
|
@ -7,6 +7,8 @@ import opml
|
||||||
|
|
||||||
# enter a URL and attempt to return a feed URL
|
# enter a URL and attempt to return a feed URL
|
||||||
def get_feed (url, verbosity=3, network=True, curl_args=None):
|
def get_feed (url, verbosity=3, network=True, curl_args=None):
|
||||||
|
feed = None
|
||||||
|
|
||||||
from determine_site import determine_site
|
from determine_site import determine_site
|
||||||
debug ("Attempting to determine site...", verbosity)
|
debug ("Attempting to determine site...", verbosity)
|
||||||
site = determine_site (url)
|
site = determine_site (url)
|
||||||
|
@ -23,7 +25,7 @@ def get_feed (url, verbosity=3, network=True, curl_args=None):
|
||||||
|
|
||||||
elif network:
|
elif network:
|
||||||
from download_page import download
|
from download_page import download
|
||||||
page = download (None, url, curl_args, verbosity)
|
page,response_code = download (None, url, curl_args, verbosity, True)
|
||||||
if page is None:
|
if page is None:
|
||||||
error ("Failed to download " + url, verbosity)
|
error ("Failed to download " + url, verbosity)
|
||||||
return None,None
|
return None,None
|
||||||
|
@ -43,9 +45,12 @@ def get_feed (url, verbosity=3, network=True, curl_args=None):
|
||||||
return feed,software
|
return feed,software
|
||||||
|
|
||||||
# try generic extractor even if software is known
|
# try generic extractor even if software is known
|
||||||
debug ("Trying generic extractor...", verbosity)
|
# don't try generic extractor if we got an error
|
||||||
extractor = importlib.import_module("extractors.generic")
|
if response_code in range(200,299):
|
||||||
feed = extractor.extract(url, page, network, verbosity, curl_args)
|
debug ("Trying generic extractor...", verbosity)
|
||||||
|
extractor = importlib.import_module("extractors.generic")
|
||||||
|
feed = extractor.extract(url, page, network, verbosity, curl_args)
|
||||||
|
|
||||||
if feed is None:
|
if feed is None:
|
||||||
error ("Unable to get RSS feed for " + url, verbosity, "generic")
|
error ("Unable to get RSS feed for " + url, verbosity, "generic")
|
||||||
else:
|
else:
|
||||||
|
|
Loading…
Reference in New Issue