Only run the generic extractor if we get a 2xx (success) response code.

It probably doesn't help to spam servers that give 403s.
This commit is contained in:
0x80 2022-04-02 00:00:00 +00:00
parent d305dec5c1
commit bb9a4b10b0
Signed by: 0x80
GPG Key ID: 68368BCBC000EF51
2 changed files with 11 additions and 6 deletions

View File

@@ -41,10 +41,10 @@ def download (platform, url, args, verbosity, return_http_code=False, follow_loc
except pycurl.error as e:
error (str(e), verbosity, platform)
return None
response_code = c.getinfo(c.RESPONSE_CODE)
response_code = int(c.getinfo(c.RESPONSE_CODE))
c.close()
debug (url + " downloaded!", verbosity, platform)
if int(response_code) in range(400,599):
if response_code in range(400,599):
error ("Server returned " + str(response_code), verbosity, platform)
else:
debug ("Server returned " + str(response_code), verbosity, platform)

View File

@@ -7,6 +7,8 @@ import opml
# enter a URL and attempt to return a feed URL
def get_feed (url, verbosity=3, network=True, curl_args=None):
feed = None
from determine_site import determine_site
debug ("Attempting to determine site...", verbosity)
site = determine_site (url)
@@ -23,7 +25,7 @@ def get_feed (url, verbosity=3, network=True, curl_args=None):
elif network:
from download_page import download
page = download (None, url, curl_args, verbosity)
page,response_code = download (None, url, curl_args, verbosity, True)
if page is None:
error ("Failed to download " + url, verbosity)
return None,None
@@ -43,9 +45,12 @@ def get_feed (url, verbosity=3, network=True, curl_args=None):
return feed,software
# try generic extractor even if software is known
# don't try generic extractor if we got an error
if response_code in range(200,299):
debug ("Trying generic extractor...", verbosity)
extractor = importlib.import_module("extractors.generic")
feed = extractor.extract(url, page, network, verbosity, curl_args)
if feed is None:
error ("Unable to get RSS feed for " + url, verbosity, "generic")
else: