Only run generic extractor if we get a 200-level response code.

It probably doesn't help to spam servers that give 403s.
This commit is contained in:
0x80 2022-04-02 00:00:00 +00:00
parent d305dec5c1
commit bb9a4b10b0
Signed by: 0x80
GPG Key ID: 68368BCBC000EF51
2 changed files with 11 additions and 6 deletions

View File

@@ -41,10 +41,10 @@ def download (platform, url, args, verbosity, return_http_code=False, follow_loc
 except pycurl.error as e:
 error (str(e), verbosity, platform)
 return None
-response_code = c.getinfo(c.RESPONSE_CODE)
+response_code = int(c.getinfo(c.RESPONSE_CODE))
 c.close()
 debug (url + " downloaded!", verbosity, platform)
-if int(response_code) in range(400,599):
+if response_code in range(400,599):
 error ("Server returned " + str(response_code), verbosity, platform)
 else:
 debug ("Server returned " + str(response_code), verbosity, platform)

View File

@@ -7,6 +7,8 @@ import opml
 # enter a URL and attempt to return a feed URL
 def get_feed (url, verbosity=3, network=True, curl_args=None):
+feed = None
 from determine_site import determine_site
 debug ("Attempting to determine site...", verbosity)
 site = determine_site (url)
@@ -23,7 +25,7 @@ def get_feed (url, verbosity=3, network=True, curl_args=None):
 elif network:
 from download_page import download
-page = download (None, url, curl_args, verbosity)
+page,response_code = download (None, url, curl_args, verbosity, True)
 if page is None:
 error ("Failed to download " + url, verbosity)
 return None,None
@@ -43,9 +45,12 @@ def get_feed (url, verbosity=3, network=True, curl_args=None):
 return feed,software
 # try generic extractor even if software is known
+# don't try generic extractor if we got an error
+if response_code in range(200,299):
 debug ("Trying generic extractor...", verbosity)
 extractor = importlib.import_module("extractors.generic")
 feed = extractor.extract(url, page, network, verbosity, curl_args)
 if feed is None:
 error ("Unable to get RSS feed for " + url, verbosity, "generic")
 else: