From a1475943a7484cb732e0be0a8dc1391acfe643a3 Mon Sep 17 00:00:00 2001 From: 0x80 <0x80@negativezero.link> Date: Sun, 24 Apr 2022 00:00:00 +0000 Subject: [PATCH] [generic] Implement basic check that downloaded page is an RSS or Atom feed. --- src/extractors/generic.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/extractors/generic.py b/src/extractors/generic.py index c4e6a88..03e42d8 100644 --- a/src/extractors/generic.py +++ b/src/extractors/generic.py @@ -36,11 +36,12 @@ def try_common_paths (verbosity, url, args): for path in common_paths: page,response_code = download (platform, url + '/' + path, args, verbosity, True) if response_code == 200: - # TODO: verify it is a valid RSS feed - # Some pages serve response 200 for invalid pages - - # assume we found a feed - return url + '/' + path + # basic check that it looks like an RSS or Atom file + if "