From 2401bfa815a5cdd5969ad802eb57ec9a24d8c79e Mon Sep 17 00:00:00 2001 From: lost Date: Tue, 9 Nov 2021 00:00:00 +0000 Subject: [PATCH] Improve generic extractor handling of /account/post format. --- scripts/extractors/generic.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/scripts/extractors/generic.py b/scripts/extractors/generic.py index 8786647..c4e6a88 100644 --- a/scripts/extractors/generic.py +++ b/scripts/extractors/generic.py @@ -103,6 +103,13 @@ def extract_from_page (page, verbosity, url, args): offset = 7 elif path.startswith("/users/"): offset = 8 + # set 2 as upper bound for how far out we'll go... + # for example, expect paths like + # /account/username/postid + # but don't try too hard with paths like + # /year/month/day/postid + elif path.count('/') > 3: + offset = path.find('/',1) + 1 else: offset = 1