Improve generic extractor handling of /account/post format.

This commit is contained in:
lost 2021-11-09 00:00:00 +00:00
parent 240370e347
commit 2401bfa815
Signed by: 0x80
GPG Key ID: 00B9C3A6C02F71F6
1 changed file with 7 additions and 0 deletions

View File

@@ -103,6 +103,13 @@ def extract_from_page (page, verbosity, url, args):
offset = 7
elif path.startswith("/users/"):
offset = 8
# set 2 as upper bound for how far out we'll go...
# for example, expect paths like
# /account/username/postid
# but don't try too hard with paths like
# /year/month/day/postid
elif path.count('/') > 3:
offset = path.find('/',1) + 1
else:
offset = 1