From 014132347390c9d96bcc8bbdcd3a2bab91901bbd Mon Sep 17 00:00:00 2001 From: iou1name Date: Tue, 3 Apr 2018 17:15:50 -0400 Subject: [PATCH] remove trailing slashes from url --- 4chanScrape.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/4chanScrape.py b/4chanScrape.py index b513bc8..74d7b2b 100755 --- a/4chanScrape.py +++ b/4chanScrape.py @@ -16,7 +16,8 @@ def Scrape(thread_url, output=None, original_filename=False, combo=False): """ Downloads thread page, extracts file urls and saves them to a directory. """ - thread_url = re.sub(r"/?#.*$", "", thread_url) + thread_url = re.sub(r"\/(?:#.+)?$", "", thread_url) + # regex removes a trailing slash and any post number anchor following it s = requests.Session() s.headers.update(HEADERS)