From aec11e8e191708088a701005270a2eb95a3d5581 Mon Sep 17 00:00:00 2001 From: iou1name Date: Wed, 24 Apr 2019 13:09:06 -0400 Subject: [PATCH] added size limit and timeout to image cache --- quest/tools.py | 46 ++++++++++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/quest/tools.py b/quest/tools.py index 212ebb9..d467c27 100644 --- a/quest/tools.py +++ b/quest/tools.py @@ -5,6 +5,7 @@ Some miscellaneous tools and helper functions. Primarily for quests. import os import re import json +import time import hashlib import magic @@ -27,32 +28,40 @@ def download_img(url): type, and saves it to file with the hash as filename. Returns a URL to image. """ - # TODO: file size limits - # https://stackoverflow.com/questions/22346158/ - # TODO: prevent overwriting - url = url.replace('..', '') + # TODO: external server + timeout = 10 # TODO: put in settings + url = url.replace('..', '') # TODO: why is this here? if url.startswith(settings.IMG_SVR_URL): if '/' not in url.replace(settings.IMG_SVR_URL, ''): return url try: - res = requests.get(url) - res.raise_for_status() - mime = magic.from_buffer(res.content, mime=True) + with requests.get(url, stream=True) as r: + r.raise_for_status() + data = b'' + start_time = time.time() + for chunk in r.iter_content(102400): + if time.time() - start_time > timeout: + raise ValueError('TIMEOUT_REACHED') + data += chunk + if len(data) > 4*1024*1024: # TODO: put in settings + raise ValueError('RESPONSE_TOO_LARGE') + mime = magic.from_buffer(data, mime=True) assert mime in ALLOWED_MIMES h = hashlib.sha256() - h.update(res.content) + h.update(data) fname = h.hexdigest() fname += "." + mime.partition("/")[2] with open(os.path.join(IMG_DIR, fname), "wb") as file: - for chunk in res.iter_content(100000): - file.write(chunk) + file.write(data) return settings.IMG_SVR_URL + fname except requests.exceptions.RequestException: return "INVALID_URL" except AssertionError: return "INVALID_MIME_TYPE" + except ValueError as e: + return str(e) except Exception as e: - print(e) + print(e) # TODO: log this return "UNKNOWN_ERROR" @@ -66,18 +75,19 @@ def handle_img(text, limit=5): # TODO: handle webms urls = re.findall( r"""\[img(?: title=['"](.*)['"])?\](.*)\[\/img\]""", - text.replace('', '\n') ) urls = urls[:limit] for match_pair in urls: - title, ext_url = match_pair - int_url = download_img(ext_url) - if int_url in ["INVALID_URL", "INVALID_MIME_TYPE", "UNKNOWN_ERROR"]: - text = re.sub(r"\[img.*?\[\/img\]", ext_url, text, 1) + title, external_url = match_pair + internal_url = download_img(external_url) + if not internal_url.startswith("http"): # download errored + # TODO: error message? + text = re.sub(r"\[img.*?\[\/img\]", external_url, text, 1) if not title: - title = os.path.basename(ext_url) - img_tag = f'' + title = os.path.basename(external_url) + img_tag = f'' text = re.sub(r"\[img.*?\[\/img\]", img_tag, text, 1)