added size limit and timeout to image cache

iou1name 2019-04-24 13:09:06 -04:00
parent c99a6ba24e
commit aec11e8e19


@@ -5,6 +5,7 @@ Some miscellaneous tools and helper functions. Primarily for quests.
 import os
 import re
 import json
+import time
 import hashlib
 import magic
@@ -27,32 +28,40 @@ def download_img(url):
     type, and saves it to file with the hash as filename. Returns a
     URL to image.
     """
-    # TODO: file size limits
-    # https://stackoverflow.com/questions/22346158/
-    # TODO: prevent overwriting
-    url = url.replace('..', '')
+    # TODO: external server
+    timeout = 10 # TODO: put in settings
+    url = url.replace('..', '') # TODO: why is this here?
     if url.startswith(settings.IMG_SVR_URL):
         if '/' not in url.replace(settings.IMG_SVR_URL, ''):
             return url
     try:
-        res = requests.get(url)
-        res.raise_for_status()
-        mime = magic.from_buffer(res.content, mime=True)
+        with requests.get(url, stream=True) as r:
+            r.raise_for_status()
+            data = b''
+            start_time = time.time()
+            for chunk in r.iter_content(102400):
+                if time.time() - start_time > timeout:
+                    raise ValueError('TIMEOUT_REACHED')
+                data += chunk
+                if len(data) > 4*1024*1024: # TODO: put in settings
+                    raise ValueError('RESPONSE_TOO_LARGE')
+        mime = magic.from_buffer(data, mime=True)
         assert mime in ALLOWED_MIMES
         h = hashlib.sha256()
-        h.update(res.content)
+        h.update(data)
         fname = h.hexdigest()
         fname += "." + mime.partition("/")[2]
         with open(os.path.join(IMG_DIR, fname), "wb") as file:
-            for chunk in res.iter_content(100000):
-                file.write(chunk)
+            file.write(data)
         return settings.IMG_SVR_URL + fname
     except requests.exceptions.RequestException:
         return "INVALID_URL"
     except AssertionError:
         return "INVALID_MIME_TYPE"
+    except ValueError as e:
+        return str(e)
     except Exception as e:
-        print(e)
+        print(e) # TODO: log this
         return "UNKNOWN_ERROR"
@@ -66,18 +75,19 @@ def handle_img(text, limit=5):
     # TODO: handle webms
     urls = re.findall(
         r"""\[img(?: title=['"](.*)['"])?\](.*)\[\/img\]""",
-        text.replace('<br', '\n')
+        text.replace('<br>', '\n')
     )
     urls = urls[:limit]
     for match_pair in urls:
-        title, ext_url = match_pair
-        int_url = download_img(ext_url)
-        if int_url in ["INVALID_URL", "INVALID_MIME_TYPE", "UNKNOWN_ERROR"]:
-            text = re.sub(r"\[img.*?\[\/img\]", ext_url, text, 1)
+        title, external_url = match_pair
+        internal_url = download_img(external_url)
+        if not internal_url.startswith("http"): # download errored
+            # TODO: error message?
+            text = re.sub(r"\[img.*?\[\/img\]", external_url, text, 1)
         if not title:
-            title = os.path.basename(ext_url)
-        img_tag = f'<img src="{int_url}" title="{title}">'
+            title = os.path.basename(external_url)
+        img_tag = f'<img src="{internal_url}" title="{title}">'
         text = re.sub(r"\[img.*?\[\/img\]", img_tag, text, 1)
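
A usage sketch for handle_img after this change, assuming it returns the rewritten text (the tail of the function is outside this hunk); the input string and the image-server URL in the comments are made up for illustration:

    text = 'Look at this: [img title="map"]https://example.com/map.png[/img]'
    text = handle_img(text, limit=5)
    # On success, the [img] tag is replaced by an <img> pointing at the cached copy,
    # e.g. '<img src="<IMG_SVR_URL><sha256-of-bytes>.png" title="map">'.
    # If download_img returned one of its error strings (no "http" prefix),
    # the tag is replaced with the original external URL instead.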