added size limit and timeout to image cache

This commit is contained in:
iou1name 2019-04-24 13:09:06 -04:00
parent c99a6ba24e
commit aec11e8e19

View File

@ -5,6 +5,7 @@ Some miscellaneous tools and helper functions. Primarily for quests.
import os
import re
import json
import time
import hashlib
import magic
@ -27,32 +28,40 @@ def download_img(url):
type, and saves it to file with the hash as filename. Returns a
URL to image.
"""
# TODO: file size limits
# https://stackoverflow.com/questions/22346158/
# TODO: prevent overwriting
url = url.replace('..', '')
# TODO: external server
timeout = 10 # TODO: put in settings
url = url.replace('..', '') # TODO: why is this here?
if url.startswith(settings.IMG_SVR_URL):
if '/' not in url.replace(settings.IMG_SVR_URL, ''):
return url
try:
res = requests.get(url)
res.raise_for_status()
mime = magic.from_buffer(res.content, mime=True)
with requests.get(url, stream=True) as r:
r.raise_for_status()
data = b''
start_time = time.time()
for chunk in r.iter_content(102400):
if time.time() - start_time > timeout:
raise ValueError('TIMEOUT_REACHED')
data += chunk
if len(data) > 4*1024*1024: # TODO: put in settings
raise ValueError('RESPONSE_TOO_LARGE')
mime = magic.from_buffer(data, mime=True)
assert mime in ALLOWED_MIMES
h = hashlib.sha256()
h.update(res.content)
h.update(data)
fname = h.hexdigest()
fname += "." + mime.partition("/")[2]
with open(os.path.join(IMG_DIR, fname), "wb") as file:
for chunk in res.iter_content(100000):
file.write(chunk)
file.write(data)
return settings.IMG_SVR_URL + fname
except requests.exceptions.RequestException:
return "INVALID_URL"
except AssertionError:
return "INVALID_MIME_TYPE"
except ValueError as e:
return str(e)
except Exception as e:
print(e)
print(e) # TODO: log this
return "UNKNOWN_ERROR"
@ -66,18 +75,19 @@ def handle_img(text, limit=5):
# TODO: handle webms
urls = re.findall(
r"""\[img(?: title=['"](.*)['"])?\](.*)\[\/img\]""",
text.replace('<br', '\n')
text.replace('<br>', '\n')
)
urls = urls[:limit]
for match_pair in urls:
title, ext_url = match_pair
int_url = download_img(ext_url)
if int_url in ["INVALID_URL", "INVALID_MIME_TYPE", "UNKNOWN_ERROR"]:
text = re.sub(r"\[img.*?\[\/img\]", ext_url, text, 1)
title, external_url = match_pair
internal_url = download_img(external_url)
if not internal_url.startswith("http"): # download errored
# TODO: error message?
text = re.sub(r"\[img.*?\[\/img\]", external_url, text, 1)
if not title:
title = os.path.basename(ext_url)
img_tag = f'<img src="{int_url}" title="{title}">'
title = os.path.basename(external_url)
img_tag = f'<img src="{internal_url}" title="{title}">'
text = re.sub(r"\[img.*?\[\/img\]", img_tag, text, 1)