added size limit and timeout to image cache
This commit is contained in:
parent
c99a6ba24e
commit
aec11e8e19
|
@ -5,6 +5,7 @@ Some miscellaneous tools and helper functions. Primarily for quests.
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
|
import time
|
||||||
import hashlib
|
import hashlib
|
||||||
|
|
||||||
import magic
|
import magic
|
||||||
|
@ -27,32 +28,40 @@ def download_img(url):
|
||||||
type, and saves it to file with the hash as filename. Returns a
|
type, and saves it to file with the hash as filename. Returns a
|
||||||
URL to image.
|
URL to image.
|
||||||
"""
|
"""
|
||||||
# TODO: file size limits
|
# TODO: external server
|
||||||
# https://stackoverflow.com/questions/22346158/
|
timeout = 10 # TODO: put in settings
|
||||||
# TODO: prevent overwriting
|
url = url.replace('..', '') # TODO: why is this here?
|
||||||
url = url.replace('..', '')
|
|
||||||
if url.startswith(settings.IMG_SVR_URL):
|
if url.startswith(settings.IMG_SVR_URL):
|
||||||
if '/' not in url.replace(settings.IMG_SVR_URL, ''):
|
if '/' not in url.replace(settings.IMG_SVR_URL, ''):
|
||||||
return url
|
return url
|
||||||
try:
|
try:
|
||||||
res = requests.get(url)
|
with requests.get(url, stream=True) as r:
|
||||||
res.raise_for_status()
|
r.raise_for_status()
|
||||||
mime = magic.from_buffer(res.content, mime=True)
|
data = b''
|
||||||
|
start_time = time.time()
|
||||||
|
for chunk in r.iter_content(102400):
|
||||||
|
if time.time() - start_time > timeout:
|
||||||
|
raise ValueError('TIMEOUT_REACHED')
|
||||||
|
data += chunk
|
||||||
|
if len(data) > 4*1024*1024: # TODO: put in settings
|
||||||
|
raise ValueError('RESPONSE_TOO_LARGE')
|
||||||
|
mime = magic.from_buffer(data, mime=True)
|
||||||
assert mime in ALLOWED_MIMES
|
assert mime in ALLOWED_MIMES
|
||||||
h = hashlib.sha256()
|
h = hashlib.sha256()
|
||||||
h.update(res.content)
|
h.update(data)
|
||||||
fname = h.hexdigest()
|
fname = h.hexdigest()
|
||||||
fname += "." + mime.partition("/")[2]
|
fname += "." + mime.partition("/")[2]
|
||||||
with open(os.path.join(IMG_DIR, fname), "wb") as file:
|
with open(os.path.join(IMG_DIR, fname), "wb") as file:
|
||||||
for chunk in res.iter_content(100000):
|
file.write(data)
|
||||||
file.write(chunk)
|
|
||||||
return settings.IMG_SVR_URL + fname
|
return settings.IMG_SVR_URL + fname
|
||||||
except requests.exceptions.RequestException:
|
except requests.exceptions.RequestException:
|
||||||
return "INVALID_URL"
|
return "INVALID_URL"
|
||||||
except AssertionError:
|
except AssertionError:
|
||||||
return "INVALID_MIME_TYPE"
|
return "INVALID_MIME_TYPE"
|
||||||
|
except ValueError as e:
|
||||||
|
return str(e)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(e)
|
print(e) # TODO: log this
|
||||||
return "UNKNOWN_ERROR"
|
return "UNKNOWN_ERROR"
|
||||||
|
|
||||||
|
|
||||||
|
@ -66,18 +75,19 @@ def handle_img(text, limit=5):
|
||||||
# TODO: handle webms
|
# TODO: handle webms
|
||||||
urls = re.findall(
|
urls = re.findall(
|
||||||
r"""\[img(?: title=['"](.*)['"])?\](.*)\[\/img\]""",
|
r"""\[img(?: title=['"](.*)['"])?\](.*)\[\/img\]""",
|
||||||
text.replace('<br', '\n')
|
text.replace('<br>', '\n')
|
||||||
)
|
)
|
||||||
urls = urls[:limit]
|
urls = urls[:limit]
|
||||||
|
|
||||||
for match_pair in urls:
|
for match_pair in urls:
|
||||||
title, ext_url = match_pair
|
title, external_url = match_pair
|
||||||
int_url = download_img(ext_url)
|
internal_url = download_img(external_url)
|
||||||
if int_url in ["INVALID_URL", "INVALID_MIME_TYPE", "UNKNOWN_ERROR"]:
|
if not internal_url.startswith("http"): # download errored
|
||||||
text = re.sub(r"\[img.*?\[\/img\]", ext_url, text, 1)
|
# TODO: error message?
|
||||||
|
text = re.sub(r"\[img.*?\[\/img\]", external_url, text, 1)
|
||||||
if not title:
|
if not title:
|
||||||
title = os.path.basename(ext_url)
|
title = os.path.basename(external_url)
|
||||||
img_tag = f'<img src="{int_url}" title="{title}">'
|
img_tag = f'<img src="{internal_url}" title="{title}">'
|
||||||
|
|
||||||
text = re.sub(r"\[img.*?\[\/img\]", img_tag, text, 1)
|
text = re.sub(r"\[img.*?\[\/img\]", img_tag, text, 1)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user