fulvia/modules/url.py

76 lines
1.9 KiB
Python
Raw Normal View History

2018-03-16 03:13:43 -04:00
#!/usr/bin/env python3
"""
URL parsing.
"""
import html
import re
from html.parser import HTMLParser
from urllib.parse import urlparse

import requests
2018-09-19 13:01:19 -04:00
from module import hook, url_callback
2018-03-16 03:13:43 -04:00
# Request headers sent by title_auto when it fetches a page to read its
# title. The Range header asks for only the leading bytes of the body so a
# large page is not downloaded in full (a server is free to ignore it).
HEADERS = {"User-Agent": "bix nood gimme the title", "Range": "bytes=0-4096"}
2018-09-19 13:01:19 -04:00
@url_callback('puu.sh/')
def get_puush_fname(bot, url):
    """
    Callback for puu.sh links. Gets the filename and displays it.

    Sends a HEAD request to `url` and takes the filename from the quoted
    part of the Content-Disposition response header. Nothing is sent when
    the request fails or the header carries no quoted filename.

    bot -- object whose msg() method is used to send the result.
    url -- the puu.sh URL that triggered this callback.
    """
    try:
        # Bounded wait so an unresponsive host cannot stall the callback;
        # a timeout is reported as a RequestException subclass.
        res = requests.head(url, timeout=10)
    except requests.exceptions.RequestException:
        return

    disposition = res.headers.get('Content-Disposition', '')
    # re.search returns None when the header is absent or has no quoted
    # section, so check the match before reading its group.
    match = re.search(r'\"(.*)\"', disposition)
    if match is None:
        return

    fname = match.group(1)
    if not fname:
        return
    bot.msg(f"[ \x0310puu.sh \x03] - \x0304{fname}")
2018-03-16 03:13:43 -04:00
@hook(True)
def title_auto(bot, trigger):
    """
    Automatically show titles for URLs. For shortened URLs/redirects, find
    where the URL redirects to and show the title for that.

    Every http(s) URL found in the joined trigger arguments is handled in
    turn. A URL containing a key of bot.url_callbacks is passed to each
    matching callback and not processed further. Any other URL is fetched,
    and if the response is an HTML page with a non-empty <title>, the title
    and the URL's hostname are sent with bot.msg(). URLs that cannot be
    fetched, return an error status, or have no usable title are skipped
    silently.

    bot     -- object providing url_callbacks and msg().
    trigger -- object whose args attribute holds the message parts.
    """
    text = ' '.join(trigger.args)
    # Cheap substring test to avoid running the regex on most messages.
    if "http" not in text:
        return

    url_finder = re.compile(r"((?:http|https)(?::\/\/\S+))", re.IGNORECASE)
    urls = re.findall(url_finder, text)
    if not urls:
        return

    for url in urls:
        # Run every registered callback whose key occurs in the URL; such
        # URLs are considered handled and get no generic title lookup.
        handled = False
        for key in bot.url_callbacks:
            if key in url:
                bot.url_callbacks[key](bot, url)
                handled = True
        if handled:
            continue

        try:
            res = requests.get(url, headers=HEADERS, verify=True, timeout=10)
            res.raise_for_status()
        except requests.exceptions.RequestException:
            # Covers connection failures, timeouts, malformed URLs, redirect
            # loops and HTTP error statuses alike: just skip this URL.
            continue

        content_type = res.headers.get("Content-Type", "")
        if not content_type.startswith("text/html"):
            continue

        title = _extract_title(res.text)
        if not title:
            continue

        hostname = urlparse(url).hostname
        bot.msg(f"[ \x0310{title} \x03] - \x0304{hostname}")


def _extract_title(page):
    """
    Return the cleaned text of the first complete <title> element in `page`,
    or None when there is no opening tag followed by a closing tag.

    The tag match is case-insensitive and tolerates attributes on the
    opening tag. HTML character references are decoded, line breaks are
    removed and surrounding whitespace is stripped.
    """
    match = re.search(
        r"<title(?:\s[^>]*)?>(.*?)</title\s*>",
        page,
        re.IGNORECASE | re.DOTALL,
    )
    if match is None:
        # Either no title at all, or the closing tag is missing (e.g. the
        # response was cut short before the title ended).
        return None

    title = html.unescape(match.group(1))
    # Drop line breaks so the title stays on a single output line.
    return title.replace("\n", "").replace("\r", "").strip()