#!/usr/bin/env python3
"""
URL parsing: find URLs in triggering text and announce the <title> of the
pages they point to.
"""
|
||
|
import re
|
||
|
from urllib.parse import urlparse
|
||
|
from html.parser import HTMLParser
|
||
|
|
||
|
import requests
|
||
|
|
||
|
from module import hook
|
||
|
|
||
|
# Request headers sent with every page fetch made by title_auto.
# "Range" asks the server for only the first 4097 bytes (offsets 0-4096
# inclusive); a server is free to ignore it and send the whole body.
HEADERS = {"User-Agent": "bix nood gimme the title", "Range": "bytes=0-4096"}
|
||
|
|
||
|
# Matches http:// and https:// URLs up to the next whitespace character.
_URL_RE = re.compile(r"https?://\S+", re.IGNORECASE)

# Captures the text of the first <title> element. Tolerates attributes on the
# opening tag, any letter case, and titles that span several lines.
_TITLE_RE = re.compile(
    r"<title\b[^>]*>(.*?)</title\s*>", re.IGNORECASE | re.DOTALL
)

# Seconds to wait for a server before giving up; without a timeout
# requests.get() may block forever.
_REQUEST_TIMEOUT = 10


def _extract_title(markup):
    """Return the cleaned-up <title> text found in *markup*, or None."""
    # Function-scope import: HTMLParser().unescape() was removed in
    # Python 3.9; html.unescape() is the supported replacement.
    from html import unescape

    found = _TITLE_RE.search(markup)
    if found is None:
        # No complete <title>...</title> pair (e.g. the ranged response was
        # cut off before the closing tag).
        return None
    # Decode entities, then collapse every run of whitespace (newlines,
    # carriage returns, tabs) into a single space so the title is one line.
    title = " ".join(unescape(found.group(1)).split())
    return title or None


@hook(True)
def title_auto(bot, trigger):
    """
    Automatically show titles for URLs. For shortened URLs/redirects, find
    where the URL redirects to and show the title for that.

    Every http(s) URL found in ``trigger.group(0)`` is processed in order:

    * If any key of ``bot.url_callbacks`` occurs in the URL, every matching
      callback is called as ``callback(bot, url)`` and no title is fetched.
    * Otherwise the page is requested with ``HEADERS``. URLs that fail to
      load, return an HTTP error status, are not ``text/html``, or have no
      usable ``<title>`` are skipped silently.
    * For the rest, the title and the hostname of the URL as written in the
      message are sent through ``bot.say``.

    Returns None.
    """
    message = trigger.group(0)
    # Cheap pre-check before running the regex; case-insensitive so that it
    # agrees with the case-insensitive URL pattern.
    if "http" not in message.lower():
        return

    for url in _URL_RE.findall(message):
        # Let registered handlers claim the URL. All matching handlers run,
        # not just the first one.
        handled = False
        for key in bot.url_callbacks:
            if key in url:
                bot.url_callbacks[key](bot, url)
                handled = True
        if handled:
            continue

        try:
            res = requests.get(
                url, headers=HEADERS, verify=True, timeout=_REQUEST_TIMEOUT
            )
            res.raise_for_status()
        except requests.exceptions.RequestException:
            # Covers connection errors, timeouts, redirect loops, invalid
            # URLs and HTTP error statuses; a bad link must not break the hook.
            continue

        # The header may be absent; treat that the same as "not HTML".
        content_type = res.headers.get("Content-Type", "")
        if not content_type.lower().startswith("text/html"):
            continue

        title = _extract_title(res.text)
        if title is None:
            continue

        hostname = urlparse(url).hostname
        bot.say(f"[ \x0310{title} \x03] - \x0304{hostname}")
|