#!/usr/bin/env python3
"""
URL parsing.
"""
import html
import re
from html.parser import HTMLParser  # no longer used here; kept in case other code imports it from this module
from urllib.parse import urlparse

import requests

from module import hook

HEADERS = {"User-Agent": "bix nood gimme the title", "Range": "bytes=0-4096"}

# Compiled once at import time instead of on every incoming message.
_URL_FINDER = re.compile(r"((?:http|https)(?::\/\/\S+))", re.IGNORECASE)
# Tolerates attributes on the tag (<title id="x">) and any letter case.
_TITLE_FINDER = re.compile(
    r"<title(?:\s[^>]*)?>(.*?)</title\s*>", re.IGNORECASE | re.DOTALL
)


def _extract_title(document):
    """
    Return the unescaped, newline-free text of the first <title> element in
    ``document``, or None when there is no complete <title>...</title> pair.
    """
    match = _TITLE_FINDER.search(document)
    if match is None:
        return None
    # html.unescape replaces HTMLParser().unescape, which was removed in
    # Python 3.9.
    return html.unescape(match.group(1)).replace("\n", "").strip()


@hook(True)
def title_auto(bot, trigger):
    """
    Automatically show titles for URLs. For shortened URLs/redirects, the
    request follows the redirect and the title of the final page is shown.

    bot     -- object providing ``url_callbacks`` (mapping of substring ->
               callable taking ``(bot, url)``) and ``msg(text)``.
    trigger -- match-like object; ``trigger.group(0)`` is the message text.

    URLs containing a key of ``bot.url_callbacks`` are handed to the matching
    callback(s) and not fetched. URLs that cannot be fetched, return an HTTP
    error status, are not ``text/html``, or have no <title> are skipped
    silently. The hostname shown is that of the URL as it was posted.
    """
    text = trigger.group(0)
    # Cheap pre-check; lower-cased so it agrees with the case-insensitive regex.
    if "http" not in text.lower():
        return

    urls = _URL_FINDER.findall(text)
    if not urls:
        return

    for url in urls:
        # Every callback whose key occurs in the URL gets called, not just
        # the first one.
        handled = False
        for key in bot.url_callbacks:
            if key in url:
                bot.url_callbacks[key](bot, url)
                handled = True
        if handled:
            continue

        try:
            res = requests.get(url, headers=HEADERS, verify=True, timeout=10)
            res.raise_for_status()
        except requests.exceptions.RequestException:
            # Covers connection errors, timeouts, redirect loops, invalid
            # URLs and HTTP error statuses; a bad link must not break the hook.
            continue

        content_type = res.headers.get("Content-Type")
        if not content_type or not content_type.startswith("text/html"):
            continue

        title = _extract_title(res.text)
        if title is None:
            continue

        hostname = urlparse(url).hostname
        bot.msg(f"[ \x0310{title} \x03] - \x0304{hostname}")