#!/usr/bin/env python3
"""
URL parsing.
"""
import re
import html
from urllib.parse import urlparse

import requests

from module import hook, url_callback

HEADERS = {"User-Agent": "Give me your data.", "Range": "bytes=0-8192"}

# Seconds to wait on any single HTTP request before giving up.
_REQUEST_TIMEOUT = 10

# Matches http:// and https:// URLs up to the next whitespace character.
_URL_FINDER = re.compile(r"((?:http|https)(?::\/\/\S+))", re.IGNORECASE)

# Captures the text of the first complete <title> element of an HTML page.
_TITLE_FINDER = re.compile(
    r"<title(?:\s[^>]*)?>(.*?)</title>", re.IGNORECASE | re.DOTALL
)


@url_callback('puu.sh/')
def get_puush_fname(bot, url):
    """
    Callback puu.sh links. Gets the filename and displays it.

    The filename is the quoted value of the Content-Disposition header of a
    HEAD response. Nothing is sent when the request fails or when the header
    is missing or carries no quoted, non-empty filename.
    """
    try:
        res = requests.head(url, timeout=_REQUEST_TIMEOUT)
    except requests.exceptions.RequestException:
        return

    disposition = res.headers.get('Content-Disposition', '')
    # re.search() returns None when the header has no quoted part; guard
    # against that instead of calling .group() on None.
    match = re.search(r'\"(.*)\"', disposition)
    if match is None:
        return
    fname = match.group(1)
    if not fname:
        return
    bot.msg(f"[ \x0310puu.sh \x03] - \x0304{fname}")


@url_callback("youtube.com/watch")
@url_callback("youtu.be/")
def youtube_title(bot, url):
    """
    Retrieve the title of the YouTube video and display it.

    The title is read from the 'title' field of YouTube's oEmbed JSON
    response. Nothing is sent when the request fails or the response does
    not contain a usable title.
    """
    try:
        # Pass the video URL through `params` so that it is percent-encoded
        # and its own query string cannot leak into the oEmbed query.
        res = requests.get(
            "https://www.youtube.com/oembed",
            params={"url": url},
            timeout=_REQUEST_TIMEOUT,
        )
        res.raise_for_status()
        title = res.json()['title']
    except (requests.exceptions.RequestException,
            ValueError, KeyError, TypeError):
        # Network/HTTP failure, a non-JSON body, or JSON without a 'title'.
        return

    # Report the host of the link that was posted, not of the oEmbed endpoint.
    hostname = urlparse(url).hostname
    bot.msg(f"[ \x0310{title} \x03] - \x0304{hostname}")


def _extract_title(page):
    """
    Return the unescaped, stripped <title> text of `page`, or None.

    None is returned when `page` has no complete <title> element, which can
    happen when only the first bytes of a document were fetched.
    """
    # NOTE(review): the original extraction statements were not fully legible
    # in the reviewed source; this takes the text of the first <title>
    # element. Confirm this matches the intended behaviour.
    match = _TITLE_FINDER.search(page)
    if match is None:
        return None
    title = html.unescape(match.group(1))
    return title.replace("\n", "").strip()


@hook(True)
def title_auto(bot, trigger):
    """
    Automatically show titles for URLs.

    Every http(s) URL found in the space-joined `trigger.args` is handled in
    turn. 'twitter.com' is rewritten to 'nitter.net' first. When any key of
    `bot.url_callbacks` occurs in the URL, every matching callback is invoked
    and the generic lookup is skipped. Otherwise the page is fetched
    (requests follows redirects, so shortened URLs resolve to their target)
    and its <title> is sent along with the hostname of the URL as posted.
    URLs that cannot be fetched, are not text/html, or have no title are
    skipped silently.
    """
    message = ' '.join(trigger.args)
    # Cheap pre-filter; lower-cased so it agrees with the case-insensitive
    # URL pattern.
    if "http" not in message.lower():
        return

    for url in _URL_FINDER.findall(message):
        url = url.replace('twitter.com', 'nitter.net')

        handled = False
        for key in bot.url_callbacks:
            if key in url:
                bot.url_callbacks[key](bot, url)
                handled = True
        if handled:
            continue

        try:
            res = requests.get(
                url, headers=HEADERS, verify=True, timeout=_REQUEST_TIMEOUT
            )
            res.raise_for_status()
        except requests.exceptions.RequestException:
            # Covers connection errors, timeouts, bad URLs and HTTP errors.
            continue

        content_type = res.headers.get("Content-Type", "")
        if not content_type.startswith("text/html"):
            continue

        title = _extract_title(res.text)
        if not title:
            continue

        hostname = urlparse(url).hostname
        bot.msg(f"[ \x0310{title} \x03] - \x0304{hostname}")