2018-03-16 03:13:43 -04:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
"""
|
|
|
|
URL parsing.
|
|
|
|
"""
|
|
|
|
import re
|
2021-01-10 17:45:51 -05:00
|
|
|
import html
|
2018-03-16 03:13:43 -04:00
|
|
|
from urllib.parse import urlparse
|
|
|
|
|
|
|
|
import requests
|
|
|
|
|
2018-09-19 13:01:19 -04:00
|
|
|
from module import hook, url_callback
|
2018-03-16 03:13:43 -04:00
|
|
|
|
2020-01-08 07:51:00 -05:00
|
|
|
# Default headers for page fetches: a placeholder User-Agent plus a Range
# header capping the response at the first 4 KiB — enough for the <head>
# section in most pages without downloading the whole document.
# NOTE(review): a <title> appearing past the 4096-byte mark would be missed.
HEADERS = {"User-Agent": "Give me your data.", "Range": "bytes=0-4096"}
|
2018-03-16 03:13:43 -04:00
|
|
|
|
2018-09-19 13:01:19 -04:00
|
|
|
@url_callback('puu.sh/')
def get_puush_fname(bot, url):
    """
    Callback for puu.sh links. Gets the filename from the
    Content-Disposition response header and displays it.
    """
    try:
        res = requests.head(url, timeout=10)
    except requests.exceptions.RequestException:
        # Host unreachable / bad URL: stay quiet, best-effort behaviour.
        return

    # The filename arrives quoted inside Content-Disposition,
    # e.g. 'attachment; filename="pic.png"'.
    disposition = res.headers.get('Content-Disposition', '')
    # BUG FIX: re.search returns None when the header is missing or has no
    # quoted section; calling .group(1) on it raised AttributeError.
    match = re.search(r'\"(.*)\"', disposition)
    if match is None:
        return

    fname = match.group(1)
    if not fname:
        return

    bot.msg(f"[ \x0310puu.sh \x03] - \x0304{fname}")
|
|
|
|
|
|
|
|
|
2020-08-21 01:03:00 -04:00
|
|
|
@url_callback("youtube.com/watch")
@url_callback("youtu.be/")
def youtube_title(bot, url):
    """
    Retrieve the title of the YouTube video and display it.

    Uses YouTube's oEmbed endpoint, which returns video metadata as JSON
    without requiring an API key.
    """
    # BUG FIX: take the hostname from the original link *before* rewriting
    # it into the oembed URL — previously this always printed
    # "www.youtube.com", even for youtu.be links.
    hostname = urlparse(url).hostname

    oembed_url = "https://www.youtube.com/oembed?url=" + url
    try:
        res = requests.get(oembed_url, timeout=10)
        res.raise_for_status()
    except requests.exceptions.RequestException:
        # Network failure or non-2xx (private/removed video): stay quiet,
        # matching the other callbacks' best-effort behaviour instead of
        # letting the exception crash the calling hook.
        return

    title = res.json()['title']

    bot.msg(f"[ \x0310{title} \x03] - \x0304{hostname}")
|
|
|
|
|
|
|
|
|
2018-03-16 03:13:43 -04:00
|
|
|
@hook(True)
def title_auto(bot, trigger):
    """
    Automatically show titles for URLs. For shortened URLs/redirects, find
    where the URL redirects to and show the title for that.
    """
    text = ' '.join(trigger.args)
    # Cheap pre-filter before running the regex at all.
    if "http" not in text:
        return

    url_finder = re.compile(r"((?:http|https)(?::\/\/\S+))", re.IGNORECASE)
    urls = url_finder.findall(text)
    if not urls:
        return

    for url in urls:
        # Twitter blocks unauthenticated scraping; nitter mirrors the content.
        url = url.replace('twitter.com', 'nitter.net')

        # Per-site callbacks (puu.sh, YouTube, ...) do their own display;
        # if any matched, skip the generic title fetch for this URL.
        handled = False
        for key in bot.url_callbacks:
            if key in url:
                bot.url_callbacks[key](bot, url)
                handled = True
        if handled:
            continue

        try:
            res = requests.get(url, headers=HEADERS, verify=True, timeout=10)
            # BUG FIX: was a bare `except:` around raise_for_status, and the
            # GET only caught ConnectionError/ReadTimeout — any other
            # RequestException (bad URL, too many redirects, ...) crashed
            # the hook. Catch the whole requests hierarchy and move on.
            res.raise_for_status()
        except requests.exceptions.RequestException:
            continue

        # Only HTML documents can carry a <title>; default of "" also covers
        # responses with no Content-Type header at all.
        if not res.headers.get("Content-Type", "").startswith("text/html"):
            continue

        start = res.text.find("<title>")
        if start == -1:
            continue
        title = res.text[start + 7:res.text.find("</title>")]
        title = html.unescape(title)
        title = title.replace("\n", "").strip()

        hostname = urlparse(url).hostname
        bot.msg(f"[ \x0310{title} \x03] - \x0304{hostname}")
|