69 lines
1.9 KiB
Python
Executable File
69 lines
1.9 KiB
Python
Executable File
#! /usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""
|
|
URL parsing.
|
|
"""
|
|
import re
|
|
from module import rule
|
|
from tools import web
|
|
from config.types import ValidatedAttribute, ListAttribute, StaticSection
|
|
import requests
|
|
from urllib.parse import urlparse
|
|
from html.parser import HTMLParser
|
|
|
|
# Request headers used when fetching a page for its title: a fixed User-Agent
# string plus a Range header asking for only bytes 0-4096 of the body.
headers = {
    "User-Agent": "bix nood gimme the title",
    "Range": "bytes=0-4096",
}
|
|
|
|
class UrlSection(StaticSection):
    """Declares the settings that make up the URL-titling config section."""

    # TODO some validation rules maybe?

    # List-valued setting; configure() describes it as regular expressions
    # for URLs that should be excluded.
    exclude = ListAttribute('exclude')

    # Prefix that suppresses titling of a URL; defaults to '!'.
    exclusion_char = ValidatedAttribute('exclusion_char', default='!')
|
|
|
|
|
|
def configure(config):
    """Define the ``url`` section on *config* and configure its settings.

    Registers ``UrlSection`` under the name ``'url'`` and then calls
    ``config.url.configure_setting`` once per setting, in declaration order,
    passing the setting name together with its descriptive text.
    """
    setting_texts = (
        ('exclude',
         'Enter regular expressions for each URL you would like to exclude.'),
        ('exclusion_char',
         'Enter a character which can be prefixed to suppress URL titling'),
    )

    config.define_section('url', UrlSection)
    for setting_name, text in setting_texts:
        config.url.configure_setting(setting_name, text)
|
|
|
|
|
|
# Matches the first <title> element, tolerating attributes on the tag, any
# letter case, and title text that spans several lines.
_TITLE_RE = re.compile(r'<title\b[^>]*>(.*?)</title\s*>',
                       re.IGNORECASE | re.DOTALL)

# Seconds to wait on a remote server before giving up on a URL, so that one
# unresponsive host cannot block the handler indefinitely.
_FETCH_TIMEOUT = 10


def _extract_title(page):
    """Return the cleaned text of the first ``<title>`` in *page*, or None.

    HTML entities are decoded and every run of whitespace (including line
    breaks) is collapsed to a single space. None is returned when there is no
    complete ``<title>...</title>`` pair or the title is empty.
    """
    # Local import: HTMLParser.unescape() was removed in Python 3.9 and
    # html.unescape() is its documented replacement.
    import html

    found = _TITLE_RE.search(page)
    if found is None:
        return None
    cleaned = ' '.join(html.unescape(found.group(1)).split())
    return cleaned or None


@rule(r'(?u).*(https?://\S+).*')
def title_auto(bot, trigger):
    """
    Automatically show titles for URLs found in a message.

    Every http/https/ftp URL in *trigger* is considered. A URL is skipped
    when it is prefixed with the configured exclusion character, when
    ``web.secCheck`` rejects it, when it cannot be fetched, when the server
    answers 404, when the response is not ``text/html``, or when no title can
    be extracted. Redirects are followed by ``requests``, so the title is that
    of the final page; the hostname shown is the one of the URL as posted.

    Raises ``requests.exceptions.HTTPError`` (via ``raise_for_status``) for
    error statuses other than 404.
    """
    # Messages starting with the command prefix followed by 'title' are left
    # alone (presumably handled by an explicit title command -- confirm).
    if re.match(bot.config.core.prefix + 'title', trigger):
        return

    exclusion_char = bot.config.url.exclusion_char or ''
    # Escape the configured prefix so characters such as '*' or '?' are
    # matched literally instead of corrupting the pattern.
    url_finder = re.compile(
        r'(?u)((?:%s)?(?:http|https|ftp)(?:://\S+))' % re.escape(exclusion_char),
        re.IGNORECASE)

    urls = re.findall(url_finder, trigger)
    if not urls:
        return

    for url in urls:
        # Honour the exclusion prefix: the pattern captures it on purpose so
        # that prefixed URLs can be recognised and left untitled here.
        if exclusion_char and url.startswith(exclusion_char):
            continue
        # Avoid fetching known malicious links
        if not web.secCheck(bot, url):
            continue
        try:
            res = requests.get(url, headers=headers, verify=True,
                               timeout=_FETCH_TIMEOUT)
        except requests.exceptions.RequestException:
            # Connection failures, timeouts, unsupported schemes (e.g. ftp)
            # and redirect loops all just mean "nothing to title".
            continue
        if res.status_code == 404:
            continue
        res.raise_for_status()
        # The header may be missing entirely; treat that as "not HTML".
        content_type = res.headers.get("Content-Type", "")
        if not content_type.lower().startswith("text/html"):
            continue
        title = _extract_title(res.text)
        if title is None:
            continue
        hostname = urlparse(url).hostname
        bot.say('[ \x0310%s \x03] - \x0304%s' % (title, hostname))
|