diff --git a/.gitignore b/.gitignore index c56bb68..610faed 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ __pycache__/ *.swp *.swo +config.py diff --git a/README.md b/README.md index 28b97a4..b2ba6ab 100644 --- a/README.md +++ b/README.md @@ -3,11 +3,13 @@ Firefox killed Live Bookmarks so it's time to make my own shitty RSS reader/aggr ## Requirements Python 3.6+ -Python packages: `flask gunicorn requests bs4` +Redis 4.0.10+ +Python packages: `flask gunicorn requests bs4 redis lxml` ## Install 1. Get on the floor 2. Walk the dinosaur +3. Install crontab to run `cron.py` ex. `0 * * * * python3 /path/to/cron.py` ## Usage `gunicorn -b localhost:5200 -e SCRIPT_NAME=/ss ss:app` diff --git a/config.template.py b/config.template.py new file mode 100644 index 0000000..3cc417e --- /dev/null +++ b/config.template.py @@ -0,0 +1,12 @@ +#!/usr/bin/env python3 +""" +Project configuration file. +""" + +REDIS_HOST = 'localhost' +REDIS_PORT = 6379 + +FEEDS = [ + "http://website1.com/rss", + "http://website2.com/rss", +] diff --git a/cron.py b/cron.py new file mode 100755 index 0000000..ecd2320 --- /dev/null +++ b/cron.py @@ -0,0 +1,9 @@ +#!/usr/bin/env python3 +""" +Script to be called by cron to update all feeds periodically. +Ex. 0 * * * * python3 /path/to/cron.py +""" +import database + +if __name__ == "__main__": + database.update_all_feeds() diff --git a/database.py b/database.py new file mode 100644 index 0000000..083c54f --- /dev/null +++ b/database.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python3 +""" +Tools for updating and extracting data from the Redis database. 
+"""
+import json
+import logging
+import re
+import time
+from email.utils import parsedate_to_datetime
+
+import bs4
+import redis
+import requests
+
+import config
+
+_log = logging.getLogger(__name__)
+_r = redis.Redis(config.REDIS_HOST, config.REDIS_PORT)
+
+
+def _iso_day(stamp):
+    """Return the validated YYYY-MM-DD prefix of an ISO 8601 timestamp."""
+    # Only the date is stored, so the time and UTC offset are not parsed.
+    day = time.strptime(stamp.strip()[:10], '%Y-%m-%d')
+    return time.strftime('%Y-%m-%d', day)
+
+
+def _entry(title, link, day, text):
+    """Build the dict stored for one feed entry."""
+    # TODO: html sanitation
+    return {'title': title, 'link': link, 'date': day,
+            'description': text[:200]}
+
+
+def scrape_feed(feed_url):
+    """Scrape feed_url; return a feed dict, or None if the download fails."""
+    try:
+        res = requests.get(feed_url, timeout=30)
+        res.raise_for_status()
+    except requests.exceptions.RequestException as e:
+        _log.warning("Could not fetch %s: %s", feed_url, e)
+        return None
+    # [^"]* rather than .* so the match stops at the value's closing quote.
+    declared = re.search(r'encoding="([^"]*)"', res.text)
+    encoding = declared.group(1) if declared else 'utf-8'
+    soup = bs4.BeautifulSoup(res.content, 'xml', from_encoding=encoding)
+
+    if "factorio.com" in feed_url:  # uses <entry>/<updated> tags instead
+        return scrape_factorio(soup)
+
+    meta = {
+        'title': soup.title.text,
+        'html_url': soup.find("link", href="").text,
+        'description': soup.description.text,
+    }
+    feed_entries = []
+    for entry in soup.find_all('item')[:20]:
+        published = entry.find('pubDate')
+        if published is not None:
+            # RFC 822 date; unlike strptime('%z') this also accepts "GMT".
+            day = parsedate_to_datetime(published.text).strftime('%Y-%m-%d')
+        else:
+            day = _iso_day(entry.find('dc:date').text)
+        feed_entries.append(_entry(entry.title.text, entry.link.text, day,
+                                   entry.description.text))
+    return {'meta': meta, 'entries': feed_entries}
+
+
+def scrape_factorio(soup):
+    """Parse the Factorio blog feed into the same dict as scrape_feed()."""
+    meta = {
+        'title': "Factorio",
+        'html_url': soup.find('link').get('href'),
+        'description': soup.title.text,
+    }
+    feed_entries = [
+        _entry(entry.title.text, entry.find('link').get('href'),
+               _iso_day(entry.updated.text), entry.content.text)
+        for entry in soup.find_all('entry')[:20]
+    ]
+    return {'meta': meta, 'entries': feed_entries}
+
+
+def update_feed(feed_url):
+    """Re-scrape feed_url and store the result in Redis as JSON."""
+    feed = scrape_feed(feed_url)
+    if feed is not None:  # keep the last good copy on a failed fetch
+        _r.set(feed_url, json.dumps(feed))
+
+
+def update_all_feeds():
+    """Update every feed listed in config.FEEDS."""
+    for feed_url in config.FEEDS:
+        update_feed(feed_url)
+
+
+def get_feed(feed_url):
+    """Return the stored feed dict, or None if nothing is stored."""
+    raw = _r.get(feed_url)
+    return None if raw is None else json.loads(raw)
diff --git a/feeds.txt b/feeds.txt
new file mode 100644
index 0000000..5073081
--- /dev/null
+++ b/feeds.txt
@@ -0,0 +1,16 @@
+https://waremetetranslations.wordpress.com/feed/
+https://www.factorio.com/blog/rss
+https://twigserial.wordpress.com/feed/
+http://lmsketch.tumblr.com/rss
+http://feeds.feedburner.com/mstream
+http://hpmor.com/feed/
+http://fenoxo.com/feed/
+http://bagserk.blogspot.com/feeds/posts/default?alt=rss
+http://www.ggkthx.org/feed/
+http://www.evil-genius.us/feed/
+http://feeds.feedburner.com/oatmealfeed
+http://roguetranslations.wordpress.com/feed/
+http://notch.tumblr.com/rss
+http://feeds2.feedburner.com/virtualshackles
+http://thepunchlineismachismo.com/feed
+http://konashion.blogspot.com/feeds/posts/default?alt=rss
diff --git a/static/ss.css b/static/ss.css
new file mode 100644
index 0000000..95bb6cc
--- /dev/null
+++ b/static/ss.css
@@ -0,0 +1,30 @@
+body {
+    background-color: #FAFAFA;
+    color: #111111;
+    font-family: Tahoma, Helvetica, sans-serif;
+    text-align: center;
+}
+
+a:link {
+    text-decoration: none;
+    color: #004070;
+}
+
+a:hover {
+    color: #B44444;
+}
+
+a:visited {
+    color: #B44444;
+}
+
+#globalContainer {
+    text-align: left;
+    display: inline-block;
+}
+
+.date {
+    margin-left: 3em;
+    padding-right: 1em;
+    font-size: 0.9em;
+}
diff --git a/templates/index.html
b/templates/index.html new file mode 100644 index 0000000..be8fb02 --- /dev/null +++ b/templates/index.html @@ -0,0 +1,20 @@ + + +
+