add the code

2018-10-21 01:03:14 -04:00 · 2018-10-21 01:03:14 -04:00 · 34515fba31
commit 34515fba31
parent 1a5578199c
8 changed files with 195 additions and 1 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,3 +1,4 @@
 __pycache__/
 *.swp
 *.swo
 config.py
--- a/README.md
+++ b/README.md
@ -3,11 +3,13 @@ Firefox killed Live Bookmarks so it's time to make my own shitty RSS reader/aggr
 ## Requirements  
 Python 3.6+  
-Python packages: `flask gunicorn requests bs4`  
+Redis 4.0.10+  
 Python packages: `flask gunicorn requests bs4 redis lxml`  
 ## Install  
 1. Get on the floor  
 2. Walk the dinosaur  
 3. Add a crontab entry that runs `cron.py`, e.g. `0 * * * * python3 /path/to/cron.py`  
 ## Usage  
 `gunicorn -b localhost:5200 -e SCRIPT_NAME=/ss ss:app`  
--- a/config.template.py
+++ b/config.template.py
@ -0,0 +1,12 @@
 #!/usr/bin/env python3
 """
 Project configuration file.
 """
 # Host and port of the Redis server used to store the scraped feed data.
 REDIS_HOST = 'localhost'
 REDIS_PORT = 6379
 # URLs of the RSS feeds to scrape and display.
 FEEDS = [
 	"http://website1.com/rss",
 	"http://website2.com/rss",
 ]
--- a/cron.py
+++ b/cron.py
@ -0,0 +1,9 @@
 #!/usr/bin/env python3
 """
 Script to be called by cron to update all feeds periodically.
 Ex. 0 * * * * python3 /path/to/cron.py
 """
 import database
 # Refresh the feeds only when executed as a script (e.g. by cron), not on import.
 if __name__ == "__main__":
 	database.update_all_feeds()
--- a/database.py
+++ b/database.py
@ -0,0 +1,100 @@
 #!/usr/bin/env python3
 """
 Tools for updating and extracting data from the Redis database.
 """
 import re
 import json
 import time
 import bs4
 import redis
 import requests
 import config
 # Module-wide Redis client, configured from the project's config module.
 _r = redis.Redis(host=config.REDIS_HOST, port=config.REDIS_PORT)
 def scrape_feed(feed_url):
 	"""
 	Scrapes an RSS feed and extracts all relevant data from it.

 	Feeds whose URL contains "factorio.com" are handed to scrape_factorio.

 	Returns a dict with two keys:
 	  'meta'    -- dict with the feed's 'title', 'html_url' and 'description'
 	  'entries' -- list of at most 20 dicts with 'title', 'link', 'date'
 	               (formatted YYYY-MM-DD) and 'description' (first 200
 	               characters)
 	Returns None when the feed could not be downloaded.
 	"""
 	try:
 		# Without a timeout a stalled server would block the caller forever.
 		res = requests.get(feed_url, timeout=30)
 		res.raise_for_status()
 	except requests.exceptions.RequestException:
 		# TODO: log the error
 		return None
 	# Use the encoding named in the document, if any. The value is matched
 	# only up to its closing quote so that other quoted attributes on the
 	# same line are not captured along with it.
 	declared = re.search(r'encoding="([^"]+)"', res.text)
 	if declared:
 		encoding = declared.group(1)
 	else:
 		encoding = 'utf-8'
 	soup = bs4.BeautifulSoup(res.content, 'xml', from_encoding=encoding)
 	if "factorio.com" in feed_url: # because screw making it generic
 		return scrape_factorio(soup)
 	meta = {}
 	meta['title'] = soup.title.text
 	# NOTE(review): href="" looks intended to pick the plain <link> element
 	# rather than a <link href="..."> one -- confirm against bs4's matching.
 	meta['html_url'] = soup.find("link", href="").text
 	meta['description'] = soup.description.text
 	entries = soup.find_all('item')
 	feed_entries = []
 	for entry in entries[:20]:
 		entry_dict = {}
 		entry_dict['title'] = entry.title.text
 		entry_dict['link'] = entry.link.text
 		try:
 			date = entry.pubDate.text
 			date = time.strptime(date, '%a, %d %b %Y %H:%M:%S %z')
 		except AttributeError:
 			# No <pubDate> element: fall back to <dc:date>. The third-from-last
 			# character is dropped before parsing (assumes the value ends in
 			# an offset such as "+01:00", which becomes "+0100" for %z).
 			date = entry.find('dc:date').text
 			date = time.strptime(date[:-3] + date[-2:], '%Y-%m-%dT%H:%M:%S%z')
 		entry_dict['date'] = time.strftime('%Y-%m-%d', date)
 		entry_dict['description'] = entry.description.text[:200]
 		# TODO: html sanitation
 		feed_entries.append(entry_dict)
 	feed = {'meta': meta, 'entries': feed_entries}
 	return feed
 def scrape_factorio(soup):
 	"""
 	Builds the feed dict for the Factorio development blog.

 	Takes the already parsed feed document and returns a dict with a 'meta'
 	dict ('title', 'html_url', 'description') and an 'entries' list holding
 	at most 20 dicts ('title', 'link', 'date' as YYYY-MM-DD, 'description'
 	cut to 200 characters).
 	"""
 	def summarise(node):
 		"""Reduces one <entry> element to the fields that get stored."""
 		heading = node.title.text
 		target = node.find('link').get('href')
 		stamp = node.updated.text
 		# Drop the third-from-last character before parsing (assumes the
 		# value ends in an offset such as "+01:00").
 		parsed = time.strptime(stamp[:-3] + stamp[-2:], '%Y-%m-%dT%H:%M:%S%z')
 		return {
 			'title': heading,
 			'link': target,
 			'date': time.strftime('%Y-%m-%d', parsed),
 			# TODO: html sanitation
 			'description': node.content.text[:200],
 		}

 	meta = {
 		'title': "Factorio",
 		'html_url': soup.find('link').get('href'),
 		'description': soup.title.text,
 	}
 	newest = soup.find_all('entry')[:20]
 	return {'meta': meta, 'entries': [summarise(node) for node in newest]}
 def update_feed(feed_url):
 	"""
 	Re-scrapes the feed at feed_url and stores the result in Redis.

 	The feed is stored as JSON under the feed URL itself. When scraping
 	produces nothing (scrape_feed returned None) the stored copy is left
 	untouched rather than being replaced with JSON null.
 	"""
 	feed = scrape_feed(feed_url)
 	if feed is None:
 		# Keep the last good copy instead of wiping it out.
 		return
 	_r.set(feed_url, json.dumps(feed))
 def update_all_feeds():
 	"""Refreshes every feed listed in config.FEEDS, one after another."""
 	watched = config.FEEDS
 	for url in watched:
 		update_feed(url)
 def get_feed(feed_url):
 	"""
 	Returns all stored information about the feed.

 	The result is the decoded JSON stored in Redis under feed_url, or None
 	when nothing is stored under that key.
 	"""
 	raw = _r.get(feed_url)
 	if raw is None:
 		# Missing key: json.loads(None) would raise TypeError.
 		return None
 	return json.loads(raw)
--- a/static/ss.css
+++ b/static/ss.css
@ -0,0 +1,30 @@
 body {
 	background-color: #FAFAFA;
 	color: #111111;
 	font-family: Tahoma, Helvetica, sans-serif;
 	text-align: center;
 }
 a:link {
 	text-decoration: none;
 	color: #004070;
 }
 a:hover {
 	color: #B44444;
 }
 a:visited {
 	color: #B44444;
 }
 #globalContainer {
 	text-align: left;
 	display: inline-block;
 }
 .date {
 	margin-left: 3em;
 	padding-right: 1em;
 	font-size: 0.9em;
 }
--- a/templates/index.html
+++ b/templates/index.html
@ -0,0 +1,20 @@
 <!DOCTYPE html>
 <html>
 <head>
 	<title>/ss/</title>
 	<link rel="stylesheet" type="text/css" href="/static/ss.css">
 	<script type="text/javascript" src="/static/ss.js"></script>
 </head>
 <body>
 	<div id="globalContainer">
 		{% for feed in feeds %}
 		<div>
 			<h3>{{ feed['meta']['title'] }}</h3>
 			{% for entry in feed['entries'] %}
 			<span class="date">{{ entry['date'] }}</span><a href="{{ entry['link'] }}">{{ entry['title'] }}</a><br>
 			{% endfor %}
 		</div>
 		{% endfor %}
 	</div>
 </body>
 </html>
--- a/views.py
+++ b/views.py
@ -0,0 +1,20 @@
 #!/usr/bin/env python3
 """
 The main Flask application for serving up aggregated RSS feed entries.
 """
 from flask import Flask, render_template
 import config
 import database
 app = Flask(__name__)
@app.route("/")
 def index():
 	"""
 	The index page.
 	"""
 	feeds = []
 	for feed_url in config.FEEDS:
 		feeds.append(database.get_feed(feed_url))
 	return render_template("index.html", **locals())