add the code

This commit is contained in:
iou1name 2018-10-21 01:03:14 -04:00
parent 1a5578199c
commit 34515fba31
8 changed files with 195 additions and 1 deletions

1
.gitignore vendored
View File

@ -1,3 +1,4 @@
__pycache__/ __pycache__/
*.swp *.swp
*.swo *.swo
config.py

View File

@ -3,11 +3,13 @@ Firefox killed Live Bookmarks so it's time to make my own shitty RSS reader/aggr
## Requirements ## Requirements
Python 3.6+ Python 3.6+
Redis 4.0.10+
Python packages: `flask gunicorn requests bs4 redis lxml`
## Install ## Install
1. Get on the floor 1. Get on the floor
2. Walk the dinosaur 2. Walk the dinosaur
3. Install crontab to run `cron.py` ex. `0 * * * * python3 /path/to/cron.py`
## Usage ## Usage
`gunicorn -b localhost:5200 -e SCRIPT_NAME=/ss ss:app` `gunicorn -b localhost:5200 -e SCRIPT_NAME=/ss ss:app`

12
config.template.py Normal file
View File

@ -0,0 +1,12 @@
#!/usr/bin/env python3
"""
Project configuration file.
"""
# Hostname of the Redis server used to cache scraped feeds.
REDIS_HOST = 'localhost'
# Port of the Redis server (6379 is the Redis default).
REDIS_PORT = 6379
# URLs of the RSS/Atom feeds to scrape and aggregate.
FEEDS = [
    "http://website1.com/rss",
    "http://website2.com/rss",
]

9
cron.py Executable file
View File

@ -0,0 +1,9 @@
#!/usr/bin/env python3
"""
Script to be called by cron to update all feeds periodically.
Ex. 0 * * * * python3 /path/to/cron.py
"""
import database
# Guarded so that importing this module never triggers a refresh;
# only direct execution (e.g. from cron) does.
if __name__ == "__main__":
    database.update_all_feeds()

100
database.py Normal file
View File

@ -0,0 +1,100 @@
#!/usr/bin/env python3
"""
Tools for updating and extracting data from the Redis database.
"""
import re
import json
import time
import bs4
import redis
import requests
import config
# Module-level Redis connection shared by all helpers below.
# Host/port come from the project config; all other options are defaults.
_r = redis.Redis(
    config.REDIS_HOST,
    config.REDIS_PORT,
)
def scrape_feed(feed_url):
    """Scrape an RSS feed and extract all relevant data from it.

    Fetches *feed_url*, parses it as XML and returns a dict of the form
    {'meta': {...}, 'entries': [...]} with at most 20 entries.
    Returns None if the feed could not be fetched.
    """
    try:
        res = requests.get(feed_url)
        res.raise_for_status()
    except requests.exceptions.RequestException as e:
        # TODO: log the error
        return

    # Honor the encoding declared in the XML prolog; default to UTF-8.
    # [^"]* (rather than a greedy .*) stops at the closing quote, so a
    # trailing attribute such as standalone="yes" is not swallowed.
    encoding = re.search(r'encoding="([^"]*)"', res.text)
    if encoding:
        encoding = encoding.group(1)
    else:
        encoding = 'utf-8'
    soup = bs4.BeautifulSoup(res.content, 'xml', from_encoding=encoding)

    if "factorio.com" in feed_url:  # because screw making it generic
        return scrape_factorio(soup)

    meta = {}
    meta['title'] = soup.title.text
    # NOTE(review): find("link", href="") looks intended to pick the
    # channel <link> (which has no href attribute, unlike Atom-style
    # <link href="...">) — confirm bs4 matches it for the target feeds.
    meta['html_url'] = soup.find("link", href="").text
    meta['description'] = soup.description.text

    feed_entries = []
    for entry in soup.find_all('item')[:20]:
        entry_dict = {}
        entry_dict['title'] = entry.title.text
        entry_dict['link'] = entry.link.text
        try:
            # RFC 822 style date used by most RSS feeds.
            date = time.strptime(entry.pubDate.text,
                '%a, %d %b %Y %H:%M:%S %z')
        except AttributeError:
            # No <pubDate>: fall back to the ISO 8601 <dc:date> element.
            # Strip the colon from the UTC offset so %z can parse it.
            raw = entry.find('dc:date').text
            date = time.strptime(raw[:-3] + raw[-2:], '%Y-%m-%dT%H:%M:%S%z')
        entry_dict['date'] = time.strftime('%Y-%m-%d', date)
        entry_dict['description'] = entry.description.text[:200]
        # TODO: html sanitation
        feed_entries.append(entry_dict)

    return {'meta': meta, 'entries': feed_entries}
def scrape_factorio(soup):
    """Handles the special case that is the Factorio development blog."""
    metadata = {
        'title': "Factorio",
        'html_url': soup.find('link').get('href'),
        'description': soup.title.text,
    }
    items = []
    for post in soup.find_all('entry')[:20]:
        # Drop the colon in the UTC offset so %z can parse the timestamp.
        stamp = post.updated.text
        parsed = time.strptime(stamp[:-3] + stamp[-2:],
            '%Y-%m-%dT%H:%M:%S%z')
        items.append({
            'title': post.title.text,
            'link': post.find('link').get('href'),
            'date': time.strftime('%Y-%m-%d', parsed),
            # TODO: html sanitation
            'description': post.content.text[:200],
        })
    return {'meta': metadata, 'entries': items}
def update_feed(feed_url):
    """Scrape *feed_url* and cache the result in Redis under that URL.

    If the scrape fails (scrape_feed returns None), the previously
    cached value is left intact instead of being overwritten with
    JSON null, so readers keep seeing the last good data.
    """
    feed = scrape_feed(feed_url)
    if feed is not None:
        _r.set(feed_url, json.dumps(feed))
def update_all_feeds():
    """Refresh the cached data for every feed listed in the config."""
    for url in config.FEEDS:
        update_feed(url)
def get_feed(feed_url):
    """Return all stored information about the feed.

    Returns the cached feed dict, or None when nothing has been cached
    for *feed_url* yet (previously json.loads(None) raised TypeError,
    crashing any page that requested an un-scraped feed).
    """
    raw = _r.get(feed_url)
    if raw is None:
        return None
    return json.loads(raw)

30
static/ss.css Normal file
View File

@ -0,0 +1,30 @@
/* Page-wide defaults: light theme; text-align centers #globalContainer. */
body {
    background-color: #FAFAFA;
    color: #111111;
    font-family: Tahoma, Helvetica, sans-serif;
    text-align: center;
}
/* Links: no underline, dark blue when unvisited. */
a:link {
    text-decoration: none;
    color: #004070;
}
a:hover {
    color: #B44444;
}
/* Visited links share the hover color. */
a:visited {
    color: #B44444;
}
/* Shrinks to content width; centered by body's text-align. */
#globalContainer {
    text-align: left;
    display: inline-block;
}
/* Entry date shown to the left of each headline. */
.date {
    margin-left: 3em;
    padding-right: 1em;
    font-size: 0.9em;
}

20
templates/index.html Normal file
View File

@ -0,0 +1,20 @@
<!DOCTYPE html>
<html>
<head>
    <title>/ss/</title>
    <link rel="stylesheet" type="text/css" href="/static/ss.css">
    <script type="text/javascript" src="/static/ss.js"></script>
</head>
<body>
<div id="globalContainer">
    {# One section per aggregated feed, as supplied by views.index(). #}
    {% for feed in feeds %}
    <div>
        <h3>{{ feed['meta']['title'] }}</h3>
        {# One line per entry: date, then a link to the article. #}
        {% for entry in feed['entries'] %}
        <span class="date">{{ entry['date'] }}</span><a href="{{ entry['link'] }}">{{ entry['title'] }}</a><br>
        {% endfor %}
    </div>
    {% endfor %}
</div>
</body>
</html>

20
views.py Normal file
View File

@ -0,0 +1,20 @@
#!/usr/bin/env python3
"""
The main Flask application for serving up aggregated RSS feed entries.
"""
from flask import Flask, render_template
import config
import database
# WSGI application object; served by gunicorn as `ss:app` (see README).
app = Flask(__name__)
@app.route("/")
def index():
    """
    The index page.

    Renders the cached entries of every configured feed in one page.
    """
    # Pass the template context explicitly; **locals() also leaked the
    # loop variable into the context and hid what the template relies on.
    feeds = [database.get_feed(feed_url) for feed_url in config.FEEDS]
    return render_template("index.html", feeds=feeds)