Compare commits

...

5 Commits

Author SHA1 Message Date
48a9242d89 fix bold text 2022-09-01 12:41:22 -04:00
ff7bb5e3e8 add useremail field 2022-08-27 11:08:27 -04:00
022e35e13c fix br 2022-08-27 00:10:37 -04:00
bcfe40292d add option to hide post tags and header 2022-08-18 20:27:39 -04:00
8ad34cc221 add visibility menu 2022-08-17 21:19:14 -04:00
5 changed files with 187 additions and 4 deletions

71
fix_br.py Normal file
View File

@ -0,0 +1,71 @@
#!/usr/bin/env python3
"""
Fixes a mistake made to the newline structure in scrape_quest.py
"""
import os
import re
import datetime
import bs4
import psycopg2
import config
# Base directory; the script scans its `archive` and `qstarchive` subdirectories.
BQ_DIR = '/var/www/html/banished/'
# The database connection is opened at import time, using `config.db` as keyword arguments.
con = psycopg2.connect(**config.db)
# Cursor used by scrape_posts() to issue its UPDATE statements.
cur = con.cursor()
# Class found on a source <span> -> class written into the stored post body.
_SPAN_CLASSES = {
    'mu-s': 'bold',
    'mu-i': 'italic',
    'mu-r': 'red',
    'mu-g': 'green',
    'mu-b': 'blue',
}


def _render_post_body(post_body):
    """Return the body string to store for a parsed ``postMessage`` element.

    ``<br>`` tags become newlines, ``<b>`` and the recognised ``mu-*`` spans
    are re-emitted as markup, and every other child is flattened to its text.
    Note that `post_body` is modified in place (its ``<br>`` tags are replaced).
    """
    for br in post_body.find_all('br'):
        br.replace_with('\n')

    parts = []
    for child in post_body.children:
        text = child.get_text()
        if child.name == 'b':
            parts.append('<b>' + text + '</b>')
            continue
        if child.name == 'span':
            # A span may carry no class attribute (or an empty one); treat
            # that as "unrecognised" instead of raising.
            classes = child.get('class') or ['']
            css_class = _SPAN_CLASSES.get(classes[0])
            if css_class is not None:
                parts.append(
                    '<span class="' + css_class + '">' + text + '</span>'
                )
                continue
        # Plain text nodes and any tag without special handling.
        parts.append(text)
    return ''.join(parts)


def scrape_posts(root_dir):
    """Rebuild post bodies from archived pages and write them to the database.

    Walks `root_dir`, parses every ``index.html`` found, regenerates the body
    of each ``postContainer`` element and issues an ``UPDATE`` on the ``post``
    table through the module-level cursor ``cur``. Nothing is committed here;
    that is left to the caller.

    Args:
        root_dir: Directory tree to search for ``index.html`` files.
    """
    for dir_name, _sub_dirs, files in os.walk(root_dir):
        if 'index.html' not in files:
            continue
        # os.walk() already yields `dir_name` prefixed with `root_dir`, so
        # joining the root in again would break relative roots.
        filepath = os.path.join(dir_name, 'index.html')
        print("Processing:", filepath)
        # NOTE(review): reads with the locale's default encoding -- confirm
        # that matches how the archived pages were saved.
        with open(filepath, 'r') as page:
            data = page.read()
        # Drop <wbr> tags up front so they do not split text nodes.
        data = re.sub(r'<wbr ?\/?>', '', data)
        soup = bs4.BeautifulSoup(data, 'html.parser')
        for post in soup.find_all(class_='postContainer'):
            post_body = post.find(class_='postMessage')
            if post_body is None:
                # Container without a message element: nothing to rewrite.
                continue
            # The element id is the post number behind a two-character prefix.
            post_id = int(post.get('id')[2:])
            cur.execute(
                "UPDATE post SET body = (%s) WHERE id = (%s)",
                (_render_post_body(post_body), post_id)
            )
if __name__ == '__main__':
    # Always release the connection, even when scraping fails part-way.
    # commit() is only reached once both trees were processed successfully.
    try:
        scrape_posts(os.path.join(BQ_DIR, 'archive'))
        scrape_posts(os.path.join(BQ_DIR, 'qstarchive'))
        con.commit()
    finally:
        con.close()

View File

@ -14,6 +14,10 @@ body {
opacity: 0.33; opacity: 0.33;
} }
/* Removes an element from the page; toggle_visibility() adds this class for the "Hidden" column. */
.hidden {
display: none;
}
.tag { .tag {
font-size: 0.8em; font-size: 0.8em;
} }
@ -37,6 +41,10 @@ body {
font-weight: bold; font-weight: bold;
} }
/* Spans directly inside an email link (the thread template wraps the poster name and tripcode in it). */
.useremail > span {
color: darkblue;
}
.name { .name {
color: darkgreen; color: darkgreen;
font-weight: bold; font-weight: bold;
@ -65,3 +73,33 @@ body {
.deadlink { .deadlink {
text-decoration: line-through; text-decoration: line-through;
} }
/* Inline text styles. fix_br.py writes spans with these classes into post bodies
   (from the source classes mu-s, mu-i, mu-r, mu-g and mu-b respectively). */
.bold {
font-weight: bold;
}
.italic {
font-style: italic;
}
.red {
color: red;
}
.green {
color: green;
}
.blue {
color: blue;
}
/* Clickable label that opens and closes the visibility menu. */
#visibility_menu_toggle {
  cursor: pointer;
}

/* Pop-up panel; absolutely positioned, so it is laid out on top of the page
   instead of pushing the content below it down. */
#visibility_menu {
  border: 1px solid darkgray;
  position: absolute;
  background-color: #FAFAFA;
}

View File

@ -46,3 +46,51 @@ function remove_tag(event) {
tag.remove(); tag.remove();
}); });
} }
/**
 * Open the visibility menu if it is closed, close it if it is open.
 *
 * @param {Event} event - The click event from the menu toggle (unused).
 */
function toggle_visibility_menu(event) {
  const menu = document.querySelector('#visibility_menu');
  if (!menu) {
    return;
  }
  // Anything other than an explicit 'block' counts as closed, so the menu
  // still opens when its inline `display` is empty rather than 'none'.
  menu.style.display = (menu.style.display === 'block') ? 'none' : 'block';
}
/**
 * Apply or remove a visibility class when a checkbox in the menu changes.
 *
 * The first cell of the checkbox's row names the target and the checkbox's
 * column selects the class: column 1 -> 'faded', column 2 -> 'hidden'.
 * Three row labels map to fixed selectors; any other label is treated as a
 * class name to select on.
 *
 * @param {Event} event - The change event fired by a menu checkbox.
 */
function toggle_visibility(event) {
  const checkbox = event.target;
  const cell = checkbox.parentElement;
  const tag = cell.parentElement.cells[0].innerText;

  const vis_class = ['faded', 'hidden'][cell.cellIndex - 1];
  if (vis_class === undefined) {
    // Checkbox outside the two known columns: there is no class to apply.
    return;
  }

  const fixed_selectors = {
    'non-qm posts': '.post_container:not(.qm_post)',
    'post tags': '.post_container > .tags',
    'post header': '.post_container > .header',
  };

  let selector;
  if (Object.prototype.hasOwnProperty.call(fixed_selectors, tag)) {
    selector = fixed_selectors[tag];
  } else {
    // Escape the label so characters that are special in selectors cannot
    // make the query throw or match the wrong elements.
    const class_name = (typeof CSS !== 'undefined' && typeof CSS.escape === 'function')
      ? CSS.escape(tag)
      : tag;
    selector = '.' + class_name;
  }

  for (const element of document.querySelectorAll(selector)) {
    element.classList.toggle(vis_class, checkbox.checked);
  }
}

View File

@ -11,6 +11,34 @@
<body> <body>
<header> <header>
<h1>Voyage</h1> <h1>Voyage</h1>
{# Visibility menu. toggle_visibility() reads the first cell of a row as the
   target name and uses the checkbox's column to choose between the 'faded'
   and 'hidden' classes, so the cell text and the column order here must stay
   in sync with that script. #}
<span id="visibility_menu_toggle" onclick="toggle_visibility_menu(event)">Visibility</span>
<div id="visibility_menu" style="display: none;">
<table>
<thead>
<tr>
<th>Tag</th>
<th>Faded</th>
<th>Hidden</th>
</tr>
</thead>
<tbody>
<tr>
<td>non-qm posts</td><td><input type="checkbox" onchange="toggle_visibility(event)" checked></td><td><input type="checkbox" onchange="toggle_visibility(event)"></td>
</tr>
{# One row per distinct tag supplied by the view as `tags_vis`. #}
{% for tag in tags_vis %}
<tr>
<td>{{ tag }}</td><td><input type="checkbox" onchange="toggle_visibility(event)"></td><td><input type="checkbox" onchange="toggle_visibility(event)"></td>
</tr>
{% endfor %}
{# These two rows only offer "Hidden"; the "Faded" cell is left empty. #}
<tr>
<td>post tags</td><td></td><td><input type="checkbox" onchange="toggle_visibility(event)"></td>
</tr>
<tr>
<td>post header</td><td></td><td><input type="checkbox" onchange="toggle_visibility(event)"></td>
</tr>
</tbody>
</table>
</div>
</header> </header>
<main> <main>
{% for post in posts %} {% for post in posts %}
@ -25,10 +53,7 @@
{% if post.subject %} {% if post.subject %}
<span class="subject">{{ post.subject }}</span> <span class="subject">{{ post.subject }}</span>
{% endif %} {% endif %}
<span class="name">{{ post.name }}</span> {% if post.email %}<a class="useremail" href="{{ post.email }}">{% endif %}<span class="name">{{ post.name }}</span>{% if post.tripcode %} <span class="tripcode">{{ post.tripcode }}</span>{% endif %}{% if post.email %}</a>{% endif %}
{% if post.tripcode %}
<span class="tripcode">{{ post.tripcode }}</span>
{% endif %}
<span class="time">{{ post.time.astimezone().strftime('%Y-%m-%d %H:%M') }}</span> <span class="time">{{ post.time.astimezone().strftime('%Y-%m-%d %H:%M') }}</span>
<span class="id">No.{{ post.id }}</span> <span class="id">No.{{ post.id }}</span>
{% if backlinks.get(post.id) %} {% if backlinks.get(post.id) %}

View File

@ -46,6 +46,7 @@ async def thread(request):
tags = defaultdict(list) tags = defaultdict(list)
links = defaultdict(list) links = defaultdict(list)
backlinks = defaultdict(list) backlinks = defaultdict(list)
tags_vis = sorted(list(set(t[1] for t in tags_raw)))
for tag_raw in tags_raw: for tag_raw in tags_raw:
tag = tags[tag_raw['post_id']] tag = tags[tag_raw['post_id']]