Compare commits
No commits in common. "master" and "8385103796e0bdb6281c4c8303cc2862b218252e" have entirely different histories.
master
...
8385103796
71
fix_br.py
71
fix_br.py
|
@ -1,71 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
Fixes a mistake made to the newline structure in scrape_quest.py
|
|
||||||
"""
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
import datetime
|
|
||||||
|
|
||||||
import bs4
|
|
||||||
import psycopg2
|
|
||||||
|
|
||||||
import config
|
|
||||||
|
|
||||||
BQ_DIR = '/var/www/html/banished/'
|
|
||||||
|
|
||||||
con = psycopg2.connect(**config.db)
|
|
||||||
cur = con.cursor()
|
|
||||||
|
|
||||||
# Maps the markup class found on a source <span> to the CSS class written
# into the stored post body.
_SPAN_CLASS_MAP = {
    'mu-s': 'bold',
    'mu-i': 'italic',
    'mu-r': 'red',
    'mu-g': 'green',
    'mu-b': 'blue',
}

# Matches <wbr>, <wbr/>, <wbr /> and <wbr > so they can be dropped before parsing.
_WBR_RE = re.compile(r'<wbr ?\/?>')


def _render_child(child):
    """Return the stored-body fragment for one direct child of a post message.

    Bare text is passed through, <b> is re-wrapped in <b>, and a <span> whose
    first class is a key of `_SPAN_CLASS_MAP` is re-wrapped in a <span> with
    the mapped class. Every other element is flattened to its text.
    """
    text = child.get_text()
    if child.name == 'b':
        return '<b>' + text + '</b>'
    if child.name == 'span':
        # `.get` rather than indexing: a <span> without a class attribute
        # must fall through to plain text instead of raising KeyError.
        classes = child.get('class') or []
        css_class = _SPAN_CLASS_MAP.get(classes[0]) if classes else None
        if css_class is not None:
            return '<span class="' + css_class + '">' + text + '</span>'
    return text


def scrape_posts(root_dir):
    """Walks the `root_dir` and rewrites post bodies from index.html files found.

    For every `index.html` below `root_dir`, each element with class
    `postContainer` is read, the <br> tags inside its `postMessage` element
    are turned into newlines, and the resulting body is written to the `post`
    table through the module-level cursor `cur`. Nothing is committed here;
    the caller owns the transaction.

    Args:
        root_dir: Directory to walk recursively.

    Raises:
        ValueError: If a post container's id does not end in an integer
            after its two-character prefix.
    """
    for dir_name, _sub_dirs, files in os.walk(root_dir):
        if 'index.html' not in files:
            continue
        # os.walk already yields `dir_name` rooted at `root_dir`; joining
        # `root_dir` in again breaks relative roots ('a' + 'a/b' -> 'a/a/b').
        filepath = os.path.join(dir_name, 'index.html')
        print("Processing:", filepath)
        with open(filepath, 'r') as page:
            data = page.read()
        data = _WBR_RE.sub('', data)
        soup = bs4.BeautifulSoup(data, 'html.parser')

        for post in soup.find_all(class_='postContainer'):
            # The id attribute carries a two-character prefix before the number.
            post_id = int(post.get('id')[2:])

            post_body = post.find(class_='postMessage')
            if post_body is None:
                # No message element to rebuild; leave the stored body alone.
                continue
            for br in post_body.find_all('br'):
                br.replace_with('\n')

            # NOTE(review): text is inserted without HTML escaping, exactly as
            # before - confirm that the consumer of `post.body` expects that.
            post_body_txt = ''.join(
                _render_child(child) for child in post_body.children
            )

            cur.execute(
                "UPDATE post SET body = (%s) WHERE id = (%s)",
                (post_body_txt, post_id)
            )
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Run both archive trees inside one transaction and always release the
    # connection, even when a scrape fails part-way through.
    try:
        scrape_posts(os.path.join(BQ_DIR, 'archive'))
        scrape_posts(os.path.join(BQ_DIR, 'qstarchive'))
        # Commit only once both trees have been processed successfully.
        con.commit()
    finally:
        con.close()
|
|
|
@ -14,10 +14,6 @@ body {
|
||||||
opacity: 0.33;
|
opacity: 0.33;
|
||||||
}
|
}
|
||||||
|
|
||||||
.hidden {
|
|
||||||
display: none;
|
|
||||||
}
|
|
||||||
|
|
||||||
.tag {
|
.tag {
|
||||||
font-size: 0.8em;
|
font-size: 0.8em;
|
||||||
}
|
}
|
||||||
|
@ -41,10 +37,6 @@ body {
|
||||||
font-weight: bold;
|
font-weight: bold;
|
||||||
}
|
}
|
||||||
|
|
||||||
.useremail > span {
|
|
||||||
color: darkblue;
|
|
||||||
}
|
|
||||||
|
|
||||||
.name {
|
.name {
|
||||||
color: darkgreen;
|
color: darkgreen;
|
||||||
font-weight: bold;
|
font-weight: bold;
|
||||||
|
@ -73,33 +65,3 @@ body {
|
||||||
.deadlink {
|
.deadlink {
|
||||||
text-decoration: line-through;
|
text-decoration: line-through;
|
||||||
}
|
}
|
||||||
|
|
||||||
.bold {
|
|
||||||
font-weight: bold;
|
|
||||||
}
|
|
||||||
|
|
||||||
.italic {
|
|
||||||
font-style: italic;
|
|
||||||
}
|
|
||||||
|
|
||||||
.red {
|
|
||||||
color: red;
|
|
||||||
}
|
|
||||||
|
|
||||||
.green {
|
|
||||||
color: green;
|
|
||||||
}
|
|
||||||
|
|
||||||
.blue {
|
|
||||||
color: blue;
|
|
||||||
}
|
|
||||||
|
|
||||||
#visibility_menu_toggle {
|
|
||||||
cursor: pointer
|
|
||||||
}
|
|
||||||
|
|
||||||
#visibility_menu {
|
|
||||||
border: 1px solid darkgray;
|
|
||||||
position: absolute;
|
|
||||||
background-color: #FAFAFA;
|
|
||||||
}
|
|
||||||
|
|
|
@ -46,51 +46,3 @@ function remove_tag(event) {
|
||||||
tag.remove();
|
tag.remove();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Flip the inline `display` of the #visibility_menu element between
 * 'block' and 'none'. Any other inline value is left untouched.
 *
 * @param {Event} event - The triggering event (unused).
 */
function toggle_visibility_menu(event) {
    const menu_style = document.querySelector('#visibility_menu').style;
    switch (menu_style.display) {
        case 'block':
            menu_style.display = 'none';
            break;
        case 'none':
            menu_style.display = 'block';
            break;
    }
}
|
|
||||||
|
|
||||||
/**
 * Apply or remove a visibility class on the elements a checkbox row controls.
 *
 * The row's first cell holds the label. Three labels map to fixed selectors;
 * any other label is treated as a class name and selected with '.' + label.
 * The class toggled is 'faded' or 'hidden', chosen by the column the checkbox
 * sits in (cell index 1 or 2), and is added when the checkbox is checked and
 * removed when it is not.
 *
 * @param {Event} event - Change event whose target is the checkbox.
 */
function toggle_visibility(event) {
    const checkbox = event.target;
    const cell = checkbox.parentElement;
    const tag = cell.parentElement.cells[0].innerText;

    // Cell 0 is the label, so the checkbox columns start at index 1.
    const vis_class = ['faded', 'hidden'][cell.cellIndex - 1];

    // Labels that do not correspond directly to a class name.
    const special_selectors = new Map([
        ['non-qm posts', '.post_container:not(.qm_post)'],
        ['post tags', '.post_container > .tags'],
        ['post header', '.post_container > .header'],
    ]);
    const selector = special_selectors.has(tag)
        ? special_selectors.get(tag)
        : '.' + tag;

    for (const element of document.querySelectorAll(selector)) {
        // The second argument forces add (true) or remove (false).
        element.classList.toggle(vis_class, checkbox.checked);
    }
}
|
|
||||||
|
|
|
@ -11,34 +11,6 @@
|
||||||
<body>
|
<body>
|
||||||
<header>
|
<header>
|
||||||
<h1>Voyage</h1>
|
<h1>Voyage</h1>
|
||||||
<span id="visibility_menu_toggle" onclick="toggle_visibility_menu(event)">Visibility</span>
|
|
||||||
<div id="visibility_menu" style="display: none;">
|
|
||||||
<table>
|
|
||||||
<thead>
|
|
||||||
<tr>
|
|
||||||
<th>Tag</th>
|
|
||||||
<th>Faded</th>
|
|
||||||
<th>Hidden</th>
|
|
||||||
</tr>
|
|
||||||
</thead>
|
|
||||||
<tbody>
|
|
||||||
<tr>
|
|
||||||
<td>non-qm posts</td><td><input type="checkbox" onchange="toggle_visibility(event)" checked></td><td><input type="checkbox" onchange="toggle_visibility(event)"></td>
|
|
||||||
</tr>
|
|
||||||
{% for tag in tags_vis %}
|
|
||||||
<tr>
|
|
||||||
<td>{{ tag }}</td><td><input type="checkbox" onchange="toggle_visibility(event)"></td><td><input type="checkbox" onchange="toggle_visibility(event)"></td>
|
|
||||||
</tr>
|
|
||||||
{% endfor %}
|
|
||||||
<tr>
|
|
||||||
<td>post tags</td><td></td><td><input type="checkbox" onchange="toggle_visibility(event)"></td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>post header</td><td></td><td><input type="checkbox" onchange="toggle_visibility(event)"></td>
|
|
||||||
</tr>
|
|
||||||
</tbody>
|
|
||||||
</table>
|
|
||||||
</div>
|
|
||||||
</header>
|
</header>
|
||||||
<main>
|
<main>
|
||||||
{% for post in posts %}
|
{% for post in posts %}
|
||||||
|
@ -53,7 +25,10 @@
|
||||||
{% if post.subject %}
|
{% if post.subject %}
|
||||||
<span class="subject">{{ post.subject }}</span>
|
<span class="subject">{{ post.subject }}</span>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% if post.email %}<a class="useremail" href="{{ post.email }}">{% endif %}<span class="name">{{ post.name }}</span>{% if post.tripcode %} <span class="tripcode">{{ post.tripcode }}</span>{% endif %}{% if post.email %}</a>{% endif %}
|
<span class="name">{{ post.name }}</span>
|
||||||
|
{% if post.tripcode %}
|
||||||
|
<span class="tripcode">{{ post.tripcode }}</span>
|
||||||
|
{% endif %}
|
||||||
<span class="time">{{ post.time.astimezone().strftime('%Y-%m-%d %H:%M') }}</span>
|
<span class="time">{{ post.time.astimezone().strftime('%Y-%m-%d %H:%M') }}</span>
|
||||||
<span class="id">No.{{ post.id }}</span>
|
<span class="id">No.{{ post.id }}</span>
|
||||||
{% if backlinks.get(post.id) %}
|
{% if backlinks.get(post.id) %}
|
||||||
|
|
|
@ -46,7 +46,6 @@ async def thread(request):
|
||||||
tags = defaultdict(list)
|
tags = defaultdict(list)
|
||||||
links = defaultdict(list)
|
links = defaultdict(list)
|
||||||
backlinks = defaultdict(list)
|
backlinks = defaultdict(list)
|
||||||
tags_vis = sorted(list(set(t[1] for t in tags_raw)))
|
|
||||||
|
|
||||||
for tag_raw in tags_raw:
|
for tag_raw in tags_raw:
|
||||||
tag = tags[tag_raw['post_id']]
|
tag = tags[tag_raw['post_id']]
|
||||||
|
|
Loading…
Reference in New Issue
Block a user