Compare commits
5 Commits
8385103796
...
master
Author | SHA1 | Date | |
---|---|---|---|
48a9242d89 | |||
ff7bb5e3e8 | |||
022e35e13c | |||
bcfe40292d | |||
8ad34cc221 |
71
fix_br.py
Normal file
71
fix_br.py
Normal file
|
@ -0,0 +1,71 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Fixes a mistake made to the newline structure in scrape_quest.py
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import datetime
|
||||||
|
|
||||||
|
import bs4
|
||||||
|
import psycopg2
|
||||||
|
|
||||||
|
import config
|
||||||
|
|
||||||
|
# Base directory; its 'archive' and 'qstarchive' subdirectories are walked
# when this module is run as a script.
BQ_DIR = '/var/www/html/banished/'

# Connection and cursor are created at import time and shared by the whole
# module; scrape_posts() issues its UPDATE statements through `cur`.
con = psycopg2.connect(**config.db)
cur = con.cursor()
|
||||||
|
|
||||||
|
# Matches the <wbr> tags that are stripped from a page before it is parsed.
_WBR_RE = re.compile(r'<wbr ?\/?>')

# Markup classes found on spans in archived posts, mapped to the class name
# written into the stored post body.
_SPAN_CLASSES = {
    'mu-s': 'bold',
    'mu-i': 'italic',
    'mu-r': 'red',
    'mu-g': 'green',
    'mu-b': 'blue',
}


def _post_body_to_text(post_body):
    """Returns the text of `post_body` with newlines and known markup kept.

    Mutates `post_body`: every <br> is first replaced by a newline string.
    Direct children that are <b> or a span whose first class is listed in
    `_SPAN_CLASSES` are re-wrapped in a tag; every other child contributes
    only its text.
    """
    for br in post_body.find_all('br'):
        br.replace_with('\n')

    parts = []
    for child in post_body.children:
        text = child.get_text()
        if child.name == 'b':
            text = '<b>' + text + '</b>'
        elif child.name == 'span':
            # .get() instead of ['class']: a span with no (or an empty) class
            # attribute is treated as plain text instead of raising.
            classes = child.get('class') or []
            css_class = _SPAN_CLASSES.get(classes[0]) if classes else None
            if css_class is not None:
                text = '<span class="' + css_class + '">' + text + '</span>'
        parts.append(text)
    return ''.join(parts)


def scrape_posts(root_dir):
    """Walks the `root_dir` and rewrites post bodies from index.html files found.

    For every `index.html` below `root_dir`, each element with class
    `postContainer` has its `postMessage` converted with
    `_post_body_to_text` and the result is written to `post.body` for the
    matching post id through the module-level cursor `cur`.  Nothing is
    committed here; that is left to the caller.
    """
    for dir_name, _sub_dirs, files in os.walk(root_dir):
        if 'index.html' not in files:
            continue

        # os.walk already yields `dir_name` prefixed with `root_dir`; joining
        # `root_dir` in again doubles the prefix when the root is relative.
        filepath = os.path.join(dir_name, 'index.html')
        print("Processing:", filepath)
        with open(filepath, 'r') as page:
            data = _WBR_RE.sub('', page.read())
        soup = bs4.BeautifulSoup(data, 'html.parser')

        for post in soup.find_all(class_='postContainer'):
            post_body = post.find(class_='postMessage')
            if post_body is None:
                # No message element: leave the stored body untouched.
                continue
            post_id = int(post.get('id')[2:])  # drops a two-character id prefix

            cur.execute(
                "UPDATE post SET body = (%s) WHERE id = (%s)",
                (_post_body_to_text(post_body), post_id)
            )
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    try:
        for archive_name in ('archive', 'qstarchive'):
            scrape_posts(os.path.join(BQ_DIR, archive_name))
        # Commit once, only after both archives were processed without error,
        # so a failed run does not leave a partial update behind.
        con.commit()
    finally:
        # Always release the connection, also when a scrape raised.
        con.close()
|
|
@ -14,6 +14,10 @@ body {
|
||||||
opacity: 0.33;
|
opacity: 0.33;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Takes the element out of the page entirely. */
.hidden {
    display: none;
}
|
||||||
|
|
||||||
.tag {
|
.tag {
|
||||||
font-size: 0.8em;
|
font-size: 0.8em;
|
||||||
}
|
}
|
||||||
|
@ -37,6 +41,10 @@ body {
|
||||||
font-weight: bold;
|
font-weight: bold;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Direct span children of an e-mail link are shown in dark blue. */
.useremail > span {
    color: darkblue;
}
|
||||||
|
|
||||||
.name {
|
.name {
|
||||||
color: darkgreen;
|
color: darkgreen;
|
||||||
font-weight: bold;
|
font-weight: bold;
|
||||||
|
@ -65,3 +73,33 @@ body {
|
||||||
.deadlink {
|
.deadlink {
|
||||||
text-decoration: line-through;
|
text-decoration: line-through;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Text emphasis and colour classes. */
.bold {
    font-weight: bold;
}

.italic {
    font-style: italic;
}

.red {
    color: red;
}

.green {
    color: green;
}

.blue {
    color: blue;
}
|
||||||
|
|
||||||
|
/* The toggle is a plain element with a click handler, so give it a pointer
   cursor to signal that it can be clicked. */
#visibility_menu_toggle {
    cursor: pointer;
}

/* Absolutely positioned so that opening the menu does not shift the page. */
#visibility_menu {
    border: 1px solid darkgray;
    position: absolute;
    background-color: #FAFAFA;
}
|
||||||
|
|
|
@ -46,3 +46,51 @@ function remove_tag(event) {
|
||||||
tag.remove();
|
tag.remove();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Shows the visibility menu when it is hidden and hides it otherwise.
 *
 * @param {Event} event - The click event from the menu toggle (unused).
 */
function toggle_visibility_menu(event) {
    const menu = document.querySelector('#visibility_menu');
    if (!menu) {
        return;
    }
    // Only an explicit inline 'none' counts as hidden; any other value,
    // including an empty inline style, means the menu is currently shown.
    menu.style.display = (menu.style.display === 'none') ? 'block' : 'none';
}
|
||||||
|
|
||||||
|
/**
 * Applies or removes the 'faded' / 'hidden' class for one row of the
 * visibility menu.
 *
 * The row's first cell names what is affected; the column of the changed
 * checkbox selects the class (column 1 -> 'faded', column 2 -> 'hidden').
 *
 * @param {Event} event - The change event of a checkbox inside a table cell.
 */
function toggle_visibility(event) {
    const checkbox = event.target;
    const cell = checkbox.parentElement;
    const tag = cell.parentElement.cells[0].innerText;
    const vis_class = ['faded', 'hidden'][cell.cellIndex - 1];

    // Rows that do not name a tag class map to fixed selectors.
    const fixed_selectors = new Map([
        ['non-qm posts', '.post_container:not(.qm_post)'],
        ['post tags', '.post_container > .tags'],
        ['post header', '.post_container > .header'],
    ]);

    // Any other row names a class; escape it so that a tag that is not a
    // valid CSS identifier cannot make querySelectorAll throw.
    const selector = fixed_selectors.has(tag)
        ? fixed_selectors.get(tag)
        : '.' + CSS.escape(tag);

    for (const element of document.querySelectorAll(selector)) {
        // With the force argument, toggle() adds when checked and removes
        // when unchecked.
        element.classList.toggle(vis_class, checkbox.checked);
    }
}
|
||||||
|
|
|
@ -11,6 +11,34 @@
|
||||||
<body>
|
<body>
|
||||||
<header>
|
<header>
|
||||||
<h1>Voyage</h1>
|
<h1>Voyage</h1>
|
||||||
|
{# Toggle and menu for fading or hiding groups of posts. #}
<span id="visibility_menu_toggle" onclick="toggle_visibility_menu(event)">Visibility</span>
<div id="visibility_menu" style="display: none;">
    <table>
        <thead>
            <tr>
                <th>Tag</th>
                <th>Faded</th>
                <th>Hidden</th>
            </tr>
        </thead>
        <tbody>
            {# toggle_visibility() reads the first cell of the row and the column index of the checkbox. #}
            <tr>
                <td>non-qm posts</td>
                <td><input type="checkbox" onchange="toggle_visibility(event)" checked></td>
                <td><input type="checkbox" onchange="toggle_visibility(event)"></td>
            </tr>
            {% for tag in tags_vis %}
            <tr>
                <td>{{ tag }}</td>
                <td><input type="checkbox" onchange="toggle_visibility(event)"></td>
                <td><input type="checkbox" onchange="toggle_visibility(event)"></td>
            </tr>
            {% endfor %}
            <tr>
                <td>post tags</td>
                <td></td>
                <td><input type="checkbox" onchange="toggle_visibility(event)"></td>
            </tr>
            <tr>
                <td>post header</td>
                <td></td>
                <td><input type="checkbox" onchange="toggle_visibility(event)"></td>
            </tr>
        </tbody>
    </table>
</div>
|
||||||
</header>
|
</header>
|
||||||
<main>
|
<main>
|
||||||
{% for post in posts %}
|
{% for post in posts %}
|
||||||
|
@ -25,10 +53,7 @@
|
||||||
{% if post.subject %}
|
{% if post.subject %}
|
||||||
<span class="subject">{{ post.subject }}</span>
|
<span class="subject">{{ post.subject }}</span>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
<span class="name">{{ post.name }}</span>
|
{% if post.email %}<a class="useremail" href="{{ post.email }}">{% endif %}<span class="name">{{ post.name }}</span>{% if post.tripcode %} <span class="tripcode">{{ post.tripcode }}</span>{% endif %}{% if post.email %}</a>{% endif %}
|
||||||
{% if post.tripcode %}
|
|
||||||
<span class="tripcode">{{ post.tripcode }}</span>
|
|
||||||
{% endif %}
|
|
||||||
<span class="time">{{ post.time.astimezone().strftime('%Y-%m-%d %H:%M') }}</span>
|
<span class="time">{{ post.time.astimezone().strftime('%Y-%m-%d %H:%M') }}</span>
|
||||||
<span class="id">No.{{ post.id }}</span>
|
<span class="id">No.{{ post.id }}</span>
|
||||||
{% if backlinks.get(post.id) %}
|
{% if backlinks.get(post.id) %}
|
||||||
|
|
|
@ -46,6 +46,7 @@ async def thread(request):
|
||||||
tags = defaultdict(list)
|
tags = defaultdict(list)
|
||||||
links = defaultdict(list)
|
links = defaultdict(list)
|
||||||
backlinks = defaultdict(list)
|
backlinks = defaultdict(list)
|
||||||
|
tags_vis = sorted(list(set(t[1] for t in tags_raw)))
|
||||||
|
|
||||||
for tag_raw in tags_raw:
|
for tag_raw in tags_raw:
|
||||||
tag = tags[tag_raw['post_id']]
|
tag = tags[tag_raw['post_id']]
|
||||||
|
|
Loading…
Reference in New Issue
Block a user