Pyrite/build_db.py

109 lines
2.6 KiB
Python

#!/usr/bin/env python3
"""
Builds the music library database.
"""
import os
import asyncio
import multiprocessing
import asyncpg
import tinytag
import config
# File extensions (lowercase, including the leading dot) that identify a
# file as a music track when the library directory is scanned.
MUSIC_EXT = ['.flac', '.mp3', '.opus']
def read_track(filepath):
    """
    Reads the specified file and extracts relevant information from it.

    Args:
        filepath: Path of the audio file to read.

    Returns:
        A dict holding the file path, the tag fields reported by tinytag,
        the duration, and the file's modification time. The dict keys are
        used as column names of the ``track`` table by ``build_library``.
        Tag fields that are absent from the file are returned as ``None``.
    """
    tag = tinytag.TinyTag.get(filepath)

    def as_text(value):
        # str(None) would produce the literal string 'None', which would
        # then be stored as if it were a real disc/track number. Keep a
        # missing tag as None so it stays distinguishable from real data.
        return None if value is None else str(value)

    return {
        'filepath': filepath,
        'artist': tag.artist,
        'albumartist': tag.albumartist,
        'album': tag.album,
        'title': tag.title,
        'date': tag.year,
        'discnumber': as_text(tag.disc),
        'tracknumber': as_text(tag.track),
        'genre': tag.genre,
        'duration': tag.duration,
        'last_modified': os.path.getmtime(filepath),
    }
def _find_music_files(root_dir):
    """Returns (filepath, last_modified) pairs for music files under root_dir."""
    found = []
    for dir_name, _sub_dirs, files in os.walk(root_dir):
        for name in files:
            # Compare case-insensitively so files such as 'Song.MP3' are
            # not silently skipped.
            if os.path.splitext(name)[1].lower() not in MUSIC_EXT:
                continue
            # os.walk already yields dir_name prefixed with root_dir;
            # joining root_dir a second time duplicates it whenever
            # root_dir is a relative path and produces a non-existent path.
            filepath = os.path.join(dir_name, name)
            found.append((filepath, os.path.getmtime(filepath)))
    return found


def _build_upsert_query(columns):
    """Returns the INSERT ... ON CONFLICT statement that upserts a track row."""
    placeholders = ', '.join(f'${i}' for i in range(1, len(columns) + 1))
    assignments = ', '.join(f'{col} = EXCLUDED.{col}' for col in columns)
    return (
        f"INSERT INTO track ({', '.join(columns)}) VALUES ({placeholders}) "
        f"ON CONFLICT(filepath) DO UPDATE SET {assignments}"
    )


async def build_library(root_dir):
    """Walks the directory and builds a library from tracks discovered.

    Executes the statements in ``pyrite.sql``, scans ``root_dir`` for files
    whose extension is listed in ``MUSIC_EXT``, reads the tags of every file
    that is not yet in the ``track`` table or whose modification time is
    newer than the stored one, and upserts the results keyed on ``filepath``.

    Args:
        root_dir: Directory to scan recursively for music files.

    Returns:
        None. Progress and status messages are printed to stdout.
    """
    print("Building library")
    # Using the pool as a context manager guarantees it is closed on every
    # exit path, including the early return when nothing needs updating.
    async with asyncpg.create_pool(**config.db) as db_pool:
        async with db_pool.acquire() as conn:
            with open('pyrite.sql', 'r') as file:
                await conn.execute(file.read())

        candidates = _find_music_files(root_dir)

        async with db_pool.acquire() as conn:
            stored_rows = await conn.fetch(
                "SELECT filepath, last_modified FROM track")
        stored_mtimes = {
            row['filepath']: row['last_modified'] for row in stored_rows
        }

        # Decide in the parent process which files need (re)reading. This
        # is only a dict lookup per file, and it lets the pool run the
        # module-level read_track directly instead of a nested function
        # published via ``global``, which worker processes can only resolve
        # under the "fork" start method.
        pending = []
        for filepath, last_modified in candidates:
            stored = stored_mtimes.get(filepath)
            if stored is not None and stored >= last_modified:
                continue
            pending.append(filepath)

        if not pending:
            print("No new tracks found!")
            return

        tracks = []
        prev_percent = 0
        with multiprocessing.Pool() as pool:
            results = pool.imap(read_track, pending)
            for done, track in enumerate(results, start=1):
                tracks.append(track)
                # Progress is measured against the files actually being
                # read, so it reaches 100% even when most files are skipped.
                percent = round(done / len(pending) * 100, 2)
                if percent >= prev_percent + 2.5:
                    print(f"{percent}%")
                    prev_percent = percent

        columns = list(tracks[0])
        # Build each row by column name so values always line up with the
        # placeholders, independent of dict ordering.
        rows = [[track[col] for col in columns] for track in tracks]
        async with db_pool.acquire() as conn:
            statement = await conn.prepare(_build_upsert_query(columns))
            await statement.executemany(rows)
    print("Done")
if __name__ == "__main__":
    # Script entry point: index the music directory named in the config.
    music_root = config.music_dir
    asyncio.run(build_library(music_root))