Files
Audiolib/backend/app/services/scanner.py
Audiolib 14ffee3051 Initial commit: Phase 1 – Projektstruktur, DB-Schema, Core-API
- FastAPI-Backend mit vollständiger ABS v2.x API-Kompatibilität
- SQLAlchemy-Models: User, Library, LibraryItem, BookFile, Chapter,
  Podcast, PodcastEpisode, MediaProgress, Bookmark, PlaybackSession
- Auth: JWT-Login (/login, /logout, /api/authorize)
- Library + Items Endpoints inkl. camelCase ABS-Response-Format
- HLS-Streaming via FFmpeg (POST /api/items/:id/play, Session-Sync)
- Me/Progress Endpoints + Lesezeichen
- User-Management + Server-Settings (Admin)
- Library-Scanner (MP3/WAV Discovery, Hintergrund-Task)
- File Watcher (watchdog, 30s Debounce)
- Matching-Skelett (MusicBrainz, OpenLibrary, Google Books – Phase 5)
- Docker-Setup: backend (Python 3.12+FFmpeg), frontend (React/Vite),
  nginx Reverse-Proxy auf Port 3000
- setup.sh: Installiert Docker auf Debian/Ubuntu, richtet .env ein

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-26 11:43:35 +02:00

200 lines
7.0 KiB
Python

import asyncio
import logging
import os
import uuid
from datetime import datetime
from pathlib import Path

from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from ..database import AsyncSessionLocal
from ..models.library import Library
from ..models.media_item import LibraryItem, BookFile, Chapter
from ..models.session import ScanJob
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# File suffixes (lower-case, including the leading dot) that count as audio.
# The discovery helpers compare them against ``Path.suffix.lower()``.
AUDIO_EXTENSIONS = {".mp3", ".wav", ".m4a", ".flac", ".ogg", ".aac", ".m4b", ".opus"}
def _get_audio_duration(file_path: str) -> float:
    """Return the playback length of an audio file in seconds.

    The file is opened with mutagen, which is imported lazily inside the
    function. This is a best-effort probe: any failure (mutagen not
    installed, unreadable or corrupt file, unknown format) yields ``0.0``
    instead of raising.

    Args:
        file_path: Path of the audio file to inspect.

    Returns:
        Duration in seconds, or ``0.0`` when it cannot be determined.
    """
    try:
        from mutagen import File as MutagenFile

        audio = MutagenFile(file_path)
        # Compare against None explicitly instead of relying on truthiness:
        # mutagen file objects are dict-like and evaluate as falsy when the
        # file carries no tags, which would report valid but untagged files
        # (common for WAV) as 0 seconds long.
        info = getattr(audio, "info", None)
        if info is not None:
            return float(info.length)
    except Exception:
        # Deliberately broad: callers need a number, never an exception.
        pass
    return 0.0
def _get_file_size(file_path: str) -> int:
    """Return the size of ``file_path`` in bytes.

    Best effort: if the file cannot be stat'ed for any reason, ``0`` is
    returned instead of raising.
    """
    size_in_bytes = 0
    try:
        size_in_bytes = os.stat(file_path).st_size
    except Exception:
        # Keep the default of 0 for anything that cannot be inspected.
        pass
    return size_in_bytes
def _guess_title_from_path(folder_path: str) -> str:
    """Derive a title from the name of the folder.

    Trailing path separators are ignored, so ``"/books/Title/"`` and
    ``"/books/Title"`` both yield ``"Title"``. Without this, a trailing
    separator would produce an empty title.

    Args:
        folder_path: Path of the folder that represents one audiobook.

    Returns:
        The last path component, or ``""`` if the path has none.
    """
    separators = os.sep + (os.altsep or "")
    return os.path.basename(folder_path.rstrip(separators))
def _discover_audiobook_folders(base_path: str) -> list[dict]:
    """Find every audiobook below ``base_path``.

    Each direct sub-folder that contains audio files (at any depth) is one
    audiobook. Audio files lying directly in ``base_path`` form one
    additional "root" book.

    Args:
        base_path: Library folder to scan.

    Returns:
        A list of ``{"path": <folder>, "files": [<audio file>, ...]}`` dicts.
        The root book (if any) comes first, followed by the sub-folders in
        sorted order. The list is empty when ``base_path`` is not a readable
        directory.
    """
    books: list[dict] = []
    base = Path(base_path)
    # is_dir() rather than exists(): a plain file would pass exists() and
    # then make iterdir() raise NotADirectoryError.
    if not base.is_dir():
        logger.warning(f"Pfad nicht gefunden: {base_path}")
        return books

    # List the directory once. Sorting makes the result order deterministic,
    # since iterdir() yields entries in arbitrary order.
    try:
        entries = sorted(base.iterdir())
    except OSError as exc:
        # An unreadable root is skipped, matching how unreadable sub-folders
        # are skipped during recursive collection.
        logger.warning(f"Pfad nicht lesbar: {base_path} ({exc})")
        return books

    # Audio files directly in the root form a single "root" book.
    root_audio = [
        entry
        for entry in entries
        if entry.is_file() and entry.suffix.lower() in AUDIO_EXTENSIONS
    ]
    if root_audio:
        books.append({
            "path": str(base),
            "files": [str(f) for f in root_audio],
        })

    # Every direct sub-folder that holds audio (recursively) is one book.
    for entry in entries:
        if not entry.is_dir():
            continue
        audio_files: list[str] = []
        _collect_audio_files(entry, audio_files)
        if audio_files:
            books.append({
                "path": str(entry),
                "files": sorted(audio_files),
            })
    return books
def _collect_audio_files(folder: Path, result: list):
    """Append the paths of all audio files below ``folder`` to ``result``.

    The folder is walked depth-first with its entries visited in sorted
    order. Paths are appended as strings. A ``PermissionError`` ends the walk
    of the folder in which it occurs; nothing is raised to the caller.
    """
    try:
        children = sorted(folder.iterdir())
        for child in children:
            is_audio = child.is_file() and child.suffix.lower() in AUDIO_EXTENSIONS
            if is_audio:
                result.append(str(child))
                continue
            if child.is_dir():
                _collect_audio_files(child, result)
    except PermissionError:
        return
def _probe_audio_files(file_paths: list[str]) -> list[tuple[str, int, float]]:
    """Return ``(path, size_bytes, duration_seconds)`` per path, probing each file once."""
    return [(p, _get_file_size(p), _get_audio_duration(p)) for p in file_paths]


def _folder_inode(folder_path: str) -> str:
    """Return the folder's inode number as a string, or ``""`` if it cannot be stat'ed."""
    try:
        return str(os.stat(folder_path).st_ino)
    except (OSError, ValueError):
        return ""


async def scan_library_task(library_id: str, job_id: str) -> None:
    """Background task: scan one library and synchronise the database with it.

    Steps:
      1. Mark the scan job (if it exists) as "running".
      2. Discover audiobook folders in every configured library folder.
      3. Create a ``LibraryItem`` for each new folder or refresh the totals of
         an existing one, and keep its ``BookFile`` rows in step with the
         files found on disk.
      4. Flag every item of the library whose path was not found as missing.
      5. Mark the job as "done", or as "error" with the exception text if
         anything failed.

    Blocking filesystem and metadata work runs in a worker thread so the event
    loop stays responsive during a long scan.

    Args:
        library_id: Id of the ``Library`` to scan.
        job_id: Id of the ``ScanJob`` that tracks this run; may refer to a
            job that does not exist, in which case no job is updated.

    Returns:
        None. Exceptions are logged and recorded on the job, never re-raised.
    """
    async with AsyncSessionLocal() as db:
        try:
            # Set the job to "running".
            job_result = await db.execute(select(ScanJob).where(ScanJob.id == job_id))
            job = job_result.scalar_one_or_none()
            if job:
                job.status = "running"
                job.started_at = datetime.utcnow()
                await db.commit()

            lib_result = await db.execute(select(Library).where(Library.id == library_id))
            lib = lib_result.scalar_one_or_none()
            if not lib:
                # Finish the job explicitly; returning silently would leave it
                # in "running" forever.
                logger.warning(f"Library {library_id} nicht gefunden")
                if job:
                    job.status = "error"
                    job.log = f"Library {library_id} nicht gefunden"
                    job.finished_at = datetime.utcnow()
                    await db.commit()
                return

            # Read what is needed from the library row before any later commit.
            folders = lib.folders or []
            media_type = lib.media_type

            all_books = []
            for folder_info in folders:
                folder_path = folder_info.get("fullPath", folder_info.get("full_path", ""))
                if folder_path:
                    # Directory walking is blocking I/O: keep it off the event loop.
                    all_books.extend(
                        await asyncio.to_thread(_discover_audiobook_folders, folder_path)
                    )

            items_found = 0
            for book_info in all_books:
                folder_path = book_info["path"]

                # Probe every file exactly once (metadata parsing is the
                # expensive part) and reuse the values for totals and rows.
                probed = await asyncio.to_thread(_probe_audio_files, book_info["files"])
                total_duration = sum(duration for _, _, duration in probed)
                total_size = sum(size for _, size, _ in probed)

                # Does the item already exist?
                existing = await db.execute(
                    select(LibraryItem).where(
                        LibraryItem.library_id == library_id,
                        LibraryItem.path == folder_path,
                    )
                )
                item = existing.scalar_one_or_none()

                known_files = {}
                if item:
                    item.duration_seconds = total_duration
                    item.size_bytes = total_size
                    item.num_files = len(probed)
                    item.is_missing = False
                    item.updated_at = datetime.utcnow()

                    files_result = await db.execute(
                        select(BookFile).where(BookFile.library_item_id == item.id)
                    )
                    known_files = {bf.path: bf for bf in files_result.scalars().all()}
                else:
                    item = LibraryItem(
                        id=str(uuid.uuid4()),
                        library_id=library_id,
                        media_type=media_type,
                        path=folder_path,
                        ino=_folder_inode(folder_path),
                        title=_guess_title_from_path(folder_path),
                        duration_seconds=total_duration,
                        size_bytes=total_size,
                        num_files=len(probed),
                        tags=["zu_prüfen"],
                    )
                    db.add(item)
                    await db.flush()

                # Keep BookFile rows in step with the files on disk. Rows are
                # matched by path so a rescan refreshes them instead of
                # inserting duplicates.
                # NOTE(review): rows for files that vanished from disk are left
                # untouched here — confirm whether they should be deleted.
                for idx, (file_path, size, duration) in enumerate(probed):
                    book_file = known_files.get(file_path)
                    if book_file is not None:
                        book_file.size_bytes = size
                        book_file.duration_seconds = duration
                        book_file.track_index = idx
                        continue
                    db.add(
                        BookFile(
                            library_item_id=item.id,
                            filename=os.path.basename(file_path),
                            path=file_path,
                            format=Path(file_path).suffix.lstrip(".").lower(),
                            size_bytes=size,
                            duration_seconds=duration,
                            track_index=idx,
                        )
                    )

                items_found += 1

            await db.commit()

            # Mark items whose folder was not found in this scan as missing.
            all_items_result = await db.execute(
                select(LibraryItem).where(LibraryItem.library_id == library_id)
            )
            all_items = all_items_result.scalars().all()
            found_paths = {b["path"] for b in all_books}
            for item in all_items:
                item.is_missing = item.path not in found_paths
            await db.commit()

            if job:
                job.status = "done"
                job.items_found = items_found
                job.finished_at = datetime.utcnow()
                job.progress = 1.0
                await db.commit()

            logger.info(f"Scan abgeschlossen: {items_found} Items in Library {library_id}")

        except Exception as e:
            logger.error(f"Scan-Fehler für Library {library_id}: {e}", exc_info=True)
            # Release the failed transaction first so it cannot block the
            # separate session that records the error on the job.
            await db.rollback()
            async with AsyncSessionLocal() as err_db:
                job_result = await err_db.execute(select(ScanJob).where(ScanJob.id == job_id))
                job = job_result.scalar_one_or_none()
                if job:
                    job.status = "error"
                    job.log = str(e)
                    job.finished_at = datetime.utcnow()
                    await err_db.commit()