Initial commit: Phase 1 – Projektstruktur, DB-Schema, Core-API

- FastAPI-Backend mit vollständiger ABS v2.x API-Kompatibilität
- SQLAlchemy-Models: User, Library, LibraryItem, BookFile, Chapter, Podcast, PodcastEpisode, MediaProgress, Bookmark, PlaybackSession
- Auth: JWT-Login (/login, /logout, /api/authorize)
- Library + Items Endpoints inkl. camelCase ABS-Response-Format
- HLS-Streaming via FFmpeg (POST /api/items/:id/play, Session-Sync)
- Me/Progress Endpoints + Lesezeichen
- User-Management + Server-Settings (Admin)
- Library-Scanner (MP3/WAV Discovery, Hintergrund-Task)
- File Watcher (watchdog, 30s Debounce)
- Matching-Skelett (MusicBrainz, OpenLibrary, Google Books – Phase 5)
- Docker-Setup: backend (Python 3.12+FFmpeg), frontend (React/Vite), nginx Reverse-Proxy auf Port 3000
- setup.sh: Installiert Docker auf Debian/Ubuntu, richtet .env ein

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-26 11:43:35 +02:00
commit 14ffee3051
56 changed files with 3220 additions and 0 deletions
--- a/backend/app/services/__init__.py
+++ b/backend/app/services/__init__.py
--- a/backend/app/services/auth.py
+++ b/backend/app/services/auth.py
@@ -0,0 +1,31 @@
+from datetime import datetime, timedelta
+from typing import Optional
+from jose import JWTError, jwt
+from passlib.context import CryptContext
+from ..config import get_settings
+
+# Shared passlib context used by hash_password() and verify_password();
+# configured with the bcrypt scheme.
+pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
+
+
+def hash_password(password: str) -> str:
+    """Return the hash of ``password`` produced by the module-level ``pwd_context``."""
+    hashed = pwd_context.hash(password)
+    return hashed
+
+
+def verify_password(plain: str, hashed: str) -> bool:
+    """Return whether ``plain`` matches ``hashed`` according to ``pwd_context``."""
+    is_match = pwd_context.verify(plain, hashed)
+    return is_match
+
+
+def create_token(user_id: str) -> str:
+    """Create a signed JWT for ``user_id``.
+
+    The payload carries ``sub`` (the user id), ``iat`` (issue time) and
+    ``exp`` (issue time plus ``settings.jwt_expire_days`` days). It is signed
+    with ``settings.jwt_secret`` using ``settings.jwt_algorithm``.
+    """
+    # Local import: the file-level import only brings in datetime/timedelta.
+    from datetime import timezone
+
+    settings = get_settings()
+    # Take a single timezone-aware timestamp so that iat and exp are derived
+    # from the same instant; datetime.utcnow() is deprecated since Python 3.12.
+    issued_at = datetime.now(timezone.utc)
+    payload = {
+        "sub": user_id,
+        "exp": issued_at + timedelta(days=settings.jwt_expire_days),
+        "iat": issued_at,
+    }
+    return jwt.encode(payload, settings.jwt_secret, algorithm=settings.jwt_algorithm)
+
+
+def decode_token(token: str) -> Optional[str]:
+    """Return the ``sub`` claim of ``token``, or ``None`` if decoding raises ``JWTError``."""
+    cfg = get_settings()
+    try:
+        claims = jwt.decode(token, cfg.jwt_secret, algorithms=[cfg.jwt_algorithm])
+    except JWTError:
+        return None
+    return claims.get("sub")
--- a/backend/app/services/file_watcher.py
+++ b/backend/app/services/file_watcher.py
@@ -0,0 +1,94 @@
+import asyncio
+import logging
+import uuid
+from pathlib import Path
+from watchdog.observers import Observer
+from watchdog.events import FileSystemEventHandler, FileCreatedEvent, FileMovedEvent
+from ..database import AsyncSessionLocal
+from ..models.library import Library
+from ..models.session import ScanJob
+from sqlalchemy import select
+
+logger = logging.getLogger(__name__)
+
+# File suffixes (lower-case, including the leading dot) treated as audio files.
+AUDIO_EXTENSIONS = {".mp3", ".wav", ".m4a", ".flac", ".ogg", ".aac", ".m4b", ".opus"}
+
+# The running watchdog observer; None while no watcher is active.
+_observer: Observer | None = None
+# Pending debounce timers, keyed by library id.
+_scan_debounce: dict[str, asyncio.TimerHandle] = {}
+
+
+class AudioFileHandler(FileSystemEventHandler):
+    """Watchdog handler that schedules a debounced scan of one library.
+
+    Watchdog invokes the ``on_*`` callbacks on its observer thread, while the
+    debounce timers belong to the asyncio event loop. Every timer operation
+    is therefore handed over to the loop with ``call_soon_threadsafe``.
+    """
+
+    def __init__(self, library_id: str, loop: asyncio.AbstractEventLoop):
+        super().__init__()
+        self.library_id = library_id
+        self.loop = loop
+
+    def _schedule_scan(self):
+        """Request a (re)start of the debounce timer; safe to call from any thread."""
+        # loop.call_later() and TimerHandle.cancel() are not thread-safe, so
+        # the timer is only ever touched from inside the event loop.
+        self.loop.call_soon_threadsafe(self._restart_debounce)
+
+    def _restart_debounce(self):
+        """Cancel a pending timer for this library and arm a new one (runs in the loop)."""
+        key = self.library_id
+        pending = _scan_debounce.pop(key, None)
+        if pending is not None:
+            pending.cancel()
+        _scan_debounce[key] = self.loop.call_later(
+            30.0,  # 30 s debounce: do not scan immediately for every single file
+            self._fire_scan,
+        )
+
+    def _fire_scan(self):
+        """Timer callback: forget the expired timer and start the scan coroutine."""
+        _scan_debounce.pop(self.library_id, None)
+        asyncio.run_coroutine_threadsafe(_trigger_scan(self.library_id), self.loop)
+
+    def _handle_path(self, path, message: str):
+        """Log ``message`` and schedule a scan if ``path`` has an audio suffix."""
+        if Path(path).suffix.lower() in AUDIO_EXTENSIONS:
+            logger.info("%s: %s", message, path)
+            self._schedule_scan()
+
+    def on_created(self, event):
+        """React to a newly created file (directories are ignored)."""
+        if not event.is_directory:
+            self._handle_path(event.src_path, "Neue Audiodatei erkannt")
+
+    def on_moved(self, event):
+        """React to a moved file, judged by its destination path."""
+        if not event.is_directory:
+            self._handle_path(event.dest_path, "Audiodatei verschoben")
+
+
+# Strong references to running scan tasks. The event loop only keeps weak
+# references to tasks, so an unreferenced task may be garbage-collected
+# before it finishes.
+_scan_tasks: set = set()
+
+
+async def _trigger_scan(library_id: str):
+    """Persist a queued ``ScanJob`` for ``library_id`` and start the scan in the background."""
+    from ..services.scanner import scan_library_task
+
+    # The id is generated here, so nothing has to be read back from the ORM
+    # object after the session is closed.
+    job_id = str(uuid.uuid4())
+    async with AsyncSessionLocal() as db:
+        db.add(ScanJob(id=job_id, library_id=library_id, status="queued"))
+        await db.commit()
+
+    task = asyncio.create_task(scan_library_task(library_id, job_id))
+    _scan_tasks.add(task)
+    # Drop the reference again once the scan has finished.
+    task.add_done_callback(_scan_tasks.discard)
+
+
+async def start_file_watcher():
+    """Start one watchdog observer covering the folders of all libraries.
+
+    Loads every ``Library`` row, registers an ``AudioFileHandler`` for each
+    configured folder that exists on disk (recursively), starts the observer
+    and stores it in the module-level ``_observer``.
+    """
+    global _observer
+    # Inside a coroutine the running loop is the one the handlers must target.
+    loop = asyncio.get_running_loop()
+
+    async with AsyncSessionLocal() as db:
+        result = await db.execute(select(Library))
+        libraries = result.scalars().all()
+
+    observer = Observer()
+    for lib in libraries:
+        for folder_info in (lib.folders or []):
+            # Folder entries may carry the path under either key spelling.
+            folder_path = folder_info.get("fullPath", folder_info.get("full_path", ""))
+            if not folder_path or not Path(folder_path).exists():
+                continue
+            observer.schedule(AudioFileHandler(lib.id, loop), folder_path, recursive=True)
+            logger.info(f"Watching: {folder_path} (Library: {lib.name})")
+
+    observer.start()
+    _observer = observer
+    logger.info("File Watcher gestartet.")
+
+
+def stop_file_watcher():
+    """Stop and join the module-level observer, if there is one, then clear it."""
+    global _observer
+    if not _observer:
+        return
+    _observer.stop()
+    _observer.join()
+    _observer = None
+    logger.info("File Watcher gestoppt.")
--- a/backend/app/services/hls.py
+++ b/backend/app/services/hls.py
@@ -0,0 +1,108 @@
+import os
+import asyncio
+import uuid
+import shutil
+from pathlib import Path
+from typing import Optional
+from ..config import get_settings
+
+
+HLS_SEGMENT_DURATION = 10  # seconds per segment
+
+
+def _write_concat_list(concat_file: str, audio_files: list[str]) -> None:
+    """Write an FFmpeg concat-demuxer list naming ``audio_files`` in order."""
+    with open(concat_file, "w", encoding="utf-8") as f:
+        for af in audio_files:
+            # Relative entries would be resolved against the directory of the
+            # list file, so always write absolute paths.
+            safe_path = os.path.abspath(af).replace("\\", "/")
+            # Inside a single-quoted string a literal quote is written as '\''.
+            safe_path = safe_path.replace("'", "'\\''")
+            f.write(f"file '{safe_path}'\n")
+
+
+async def create_hls_session(
+    session_id: str,
+    audio_files: list[str],
+    start_time: float = 0.0,
+) -> str:
+    """
+    Create HLS segments for the given audio files with FFmpeg.
+
+    A single file is passed to FFmpeg directly; several files are joined
+    through a concat list written into the session directory. Output is
+    ``output.m3u8`` plus ``segNNNNN.ts`` files inside
+    ``<hls_cache_dir>/<session_id>``.
+
+    Returns the path of the session directory.
+    Raises ``RuntimeError`` with FFmpeg's stderr if FFmpeg exits non-zero;
+    in that case the session directory is removed again.
+    """
+    settings = get_settings()
+    session_dir = os.path.join(settings.hls_cache_dir, session_id)
+    os.makedirs(session_dir, exist_ok=True)
+
+    playlist_path = os.path.join(session_dir, "output.m3u8")
+    seek_args = ["-ss", str(start_time)]
+
+    if len(audio_files) == 1:
+        # Single file: -ss is given before -i.
+        input_args = [*seek_args, "-i", audio_files[0]]
+    else:
+        # Several files: build a concat list; -ss is given after -i.
+        concat_file = os.path.join(session_dir, "concat.txt")
+        _write_concat_list(concat_file, audio_files)
+        input_args = ["-f", "concat", "-safe", "0", "-i", concat_file, *seek_args]
+
+    cmd = [
+        "ffmpeg", "-y",
+        *input_args,
+        "-c:a", "aac",
+        "-b:a", "192k",
+        "-ac", "2",
+        "-hls_time", str(HLS_SEGMENT_DURATION),
+        "-hls_list_size", "0",
+        "-hls_segment_filename", os.path.join(session_dir, "seg%05d.ts"),
+        "-hls_flags", "independent_segments",
+        playlist_path,
+    ]
+
+    proc = await asyncio.create_subprocess_exec(
+        *cmd,
+        stdout=asyncio.subprocess.DEVNULL,
+        stderr=asyncio.subprocess.PIPE,
+    )
+    _, stderr = await proc.communicate()
+
+    if proc.returncode != 0:
+        # Do not leave a partially written playlist behind that a later
+        # lookup could mistake for a finished session.
+        shutil.rmtree(session_dir, ignore_errors=True)
+        error_msg = stderr.decode(errors="replace") if stderr else "unknown error"
+        raise RuntimeError(f"FFmpeg fehler: {error_msg}")
+
+    return session_dir
+
+
+def cleanup_hls_session(session_id: str):
+    """Remove the HLS cache directory of ``session_id`` if it exists; removal errors are ignored."""
+    cache_root = get_settings().hls_cache_dir
+    target = os.path.join(cache_root, session_id)
+    if not os.path.exists(target):
+        return
+    shutil.rmtree(target, ignore_errors=True)
+
+
+def get_hls_session_path(session_id: str) -> Optional[str]:
+    """Return the session directory if its ``output.m3u8`` exists, otherwise ``None``."""
+    session_dir = os.path.join(get_settings().hls_cache_dir, session_id)
+    if os.path.exists(os.path.join(session_dir, "output.m3u8")):
+        return session_dir
+    return None
+
+
+def parse_m3u8_duration(playlist_path: str) -> float:
+    """Return the summed duration in seconds of all ``#EXTINF`` entries.
+
+    A playlist that cannot be read yields what was summed so far (``0.0`` if
+    it cannot be opened at all). A malformed ``#EXTINF`` line is skipped
+    instead of discarding every entry that follows it.
+    """
+    total = 0.0
+    try:
+        with open(playlist_path, "r", encoding="utf-8", errors="replace") as f:
+            for line in f:
+                if not line.startswith("#EXTINF:"):
+                    continue
+                # "#EXTINF:<duration>,<title>" -> "<duration>"
+                duration_str = line[len("#EXTINF:"):].split(",", 1)[0].strip()
+                try:
+                    total += float(duration_str)
+                except ValueError:
+                    continue
+    except OSError:
+        return total
+    return total
--- a/backend/app/services/matching/__init__.py
+++ b/backend/app/services/matching/__init__.py
--- a/backend/app/services/matching/base.py
+++ b/backend/app/services/matching/base.py
@@ -0,0 +1,25 @@
+from dataclasses import dataclass, field
+from typing import Optional
+
+
+@dataclass
+class MatchResult:
+    """One metadata candidate produced by a matching source."""
+
+    # Required fields identifying the candidate.
+    source: str  # musicbrainz / open_library / google_books
+    source_id: str
+    title: str
+    # Optional metadata; every field defaults to None.
+    author: Optional[str] = None
+    narrator: Optional[str] = None
+    description: Optional[str] = None
+    cover_url: Optional[str] = None
+    publisher: Optional[str] = None
+    publish_year: Optional[int] = None
+    series: Optional[str] = None
+    series_sequence: Optional[str] = None
+    language: Optional[str] = None
+    # Each instance gets its own fresh list.
+    genres: list[str] = field(default_factory=list)
+    chapters: list[dict] = field(default_factory=list)
+    confidence: float = 0.0
+
+
+class BaseMatcherError(Exception):
+    """Exception class of the matching package; adds nothing to ``Exception``."""
--- a/backend/app/services/matching/google_books.py
+++ b/backend/app/services/matching/google_books.py
@@ -0,0 +1,35 @@
+"""Google Books-Matching — Phase 5."""
+import httpx
+from .base import MatchResult
+
+# Base URL of the Google Books API; search_google_books() appends "/volumes".
+GB_BASE = "https://www.googleapis.com/books/v1"
+
+
+def _parse_publish_year(published_date) -> int | None:
+    """Return the leading (up to) four characters of ``published_date`` as an int, else ``None``."""
+    if not published_date:
+        return None
+    try:
+        return int(str(published_date)[:4])
+    except ValueError:
+        # Dates that do not start with a plain number would otherwise abort
+        # the whole search.
+        return None
+
+
+def _quoted_phrase(text: str) -> str:
+    """Return ``text`` without double quotes so it can be wrapped in a quoted query phrase."""
+    return text.replace('"', " ").strip()
+
+
+async def search_google_books(title: str, author: str | None = None) -> list[MatchResult]:
+    """Search Google Books volumes by title and optional author.
+
+    Sends one GET request to ``{GB_BASE}/volumes`` (at most 5 results,
+    ``langRestrict=de``) and maps every returned item to a ``MatchResult``
+    with a fixed confidence of 0.5.
+
+    Raises ``httpx.HTTPStatusError`` for non-success responses
+    (via ``raise_for_status``).
+    """
+    # An embedded double quote would end the quoted phrase early.
+    q = f'intitle:"{_quoted_phrase(title)}"'
+    if author:
+        q += f' inauthor:"{_quoted_phrase(author)}"'
+
+    async with httpx.AsyncClient(timeout=10) as client:
+        resp = await client.get(f"{GB_BASE}/volumes", params={"q": q, "maxResults": 5, "langRestrict": "de"})
+        resp.raise_for_status()
+        data = resp.json()
+
+    results = []
+    for item in data.get("items", []):
+        vol = item.get("volumeInfo", {})
+        authors = vol.get("authors", [])
+        results.append(
+            MatchResult(
+                source="google_books",
+                source_id=item.get("id", ""),
+                title=vol.get("title", title),
+                author=authors[0] if authors else None,
+                description=vol.get("description"),
+                publisher=vol.get("publisher"),
+                publish_year=_parse_publish_year(vol.get("publishedDate")),
+                language=vol.get("language"),
+                confidence=0.5,
+            )
+        )
+    return results
--- a/backend/app/services/matching/musicbrainz.py
+++ b/backend/app/services/matching/musicbrainz.py
@@ -0,0 +1,40 @@
+"""MusicBrainz-Matching — Phase 5."""
+import httpx
+from .base import MatchResult
+
+# Base URL of the MusicBrainz web service; search_musicbrainz() appends "/release".
+MB_BASE = "https://musicbrainz.org/ws/2"
+# Headers passed to the HTTP client in search_musicbrainz().
+HEADERS = {"User-Agent": "audiolib/1.0 (https://github.com/audiolib)"}
+
+
+def _lucene_phrase(text: str) -> str:
+    """Escape backslashes and double quotes so ``text`` can sit inside a quoted Lucene phrase."""
+    return text.replace("\\", "\\\\").replace('"', '\\"')
+
+
+async def search_musicbrainz(title: str, artist: str | None = None) -> list[MatchResult]:
+    """Search MusicBrainz releases by title and optional artist.
+
+    Sends one GET request to ``{MB_BASE}/release`` (JSON, at most 5 results)
+    and maps every release to a ``MatchResult``. The release ``score``
+    divided by 100 becomes the confidence; the author is taken from the
+    first artist credit.
+
+    Raises ``httpx.HTTPStatusError`` for non-success responses
+    (via ``raise_for_status``).
+    """
+    # Unescaped quotes in the title or artist would break the query syntax.
+    query = f'release:"{_lucene_phrase(title)}"'
+    if artist:
+        query += f' AND artist:"{_lucene_phrase(artist)}"'
+    query += " AND format:Digital"
+
+    async with httpx.AsyncClient(headers=HEADERS, timeout=10) as client:
+        resp = await client.get(
+            f"{MB_BASE}/release",
+            params={"query": query, "fmt": "json", "limit": 5},
+        )
+        resp.raise_for_status()
+        data = resp.json()
+
+    results = []
+    for release in data.get("releases", []):
+        # float() also accepts a score delivered as a numeric string.
+        confidence = float(release.get("score") or 0) / 100.0
+        artist_name = None
+        artist_credits = release.get("artist-credit", [])
+        if artist_credits:
+            first = artist_credits[0]
+            artist_name = first.get("name") or first.get("artist", {}).get("name")
+
+        results.append(
+            MatchResult(
+                source="musicbrainz",
+                source_id=release.get("id", ""),
+                title=release.get("title", title),
+                author=artist_name,
+                confidence=confidence,
+            )
+        )
+    return results
--- a/backend/app/services/matching/open_library.py
+++ b/backend/app/services/matching/open_library.py
@@ -0,0 +1,30 @@
+"""OpenLibrary-Matching — Phase 5."""
+import httpx
+from .base import MatchResult
+
+# Base URL of OpenLibrary; search_open_library() appends "/search.json".
+OL_BASE = "https://openlibrary.org"
+
+
+async def search_open_library(title: str, author: str | None = None) -> list[MatchResult]:
+    """Search OpenLibrary by title and optional author.
+
+    Sends one GET request to ``{OL_BASE}/search.json`` (limit 5) and maps
+    every returned doc to a ``MatchResult`` with a fixed confidence of 0.6.
+    """
+    query: dict = {"title": title, "limit": 5}
+    if author:
+        query["author"] = author
+
+    async with httpx.AsyncClient(timeout=10) as client:
+        resp = await client.get(f"{OL_BASE}/search.json", params=query)
+        resp.raise_for_status()
+        payload = resp.json()
+
+    def _to_result(doc) -> MatchResult:
+        # Use the first listed author name, if any.
+        names = doc.get("author_name")
+        return MatchResult(
+            source="open_library",
+            source_id=doc.get("key", ""),
+            title=doc.get("title", title),
+            author=names[0] if names else None,
+            publish_year=doc.get("first_publish_year"),
+            confidence=0.6,
+        )
+
+    return [_to_result(doc) for doc in payload.get("docs", [])]
--- a/backend/app/services/scanner.py
+++ b/backend/app/services/scanner.py
@@ -0,0 +1,199 @@
+import os
+import uuid
+import logging
+from datetime import datetime
+from pathlib import Path
+from sqlalchemy.ext.asyncio import AsyncSession
+from sqlalchemy import select
+from ..database import AsyncSessionLocal
+from ..models.library import Library
+from ..models.media_item import LibraryItem, BookFile, Chapter
+from ..models.session import ScanJob
+
+logger = logging.getLogger(__name__)
+
+# File suffixes (lower-case, including the leading dot) treated as audio files.
+AUDIO_EXTENSIONS = {".mp3", ".wav", ".m4a", ".flac", ".ogg", ".aac", ".m4b", ".opus"}
+
+
+def _get_audio_duration(file_path: str) -> float:
+    """Return the duration of ``file_path`` in seconds as reported by mutagen.
+
+    Best effort: returns ``0.0`` when mutagen does not recognise the file,
+    has no stream info for it, or raises.
+    """
+    try:
+        from mutagen import File as MutagenFile
+        audio = MutagenFile(file_path)
+        # Compare against None explicitly: a mutagen file object is falsy
+        # when it carries no tags, which would hide a valid duration.
+        info = getattr(audio, "info", None) if audio is not None else None
+        if info is not None:
+            return float(info.length)
+    except Exception as exc:
+        # One unreadable file must not abort a scan, but leave a trace.
+        logger.debug("Dauer konnte nicht ermittelt werden (%s): %s", file_path, exc)
+    return 0.0
+
+
+def _get_file_size(file_path: str) -> int:
+    """Return the size of ``file_path`` in bytes, or 0 if it cannot be determined."""
+    size = 0
+    try:
+        size = os.path.getsize(file_path)
+    except Exception:
+        pass
+    return size
+
+
+def _guess_title_from_path(folder_path: str) -> str:
+    """Derive a title from the last component of ``folder_path``.
+
+    Trailing path separators are ignored, so ``"/books/Dune/"`` yields
+    ``"Dune"`` rather than an empty title.
+    """
+    separators = os.sep + (os.altsep or "")
+    return os.path.basename(folder_path.rstrip(separators))
+
+
+def _discover_audiobook_folders(base_path: str) -> list[dict]:
+    """
+    Find all audiobooks below ``base_path``.
+
+    Audio files directly inside ``base_path`` form one "root" book; every
+    direct subfolder containing audio files (at any depth) forms one book.
+    Each book is a dict with ``"path"`` (folder) and ``"files"`` (sorted
+    list of file paths). Subfolders are visited in sorted order so the
+    result is deterministic. A missing or unreadable base folder yields an
+    empty list.
+    """
+    books: list[dict] = []
+    base = Path(base_path)
+
+    if not base.is_dir():
+        logger.warning(f"Pfad nicht gefunden: {base_path}")
+        return books
+
+    try:
+        # List the directory once and reuse the listing for both passes.
+        entries = sorted(base.iterdir())
+    except OSError as exc:
+        logger.warning("Ordner nicht lesbar: %s (%s)", base_path, exc)
+        return books
+
+    # Audio files directly in the root -> one "root" book.
+    root_audio = [e for e in entries if e.is_file() and e.suffix.lower() in AUDIO_EXTENSIONS]
+    if root_audio:
+        books.append({
+            "path": str(base),
+            "files": [str(f) for f in root_audio],
+        })
+
+    # Every subfolder with audio files -> one book.
+    for entry in entries:
+        if not entry.is_dir():
+            continue
+        audio_files: list[str] = []
+        _collect_audio_files(entry, audio_files)
+        if audio_files:
+            books.append({
+                "path": str(entry),
+                "files": sorted(audio_files),
+            })
+
+    return books
+
+
+def _collect_audio_files(folder: Path, result: list):
+    """Recursively append the paths of all audio files below ``folder`` to ``result``.
+
+    Entries are visited in sorted order. A folder that cannot be read is
+    logged and skipped; files collected before the error stay in ``result``.
+    """
+    try:
+        for entry in sorted(folder.iterdir()):
+            if entry.is_file() and entry.suffix.lower() in AUDIO_EXTENSIONS:
+                result.append(str(entry))
+            elif entry.is_dir():
+                _collect_audio_files(entry, result)
+    except OSError as exc:
+        # Covers PermissionError as well as folders that vanish mid-scan.
+        logger.warning("Ordner nicht lesbar, wird übersprungen: %s (%s)", folder, exc)
+
+
+def _read_file_stats(audio_files: list[str]) -> list[tuple[str, int, float]]:
+    """Return ``(path, size_bytes, duration_seconds)`` per file, reading each file once."""
+    return [(p, _get_file_size(p), _get_audio_duration(p)) for p in audio_files]
+
+
+async def scan_library_task(library_id: str, job_id: str):
+    """Background task: scan one library and write the result to the database.
+
+    Steps: mark the ``ScanJob`` as running, discover book folders in every
+    configured library folder, create or update one ``LibraryItem`` per
+    folder (``BookFile`` rows are created for new items only), flag items
+    whose folder was not found as missing, and finally mark the job as done.
+    Any exception is logged and stored on the job with status "error".
+    """
+    # Local import: this module has no file-level asyncio import.
+    import asyncio
+
+    async with AsyncSessionLocal() as db:
+        try:
+            # Mark the job as running.
+            job_result = await db.execute(select(ScanJob).where(ScanJob.id == job_id))
+            job = job_result.scalar_one_or_none()
+            if job:
+                job.status = "running"
+                job.started_at = datetime.utcnow()
+                await db.commit()
+
+            lib_result = await db.execute(select(Library).where(Library.id == library_id))
+            lib = lib_result.scalar_one_or_none()
+            if not lib:
+                # Do not leave the job stuck in "running".
+                logger.warning(f"Library {library_id} nicht gefunden, Scan abgebrochen")
+                if job:
+                    job.status = "error"
+                    job.log = f"Library {library_id} nicht gefunden"
+                    job.finished_at = datetime.utcnow()
+                    await db.commit()
+                return
+
+            folders = lib.folders or []
+            all_books = []
+            for folder_info in folders:
+                folder_path = folder_info.get("fullPath", folder_info.get("full_path", ""))
+                if folder_path:
+                    # Directory walking blocks; keep it off the event loop.
+                    all_books.extend(
+                        await asyncio.to_thread(_discover_audiobook_folders, folder_path)
+                    )
+
+            items_found = 0
+            for book_info in all_books:
+                folder_path = book_info["path"]
+                audio_files = book_info["files"]
+
+                # Does an item for this folder already exist?
+                existing = await db.execute(
+                    select(LibraryItem).where(
+                        LibraryItem.library_id == library_id,
+                        LibraryItem.path == folder_path,
+                    )
+                )
+                existing_item = existing.scalar_one_or_none()
+
+                # Read size and duration of every file exactly once (in a
+                # worker thread) and reuse the values for totals and rows.
+                file_stats = await asyncio.to_thread(_read_file_stats, audio_files)
+                total_duration = sum(duration for _, _, duration in file_stats)
+                total_size = sum(size for _, size, _ in file_stats)
+
+                if existing_item:
+                    existing_item.duration_seconds = total_duration
+                    existing_item.size_bytes = total_size
+                    existing_item.num_files = len(audio_files)
+                    existing_item.is_missing = False
+                    existing_item.updated_at = datetime.utcnow()
+                    item = existing_item
+                else:
+                    item_id = str(uuid.uuid4())
+                    title = _guess_title_from_path(folder_path)
+                    item = LibraryItem(
+                        id=item_id,
+                        library_id=library_id,
+                        media_type=lib.media_type,
+                        path=folder_path,
+                        ino=str(os.stat(folder_path).st_ino) if os.path.exists(folder_path) else "",
+                        title=title,
+                        duration_seconds=total_duration,
+                        size_bytes=total_size,
+                        num_files=len(audio_files),
+                        tags=["zu_prüfen"],
+                    )
+                    db.add(item)
+                    await db.flush()
+
+                    # Create the BookFile rows.
+                    for idx, (file_path, size, duration) in enumerate(file_stats):
+                        bf = BookFile(
+                            library_item_id=item.id,
+                            filename=os.path.basename(file_path),
+                            path=file_path,
+                            format=Path(file_path).suffix.lstrip(".").lower(),
+                            size_bytes=size,
+                            duration_seconds=duration,
+                            track_index=idx,
+                        )
+                        db.add(bf)
+
+                items_found += 1
+
+            await db.commit()
+
+            # Flag items whose folder was not found in this scan as missing.
+            all_items_result = await db.execute(
+                select(LibraryItem).where(LibraryItem.library_id == library_id)
+            )
+            all_items = all_items_result.scalars().all()
+            found_paths = {b["path"] for b in all_books}
+            for item in all_items:
+                item.is_missing = item.path not in found_paths
+            await db.commit()
+
+            if job:
+                job.status = "done"
+                job.items_found = items_found
+                job.finished_at = datetime.utcnow()
+                job.progress = 1.0
+                await db.commit()
+
+            logger.info(f"Scan abgeschlossen: {items_found} Items in Library {library_id}")
+
+        except Exception as e:
+            logger.error(f"Scan-Fehler für Library {library_id}: {e}", exc_info=True)
+            try:
+                # Release the failed transaction before a second session
+                # writes the error status.
+                await db.rollback()
+            except Exception:
+                logger.warning("Rollback nach Scan-Fehler fehlgeschlagen", exc_info=True)
+            async with AsyncSessionLocal() as err_db:
+                job_result = await err_db.execute(select(ScanJob).where(ScanJob.id == job_id))
+                job = job_result.scalar_one_or_none()
+                if job:
+                    job.status = "error"
+                    job.log = str(e)
+                    job.finished_at = datetime.utcnow()
+                    await err_db.commit()