Phase 5-9: Matching-Engine, Podcast-Support, Web-Interface + Player

Backend:
- Matching-Orchestrator mit deutschen Serien-Patterns (drei ???, TKKG, ...)
- Vollständige MusicBrainz-Integration (Tracklist → Kapitel, Cover Art Archive)
- OpenLibrary + Google Books als Fallback-Quellen
- Auto-Accept (≥0.75) vs zu_prüfen (0.5-0.75) vs kein Match
- Manuelles Matching: GET /api/items/:id/match/search, POST apply
- RSS-Feed-Manager: feedparser, iTunes Search, periodisches Update
- APScheduler für Podcast-Feed-Updates (konfigurierbares Intervall)
- Podcast-Router: Feed-URL setzen, Episoden, Feed-Suche
- HLS: FFmpeg läuft als Background-Task, wartet auf das erste Segment
- main.py: APScheduler + neue Router eingebunden

Frontend (React + Vite + Tailwind + HLS.js):
- Login-Seite mit Fehlerbehandlung
- Library-Seite: Grid/Listen-Ansicht, Suche, Tag-Filter, Pagination, Scan
- BookCard: Cover, Fortschrittsbalken, zu_prüfen Badge, Quick-Play
- BookDetail: Metadaten, Matching-Panel, Kapitel-Liste, Lesezeichen
- AudioPlayer: HLS.js, Kapitel-Marker auf Fortschrittsbalken, Speed, Sleep-Timer, Lesezeichen, Keyboard-Shortcuts (Space/Arrows)
- MiniPlayer: persistent in der Fußzeile, expandierbar
- PodcastDetail: Feed-URL, iTunes-Suche, Episoden-Liste
- Admin-Panel: Benutzer/Bibliotheken/Einstellungen verwalten
- App.tsx: React Router, Auth-Guard, Player-Overlay

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-26 13:11:04 +02:00
parent dfbb397e46
commit 52c10a7518
32 changed files with 2987 additions and 223 deletions
--- a/backend/app/services/matching/google_books.py
+++ b/backend/app/services/matching/google_books.py
@@ -1,4 +1,3 @@
-"""Google Books-Matching — Phase 5."""
 import httpx
 from .base import MatchResult

@@ -10,26 +9,52 @@ async def search_google_books(title: str, author: str | None = None) -> list[Mat
    if author:
        q += f' inauthor:"{author}"'

-    async with httpx.AsyncClient(timeout=10) as client:
-        resp = await client.get(f"{GB_BASE}/volumes", params={"q": q, "maxResults": 5, "langRestrict": "de"})
-        resp.raise_for_status()
-        data = resp.json()
+    async with httpx.AsyncClient(timeout=12) as client:
+        try:
+            r = await client.get(
+                f"{GB_BASE}/volumes",
+                params={"q": q, "maxResults": 5, "langRestrict": "de", "printType": "books"},
+            )
+            r.raise_for_status()
+            data = r.json()
+        except Exception:
+            return []

    results = []
    for item in data.get("items", []):
        vol = item.get("volumeInfo", {})
        authors = vol.get("authors", [])
-        results.append(
-            MatchResult(
-                source="google_books",
-                source_id=item.get("id", ""),
-                title=vol.get("title", title),
-                author=authors[0] if authors else None,
-                description=vol.get("description"),
-                publisher=vol.get("publisher"),
-                publish_year=int(vol.get("publishedDate", "0")[:4]) if vol.get("publishedDate") else None,
-                language=vol.get("language"),
-                confidence=0.5,
+
+        cover_url = None
+        image_links = vol.get("imageLinks", {})
+        if image_links:
+            cover_url = (
+                image_links.get("extraLarge")
+                or image_links.get("large")
+                or image_links.get("medium")
+                or image_links.get("thumbnail", "").replace("zoom=1", "zoom=3")
            )
-        )
+
+        year = None
+        pub_date = vol.get("publishedDate", "")
+        if pub_date and len(pub_date) >= 4:
+            try:
+                year = int(pub_date[:4])
+            except ValueError:
+                pass
+
+        results.append(MatchResult(
+            source="google_books",
+            source_id=item.get("id", ""),
+            title=vol.get("title", title),
+            subtitle=vol.get("subtitle"),
+            author=authors[0] if authors else None,
+            description=vol.get("description"),
+            publisher=vol.get("publisher"),
+            publish_year=year,
+            language=vol.get("language"),
+            genres=vol.get("categories", []),
+            cover_url=cover_url,
+            confidence=0.5,
+        ))
    return results
--- a/backend/app/services/matching/musicbrainz.py
+++ b/backend/app/services/matching/musicbrainz.py
@@ -1,40 +1,115 @@
-"""MusicBrainz-Matching — Phase 5."""
 import httpx
+import asyncio
 from .base import MatchResult

 MB_BASE = "https://musicbrainz.org/ws/2"
-HEADERS = {"User-Agent": "audiolib/1.0 (https://github.com/audiolib)"}
+CAA_BASE = "https://coverartarchive.org"
+HEADERS = {"User-Agent": "audiolib/1.0 (contact@audiolib.local)"}
+_semaphore = asyncio.Semaphore(2)  # MusicBrainz Rate-Limit: max 1 req/s
+
+
+async def _get(client: httpx.AsyncClient, url: str, **params) -> dict:
+    async with _semaphore:
+        await asyncio.sleep(1.1)  # MusicBrainz erlaubt 1 req/s
+        r = await client.get(url, params={"fmt": "json", **params}, timeout=15)
+        r.raise_for_status()
+        return r.json()


 async def search_musicbrainz(title: str, artist: str | None = None) -> list[MatchResult]:
    query = f'release:"{title}"'
    if artist:
        query += f' AND artist:"{artist}"'
-    query += " AND format:Digital"

-    async with httpx.AsyncClient(headers=HEADERS, timeout=10) as client:
-        resp = await client.get(
-            f"{MB_BASE}/release",
-            params={"query": query, "fmt": "json", "limit": 5},
-        )
-        resp.raise_for_status()
-        data = resp.json()
+    async with httpx.AsyncClient(headers=HEADERS) as client:
+        try:
+            data = await _get(client, f"{MB_BASE}/release", query=query, limit=5)
+        except Exception:
+            return []

    results = []
-    for release in data.get("releases", []):
-        confidence = release.get("score", 0) / 100.0
-        artist_name = None
-        credits = release.get("artist-credit", [])
-        if credits:
-            artist_name = credits[0].get("name") or credits[0].get("artist", {}).get("name")
+    for rel in data.get("releases", []):
+        confidence = rel.get("score", 0) / 100.0
+        artist_name = _first_artist(rel)
+        release_id = rel.get("id", "")

-        results.append(
-            MatchResult(
-                source="musicbrainz",
-                source_id=release.get("id", ""),
-                title=release.get("title", title),
-                author=artist_name,
-                confidence=confidence,
-            )
-        )
+        results.append(MatchResult(
+            source="musicbrainz",
+            source_id=release_id,
+            title=rel.get("title", title),
+            author=artist_name,
+            publish_year=_parse_year(rel.get("date", "")),
+            confidence=confidence,
+        ))
    return results
+
+
+async def get_release_details(release_id: str) -> MatchResult | None:
+    """Lädt vollständige Release-Details inkl. Tracklist (= Kapitel) und Cover."""
+    async with httpx.AsyncClient(headers=HEADERS) as client:
+        try:
+            data = await _get(
+                client, f"{MB_BASE}/release/{release_id}",
+                inc="recordings+artists+release-groups"
+            )
+        except Exception:
+            return None
+
+        artist_name = _first_artist(data)
+        rg = data.get("release-group", {})
+        series = rg.get("title") if rg.get("primary-type") == "Album" else None
+
+        # Tracklist → Kapitel
+        chapters = []
+        offset = 0.0
+        for medium in data.get("media", []):
+            for track in medium.get("tracks", []):
+                duration_ms = track.get("length") or track.get("recording", {}).get("length") or 0
+                duration_s = duration_ms / 1000.0
+                chapters.append({
+                    "title": track.get("title", f"Track {track.get('position', '')}"),
+                    "start": offset,
+                    "end": offset + duration_s,
+                })
+                offset += duration_s
+
+        # Cover Art
+        cover_url = None
+        try:
+            caa = await client.get(f"{CAA_BASE}/release/{release_id}", timeout=10)
+            if caa.status_code == 200:
+                caa_data = caa.json()
+                images = caa_data.get("images", [])
+                front = next((i for i in images if i.get("front")), images[0] if images else None)
+                if front:
+                    cover_url = front.get("thumbnails", {}).get("large") or front.get("image")
+        except Exception:
+            pass
+
+        return MatchResult(
+            source="musicbrainz",
+            source_id=release_id,
+            title=data.get("title", ""),
+            author=artist_name,
+            publish_year=_parse_year(data.get("date", "")),
+            cover_url=cover_url,
+            chapters=chapters,
+            confidence=1.0,
+        )
+
+
+def _first_artist(release: dict) -> str | None:
+    credits = release.get("artist-credit", [])
+    if credits:
+        c = credits[0]
+        return c.get("name") or c.get("artist", {}).get("name")
+    return None
+
+
+def _parse_year(date_str: str) -> int | None:
+    if date_str and len(date_str) >= 4:
+        try:
+            return int(date_str[:4])
+        except ValueError:
+            pass
+    return None
--- a/backend/app/services/matching/open_library.py
+++ b/backend/app/services/matching/open_library.py
@@ -1,4 +1,3 @@
-"""OpenLibrary-Matching — Phase 5."""
 import httpx
 from .base import MatchResult

@@ -6,25 +5,55 @@ OL_BASE = "https://openlibrary.org"


 async def search_open_library(title: str, author: str | None = None) -> list[MatchResult]:
-    params: dict = {"title": title, "limit": 5}
+    params: dict = {"title": title, "limit": 5, "fields": "key,title,author_name,first_publish_year,cover_i,subject"}
    if author:
        params["author"] = author

-    async with httpx.AsyncClient(timeout=10) as client:
-        resp = await client.get(f"{OL_BASE}/search.json", params=params)
-        resp.raise_for_status()
-        data = resp.json()
+    async with httpx.AsyncClient(timeout=12) as client:
+        try:
+            r = await client.get(f"{OL_BASE}/search.json", params=params)
+            r.raise_for_status()
+            data = r.json()
+        except Exception:
+            return []

    results = []
    for doc in data.get("docs", []):
-        results.append(
-            MatchResult(
-                source="open_library",
-                source_id=doc.get("key", ""),
-                title=doc.get("title", title),
-                author=doc.get("author_name", [None])[0] if doc.get("author_name") else None,
-                publish_year=doc.get("first_publish_year"),
-                confidence=0.6,
-            )
-        )
+        cover_url = None
+        if doc.get("cover_i"):
+            cover_url = f"https://covers.openlibrary.org/b/id/{doc['cover_i']}-L.jpg"
+
+        results.append(MatchResult(
+            source="open_library",
+            source_id=doc.get("key", ""),
+            title=doc.get("title", title),
+            author=doc.get("author_name", [None])[0] if doc.get("author_name") else None,
+            publish_year=doc.get("first_publish_year"),
+            cover_url=cover_url,
+            genres=doc.get("subject", [])[:5],
+            confidence=0.55,
+        ))
    return results
+
+
+async def get_work_details(work_key: str) -> MatchResult | None:
+    """Lädt Beschreibung und Genres nach."""
+    async with httpx.AsyncClient(timeout=12) as client:
+        try:
+            r = await client.get(f"{OL_BASE}{work_key}.json")
+            r.raise_for_status()
+            data = r.json()
+        except Exception:
+            return None
+
+    desc = data.get("description")
+    if isinstance(desc, dict):
+        desc = desc.get("value")
+
+    return MatchResult(
+        source="open_library",
+        source_id=work_key,
+        title=data.get("title", ""),
+        description=desc,
+        confidence=1.0,
+    )