Phase 5-9: Matching-Engine, Podcast-Support, Web-Interface + Player

Backend: - Matching-Orchestrator mit deutschen Serien-Patterns (drei ???, TKKG, ...) - Vollständige MusicBrainz-Integration (Tracklist → Kapitel, Cover Art Archive) - OpenLibrary + Google Books als Fallback-Quellen - Auto-Accept (≥0.75) vs zu_prüfen (0.5-0.75) vs kein Match - Manuelles Matching: GET /api/items/:id/match/search, POST apply - RSS-Feed-Manager: feedparser, iTunes Search, periodisches Update - APScheduler für Podcast-Feed-Updates (konfigurierbares Intervall) - Podcast-Router: Feed-URL setzen, Episoden, Feed-Suche - HLS: FFmpeg läuft als Background-Task, wartet auf ersten Segment - main.py: APScheduler + neue Router eingebunden Frontend (React + Vite + Tailwind + HLS.js): - Login-Seite mit Fehlerbehandlung - Library-Seite: Grid/Listen-Ansicht, Suche, Tag-Filter, Pagination, Scan - BookCard: Cover, Fortschrittsbalken, zu_prüfen Badge, Quick-Play - BookDetail: Metadaten, Matching-Panel, Kapitel-Liste, Lesezeichen - AudioPlayer: HLS.js, Kapitel-Marker auf Fortschrittsbalken, Speed, Sleep-Timer, Lesezeichen, Keyboard-Shortcuts (Space/Arrows) - MiniPlayer: persistent an Fußzeile, expandierbar - PodcastDetail: Feed-URL, iTunes-Suche, Episoden-Liste - Admin-Panel: Benutzer/Bibliotheken/Einstellungen verwalten - App.tsx: React Router, Auth-Guard, Player-Overlay Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-26 13:11:04 +02:00
parent dfbb397e46
commit 52c10a7518
32 changed files with 2987 additions and 223 deletions
--- a/backend/app/services/hls.py
+++ b/backend/app/services/hls.py
@@ -1,70 +1,38 @@
 import os
 import asyncio
-import uuid
 import shutil
-from pathlib import Path
 from typing import Optional
 from ..config import get_settings

-
-HLS_SEGMENT_DURATION = 10  # Sekunden pro Segment
+HLS_SEGMENT_DURATION = 10
+_running_sessions: dict[str, asyncio.Task] = {}


-async def create_hls_session(
-    session_id: str,
-    audio_files: list[str],
-    start_time: float = 0.0,
-) -> str:
-    """
-    Erstellt HLS-Segmente via FFmpeg für die gegebenen Audio-Dateien.
-    Gibt den Pfad zum HLS-Verzeichnis zurück.
-    """
+async def _run_ffmpeg(session_id: str, audio_files: list[str], start_time: float = 0.0):
    settings = get_settings()
    session_dir = os.path.join(settings.hls_cache_dir, session_id)
    os.makedirs(session_dir, exist_ok=True)
-
    playlist_path = os.path.join(session_dir, "output.m3u8")

    if len(audio_files) == 1:
-        input_path = audio_files[0]
+        input_args = ["-ss", str(start_time), "-i", audio_files[0]]
    else:
-        # Mehrere Dateien: Concat-Liste erstellen
        concat_file = os.path.join(session_dir, "concat.txt")
        with open(concat_file, "w", encoding="utf-8") as f:
            for af in audio_files:
-                safe_path = af.replace("\\", "/")
-                f.write(f"file '{safe_path}'\n")
-        input_path = concat_file
+                f.write(f"file '{af.replace(chr(92), '/')}'\n")
+        input_args = ["-f", "concat", "-safe", "0", "-i", concat_file, "-ss", str(start_time)]

-    if len(audio_files) == 1:
-        cmd = [
-            "ffmpeg", "-y",
-            "-ss", str(start_time),
-            "-i", input_path,
-            "-c:a", "aac",
-            "-b:a", "192k",
-            "-ac", "2",
-            "-hls_time", str(HLS_SEGMENT_DURATION),
-            "-hls_list_size", "0",
-            "-hls_segment_filename", os.path.join(session_dir, "seg%05d.ts"),
-            "-hls_flags", "independent_segments",
-            playlist_path,
-        ]
-    else:
-        cmd = [
-            "ffmpeg", "-y",
-            "-f", "concat", "-safe", "0",
-            "-i", input_path,
-            "-ss", str(start_time),
-            "-c:a", "aac",
-            "-b:a", "192k",
-            "-ac", "2",
-            "-hls_time", str(HLS_SEGMENT_DURATION),
-            "-hls_list_size", "0",
-            "-hls_segment_filename", os.path.join(session_dir, "seg%05d.ts"),
-            "-hls_flags", "independent_segments",
-            playlist_path,
-        ]
+    cmd = [
+        "ffmpeg", "-y",
+        *input_args,
+        "-c:a", "aac", "-b:a", "128k", "-ac", "2",
+        "-hls_time", str(HLS_SEGMENT_DURATION),
+        "-hls_list_size", "0",
+        "-hls_segment_filename", os.path.join(session_dir, "seg%05d.ts"),
+        "-hls_flags", "independent_segments",
+        playlist_path,
+    ]

    proc = await asyncio.create_subprocess_exec(
        *cmd,
@@ -72,17 +40,49 @@ async def create_hls_session(
        stderr=asyncio.subprocess.PIPE,
    )
    _, stderr = await proc.communicate()
+    if proc.returncode != 0 and session_id in _running_sessions:
+        err = stderr.decode(errors="replace") if stderr else "unknown"
+        # Fehler-Datei schreiben damit der Client es merkt
+        with open(os.path.join(session_dir, "error.txt"), "w") as f:
+            f.write(err)

-    if proc.returncode != 0:
-        error_msg = stderr.decode(errors="replace") if stderr else "unknown error"
-        raise RuntimeError(f"FFmpeg fehler: {error_msg}")

+def start_hls_session(session_id: str, audio_files: list[str], start_time: float = 0.0) -> str:
+    """Startet FFmpeg als Background-Task. Gibt den Session-Pfad zurück."""
+    settings = get_settings()
+    session_dir = os.path.join(settings.hls_cache_dir, session_id)
+    os.makedirs(session_dir, exist_ok=True)
+
+    task = asyncio.create_task(_run_ffmpeg(session_id, audio_files, start_time))
+    _running_sessions[session_id] = task
    return session_dir


+async def wait_for_playlist(session_id: str, timeout: float = 60.0) -> bool:
+    """Wartet bis das erste Segment fertig ist (max. timeout Sekunden)."""
+    settings = get_settings()
+    playlist = os.path.join(settings.hls_cache_dir, session_id, "output.m3u8")
+    error_file = os.path.join(settings.hls_cache_dir, session_id, "error.txt")
+    waited = 0.0
+    while waited < timeout:
+        if os.path.exists(error_file):
+            return False
+        if os.path.exists(playlist) and os.path.getsize(playlist) > 0:
+            # Warte auf mindestens 1 Segment
+            seg0 = os.path.join(settings.hls_cache_dir, session_id, "seg00000.ts")
+            if os.path.exists(seg0):
+                return True
+        await asyncio.sleep(0.5)
+        waited += 0.5
+    return False
+
+
 def cleanup_hls_session(session_id: str):
    settings = get_settings()
    session_dir = os.path.join(settings.hls_cache_dir, session_id)
+    task = _running_sessions.pop(session_id, None)
+    if task and not task.done():
+        task.cancel()
    if os.path.exists(session_dir):
        shutil.rmtree(session_dir, ignore_errors=True)

@@ -90,19 +90,4 @@ def cleanup_hls_session(session_id: str):
 def get_hls_session_path(session_id: str) -> Optional[str]:
    settings = get_settings()
    session_dir = os.path.join(settings.hls_cache_dir, session_id)
-    playlist = os.path.join(session_dir, "output.m3u8")
-    return session_dir if os.path.exists(playlist) else None
-
-
-def parse_m3u8_duration(playlist_path: str) -> float:
-    """Berechnet Gesamtdauer aus M3U8-Playlist."""
-    total = 0.0
-    try:
-        with open(playlist_path, "r") as f:
-            for line in f:
-                if line.startswith("#EXTINF:"):
-                    duration_str = line.split(":")[1].split(",")[0]
-                    total += float(duration_str)
-    except Exception:
-        pass
-    return total
+    return session_dir if os.path.isdir(session_dir) else None
--- a/backend/app/services/matcher.py
+++ b/backend/app/services/matcher.py
@@ -0,0 +1,279 @@
+"""
+Matching-Orchestrator:
+- Erkennt deutsche Hörbuch-Serien (die drei ???, TKKG, ...)
+- Versucht MusicBrainz → OpenLibrary → Google Books
+- Lädt Cover herunter
+- Bewertet Konfidenz und entscheidet über Auto-Accept
+"""
+import re
+import os
+import logging
+import httpx
+import asyncio
+from pathlib import Path
+from datetime import datetime
+from sqlalchemy.ext.asyncio import AsyncSession
+from sqlalchemy import select
+
+from ..config import get_settings
+from ..models.media_item import LibraryItem, BookFile, Chapter
+from ..models.session import ServerSetting
+from ..database import AsyncSessionLocal
+from .matching.base import MatchResult
+from .matching.musicbrainz import search_musicbrainz, get_release_details
+from .matching.open_library import search_open_library, get_work_details
+from .matching.google_books import search_google_books
+
+logger = logging.getLogger(__name__)
+
+AUTO_ACCEPT_THRESHOLD = 0.75
+UNCERTAIN_THRESHOLD = 0.50
+
+# Bekannte deutsche Hörbuch-Serien: (regex, kanonischer Name)
+SERIES_PATTERNS = [
+    (r"(?i)^(die drei \?\?\?|die drei fragezeichen|drei fragezeichen)\s*[-–]?\s*(?:folge\s*)?(\d+)", "Die drei ???"),
+    (r"(?i)^(tkkg)\s*[-–]?\s*(?:folge\s*)?(\d+)", "TKKG"),
+    (r"(?i)^(fünf freunde|funf freunde)\s*[-–]?\s*(?:band\s*)?(\d+)", "Fünf Freunde"),
+    (r"(?i)^(bibi blocksberg)\s*[-–]?\s*(?:folge\s*)?(\d+)", "Bibi Blocksberg"),
+    (r"(?i)^(benjamin blümchen|benjamin blumchen)\s*[-–]?\s*(?:folge\s*)?(\d+)", "Benjamin Blümchen"),
+    (r"(?i)^(bibi und tina)\s*[-–]?\s*(?:folge\s*)?(\d+)", "Bibi und Tina"),
+    (r"(?i)^(der kleine vampir)\s*[-–]?\s*(?:band\s*)?(\d+)", "Der kleine Vampir"),
+    # Generisch: "Serie - Folge/Band/Teil N - Titel"
+    (r"(?i)^(.+?)\s*[-–]\s*(?:folge|band|teil|nr\.?|#)\s*(\d+)", None),
+    # Generisch: "Serie (Folge N)"
+    (r"(?i)^(.+?)\s*\((?:folge|band|teil|nr\.?|#|episode)\s*(\d+)\)", None),
+]
+
+
+def detect_series(title: str) -> tuple[str | None, str | None]:
+    """Gibt (Serienname, Folgennummer) zurück oder (None, None)."""
+    for pattern, canonical_name in SERIES_PATTERNS:
+        m = re.match(pattern, title.strip())
+        if m:
+            series = canonical_name or m.group(1).strip()
+            episode = m.group(2)
+            return series, episode
+    return None, None
+
+
+def _title_similarity(a: str, b: str) -> float:
+    """Einfache Ähnlichkeit: Wort-Überlapp."""
+    if not a or not b:
+        return 0.0
+    wa = set(re.findall(r'\w+', a.lower()))
+    wb = set(re.findall(r'\w+', b.lower()))
+    if not wa or not wb:
+        return 0.0
+    return len(wa & wb) / max(len(wa), len(wb))
+
+
+def _score_result(result: MatchResult, query_title: str, query_author: str | None) -> float:
+    score = result.confidence
+    title_sim = _title_similarity(result.title, query_title)
+    score = score * 0.4 + title_sim * 0.6
+    if query_author and result.author:
+        author_sim = _title_similarity(result.author, query_author)
+        score = score * 0.7 + author_sim * 0.3
+    return min(score, 1.0)
+
+
+async def _download_cover(url: str, item_id: str) -> str | None:
+    """Lädt Cover herunter und speichert es lokal."""
+    settings = get_settings()
+    ext = ".jpg"
+    if ".png" in url:
+        ext = ".png"
+    dest = os.path.join(settings.covers_dir, f"{item_id}{ext}")
+    try:
+        async with httpx.AsyncClient(timeout=20, follow_redirects=True) as client:
+            r = await client.get(url)
+            if r.status_code == 200:
+                os.makedirs(settings.covers_dir, exist_ok=True)
+                with open(dest, "wb") as f:
+                    f.write(r.content)
+                return dest
+    except Exception as e:
+        logger.warning(f"Cover-Download fehlgeschlagen ({url}): {e}")
+    return None
+
+
+async def _apply_match(db: AsyncSession, item: LibraryItem, result: MatchResult, confidence: float):
+    """Schreibt Metadaten aus MatchResult in die DB."""
+    if result.title:
+        item.title = result.title
+    if result.subtitle and not item.subtitle:
+        item.subtitle = result.subtitle
+    if result.author:
+        item.author = result.author
+    if result.narrator:
+        item.narrator = result.narrator
+    if result.description:
+        item.description = result.description
+    if result.publisher:
+        item.publisher = result.publisher
+    if result.publish_year:
+        item.publish_year = result.publish_year
+    if result.language:
+        item.language = result.language
+    if result.genres:
+        item.genres = result.genres
+    if result.series:
+        item.series = result.series
+    if result.series_sequence:
+        item.series_sequence = result.series_sequence
+
+    item.matched_source = result.source
+    item.matched_id = result.source_id
+    item.match_confidence = confidence
+    item.updated_at = datetime.utcnow()
+
+    # Cover herunterladen
+    if result.cover_url and not item.cover_path:
+        cover_path = await _download_cover(result.cover_url, item.id)
+        if cover_path:
+            item.cover_path = cover_path
+
+    # Kapitel aus MusicBrainz-Tracklisting
+    if result.chapters:
+        from sqlalchemy import delete
+        from ..models.media_item import Chapter
+        await db.execute(delete(Chapter).where(Chapter.library_item_id == item.id))
+        for idx, ch in enumerate(result.chapters):
+            chapter = Chapter(
+                library_item_id=item.id,
+                chapter_index=idx,
+                title=ch.get("title", f"Kapitel {idx + 1}"),
+                start_seconds=ch.get("start", 0.0),
+                end_seconds=ch.get("end", 0.0),
+            )
+            db.add(chapter)
+
+    # zu_prüfen entfernen wenn Konfidenz hoch genug
+    if confidence >= AUTO_ACCEPT_THRESHOLD:
+        tags = item.tags or []
+        item.tags = [t for t in tags if t != "zu_prüfen"]
+
+
+async def match_audiobook(item_id: str):
+    """
+    Haupt-Matching-Funktion. Wird nach dem Scan als Hintergrund-Task gestartet.
+    """
+    async with AsyncSessionLocal() as db:
+        result_row = await db.execute(select(LibraryItem).where(LibraryItem.id == item_id))
+        item = result_row.scalar_one_or_none()
+        if not item or item.match_locked:
+            return
+
+        # Einstellung prüfen
+        setting = await db.execute(
+            select(ServerSetting).where(ServerSetting.key == "autoMatchBooks")
+        )
+        s = setting.scalar_one_or_none()
+        if s and s.value is False:
+            return
+
+        title = item.title or ""
+        author = item.author
+
+        # Serien-Erkennung verbessert den Suchbegriff
+        series, episode = detect_series(title)
+        search_title = title
+        if series:
+            search_title = f"{series} {episode}" if episode else series
+            if not item.series:
+                item.series = series
+            if not item.series_sequence and episode:
+                item.series_sequence = episode
+
+        logger.info(f"Matche: '{title}' (Serie: {series}, Folge: {episode})")
+
+        best: MatchResult | None = None
+        best_score = 0.0
+
+        # 1. MusicBrainz
+        try:
+            mb_results = await search_musicbrainz(search_title, author)
+            for r in mb_results:
+                score = _score_result(r, title, author)
+                if score > best_score:
+                    best_score = score
+                    best = r
+        except Exception as e:
+            logger.warning(f"MusicBrainz Fehler: {e}")
+
+        # Wenn guter MB-Treffer → Details holen (Tracklist + Cover)
+        if best and best_score >= UNCERTAIN_THRESHOLD and best.source == "musicbrainz":
+            try:
+                details = await get_release_details(best.source_id)
+                if details:
+                    details.confidence = best_score
+                    best = details
+            except Exception as e:
+                logger.warning(f"MusicBrainz Details Fehler: {e}")
+
+        # 2. OpenLibrary als Fallback
+        if best_score < UNCERTAIN_THRESHOLD:
+            try:
+                ol_results = await search_open_library(search_title, author)
+                for r in ol_results:
+                    score = _score_result(r, title, author)
+                    if score > best_score:
+                        best_score = score
+                        best = r
+                if best and best.source == "open_library" and best_score >= UNCERTAIN_THRESHOLD:
+                    details = await get_work_details(best.source_id)
+                    if details and details.description:
+                        best.description = details.description
+            except Exception as e:
+                logger.warning(f"OpenLibrary Fehler: {e}")
+
+        # 3. Google Books als letzter Fallback
+        if best_score < UNCERTAIN_THRESHOLD:
+            try:
+                gb_results = await search_google_books(search_title, author)
+                for r in gb_results:
+                    score = _score_result(r, title, author)
+                    if score > best_score:
+                        best_score = score
+                        best = r
+            except Exception as e:
+                logger.warning(f"Google Books Fehler: {e}")
+
+        if best and best_score >= UNCERTAIN_THRESHOLD:
+            await _apply_match(db, item, best, best_score)
+            logger.info(f"Match angewendet: '{item.title}' ← {best.source} (Konfidenz: {best_score:.2f})")
+        else:
+            logger.info(f"Kein Match gefunden für '{title}' (beste Konfidenz: {best_score:.2f})")
+
+        await db.commit()
+
+
+async def search_for_item(title: str, author: str | None = None) -> list[dict]:
+    """Suche über alle Quellen – für manuelles Matching."""
+    results = []
+
+    async def _search_source(coro):
+        try:
+            return await coro
+        except Exception:
+            return []
+
+    mb, ol, gb = await asyncio.gather(
+        _search_source(search_musicbrainz(title, author)),
+        _search_source(search_open_library(title, author)),
+        _search_source(search_google_books(title, author)),
+    )
+
+    for r in mb + ol + gb:
+        results.append({
+            "source": r.source,
+            "id": r.source_id,
+            "title": r.title,
+            "author": r.author,
+            "publishYear": r.publish_year,
+            "cover": r.cover_url,
+            "confidence": r.confidence,
+        })
+
+    results.sort(key=lambda x: x["confidence"], reverse=True)
+    return results
--- a/backend/app/services/matching/google_books.py
+++ b/backend/app/services/matching/google_books.py
@@ -1,4 +1,3 @@
-"""Google Books-Matching — Phase 5."""
 import httpx
 from .base import MatchResult

@@ -10,26 +9,52 @@ async def search_google_books(title: str, author: str | None = None) -> list[Mat
    if author:
        q += f' inauthor:"{author}"'

-    async with httpx.AsyncClient(timeout=10) as client:
-        resp = await client.get(f"{GB_BASE}/volumes", params={"q": q, "maxResults": 5, "langRestrict": "de"})
-        resp.raise_for_status()
-        data = resp.json()
+    async with httpx.AsyncClient(timeout=12) as client:
+        try:
+            r = await client.get(
+                f"{GB_BASE}/volumes",
+                params={"q": q, "maxResults": 5, "langRestrict": "de", "printType": "books"},
+            )
+            r.raise_for_status()
+            data = r.json()
+        except Exception:
+            return []

    results = []
    for item in data.get("items", []):
        vol = item.get("volumeInfo", {})
        authors = vol.get("authors", [])
-        results.append(
-            MatchResult(
-                source="google_books",
-                source_id=item.get("id", ""),
-                title=vol.get("title", title),
-                author=authors[0] if authors else None,
-                description=vol.get("description"),
-                publisher=vol.get("publisher"),
-                publish_year=int(vol.get("publishedDate", "0")[:4]) if vol.get("publishedDate") else None,
-                language=vol.get("language"),
-                confidence=0.5,
+
+        cover_url = None
+        image_links = vol.get("imageLinks", {})
+        if image_links:
+            cover_url = (
+                image_links.get("extraLarge")
+                or image_links.get("large")
+                or image_links.get("medium")
+                or image_links.get("thumbnail", "").replace("zoom=1", "zoom=3")
            )
-        )
+
+        year = None
+        pub_date = vol.get("publishedDate", "")
+        if pub_date and len(pub_date) >= 4:
+            try:
+                year = int(pub_date[:4])
+            except ValueError:
+                pass
+
+        results.append(MatchResult(
+            source="google_books",
+            source_id=item.get("id", ""),
+            title=vol.get("title", title),
+            subtitle=vol.get("subtitle"),
+            author=authors[0] if authors else None,
+            description=vol.get("description"),
+            publisher=vol.get("publisher"),
+            publish_year=year,
+            language=vol.get("language"),
+            genres=vol.get("categories", []),
+            cover_url=cover_url,
+            confidence=0.5,
+        ))
    return results
--- a/backend/app/services/matching/musicbrainz.py
+++ b/backend/app/services/matching/musicbrainz.py
@@ -1,40 +1,115 @@
-"""MusicBrainz-Matching — Phase 5."""
 import httpx
+import asyncio
 from .base import MatchResult

 MB_BASE = "https://musicbrainz.org/ws/2"
-HEADERS = {"User-Agent": "audiolib/1.0 (https://github.com/audiolib)"}
+CAA_BASE = "https://coverartarchive.org"
+HEADERS = {"User-Agent": "audiolib/1.0 (contact@audiolib.local)"}
+_semaphore = asyncio.Semaphore(2)  # MusicBrainz Rate-Limit: max 1 req/s
+
+
+async def _get(client: httpx.AsyncClient, url: str, **params) -> dict:
+    async with _semaphore:
+        await asyncio.sleep(1.1)  # MusicBrainz erlaubt 1 req/s
+        r = await client.get(url, params={"fmt": "json", **params}, timeout=15)
+        r.raise_for_status()
+        return r.json()


 async def search_musicbrainz(title: str, artist: str | None = None) -> list[MatchResult]:
    query = f'release:"{title}"'
    if artist:
        query += f' AND artist:"{artist}"'
-    query += " AND format:Digital"

-    async with httpx.AsyncClient(headers=HEADERS, timeout=10) as client:
-        resp = await client.get(
-            f"{MB_BASE}/release",
-            params={"query": query, "fmt": "json", "limit": 5},
-        )
-        resp.raise_for_status()
-        data = resp.json()
+    async with httpx.AsyncClient(headers=HEADERS) as client:
+        try:
+            data = await _get(client, f"{MB_BASE}/release", query=query, limit=5)
+        except Exception:
+            return []

    results = []
-    for release in data.get("releases", []):
-        confidence = release.get("score", 0) / 100.0
-        artist_name = None
-        credits = release.get("artist-credit", [])
-        if credits:
-            artist_name = credits[0].get("name") or credits[0].get("artist", {}).get("name")
+    for rel in data.get("releases", []):
+        confidence = rel.get("score", 0) / 100.0
+        artist_name = _first_artist(rel)
+        release_id = rel.get("id", "")

-        results.append(
-            MatchResult(
-                source="musicbrainz",
-                source_id=release.get("id", ""),
-                title=release.get("title", title),
-                author=artist_name,
-                confidence=confidence,
-            )
-        )
+        results.append(MatchResult(
+            source="musicbrainz",
+            source_id=release_id,
+            title=rel.get("title", title),
+            author=artist_name,
+            publish_year=_parse_year(rel.get("date", "")),
+            confidence=confidence,
+        ))
    return results
+
+
+async def get_release_details(release_id: str) -> MatchResult | None:
+    """Lädt vollständige Release-Details inkl. Tracklist (= Kapitel) und Cover."""
+    async with httpx.AsyncClient(headers=HEADERS) as client:
+        try:
+            data = await _get(
+                client, f"{MB_BASE}/release/{release_id}",
+                inc="recordings+artists+release-groups"
+            )
+        except Exception:
+            return None
+
+        artist_name = _first_artist(data)
+        rg = data.get("release-group", {})
+        series = rg.get("title") if rg.get("primary-type") == "Album" else None
+
+        # Tracklist → Kapitel
+        chapters = []
+        offset = 0.0
+        for medium in data.get("media", []):
+            for track in medium.get("tracks", []):
+                duration_ms = track.get("length") or track.get("recording", {}).get("length") or 0
+                duration_s = duration_ms / 1000.0
+                chapters.append({
+                    "title": track.get("title", f"Track {track.get('position', '')}"),
+                    "start": offset,
+                    "end": offset + duration_s,
+                })
+                offset += duration_s
+
+        # Cover Art
+        cover_url = None
+        try:
+            caa = await client.get(f"{CAA_BASE}/release/{release_id}", timeout=10)
+            if caa.status_code == 200:
+                caa_data = caa.json()
+                images = caa_data.get("images", [])
+                front = next((i for i in images if i.get("front")), images[0] if images else None)
+                if front:
+                    cover_url = front.get("thumbnails", {}).get("large") or front.get("image")
+        except Exception:
+            pass
+
+        return MatchResult(
+            source="musicbrainz",
+            source_id=release_id,
+            title=data.get("title", ""),
+            author=artist_name,
+            publish_year=_parse_year(data.get("date", "")),
+            cover_url=cover_url,
+            chapters=chapters,
+            confidence=1.0,
+        )
+
+
+def _first_artist(release: dict) -> str | None:
+    credits = release.get("artist-credit", [])
+    if credits:
+        c = credits[0]
+        return c.get("name") or c.get("artist", {}).get("name")
+    return None
+
+
+def _parse_year(date_str: str) -> int | None:
+    if date_str and len(date_str) >= 4:
+        try:
+            return int(date_str[:4])
+        except ValueError:
+            pass
+    return None
--- a/backend/app/services/matching/open_library.py
+++ b/backend/app/services/matching/open_library.py
@@ -1,4 +1,3 @@
-"""OpenLibrary-Matching — Phase 5."""
 import httpx
 from .base import MatchResult

@@ -6,25 +5,55 @@ OL_BASE = "https://openlibrary.org"


 async def search_open_library(title: str, author: str | None = None) -> list[MatchResult]:
-    params: dict = {"title": title, "limit": 5}
+    params: dict = {"title": title, "limit": 5, "fields": "key,title,author_name,first_publish_year,cover_i,subject"}
    if author:
        params["author"] = author

-    async with httpx.AsyncClient(timeout=10) as client:
-        resp = await client.get(f"{OL_BASE}/search.json", params=params)
-        resp.raise_for_status()
-        data = resp.json()
+    async with httpx.AsyncClient(timeout=12) as client:
+        try:
+            r = await client.get(f"{OL_BASE}/search.json", params=params)
+            r.raise_for_status()
+            data = r.json()
+        except Exception:
+            return []

    results = []
    for doc in data.get("docs", []):
-        results.append(
-            MatchResult(
-                source="open_library",
-                source_id=doc.get("key", ""),
-                title=doc.get("title", title),
-                author=doc.get("author_name", [None])[0] if doc.get("author_name") else None,
-                publish_year=doc.get("first_publish_year"),
-                confidence=0.6,
-            )
-        )
+        cover_url = None
+        if doc.get("cover_i"):
+            cover_url = f"https://covers.openlibrary.org/b/id/{doc['cover_i']}-L.jpg"
+
+        results.append(MatchResult(
+            source="open_library",
+            source_id=doc.get("key", ""),
+            title=doc.get("title", title),
+            author=doc.get("author_name", [None])[0] if doc.get("author_name") else None,
+            publish_year=doc.get("first_publish_year"),
+            cover_url=cover_url,
+            genres=doc.get("subject", [])[:5],
+            confidence=0.55,
+        ))
    return results
+
+
+async def get_work_details(work_key: str) -> MatchResult | None:
+    """Lädt Beschreibung und Genres nach."""
+    async with httpx.AsyncClient(timeout=12) as client:
+        try:
+            r = await client.get(f"{OL_BASE}{work_key}.json")
+            r.raise_for_status()
+            data = r.json()
+        except Exception:
+            return None
+
+    desc = data.get("description")
+    if isinstance(desc, dict):
+        desc = desc.get("value")
+
+    return MatchResult(
+        source="open_library",
+        source_id=work_key,
+        title=data.get("title", ""),
+        description=desc,
+        confidence=1.0,
+    )
--- a/backend/app/services/podcast_feed.py
+++ b/backend/app/services/podcast_feed.py
@@ -0,0 +1,186 @@
+"""
+Podcast-Feed-Manager:
+- RSS-Feed parsen
+- Episoden mit lokalen Dateien abgleichen
+- Periodisches Update
+"""
+import os
+import re
+import logging
+import httpx
+import feedparser
+from datetime import datetime
+from difflib import SequenceMatcher
+from sqlalchemy.ext.asyncio import AsyncSession
+from sqlalchemy import select
+from ..database import AsyncSessionLocal
+from ..models.library import Library
+from ..models.media_item import LibraryItem
+from ..models.podcast import Podcast, PodcastEpisode
+from ..services.matcher import _download_cover
+
+logger = logging.getLogger(__name__)
+
+
+def _similarity(a: str, b: str) -> float:
+    if not a or not b:
+        return 0.0
+    return SequenceMatcher(None, a.lower(), b.lower()).ratio()
+
+
+def _parse_duration(s: str | None) -> float:
+    """Parst "HH:MM:SS" oder "MM:SS" oder reine Sekunden."""
+    if not s:
+        return 0.0
+    s = s.strip()
+    try:
+        if ":" in s:
+            parts = s.split(":")
+            if len(parts) == 3:
+                return int(parts[0]) * 3600 + int(parts[1]) * 60 + float(parts[2])
+            elif len(parts) == 2:
+                return int(parts[0]) * 60 + float(parts[1])
+        return float(s)
+    except (ValueError, IndexError):
+        return 0.0
+
+
+async def search_podcast_feeds(query: str) -> list[dict]:
+    """Sucht Podcast-Feeds über iTunes Search API."""
+    results = []
+    try:
+        async with httpx.AsyncClient(timeout=12) as client:
+            r = await client.get(
+                "https://itunes.apple.com/search",
+                params={"term": query, "media": "podcast", "limit": 10, "country": "de"},
+            )
+            r.raise_for_status()
+            data = r.json()
+            for item in data.get("results", []):
+                results.append({
+                    "title": item.get("collectionName", ""),
+                    "author": item.get("artistName", ""),
+                    "feedUrl": item.get("feedUrl", ""),
+                    "artworkUrl": item.get("artworkUrl600") or item.get("artworkUrl100", ""),
+                    "trackCount": item.get("trackCount", 0),
+                    "itunesId": item.get("collectionId"),
+                })
+    except Exception as e:
+        logger.warning(f"iTunes-Suche fehlgeschlagen: {e}")
+    return results
+
+
+async def fetch_and_update_feed(library_item_id: str):
+    """
+    Holt RSS-Feed und aktualisiert Metadaten + Episoden in der DB.
+    """
+    async with AsyncSessionLocal() as db:
+        item_result = await db.execute(select(LibraryItem).where(LibraryItem.id == library_item_id))
+        item = item_result.scalar_one_or_none()
+        if not item:
+            return
+
+        podcast_result = await db.execute(select(Podcast).where(Podcast.library_item_id == library_item_id))
+        podcast = podcast_result.scalar_one_or_none()
+        if not podcast or not podcast.feed_url:
+            logger.warning(f"Kein Feed für Item {library_item_id}")
+            return
+
+        try:
+            async with httpx.AsyncClient(timeout=20, follow_redirects=True) as client:
+                r = await client.get(podcast.feed_url)
+                r.raise_for_status()
+                raw = r.text
+        except Exception as e:
+            logger.error(f"Feed-Abruf fehlgeschlagen ({podcast.feed_url}): {e}")
+            return
+
+        feed = feedparser.parse(raw)
+        channel = feed.feed
+
+        # Podcast-Metadaten aktualisieren
+        if channel.get("title") and not item.title:
+            item.title = channel.get("title")
+        if channel.get("author") and not item.author:
+            item.author = channel.get("author")
+        if channel.get("summary") and not item.description:
+            item.description = channel.get("summary")
+        if channel.get("language") and not item.language:
+            item.language = channel.get("language")
+
+        # Cover
+        cover_url = None
+        if channel.get("image"):
+            cover_url = channel.image.get("href") or channel.image.get("url")
+        if cover_url and not item.cover_path:
+            cover_path = await _download_cover(cover_url, item.id)
+            if cover_path:
+                item.cover_path = cover_path
+
+        podcast.feed_last_checked = datetime.utcnow()
+
+        # Lokale Episode-Dateien holen
+        episodes_result = await db.execute(
+            select(PodcastEpisode).where(PodcastEpisode.podcast_id == podcast.id)
+        )
+        existing_episodes = {ep.feed_episode_id: ep for ep in episodes_result.scalars().all()}
+
+        # Feed-Einträge verarbeiten
+        for entry in feed.entries:
+            feed_ep_id = entry.get("id") or entry.get("link", "")
+            title = entry.get("title", "")
+            description = entry.get("summary") or entry.get("content", [{}])[0].get("value", "") if entry.get("content") else ""
+            pub_date = None
+            if entry.get("published_parsed"):
+                import time
+                pub_date = datetime(*entry.published_parsed[:6])
+
+            enclosure_url = None
+            duration_s = 0.0
+            for enc in entry.get("enclosures", []):
+                if enc.get("type", "").startswith("audio/"):
+                    enclosure_url = enc.get("href") or enc.get("url")
+                    break
+            duration_s = _parse_duration(entry.get("itunes_duration"))
+
+            ep_num = entry.get("itunes_episode")
+            season_num = entry.get("itunes_season")
+
+            if feed_ep_id in existing_episodes:
+                # Vorhandene Episode aktualisieren
+                ep = existing_episodes[feed_ep_id]
+                ep.title = title
+                ep.description = description
+                ep.feed_episode_url = enclosure_url
+                ep.duration_seconds = duration_s or ep.duration_seconds
+            else:
+                # Neue Episode anlegen
+                ep = PodcastEpisode(
+                    podcast_id=podcast.id,
+                    title=title,
+                    description=description,
+                    pub_date=pub_date,
+                    duration_seconds=duration_s,
+                    feed_episode_id=feed_ep_id,
+                    feed_episode_url=enclosure_url,
+                    episode_number=str(ep_num) if ep_num else None,
+                    season_number=str(season_num) if season_num else None,
+                )
+                db.add(ep)
+
+        item.updated_at = datetime.utcnow()
+        await db.commit()
+        logger.info(f"Feed aktualisiert: {item.title} ({len(feed.entries)} Einträge)")
+
+
+async def update_all_feeds():
+    """Aktualisiert alle Podcast-Feeds (wird vom Scheduler aufgerufen)."""
+    async with AsyncSessionLocal() as db:
+        result = await db.execute(select(Podcast).where(Podcast.feed_url.isnot(None)))
+        podcasts = result.scalars().all()
+
+    for podcast in podcasts:
+        try:
+            await fetch_and_update_feed(podcast.library_item_id)
+        except Exception as e:
+            logger.error(f"Feed-Update fehlgeschlagen für {podcast.id}: {e}")