Fix HLS playback auth + add DNB matching source

Player: hls.js did not send Authorization header for segment requests, causing 401 errors on all HLS fetches. Fixed via xhrSetup callback. DNB: Added Deutsche Nationalbibliothek SRU search (mat=ton filter for audiobooks, MARC21-XML parsing). Extracts title, author, narrator, publisher, year, series, genres, ISBN-based cover URL. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-26 17:37:26 +02:00
parent 3aab0ac9f1
commit eefdfc9886
3 changed files with 157 additions and 3 deletions
--- a/backend/app/services/matcher.py
+++ b/backend/app/services/matcher.py
@@ -24,6 +24,7 @@ from .matching.base import MatchResult
 from .matching.musicbrainz import search_musicbrainz, get_release_details
 from .matching.open_library import search_open_library, get_work_details
 from .matching.google_books import search_google_books
+from .matching.dnb import search_dnb

 logger = logging.getLogger(__name__)

@@ -165,6 +166,7 @@ _SOURCE_FUNCS = {
    "musicbrainz": (search_musicbrainz, get_release_details),
    "open_library": (search_open_library, get_work_details),
    "google_books": (search_google_books, None),
+    "dnb": (search_dnb, None),
 }


@@ -260,13 +262,14 @@ async def search_for_item(title: str, author: str | None = None) -> list[dict]:
        except Exception:
            return []

-    mb, ol, gb = await asyncio.gather(
+    mb, ol, gb, dnb = await asyncio.gather(
        _search_source(search_musicbrainz(title, author)),
        _search_source(search_open_library(title, author)),
        _search_source(search_google_books(title, author)),
+        _search_source(search_dnb(title, author)),
    )

-    for r in mb + ol + gb:
+    for r in mb + ol + gb + dnb:
        results.append({
            "source": r.source,
            "id": r.source_id,
--- a/backend/app/services/matching/dnb.py
+++ b/backend/app/services/matching/dnb.py
@@ -0,0 +1,145 @@
+"""
+Deutsche Nationalbibliothek (DNB) SRU interface.
+Searches audiobooks (mat=ton) via MARC21-XML.
+"""
+import re
+import httpx
+from xml.etree import ElementTree as ET
+from .base import MatchResult
+
+DNB_SRU = "https://services.dnb.de/sru/dnb"
+HEADERS = {"User-Agent": "audiolib/1.0 (contact@audiolib.local)"}
+_NS_SRW = "http://www.loc.gov/zing/srw/"
+_NS_MARC = "http://www.loc.gov/MARC21/slim"
+
+
+async def search_dnb(title: str, author: str | None = None) -> list[MatchResult]:
+    """Query the DNB SRU endpoint for sound recordings (mat=ton) by title and author.
+
+    At most five records are requested; request/XML failures give [], bad records are skipped.
+    """
+    # CQL quoted terms: backslash-escape '\' and '"' so user text cannot end the term early.
+    cql_escape = str.maketrans({"\\": "\\\\", '"': '\\"'})
+    clauses = [f'tit="{title.translate(cql_escape)}"', "mat=ton"]
+    if author:
+        clauses.append(f'per="{author.translate(cql_escape)}"')
+    params = {
+        "version": "1.1",
+        "operation": "searchRetrieve",
+        "query": " AND ".join(clauses),
+        "recordSchema": "MARC21-xml",
+        "maximumRecords": "5",
+    }
+    async with httpx.AsyncClient(headers=HEADERS, timeout=15) as client:
+        try:
+            r = await client.get(DNB_SRU, params=params)
+            r.raise_for_status()
+        except Exception:  # best-effort source: any transport/HTTP failure means no hits
+            return []
+    try:
+        root = ET.fromstring(r.text)
+    except ET.ParseError:
+        return []
+
+    results = []
+    for record in root.findall(f".//{{{_NS_SRW}}}record"):
+        marc = record.find(f".//{{{_NS_MARC}}}record")
+        try:
+            if marc is not None and (result := _parse_marc(marc)):
+                results.append(result)
+        except Exception:  # a single malformed record must not discard the rest
+            continue
+    return results
+
+
+def _field(marc, tag: str, code: str | None = None) -> str | None:
+    """Return the first usable value of `tag`: subfield `code`, or all subfields joined."""
+    wanted = f"{{{_NS_MARC}}}subfield" + (f"[@code='{code}']" if code else "")
+    for datafield in marc.findall(f"{{{_NS_MARC}}}datafield[@tag='{tag}']"):
+        # With a code only the first matching subfield is considered; without one,
+        # every subfield of the datafield contributes to the joined value.
+        candidates = datafield.findall(wanted)[: 1 if code else None]
+        texts = [sub.text.strip() for sub in candidates if sub.text]
+        if texts:
+            return " ".join(texts)
+    return None
+
+
+def _fields(marc, tag: str, code: str) -> list[str]:
+    """Collect subfield `code` (first occurrence per field) from every `tag` datafield."""
+    field_path = f"{{{_NS_MARC}}}datafield[@tag='{tag}']"
+    sub_path = f"{{{_NS_MARC}}}subfield[@code='{code}']"
+    # find() yields None for fields lacking the subfield; those and empty texts are dropped.
+    firsts = (datafield.find(sub_path) for datafield in marc.findall(field_path))
+    return [sub.text.strip() for sub in firsts if sub is not None and sub.text]
+
+
+def _parse_marc(marc) -> MatchResult | None:
+    """Build a MatchResult from one MARC21 record element; None when it has no title.
+
+    Values are read via _field/_fields; confidence is a fixed 0.65 for every DNB hit.
+    """
+    title_a = (_field(marc, "245", "a") or "").rstrip("/ ").strip()
+    subtitle = (_field(marc, "245", "b") or "").rstrip("/ ").strip() or None
+    title = f"{title_a} {subtitle}".strip() if subtitle else title_a
+    if not title:
+        return None
+
+    author = _field(marc, "100", "a")
+    if author:
+        author = author.rstrip(",").strip()
+
+    # Narrator: first 700 entry whose $e contains "prech" (Sprecher/Sprecherin) or whose
+    # $4 relator is spk/nrt. $e/$4 may repeat (e.g. aut + spk), so check all of them.
+    narrator = None
+    for f in marc.findall(f"{{{_NS_MARC}}}datafield[@tag='700']"):
+        roles_e = f.findall(f"{{{_NS_MARC}}}subfield[@code='e']")
+        roles_4 = f.findall(f"{{{_NS_MARC}}}subfield[@code='4']")
+        is_narrator = any("prech" in (sf.text or "").lower() for sf in roles_e) or any(
+            (sf.text or "").strip() in ("spk", "nrt") for sf in roles_4
+        )
+        n_sf = f.find(f"{{{_NS_MARC}}}subfield[@code='a']")
+        if is_narrator and n_sf is not None and n_sf.text:
+            narrator = n_sf.text.rstrip(",").strip()
+            break
+
+    # 260 is the older imprint field; fall back to it for publisher as well as year.
+    publisher_raw = _field(marc, "264", "b") or _field(marc, "260", "b") or ""
+    publisher = publisher_raw.rstrip(",").strip() or None
+    year_raw = _field(marc, "264", "c") or _field(marc, "260", "c")
+    year_match = re.search(r"\d{4}", year_raw) if year_raw else None
+    publish_year = int(year_match.group()) if year_match else None
+
+    description = _field(marc, "520", "a")
+    language = _field(marc, "041", "a")
+    genres = _fields(marc, "650", "a")[:5]
+
+    series = _field(marc, "830", "a") or _field(marc, "800", "t")
+    series_seq = _field(marc, "830", "v") or _field(marc, "800", "v")
+
+    # DNB record id from control field 001
+    ctrl = marc.find(f"{{{_NS_MARC}}}controlfield[@tag='001']")
+    dnb_id = ctrl.text.strip() if ctrl is not None and ctrl.text else None
+
+    # ISBN for the cover URL: first token of 020 $a, reduced to digits and check char
+    isbn_raw = _field(marc, "020", "a") or ""
+    isbn = re.sub(r"[^0-9X]", "", isbn_raw.split()[0].upper()) if isbn_raw else None
+    cover_url = f"https://portal.dnb.de/opac/mvb/cover?isbn={isbn}" if isbn else None
+
+    return MatchResult(
+        source="dnb",
+        source_id=dnb_id or f"dnb_{title[:30]}",
+        title=title,
+        subtitle=subtitle,
+        author=author,
+        narrator=narrator,
+        description=description,
+        publisher=publisher,
+        publish_year=publish_year,
+        language=language,
+        genres=genres,
+        series=series,
+        series_sequence=series_seq,
+        cover_url=cover_url,
+        confidence=0.65,
+    )