Files
Audiolib/backend/app/routers/matching.py
Audiolib e3e6492b1f Make matching debuggable + fix metadata search blockers
DNB rewrite:
- Multiple query strategies with fallback (title+author+mat=ton →
  title+author → title+mat=ton → title-only → fulltext). Returns on
  first hit. Most German audiobooks aren't tagged mat=ton in DNB,
  which was killing all searches.
- Strip CQL wildcard chars (?, *, <, >, =, /, quotes) from search
  terms. The "???" in "Die drei ???" was breaking the CQL parser.
- Log HTTP status, body snippet on non-200, and numberOfRecords on
  every query so log shows exactly what DNB returned.
- Parse SRU diagnostic elements (DNB error messages buried in XML).
- Convert author/narrator from "Lastname, Firstname" to
  "Firstname Lastname" for consistency with other sources.

Matcher:
- Split series patterns: WITH_EPISODE (need digit) and SERIES_ONLY
  (just the series name). "Die drei ??? und der Fluch des Rubins"
  now properly detects "Die drei ???" as series even without folge#.
- New _build_search_title: removes ??? sequences, trailing parens,
  collapses whitespace, before sending to APIs.
- Manual search also passes through normalization. Logs source +
  hit count per query.

Debug endpoint:
- GET /api/items/match/debug?title=...&author=... returns raw results
  from all 4 sources with status, error messages, and full metadata.
- "Debug" button added in BookDetail — shows what each API actually
  returns inline, so the user can see if it's a search problem,
  parse problem, or threshold problem.
- "Cover aus Datei" button — triggers local cover extraction
  (folder.jpg or embedded artwork) on demand.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-26 18:34:49 +02:00

232 lines
8.3 KiB
Python

import asyncio
import logging
from fastapi import APIRouter, Depends, HTTPException, BackgroundTasks
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select
from ..dependencies import get_db, get_current_user, require_admin
from ..models.user import User
from ..models.media_item import LibraryItem
from ..services.matcher import match_audiobook, search_for_item, _apply_match, _enrich_match
from ..services.matching.musicbrainz import get_release_details
from ..services.matching.open_library import get_work_details
from ..services.matching.base import MatchResult
from datetime import datetime
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
# Every route declared in this file is mounted under /api/items and is
# grouped under the "matching" tag.
router = APIRouter(prefix="/api/items", tags=["matching"])
@router.post("/{item_id}/match")
async def trigger_match(
    item_id: str,
    background_tasks: BackgroundTasks,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """Schedule a metadata match for one library item.

    The item is looked up first; if it exists, ``match_audiobook`` is queued
    as a background task with the item id and a confirmation payload is
    returned right away.

    Raises:
        HTTPException: 404 when no item with ``item_id`` exists.
    """
    lookup = select(LibraryItem).where(LibraryItem.id == item_id)
    item = (await db.execute(lookup)).scalar_one_or_none()
    if not item:
        raise HTTPException(status_code=404, detail="Item not found")

    # The match itself runs after the response has been sent.
    background_tasks.add_task(match_audiobook, item_id)
    return {"message": "Matching gestartet", "itemId": item_id}
@router.get("/{item_id}/match/search")
async def search_match(
    item_id: str,
    q: str | None = None,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """Search the metadata sources for candidates for one library item.

    With a non-empty ``q`` the search uses exactly that text and no author.
    Otherwise it falls back to the item's stored title (or an empty string)
    together with the item's stored author.

    Returns:
        ``{"results": ...}`` holding whatever ``search_for_item`` returned.

    Raises:
        HTTPException: 404 when no item with ``item_id`` exists.
    """
    lookup = select(LibraryItem).where(LibraryItem.id == item_id)
    item = (await db.execute(lookup)).scalar_one_or_none()
    if not item:
        raise HTTPException(status_code=404, detail="Item not found")

    if q:
        # A manual query replaces the stored metadata entirely.
        query, author = q, None
    else:
        query, author = item.title or "", item.author

    return {"results": await search_for_item(query, author)}
@router.get("/match/debug")
async def debug_match(
    title: str,
    author: str | None = None,
    current_user: User = Depends(get_current_user),
):
    """Debug endpoint: return the raw results of every search source.

    Can be called straight from a browser:
    /api/items/match/debug?title=Foo&author=Bar

    The title is normalized the same way as shown in the ``normalized`` part
    of the response, then all four sources are queried concurrently. A source
    that raises is reported with ``ok: False`` and the error text instead of
    failing the whole request.
    """
    from ..services.matching.musicbrainz import search_musicbrainz
    from ..services.matching.open_library import search_open_library
    from ..services.matching.google_books import search_google_books
    from ..services.matching.dnb import search_dnb
    from ..services.matcher import _build_search_title, detect_series

    series, episode = detect_series(title)
    normalized_title = _build_search_title(title)
    # When both a series and an episode were detected, search for
    # "<series> <episode>" instead of the normalized title.
    search_title = f"{series} {episode}" if series and episode else normalized_title
    logger.info(f"DEBUG: title={title!r} → search={search_title!r} series={series!r} episode={episode!r}")

    def _serialize(hit):
        """Flatten one search hit into a JSON-friendly dict."""
        return {
            "title": hit.title,
            "author": hit.author,
            "narrator": hit.narrator,
            "publisher": hit.publisher,
            "year": hit.publish_year,
            "series": hit.series,
            "series_sequence": hit.series_sequence,
            "cover_url": hit.cover_url,
            "language": hit.language,
            "genres": hit.genres,
            # Descriptions are cut to 200 characters to keep the output short.
            "description": (hit.description or "")[:200],
            "confidence": hit.confidence,
            "source_id": hit.source_id,
        }

    async def _try(name, coro):
        """Await one source search and wrap its outcome or its error."""
        try:
            hits = await coro
            count = len(hits)
            serialized = [_serialize(hit) for hit in hits]
        except Exception as e:
            return {"source": name, "ok": False, "error": f"{type(e).__name__}: {e}"}
        return {"source": name, "ok": True, "count": count, "results": serialized}

    searches = (
        ("musicbrainz", search_musicbrainz),
        ("open_library", search_open_library),
        ("google_books", search_google_books),
        ("dnb", search_dnb),
    )
    results = await asyncio.gather(
        *(_try(label, search(search_title, author)) for label, search in searches)
    )

    return {
        "input": {"title": title, "author": author},
        "normalized": {"search_title": search_title, "series": series, "episode": episode},
        "sources": results,
    }
@router.post("/{item_id}/match/apply")
async def apply_match(
    item_id: str,
    body: dict,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """Apply a manually chosen match result to a library item.

    body: { source, id, title, author, narrator, description, publisher,
    publishYear, series, seriesSequence, language, genres, cover, ... }

    The match is built from ``body``, optionally enriched with detail data
    for the ``musicbrainz`` and ``open_library`` sources, applied with
    confidence 1.0, and the item is then marked as match-locked.

    Raises:
        HTTPException: 404 when no item with ``item_id`` exists.
    """
    result = await db.execute(select(LibraryItem).where(LibraryItem.id == item_id))
    item = result.scalar_one_or_none()
    if not item:
        raise HTTPException(status_code=404, detail="Item not found")

    # `or` rather than a .get() default: a default only applies when the key
    # is missing, so an explicit JSON null would otherwise slip through.
    source = body.get("source") or "manual"
    source_id = body.get("id") or ""
    logger.info(
        f"Manual apply: item={item_id} source={source} source_id={source_id} "
        f"body_keys={sorted(body.keys())}"
    )

    # Always build the match from the request body.
    match_result = MatchResult(
        source=source,
        source_id=source_id,
        title=body.get("title") or item.title or "",
        subtitle=body.get("subtitle"),
        author=body.get("author"),
        narrator=body.get("narrator"),
        description=body.get("description"),
        publisher=body.get("publisher"),
        publish_year=body.get("publishYear"),
        series=body.get("series"),
        series_sequence=body.get("seriesSequence"),
        language=body.get("language"),
        genres=body.get("genres") or [],
        cover_url=body.get("cover"),
        confidence=1.0,
    )

    # Enrich with source details. Best effort: a failed lookup is logged and
    # the match is applied without the extra details. Without a source id
    # there is nothing to look up, so the remote call is skipped.
    if source_id:
        try:
            if source == "musicbrainz":
                details = await get_release_details(source_id)
            elif source == "open_library":
                details = await get_work_details(source_id)
            else:
                details = None
            if details:
                _enrich_match(match_result, details)
        except Exception as e:
            logger.warning(f"Details-Laden fehlgeschlagen ({source}: {source_id}): {e}")

    # Re-pin the confidence after enrichment; a manual choice is always 1.0.
    match_result.confidence = 1.0
    await _apply_match(db, item, match_result, confidence=1.0)
    item.match_locked = True
    item.updated_at = datetime.utcnow()
    await db.commit()
    await db.refresh(item)

    # Imported here rather than at module level (presumably to avoid a
    # circular import between the routers — confirm before moving).
    from ..routers.items import _enrich_item_with_files
    return await _enrich_item_with_files(item, db)
@router.post("/{item_id}/extract-cover")
async def extract_local_cover(
    item_id: str,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """Extract a cover from the item's folder files or embedded artwork.

    Loads the item and the paths of its files (ordered by track index) and
    hands them to ``_save_local_cover``. When a cover comes back, it is
    stored on the item and committed.

    Returns:
        ``{"success": True, "cover_path": ...}`` when a cover was found,
        otherwise ``{"success": False, "message": ...}``.

    Raises:
        HTTPException: 404 when no item with ``item_id`` exists.
    """
    from ..services.scanner import _save_local_cover
    from ..models.media_item import BookFile

    result = await db.execute(select(LibraryItem).where(LibraryItem.id == item_id))
    item = result.scalar_one_or_none()
    if not item:
        raise HTTPException(status_code=404, detail="Item not found")

    files_result = await db.execute(
        select(BookFile)
        .where(BookFile.library_item_id == item_id)
        .order_by(BookFile.track_index)
    )
    audio_files = [f.path for f in files_result.scalars().all()]

    # _save_local_cover is a plain synchronous call; run it in a worker
    # thread so it does not stall the event loop while it works on files.
    # Only plain values are handed over — the ORM objects stay on this side.
    cover = await asyncio.to_thread(_save_local_cover, item.path, audio_files, item.id)
    if not cover:
        return {"success": False, "message": "Kein Cover gefunden"}

    item.cover_path = cover
    item.updated_at = datetime.utcnow()
    await db.commit()
    logger.info(f"Lokales Cover gesetzt für {item_id}: {cover}")
    return {"success": True, "cover_path": cover}
@router.delete("/{item_id}/match")
async def clear_match(
    item_id: str,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """Remove the stored match from a library item.

    Resets the match source, id, confidence and lock flag, and makes sure
    the item carries the "zu_prüfen" tag.

    Raises:
        HTTPException: 404 when no item with ``item_id`` exists.
    """
    result = await db.execute(select(LibraryItem).where(LibraryItem.id == item_id))
    item = result.scalar_one_or_none()
    if not item:
        raise HTTPException(status_code=404, detail="Item not found")

    item.matched_source = "none"
    item.matched_id = None
    item.match_confidence = 0.0
    item.match_locked = False

    # Assign a fresh list instead of appending to the loaded one. After an
    # in-place append the old and new value are the same object, so a column
    # without mutation tracking may not register the change on flush.
    # (Whether `tags` has mutation tracking is not visible from this file.)
    existing_tags = item.tags or []
    if "zu_prüfen" not in existing_tags:
        item.tags = [*existing_tags, "zu_prüfen"]

    item.updated_at = datetime.utcnow()
    await db.commit()
    return {"success": True}