Diagnosis from connectivity check: 4/5 APIs reachable (only Google Books
rate-limited). So the network is fine — the search title was the problem.
'Folge 114 Die Villa der Toten' isn't indexed under that name anywhere.
The MP3 itself has the real metadata in ID3 tags (album, artist, year).
Scanner now reads ID3/Vorbis/MP4 tags from the first audio file:
- album → item.title
- albumartist / composer / artist → item.author
- date → publish_year
- organization / publisher → publisher
- language → language
- genre → genres
- artist (heuristic) → series, if it doesn't appear in album title
Parent folder name → series hint (skipped if it's a library root).
Only fills empty fields, never overwrites manually edited or matched data.
Runs on new items AND on re-scan for items without an active match.
Search title normalization improved: prefixes and infixes such as
'Folge 123 - X' or 'Band 7: Y' are stripped so the APIs see the actual episode title.
New endpoint POST /api/items/{id}/extract-tags + 'Tags lesen' button in
BookDetail — triggers tag extraction on demand for existing items.
Returns before/after diff so user can see what was filled in.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
330 lines
12 KiB
Python
330 lines
12 KiB
Python
import asyncio
|
|
import logging
|
|
from fastapi import APIRouter, Depends, HTTPException, BackgroundTasks
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
from sqlalchemy import select
|
|
from ..dependencies import get_db, get_current_user, require_admin
|
|
from ..models.user import User
|
|
from ..models.media_item import LibraryItem
|
|
from ..services.matcher import match_audiobook, search_for_item, _apply_match, _enrich_match
|
|
from ..services.matching.musicbrainz import get_release_details
|
|
from ..services.matching.open_library import get_work_details
|
|
from ..services.matching.base import MatchResult
|
|
from datetime import datetime
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
router = APIRouter(prefix="/api/items", tags=["matching"])
|
|
|
|
|
|
@router.post("/{item_id}/match")
async def trigger_match(
    item_id: str,
    background_tasks: BackgroundTasks,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """Queue an automatic metadata match for a single library item.

    The item is looked up first so that unknown ids are rejected; the actual
    matching is handed to FastAPI's background tasks and the request returns
    immediately.

    Args:
        item_id: Id of the ``LibraryItem`` to match.
        background_tasks: FastAPI task queue that will run ``match_audiobook``.
        current_user: Resolved via ``get_current_user``; not used in the body.
        db: Async database session.

    Returns:
        A dict with a confirmation message and the item id.

    Raises:
        HTTPException: 404 when no item with ``item_id`` exists.
    """
    lookup = select(LibraryItem).where(LibraryItem.id == item_id)
    item = (await db.execute(lookup)).scalar_one_or_none()
    if not item:
        raise HTTPException(status_code=404, detail="Item not found")

    # Only the id is passed on; the task does not receive the ORM object.
    background_tasks.add_task(match_audiobook, item_id)
    return {"message": "Matching gestartet", "itemId": item_id}
|
|
|
|
|
|
@router.get("/{item_id}/match/search")
async def search_match(
    item_id: str,
    q: str | None = None,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """Search the metadata sources for candidates matching a library item.

    Args:
        item_id: Id of the ``LibraryItem`` the search is for.
        q: Optional free-text query. When given (non-empty) it replaces the
            item's title and the item's author is NOT sent along.
        current_user: Resolved via ``get_current_user``; not used in the body.
        db: Async database session.

    Returns:
        ``{"results": ...}`` with whatever ``search_for_item`` returned.

    Raises:
        HTTPException: 404 when no item with ``item_id`` exists.
    """
    lookup = select(LibraryItem).where(LibraryItem.id == item_id)
    item = (await db.execute(lookup)).scalar_one_or_none()
    if not item:
        raise HTTPException(status_code=404, detail="Item not found")

    if q:
        # An explicit query is searched on its own, without the stored author.
        query, author = q, None
    else:
        query, author = item.title or "", item.author

    hits = await search_for_item(query, author)
    return {"results": hits}
|
|
|
|
|
|
@router.get("/match/connectivity")
async def check_connectivity(
    current_user: User = Depends(get_current_user),
):
    """Test whether the backend can reach the external metadata APIs.

    Every target is requested once with a 15 second timeout. The probes run
    concurrently, so the endpoint takes roughly as long as the slowest target
    instead of the sum of all of them; the result list keeps the order of
    ``targets``.

    Args:
        current_user: Resolved via ``get_current_user``; not used in the body.

    Returns:
        A dict with
        ``results``: one entry per target. On a completed request it holds
        ``name``, ``url``, ``ok=True``, ``status``, ``bytes``, ``ms`` and
        ``body_snippet`` (first 150 characters of the body for non-200
        responses, else ``None``). On an exception it holds ``name``,
        ``url``, ``ok=False``, ``error`` and ``ms``.
        ``proxy_env``: the proxy-related environment variables that are set,
        or the string ``"keine"`` when there are none.
    """
    import os
    import time

    import httpx

    targets = [
        ("Google", "https://www.google.com"),
        ("MusicBrainz", "https://musicbrainz.org/ws/2/release?query=test&fmt=json&limit=1"),
        ("OpenLibrary", "https://openlibrary.org/search.json?title=test&limit=1"),
        ("GoogleBooks", "https://www.googleapis.com/books/v1/volumes?q=test&maxResults=1"),
        ("DNB", "https://services.dnb.de/sru/dnb?version=1.1&operation=searchRetrieve&query=tit%3Dtest&maximumRecords=1"),
    ]
    headers = {"User-Agent": "audiolib/1.0"}

    async def _probe(client, name, url):
        """Request one target and describe the outcome; never raises."""
        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by wall-clock adjustments during the request.
        started = time.perf_counter()
        try:
            response = await client.get(url)
            return {
                "name": name,
                "url": url,
                # "ok" means the request completed, not that the status is 2xx.
                "ok": True,
                "status": response.status_code,
                "bytes": len(response.content),
                "ms": int((time.perf_counter() - started) * 1000),
                "body_snippet": (response.text[:150] if response.status_code != 200 else None),
            }
        except Exception as e:
            # Deliberately broad: any failure is reported as data, since this
            # endpoint exists to diagnose such failures.
            return {
                "name": name,
                "url": url,
                "ok": False,
                "error": f"{type(e).__name__}: {e}",
                "ms": int((time.perf_counter() - started) * 1000),
            }

    async with httpx.AsyncClient(headers=headers, timeout=15, follow_redirects=True) as client:
        # gather() returns results in argument order, i.e. the order of targets.
        results = await asyncio.gather(
            *(_probe(client, name, url) for name, url in targets)
        )

    # Also report the environment variables that influence httpx.
    # NOTE(review): proxy URLs can embed credentials and are returned verbatim
    # to any authenticated user — confirm this is acceptable.
    proxy_env = {
        k: v for k, v in os.environ.items()
        if k.upper() in ("HTTP_PROXY", "HTTPS_PROXY", "NO_PROXY", "ALL_PROXY")
    }

    return {
        "results": results,
        "proxy_env": proxy_env or "keine",
    }
|
|
|
|
|
|
@router.get("/match/debug")
async def debug_match(
    title: str,
    author: str | None = None,
    current_user: User = Depends(get_current_user),
):
    """Debug endpoint: return the raw results of every search source.

    Can be called straight from a browser:
    ``/api/items/match/debug?title=Foo&author=Bar``

    Args:
        title: Raw title to normalise and search for.
        author: Optional author passed unchanged to every source.
        current_user: Resolved via ``get_current_user``; not used in the body.

    Returns:
        A dict with the original ``input``, the ``normalized`` search terms
        and, under ``sources``, one entry per source containing either its
        results or the error that source raised.
    """
    from ..services.matching.musicbrainz import search_musicbrainz
    from ..services.matching.open_library import search_open_library
    from ..services.matching.google_books import search_google_books
    from ..services.matching.dnb import search_dnb
    from ..services.matcher import _build_search_title, detect_series

    series, episode = detect_series(title)
    search_title = _build_search_title(title)
    if series and episode:
        # A detected series + episode replaces the normalised title.
        search_title = f"{series} {episode}"

    logger.info(f"DEBUG: title={title!r} → search={search_title!r} series={series!r} episode={episode!r}")

    def _serialize(hit):
        """Flatten one search hit into a plain dict (description cut to 200 chars)."""
        return {
            "title": hit.title,
            "author": hit.author,
            "narrator": hit.narrator,
            "publisher": hit.publisher,
            "year": hit.publish_year,
            "series": hit.series,
            "series_sequence": hit.series_sequence,
            "cover_url": hit.cover_url,
            "language": hit.language,
            "genres": hit.genres,
            "description": (hit.description or "")[:200],
            "confidence": hit.confidence,
            "source_id": hit.source_id,
        }

    async def _try(name, coro):
        """Await one source search and report success or failure as data."""
        try:
            hits = await coro
            return {
                "source": name,
                "ok": True,
                "count": len(hits),
                "results": [_serialize(hit) for hit in hits],
            }
        except Exception as e:
            return {"source": name, "ok": False, "error": f"{type(e).__name__}: {e}"}

    # Insertion order of this table fixes the order of the "sources" list.
    searches = {
        "musicbrainz": search_musicbrainz,
        "open_library": search_open_library,
        "google_books": search_google_books,
        "dnb": search_dnb,
    }
    results = await asyncio.gather(
        *(_try(name, search(search_title, author)) for name, search in searches.items())
    )

    return {
        "input": {"title": title, "author": author},
        "normalized": {"search_title": search_title, "series": series, "episode": episode},
        "sources": results,
    }
|
|
|
|
|
|
@router.post("/{item_id}/match/apply")
async def apply_match(
    item_id: str,
    body: dict,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """Apply a manually chosen match result to a library item.

    Args:
        item_id: Id of the ``LibraryItem`` to update.
        body: The chosen hit, e.g.
            ``{ source, id, title, author, narrator, description, publisher,
            publishYear, series, seriesSequence, language, genres, cover, ... }``.
        current_user: Resolved via ``get_current_user``; not used in the body.
        db: Async database session.

    Returns:
        Whatever ``_enrich_item_with_files`` produces for the updated item.

    Raises:
        HTTPException: 404 when no item with ``item_id`` exists.
    """
    lookup = select(LibraryItem).where(LibraryItem.id == item_id)
    item = (await db.execute(lookup)).scalar_one_or_none()
    if not item:
        raise HTTPException(status_code=404, detail="Item not found")

    source = body.get("source", "manual")
    source_id = body.get("id", "")

    logger.info(
        f"Manual apply: item={item_id} source={source} source_id={source_id} "
        f"body_keys={sorted(body.keys())}"
    )

    # MatchResult field -> request body key, for fields copied over verbatim.
    passthrough = {
        "subtitle": "subtitle",
        "author": "author",
        "narrator": "narrator",
        "description": "description",
        "publisher": "publisher",
        "publish_year": "publishYear",
        "series": "series",
        "series_sequence": "seriesSequence",
        "language": "language",
        "cover_url": "cover",
    }

    # Always build the result from the body (search_for_item now delivers all fields).
    match_result = MatchResult(
        source=source,
        source_id=source_id,
        title=body.get("title") or item.title or "",
        genres=body.get("genres") or [],
        confidence=1.0,
        **{field: body.get(key) for field, key in passthrough.items()},
    )

    # Pick the detail loader for sources that have one.
    if source == "musicbrainz":
        load_details = get_release_details
    elif source == "open_library":
        load_details = get_work_details
    else:
        load_details = None

    # Enrich with details (description, chapters); per the original author's
    # note this does not overwrite values that are already present.
    if load_details is not None:
        try:
            details = await load_details(source_id)
            if details:
                _enrich_match(match_result, details)
        except Exception as e:
            # Best effort: a failed detail lookup must not block the apply.
            logger.warning(f"Details-Laden fehlgeschlagen ({source}: {source_id}): {e}")

    # A manual choice is always stored with full confidence and locked.
    match_result.confidence = 1.0
    await _apply_match(db, item, match_result, confidence=1.0)
    item.match_locked = True
    item.updated_at = datetime.utcnow()
    await db.commit()
    await db.refresh(item)

    # Imported here rather than at module level (presumably to avoid a
    # circular import between the routers — TODO confirm).
    from ..routers.items import _enrich_item_with_files

    return await _enrich_item_with_files(item, db)
|
|
|
|
|
|
@router.post("/{item_id}/extract-tags")
async def extract_audio_tags(
    item_id: str,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """Read the tags of the item's first audio file and fill empty metadata.

    The first file is the one with the lowest ``track_index``. A series hint
    derived from the item's path and the library folders is passed to
    ``_apply_tags_to_item`` together with the tags.

    Args:
        item_id: Id of the ``LibraryItem`` to update.
        current_user: Resolved via ``get_current_user``; not used in the body.
        db: Async database session.

    Returns:
        ``{"success": False, "message": ...}`` when the item has no files,
        otherwise ``{"success": True, "tags", "before", "after"}`` where
        ``before``/``after`` are snapshots of title, author, publisher,
        publish_year, series and genres.

    Raises:
        HTTPException: 404 when no item with ``item_id`` exists.
    """
    from ..services.scanner import _extract_audio_tags, _apply_tags_to_item, _series_from_parent
    from ..models.media_item import BookFile
    from ..models.library import Library

    result = await db.execute(select(LibraryItem).where(LibraryItem.id == item_id))
    item = result.scalar_one_or_none()
    if not item:
        raise HTTPException(status_code=404, detail="Item not found")

    lib_result = await db.execute(select(Library).where(Library.id == item.library_id))
    lib = lib_result.scalar_one_or_none()
    library_folders = lib.folders if lib else []

    files_result = await db.execute(
        select(BookFile).where(BookFile.library_item_id == item_id).order_by(BookFile.track_index)
    )
    files = files_result.scalars().all()
    if not files:
        return {"success": False, "message": "Keine Audio-Dateien"}

    def _snapshot():
        """Capture the metadata fields reported in the before/after diff."""
        return {
            "title": item.title, "author": item.author, "publisher": item.publisher,
            "publish_year": item.publish_year, "series": item.series, "genres": item.genres,
        }

    tags = _extract_audio_tags(files[0].path)
    parent_series = _series_from_parent(item.path, library_folders)

    before = _snapshot()
    _apply_tags_to_item(item, tags, parent_series)
    item.updated_at = datetime.utcnow()

    # Take the "after" snapshot BEFORE committing: with SQLAlchemy's default
    # expire_on_commit=True the commit expires every attribute, and reading an
    # expired attribute on an AsyncSession needs implicit IO, which fails.
    after = _snapshot()
    await db.commit()

    logger.info(f"Tags extrahiert für {item_id}: tags={list(tags.keys())} before={before} after={after}")
    return {"success": True, "tags": tags, "before": before, "after": after}
|
|
|
|
|
|
@router.post("/{item_id}/extract-cover")
async def extract_local_cover(
    item_id: str,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """Extract a cover from files in the item's folder or from embedded artwork.

    The item's path, the paths of its audio files (ordered by
    ``track_index``) and the item id are handed to ``_save_local_cover``; a
    truthy return value is stored as the item's ``cover_path``.

    Args:
        item_id: Id of the ``LibraryItem`` to update.
        current_user: Resolved via ``get_current_user``; not used in the body.
        db: Async database session.

    Returns:
        ``{"success": True, "cover_path": ...}`` when a cover was stored,
        otherwise ``{"success": False, "message": ...}``.

    Raises:
        HTTPException: 404 when no item with ``item_id`` exists.
    """
    from ..services.scanner import _save_local_cover
    from ..models.media_item import BookFile

    result = await db.execute(select(LibraryItem).where(LibraryItem.id == item_id))
    item = result.scalar_one_or_none()
    if not item:
        raise HTTPException(status_code=404, detail="Item not found")

    files_result = await db.execute(
        select(BookFile).where(BookFile.library_item_id == item_id).order_by(BookFile.track_index)
    )
    audio_files = [f.path for f in files_result.scalars().all()]

    cover = _save_local_cover(item.path, audio_files, item.id)
    if not cover:
        # Nothing found: the item is left untouched and nothing is committed.
        return {"success": False, "message": "Kein Cover gefunden"}

    item.cover_path = cover
    item.updated_at = datetime.utcnow()
    await db.commit()
    logger.info(f"Lokales Cover gesetzt für {item_id}: {cover}")
    return {"success": True, "cover_path": cover}
|
|
|
|
|
|
@router.delete("/{item_id}/match")
async def clear_match(
    item_id: str,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """Remove the stored match from a library item and flag it for review.

    Resets ``matched_source`` to ``"none"``, clears ``matched_id``, sets
    ``match_confidence`` to 0.0, unlocks the match and makes sure the
    ``"zu_prüfen"`` tag is present.

    Args:
        item_id: Id of the ``LibraryItem`` to reset.
        current_user: Resolved via ``get_current_user``; not used in the body.
        db: Async database session.

    Returns:
        ``{"success": True}``.

    Raises:
        HTTPException: 404 when no item with ``item_id`` exists.
    """
    result = await db.execute(select(LibraryItem).where(LibraryItem.id == item_id))
    item = result.scalar_one_or_none()
    if not item:
        raise HTTPException(status_code=404, detail="Item not found")

    item.matched_source = "none"
    item.matched_id = None
    item.match_confidence = 0.0
    item.match_locked = False

    # Work on a COPY of the tag list. Appending to the loaded list and
    # assigning that same object back makes SQLAlchemy compare the list with
    # itself, so the added tag may never be written (assuming ``tags`` is a
    # plain JSON/ARRAY column without mutation tracking).
    tags = list(item.tags or [])
    if "zu_prüfen" not in tags:
        tags.append("zu_prüfen")
    item.tags = tags

    item.updated_at = datetime.utcnow()
    await db.commit()
    return {"success": True}
|