Read ID3 tags during scan — fixes 'Folge 114 Die Villa der Toten' problem

Diagnosis from connectivity check: 4/5 APIs reachable (only Google Books rate-limited). So the network is fine — the search title was the problem. 'Folge 114 Die Villa der Toten' isn't indexed under that name anywhere. The MP3 itself has the real metadata in ID3 tags (album, artist, year).

Scanner now reads ID3/Vorbis/MP4 tags from the first audio file:
- album → item.title
- albumartist / composer / artist → item.author
- date → publish_year
- organization / publisher → publisher
- language → language
- genre → genres
- artist (heuristic) → series, if it doesn't appear in album title

Parent folder name → series hint (skipped if it's a library root). Only fills empty fields, never overwrites manually edited or matched data. Runs on new items AND on re-scan for items without an active match.

Search title normalization improved: 'Folge 123 - X' / 'Band 7: Y' etc. prefixes and infixes get stripped so APIs see the actual episode title.

New endpoint POST /api/items/{id}/extract-tags + 'Tags lesen' button in BookDetail — triggers tag extraction on demand for existing items. Returns before/after diff so user can see what was filled in.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-26 20:15:44 +02:00
parent 4fccb7abae
commit 0824894a7f
5 changed files with 192 additions and 2 deletions
--- a/backend/app/routers/matching.py
+++ b/backend/app/routers/matching.py
@@ -230,6 +230,50 @@ async def apply_match(
    return await _enrich_item_with_files(item, db)


+@router.post("/{item_id}/extract-tags")
+async def extract_audio_tags(
+    item_id: str,
+    current_user: User = Depends(get_current_user),
+    db: AsyncSession = Depends(get_db),
+):
+    """Read tags from the item's first audio file and fill empty metadata.
+
+    Returns the raw tags plus a before/after snapshot of the fillable fields,
+    or ``success=False`` if the item has no files; 404 if the item is unknown.
+    """
+    from ..services.scanner import _extract_audio_tags, _apply_tags_to_item, _series_from_parent
+    from ..models.media_item import BookFile
+    from ..models.library import Library
+
+    result = await db.execute(select(LibraryItem).where(LibraryItem.id == item_id))
+    item = result.scalar_one_or_none()
+    if not item:
+        raise HTTPException(status_code=404, detail="Item not found")
+
+    lib_result = await db.execute(select(Library).where(Library.id == item.library_id))
+    lib = lib_result.scalar_one_or_none()
+    library_folders = lib.folders if lib else []
+
+    files_result = await db.execute(
+        select(BookFile).where(BookFile.library_item_id == item_id).order_by(BookFile.track_index)
+    )
+    files = files_result.scalars().all()
+    if not files:
+        return {"success": False, "message": "Keine Audio-Dateien"}
+
+    fields = ("title", "author", "publisher", "publish_year", "series", "genres")
+    tags = _extract_audio_tags(files[0].path)
+    parent_series = _series_from_parent(item.path, library_folders)
+    before = {name: getattr(item, name) for name in fields}
+    _apply_tags_to_item(item, tags, parent_series)
+    item.updated_at = datetime.utcnow()
+    # Snapshot before commit: expired attributes cannot lazy-load on AsyncSession.
+    after = {name: getattr(item, name) for name in fields}
+    await db.commit()
+    logger.info(f"Tags extrahiert für {item_id}: tags={list(tags.keys())} before={before} after={after}")
+    return {"success": True, "tags": tags, "before": before, "after": after}
+
+
@router.post("/{item_id}/extract-cover")
 async def extract_local_cover(
    item_id: str,