diff --git a/backend/app/routers/matching.py b/backend/app/routers/matching.py
index f7dbc92..38b7c9d 100644
--- a/backend/app/routers/matching.py
+++ b/backend/app/routers/matching.py
@@ -230,6 +230,62 @@ async def apply_match(
     return await _enrich_item_with_files(item, db)
 
 
+@router.post("/{item_id}/extract-tags")
+async def extract_audio_tags(
+    item_id: str,
+    current_user: User = Depends(get_current_user),
+    db: AsyncSession = Depends(get_db),
+):
+    """Read tags from the item's first audio file and fill empty metadata.
+
+    Returns ``{"success": False, "message": ...}`` when the item has no
+    audio files, otherwise ``{"success": True, "tags", "before", "after"}``
+    where ``before``/``after`` snapshot the fields this endpoint may change.
+
+    Raises:
+        HTTPException: 404 when no item with ``item_id`` exists.
+    """
+    from ..services.scanner import _extract_audio_tags, _apply_tags_to_item, _series_from_parent
+    from ..models.media_item import BookFile
+    from ..models.library import Library
+
+    result = await db.execute(select(LibraryItem).where(LibraryItem.id == item_id))
+    item = result.scalar_one_or_none()
+    if not item:
+        raise HTTPException(status_code=404, detail="Item not found")
+
+    lib_result = await db.execute(select(Library).where(Library.id == item.library_id))
+    lib = lib_result.scalar_one_or_none()
+    library_folders = lib.folders if lib else []
+
+    files_result = await db.execute(
+        select(BookFile).where(BookFile.library_item_id == item_id).order_by(BookFile.track_index)
+    )
+    files = files_result.scalars().all()
+    if not files:
+        return {"success": False, "message": "Keine Audio-Dateien"}
+
+    def snapshot() -> dict:
+        """Capture the metadata fields that tag extraction may change."""
+        return {
+            "title": item.title, "author": item.author, "publisher": item.publisher,
+            "publish_year": item.publish_year, "series": item.series, "genres": item.genres,
+        }
+
+    tags = _extract_audio_tags(files[0].path)
+    parent_series = _series_from_parent(item.path, library_folders)
+    before = snapshot()
+    _apply_tags_to_item(item, tags, parent_series)
+    item.updated_at = datetime.utcnow()
+    # Take the "after" snapshot before committing: commit() expires ORM
+    # attributes unless the session uses expire_on_commit=False, and an
+    # AsyncSession cannot lazily reload them on attribute access.
+    after = snapshot()
+    await db.commit()
+    logger.info(f"Tags extrahiert für {item_id}: tags={list(tags.keys())} before={before} after={after}")
+    return {"success": True, "tags": tags, "before": before, "after": after}
+
+
 @router.post("/{item_id}/extract-cover")
 async def extract_local_cover(
     item_id: str,
diff --git a/backend/app/services/matcher.py b/backend/app/services/matcher.py
index 81711e7..20e42c8 100644
--- a/backend/app/services/matcher.py
+++ b/backend/app/services/matcher.py
@@ -70,10 +70,18 @@ def detect_series(title: str) -> tuple[str | None, str | None]:
 
 
 def _build_search_title(original: str) -> str:
-    """Bereinigt Titel für Such-APIs: ??? raus, Sonderzeichen, Klammer-Suffixe."""
+    """Clean a title for search APIs: drop ???, episode markers and a trailing (...) suffix."""
     t = original
+    # Remove runs of "?" (CQL wildcard problem)
     t = re.sub(r"\?{2,}", "", t)
+    # Remove a trailing "(Folge 123)" or "(2007)" suffix
     t = re.sub(r"\s*\([^)]*\)\s*$", "", t)
+    # Remove a leading "Folge 123 -", "Folge 123:" or "Folge 123 "
+    t = re.sub(r"(?i)^\s*(?:folge|band|teil|episode|nr\.?|#)\s*\d+\s*[-:–\.]*\s*", "", t)
+    # Replace an episode marker inside the title with a space. "#" is kept
+    # outside the \b group: no word boundary exists between a space and "#".
+    t = re.sub(r"(?i)(?:\b(?:folge|band|teil|episode|nr\.?)|#)\s*\d+\b\s*[-:–\.]*\s*", " ", t)
+    # Hyphens/underscores become spaces
     t = re.sub(r"[_\-–]+", " ", t)
     t = re.sub(r"\s+", " ", t).strip()
     return t
diff --git a/backend/app/services/scanner.py b/backend/app/services/scanner.py
index 82b4128..4251ed1 100644
--- a/backend/app/services/scanner.py
+++ b/backend/app/services/scanner.py
@@ -31,6 +31,119 @@ def _get_audio_duration(file_path: str) -> float:
         return 0.0
 
 
+def _extract_audio_tags(file_path: str) -> dict:
+    """Read ID3/Vorbis/MP4 tags through mutagen's easy API.
+
+    Returns a dict with only the tags that are present and non-empty, or an
+    empty dict when the file cannot be read. Best effort: any error is
+    logged at debug level and never raised.
+    """
+    try:
+        from mutagen import File as MutagenFile
+        audio = MutagenFile(file_path, easy=True)
+        if not audio:
+            return {}
+
+        def first(key: str):
+            """Return the first value stored under *key*, or None."""
+            v = audio.get(key)
+            if not v:
+                return None
+            return v[0] if isinstance(v, list) else v
+
+        result = {
+            "album": first("album"),
+            "title": first("title"),
+            "artist": first("artist"),
+            "albumartist": first("albumartist"),
+            "composer": first("composer"),
+            "date": first("date"),
+            "publisher": first("organization") or first("publisher"),
+            "language": first("language"),
+            "discnumber": first("discnumber"),
+            "tracknumber": first("tracknumber"),
+        }
+        genre = audio.get("genre")
+        if genre:
+            result["genres"] = genre if isinstance(genre, list) else [genre]
+        return {k: v for k, v in result.items() if v}
+    except Exception as e:
+        logger.debug(f"Tag-Lesen fehlgeschlagen für {file_path}: {e}")
+        return {}
+
+
+def _series_from_parent(folder_path: str, library_folders: list) -> str | None:
+    """Return the name of the parent folder as a series hint, or None.
+
+    The parent of *folder_path* is not used when it is itself a library root
+    (``fullPath``/``full_path`` entry of *library_folders*), or when its name
+    is too short/long or consists only of digits and non-word characters.
+    """
+    import re as _re
+    # Normalize first: with a trailing separator dirname() would return the
+    # item folder itself instead of its parent.
+    parent_path = os.path.dirname(os.path.normpath(folder_path))
+    parent = os.path.basename(parent_path)
+    if not parent:
+        return None
+    # Skip when the parent is a library root
+    for lib_folder in library_folders:
+        lib_path = lib_folder.get("fullPath", lib_folder.get("full_path", ""))
+        if lib_path and os.path.normpath(parent_path) == os.path.normpath(lib_path):
+            return None
+    if 2 < len(parent) < 60 and not _re.match(r"^[\d\W]+$", parent):
+        return parent
+    return None
+
+
+def _apply_tags_to_item(item, tags: dict, parent_series_hint: str | None):
+    """Fill empty item fields from audio tags without overwriting existing values.
+
+    Exception: a title that is still equal to the folder-derived guess is
+    replaced by the album tag.
+    """
+    import re as _re
+
+    album = tags.get("album")
+    artist = tags.get("albumartist") or tags.get("artist")
+    composer = tags.get("composer")
+
+    # Title: the album is normally the audiobook title
+    folder_title = _guess_title_from_path(item.path)
+    if album and (not item.title or item.title == folder_title):
+        item.title = album
+
+    # Author: AlbumArtist > Composer > Artist
+    if not item.author:
+        author = tags.get("albumartist") or composer or tags.get("artist")
+        if author:
+            item.author = author
+
+    if not item.publisher and tags.get("publisher"):
+        item.publisher = tags["publisher"]
+
+    if not item.publish_year and tags.get("date"):
+        m = _re.search(r"\d{4}", str(tags["date"]))
+        if m:
+            item.publish_year = int(m.group())
+
+    if not item.language and tags.get("language"):
+        item.language = tags["language"]
+
+    if not item.genres and tags.get("genres"):
+        item.genres = tags["genres"]
+
+    # A series from tracknumber/discnumber would be possible but unreliable.
+    # Instead: take the parent folder as the series hint.
+    if not item.series and parent_series_hint:
+        item.series = parent_series_hint
+    # For "Die drei ???" radio plays the artist is usually the series itself
+    if not item.series and artist and len(artist) < 40:
+        # Heuristic: if artist and album do not resemble each other, the artist may be the series
+        if album and artist.lower() not in album.lower():
+            item.series = artist
+
+
 def _get_file_size(file_path: str) -> int:
     try:
         return os.path.getsize(file_path)
@@ -220,6 +333,11 @@ async def scan_library_task(library_id: str, job_id: str):
             total_duration = sum(_get_audio_duration(f) for f in audio_files)
             total_size = sum(_get_file_size(f) for f in audio_files)
 
+            # Read the tags from the first audio file
+            first_audio = audio_files[0] if audio_files else None
+            tags = _extract_audio_tags(first_audio) if first_audio else {}
+            parent_series = _series_from_parent(folder_path, folders)
+
             if existing_item:
                 existing_item.duration_seconds = total_duration
                 existing_item.size_bytes = total_size
@@ -227,6 +345,11 @@ async def scan_library_task(library_id: str, job_id: str):
                 existing_item.is_missing = False
                 existing_item.updated_at = datetime.utcnow()
                 item = existing_item
+                # Backfill tags only while no match is active
+                if not existing_item.match_locked and (
+                    not existing_item.matched_source or existing_item.matched_source == "none"
+                ):
+                    _apply_tags_to_item(item, tags, parent_series)
                 # Cover aus Ordner/Embed nachziehen falls noch keins da ist
                 if not item.cover_path or not os.path.exists(item.cover_path or ""):
                     local_cover = _save_local_cover(folder_path, audio_files, item.id)
@@ -251,6 +374,13 @@ async def scan_library_task(library_id: str, job_id: str):
                 )
                 db.add(item)
                 await db.flush()
+                # Apply tags
+                _apply_tags_to_item(item, tags, parent_series)
+                logger.info(
+                    f"Neu gescannt: id={item.id} title={item.title!r} "
+                    f"author={item.author!r} series={item.series!r} "
+                    f"year={item.publish_year} tags={list(tags.keys())}"
+                )
 
             # BookFiles anlegen
             for idx, file_path in enumerate(audio_files):
diff --git a/frontend/src/api/items.ts b/frontend/src/api/items.ts
index 88a46cc..f9b7a4c 100644
--- a/frontend/src/api/items.ts
+++ b/frontend/src/api/items.ts
@@ -36,4 +36,7 @@ export const checkConnectivity = () =>
 export const extractCover = (id: string) =>
   api.post(`/api/items/${id}/extract-cover`).then((r) => r.data)
 
+export const extractTags = (id: string) =>
+  api.post(`/api/items/${id}/extract-tags`).then((r) => r.data)
+
 export const coverUrl = (id: string) => `/api/items/${id}/cover`
diff --git a/frontend/src/pages/BookDetail.tsx b/frontend/src/pages/BookDetail.tsx
index 92d70e8..0c9ab40 100644
--- a/frontend/src/pages/BookDetail.tsx
+++ b/frontend/src/pages/BookDetail.tsx
@@ -4,7 +4,7 @@ import {
   Play, ArrowLeft, RefreshCw, Search, Check,
   Loader2, Trash2, X
 } from 'lucide-react'
-import { getItem, updateItem, triggerMatch, searchMatch, applyMatch, debugMatch, checkConnectivity, extractCover, coverUrl } from '../api/items'
+import { getItem, updateItem, triggerMatch, searchMatch, applyMatch, debugMatch, checkConnectivity, extractCover, extractTags, coverUrl } from '../api/items'
 import { getMe, createBookmark, deleteBookmark } from '../api/me'
 import { usePlayerStore } from '../store/playerStore'
 import CoverImage from '../components/common/CoverImage'
@@ -114,6 +114,24 @@ export default function BookDetail() {
     }
   }
 
+  const handleExtractTags = async () => {
+    if (!id) return
+    try {
+      const res = await extractTags(id)
+      if (!res.success) {
+        alert(res.message || 'Keine Tags gefunden')
+        return
+      }
+      const updated = await getItem(id)
+      setItem(updated)
+      const tagKeys = Object.keys(res.tags || {})
+      alert(`Tags gelesen: ${tagKeys.join(', ') || '(keine)'}\n\nVorher: ${JSON.stringify(res.before, null, 2)}\n\nNachher: ${JSON.stringify(res.after, null, 2)}`)
+    } catch (err) {
+      // A failed request must not surface as an unhandled promise rejection
+      alert('Tags konnten nicht gelesen werden')
+    }
+  }
+
   const fmtTime = (s: number) => {
     const h = Math.floor(s / 3600)
     const m = Math.floor((s % 3600) / 60)
@@ -218,6 +236,13 @@ export default function BookDetail() {
               >
                 Cover aus Datei
+