diff --git a/backend/app/routers/matching.py b/backend/app/routers/matching.py
index f7dbc92..38b7c9d 100644
--- a/backend/app/routers/matching.py
+++ b/backend/app/routers/matching.py
@@ -230,6 +230,50 @@ async def apply_match(
return await _enrich_item_with_files(item, db)
@router.post("/{item_id}/extract-tags")
async def extract_audio_tags(
    item_id: str,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """Read the tags of the item's first audio file and fill empty metadata.

    The first ``BookFile`` (ordered by ``track_index``) is parsed, the tags
    are applied to the library item via ``_apply_tags_to_item`` and the
    change is committed.

    Returns:
        ``{"success": False, "message": ...}`` when the item has no files,
        otherwise ``{"success": True, "tags", "before", "after"}`` where
        ``before``/``after`` are snapshots of the affected metadata fields.

    Raises:
        HTTPException: 404 when no item with ``item_id`` exists.
    """
    import asyncio

    from ..services.scanner import _extract_audio_tags, _apply_tags_to_item, _series_from_parent
    from ..models.media_item import BookFile
    from ..models.library import Library

    def _snapshot(entry) -> dict:
        # Single definition of the fields reported in "before"/"after".
        return {
            "title": entry.title, "author": entry.author, "publisher": entry.publisher,
            "publish_year": entry.publish_year, "series": entry.series, "genres": entry.genres,
        }

    result = await db.execute(select(LibraryItem).where(LibraryItem.id == item_id))
    item = result.scalar_one_or_none()
    if not item:
        raise HTTPException(status_code=404, detail="Item not found")

    lib_result = await db.execute(select(Library).where(Library.id == item.library_id))
    lib = lib_result.scalar_one_or_none()
    # A library row may exist with no folders stored; never pass None on.
    library_folders = (lib.folders or []) if lib else []

    files_result = await db.execute(
        select(BookFile).where(BookFile.library_item_id == item_id).order_by(BookFile.track_index)
    )
    files = files_result.scalars().all()
    if not files:
        return {"success": False, "message": "Keine Audio-Dateien"}

    # Tag parsing is blocking file I/O; keep it off the event loop.
    tags = await asyncio.to_thread(_extract_audio_tags, files[0].path)
    parent_series = _series_from_parent(item.path, library_folders)
    before = _snapshot(item)
    _apply_tags_to_item(item, tags, parent_series)
    # Snapshot before committing: a commit may expire the instance, and
    # reading expired attributes on an AsyncSession triggers implicit I/O.
    after = _snapshot(item)
    item.updated_at = datetime.utcnow()
    await db.commit()
    logger.info(f"Tags extrahiert für {item_id}: tags={list(tags.keys())} before={before} after={after}")
    return {"success": True, "tags": tags, "before": before, "after": after}
+
+
@router.post("/{item_id}/extract-cover")
async def extract_local_cover(
item_id: str,
diff --git a/backend/app/services/matcher.py b/backend/app/services/matcher.py
index 81711e7..20e42c8 100644
--- a/backend/app/services/matcher.py
+++ b/backend/app/services/matcher.py
@@ -70,10 +70,17 @@ def detect_series(title: str) -> tuple[str | None, str | None]:
def _build_search_title(original: str) -> str:
- """Bereinigt Titel für Such-APIs: ??? raus, Sonderzeichen, Klammer-Suffixe."""
+ """Bereinigt Titel für Such-APIs: ??? raus, Folge-N-Prefix raus, Klammer-Suffixe raus."""
t = original
+ # ??? entfernen (CQL-Wildcard-Problem)
t = re.sub(r"\?{2,}", "", t)
+ # "(Folge 123)" oder "(2007)" Suffixe entfernen
t = re.sub(r"\s*\([^)]*\)\s*$", "", t)
+ # "Folge 123 -" oder "Folge 123:" oder "Folge 123 " am Anfang entfernen
+ t = re.sub(r"(?i)^\s*(?:folge|band|teil|episode|nr\.?|#)\s*\d+\s*[-:–\.]*\s*", "", t)
+ # "Folge 123" mitten im Titel reduzieren auf nichts
+ t = re.sub(r"(?i)\b(?:folge|band|teil|episode|nr\.?|#)\s*\d+\b\s*[-:–\.]*\s*", " ", t)
+ # Bindestriche/Unterstriche
t = re.sub(r"[_\-–]+", " ", t)
t = re.sub(r"\s+", " ", t).strip()
return t
diff --git a/backend/app/services/scanner.py b/backend/app/services/scanner.py
index 82b4128..4251ed1 100644
--- a/backend/app/services/scanner.py
+++ b/backend/app/services/scanner.py
@@ -31,6 +31,105 @@ def _get_audio_duration(file_path: str) -> float:
return 0.0
def _extract_audio_tags(file_path: str) -> dict:
    """Read ID3/Vorbis/MP4 tags through mutagen's easy API.

    Args:
        file_path: Path of the audio file to inspect.

    Returns:
        A dict containing only the tags that have a non-blank value. Possible
        keys: album, title, artist, albumartist, composer, date, publisher,
        language, discnumber, tracknumber and genres (a list). Any failure
        while reading is logged at debug level and yields ``{}``.
    """
    try:
        from mutagen import File as MutagenFile

        audio = MutagenFile(file_path, easy=True)
        if not audio:
            return {}

        def clean(value):
            # Whitespace-only strings count as "no value" so they cannot
            # overwrite or block real metadata later on.
            if isinstance(value, str):
                value = value.strip()
            return value or None

        def first(key: str):
            values = audio.get(key)
            if not values:
                return None
            if isinstance(values, list):
                return clean(values[0])
            return clean(values)

        result = {
            "album": first("album"),
            "title": first("title"),
            "artist": first("artist"),
            "albumartist": first("albumartist"),
            "composer": first("composer"),
            "date": first("date"),
            "publisher": first("organization") or first("publisher"),
            "language": first("language"),
            "discnumber": first("discnumber"),
            "tracknumber": first("tracknumber"),
        }

        raw_genres = audio.get("genre")
        if raw_genres:
            if not isinstance(raw_genres, list):
                raw_genres = [raw_genres]
            genres = [g for g in (clean(entry) for entry in raw_genres) if g]
            if genres:
                result["genres"] = genres

        return {k: v for k, v in result.items() if v}
    except Exception as e:
        # Best effort: an unreadable file must not abort the caller.
        logger.debug(f"Tag-Lesen fehlgeschlagen für {file_path}: {e}")
        return {}
+
+
+def _series_from_parent(folder_path: str, library_folders: list) -> str | None:
+ """Wenn der Parent-Ordner nicht selbst eine Library-Root ist, ist er möglicherweise die Serie."""
+ import re as _re
+ parent_path = os.path.dirname(folder_path)
+ parent = os.path.basename(parent_path)
+ if not parent:
+ return None
+ # Skip wenn Parent eine Library-Root ist
+ for lib_folder in library_folders:
+ lib_path = lib_folder.get("fullPath", lib_folder.get("full_path", ""))
+ if lib_path and os.path.normpath(parent_path) == os.path.normpath(lib_path):
+ return None
+ if 2 < len(parent) < 60 and not _re.match(r"^[\d\W]+$", parent):
+ return parent
+ return None
+
+
def _apply_tags_to_item(item, tags: dict, parent_series_hint: str | None):
    """Fill metadata fields of ``item`` from audio tags.

    Fields that already hold a value are left alone. The one exception is the
    title: it is also replaced by the album tag when it merely equals the
    title guessed from the item's path.

    Args:
        item: Object exposing path, title, author, publisher, publish_year,
            language, genres and series attributes; modified in place.
        tags: Tag dict as produced by the tag extractor.
        parent_series_hint: Series name derived from the parent folder, if any.
    """
    import re as _re

    album_tag = tags.get("album")
    performer = tags.get("albumartist") or tags.get("artist")
    composer_tag = tags.get("composer")

    # Title: the album tag normally carries the audiobook title.
    path_title = _guess_title_from_path(item.path)
    if album_tag:
        if not item.title or item.title == path_title:
            item.title = album_tag

    # Author: the code prefers composer, then albumartist, then artist.
    # NOTE(review): the previous comment here claimed
    # "AlbumArtist > Composer > Artist" - confirm which order is intended.
    if not item.author:
        author_candidate = composer_tag or performer
        if author_candidate:
            item.author = author_candidate

    # Plain one-to-one copies for fields that are still empty.
    for attr_name in ("publisher", "language", "genres"):
        tag_value = tags.get(attr_name)
        if tag_value and not getattr(item, attr_name):
            setattr(item, attr_name, tag_value)

    # Year: first four-digit run found in the date tag.
    date_tag = tags.get("date")
    if date_tag and not item.publish_year:
        year_match = _re.search(r"\d{4}", str(date_tag))
        if year_match is not None:
            item.publish_year = int(year_match.group())

    # Series: track/disc numbers are considered unreliable, so the parent
    # folder hint is tried first.
    if parent_series_hint and not item.series:
        item.series = parent_series_hint
    if item.series:
        return

    # Fallback heuristic for radio plays such as "Die drei ???": a short
    # artist name that does not occur inside the album name is taken as
    # the series.
    if performer and len(performer) < 40:
        if album_tag and performer.lower() not in album_tag.lower():
            item.series = performer
+
+
def _get_file_size(file_path: str) -> int:
try:
return os.path.getsize(file_path)
@@ -220,6 +319,11 @@ async def scan_library_task(library_id: str, job_id: str):
total_duration = sum(_get_audio_duration(f) for f in audio_files)
total_size = sum(_get_file_size(f) for f in audio_files)
+ # ID3-Tags aus erster Audio-Datei lesen
+ first_audio = audio_files[0] if audio_files else None
+ tags = _extract_audio_tags(first_audio) if first_audio else {}
+ parent_series = _series_from_parent(folder_path, folders)
+
if existing_item:
existing_item.duration_seconds = total_duration
existing_item.size_bytes = total_size
@@ -227,6 +331,11 @@ async def scan_library_task(library_id: str, job_id: str):
existing_item.is_missing = False
existing_item.updated_at = datetime.utcnow()
item = existing_item
+ # Tags nachziehen wenn kein Match aktiv ist
+ if not existing_item.match_locked and (
+ not existing_item.matched_source or existing_item.matched_source == "none"
+ ):
+ _apply_tags_to_item(item, tags, parent_series)
# Cover aus Ordner/Embed nachziehen falls noch keins da ist
if not item.cover_path or not os.path.exists(item.cover_path or ""):
local_cover = _save_local_cover(folder_path, audio_files, item.id)
@@ -251,6 +360,13 @@ async def scan_library_task(library_id: str, job_id: str):
)
db.add(item)
await db.flush()
+ # Tags anwenden
+ _apply_tags_to_item(item, tags, parent_series)
+ logger.info(
+ f"Neu gescannt: id={item.id} title={item.title!r} "
+ f"author={item.author!r} series={item.series!r} "
+ f"year={item.publish_year} tags={list(tags.keys())}"
+ )
# BookFiles anlegen
for idx, file_path in enumerate(audio_files):
diff --git a/frontend/src/api/items.ts b/frontend/src/api/items.ts
index 88a46cc..f9b7a4c 100644
--- a/frontend/src/api/items.ts
+++ b/frontend/src/api/items.ts
@@ -36,4 +36,7 @@ export const checkConnectivity = () =>
export const extractCover = (id: string) =>
api.post(`/api/items/${id}/extract-cover`).then((r) => r.data)
/** POST to the item's extract-tags endpoint and resolve with the response body. */
export const extractTags = async (id: string) => {
  const response = await api.post(`/api/items/${id}/extract-tags`)
  return response.data
}
+
export const coverUrl = (id: string) => `/api/items/${id}/cover`
diff --git a/frontend/src/pages/BookDetail.tsx b/frontend/src/pages/BookDetail.tsx
index 92d70e8..0c9ab40 100644
--- a/frontend/src/pages/BookDetail.tsx
+++ b/frontend/src/pages/BookDetail.tsx
@@ -4,7 +4,7 @@ import {
Play, ArrowLeft, RefreshCw, Search, Check,
Loader2, Trash2, X
} from 'lucide-react'
-import { getItem, updateItem, triggerMatch, searchMatch, applyMatch, debugMatch, checkConnectivity, extractCover, coverUrl } from '../api/items'
+import { getItem, updateItem, triggerMatch, searchMatch, applyMatch, debugMatch, checkConnectivity, extractCover, extractTags, coverUrl } from '../api/items'
import { getMe, createBookmark, deleteBookmark } from '../api/me'
import { usePlayerStore } from '../store/playerStore'
import CoverImage from '../components/common/CoverImage'
@@ -114,6 +114,19 @@ export default function BookDetail() {
}
}
// Ask the backend to read the audio tags for the current item. On success
// the item is reloaded and the tags plus before/after metadata are shown.
const handleExtractTags = async () => {
  if (!id) return
  try {
    const res = await extractTags(id)
    if (!res.success) {
      alert(res.message || 'Keine Tags gefunden')
      return
    }
    const updated = await getItem(id)
    setItem(updated)
    const tagKeys = Object.keys(res.tags || {})
    alert(`Tags gelesen: ${tagKeys.join(', ') || '(keine)'}\n\nVorher: ${JSON.stringify(res.before, null, 2)}\n\nNachher: ${JSON.stringify(res.after, null, 2)}`)
  } catch (err) {
    // A rejected request (e.g. 404 or network failure) would otherwise end
    // up as an unhandled promise rejection with no feedback for the user.
    const reason = err instanceof Error ? err.message : String(err)
    alert(`Tags konnten nicht gelesen werden: ${reason}`)
  }
}
+
const fmtTime = (s: number) => {
const h = Math.floor(s / 3600)
const m = Math.floor((s % 3600) / 60)
@@ -218,6 +231,13 @@ export default function BookDetail() {
>
Cover aus Datei
+