Clean up UI buttons + smarter series-aware search title

UI: Remove the developer-tool buttons Cover-aus-Datei, Tags-lesen and
Connectivity-Check (including their handlers) and move the remaining
API-Debug button behind a '+ Tools' toggle. Default view has only
Play, Match, Auto-Match. Tag extraction runs automatically on scan
anyway, so the buttons were noise.

Matcher: Item metadata (series, author from tags or earlier matches)
now flows into the search:
- detect_series() also searches for the series name anywhere in the
  title, not only at the start — handles garbage chars (◆ U+25C6 etc.)
  before the series name
- New _strip_series_prefix removes "Die drei ???" from search title so
  APIs see only the episode title ("Die Villa der Toten") which is how
  most databases index these
- _build_search_title also strips non-printable / exotic chars and
  bracketed content anywhere (not just trailing)
- Effective series falls back to item.series when detect_series misses
- Search call now logs which series the search is using

Example: title='◆Die◆ drei ??? Die Villa der Toten (drei Fragezeichen)'
  detected_series='Die drei ???', search_title='Die Villa der Toten'

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Audiolib
2026-05-26 20:31:40 +02:00
parent 0824894a7f
commit edf057f36e
3 changed files with 89 additions and 110 deletions

View File

@@ -119,12 +119,16 @@ async def debug_match(
from ..services.matching.open_library import search_open_library from ..services.matching.open_library import search_open_library
from ..services.matching.google_books import search_google_books from ..services.matching.google_books import search_google_books
from ..services.matching.dnb import search_dnb from ..services.matching.dnb import search_dnb
from ..services.matcher import _build_search_title, detect_series from ..services.matcher import _build_search_title, detect_series, _strip_series_prefix
series, episode = detect_series(title) series, episode = detect_series(title)
search_title = _build_search_title(title)
if series and episode: if series and episode:
search_title = f"{series} {episode}" search_title = f"{series} {episode}"
elif series:
cleaned = _build_search_title(title)
search_title = _strip_series_prefix(cleaned, series)
else:
search_title = _build_search_title(title)
logger.info(f"DEBUG: title={title!r} → search={search_title!r} series={series!r} episode={episode!r}") logger.info(f"DEBUG: title={title!r} → search={search_title!r} series={series!r} episode={episode!r}")

View File

@@ -58,27 +58,37 @@ SERIES_PATTERNS_SERIES_ONLY = [
def detect_series(title: str) -> tuple[str | None, str | None]: def detect_series(title: str) -> tuple[str | None, str | None]:
t = title.strip() t = title.strip()
# 1. Mit Folgenummer am Anfang
for pattern, canonical_name in SERIES_PATTERNS_WITH_EPISODE: for pattern, canonical_name in SERIES_PATTERNS_WITH_EPISODE:
m = re.match(pattern, t) m = re.match(pattern, t)
if m: if m:
return (canonical_name or m.group(1).strip(), m.group(2)) return (canonical_name or m.group(1).strip(), m.group(2))
# 2. Ohne Folgenummer am Anfang
for pattern, canonical_name in SERIES_PATTERNS_SERIES_ONLY: for pattern, canonical_name in SERIES_PATTERNS_SERIES_ONLY:
m = re.match(pattern, t) m = re.match(pattern, t)
if m: if m:
return (canonical_name or m.group(1).strip(), None) return (canonical_name or m.group(1).strip(), None)
# 3. Series-Name irgendwo im Titel (falls Sonderzeichen / Müll davor)
for pattern, canonical_name in SERIES_PATTERNS_SERIES_ONLY:
if not canonical_name:
continue
m = re.search(pattern, t)
if m:
return (canonical_name, None)
return None, None return None, None
def _build_search_title(original: str) -> str: def _build_search_title(original: str) -> str:
"""Bereinigt Titel für Such-APIs: ??? raus, Folge-N-Prefix raus, Klammer-Suffixe raus.""" """Bereinigt Titel: ???, Sonderzeichen, Folge-N-Prefix, Klammer-Inhalte raus."""
t = original t = original
# Nicht-druckbare/exotische Zeichen raus (◆, ◇, U+FFFD etc.)
t = re.sub(r"[^\w\s\-:!?,.&'äöüÄÖÜß]", " ", t, flags=re.UNICODE)
# ??? entfernen (CQL-Wildcard-Problem) # ??? entfernen (CQL-Wildcard-Problem)
t = re.sub(r"\?{2,}", "", t) t = re.sub(r"\?{2,}", "", t)
# "(Folge 123)" oder "(2007)" Suffixe entfernen # Klammer-Inhalte (egal wo) entfernen
t = re.sub(r"\s*\([^)]*\)\s*$", "", t) t = re.sub(r"\([^)]*\)", " ", t)
# "Folge 123 -" oder "Folge 123:" oder "Folge 123 " am Anfang entfernen # "Folge 123" Prefixe und Infixes
t = re.sub(r"(?i)^\s*(?:folge|band|teil|episode|nr\.?|#)\s*\d+\s*[-:\.]*\s*", "", t) t = re.sub(r"(?i)^\s*(?:folge|band|teil|episode|nr\.?|#)\s*\d+\s*[-:\.]*\s*", "", t)
# "Folge 123" mitten im Titel reduzieren auf nichts
t = re.sub(r"(?i)\b(?:folge|band|teil|episode|nr\.?|#)\s*\d+\b\s*[-:\.]*\s*", " ", t) t = re.sub(r"(?i)\b(?:folge|band|teil|episode|nr\.?|#)\s*\d+\b\s*[-:\.]*\s*", " ", t)
# Bindestriche/Unterstriche # Bindestriche/Unterstriche
t = re.sub(r"[_\-]+", " ", t) t = re.sub(r"[_\-]+", " ", t)
@@ -86,6 +96,23 @@ def _build_search_title(original: str) -> str:
return t return t
def _strip_series_prefix(search_title: str, series: str) -> str:
    """Remove the series name from a search title so only the episode part remains.

    The series name is first normalised with ``_build_search_title`` and
    lower-cased, then matched case-insensitively against ``search_title``:
    first as a leading prefix, otherwise anywhere inside the title.

    Args:
        search_title: Title to search for (callers pass an already cleaned title).
        series: Series name to remove; may be empty.

    Returns:
        ``search_title`` without the series name, trimmed of surrounding
        spaces and ``-:.`` characters. The unchanged ``search_title`` is
        returned when no series is given, when the normalised series name
        is empty, or when removing it would leave nothing.
    """
    if not series:
        return search_title
    series_clean = _build_search_title(series).lower()
    if not series_clean:
        # The series consisted only of characters the cleaner drops (e.g. "???").
        # An empty regex pattern matches between every character, so re.sub
        # below would space out the whole title ("Villa" -> "V i l l a").
        return search_title
    if search_title.lower().startswith(series_clean):
        rest = search_title[len(series_clean):].strip(" -:.")
        if rest:
            return rest
    # Also remove the series name when it occurs in the middle of the title.
    pattern = re.escape(series_clean)
    rest = re.sub(pattern, " ", search_title, flags=re.IGNORECASE)
    rest = re.sub(r"\s+", " ", rest).strip(" -:.")
    return rest if rest else search_title
def _title_similarity(a: str, b: str) -> float: def _title_similarity(a: str, b: str) -> float:
"""Wort-Überlapp mit Min/Max-Gewichtung — lenient für Teil-Treffer.""" """Wort-Überlapp mit Min/Max-Gewichtung — lenient für Teil-Treffer."""
if not a or not b: if not a or not b:
@@ -246,21 +273,27 @@ async def match_audiobook(item_id: str):
title = item.title or "" title = item.title or ""
author = item.author author = item.author
series, episode = detect_series(title) detected_series, episode = detect_series(title)
if series: effective_series = detected_series or item.series
if episode:
search_title = f"{series} {episode}" if effective_series and episode:
else: search_title = f"{effective_series} {episode}"
# Serie erkannt, keine Folgennummer → kompletten Titel suchen elif effective_series:
search_title = _build_search_title(title) # Series aus Titel entfernen → nur Episode-Teil suchen, präziser
if not item.series: cleaned = _build_search_title(title)
item.series = series search_title = _strip_series_prefix(cleaned, effective_series)
if not item.series_sequence and episode:
item.series_sequence = episode
else: else:
search_title = _build_search_title(title) search_title = _build_search_title(title)
logger.info(f"Matche: orig='{title}' suchTitel='{search_title}' author={author!r} | Quellen: {sources}") if detected_series and not item.series:
item.series = detected_series
if episode and not item.series_sequence:
item.series_sequence = episode
logger.info(
f"Matche: orig='{title}' suchTitel='{search_title}' "
f"author={author!r} series={effective_series!r} | Quellen: {sources}"
)
best: MatchResult | None = None best: MatchResult | None = None
best_score = 0.0 best_score = 0.0
@@ -311,8 +344,18 @@ async def match_audiobook(item_id: str):
async def search_for_item(title: str, author: str | None = None) -> list[dict]: async def search_for_item(title: str, author: str | None = None) -> list[dict]:
"""Suche über alle Quellen für manuelles Matching. Gibt alle relevanten Felder zurück.""" """Suche über alle Quellen für manuelles Matching. Gibt alle relevanten Felder zurück."""
search_title = _build_search_title(title) detected_series, episode = detect_series(title)
logger.info(f"Manuelle Suche: orig='{title}' bereinigt='{search_title}' author={author!r}") if detected_series and episode:
search_title = f"{detected_series} {episode}"
elif detected_series:
cleaned = _build_search_title(title)
search_title = _strip_series_prefix(cleaned, detected_series)
else:
search_title = _build_search_title(title)
logger.info(
f"Manuelle Suche: orig='{title}' bereinigt='{search_title}' "
f"author={author!r} series={detected_series!r}"
)
async def _search_source(name: str, coro): async def _search_source(name: str, coro):
try: try:

View File

@@ -4,7 +4,7 @@ import {
Play, ArrowLeft, RefreshCw, Search, Check, Play, ArrowLeft, RefreshCw, Search, Check,
Loader2, Trash2, X Loader2, Trash2, X
} from 'lucide-react' } from 'lucide-react'
import { getItem, updateItem, triggerMatch, searchMatch, applyMatch, debugMatch, checkConnectivity, extractCover, extractTags, coverUrl } from '../api/items' import { getItem, updateItem, triggerMatch, searchMatch, applyMatch, debugMatch, coverUrl } from '../api/items'
import { getMe, createBookmark, deleteBookmark } from '../api/me' import { getMe, createBookmark, deleteBookmark } from '../api/me'
import { usePlayerStore } from '../store/playerStore' import { usePlayerStore } from '../store/playerStore'
import CoverImage from '../components/common/CoverImage' import CoverImage from '../components/common/CoverImage'
@@ -23,8 +23,7 @@ export default function BookDetail() {
const [showMatchPanel, setShowMatchPanel] = useState(false) const [showMatchPanel, setShowMatchPanel] = useState(false)
const [debugData, setDebugData] = useState<any>(null) const [debugData, setDebugData] = useState<any>(null)
const [debugLoading, setDebugLoading] = useState(false) const [debugLoading, setDebugLoading] = useState(false)
const [connData, setConnData] = useState<any>(null) const [showDevTools, setShowDevTools] = useState(false)
const [connLoading, setConnLoading] = useState(false)
const { play, item: currentItem, currentTime } = usePlayerStore() const { play, item: currentItem, currentTime } = usePlayerStore()
useEffect(() => { useEffect(() => {
@@ -93,39 +92,6 @@ export default function BookDetail() {
} }
} }
const handleConnectivity = async () => {
setConnLoading(true)
try {
const data = await checkConnectivity()
setConnData(data)
} finally {
setConnLoading(false)
}
}
const handleExtractCover = async () => {
if (!id) return
const res = await extractCover(id)
if (res.success) {
const updated = await getItem(id)
setItem(updated)
} else {
alert('Kein lokales Cover gefunden')
}
}
const handleExtractTags = async () => {
if (!id) return
const res = await extractTags(id)
if (res.success) {
const updated = await getItem(id)
setItem(updated)
const tagKeys = Object.keys(res.tags || {})
alert(`Tags gelesen: ${tagKeys.join(', ') || '(keine)'}\n\nVorher: ${JSON.stringify(res.before, null, 2)}\n\nNachher: ${JSON.stringify(res.after, null, 2)}`)
} else {
alert(res.message || 'Keine Tags gefunden')
}
}
const fmtTime = (s: number) => { const fmtTime = (s: number) => {
const h = Math.floor(s / 3600) const h = Math.floor(s / 3600)
@@ -216,65 +182,31 @@ export default function BookDetail() {
Auto-Match Auto-Match
</button> </button>
<button <button
onClick={handleDebug} onClick={() => setShowDevTools(!showDevTools)}
disabled={debugLoading} className="text-muted hover:text-ink text-sm px-2 py-2 transition-colors"
className="flex items-center gap-2 bg-card border border-divider px-4 py-2 rounded-lg text-sm text-muted hover:text-ink hover:bg-card/80 disabled:opacity-50 transition-colors" style={{ fontSize: '12px' }}
title="Zeigt was die APIs tatsächlich zurückgeben" title="Entwickler-Werkzeuge"
> >
{debugLoading ? <Loader2 size={14} className="animate-spin" /> : <Search size={14} />} {showDevTools ? '' : '+'} Tools
Debug
</button>
<button
onClick={handleExtractCover}
className="flex items-center gap-2 bg-card border border-divider px-4 py-2 rounded-lg text-sm text-muted hover:text-ink hover:bg-card/80 transition-colors"
title="Cover aus Ordner-Datei oder MP3-Tag extrahieren"
>
Cover aus Datei
</button>
<button
onClick={handleExtractTags}
className="flex items-center gap-2 bg-card border border-divider px-4 py-2 rounded-lg text-sm text-muted hover:text-ink hover:bg-card/80 transition-colors"
title="ID3-Tags (Artist, Album, Jahr) aus MP3 lesen und Metadaten füllen"
>
Tags lesen
</button>
<button
onClick={handleConnectivity}
disabled={connLoading}
className="flex items-center gap-2 bg-card border border-divider px-4 py-2 rounded-lg text-sm text-muted hover:text-ink hover:bg-card/80 disabled:opacity-50 transition-colors"
title="Prüft ob Backend die externen Metadaten-APIs erreicht"
>
{connLoading ? <Loader2 size={14} className="animate-spin" /> : null}
Connectivity-Check
</button> </button>
</div> </div>
{showDevTools && (
<div className="flex gap-2 mt-2 flex-wrap">
<button
onClick={handleDebug}
disabled={debugLoading}
className="flex items-center gap-1 bg-card border border-divider px-3 py-1 rounded-lg text-muted hover:text-ink transition-colors disabled:opacity-50"
style={{ fontSize: '11px' }}
>
{debugLoading ? <Loader2 size={11} className="animate-spin" /> : null}
API-Debug
</button>
</div>
)}
</div> </div>
</div> </div>
{connData && (
<div className="mb-6 bg-surface border border-divider rounded-xl p-4">
<div className="flex items-center justify-between mb-3">
<h3 className="text-ink" style={{ fontSize: '13px', fontWeight: 600 }}>Externe Verbindungen</h3>
<button onClick={() => setConnData(null)} className="text-muted hover:text-ink">
<X size={14} />
</button>
</div>
{connData.proxy_env && connData.proxy_env !== 'keine' && (
<p className="text-yellow-400 mb-2" style={{ fontSize: '11px' }}>
Proxy-Variablen aktiv: <span className="font-mono">{JSON.stringify(connData.proxy_env)}</span>
</p>
)}
{connData.results?.map((r: any) => (
<div key={r.name} className="flex items-center justify-between py-1.5 border-t border-divider" style={{ fontSize: '12px' }}>
<span className="text-ink font-medium">{r.name}</span>
<span className={r.ok && r.status === 200 ? 'text-primary' : 'text-red-400'} style={{ fontSize: '11px' }}>
{r.ok ? `HTTP ${r.status} · ${r.bytes}B · ${r.ms}ms` : `${r.error} (${r.ms}ms)`}
</span>
</div>
))}
</div>
)}
{debugData && ( {debugData && (
<div className="mb-6 bg-surface border border-divider rounded-xl p-4"> <div className="mb-6 bg-surface border border-divider rounded-xl p-4">
<div className="flex items-center justify-between mb-3"> <div className="flex items-center justify-between mb-3">