Clean up UI buttons + smarter series-aware search title
UI: Hide developer tools (Cover-aus-Datei, Tags-lesen,
Connectivity-Check) behind a '+ Tools' toggle. Default view has only
Play, Match, Auto-Match. Tag extraction runs automatically on scan
anyway, so the buttons were noise.
Matcher: Item metadata (series, author from tags or earlier matches)
now flows into the search:
- detect_series() also scans inside title, not only prefix — handles
garbage chars (◆ U+25C6 etc.) before the series name
- New _strip_series_prefix removes "Die drei ???" from search title so
APIs see only the episode title ("Die Villa der Toten") which is how
most databases index these
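- _build_search_title also replaces every character outside a small whitelist
(letters, digits, whitespace, basic punctuation) with a space — this drops
exotic chars such as ◆, ◇ or U+FFFD — and removes bracketed content anywhere
(not just trailing)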
- Effective series falls back to item.series when detect_series misses
- Search call now logs which series the search is using
Example: title='◆Die◆ drei ??? Die Villa der Toten (drei Fragezeichen)'
detected_series='Die drei ???', search_title='Die Villa der Toten'
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -119,12 +119,16 @@ async def debug_match(
|
||||
from ..services.matching.open_library import search_open_library
|
||||
from ..services.matching.google_books import search_google_books
|
||||
from ..services.matching.dnb import search_dnb
|
||||
from ..services.matcher import _build_search_title, detect_series
|
||||
from ..services.matcher import _build_search_title, detect_series, _strip_series_prefix
|
||||
|
||||
series, episode = detect_series(title)
|
||||
search_title = _build_search_title(title)
|
||||
if series and episode:
|
||||
search_title = f"{series} {episode}"
|
||||
elif series:
|
||||
cleaned = _build_search_title(title)
|
||||
search_title = _strip_series_prefix(cleaned, series)
|
||||
else:
|
||||
search_title = _build_search_title(title)
|
||||
|
||||
logger.info(f"DEBUG: title={title!r} → search={search_title!r} series={series!r} episode={episode!r}")
|
||||
|
||||
|
||||
@@ -58,27 +58,37 @@ SERIES_PATTERNS_SERIES_ONLY = [
|
||||
|
||||
def detect_series(title: str) -> tuple[str | None, str | None]:
    """Identify the series (and, when present, the episode) named in a title.

    Three passes are made over the stripped title, in this order:

    1. ``SERIES_PATTERNS_WITH_EPISODE`` matched at the start of the title;
       the second capture group is returned as the episode.
    2. ``SERIES_PATTERNS_SERIES_ONLY`` matched at the start of the title.
    3. ``SERIES_PATTERNS_SERIES_ONLY`` searched anywhere in the title, to
       cope with special characters / junk in front of the series name.
       Only entries that carry a canonical name take part in this pass.

    Args:
        title: Raw item title.

    Returns:
        ``(series, episode)``. ``episode`` is ``None`` for passes 2 and 3;
        both values are ``None`` when nothing matched.
    """
    text = title.strip()

    # Pass 1: series followed by an episode number, anchored at the start.
    for regex, canonical in SERIES_PATTERNS_WITH_EPISODE:
        hit = re.match(regex, text)
        if hit is None:
            continue
        # Only fall back to the captured text when no canonical name is set.
        series_name = canonical if canonical else hit.group(1).strip()
        return series_name, hit.group(2)

    # Pass 2: series without an episode number, anchored at the start.
    for regex, canonical in SERIES_PATTERNS_SERIES_ONLY:
        hit = re.match(regex, text)
        if hit is None:
            continue
        series_name = canonical if canonical else hit.group(1).strip()
        return series_name, None

    # Pass 3: series name anywhere in the title (junk characters in front).
    for regex, canonical in SERIES_PATTERNS_SERIES_ONLY:
        if canonical and re.search(regex, text):
            return canonical, None

    return None, None
|
||||
|
||||
|
||||
def _build_search_title(original: str) -> str:
|
||||
"""Bereinigt Titel für Such-APIs: ??? raus, Folge-N-Prefix raus, Klammer-Suffixe raus."""
|
||||
"""Bereinigt Titel: ???, Sonderzeichen, Folge-N-Prefix, Klammer-Inhalte raus."""
|
||||
t = original
|
||||
# Nicht-druckbare/exotische Zeichen raus (◆, ◇, U+FFFD etc.)
|
||||
t = re.sub(r"[^\w\s\-–:!?,.&'äöüÄÖÜß]", " ", t, flags=re.UNICODE)
|
||||
# ??? entfernen (CQL-Wildcard-Problem)
|
||||
t = re.sub(r"\?{2,}", "", t)
|
||||
# "(Folge 123)" oder "(2007)" Suffixe entfernen
|
||||
t = re.sub(r"\s*\([^)]*\)\s*$", "", t)
|
||||
# "Folge 123 -" oder "Folge 123:" oder "Folge 123 " am Anfang entfernen
|
||||
# Klammer-Inhalte (egal wo) entfernen
|
||||
t = re.sub(r"\([^)]*\)", " ", t)
|
||||
# "Folge 123" Prefixe und Infixes
|
||||
t = re.sub(r"(?i)^\s*(?:folge|band|teil|episode|nr\.?|#)\s*\d+\s*[-:–\.]*\s*", "", t)
|
||||
# "Folge 123" mitten im Titel reduzieren auf nichts
|
||||
t = re.sub(r"(?i)\b(?:folge|band|teil|episode|nr\.?|#)\s*\d+\b\s*[-:–\.]*\s*", " ", t)
|
||||
# Bindestriche/Unterstriche
|
||||
t = re.sub(r"[_\-–]+", " ", t)
|
||||
@@ -86,6 +96,23 @@ def _build_search_title(original: str) -> str:
|
||||
return t
|
||||
|
||||
|
||||
def _strip_series_prefix(search_title: str, series: str) -> str:
    """Remove the series name from a search title so only the episode remains.

    The series name is first passed through ``_build_search_title`` and
    lower-cased, then removed case-insensitively: as a leading prefix if
    possible, otherwise wherever it occurs in the title.

    Args:
        search_title: Title to shorten (callers pass an already cleaned title).
        series: Series name to remove; may be empty.

    Returns:
        The title without the series name. ``search_title`` is returned
        unchanged when ``series`` is empty, when the cleaned series name is
        empty, or when removing the name would leave nothing.
    """
    if not series:
        return search_title

    series_clean = _build_search_title(series).lower()
    if not series_clean:
        # The cleaning step can reduce a name to "" (it strips runs of "?",
        # for instance). An empty regex matches between every character, so
        # the substitution below would turn "Die Villa" into "D i e V i l l a".
        return search_title

    if search_title.lower().startswith(series_clean):
        rest = search_title[len(series_clean):].strip(" -:–.")
        if rest:
            return rest

    # No usable prefix: remove the series name anywhere in the title.
    rest = re.sub(re.escape(series_clean), " ", search_title, flags=re.IGNORECASE)
    rest = re.sub(r"\s+", " ", rest).strip(" -:–.")
    return rest if rest else search_title
|
||||
|
||||
|
||||
def _title_similarity(a: str, b: str) -> float:
|
||||
"""Wort-Überlapp mit Min/Max-Gewichtung — lenient für Teil-Treffer."""
|
||||
if not a or not b:
|
||||
@@ -246,21 +273,27 @@ async def match_audiobook(item_id: str):
|
||||
title = item.title or ""
|
||||
author = item.author
|
||||
|
||||
series, episode = detect_series(title)
|
||||
if series:
|
||||
if episode:
|
||||
search_title = f"{series} {episode}"
|
||||
else:
|
||||
# Serie erkannt, keine Folgennummer → kompletten Titel suchen
|
||||
search_title = _build_search_title(title)
|
||||
if not item.series:
|
||||
item.series = series
|
||||
if not item.series_sequence and episode:
|
||||
item.series_sequence = episode
|
||||
detected_series, episode = detect_series(title)
|
||||
effective_series = detected_series or item.series
|
||||
|
||||
if effective_series and episode:
|
||||
search_title = f"{effective_series} {episode}"
|
||||
elif effective_series:
|
||||
# Series aus Titel entfernen → nur Episode-Teil suchen, präziser
|
||||
cleaned = _build_search_title(title)
|
||||
search_title = _strip_series_prefix(cleaned, effective_series)
|
||||
else:
|
||||
search_title = _build_search_title(title)
|
||||
|
||||
logger.info(f"Matche: orig='{title}' suchTitel='{search_title}' author={author!r} | Quellen: {sources}")
|
||||
if detected_series and not item.series:
|
||||
item.series = detected_series
|
||||
if episode and not item.series_sequence:
|
||||
item.series_sequence = episode
|
||||
|
||||
logger.info(
|
||||
f"Matche: orig='{title}' suchTitel='{search_title}' "
|
||||
f"author={author!r} series={effective_series!r} | Quellen: {sources}"
|
||||
)
|
||||
|
||||
best: MatchResult | None = None
|
||||
best_score = 0.0
|
||||
@@ -311,8 +344,18 @@ async def match_audiobook(item_id: str):
|
||||
|
||||
async def search_for_item(title: str, author: str | None = None) -> list[dict]:
|
||||
"""Suche über alle Quellen – für manuelles Matching. Gibt alle relevanten Felder zurück."""
|
||||
search_title = _build_search_title(title)
|
||||
logger.info(f"Manuelle Suche: orig='{title}' bereinigt='{search_title}' author={author!r}")
|
||||
detected_series, episode = detect_series(title)
|
||||
if detected_series and episode:
|
||||
search_title = f"{detected_series} {episode}"
|
||||
elif detected_series:
|
||||
cleaned = _build_search_title(title)
|
||||
search_title = _strip_series_prefix(cleaned, detected_series)
|
||||
else:
|
||||
search_title = _build_search_title(title)
|
||||
logger.info(
|
||||
f"Manuelle Suche: orig='{title}' bereinigt='{search_title}' "
|
||||
f"author={author!r} series={detected_series!r}"
|
||||
)
|
||||
|
||||
async def _search_source(name: str, coro):
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user