Phase 5-9: Matching-Engine, Podcast-Support, Web-Interface + Player
Backend:
- Matching-Orchestrator mit deutschen Serien-Patterns (drei ???, TKKG, ...)
- Vollständige MusicBrainz-Integration (Tracklist → Kapitel, Cover Art Archive)
- OpenLibrary + Google Books als Fallback-Quellen
- Auto-Accept (≥0.75) vs zu_prüfen (0.5-0.75) vs kein Match
- Manuelles Matching: GET /api/items/:id/match/search, POST apply
- RSS-Feed-Manager: feedparser, iTunes Search, periodisches Update
- APScheduler für Podcast-Feed-Updates (konfigurierbares Intervall)
- Podcast-Router: Feed-URL setzen, Episoden, Feed-Suche
- HLS: FFmpeg läuft als Background-Task, wartet auf erstes Segment
- main.py: APScheduler + neue Router eingebunden

Frontend (React + Vite + Tailwind + HLS.js):
- Login-Seite mit Fehlerbehandlung
- Library-Seite: Grid/Listen-Ansicht, Suche, Tag-Filter, Pagination, Scan
- BookCard: Cover, Fortschrittsbalken, zu_prüfen Badge, Quick-Play
- BookDetail: Metadaten, Matching-Panel, Kapitel-Liste, Lesezeichen
- AudioPlayer: HLS.js, Kapitel-Marker auf Fortschrittsbalken, Speed, Sleep-Timer, Lesezeichen, Keyboard-Shortcuts (Space/Arrows)
- MiniPlayer: persistent an Fußzeile, expandierbar
- PodcastDetail: Feed-URL, iTunes-Suche, Episoden-Liste
- Admin-Panel: Benutzer/Bibliotheken/Einstellungen verwalten
- App.tsx: React Router, Auth-Guard, Player-Overlay

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,70 +1,38 @@
|
||||
import os
|
||||
import asyncio
|
||||
import uuid
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
from ..config import get_settings
|
||||
|
||||
|
||||
HLS_SEGMENT_DURATION = 10 # Sekunden pro Segment
|
||||
HLS_SEGMENT_DURATION = 10
|
||||
_running_sessions: dict[str, asyncio.Task] = {}
|
||||
|
||||
|
||||
async def create_hls_session(
|
||||
session_id: str,
|
||||
audio_files: list[str],
|
||||
start_time: float = 0.0,
|
||||
) -> str:
|
||||
"""
|
||||
Erstellt HLS-Segmente via FFmpeg für die gegebenen Audio-Dateien.
|
||||
Gibt den Pfad zum HLS-Verzeichnis zurück.
|
||||
"""
|
||||
async def _run_ffmpeg(session_id: str, audio_files: list[str], start_time: float = 0.0):
|
||||
settings = get_settings()
|
||||
session_dir = os.path.join(settings.hls_cache_dir, session_id)
|
||||
os.makedirs(session_dir, exist_ok=True)
|
||||
|
||||
playlist_path = os.path.join(session_dir, "output.m3u8")
|
||||
|
||||
if len(audio_files) == 1:
|
||||
input_path = audio_files[0]
|
||||
input_args = ["-ss", str(start_time), "-i", audio_files[0]]
|
||||
else:
|
||||
# Mehrere Dateien: Concat-Liste erstellen
|
||||
concat_file = os.path.join(session_dir, "concat.txt")
|
||||
with open(concat_file, "w", encoding="utf-8") as f:
|
||||
for af in audio_files:
|
||||
safe_path = af.replace("\\", "/")
|
||||
f.write(f"file '{safe_path}'\n")
|
||||
input_path = concat_file
|
||||
f.write(f"file '{af.replace(chr(92), '/')}'\n")
|
||||
input_args = ["-f", "concat", "-safe", "0", "-i", concat_file, "-ss", str(start_time)]
|
||||
|
||||
if len(audio_files) == 1:
|
||||
cmd = [
|
||||
"ffmpeg", "-y",
|
||||
"-ss", str(start_time),
|
||||
"-i", input_path,
|
||||
"-c:a", "aac",
|
||||
"-b:a", "192k",
|
||||
"-ac", "2",
|
||||
"-hls_time", str(HLS_SEGMENT_DURATION),
|
||||
"-hls_list_size", "0",
|
||||
"-hls_segment_filename", os.path.join(session_dir, "seg%05d.ts"),
|
||||
"-hls_flags", "independent_segments",
|
||||
playlist_path,
|
||||
]
|
||||
else:
|
||||
cmd = [
|
||||
"ffmpeg", "-y",
|
||||
"-f", "concat", "-safe", "0",
|
||||
"-i", input_path,
|
||||
"-ss", str(start_time),
|
||||
"-c:a", "aac",
|
||||
"-b:a", "192k",
|
||||
"-ac", "2",
|
||||
"-hls_time", str(HLS_SEGMENT_DURATION),
|
||||
"-hls_list_size", "0",
|
||||
"-hls_segment_filename", os.path.join(session_dir, "seg%05d.ts"),
|
||||
"-hls_flags", "independent_segments",
|
||||
playlist_path,
|
||||
]
|
||||
cmd = [
|
||||
"ffmpeg", "-y",
|
||||
*input_args,
|
||||
"-c:a", "aac", "-b:a", "128k", "-ac", "2",
|
||||
"-hls_time", str(HLS_SEGMENT_DURATION),
|
||||
"-hls_list_size", "0",
|
||||
"-hls_segment_filename", os.path.join(session_dir, "seg%05d.ts"),
|
||||
"-hls_flags", "independent_segments",
|
||||
playlist_path,
|
||||
]
|
||||
|
||||
proc = await asyncio.create_subprocess_exec(
|
||||
*cmd,
|
||||
@@ -72,17 +40,49 @@ async def create_hls_session(
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
)
|
||||
_, stderr = await proc.communicate()
|
||||
if proc.returncode != 0 and session_id in _running_sessions:
|
||||
err = stderr.decode(errors="replace") if stderr else "unknown"
|
||||
# Fehler-Datei schreiben damit der Client es merkt
|
||||
with open(os.path.join(session_dir, "error.txt"), "w") as f:
|
||||
f.write(err)
|
||||
|
||||
if proc.returncode != 0:
|
||||
error_msg = stderr.decode(errors="replace") if stderr else "unknown error"
|
||||
raise RuntimeError(f"FFmpeg fehler: {error_msg}")
|
||||
|
||||
def start_hls_session(session_id: str, audio_files: list[str], start_time: float = 0.0) -> str:
    """Launch the FFmpeg transcode for a session as a background task.

    Creates the session directory below the configured HLS cache directory,
    schedules ``_run_ffmpeg`` as an asyncio task, records that task in
    ``_running_sessions`` under ``session_id`` and returns the session
    directory path without waiting for FFmpeg.
    """
    target_dir = os.path.join(get_settings().hls_cache_dir, session_id)
    os.makedirs(target_dir, exist_ok=True)

    # The task handle is kept so the session can be cancelled later.
    _running_sessions[session_id] = asyncio.create_task(
        _run_ffmpeg(session_id, audio_files, start_time)
    )
    return target_dir
|
||||
|
||||
|
||||
async def wait_for_playlist(session_id: str, timeout: float = 60.0) -> bool:
    """Poll the session directory until the stream is playable.

    Returns ``True`` once ``output.m3u8`` exists with a non-zero size and the
    first segment ``seg00000.ts`` exists. Returns ``False`` as soon as an
    ``error.txt`` marker is present, or after roughly ``timeout`` seconds of
    polling in 0.5 second steps.
    """
    base_dir = os.path.join(get_settings().hls_cache_dir, session_id)
    playlist_file = os.path.join(base_dir, "output.m3u8")
    error_marker = os.path.join(base_dir, "error.txt")
    first_segment = os.path.join(base_dir, "seg00000.ts")

    elapsed = 0.0
    while elapsed < timeout:
        if os.path.exists(error_marker):
            return False
        playlist_ready = os.path.exists(playlist_file) and os.path.getsize(playlist_file) > 0
        # A playlist alone is not enough: at least one segment must be on disk.
        if playlist_ready and os.path.exists(first_segment):
            return True
        await asyncio.sleep(0.5)
        elapsed += 0.5
    return False
|
||||
|
||||
|
||||
def cleanup_hls_session(session_id: str):
    """Stop a session's background task and delete its cache directory.

    The task registered for ``session_id`` is removed from
    ``_running_sessions`` and cancelled if it has not finished yet; the
    session directory is then removed recursively, ignoring removal errors.
    """
    target_dir = os.path.join(get_settings().hls_cache_dir, session_id)

    pending = _running_sessions.pop(session_id, None)
    if pending and not pending.done():
        pending.cancel()

    if not os.path.exists(target_dir):
        return
    shutil.rmtree(target_dir, ignore_errors=True)
|
||||
|
||||
@@ -90,19 +90,4 @@ def cleanup_hls_session(session_id: str):
|
||||
def get_hls_session_path(session_id: str) -> Optional[str]:
|
||||
settings = get_settings()
|
||||
session_dir = os.path.join(settings.hls_cache_dir, session_id)
|
||||
playlist = os.path.join(session_dir, "output.m3u8")
|
||||
return session_dir if os.path.exists(playlist) else None
|
||||
|
||||
|
||||
def parse_m3u8_duration(playlist_path: str) -> float:
    """Return the total duration in seconds of an M3U8 playlist.

    Sums the duration field of every ``#EXTINF:<duration>,<title>`` line.
    A line whose duration is not a valid number is skipped instead of
    aborting the whole sum. If the file cannot be opened or read, whatever
    was summed so far (0.0 when nothing was read) is returned.
    """
    prefix = "#EXTINF:"
    total = 0.0
    try:
        with open(playlist_path, "r", encoding="utf-8", errors="replace") as f:
            for line in f:
                if not line.startswith(prefix):
                    continue
                duration_str = line[len(prefix):].split(",", 1)[0].strip()
                try:
                    total += float(duration_str)
                except ValueError:
                    # Malformed entry: ignore it, keep counting the rest.
                    continue
    except OSError:
        # Missing or unreadable playlist is reported as "no known duration".
        pass
    return total
|
||||
return session_dir if os.path.isdir(session_dir) else None
|
||||
|
||||
279
backend/app/services/matcher.py
Normal file
279
backend/app/services/matcher.py
Normal file
@@ -0,0 +1,279 @@
|
||||
"""
|
||||
Matching-Orchestrator:
|
||||
- Erkennt deutsche Hörbuch-Serien (die drei ???, TKKG, ...)
|
||||
- Versucht MusicBrainz → OpenLibrary → Google Books
|
||||
- Lädt Cover herunter
|
||||
- Bewertet Konfidenz und entscheidet über Auto-Accept
|
||||
"""
|
||||
import re
|
||||
import os
|
||||
import logging
|
||||
import httpx
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select
|
||||
|
||||
from ..config import get_settings
|
||||
from ..models.media_item import LibraryItem, BookFile, Chapter
|
||||
from ..models.session import ServerSetting
|
||||
from ..database import AsyncSessionLocal
|
||||
from .matching.base import MatchResult
|
||||
from .matching.musicbrainz import search_musicbrainz, get_release_details
|
||||
from .matching.open_library import search_open_library, get_work_details
|
||||
from .matching.google_books import search_google_books
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
AUTO_ACCEPT_THRESHOLD = 0.75
|
||||
UNCERTAIN_THRESHOLD = 0.50
|
||||
|
||||
# Bekannte deutsche Hörbuch-Serien: (regex, kanonischer Name)
|
||||
SERIES_PATTERNS = [
|
||||
(r"(?i)^(die drei \?\?\?|die drei fragezeichen|drei fragezeichen)\s*[-–]?\s*(?:folge\s*)?(\d+)", "Die drei ???"),
|
||||
(r"(?i)^(tkkg)\s*[-–]?\s*(?:folge\s*)?(\d+)", "TKKG"),
|
||||
(r"(?i)^(fünf freunde|funf freunde)\s*[-–]?\s*(?:band\s*)?(\d+)", "Fünf Freunde"),
|
||||
(r"(?i)^(bibi blocksberg)\s*[-–]?\s*(?:folge\s*)?(\d+)", "Bibi Blocksberg"),
|
||||
(r"(?i)^(benjamin blümchen|benjamin blumchen)\s*[-–]?\s*(?:folge\s*)?(\d+)", "Benjamin Blümchen"),
|
||||
(r"(?i)^(bibi und tina)\s*[-–]?\s*(?:folge\s*)?(\d+)", "Bibi und Tina"),
|
||||
(r"(?i)^(der kleine vampir)\s*[-–]?\s*(?:band\s*)?(\d+)", "Der kleine Vampir"),
|
||||
# Generisch: "Serie - Folge/Band/Teil N - Titel"
|
||||
(r"(?i)^(.+?)\s*[-–]\s*(?:folge|band|teil|nr\.?|#)\s*(\d+)", None),
|
||||
# Generisch: "Serie (Folge N)"
|
||||
(r"(?i)^(.+?)\s*\((?:folge|band|teil|nr\.?|#|episode)\s*(\d+)\)", None),
|
||||
]
|
||||
|
||||
|
||||
def detect_series(title: str) -> tuple[str | None, str | None]:
|
||||
"""Gibt (Serienname, Folgennummer) zurück oder (None, None)."""
|
||||
for pattern, canonical_name in SERIES_PATTERNS:
|
||||
m = re.match(pattern, title.strip())
|
||||
if m:
|
||||
series = canonical_name or m.group(1).strip()
|
||||
episode = m.group(2)
|
||||
return series, episode
|
||||
return None, None
|
||||
|
||||
|
||||
def _title_similarity(a: str, b: str) -> float:
    """Return a simple word-overlap similarity between two strings.

    Both strings are case-folded and split into ``\\w+`` words; the result is
    the number of shared distinct words divided by the size of the larger
    word set. Case folding (rather than lower-casing) makes "ß" and "SS"
    compare equal. Returns 0.0 when either string is empty or contains no
    words.
    """
    if not a or not b:
        return 0.0
    words_a = set(re.findall(r"\w+", a.casefold()))
    words_b = set(re.findall(r"\w+", b.casefold()))
    if not words_a or not words_b:
        return 0.0
    return len(words_a & words_b) / max(len(words_a), len(words_b))
|
||||
|
||||
|
||||
def _score_result(result: MatchResult, query_title: str, query_author: str | None) -> float:
    """Combine a source's own confidence with title and author similarity.

    The source confidence is blended 40/60 with the title similarity. When
    both a query author and a result author are present, that value is
    blended 70/30 with the author similarity. The result is capped at 1.0.
    """
    title_part = _title_similarity(result.title, query_title)
    blended = result.confidence * 0.4 + title_part * 0.6

    if query_author and result.author:
        author_part = _title_similarity(result.author, query_author)
        blended = blended * 0.7 + author_part * 0.3

    return min(blended, 1.0)
|
||||
|
||||
|
||||
async def _download_cover(url: str, item_id: str) -> str | None:
    """Download a cover image and store it in the configured covers directory.

    The file is written as ``<item_id>.png`` when the URL path ends in
    ``.png`` or the response declares ``image/png``; otherwise as
    ``<item_id>.jpg``. Returns the destination path on success and ``None``
    when the download fails or the server does not answer with HTTP 200.
    Failures are logged as warnings and never raised (best effort).
    """
    from urllib.parse import urlparse

    settings = get_settings()
    try:
        async with httpx.AsyncClient(timeout=20, follow_redirects=True) as client:
            r = await client.get(url)
        if r.status_code != 200:
            logger.warning(f"Cover-Download fehlgeschlagen ({url}): HTTP {r.status_code}")
            return None

        # Decide on the extension from the URL path or the declared media type,
        # not from a ".png" substring that may sit anywhere in the query string.
        content_type = r.headers.get("content-type", "").lower()
        is_png = urlparse(url).path.lower().endswith(".png") or content_type.startswith("image/png")
        ext = ".png" if is_png else ".jpg"
        dest = os.path.join(settings.covers_dir, f"{item_id}{ext}")

        os.makedirs(settings.covers_dir, exist_ok=True)
        with open(dest, "wb") as f:
            f.write(r.content)
        return dest
    except Exception as e:
        # Cover download is optional; a failure must not abort matching.
        logger.warning(f"Cover-Download fehlgeschlagen ({url}): {e}")
    return None
|
||||
|
||||
|
||||
async def _apply_match(db: AsyncSession, item: LibraryItem, result: MatchResult, confidence: float):
    """Copy the metadata of a chosen match onto a library item.

    Truthy metadata values from ``result`` overwrite the item's values; the
    subtitle is only filled in when the item has none. The match source, its
    id, the confidence and the update timestamp are always recorded. A cover
    is downloaded only if the item has no cover yet. When the match carries
    chapters, the item's existing chapters are deleted and replaced. The
    "zu_prüfen" tag is removed once the confidence reaches
    ``AUTO_ACCEPT_THRESHOLD``. The caller is responsible for committing.
    """
    # Fields that are overwritten whenever the match provides a truthy value.
    overwrite_fields = (
        "title",
        "author",
        "narrator",
        "description",
        "publisher",
        "publish_year",
        "language",
        "genres",
        "series",
        "series_sequence",
    )
    for field in overwrite_fields:
        value = getattr(result, field)
        if value:
            setattr(item, field, value)

    # The subtitle never replaces an existing one.
    if result.subtitle and not item.subtitle:
        item.subtitle = result.subtitle

    # Provenance of the match.
    item.matched_source = result.source
    item.matched_id = result.source_id
    item.match_confidence = confidence
    item.updated_at = datetime.utcnow()

    # Download a cover only when the item does not have one yet.
    if result.cover_url and not item.cover_path:
        downloaded = await _download_cover(result.cover_url, item.id)
        if downloaded:
            item.cover_path = downloaded

    # Chapters from the match (MusicBrainz track listing) replace existing ones.
    if result.chapters:
        from sqlalchemy import delete
        from ..models.media_item import Chapter

        await db.execute(delete(Chapter).where(Chapter.library_item_id == item.id))
        db.add_all(
            Chapter(
                library_item_id=item.id,
                chapter_index=idx,
                title=ch.get("title", f"Kapitel {idx + 1}"),
                start_seconds=ch.get("start", 0.0),
                end_seconds=ch.get("end", 0.0),
            )
            for idx, ch in enumerate(result.chapters)
        )

    # Drop the review tag once the confidence is high enough.
    if confidence >= AUTO_ACCEPT_THRESHOLD:
        item.tags = [t for t in (item.tags or []) if t != "zu_prüfen"]
|
||||
|
||||
|
||||
async def match_audiobook(item_id: str):
    """Automatically match one library item against the metadata sources.

    Started as a background task after a scan. Does nothing when the item
    does not exist, is match-locked, or the ``autoMatchBooks`` setting is
    ``False``. MusicBrainz is queried first; OpenLibrary and then Google
    Books are only consulted while the best score is still below
    ``UNCERTAIN_THRESHOLD``. A best candidate at or above that threshold is
    applied to the item; in every case the session is committed at the end
    (series fields detected from the title are persisted as well).
    """
    async with AsyncSessionLocal() as db:
        item = (
            await db.execute(select(LibraryItem).where(LibraryItem.id == item_id))
        ).scalar_one_or_none()
        if not item or item.match_locked:
            return

        # Respect the server-wide switch for automatic matching.
        auto_match = (
            await db.execute(select(ServerSetting).where(ServerSetting.key == "autoMatchBooks"))
        ).scalar_one_or_none()
        if auto_match and auto_match.value is False:
            return

        title = item.title or ""
        author = item.author

        # A recognised series yields a cleaner search term.
        series, episode = detect_series(title)
        search_title = title
        if series:
            search_title = f"{series} {episode}" if episode else series
            if not item.series:
                item.series = series
            if not item.series_sequence and episode:
                item.series_sequence = episode

        logger.info(f"Matche: '{title}' (Serie: {series}, Folge: {episode})")

        best: MatchResult | None = None
        best_score = 0.0

        def consider(candidates):
            """Keep the highest-scoring candidate seen so far."""
            nonlocal best, best_score
            for candidate in candidates:
                candidate_score = _score_result(candidate, title, author)
                if candidate_score > best_score:
                    best_score = candidate_score
                    best = candidate

        # 1. MusicBrainz
        try:
            consider(await search_musicbrainz(search_title, author))
        except Exception as e:
            logger.warning(f"MusicBrainz Fehler: {e}")

        # A usable MusicBrainz hit is upgraded to full details (track list + cover).
        if best and best_score >= UNCERTAIN_THRESHOLD and best.source == "musicbrainz":
            try:
                details = await get_release_details(best.source_id)
                if details:
                    details.confidence = best_score
                    best = details
            except Exception as e:
                logger.warning(f"MusicBrainz Details Fehler: {e}")

        # 2. OpenLibrary as fallback
        if best_score < UNCERTAIN_THRESHOLD:
            try:
                consider(await search_open_library(search_title, author))
                if best and best.source == "open_library" and best_score >= UNCERTAIN_THRESHOLD:
                    details = await get_work_details(best.source_id)
                    if details and details.description:
                        best.description = details.description
            except Exception as e:
                logger.warning(f"OpenLibrary Fehler: {e}")

        # 3. Google Books as last fallback
        if best_score < UNCERTAIN_THRESHOLD:
            try:
                consider(await search_google_books(search_title, author))
            except Exception as e:
                logger.warning(f"Google Books Fehler: {e}")

        if best and best_score >= UNCERTAIN_THRESHOLD:
            await _apply_match(db, item, best, best_score)
            logger.info(f"Match angewendet: '{item.title}' ← {best.source} (Konfidenz: {best_score:.2f})")
        else:
            logger.info(f"Kein Match gefunden für '{title}' (beste Konfidenz: {best_score:.2f})")

        await db.commit()
|
||||
|
||||
|
||||
async def search_for_item(title: str, author: str | None = None) -> list[dict]:
    """Search all metadata sources concurrently for manual matching.

    MusicBrainz, OpenLibrary and Google Books are queried in parallel. A
    source that raises contributes no results; the failure is logged as a
    warning instead of being dropped silently. Returns one dict per hit with
    the keys ``source``, ``id``, ``title``, ``author``, ``publishYear``,
    ``cover`` and ``confidence``, sorted by confidence in descending order.
    """

    async def _search_source(name, coro):
        try:
            return await coro
        except Exception as e:
            # Best effort: one failing source must not break the whole search.
            logger.warning(f"Quelle {name} bei manueller Suche fehlgeschlagen: {e}")
            return []

    mb, ol, gb = await asyncio.gather(
        _search_source("MusicBrainz", search_musicbrainz(title, author)),
        _search_source("OpenLibrary", search_open_library(title, author)),
        _search_source("Google Books", search_google_books(title, author)),
    )

    results = [
        {
            "source": r.source,
            "id": r.source_id,
            "title": r.title,
            "author": r.author,
            "publishYear": r.publish_year,
            "cover": r.cover_url,
            "confidence": r.confidence,
        }
        for r in mb + ol + gb
    ]
    results.sort(key=lambda x: x["confidence"], reverse=True)
    return results
|
||||
@@ -1,4 +1,3 @@
|
||||
"""Google Books-Matching — Phase 5."""
|
||||
import httpx
|
||||
from .base import MatchResult
|
||||
|
||||
@@ -10,26 +9,52 @@ async def search_google_books(title: str, author: str | None = None) -> list[Mat
|
||||
if author:
|
||||
q += f' inauthor:"{author}"'
|
||||
|
||||
async with httpx.AsyncClient(timeout=10) as client:
|
||||
resp = await client.get(f"{GB_BASE}/volumes", params={"q": q, "maxResults": 5, "langRestrict": "de"})
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
async with httpx.AsyncClient(timeout=12) as client:
|
||||
try:
|
||||
r = await client.get(
|
||||
f"{GB_BASE}/volumes",
|
||||
params={"q": q, "maxResults": 5, "langRestrict": "de", "printType": "books"},
|
||||
)
|
||||
r.raise_for_status()
|
||||
data = r.json()
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
results = []
|
||||
for item in data.get("items", []):
|
||||
vol = item.get("volumeInfo", {})
|
||||
authors = vol.get("authors", [])
|
||||
results.append(
|
||||
MatchResult(
|
||||
source="google_books",
|
||||
source_id=item.get("id", ""),
|
||||
title=vol.get("title", title),
|
||||
author=authors[0] if authors else None,
|
||||
description=vol.get("description"),
|
||||
publisher=vol.get("publisher"),
|
||||
publish_year=int(vol.get("publishedDate", "0")[:4]) if vol.get("publishedDate") else None,
|
||||
language=vol.get("language"),
|
||||
confidence=0.5,
|
||||
|
||||
cover_url = None
|
||||
image_links = vol.get("imageLinks", {})
|
||||
if image_links:
|
||||
cover_url = (
|
||||
image_links.get("extraLarge")
|
||||
or image_links.get("large")
|
||||
or image_links.get("medium")
|
||||
or image_links.get("thumbnail", "").replace("zoom=1", "zoom=3")
|
||||
)
|
||||
)
|
||||
|
||||
year = None
|
||||
pub_date = vol.get("publishedDate", "")
|
||||
if pub_date and len(pub_date) >= 4:
|
||||
try:
|
||||
year = int(pub_date[:4])
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
results.append(MatchResult(
|
||||
source="google_books",
|
||||
source_id=item.get("id", ""),
|
||||
title=vol.get("title", title),
|
||||
subtitle=vol.get("subtitle"),
|
||||
author=authors[0] if authors else None,
|
||||
description=vol.get("description"),
|
||||
publisher=vol.get("publisher"),
|
||||
publish_year=year,
|
||||
language=vol.get("language"),
|
||||
genres=vol.get("categories", []),
|
||||
cover_url=cover_url,
|
||||
confidence=0.5,
|
||||
))
|
||||
return results
|
||||
|
||||
@@ -1,40 +1,115 @@
|
||||
"""MusicBrainz-Matching — Phase 5."""
|
||||
import httpx
|
||||
import asyncio
|
||||
from .base import MatchResult
|
||||
|
||||
MB_BASE = "https://musicbrainz.org/ws/2"
|
||||
HEADERS = {"User-Agent": "audiolib/1.0 (https://github.com/audiolib)"}
|
||||
CAA_BASE = "https://coverartarchive.org"
|
||||
HEADERS = {"User-Agent": "audiolib/1.0 (contact@audiolib.local)"}
|
||||
_semaphore = asyncio.Semaphore(2) # MusicBrainz Rate-Limit: max 1 req/s
|
||||
|
||||
|
||||
async def _get(client: httpx.AsyncClient, url: str, **params) -> dict:
|
||||
async with _semaphore:
|
||||
await asyncio.sleep(1.1) # MusicBrainz erlaubt 1 req/s
|
||||
r = await client.get(url, params={"fmt": "json", **params}, timeout=15)
|
||||
r.raise_for_status()
|
||||
return r.json()
|
||||
|
||||
|
||||
async def search_musicbrainz(title: str, artist: str | None = None) -> list[MatchResult]:
|
||||
query = f'release:"{title}"'
|
||||
if artist:
|
||||
query += f' AND artist:"{artist}"'
|
||||
query += " AND format:Digital"
|
||||
|
||||
async with httpx.AsyncClient(headers=HEADERS, timeout=10) as client:
|
||||
resp = await client.get(
|
||||
f"{MB_BASE}/release",
|
||||
params={"query": query, "fmt": "json", "limit": 5},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
async with httpx.AsyncClient(headers=HEADERS) as client:
|
||||
try:
|
||||
data = await _get(client, f"{MB_BASE}/release", query=query, limit=5)
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
results = []
|
||||
for release in data.get("releases", []):
|
||||
confidence = release.get("score", 0) / 100.0
|
||||
artist_name = None
|
||||
credits = release.get("artist-credit", [])
|
||||
if credits:
|
||||
artist_name = credits[0].get("name") or credits[0].get("artist", {}).get("name")
|
||||
for rel in data.get("releases", []):
|
||||
confidence = rel.get("score", 0) / 100.0
|
||||
artist_name = _first_artist(rel)
|
||||
release_id = rel.get("id", "")
|
||||
|
||||
results.append(
|
||||
MatchResult(
|
||||
source="musicbrainz",
|
||||
source_id=release.get("id", ""),
|
||||
title=release.get("title", title),
|
||||
author=artist_name,
|
||||
confidence=confidence,
|
||||
)
|
||||
)
|
||||
results.append(MatchResult(
|
||||
source="musicbrainz",
|
||||
source_id=release_id,
|
||||
title=rel.get("title", title),
|
||||
author=artist_name,
|
||||
publish_year=_parse_year(rel.get("date", "")),
|
||||
confidence=confidence,
|
||||
))
|
||||
return results
|
||||
|
||||
|
||||
async def get_release_details(release_id: str) -> MatchResult | None:
    """Load full release details including track list (= chapters) and cover.

    Returns ``None`` when the MusicBrainz release lookup fails. Tracks of all
    media are turned into consecutive chapters (``title``, ``start``, ``end``
    in seconds) using the track length or, failing that, the recording
    length; a missing length counts as 0. The cover lookup against the Cover
    Art Archive is best effort: any failure leaves ``cover_url`` as ``None``.
    The returned result carries a confidence of 1.0.
    """
    async with httpx.AsyncClient(headers=HEADERS) as client:
        try:
            data = await _get(
                client,
                f"{MB_BASE}/release/{release_id}",
                inc="recordings+artists+release-groups",
            )
        except Exception:
            return None

        # Cover art. The archive answers this endpoint with a redirect to the
        # actual index document, so redirects must be followed explicitly.
        cover_url = None
        try:
            caa = await client.get(
                f"{CAA_BASE}/release/{release_id}",
                timeout=10,
                follow_redirects=True,
            )
            if caa.status_code == 200:
                images = caa.json().get("images", [])
                # Prefer the image flagged as front cover, else the first one.
                front = next((img for img in images if img.get("front")), images[0] if images else None)
                if front:
                    cover_url = front.get("thumbnails", {}).get("large") or front.get("image")
        except Exception:
            # Deliberately ignored: a release without cover is still a valid match.
            cover_url = None

    # Track list -> chapters, laid out back to back across all media.
    chapters = []
    offset = 0.0
    for medium in data.get("media", []):
        for track in medium.get("tracks", []):
            duration_ms = track.get("length") or track.get("recording", {}).get("length") or 0
            duration_s = duration_ms / 1000.0
            chapters.append({
                "title": track.get("title", f"Track {track.get('position', '')}"),
                "start": offset,
                "end": offset + duration_s,
            })
            offset += duration_s

    return MatchResult(
        source="musicbrainz",
        source_id=release_id,
        title=data.get("title", ""),
        author=_first_artist(data),
        publish_year=_parse_year(data.get("date", "")),
        cover_url=cover_url,
        chapters=chapters,
        confidence=1.0,
    )
|
||||
|
||||
|
||||
def _first_artist(release: dict) -> str | None:
|
||||
credits = release.get("artist-credit", [])
|
||||
if credits:
|
||||
c = credits[0]
|
||||
return c.get("name") or c.get("artist", {}).get("name")
|
||||
return None
|
||||
|
||||
|
||||
def _parse_year(date_str: str) -> int | None:
|
||||
if date_str and len(date_str) >= 4:
|
||||
try:
|
||||
return int(date_str[:4])
|
||||
except ValueError:
|
||||
pass
|
||||
return None
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
"""OpenLibrary-Matching — Phase 5."""
|
||||
import httpx
|
||||
from .base import MatchResult
|
||||
|
||||
@@ -6,25 +5,55 @@ OL_BASE = "https://openlibrary.org"
|
||||
|
||||
|
||||
async def search_open_library(title: str, author: str | None = None) -> list[MatchResult]:
|
||||
params: dict = {"title": title, "limit": 5}
|
||||
params: dict = {"title": title, "limit": 5, "fields": "key,title,author_name,first_publish_year,cover_i,subject"}
|
||||
if author:
|
||||
params["author"] = author
|
||||
|
||||
async with httpx.AsyncClient(timeout=10) as client:
|
||||
resp = await client.get(f"{OL_BASE}/search.json", params=params)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
async with httpx.AsyncClient(timeout=12) as client:
|
||||
try:
|
||||
r = await client.get(f"{OL_BASE}/search.json", params=params)
|
||||
r.raise_for_status()
|
||||
data = r.json()
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
results = []
|
||||
for doc in data.get("docs", []):
|
||||
results.append(
|
||||
MatchResult(
|
||||
source="open_library",
|
||||
source_id=doc.get("key", ""),
|
||||
title=doc.get("title", title),
|
||||
author=doc.get("author_name", [None])[0] if doc.get("author_name") else None,
|
||||
publish_year=doc.get("first_publish_year"),
|
||||
confidence=0.6,
|
||||
)
|
||||
)
|
||||
cover_url = None
|
||||
if doc.get("cover_i"):
|
||||
cover_url = f"https://covers.openlibrary.org/b/id/{doc['cover_i']}-L.jpg"
|
||||
|
||||
results.append(MatchResult(
|
||||
source="open_library",
|
||||
source_id=doc.get("key", ""),
|
||||
title=doc.get("title", title),
|
||||
author=doc.get("author_name", [None])[0] if doc.get("author_name") else None,
|
||||
publish_year=doc.get("first_publish_year"),
|
||||
cover_url=cover_url,
|
||||
genres=doc.get("subject", [])[:5],
|
||||
confidence=0.55,
|
||||
))
|
||||
return results
|
||||
|
||||
|
||||
async def get_work_details(work_key: str) -> MatchResult | None:
    """Fetch the title and description of an OpenLibrary work.

    ``work_key`` is appended to the OpenLibrary base URL followed by
    ``.json``. Returns ``None`` when the request or JSON decoding fails. The
    description may be a plain string or a dict with a ``value`` entry; both
    forms are reduced to the text.
    """
    async with httpx.AsyncClient(timeout=12) as client:
        try:
            resp = await client.get(f"{OL_BASE}{work_key}.json")
            resp.raise_for_status()
            payload = resp.json()
        except Exception:
            return None

    raw_desc = payload.get("description")
    description = raw_desc.get("value") if isinstance(raw_desc, dict) else raw_desc

    return MatchResult(
        source="open_library",
        source_id=work_key,
        title=payload.get("title", ""),
        description=description,
        confidence=1.0,
    )
|
||||
|
||||
186
backend/app/services/podcast_feed.py
Normal file
186
backend/app/services/podcast_feed.py
Normal file
@@ -0,0 +1,186 @@
|
||||
"""
|
||||
Podcast-Feed-Manager:
|
||||
- RSS-Feed parsen
|
||||
- Episoden mit lokalen Dateien abgleichen
|
||||
- Periodisches Update
|
||||
"""
|
||||
import os
|
||||
import re
|
||||
import logging
|
||||
import httpx
|
||||
import feedparser
|
||||
from datetime import datetime
|
||||
from difflib import SequenceMatcher
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select
|
||||
from ..database import AsyncSessionLocal
|
||||
from ..models.library import Library
|
||||
from ..models.media_item import LibraryItem
|
||||
from ..models.podcast import Podcast, PodcastEpisode
|
||||
from ..services.matcher import _download_cover
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _similarity(a: str, b: str) -> float:
    """Return the ``SequenceMatcher`` ratio of two strings, ignoring case.

    Strings are case-folded before comparison so that "ß" and "SS" compare
    equal. Returns 0.0 when either string is empty or ``None``.
    """
    if not a or not b:
        return 0.0
    return SequenceMatcher(None, a.casefold(), b.casefold()).ratio()
|
||||
|
||||
|
||||
def _parse_duration(s: str | None) -> float:
|
||||
"""Parst "HH:MM:SS" oder "MM:SS" oder reine Sekunden."""
|
||||
if not s:
|
||||
return 0.0
|
||||
s = s.strip()
|
||||
try:
|
||||
if ":" in s:
|
||||
parts = s.split(":")
|
||||
if len(parts) == 3:
|
||||
return int(parts[0]) * 3600 + int(parts[1]) * 60 + float(parts[2])
|
||||
elif len(parts) == 2:
|
||||
return int(parts[0]) * 60 + float(parts[1])
|
||||
return float(s)
|
||||
except (ValueError, IndexError):
|
||||
return 0.0
|
||||
|
||||
|
||||
async def search_podcast_feeds(query: str) -> list[dict]:
    """Search podcast feeds through the iTunes Search API.

    Queries the German store for up to ten podcasts and returns one dict per
    hit with the keys ``title``, ``author``, ``feedUrl``, ``artworkUrl``,
    ``trackCount`` and ``itunesId``. On any error a warning is logged and the
    hits collected so far (possibly none) are returned.
    """

    def _to_hit(entry):
        """Map one raw iTunes result onto the keys exposed to the caller."""
        return {
            "title": entry.get("collectionName", ""),
            "author": entry.get("artistName", ""),
            "feedUrl": entry.get("feedUrl", ""),
            "artworkUrl": entry.get("artworkUrl600") or entry.get("artworkUrl100", ""),
            "trackCount": entry.get("trackCount", 0),
            "itunesId": entry.get("collectionId"),
        }

    found = []
    try:
        async with httpx.AsyncClient(timeout=12) as client:
            resp = await client.get(
                "https://itunes.apple.com/search",
                params={"term": query, "media": "podcast", "limit": 10, "country": "de"},
            )
            resp.raise_for_status()
            payload = resp.json()
            for entry in payload.get("results", []):
                found.append(_to_hit(entry))
    except Exception as e:
        logger.warning(f"iTunes-Suche fehlgeschlagen: {e}")
    return found
|
||||
|
||||
|
||||
async def fetch_and_update_feed(library_item_id: str):
    """Fetch a podcast's RSS feed and update metadata and episodes in the DB.

    Does nothing when the library item or its podcast row is missing, or the
    podcast has no feed URL. A failed download is logged and aborts the
    update. Item metadata (title, author, description, language, cover) is
    only filled in where the item has no value yet. Every feed entry is
    matched to an existing episode by its feed id (entry ``id`` or, failing
    that, ``link``): known episodes are refreshed, unknown ones are created.
    """
    async with AsyncSessionLocal() as db:
        item_result = await db.execute(select(LibraryItem).where(LibraryItem.id == library_item_id))
        item = item_result.scalar_one_or_none()
        if not item:
            return

        podcast_result = await db.execute(select(Podcast).where(Podcast.library_item_id == library_item_id))
        podcast = podcast_result.scalar_one_or_none()
        if not podcast or not podcast.feed_url:
            logger.warning(f"Kein Feed für Item {library_item_id}")
            return

        try:
            async with httpx.AsyncClient(timeout=20, follow_redirects=True) as client:
                r = await client.get(podcast.feed_url)
                r.raise_for_status()
                raw = r.text
        except Exception as e:
            logger.error(f"Feed-Abruf fehlgeschlagen ({podcast.feed_url}): {e}")
            return

        feed = feedparser.parse(raw)
        channel = feed.feed

        # Podcast metadata: only fill fields the item does not have yet.
        if channel.get("title") and not item.title:
            item.title = channel.get("title")
        if channel.get("author") and not item.author:
            item.author = channel.get("author")
        if channel.get("summary") and not item.description:
            item.description = channel.get("summary")
        if channel.get("language") and not item.language:
            item.language = channel.get("language")

        # Cover
        cover_url = None
        if channel.get("image"):
            cover_url = channel.image.get("href") or channel.image.get("url")
        if cover_url and not item.cover_path:
            cover_path = await _download_cover(cover_url, item.id)
            if cover_path:
                item.cover_path = cover_path

        podcast.feed_last_checked = datetime.utcnow()

        # Episodes already stored for this podcast, keyed by their feed id.
        episodes_result = await db.execute(
            select(PodcastEpisode).where(PodcastEpisode.podcast_id == podcast.id)
        )
        existing_episodes = {ep.feed_episode_id: ep for ep in episodes_result.scalars().all()}

        # Process feed entries.
        for entry in feed.entries:
            feed_ep_id = entry.get("id") or entry.get("link", "")
            title = entry.get("title", "")

            # Prefer the summary; fall back to the first content block. Written
            # out explicitly: as a single "A or B if C else D" expression the
            # summary was discarded whenever the entry had no content.
            description = entry.get("summary")
            if not description:
                content = entry.get("content") or [{}]
                description = content[0].get("value", "")

            pub_date = None
            if entry.get("published_parsed"):
                pub_date = datetime(*entry.published_parsed[:6])

            # First audio enclosure is the episode's media URL.
            enclosure_url = None
            for enc in entry.get("enclosures", []):
                if enc.get("type", "").startswith("audio/"):
                    enclosure_url = enc.get("href") or enc.get("url")
                    break
            duration_s = _parse_duration(entry.get("itunes_duration"))

            ep_num = entry.get("itunes_episode")
            season_num = entry.get("itunes_season")

            if feed_ep_id in existing_episodes:
                # Update the existing episode.
                ep = existing_episodes[feed_ep_id]
                ep.title = title
                ep.description = description
                ep.feed_episode_url = enclosure_url
                ep.duration_seconds = duration_s or ep.duration_seconds
            else:
                # Create a new episode.
                ep = PodcastEpisode(
                    podcast_id=podcast.id,
                    title=title,
                    description=description,
                    pub_date=pub_date,
                    duration_seconds=duration_s,
                    feed_episode_id=feed_ep_id,
                    feed_episode_url=enclosure_url,
                    episode_number=str(ep_num) if ep_num else None,
                    season_number=str(season_num) if season_num else None,
                )
                db.add(ep)
                # Remember it so a repeated id within the same feed updates this
                # row instead of inserting a duplicate.
                existing_episodes[feed_ep_id] = ep

        item.updated_at = datetime.utcnow()
        await db.commit()
        logger.info(f"Feed aktualisiert: {item.title} ({len(feed.entries)} Einträge)")
|
||||
|
||||
|
||||
async def update_all_feeds():
    """Update every podcast that has a feed URL (called by the scheduler).

    The podcasts' ids are read first and copied into plain tuples, so the
    listing session is closed before any feed is fetched and the loop never
    touches ORM instances. A failure while updating one feed is logged and
    does not stop the remaining updates.
    """
    async with AsyncSessionLocal() as db:
        result = await db.execute(select(Podcast).where(Podcast.feed_url.isnot(None)))
        targets = [(p.id, p.library_item_id) for p in result.scalars().all()]

    for podcast_id, library_item_id in targets:
        try:
            await fetch_and_update_feed(library_item_id)
        except Exception as e:
            logger.error(f"Feed-Update fehlgeschlagen für {podcast_id}: {e}")
|
||||
Reference in New Issue
Block a user