Phase 5-9: Matching-Engine, Podcast-Support, Web-Interface + Player
Backend: - Matching-Orchestrator mit deutschen Serien-Patterns (drei ???, TKKG, ...) - Vollständige MusicBrainz-Integration (Tracklist → Kapitel, Cover Art Archive) - OpenLibrary + Google Books als Fallback-Quellen - Auto-Accept (≥0.75) vs zu_prüfen (0.5-0.75) vs kein Match - Manuelles Matching: GET /api/items/:id/match/search, POST apply - RSS-Feed-Manager: feedparser, iTunes Search, periodisches Update - APScheduler für Podcast-Feed-Updates (konfigurierbares Intervall) - Podcast-Router: Feed-URL setzen, Episoden, Feed-Suche - HLS: FFmpeg läuft als Background-Task, wartet auf ersten Segment - main.py: APScheduler + neue Router eingebunden Frontend (React + Vite + Tailwind + HLS.js): - Login-Seite mit Fehlerbehandlung - Library-Seite: Grid/Listen-Ansicht, Suche, Tag-Filter, Pagination, Scan - BookCard: Cover, Fortschrittsbalken, zu_prüfen Badge, Quick-Play - BookDetail: Metadaten, Matching-Panel, Kapitel-Liste, Lesezeichen - AudioPlayer: HLS.js, Kapitel-Marker auf Fortschrittsbalken, Speed, Sleep-Timer, Lesezeichen, Keyboard-Shortcuts (Space/Arrows) - MiniPlayer: persistent an Fußzeile, expandierbar - PodcastDetail: Feed-URL, iTunes-Suche, Episoden-Liste - Admin-Panel: Benutzer/Bibliotheken/Einstellungen verwalten - App.tsx: React Router, Auth-Guard, Player-Overlay Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,32 +1,32 @@
|
||||
import uuid
|
||||
import logging
|
||||
import os
|
||||
from contextlib import asynccontextmanager
|
||||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
import os
|
||||
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
||||
|
||||
from .database import init_db, AsyncSessionLocal
|
||||
from .config import get_settings
|
||||
from .models import User, Library
|
||||
from .services.auth import hash_password, create_token
|
||||
from .services.file_watcher import start_file_watcher, stop_file_watcher
|
||||
from .services.podcast_feed import update_all_feeds
|
||||
from sqlalchemy import select
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s %(levelname)s %(name)s: %(message)s",
|
||||
)
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s: %(message)s")
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_scheduler = AsyncIOScheduler()
|
||||
|
||||
|
||||
async def _seed_admin():
|
||||
settings = get_settings()
|
||||
async with AsyncSessionLocal() as db:
|
||||
result = await db.execute(select(User).where(User.is_admin == True))
|
||||
if result.scalar_one_or_none():
|
||||
return # Admin existiert bereits
|
||||
|
||||
return
|
||||
logger.info(f"Lege Admin-User an: {settings.admin_username}")
|
||||
admin = User(
|
||||
id=str(uuid.uuid4()),
|
||||
@@ -37,7 +37,6 @@ async def _seed_admin():
|
||||
)
|
||||
db.add(admin)
|
||||
await db.flush()
|
||||
# Token mit echter ID erstellen
|
||||
admin.token = create_token(admin.id)
|
||||
await db.commit()
|
||||
logger.info("Admin-User angelegt.")
|
||||
@@ -48,8 +47,7 @@ async def _seed_default_library():
|
||||
async with AsyncSessionLocal() as db:
|
||||
result = await db.execute(select(Library))
|
||||
if result.scalar_one_or_none():
|
||||
return # Bereits eine Library vorhanden
|
||||
|
||||
return
|
||||
folder_id = str(uuid.uuid4())
|
||||
lib = Library(
|
||||
id=str(uuid.uuid4()),
|
||||
@@ -66,29 +64,28 @@ async def _seed_default_library():
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
# Startup
|
||||
settings = get_settings()
|
||||
os.makedirs(settings.hls_cache_dir, exist_ok=True)
|
||||
os.makedirs(settings.covers_dir, exist_ok=True)
|
||||
os.makedirs(settings.log_dir, exist_ok=True)
|
||||
for d in [settings.hls_cache_dir, settings.covers_dir, settings.log_dir]:
|
||||
os.makedirs(d, exist_ok=True)
|
||||
|
||||
await init_db()
|
||||
await _seed_admin()
|
||||
await _seed_default_library()
|
||||
await start_file_watcher()
|
||||
|
||||
# Podcast-Feed-Scheduler
|
||||
_scheduler.add_job(update_all_feeds, "interval", hours=settings.podcast_update_interval_hours, id="feed_update")
|
||||
_scheduler.start()
|
||||
|
||||
logger.info("Audiolib gestartet.")
|
||||
yield
|
||||
# Shutdown
|
||||
|
||||
stop_file_watcher()
|
||||
_scheduler.shutdown(wait=False)
|
||||
logger.info("Audiolib gestoppt.")
|
||||
|
||||
|
||||
app = FastAPI(
|
||||
title="Audiolib",
|
||||
version="2.4.0",
|
||||
description="Selbst gehosteter Audiobook-Server, API-kompatibel mit Audiobookshelf",
|
||||
lifespan=lifespan,
|
||||
)
|
||||
app = FastAPI(title="Audiolib", version="2.4.0", lifespan=lifespan)
|
||||
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
@@ -98,13 +95,12 @@ app.add_middleware(
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
# Cover-Dateien direkt ausliefern
|
||||
settings = get_settings()
|
||||
if os.path.exists(settings.covers_dir):
|
||||
app.mount("/covers", StaticFiles(directory=settings.covers_dir), name="covers")
|
||||
|
||||
# Router registrieren
|
||||
from .routers import auth, libraries, items, stream, me, users, settings as settings_router
|
||||
from .routers import matching, podcasts
|
||||
|
||||
app.include_router(auth.router)
|
||||
app.include_router(libraries.router)
|
||||
@@ -113,3 +109,5 @@ app.include_router(stream.router)
|
||||
app.include_router(me.router)
|
||||
app.include_router(users.router)
|
||||
app.include_router(settings_router.router)
|
||||
app.include_router(matching.router)
|
||||
app.include_router(podcasts.router)
|
||||
|
||||
124
backend/app/routers/matching.py
Normal file
124
backend/app/routers/matching.py
Normal file
@@ -0,0 +1,124 @@
|
||||
import asyncio
|
||||
from fastapi import APIRouter, Depends, HTTPException, BackgroundTasks
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select
|
||||
from ..dependencies import get_db, get_current_user, require_admin
|
||||
from ..models.user import User
|
||||
from ..models.media_item import LibraryItem
|
||||
from ..services.matcher import match_audiobook, search_for_item, _apply_match, _score_result
|
||||
from ..services.matching.musicbrainz import get_release_details
|
||||
from ..services.matching.open_library import get_work_details
|
||||
from ..services.matching.google_books import search_google_books
|
||||
from ..services.matching.base import MatchResult
|
||||
from datetime import datetime
|
||||
|
||||
router = APIRouter(prefix="/api/items", tags=["matching"])
|
||||
|
||||
|
||||
@router.post("/{item_id}/match")
|
||||
async def trigger_match(
|
||||
item_id: str,
|
||||
background_tasks: BackgroundTasks,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
result = await db.execute(select(LibraryItem).where(LibraryItem.id == item_id))
|
||||
item = result.scalar_one_or_none()
|
||||
if not item:
|
||||
raise HTTPException(status_code=404, detail="Item not found")
|
||||
|
||||
background_tasks.add_task(match_audiobook, item_id)
|
||||
return {"message": "Matching gestartet", "itemId": item_id}
|
||||
|
||||
|
||||
@router.get("/{item_id}/match/search")
|
||||
async def search_match(
|
||||
item_id: str,
|
||||
q: str | None = None,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
result = await db.execute(select(LibraryItem).where(LibraryItem.id == item_id))
|
||||
item = result.scalar_one_or_none()
|
||||
if not item:
|
||||
raise HTTPException(status_code=404, detail="Item not found")
|
||||
|
||||
query = q or item.title or ""
|
||||
author = item.author if not q else None
|
||||
|
||||
results = await search_for_item(query, author)
|
||||
return {"results": results}
|
||||
|
||||
|
||||
@router.post("/{item_id}/match/apply")
|
||||
async def apply_match(
|
||||
item_id: str,
|
||||
body: dict,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
"""
|
||||
Wendet einen manuell gewählten Match-Treffer an.
|
||||
body: { source, id, title, author, ... }
|
||||
"""
|
||||
result = await db.execute(select(LibraryItem).where(LibraryItem.id == item_id))
|
||||
item = result.scalar_one_or_none()
|
||||
if not item:
|
||||
raise HTTPException(status_code=404, detail="Item not found")
|
||||
|
||||
source = body.get("source", "manual")
|
||||
source_id = body.get("id", "")
|
||||
|
||||
# Versuche Details zu laden wenn MusicBrainz/OpenLibrary
|
||||
match_result = None
|
||||
if source == "musicbrainz":
|
||||
match_result = await get_release_details(source_id)
|
||||
elif source == "open_library":
|
||||
from ..services.matching.open_library import get_work_details
|
||||
match_result = await get_work_details(source_id)
|
||||
|
||||
if not match_result:
|
||||
# Fallback: nur die übergebenen Daten verwenden
|
||||
match_result = MatchResult(
|
||||
source=source,
|
||||
source_id=source_id,
|
||||
title=body.get("title", item.title or ""),
|
||||
author=body.get("author"),
|
||||
publish_year=body.get("publishYear"),
|
||||
cover_url=body.get("cover"),
|
||||
confidence=1.0,
|
||||
)
|
||||
|
||||
match_result.confidence = 1.0 # Manuell → immer akzeptieren
|
||||
await _apply_match(db, item, match_result, confidence=1.0)
|
||||
item.match_locked = True
|
||||
item.updated_at = datetime.utcnow()
|
||||
await db.commit()
|
||||
await db.refresh(item)
|
||||
|
||||
from ..routers.items import _enrich_item_with_files
|
||||
return await _enrich_item_with_files(item, db)
|
||||
|
||||
|
||||
@router.delete("/{item_id}/match")
|
||||
async def clear_match(
|
||||
item_id: str,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
result = await db.execute(select(LibraryItem).where(LibraryItem.id == item_id))
|
||||
item = result.scalar_one_or_none()
|
||||
if not item:
|
||||
raise HTTPException(status_code=404, detail="Item not found")
|
||||
|
||||
item.matched_source = "none"
|
||||
item.matched_id = None
|
||||
item.match_confidence = 0.0
|
||||
item.match_locked = False
|
||||
tags = item.tags or []
|
||||
if "zu_prüfen" not in tags:
|
||||
tags.append("zu_prüfen")
|
||||
item.tags = tags
|
||||
item.updated_at = datetime.utcnow()
|
||||
await db.commit()
|
||||
return {"success": True}
|
||||
178
backend/app/routers/podcasts.py
Normal file
178
backend/app/routers/podcasts.py
Normal file
@@ -0,0 +1,178 @@
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from fastapi import APIRouter, Depends, HTTPException, BackgroundTasks
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select
|
||||
from ..dependencies import get_db, get_current_user, require_admin
|
||||
from ..models.user import User
|
||||
from ..models.library import Library
|
||||
from ..models.media_item import LibraryItem
|
||||
from ..models.podcast import Podcast, PodcastEpisode
|
||||
from ..services.podcast_feed import fetch_and_update_feed, search_podcast_feeds
|
||||
|
||||
router = APIRouter(prefix="/api/podcasts", tags=["podcasts"])
|
||||
|
||||
|
||||
def _episode_out(ep: PodcastEpisode) -> dict:
|
||||
return {
|
||||
"id": ep.id,
|
||||
"podcastId": ep.podcast_id,
|
||||
"title": ep.title,
|
||||
"description": ep.description,
|
||||
"episode": ep.episode_number,
|
||||
"season": ep.season_number,
|
||||
"pubDate": ep.pub_date.isoformat() if ep.pub_date else None,
|
||||
"duration": ep.duration_seconds,
|
||||
"size": ep.size_bytes,
|
||||
"path": ep.path,
|
||||
"feedEpisodeId": ep.feed_episode_id,
|
||||
"feedEpisodeUrl": ep.feed_episode_url,
|
||||
"explicit": ep.explicit,
|
||||
"addedAt": int(ep.created_at.timestamp() * 1000) if ep.created_at else 0,
|
||||
}
|
||||
|
||||
|
||||
def _podcast_out(podcast: Podcast, item: LibraryItem, episodes: list[PodcastEpisode]) -> dict:
|
||||
return {
|
||||
"id": item.id,
|
||||
"libraryId": item.library_id,
|
||||
"title": item.title,
|
||||
"author": item.author or podcast.author,
|
||||
"description": item.description,
|
||||
"cover": f"/api/items/{item.id}/cover" if item.cover_path else None,
|
||||
"feedUrl": podcast.feed_url,
|
||||
"feedLastChecked": podcast.feed_last_checked.isoformat() if podcast.feed_last_checked else None,
|
||||
"updateIntervalHours": podcast.update_interval_hours,
|
||||
"tags": item.tags or [],
|
||||
"episodes": [_episode_out(ep) for ep in episodes],
|
||||
"numEpisodes": len(episodes),
|
||||
"matchedSource": item.matched_source,
|
||||
}
|
||||
|
||||
|
||||
@router.get("")
|
||||
async def list_podcasts(
|
||||
library_id: str | None = None,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
query = select(LibraryItem).where(LibraryItem.media_type == "podcast")
|
||||
if library_id:
|
||||
query = query.where(LibraryItem.library_id == library_id)
|
||||
items_result = await db.execute(query)
|
||||
items = items_result.scalars().all()
|
||||
|
||||
result = []
|
||||
for item in items:
|
||||
podcast_result = await db.execute(select(Podcast).where(Podcast.library_item_id == item.id))
|
||||
podcast = podcast_result.scalar_one_or_none()
|
||||
if not podcast:
|
||||
continue
|
||||
ep_result = await db.execute(
|
||||
select(PodcastEpisode).where(PodcastEpisode.podcast_id == podcast.id)
|
||||
.order_by(PodcastEpisode.pub_date.desc())
|
||||
)
|
||||
episodes = ep_result.scalars().all()
|
||||
result.append(_podcast_out(podcast, item, episodes))
|
||||
|
||||
return {"podcasts": result}
|
||||
|
||||
|
||||
@router.get("/search")
|
||||
async def search_feeds(
|
||||
q: str,
|
||||
current_user: User = Depends(get_current_user),
|
||||
):
|
||||
results = await search_podcast_feeds(q)
|
||||
return {"results": results}
|
||||
|
||||
|
||||
@router.get("/{item_id}")
|
||||
async def get_podcast(
|
||||
item_id: str,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
item_result = await db.execute(select(LibraryItem).where(LibraryItem.id == item_id))
|
||||
item = item_result.scalar_one_or_none()
|
||||
if not item or item.media_type != "podcast":
|
||||
raise HTTPException(status_code=404, detail="Podcast not found")
|
||||
|
||||
podcast_result = await db.execute(select(Podcast).where(Podcast.library_item_id == item_id))
|
||||
podcast = podcast_result.scalar_one_or_none()
|
||||
if not podcast:
|
||||
raise HTTPException(status_code=404, detail="Podcast data not found")
|
||||
|
||||
ep_result = await db.execute(
|
||||
select(PodcastEpisode).where(PodcastEpisode.podcast_id == podcast.id)
|
||||
.order_by(PodcastEpisode.pub_date.desc())
|
||||
)
|
||||
episodes = ep_result.scalars().all()
|
||||
return _podcast_out(podcast, item, episodes)
|
||||
|
||||
|
||||
@router.patch("/{item_id}/feed")
|
||||
async def set_feed_url(
|
||||
item_id: str,
|
||||
body: dict,
|
||||
background_tasks: BackgroundTasks,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
item_result = await db.execute(select(LibraryItem).where(LibraryItem.id == item_id))
|
||||
item = item_result.scalar_one_or_none()
|
||||
if not item:
|
||||
raise HTTPException(status_code=404, detail="Item not found")
|
||||
|
||||
podcast_result = await db.execute(select(Podcast).where(Podcast.library_item_id == item_id))
|
||||
podcast = podcast_result.scalar_one_or_none()
|
||||
|
||||
if not podcast:
|
||||
podcast = Podcast(library_item_id=item_id)
|
||||
db.add(podcast)
|
||||
|
||||
feed_url = body.get("feedUrl", "")
|
||||
if not feed_url:
|
||||
raise HTTPException(status_code=400, detail="feedUrl required")
|
||||
|
||||
podcast.feed_url = feed_url
|
||||
if body.get("updateIntervalHours"):
|
||||
podcast.update_interval_hours = int(body["updateIntervalHours"])
|
||||
|
||||
await db.commit()
|
||||
background_tasks.add_task(fetch_and_update_feed, item_id)
|
||||
return {"success": True, "message": "Feed wird aktualisiert..."}
|
||||
|
||||
|
||||
@router.post("/{item_id}/update-feed")
|
||||
async def update_feed(
|
||||
item_id: str,
|
||||
background_tasks: BackgroundTasks,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
podcast_result = await db.execute(select(Podcast).where(Podcast.library_item_id == item_id))
|
||||
podcast = podcast_result.scalar_one_or_none()
|
||||
if not podcast or not podcast.feed_url:
|
||||
raise HTTPException(status_code=400, detail="Kein Feed konfiguriert")
|
||||
|
||||
background_tasks.add_task(fetch_and_update_feed, item_id)
|
||||
return {"success": True}
|
||||
|
||||
|
||||
@router.get("/{item_id}/episodes")
|
||||
async def get_episodes(
|
||||
item_id: str,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
podcast_result = await db.execute(select(Podcast).where(Podcast.library_item_id == item_id))
|
||||
podcast = podcast_result.scalar_one_or_none()
|
||||
if not podcast:
|
||||
raise HTTPException(status_code=404, detail="Podcast not found")
|
||||
|
||||
ep_result = await db.execute(
|
||||
select(PodcastEpisode).where(PodcastEpisode.podcast_id == podcast.id)
|
||||
.order_by(PodcastEpisode.pub_date.desc())
|
||||
)
|
||||
return {"episodes": [_episode_out(ep) for ep in ep_result.scalars().all()]}
|
||||
@@ -10,7 +10,7 @@ from ..models.user import User
|
||||
from ..models.media_item import LibraryItem, BookFile, Chapter
|
||||
from ..models.session import PlaybackSession
|
||||
from ..models.progress import MediaProgress
|
||||
from ..services.hls import create_hls_session, cleanup_hls_session, get_hls_session_path
|
||||
from ..services.hls import start_hls_session, wait_for_playlist, cleanup_hls_session
|
||||
from ..config import get_settings
|
||||
|
||||
router = APIRouter(tags=["stream"])
|
||||
@@ -34,14 +34,13 @@ async def start_playback(
|
||||
)
|
||||
files = files_result.scalars().all()
|
||||
if not files:
|
||||
raise HTTPException(status_code=400, detail="No audio files for this item")
|
||||
raise HTTPException(status_code=400, detail="Keine Audio-Dateien vorhanden")
|
||||
|
||||
chapters_result = await db.execute(
|
||||
select(Chapter).where(Chapter.library_item_id == item_id).order_by(Chapter.chapter_index)
|
||||
)
|
||||
chapters = chapters_result.scalars().all()
|
||||
|
||||
# Fortschritt ermitteln
|
||||
progress_result = await db.execute(
|
||||
select(MediaProgress).where(
|
||||
MediaProgress.user_id == current_user.id,
|
||||
@@ -54,10 +53,14 @@ async def start_playback(
|
||||
current_time = float(body["startTime"])
|
||||
|
||||
session_id = str(uuid.uuid4())
|
||||
|
||||
# HLS-Session asynchron starten
|
||||
audio_paths = [f.path for f in files]
|
||||
hls_dir = await create_hls_session(session_id, audio_paths, start_time=0.0)
|
||||
hls_dir = start_hls_session(session_id, audio_paths, start_time=0.0)
|
||||
|
||||
# Warten bis erste Segmente da sind (max. 60s)
|
||||
ready = await wait_for_playlist(session_id, timeout=60.0)
|
||||
if not ready:
|
||||
cleanup_hls_session(session_id)
|
||||
raise HTTPException(status_code=500, detail="HLS-Konvertierung fehlgeschlagen")
|
||||
|
||||
session = PlaybackSession(
|
||||
id=session_id,
|
||||
@@ -75,35 +78,9 @@ async def start_playback(
|
||||
db.add(session)
|
||||
await db.commit()
|
||||
|
||||
settings = get_settings()
|
||||
# URL-Basis relativ — wird durch nginx weitergeleitet
|
||||
hls_url = f"/hls/{session_id}/output.m3u8"
|
||||
|
||||
audio_tracks = [
|
||||
{
|
||||
"index": 0,
|
||||
"startOffset": 0.0,
|
||||
"duration": item.duration_seconds or 0.0,
|
||||
"title": "Part 1",
|
||||
"contentUrl": hls_url,
|
||||
"mimeType": "application/x-mpegURL",
|
||||
"metadata": {
|
||||
"filename": "output.m3u8",
|
||||
"ext": ".m3u8",
|
||||
"path": hls_url,
|
||||
"relPath": "output.m3u8",
|
||||
"size": 0,
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
chapters_out = [
|
||||
{
|
||||
"id": c.chapter_index,
|
||||
"start": c.start_seconds,
|
||||
"end": c.end_seconds,
|
||||
"title": c.title,
|
||||
}
|
||||
{"id": c.chapter_index, "start": c.start_seconds, "end": c.end_seconds, "title": c.title}
|
||||
for c in chapters
|
||||
]
|
||||
|
||||
@@ -119,7 +96,7 @@ async def start_playback(
|
||||
"displayAuthor": item.author,
|
||||
"coverPath": f"/api/items/{item_id}/cover" if item.cover_path else None,
|
||||
"duration": item.duration_seconds or 0.0,
|
||||
"playMethod": 0, # 0 = HLS Transcode
|
||||
"playMethod": 0,
|
||||
"mediaPlayer": body.get("mediaPlayer", ""),
|
||||
"deviceInfo": body.get("deviceInfo", {}),
|
||||
"serverVersion": "2.4.0",
|
||||
@@ -130,7 +107,15 @@ async def start_playback(
|
||||
"currentTime": current_time,
|
||||
"startedAt": int(datetime.utcnow().timestamp() * 1000),
|
||||
"updatedAt": int(datetime.utcnow().timestamp() * 1000),
|
||||
"audioTracks": audio_tracks,
|
||||
"audioTracks": [{
|
||||
"index": 0,
|
||||
"startOffset": 0.0,
|
||||
"duration": item.duration_seconds or 0.0,
|
||||
"title": "Part 1",
|
||||
"contentUrl": hls_url,
|
||||
"mimeType": "application/x-mpegURL",
|
||||
"metadata": {"filename": "output.m3u8", "ext": ".m3u8", "path": hls_url, "relPath": "output.m3u8", "size": 0},
|
||||
}],
|
||||
"videoTrack": None,
|
||||
}
|
||||
|
||||
@@ -138,15 +123,13 @@ async def start_playback(
|
||||
@router.post("/api/playback-session/{session_id}/sync")
|
||||
async def sync_session(
|
||||
session_id: str,
|
||||
body: dict = {},
|
||||
body: dict | None = None,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
body = body or {}
|
||||
result = await db.execute(
|
||||
select(PlaybackSession).where(
|
||||
PlaybackSession.id == session_id,
|
||||
PlaybackSession.user_id == current_user.id,
|
||||
)
|
||||
select(PlaybackSession).where(PlaybackSession.id == session_id, PlaybackSession.user_id == current_user.id)
|
||||
)
|
||||
session = result.scalar_one_or_none()
|
||||
if not session:
|
||||
@@ -154,13 +137,10 @@ async def sync_session(
|
||||
|
||||
current_time = float(body.get("currentTime", session.current_time))
|
||||
duration = float(body.get("duration", session.duration))
|
||||
time_listening = float(body.get("timeListening", 0))
|
||||
|
||||
session.current_time = current_time
|
||||
session.duration = duration
|
||||
session.updated_at = datetime.utcnow()
|
||||
|
||||
# Fortschritt persistieren
|
||||
progress_result = await db.execute(
|
||||
select(MediaProgress).where(
|
||||
MediaProgress.user_id == current_user.id,
|
||||
@@ -200,39 +180,28 @@ async def close_session(
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
result = await db.execute(
|
||||
select(PlaybackSession).where(
|
||||
PlaybackSession.id == session_id,
|
||||
PlaybackSession.user_id == current_user.id,
|
||||
)
|
||||
select(PlaybackSession).where(PlaybackSession.id == session_id, PlaybackSession.user_id == current_user.id)
|
||||
)
|
||||
session = result.scalar_one_or_none()
|
||||
if not session:
|
||||
raise HTTPException(status_code=404, detail="Session not found")
|
||||
|
||||
session.is_active = False
|
||||
await db.commit()
|
||||
|
||||
# HLS-Temp-Dateien bereinigen
|
||||
cleanup_hls_session(session_id)
|
||||
return {"success": True}
|
||||
|
||||
|
||||
@router.get("/hls/{session_id}/{filename}")
|
||||
@router.get("/hls/{session_id}/{filename:path}")
|
||||
async def serve_hls(
|
||||
session_id: str,
|
||||
filename: str,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
# Session prüfen
|
||||
result = await db.execute(
|
||||
select(PlaybackSession).where(
|
||||
PlaybackSession.id == session_id,
|
||||
PlaybackSession.user_id == current_user.id,
|
||||
)
|
||||
select(PlaybackSession).where(PlaybackSession.id == session_id, PlaybackSession.user_id == current_user.id)
|
||||
)
|
||||
session = result.scalar_one_or_none()
|
||||
if not session:
|
||||
if not result.scalar_one_or_none():
|
||||
raise HTTPException(status_code=404, detail="Session not found")
|
||||
|
||||
settings = get_settings()
|
||||
@@ -242,7 +211,4 @@ async def serve_hls(
|
||||
|
||||
if filename.endswith(".m3u8"):
|
||||
return FileResponse(file_path, media_type="application/x-mpegURL")
|
||||
elif filename.endswith(".ts"):
|
||||
return FileResponse(file_path, media_type="video/MP2T")
|
||||
else:
|
||||
return FileResponse(file_path)
|
||||
return FileResponse(file_path, media_type="video/MP2T")
|
||||
|
||||
@@ -1,70 +1,38 @@
|
||||
import os
|
||||
import asyncio
|
||||
import uuid
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
from ..config import get_settings
|
||||
|
||||
|
||||
HLS_SEGMENT_DURATION = 10 # Sekunden pro Segment
|
||||
HLS_SEGMENT_DURATION = 10
|
||||
_running_sessions: dict[str, asyncio.Task] = {}
|
||||
|
||||
|
||||
async def create_hls_session(
|
||||
session_id: str,
|
||||
audio_files: list[str],
|
||||
start_time: float = 0.0,
|
||||
) -> str:
|
||||
"""
|
||||
Erstellt HLS-Segmente via FFmpeg für die gegebenen Audio-Dateien.
|
||||
Gibt den Pfad zum HLS-Verzeichnis zurück.
|
||||
"""
|
||||
async def _run_ffmpeg(session_id: str, audio_files: list[str], start_time: float = 0.0):
|
||||
settings = get_settings()
|
||||
session_dir = os.path.join(settings.hls_cache_dir, session_id)
|
||||
os.makedirs(session_dir, exist_ok=True)
|
||||
|
||||
playlist_path = os.path.join(session_dir, "output.m3u8")
|
||||
|
||||
if len(audio_files) == 1:
|
||||
input_path = audio_files[0]
|
||||
input_args = ["-ss", str(start_time), "-i", audio_files[0]]
|
||||
else:
|
||||
# Mehrere Dateien: Concat-Liste erstellen
|
||||
concat_file = os.path.join(session_dir, "concat.txt")
|
||||
with open(concat_file, "w", encoding="utf-8") as f:
|
||||
for af in audio_files:
|
||||
safe_path = af.replace("\\", "/")
|
||||
f.write(f"file '{safe_path}'\n")
|
||||
input_path = concat_file
|
||||
f.write(f"file '{af.replace(chr(92), '/')}'\n")
|
||||
input_args = ["-f", "concat", "-safe", "0", "-i", concat_file, "-ss", str(start_time)]
|
||||
|
||||
if len(audio_files) == 1:
|
||||
cmd = [
|
||||
"ffmpeg", "-y",
|
||||
"-ss", str(start_time),
|
||||
"-i", input_path,
|
||||
"-c:a", "aac",
|
||||
"-b:a", "192k",
|
||||
"-ac", "2",
|
||||
"-hls_time", str(HLS_SEGMENT_DURATION),
|
||||
"-hls_list_size", "0",
|
||||
"-hls_segment_filename", os.path.join(session_dir, "seg%05d.ts"),
|
||||
"-hls_flags", "independent_segments",
|
||||
playlist_path,
|
||||
]
|
||||
else:
|
||||
cmd = [
|
||||
"ffmpeg", "-y",
|
||||
"-f", "concat", "-safe", "0",
|
||||
"-i", input_path,
|
||||
"-ss", str(start_time),
|
||||
"-c:a", "aac",
|
||||
"-b:a", "192k",
|
||||
"-ac", "2",
|
||||
"-hls_time", str(HLS_SEGMENT_DURATION),
|
||||
"-hls_list_size", "0",
|
||||
"-hls_segment_filename", os.path.join(session_dir, "seg%05d.ts"),
|
||||
"-hls_flags", "independent_segments",
|
||||
playlist_path,
|
||||
]
|
||||
cmd = [
|
||||
"ffmpeg", "-y",
|
||||
*input_args,
|
||||
"-c:a", "aac", "-b:a", "128k", "-ac", "2",
|
||||
"-hls_time", str(HLS_SEGMENT_DURATION),
|
||||
"-hls_list_size", "0",
|
||||
"-hls_segment_filename", os.path.join(session_dir, "seg%05d.ts"),
|
||||
"-hls_flags", "independent_segments",
|
||||
playlist_path,
|
||||
]
|
||||
|
||||
proc = await asyncio.create_subprocess_exec(
|
||||
*cmd,
|
||||
@@ -72,17 +40,49 @@ async def create_hls_session(
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
)
|
||||
_, stderr = await proc.communicate()
|
||||
if proc.returncode != 0 and session_id in _running_sessions:
|
||||
err = stderr.decode(errors="replace") if stderr else "unknown"
|
||||
# Fehler-Datei schreiben damit der Client es merkt
|
||||
with open(os.path.join(session_dir, "error.txt"), "w") as f:
|
||||
f.write(err)
|
||||
|
||||
if proc.returncode != 0:
|
||||
error_msg = stderr.decode(errors="replace") if stderr else "unknown error"
|
||||
raise RuntimeError(f"FFmpeg fehler: {error_msg}")
|
||||
|
||||
def start_hls_session(session_id: str, audio_files: list[str], start_time: float = 0.0) -> str:
|
||||
"""Startet FFmpeg als Background-Task. Gibt den Session-Pfad zurück."""
|
||||
settings = get_settings()
|
||||
session_dir = os.path.join(settings.hls_cache_dir, session_id)
|
||||
os.makedirs(session_dir, exist_ok=True)
|
||||
|
||||
task = asyncio.create_task(_run_ffmpeg(session_id, audio_files, start_time))
|
||||
_running_sessions[session_id] = task
|
||||
return session_dir
|
||||
|
||||
|
||||
async def wait_for_playlist(session_id: str, timeout: float = 60.0) -> bool:
|
||||
"""Wartet bis das erste Segment fertig ist (max. timeout Sekunden)."""
|
||||
settings = get_settings()
|
||||
playlist = os.path.join(settings.hls_cache_dir, session_id, "output.m3u8")
|
||||
error_file = os.path.join(settings.hls_cache_dir, session_id, "error.txt")
|
||||
waited = 0.0
|
||||
while waited < timeout:
|
||||
if os.path.exists(error_file):
|
||||
return False
|
||||
if os.path.exists(playlist) and os.path.getsize(playlist) > 0:
|
||||
# Warte auf mindestens 1 Segment
|
||||
seg0 = os.path.join(settings.hls_cache_dir, session_id, "seg00000.ts")
|
||||
if os.path.exists(seg0):
|
||||
return True
|
||||
await asyncio.sleep(0.5)
|
||||
waited += 0.5
|
||||
return False
|
||||
|
||||
|
||||
def cleanup_hls_session(session_id: str):
|
||||
settings = get_settings()
|
||||
session_dir = os.path.join(settings.hls_cache_dir, session_id)
|
||||
task = _running_sessions.pop(session_id, None)
|
||||
if task and not task.done():
|
||||
task.cancel()
|
||||
if os.path.exists(session_dir):
|
||||
shutil.rmtree(session_dir, ignore_errors=True)
|
||||
|
||||
@@ -90,19 +90,4 @@ def cleanup_hls_session(session_id: str):
|
||||
def get_hls_session_path(session_id: str) -> Optional[str]:
|
||||
settings = get_settings()
|
||||
session_dir = os.path.join(settings.hls_cache_dir, session_id)
|
||||
playlist = os.path.join(session_dir, "output.m3u8")
|
||||
return session_dir if os.path.exists(playlist) else None
|
||||
|
||||
|
||||
def parse_m3u8_duration(playlist_path: str) -> float:
|
||||
"""Berechnet Gesamtdauer aus M3U8-Playlist."""
|
||||
total = 0.0
|
||||
try:
|
||||
with open(playlist_path, "r") as f:
|
||||
for line in f:
|
||||
if line.startswith("#EXTINF:"):
|
||||
duration_str = line.split(":")[1].split(",")[0]
|
||||
total += float(duration_str)
|
||||
except Exception:
|
||||
pass
|
||||
return total
|
||||
return session_dir if os.path.isdir(session_dir) else None
|
||||
|
||||
279
backend/app/services/matcher.py
Normal file
279
backend/app/services/matcher.py
Normal file
@@ -0,0 +1,279 @@
|
||||
"""
|
||||
Matching-Orchestrator:
|
||||
- Erkennt deutsche Hörbuch-Serien (die drei ???, TKKG, ...)
|
||||
- Versucht MusicBrainz → OpenLibrary → Google Books
|
||||
- Lädt Cover herunter
|
||||
- Bewertet Konfidenz und entscheidet über Auto-Accept
|
||||
"""
|
||||
import re
|
||||
import os
|
||||
import logging
|
||||
import httpx
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select
|
||||
|
||||
from ..config import get_settings
|
||||
from ..models.media_item import LibraryItem, BookFile, Chapter
|
||||
from ..models.session import ServerSetting
|
||||
from ..database import AsyncSessionLocal
|
||||
from .matching.base import MatchResult
|
||||
from .matching.musicbrainz import search_musicbrainz, get_release_details
|
||||
from .matching.open_library import search_open_library, get_work_details
|
||||
from .matching.google_books import search_google_books
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
AUTO_ACCEPT_THRESHOLD = 0.75
|
||||
UNCERTAIN_THRESHOLD = 0.50
|
||||
|
||||
# Bekannte deutsche Hörbuch-Serien: (regex, kanonischer Name)
|
||||
SERIES_PATTERNS = [
|
||||
(r"(?i)^(die drei \?\?\?|die drei fragezeichen|drei fragezeichen)\s*[-–]?\s*(?:folge\s*)?(\d+)", "Die drei ???"),
|
||||
(r"(?i)^(tkkg)\s*[-–]?\s*(?:folge\s*)?(\d+)", "TKKG"),
|
||||
(r"(?i)^(fünf freunde|funf freunde)\s*[-–]?\s*(?:band\s*)?(\d+)", "Fünf Freunde"),
|
||||
(r"(?i)^(bibi blocksberg)\s*[-–]?\s*(?:folge\s*)?(\d+)", "Bibi Blocksberg"),
|
||||
(r"(?i)^(benjamin blümchen|benjamin blumchen)\s*[-–]?\s*(?:folge\s*)?(\d+)", "Benjamin Blümchen"),
|
||||
(r"(?i)^(bibi und tina)\s*[-–]?\s*(?:folge\s*)?(\d+)", "Bibi und Tina"),
|
||||
(r"(?i)^(der kleine vampir)\s*[-–]?\s*(?:band\s*)?(\d+)", "Der kleine Vampir"),
|
||||
# Generisch: "Serie - Folge/Band/Teil N - Titel"
|
||||
(r"(?i)^(.+?)\s*[-–]\s*(?:folge|band|teil|nr\.?|#)\s*(\d+)", None),
|
||||
# Generisch: "Serie (Folge N)"
|
||||
(r"(?i)^(.+?)\s*\((?:folge|band|teil|nr\.?|#|episode)\s*(\d+)\)", None),
|
||||
]
|
||||
|
||||
|
||||
def detect_series(title: str) -> tuple[str | None, str | None]:
|
||||
"""Gibt (Serienname, Folgennummer) zurück oder (None, None)."""
|
||||
for pattern, canonical_name in SERIES_PATTERNS:
|
||||
m = re.match(pattern, title.strip())
|
||||
if m:
|
||||
series = canonical_name or m.group(1).strip()
|
||||
episode = m.group(2)
|
||||
return series, episode
|
||||
return None, None
|
||||
|
||||
|
||||
def _title_similarity(a: str, b: str) -> float:
|
||||
"""Einfache Ähnlichkeit: Wort-Überlapp."""
|
||||
if not a or not b:
|
||||
return 0.0
|
||||
wa = set(re.findall(r'\w+', a.lower()))
|
||||
wb = set(re.findall(r'\w+', b.lower()))
|
||||
if not wa or not wb:
|
||||
return 0.0
|
||||
return len(wa & wb) / max(len(wa), len(wb))
|
||||
|
||||
|
||||
def _score_result(result: MatchResult, query_title: str, query_author: str | None) -> float:
|
||||
score = result.confidence
|
||||
title_sim = _title_similarity(result.title, query_title)
|
||||
score = score * 0.4 + title_sim * 0.6
|
||||
if query_author and result.author:
|
||||
author_sim = _title_similarity(result.author, query_author)
|
||||
score = score * 0.7 + author_sim * 0.3
|
||||
return min(score, 1.0)
|
||||
|
||||
|
||||
async def _download_cover(url: str, item_id: str) -> str | None:
|
||||
"""Lädt Cover herunter und speichert es lokal."""
|
||||
settings = get_settings()
|
||||
ext = ".jpg"
|
||||
if ".png" in url:
|
||||
ext = ".png"
|
||||
dest = os.path.join(settings.covers_dir, f"{item_id}{ext}")
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=20, follow_redirects=True) as client:
|
||||
r = await client.get(url)
|
||||
if r.status_code == 200:
|
||||
os.makedirs(settings.covers_dir, exist_ok=True)
|
||||
with open(dest, "wb") as f:
|
||||
f.write(r.content)
|
||||
return dest
|
||||
except Exception as e:
|
||||
logger.warning(f"Cover-Download fehlgeschlagen ({url}): {e}")
|
||||
return None
|
||||
|
||||
|
||||
async def _apply_match(db: AsyncSession, item: LibraryItem, result: MatchResult, confidence: float):
|
||||
"""Schreibt Metadaten aus MatchResult in die DB."""
|
||||
if result.title:
|
||||
item.title = result.title
|
||||
if result.subtitle and not item.subtitle:
|
||||
item.subtitle = result.subtitle
|
||||
if result.author:
|
||||
item.author = result.author
|
||||
if result.narrator:
|
||||
item.narrator = result.narrator
|
||||
if result.description:
|
||||
item.description = result.description
|
||||
if result.publisher:
|
||||
item.publisher = result.publisher
|
||||
if result.publish_year:
|
||||
item.publish_year = result.publish_year
|
||||
if result.language:
|
||||
item.language = result.language
|
||||
if result.genres:
|
||||
item.genres = result.genres
|
||||
if result.series:
|
||||
item.series = result.series
|
||||
if result.series_sequence:
|
||||
item.series_sequence = result.series_sequence
|
||||
|
||||
item.matched_source = result.source
|
||||
item.matched_id = result.source_id
|
||||
item.match_confidence = confidence
|
||||
item.updated_at = datetime.utcnow()
|
||||
|
||||
# Cover herunterladen
|
||||
if result.cover_url and not item.cover_path:
|
||||
cover_path = await _download_cover(result.cover_url, item.id)
|
||||
if cover_path:
|
||||
item.cover_path = cover_path
|
||||
|
||||
# Kapitel aus MusicBrainz-Tracklisting
|
||||
if result.chapters:
|
||||
from sqlalchemy import delete
|
||||
from ..models.media_item import Chapter
|
||||
await db.execute(delete(Chapter).where(Chapter.library_item_id == item.id))
|
||||
for idx, ch in enumerate(result.chapters):
|
||||
chapter = Chapter(
|
||||
library_item_id=item.id,
|
||||
chapter_index=idx,
|
||||
title=ch.get("title", f"Kapitel {idx + 1}"),
|
||||
start_seconds=ch.get("start", 0.0),
|
||||
end_seconds=ch.get("end", 0.0),
|
||||
)
|
||||
db.add(chapter)
|
||||
|
||||
# zu_prüfen entfernen wenn Konfidenz hoch genug
|
||||
if confidence >= AUTO_ACCEPT_THRESHOLD:
|
||||
tags = item.tags or []
|
||||
item.tags = [t for t in tags if t != "zu_prüfen"]
|
||||
|
||||
|
||||
async def match_audiobook(item_id: str):
|
||||
"""
|
||||
Haupt-Matching-Funktion. Wird nach dem Scan als Hintergrund-Task gestartet.
|
||||
"""
|
||||
async with AsyncSessionLocal() as db:
|
||||
result_row = await db.execute(select(LibraryItem).where(LibraryItem.id == item_id))
|
||||
item = result_row.scalar_one_or_none()
|
||||
if not item or item.match_locked:
|
||||
return
|
||||
|
||||
# Einstellung prüfen
|
||||
setting = await db.execute(
|
||||
select(ServerSetting).where(ServerSetting.key == "autoMatchBooks")
|
||||
)
|
||||
s = setting.scalar_one_or_none()
|
||||
if s and s.value is False:
|
||||
return
|
||||
|
||||
title = item.title or ""
|
||||
author = item.author
|
||||
|
||||
# Serien-Erkennung verbessert den Suchbegriff
|
||||
series, episode = detect_series(title)
|
||||
search_title = title
|
||||
if series:
|
||||
search_title = f"{series} {episode}" if episode else series
|
||||
if not item.series:
|
||||
item.series = series
|
||||
if not item.series_sequence and episode:
|
||||
item.series_sequence = episode
|
||||
|
||||
logger.info(f"Matche: '{title}' (Serie: {series}, Folge: {episode})")
|
||||
|
||||
best: MatchResult | None = None
|
||||
best_score = 0.0
|
||||
|
||||
# 1. MusicBrainz
|
||||
try:
|
||||
mb_results = await search_musicbrainz(search_title, author)
|
||||
for r in mb_results:
|
||||
score = _score_result(r, title, author)
|
||||
if score > best_score:
|
||||
best_score = score
|
||||
best = r
|
||||
except Exception as e:
|
||||
logger.warning(f"MusicBrainz Fehler: {e}")
|
||||
|
||||
# Wenn guter MB-Treffer → Details holen (Tracklist + Cover)
|
||||
if best and best_score >= UNCERTAIN_THRESHOLD and best.source == "musicbrainz":
|
||||
try:
|
||||
details = await get_release_details(best.source_id)
|
||||
if details:
|
||||
details.confidence = best_score
|
||||
best = details
|
||||
except Exception as e:
|
||||
logger.warning(f"MusicBrainz Details Fehler: {e}")
|
||||
|
||||
# 2. OpenLibrary als Fallback
|
||||
if best_score < UNCERTAIN_THRESHOLD:
|
||||
try:
|
||||
ol_results = await search_open_library(search_title, author)
|
||||
for r in ol_results:
|
||||
score = _score_result(r, title, author)
|
||||
if score > best_score:
|
||||
best_score = score
|
||||
best = r
|
||||
if best and best.source == "open_library" and best_score >= UNCERTAIN_THRESHOLD:
|
||||
details = await get_work_details(best.source_id)
|
||||
if details and details.description:
|
||||
best.description = details.description
|
||||
except Exception as e:
|
||||
logger.warning(f"OpenLibrary Fehler: {e}")
|
||||
|
||||
# 3. Google Books als letzter Fallback
|
||||
if best_score < UNCERTAIN_THRESHOLD:
|
||||
try:
|
||||
gb_results = await search_google_books(search_title, author)
|
||||
for r in gb_results:
|
||||
score = _score_result(r, title, author)
|
||||
if score > best_score:
|
||||
best_score = score
|
||||
best = r
|
||||
except Exception as e:
|
||||
logger.warning(f"Google Books Fehler: {e}")
|
||||
|
||||
if best and best_score >= UNCERTAIN_THRESHOLD:
|
||||
await _apply_match(db, item, best, best_score)
|
||||
logger.info(f"Match angewendet: '{item.title}' ← {best.source} (Konfidenz: {best_score:.2f})")
|
||||
else:
|
||||
logger.info(f"Kein Match gefunden für '{title}' (beste Konfidenz: {best_score:.2f})")
|
||||
|
||||
await db.commit()
|
||||
|
||||
|
||||
async def search_for_item(title: str, author: str | None = None) -> list[dict]:
|
||||
"""Suche über alle Quellen – für manuelles Matching."""
|
||||
results = []
|
||||
|
||||
async def _search_source(coro):
|
||||
try:
|
||||
return await coro
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
mb, ol, gb = await asyncio.gather(
|
||||
_search_source(search_musicbrainz(title, author)),
|
||||
_search_source(search_open_library(title, author)),
|
||||
_search_source(search_google_books(title, author)),
|
||||
)
|
||||
|
||||
for r in mb + ol + gb:
|
||||
results.append({
|
||||
"source": r.source,
|
||||
"id": r.source_id,
|
||||
"title": r.title,
|
||||
"author": r.author,
|
||||
"publishYear": r.publish_year,
|
||||
"cover": r.cover_url,
|
||||
"confidence": r.confidence,
|
||||
})
|
||||
|
||||
results.sort(key=lambda x: x["confidence"], reverse=True)
|
||||
return results
|
||||
@@ -1,4 +1,3 @@
|
||||
"""Google Books-Matching — Phase 5."""
|
||||
import httpx
|
||||
from .base import MatchResult
|
||||
|
||||
@@ -10,26 +9,52 @@ async def search_google_books(title: str, author: str | None = None) -> list[Mat
|
||||
if author:
|
||||
q += f' inauthor:"{author}"'
|
||||
|
||||
async with httpx.AsyncClient(timeout=10) as client:
|
||||
resp = await client.get(f"{GB_BASE}/volumes", params={"q": q, "maxResults": 5, "langRestrict": "de"})
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
async with httpx.AsyncClient(timeout=12) as client:
|
||||
try:
|
||||
r = await client.get(
|
||||
f"{GB_BASE}/volumes",
|
||||
params={"q": q, "maxResults": 5, "langRestrict": "de", "printType": "books"},
|
||||
)
|
||||
r.raise_for_status()
|
||||
data = r.json()
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
results = []
|
||||
for item in data.get("items", []):
|
||||
vol = item.get("volumeInfo", {})
|
||||
authors = vol.get("authors", [])
|
||||
results.append(
|
||||
MatchResult(
|
||||
source="google_books",
|
||||
source_id=item.get("id", ""),
|
||||
title=vol.get("title", title),
|
||||
author=authors[0] if authors else None,
|
||||
description=vol.get("description"),
|
||||
publisher=vol.get("publisher"),
|
||||
publish_year=int(vol.get("publishedDate", "0")[:4]) if vol.get("publishedDate") else None,
|
||||
language=vol.get("language"),
|
||||
confidence=0.5,
|
||||
|
||||
cover_url = None
|
||||
image_links = vol.get("imageLinks", {})
|
||||
if image_links:
|
||||
cover_url = (
|
||||
image_links.get("extraLarge")
|
||||
or image_links.get("large")
|
||||
or image_links.get("medium")
|
||||
or image_links.get("thumbnail", "").replace("zoom=1", "zoom=3")
|
||||
)
|
||||
)
|
||||
|
||||
year = None
|
||||
pub_date = vol.get("publishedDate", "")
|
||||
if pub_date and len(pub_date) >= 4:
|
||||
try:
|
||||
year = int(pub_date[:4])
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
results.append(MatchResult(
|
||||
source="google_books",
|
||||
source_id=item.get("id", ""),
|
||||
title=vol.get("title", title),
|
||||
subtitle=vol.get("subtitle"),
|
||||
author=authors[0] if authors else None,
|
||||
description=vol.get("description"),
|
||||
publisher=vol.get("publisher"),
|
||||
publish_year=year,
|
||||
language=vol.get("language"),
|
||||
genres=vol.get("categories", []),
|
||||
cover_url=cover_url,
|
||||
confidence=0.5,
|
||||
))
|
||||
return results
|
||||
|
||||
@@ -1,40 +1,115 @@
|
||||
"""MusicBrainz-Matching — Phase 5."""
|
||||
import httpx
|
||||
import asyncio
|
||||
from .base import MatchResult
|
||||
|
||||
MB_BASE = "https://musicbrainz.org/ws/2"
|
||||
HEADERS = {"User-Agent": "audiolib/1.0 (https://github.com/audiolib)"}
|
||||
CAA_BASE = "https://coverartarchive.org"
|
||||
HEADERS = {"User-Agent": "audiolib/1.0 (contact@audiolib.local)"}
|
||||
_semaphore = asyncio.Semaphore(2) # MusicBrainz Rate-Limit: max 1 req/s
|
||||
|
||||
|
||||
async def _get(client: httpx.AsyncClient, url: str, **params) -> dict:
|
||||
async with _semaphore:
|
||||
await asyncio.sleep(1.1) # MusicBrainz erlaubt 1 req/s
|
||||
r = await client.get(url, params={"fmt": "json", **params}, timeout=15)
|
||||
r.raise_for_status()
|
||||
return r.json()
|
||||
|
||||
|
||||
async def search_musicbrainz(title: str, artist: str | None = None) -> list[MatchResult]:
|
||||
query = f'release:"{title}"'
|
||||
if artist:
|
||||
query += f' AND artist:"{artist}"'
|
||||
query += " AND format:Digital"
|
||||
|
||||
async with httpx.AsyncClient(headers=HEADERS, timeout=10) as client:
|
||||
resp = await client.get(
|
||||
f"{MB_BASE}/release",
|
||||
params={"query": query, "fmt": "json", "limit": 5},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
async with httpx.AsyncClient(headers=HEADERS) as client:
|
||||
try:
|
||||
data = await _get(client, f"{MB_BASE}/release", query=query, limit=5)
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
results = []
|
||||
for release in data.get("releases", []):
|
||||
confidence = release.get("score", 0) / 100.0
|
||||
artist_name = None
|
||||
credits = release.get("artist-credit", [])
|
||||
if credits:
|
||||
artist_name = credits[0].get("name") or credits[0].get("artist", {}).get("name")
|
||||
for rel in data.get("releases", []):
|
||||
confidence = rel.get("score", 0) / 100.0
|
||||
artist_name = _first_artist(rel)
|
||||
release_id = rel.get("id", "")
|
||||
|
||||
results.append(
|
||||
MatchResult(
|
||||
source="musicbrainz",
|
||||
source_id=release.get("id", ""),
|
||||
title=release.get("title", title),
|
||||
author=artist_name,
|
||||
confidence=confidence,
|
||||
)
|
||||
)
|
||||
results.append(MatchResult(
|
||||
source="musicbrainz",
|
||||
source_id=release_id,
|
||||
title=rel.get("title", title),
|
||||
author=artist_name,
|
||||
publish_year=_parse_year(rel.get("date", "")),
|
||||
confidence=confidence,
|
||||
))
|
||||
return results
|
||||
|
||||
|
||||
async def get_release_details(release_id: str) -> MatchResult | None:
|
||||
"""Lädt vollständige Release-Details inkl. Tracklist (= Kapitel) und Cover."""
|
||||
async with httpx.AsyncClient(headers=HEADERS) as client:
|
||||
try:
|
||||
data = await _get(
|
||||
client, f"{MB_BASE}/release/{release_id}",
|
||||
inc="recordings+artists+release-groups"
|
||||
)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
artist_name = _first_artist(data)
|
||||
rg = data.get("release-group", {})
|
||||
series = rg.get("title") if rg.get("primary-type") == "Album" else None
|
||||
|
||||
# Tracklist → Kapitel
|
||||
chapters = []
|
||||
offset = 0.0
|
||||
for medium in data.get("media", []):
|
||||
for track in medium.get("tracks", []):
|
||||
duration_ms = track.get("length") or track.get("recording", {}).get("length") or 0
|
||||
duration_s = duration_ms / 1000.0
|
||||
chapters.append({
|
||||
"title": track.get("title", f"Track {track.get('position', '')}"),
|
||||
"start": offset,
|
||||
"end": offset + duration_s,
|
||||
})
|
||||
offset += duration_s
|
||||
|
||||
# Cover Art
|
||||
cover_url = None
|
||||
try:
|
||||
caa = await client.get(f"{CAA_BASE}/release/{release_id}", timeout=10)
|
||||
if caa.status_code == 200:
|
||||
caa_data = caa.json()
|
||||
images = caa_data.get("images", [])
|
||||
front = next((i for i in images if i.get("front")), images[0] if images else None)
|
||||
if front:
|
||||
cover_url = front.get("thumbnails", {}).get("large") or front.get("image")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return MatchResult(
|
||||
source="musicbrainz",
|
||||
source_id=release_id,
|
||||
title=data.get("title", ""),
|
||||
author=artist_name,
|
||||
publish_year=_parse_year(data.get("date", "")),
|
||||
cover_url=cover_url,
|
||||
chapters=chapters,
|
||||
confidence=1.0,
|
||||
)
|
||||
|
||||
|
||||
def _first_artist(release: dict) -> str | None:
|
||||
credits = release.get("artist-credit", [])
|
||||
if credits:
|
||||
c = credits[0]
|
||||
return c.get("name") or c.get("artist", {}).get("name")
|
||||
return None
|
||||
|
||||
|
||||
def _parse_year(date_str: str) -> int | None:
|
||||
if date_str and len(date_str) >= 4:
|
||||
try:
|
||||
return int(date_str[:4])
|
||||
except ValueError:
|
||||
pass
|
||||
return None
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
"""OpenLibrary-Matching — Phase 5."""
|
||||
import httpx
|
||||
from .base import MatchResult
|
||||
|
||||
@@ -6,25 +5,55 @@ OL_BASE = "https://openlibrary.org"
|
||||
|
||||
|
||||
async def search_open_library(title: str, author: str | None = None) -> list[MatchResult]:
|
||||
params: dict = {"title": title, "limit": 5}
|
||||
params: dict = {"title": title, "limit": 5, "fields": "key,title,author_name,first_publish_year,cover_i,subject"}
|
||||
if author:
|
||||
params["author"] = author
|
||||
|
||||
async with httpx.AsyncClient(timeout=10) as client:
|
||||
resp = await client.get(f"{OL_BASE}/search.json", params=params)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
async with httpx.AsyncClient(timeout=12) as client:
|
||||
try:
|
||||
r = await client.get(f"{OL_BASE}/search.json", params=params)
|
||||
r.raise_for_status()
|
||||
data = r.json()
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
results = []
|
||||
for doc in data.get("docs", []):
|
||||
results.append(
|
||||
MatchResult(
|
||||
source="open_library",
|
||||
source_id=doc.get("key", ""),
|
||||
title=doc.get("title", title),
|
||||
author=doc.get("author_name", [None])[0] if doc.get("author_name") else None,
|
||||
publish_year=doc.get("first_publish_year"),
|
||||
confidence=0.6,
|
||||
)
|
||||
)
|
||||
cover_url = None
|
||||
if doc.get("cover_i"):
|
||||
cover_url = f"https://covers.openlibrary.org/b/id/{doc['cover_i']}-L.jpg"
|
||||
|
||||
results.append(MatchResult(
|
||||
source="open_library",
|
||||
source_id=doc.get("key", ""),
|
||||
title=doc.get("title", title),
|
||||
author=doc.get("author_name", [None])[0] if doc.get("author_name") else None,
|
||||
publish_year=doc.get("first_publish_year"),
|
||||
cover_url=cover_url,
|
||||
genres=doc.get("subject", [])[:5],
|
||||
confidence=0.55,
|
||||
))
|
||||
return results
|
||||
|
||||
|
||||
async def get_work_details(work_key: str) -> MatchResult | None:
|
||||
"""Lädt Beschreibung und Genres nach."""
|
||||
async with httpx.AsyncClient(timeout=12) as client:
|
||||
try:
|
||||
r = await client.get(f"{OL_BASE}{work_key}.json")
|
||||
r.raise_for_status()
|
||||
data = r.json()
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
desc = data.get("description")
|
||||
if isinstance(desc, dict):
|
||||
desc = desc.get("value")
|
||||
|
||||
return MatchResult(
|
||||
source="open_library",
|
||||
source_id=work_key,
|
||||
title=data.get("title", ""),
|
||||
description=desc,
|
||||
confidence=1.0,
|
||||
)
|
||||
|
||||
186
backend/app/services/podcast_feed.py
Normal file
186
backend/app/services/podcast_feed.py
Normal file
@@ -0,0 +1,186 @@
|
||||
"""
|
||||
Podcast-Feed-Manager:
|
||||
- RSS-Feed parsen
|
||||
- Episoden mit lokalen Dateien abgleichen
|
||||
- Periodisches Update
|
||||
"""
|
||||
import os
|
||||
import re
|
||||
import logging
|
||||
import httpx
|
||||
import feedparser
|
||||
from datetime import datetime
|
||||
from difflib import SequenceMatcher
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select
|
||||
from ..database import AsyncSessionLocal
|
||||
from ..models.library import Library
|
||||
from ..models.media_item import LibraryItem
|
||||
from ..models.podcast import Podcast, PodcastEpisode
|
||||
from ..services.matcher import _download_cover
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _similarity(a: str, b: str) -> float:
|
||||
if not a or not b:
|
||||
return 0.0
|
||||
return SequenceMatcher(None, a.lower(), b.lower()).ratio()
|
||||
|
||||
|
||||
def _parse_duration(s: str | None) -> float:
|
||||
"""Parst "HH:MM:SS" oder "MM:SS" oder reine Sekunden."""
|
||||
if not s:
|
||||
return 0.0
|
||||
s = s.strip()
|
||||
try:
|
||||
if ":" in s:
|
||||
parts = s.split(":")
|
||||
if len(parts) == 3:
|
||||
return int(parts[0]) * 3600 + int(parts[1]) * 60 + float(parts[2])
|
||||
elif len(parts) == 2:
|
||||
return int(parts[0]) * 60 + float(parts[1])
|
||||
return float(s)
|
||||
except (ValueError, IndexError):
|
||||
return 0.0
|
||||
|
||||
|
||||
async def search_podcast_feeds(query: str) -> list[dict]:
|
||||
"""Sucht Podcast-Feeds über iTunes Search API."""
|
||||
results = []
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=12) as client:
|
||||
r = await client.get(
|
||||
"https://itunes.apple.com/search",
|
||||
params={"term": query, "media": "podcast", "limit": 10, "country": "de"},
|
||||
)
|
||||
r.raise_for_status()
|
||||
data = r.json()
|
||||
for item in data.get("results", []):
|
||||
results.append({
|
||||
"title": item.get("collectionName", ""),
|
||||
"author": item.get("artistName", ""),
|
||||
"feedUrl": item.get("feedUrl", ""),
|
||||
"artworkUrl": item.get("artworkUrl600") or item.get("artworkUrl100", ""),
|
||||
"trackCount": item.get("trackCount", 0),
|
||||
"itunesId": item.get("collectionId"),
|
||||
})
|
||||
except Exception as e:
|
||||
logger.warning(f"iTunes-Suche fehlgeschlagen: {e}")
|
||||
return results
|
||||
|
||||
|
||||
async def fetch_and_update_feed(library_item_id: str):
|
||||
"""
|
||||
Holt RSS-Feed und aktualisiert Metadaten + Episoden in der DB.
|
||||
"""
|
||||
async with AsyncSessionLocal() as db:
|
||||
item_result = await db.execute(select(LibraryItem).where(LibraryItem.id == library_item_id))
|
||||
item = item_result.scalar_one_or_none()
|
||||
if not item:
|
||||
return
|
||||
|
||||
podcast_result = await db.execute(select(Podcast).where(Podcast.library_item_id == library_item_id))
|
||||
podcast = podcast_result.scalar_one_or_none()
|
||||
if not podcast or not podcast.feed_url:
|
||||
logger.warning(f"Kein Feed für Item {library_item_id}")
|
||||
return
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=20, follow_redirects=True) as client:
|
||||
r = await client.get(podcast.feed_url)
|
||||
r.raise_for_status()
|
||||
raw = r.text
|
||||
except Exception as e:
|
||||
logger.error(f"Feed-Abruf fehlgeschlagen ({podcast.feed_url}): {e}")
|
||||
return
|
||||
|
||||
feed = feedparser.parse(raw)
|
||||
channel = feed.feed
|
||||
|
||||
# Podcast-Metadaten aktualisieren
|
||||
if channel.get("title") and not item.title:
|
||||
item.title = channel.get("title")
|
||||
if channel.get("author") and not item.author:
|
||||
item.author = channel.get("author")
|
||||
if channel.get("summary") and not item.description:
|
||||
item.description = channel.get("summary")
|
||||
if channel.get("language") and not item.language:
|
||||
item.language = channel.get("language")
|
||||
|
||||
# Cover
|
||||
cover_url = None
|
||||
if channel.get("image"):
|
||||
cover_url = channel.image.get("href") or channel.image.get("url")
|
||||
if cover_url and not item.cover_path:
|
||||
cover_path = await _download_cover(cover_url, item.id)
|
||||
if cover_path:
|
||||
item.cover_path = cover_path
|
||||
|
||||
podcast.feed_last_checked = datetime.utcnow()
|
||||
|
||||
# Lokale Episode-Dateien holen
|
||||
episodes_result = await db.execute(
|
||||
select(PodcastEpisode).where(PodcastEpisode.podcast_id == podcast.id)
|
||||
)
|
||||
existing_episodes = {ep.feed_episode_id: ep for ep in episodes_result.scalars().all()}
|
||||
|
||||
# Feed-Einträge verarbeiten
|
||||
for entry in feed.entries:
|
||||
feed_ep_id = entry.get("id") or entry.get("link", "")
|
||||
title = entry.get("title", "")
|
||||
description = entry.get("summary") or entry.get("content", [{}])[0].get("value", "") if entry.get("content") else ""
|
||||
pub_date = None
|
||||
if entry.get("published_parsed"):
|
||||
import time
|
||||
pub_date = datetime(*entry.published_parsed[:6])
|
||||
|
||||
enclosure_url = None
|
||||
duration_s = 0.0
|
||||
for enc in entry.get("enclosures", []):
|
||||
if enc.get("type", "").startswith("audio/"):
|
||||
enclosure_url = enc.get("href") or enc.get("url")
|
||||
break
|
||||
duration_s = _parse_duration(entry.get("itunes_duration"))
|
||||
|
||||
ep_num = entry.get("itunes_episode")
|
||||
season_num = entry.get("itunes_season")
|
||||
|
||||
if feed_ep_id in existing_episodes:
|
||||
# Vorhandene Episode aktualisieren
|
||||
ep = existing_episodes[feed_ep_id]
|
||||
ep.title = title
|
||||
ep.description = description
|
||||
ep.feed_episode_url = enclosure_url
|
||||
ep.duration_seconds = duration_s or ep.duration_seconds
|
||||
else:
|
||||
# Neue Episode anlegen
|
||||
ep = PodcastEpisode(
|
||||
podcast_id=podcast.id,
|
||||
title=title,
|
||||
description=description,
|
||||
pub_date=pub_date,
|
||||
duration_seconds=duration_s,
|
||||
feed_episode_id=feed_ep_id,
|
||||
feed_episode_url=enclosure_url,
|
||||
episode_number=str(ep_num) if ep_num else None,
|
||||
season_number=str(season_num) if season_num else None,
|
||||
)
|
||||
db.add(ep)
|
||||
|
||||
item.updated_at = datetime.utcnow()
|
||||
await db.commit()
|
||||
logger.info(f"Feed aktualisiert: {item.title} ({len(feed.entries)} Einträge)")
|
||||
|
||||
|
||||
async def update_all_feeds():
|
||||
"""Aktualisiert alle Podcast-Feeds (wird vom Scheduler aufgerufen)."""
|
||||
async with AsyncSessionLocal() as db:
|
||||
result = await db.execute(select(Podcast).where(Podcast.feed_url.isnot(None)))
|
||||
podcasts = result.scalars().all()
|
||||
|
||||
for podcast in podcasts:
|
||||
try:
|
||||
await fetch_and_update_feed(podcast.library_item_id)
|
||||
except Exception as e:
|
||||
logger.error(f"Feed-Update fehlgeschlagen für {podcast.id}: {e}")
|
||||
@@ -13,3 +13,4 @@ mutagen==1.47.0
|
||||
aiofiles==24.1.0
|
||||
pillow==10.4.0
|
||||
feedparser==6.0.11
|
||||
apscheduler==3.10.4
|
||||
|
||||
Reference in New Issue
Block a user