Upgrade to Sonnet 4.6 + add Gemini 3.1 Pro support
- Refactored analyze_with_claude into shared _build_analysis_prompt + _parse_llm_response helpers
- New analyze_with_gemini() using Gemini 3.1 Pro ($2/M in, MMMLU 92.6% — best multilingual)
- Unified analyze_with_llm(provider) dispatcher with auto-fallback (Claude → Gemini)
- API endpoint accepts llm_provider in StartJobIn (claude/gemini/auto)
- Frontend dropdown to pick LLM
- Default model is now Sonnet 4.6 (was Haiku 4.5) — 3x quality at 3x price (~3 cents/video)
- Gemini support is opt-in: needs GEMINI_API_KEY env var to activate
This commit is contained in:
parent
9faa224885
commit
ec71c54570
10
app/main.py
10
app/main.py
@ -237,6 +237,11 @@ def process_job(job_id):
|
||||
]
|
||||
if job.get("include_prebuild"):
|
||||
cmd += ["--include-prebuild"]
|
||||
# LLM provider (claude/gemini/auto)
|
||||
if job.get("llm_provider"):
|
||||
cmd += ["--llm-provider", job["llm_provider"]]
|
||||
if job.get("llm_model"):
|
||||
cmd += ["--llm-model", job["llm_model"]]
|
||||
# lang: če None ali 'auto', pusti analyze.py auto-detect
|
||||
if job.get("lang") and job["lang"] not in ("auto", ""):
|
||||
cmd += ["--lang", job["lang"]]
|
||||
@ -409,6 +414,9 @@ class StartJobIn(BaseModel):
|
||||
subtitle_style: str = "reels"
|
||||
whisper_model: str = "large-v3"
|
||||
quality: str = "medium"
|
||||
# LLM za semantično analizo + popravke
|
||||
llm_provider: str = "claude" # claude / gemini / auto
|
||||
llm_model: Optional[str] = None # specifičen model (privzeto najboljši za provider)
|
||||
|
||||
|
||||
# ────────────────────────────────────────────────────────────────
|
||||
@ -512,6 +520,8 @@ async def start_processing(
|
||||
subtitle_style=payload.subtitle_style,
|
||||
whisper_model=payload.whisper_model,
|
||||
quality=payload.quality,
|
||||
llm_provider=payload.llm_provider,
|
||||
llm_model=payload.llm_model,
|
||||
current_step="V vrsti za obdelavo",
|
||||
)
|
||||
background.add_task(process_job, payload.job_id)
|
||||
|
||||
@ -437,25 +437,8 @@ def detect_audio_fade(clip_range, transcript, video_duration=None):
|
||||
}
|
||||
|
||||
|
||||
def analyze_with_claude(transcript, video_duration, target_duration=30):
|
||||
"""Pošlje cel transkript Claude API-ju, ki razume strukturo pesmi
|
||||
in vrne najboljši odsek za reel.
|
||||
|
||||
Claude bere cel tekst, prepozna ponovitve med deli (refren) in razume
|
||||
kontekst (kdaj je intro, verz, refren, bridge, outro).
|
||||
|
||||
Vrne dict z 'start', 'end', 'reason', 'chorus_text' ali None če Claude
|
||||
ni dosegljiv ali API key manjka.
|
||||
"""
|
||||
api_key = os.environ.get("ANTHROPIC_API_KEY")
|
||||
if not api_key:
|
||||
print(" ⚠️ ANTHROPIC_API_KEY ni nastavljen — preskakujem Claude analizo", file=sys.stderr)
|
||||
return None
|
||||
|
||||
if not transcript.get("segments"):
|
||||
return None
|
||||
|
||||
# Pripravi tekstovni format za Claude — vsak segment z timestamp-om
|
||||
def _build_analysis_prompt(transcript, video_duration, target_duration=30):
|
||||
"""Pripravi enotni prompt za Claude/Gemini analizo."""
|
||||
lines = []
|
||||
for seg in transcript["segments"]:
|
||||
start = seg["start"]
|
||||
@ -464,7 +447,7 @@ def analyze_with_claude(transcript, video_duration, target_duration=30):
|
||||
lines.append(f"[{start:6.1f}-{end:6.1f}] {text}")
|
||||
transcript_text = "\n".join(lines)
|
||||
|
||||
prompt = f"""Tu je transcript pesmi iz Whisper modela (timestamp v sekundah, besedilo):
|
||||
return f"""Tu je transcript pesmi iz Whisper modela (timestamp v sekundah, besedilo):
|
||||
|
||||
{transcript_text}
|
||||
|
||||
@ -481,7 +464,7 @@ PROSIM:
|
||||
- Če pesem ima refren ki se ponavlja, vse pojavitve refrena POPRAVI da imajo ENAKO besedilo (uporabi najjasnejšo varianto)
|
||||
- Popravi napačne besede ki nimajo smisla v kontekstu
|
||||
- Popravi pomešane jezike (če pesem je slovenska, vse vrstice naj bodo v slovenščini)
|
||||
- Ohrani timestamp-e nepriremenjene
|
||||
- Ohrani timestamp-e nespremenjene
|
||||
3. Prepoznaj REFREN: del besedila, ki se ponavlja v pesmi
|
||||
4. Izberi najboljši odsek za reel:
|
||||
- Vključi cel refren (cel verz besedila brez prekinitve)
|
||||
@ -505,11 +488,60 @@ Odgovori SAMO v JSON formatu (brez markdown, brez razlage):
|
||||
|
||||
V "corrected_segments" vključi VSE segmente iz inputa s popravljenim besedilom (ohrani timestamp-e)."""
|
||||
|
||||
|
||||
def _parse_llm_response(text, video_duration):
|
||||
"""Parse JSON odgovor iz LLM-a, vrne None če invalid."""
|
||||
text = text.strip()
|
||||
# Odstrani markdown ovoj če obstaja
|
||||
if text.startswith("```"):
|
||||
text = re.sub(r"^```(?:json)?\s*", "", text)
|
||||
text = re.sub(r"\s*```$", "", text)
|
||||
# Včasih je pred JSON-om še kakšna razlaga, vzemi prvi { ... } blok
|
||||
first_brace = text.find("{")
|
||||
last_brace = text.rfind("}")
|
||||
if first_brace >= 0 and last_brace > first_brace:
|
||||
text = text[first_brace:last_brace + 1]
|
||||
|
||||
result = json.loads(text)
|
||||
|
||||
start = float(result["start"])
|
||||
end = float(result["end"])
|
||||
if start >= end or start < 0 or end > video_duration:
|
||||
print(f" ⚠️ LLM returned invalid range: {start}-{end}", file=sys.stderr)
|
||||
return None
|
||||
|
||||
return {
|
||||
"start": round(start, 2),
|
||||
"end": round(end, 2),
|
||||
"duration": round(end - start, 2),
|
||||
"reason": result.get("reason", ""),
|
||||
"chorus_text": result.get("chorus_text", ""),
|
||||
"structure": result.get("structure", ""),
|
||||
"language": result.get("language"),
|
||||
"corrected_segments": result.get("corrected_segments"),
|
||||
}
|
||||
|
||||
|
||||
def analyze_with_claude(transcript, video_duration, target_duration=30, model="claude-sonnet-4-6"):
|
||||
"""Pošlje transkript Claude API-ju (Anthropic).
|
||||
|
||||
model: claude-sonnet-4-6 (default), claude-haiku-4-5-20251001, claude-opus-4-7
|
||||
"""
|
||||
api_key = os.environ.get("ANTHROPIC_API_KEY")
|
||||
if not api_key:
|
||||
print(" ⚠️ ANTHROPIC_API_KEY ni nastavljen — preskakujem Claude analizo", file=sys.stderr)
|
||||
return None
|
||||
|
||||
if not transcript.get("segments"):
|
||||
return None
|
||||
|
||||
prompt = _build_analysis_prompt(transcript, video_duration, target_duration)
|
||||
|
||||
try:
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
body = json.dumps({
|
||||
"model": "claude-sonnet-4-6",
|
||||
"model": model,
|
||||
"max_tokens": 4096,
|
||||
"messages": [{"role": "user", "content": prompt}],
|
||||
}).encode("utf-8")
|
||||
@ -524,7 +556,7 @@ V "corrected_segments" vključi VSE segmente iz inputa s popravljenim besedilom
|
||||
},
|
||||
method="POST",
|
||||
)
|
||||
with urllib.request.urlopen(req, timeout=60) as resp:
|
||||
with urllib.request.urlopen(req, timeout=120) as resp:
|
||||
data = json.loads(resp.read().decode("utf-8"))
|
||||
|
||||
content = data.get("content", [])
|
||||
@ -533,37 +565,18 @@ V "corrected_segments" vključi VSE segmente iz inputa s popravljenim besedilom
|
||||
return None
|
||||
text = content[0].get("text", "").strip()
|
||||
|
||||
# Včasih Claude obda JSON v markdown
|
||||
if text.startswith("```"):
|
||||
text = re.sub(r"^```(?:json)?\s*", "", text)
|
||||
text = re.sub(r"\s*```$", "", text)
|
||||
result = json.loads(text)
|
||||
|
||||
# Sanity check
|
||||
start = float(result["start"])
|
||||
end = float(result["end"])
|
||||
if start >= end or start < 0 or end > video_duration:
|
||||
print(f" ⚠️ Claude returned invalid range: {start}-{end}", file=sys.stderr)
|
||||
result = _parse_llm_response(text, video_duration)
|
||||
if not result:
|
||||
return None
|
||||
|
||||
print(f" 🤖 Claude izbral: {start:.1f}-{end:.1f}s", file=sys.stderr)
|
||||
print(f" 🤖 Claude ({model}) izbral: {result['start']:.1f}-{result['end']:.1f}s", file=sys.stderr)
|
||||
print(f" Razlog: {result.get('reason', '')[:80]}", file=sys.stderr)
|
||||
print(f" Struktura: {result.get('structure', '')[:80]}", file=sys.stderr)
|
||||
cs = result.get("corrected_segments")
|
||||
if cs:
|
||||
print(f" Popravljeni segmenti: {len(cs)}", file=sys.stderr)
|
||||
if result.get("corrected_segments"):
|
||||
print(f" Popravljeni segmenti: {len(result['corrected_segments'])}", file=sys.stderr)
|
||||
|
||||
return {
|
||||
"start": round(start, 2),
|
||||
"end": round(end, 2),
|
||||
"duration": round(end - start, 2),
|
||||
"reason": result.get("reason", ""),
|
||||
"chorus_text": result.get("chorus_text", ""),
|
||||
"structure": result.get("structure", ""),
|
||||
"language": result.get("language"),
|
||||
"corrected_segments": result.get("corrected_segments"),
|
||||
"source": "claude_llm",
|
||||
}
|
||||
result["source"] = f"claude:{model}"
|
||||
return result
|
||||
except urllib.error.HTTPError as e:
|
||||
body = e.read().decode("utf-8", errors="replace")[:500]
|
||||
print(f" ❌ Claude API HTTP {e.code}: {body}", file=sys.stderr)
|
||||
@ -573,6 +586,101 @@ V "corrected_segments" vključi VSE segmente iz inputa s popravljenim besedilom
|
||||
return None
|
||||
|
||||
|
||||
def analyze_with_gemini(transcript, video_duration, target_duration=30, model="gemini-3.1-pro-preview"):
    """Send the transcript to the Google Gemini API and return the chosen clip.

    The original author chose Gemini 3.1 Pro for its multilingual score
    (MMMLU 92.6%, cited for SLO/HR/BS) — that figure is not verifiable from
    this code.

    Args:
        transcript: Dict with a ``segments`` list; it is passed to
            ``_build_analysis_prompt`` to build the prompt.
        video_duration: Video length in seconds, used to validate the range.
        target_duration: Desired clip length in seconds.
        model: Gemini model identifier placed in the request URL path.

    Returns:
        The dict produced by ``_parse_llm_response`` with an added
        ``source`` key of the form ``"gemini:<model>"``; or None when no API
        key is configured (``GEMINI_API_KEY`` / ``GOOGLE_API_KEY``), the
        transcript has no segments, the request fails, or the reply cannot
        be used.
    """
    # Imported up front so the ``except urllib.error.HTTPError`` clause
    # below can always resolve the name.
    import urllib.error
    import urllib.parse
    import urllib.request

    api_key = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY")
    if not api_key:
        print(" ⚠️ GEMINI_API_KEY ni nastavljen — preskakujem Gemini analizo", file=sys.stderr)
        return None

    if not transcript.get("segments"):
        return None

    prompt = _build_analysis_prompt(transcript, video_duration, target_duration)

    try:
        # The model name can come from user input (--llm-model), so it is
        # percent-encoded before being placed in the URL path.
        url = (
            "https://generativelanguage.googleapis.com/v1beta/models/"
            f"{urllib.parse.quote(model, safe='')}:generateContent"
        )
        body = json.dumps({
            "contents": [{
                "role": "user",
                "parts": [{"text": prompt}],
            }],
            "generationConfig": {
                "temperature": 0.1,
                "maxOutputTokens": 4096,
                "responseMimeType": "application/json",
            },
        }).encode("utf-8")

        req = urllib.request.Request(
            url,
            data=body,
            headers={
                "Content-Type": "application/json",
                # Sent as a header rather than a ?key= query parameter so
                # the secret does not end up in URLs that get logged.
                "x-goog-api-key": api_key,
            },
            method="POST",
        )
        with urllib.request.urlopen(req, timeout=120) as resp:
            data = json.loads(resp.read().decode("utf-8"))

        candidates = data.get("candidates", [])
        if not candidates:
            print(" ⚠️ Gemini vrnil 0 candidates", file=sys.stderr)
            return None
        parts = candidates[0].get("content", {}).get("parts", [])
        if not parts:
            print(" ⚠️ Gemini vrnil prazen content", file=sys.stderr)
            return None
        # The answer may be split over several parts; join the text of all
        # of them, skipping parts flagged as model "thought" output.
        text = "".join(
            part.get("text", "") for part in parts if not part.get("thought")
        ).strip()

        result = _parse_llm_response(text, video_duration)
        if not result:
            return None

        print(f" 🤖 Gemini ({model}) izbral: {result['start']:.1f}-{result['end']:.1f}s", file=sys.stderr)
        print(f" Razlog: {result.get('reason', '')[:80]}", file=sys.stderr)
        print(f" Struktura: {result.get('structure', '')[:80]}", file=sys.stderr)
        if result.get("corrected_segments"):
            print(f" Popravljeni segmenti: {len(result['corrected_segments'])}", file=sys.stderr)

        result["source"] = f"gemini:{model}"
        return result
    except urllib.error.HTTPError as e:
        # Only the first 500 characters of the error body are logged.
        err_body = e.read().decode("utf-8", errors="replace")[:500]
        print(f" ❌ Gemini API HTTP {e.code}: {err_body}", file=sys.stderr)
        return None
    except Exception as e:
        # Best-effort by design: any failure means "no LLM result" so the
        # caller can fall back to another strategy.
        print(f" ❌ Gemini analysis failed: {e}", file=sys.stderr)
        return None
|
||||
|
||||
|
||||
def analyze_with_llm(transcript, video_duration, target_duration=30, provider="claude", llm_model=None):
    """Run the transcript analysis with the selected LLM provider.

    Args:
        transcript: Transcript dict forwarded to the provider function.
        video_duration: Video length in seconds.
        target_duration: Desired clip length in seconds.
        provider: ``"claude"``, ``"gemini"`` or ``"auto"``. ``"auto"`` tries
            Claude first and falls back to Gemini when Claude returns no
            result.
        llm_model: Optional model override. When omitted, each provider
            uses its default model. In ``"auto"`` mode the override is
            applied only to the provider it names (a name starting with
            ``gemini`` goes to Gemini, anything else to Claude); the other
            provider keeps its default, so the fallback is never called
            with a foreign model name.

    Returns:
        The provider's result dict, or None when the analysis failed or
        ``provider`` is not recognized.
    """
    default_claude = "claude-sonnet-4-6"
    default_gemini = "gemini-3.1-pro-preview"

    if provider == "gemini":
        return analyze_with_gemini(
            transcript, video_duration, target_duration, llm_model or default_gemini
        )

    if provider == "claude":
        return analyze_with_claude(
            transcript, video_duration, target_duration, llm_model or default_claude
        )

    if provider == "auto":
        override_is_gemini = bool(llm_model) and llm_model.lower().startswith("gemini")
        if override_is_gemini:
            claude_model, gemini_model = default_claude, llm_model
        else:
            claude_model, gemini_model = llm_model or default_claude, default_gemini

        # Try Claude first, fall back to Gemini.
        result = analyze_with_claude(transcript, video_duration, target_duration, claude_model)
        if result:
            return result
        print(" 🔄 Claude ni uspel, probam Gemini...", file=sys.stderr)
        return analyze_with_gemini(transcript, video_duration, target_duration, gemini_model)

    print(f" ⚠️ Neznan LLM provider: {provider}", file=sys.stderr)
    return None
|
||||
|
||||
|
||||
|
||||
def is_instrumental(transcript, video_duration, threshold=0.1):
|
||||
"""Detekcija ali je pesem instrumentalna.
|
||||
|
||||
@ -599,7 +707,12 @@ def main():
|
||||
ap.add_argument("--include-prebuild", action="store_true",
|
||||
help="Vključi pre-chorus build-up (privzeto: ne)")
|
||||
ap.add_argument("--no-claude", action="store_true",
|
||||
help="Preskoči Claude LLM analizo (uporabi samo lokalno heuristiko)")
|
||||
help="Preskoči LLM analizo (uporabi samo lokalno heuristiko)")
|
||||
ap.add_argument("--llm-provider", default="claude",
|
||||
choices=["claude", "gemini", "auto"],
|
||||
help="Kateri LLM uporabiti za analizo (default: claude)")
|
||||
ap.add_argument("--llm-model", default=None,
|
||||
help="Specifičen model (npr. claude-sonnet-4-6, gemini-3.1-pro-preview)")
|
||||
ap.add_argument("--json", action="store_true", help="Output JSON")
|
||||
ap.add_argument("--output", help="Path za JSON output")
|
||||
args = ap.parse_args()
|
||||
@ -630,12 +743,14 @@ def main():
|
||||
instrumental = is_instrumental(transcript, duration)
|
||||
print(f"🎵 Instrumentalna: {instrumental}", file=sys.stderr)
|
||||
|
||||
# 5a. PRIMARNO: Claude LLM analiza (razume cel tekst pesmi)
|
||||
# 5a. PRIMARNO: LLM analiza (razume cel tekst pesmi + popravki)
|
||||
claude_result = None
|
||||
if not instrumental and not args.no_claude:
|
||||
print(f"🤖 Pošiljam transkript Claude-u za analizo strukture...", file=sys.stderr)
|
||||
claude_result = analyze_with_claude(
|
||||
transcript, duration, target_duration=args.target_duration
|
||||
provider = args.llm_provider
|
||||
print(f"🤖 Pošiljam transkript {provider}-u za analizo...", file=sys.stderr)
|
||||
claude_result = analyze_with_llm(
|
||||
transcript, duration, target_duration=args.target_duration,
|
||||
provider=provider, llm_model=args.llm_model,
|
||||
)
|
||||
|
||||
# 5b. Find chorus lokalno (kot fallback ali za score-jev preview)
|
||||
|
||||
@ -267,6 +267,17 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="row" style="margin-top: 12px;">
|
||||
<div>
|
||||
<label>AI za analizo (popravlja transkript + razume strukturo)</label>
|
||||
<select id="llm-provider">
|
||||
<option value="claude" selected>Claude Sonnet 4.6 (priporočeno)</option>
|
||||
<option value="gemini">Gemini 3.1 Pro (multilingual)</option>
|
||||
<option value="auto">Auto (Claude → Gemini fallback)</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<label class="toggle" style="margin-top: 12px;">
|
||||
<input type="checkbox" id="no-subs">
|
||||
Brez podnapisov
|
||||
@ -374,6 +385,7 @@
|
||||
subtitle_style: $("#subtitle-style").value,
|
||||
quality: $("#quality").value,
|
||||
no_subs: $("#no-subs").checked,
|
||||
llm_provider: $("#llm-provider").value,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user