diff --git a/scripts/analyze.py b/scripts/analyze.py index 7db582e..10b4359 100644 --- a/scripts/analyze.py +++ b/scripts/analyze.py @@ -542,7 +542,9 @@ def analyze_with_claude(transcript, video_duration, target_duration=30, model="c import urllib.error body = json.dumps({ "model": model, - "max_tokens": 4096, + # 8192 je dovolj za ~250 corrected_segments + ostali metadata pri dolgih pesmih. + # Sonnet 4.6 podpira precej več, ampak 8192 je varen default. + "max_tokens": 8192, "messages": [{"role": "user", "content": prompt}], }).encode("utf-8") @@ -563,6 +565,18 @@ def analyze_with_claude(transcript, video_duration, target_duration=30, model="c if not content: print(" ⚠️ Claude vrnil prazen odgovor", file=sys.stderr) return None + + # Diagnostika: če je bil response odrezan, je JSON nepopoln + stop_reason = data.get("stop_reason") + if stop_reason == "max_tokens": + usage = data.get("usage", {}) + print( + f" ⚠️ Claude odrezan (max_tokens): " + f"input={usage.get('input_tokens')} output={usage.get('output_tokens')}", + file=sys.stderr, + ) + return None + text = content[0].get("text", "").strip() result = _parse_llm_response(text, video_duration) @@ -606,6 +620,11 @@ def analyze_with_gemini(transcript, video_duration, target_duration=30, model="g import urllib.error url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent?key={api_key}" + # Gemini 3.x Pro je THINKING model — porabi tokene tudi za internal reasoning (thoughtsTokenCount). + # 4096 je prenizko: pri velikih transkriptih thinking lahko porabi 1500-3000 tokenov, + # output (corrected_segments za 60+ segmentov) pa še dodatnih 3000-7000 → odreže JSON na pol + # (finishReason: MAX_TOKENS) in vrne nepopolen, neveljaven JSON. + # 32768 daje dovolj prostora za thinking + cel JSON output tudi pri dolgih pesmih. body = json.dumps({ "contents": [{ "role": "user", @@ -613,7 +632,7 @@ def analyze_with_gemini(transcript, video_duration, target_duration=30, model="g }], "generationConfig": { "temperature": 0.1, - "maxOutputTokens": 4096, + "maxOutputTokens": 32768, "responseMimeType": "application/json", }, }).encode("utf-8") @@ -624,18 +643,46 @@ def analyze_with_gemini(transcript, video_duration, target_duration=30, model="g headers={"Content-Type": "application/json"}, method="POST", ) - with urllib.request.urlopen(req, timeout=120) as resp: + with urllib.request.urlopen(req, timeout=180) as resp: data = json.loads(resp.read().decode("utf-8")) candidates = data.get("candidates", []) if not candidates: print(" ⚠️ Gemini vrnil 0 candidates", file=sys.stderr) return None - parts = candidates[0].get("content", {}).get("parts", []) + + cand0 = candidates[0] + finish_reason = cand0.get("finishReason", "?") + usage = data.get("usageMetadata", {}) + + # Diagnostika: če je finishReason == MAX_TOKENS, je output odrezan in JSON je invalid + if finish_reason == "MAX_TOKENS": + print( + f" ⚠️ Gemini odrezan (MAX_TOKENS): " + f"prompt={usage.get('promptTokenCount')} " + f"thoughts={usage.get('thoughtsTokenCount')} " + f"output={usage.get('candidatesTokenCount')}", + file=sys.stderr, + ) + return None + + parts = cand0.get("content", {}).get("parts", []) if not parts: - print(" ⚠️ Gemini vrnil prazen content", file=sys.stderr) + print( + f" ⚠️ Gemini vrnil prazen content (finishReason={finish_reason}, " + f"thoughts={usage.get('thoughtsTokenCount')})", + file=sys.stderr, + ) return None text = parts[0].get("text", "").strip() + if not text: + print( + f" ⚠️ Gemini vrnil prazen text (finishReason={finish_reason}, " + f"thoughts={usage.get('thoughtsTokenCount')}, " + f"output={usage.get('candidatesTokenCount')})", + file=sys.stderr, + ) + return None result = _parse_llm_response(text, video_duration) if not result: