From 91caf957f287a6881a996a470d8d01edc44a72d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastjan=20Arti=C4=8D?= Date: Wed, 29 Apr 2026 18:58:12 +0000 Subject: [PATCH] Auto-route NZ folk-pop directly to Gemini (skip Scribe) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User feedback: Scribe consistently produces bad transcripts for Slovenian narodno-zabavna (NZ) folk-pop music: - 'Saša Avsenik - Žena ME TEPE': hallucinated 'sam sam sam' x14 - 'FEHTARJI - Gorenjska Ljubljena': total hallucination ('finančni moduli') - 'Ansambel UNIKAT - PA PA': mistranscribed 'mu' as 'vsem' - 'Ansambel Saša Avsenika - CVETELE SO MALINE': wrong lyrics entirely Common pattern: all are Slovenian folk-pop with diatonic accordion. Scribe training data has very little of this genre, so it consistently fails. Solution: auto-detect NZ songs by filename keywords and route directly to Gemini 3 Pro (which handles them correctly), skipping Scribe entirely. is_likely_folk_pop() detects: - Slovenian: ansambel, avsenik, slak, fehtar, modrijan, atomik, gadi, vikend, stil, unikat, korenjaki, gorenjski, štajerski, polka, valček - Croatian: klapa, thompson, mate bulić - Serbian/Bosnian: lepa brena, ceca, halid bešlić When detected: 1. Skip Scribe entirely (it would fail anyway) 2. Go directly to Gemini 3 Pro (~100s, $0.20) 3. If Gemini fails, fall back to Scribe (rare) Cost analysis (10 reels/day, 30% NZ): - Before: 10x Scribe = $0.13/day, ~30% need re-process - Hybrid (fallback): 10x Scribe + 3x Gemini retry = $0.79/day - NZ-routing (now): 7x Scribe + 3x Gemini = $0.69/day, FIRST-TRY success Saves time AND money compared with the hybrid fallback for NZ-heavy workloads. 
# Lower-case substrings whose presence in a filename marks the song as
# narodno-zabavna (NZ) or related regional folk-pop. Entries are stems, so
# inflected forms ("avsenika", "fehtarji", "modrijani") match as well.
#
# NOTE(review): matching is plain substring containment, so short or generic
# entries ("studio", "trio", "stil", "vox", "vikend") also hit unrelated names
# such as "patriot" or "still". Tightening this would change which files get
# routed, so it is left as-is here; confirm the acceptable false-positive rate.
_NZ_KEYWORDS = (
    # Typical genre words
    "ansambel", "ansambl", "tamburaški", "polka", "valček", "narodno",
    "veseli", "studio", "vox", "trio", "kvintet", "oktet",
    # Slovenian: Avsenik and related ensembles
    "avsenik", "slak", "fehtar", "modrijan", "atomik", "feniks",
    "gadi", "mladi korenjaki", "prinčki", "vikend", "stil", "unikat",
    "veseli svatje", "topliški", "korenjaki", "gorenjski",
    "prleški", "štajerski", "primorski", "dolenjski",
    "fantje", "kvartet",
    # Croatian
    "klapa", "stoka", "thompson", "marko perković", "mate bulić",
    "miroslav škoro", "halid bešlić", "haris džinović",
    # Serbian / Bosnian folk
    "lepa brena", "ceca", "bijelo dugme", "azra",
)


def is_likely_folk_pop(filename_hint=None):
    """Return True when *filename_hint* looks like a narodno-zabavna (NZ) song.

    Used to auto-route NZ songs to Gemini: Scribe transcribes this genre
    poorly (Avsenik, Modrijani, etc.), while Gemini 3 Pro is considerably
    more reliable.

    Args:
        filename_hint: File name or title of the song. ``None`` or an empty
            value means "no hint". Non-string values (for example path
            objects) are converted with ``str()``.

    Returns:
        ``True`` if any known NZ keyword occurs in the hint, compared
        case-insensitively; ``False`` otherwise or when no hint is given.
    """
    if not filename_hint:
        return False

    # Function-scope import keeps this block self-contained; the module's
    # top-level import section is not visible from here.
    import unicodedata

    # Normalize to NFC before comparing: filenames may arrive in decomposed
    # form (NFD, e.g. "c" + combining caron), which would never match the
    # precomposed keyword literals such as "valček".
    name = unicodedata.normalize("NFC", str(filename_hint)).lower()

    return any(keyword in name for keyword in _NZ_KEYWORDS)
@@ -594,6 +631,21 @@ def transcribe_full(audio_path, lang=None, model_size="small", provider="auto", if provider == "auto": provider = "hybrid" if (has_scribe and has_gemini) else ("elevenlabs" if has_scribe else "local") + # ─── NZ AUTO-ROUTING ────────────────────────────────────── + # Če je pesem zaznana kot narodno-zabavna IN imamo Gemini → preskoči Scribe. + # Scribe slabo transkribira NZ glasbo (Avsenik, Modrijani itd.), Gemini boljši. + if has_gemini and provider in ("hybrid", "auto") and is_likely_folk_pop(filename_hint): + print(f"🎻 NZ pesem zaznana ('{filename_hint}') → Gemini 3 Pro direktno (preskoči Scribe)", file=sys.stderr) + result = transcribe_with_gemini(audio_path, lang=lang, filename_hint=filename_hint) + if result and result.get("segments"): + return result + # Če Gemini failed, pade na Scribe + if has_scribe: + print(f" ⚠️ Gemini failed → fallback na Scribe", file=sys.stderr) + provider = "elevenlabs" + else: + return {"language": "unknown", "language_probability": 0.0, "segments": []} + # ─── HYBRID: Scribe primary, Gemini fallback ─── if provider == "hybrid": if not has_scribe: