From 2abd9daae105dfe7866830cf39452e716c8f26d3 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 3 May 2026 14:32:25 +0000 Subject: [PATCH] =?UTF-8?q?Fix=20CP1250=20encoding=20bug=20v=20sync=5Fqnet?= =?UTF-8?q?.py=20=E2=80=94=20=C3=88=E2=86=92=C4=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PROBLEM: Songs.txt na MB Windows playerjih je v CP1250 (slovenski/CEE), NE Windows-1252 (Western European). iconv -f WINDOWS-1252 je 'Č' (0xC8) napačno interpretiral kot 'È', zaradi česar je 811 zapisov v Qnet bazi imelo 'È' namesto 'Č' (npr. 'POSKOÈNI', 'ÈAS ZA ZABAVO', 'STORŽIÈ'). Posledica: ko je qnet_match povezal job na napačno labeliran zapis, je 'parsed_title' polnil z mojibake iz Qnet baze (15 jobov). FIX: WINDOWS-1252 → WINDOWS-1250. Razlike v CP1250 vs CP1252 (slovanske črke): Č↔È, č↔è, Ć↔Æ, ć↔æ, Đ↔Ð, đ↔ð, Ń↔Ñ, Ł↔£, ł↔³, Ś↔Œ, ś↔œ, ź↔Ÿ Ž, š, ž — ostanejo (isti byte v obeh) BACKFILL (ločen skript, že apliciran): - Qnet lookup: 2746 polj v 20860 zapisih popravljenih - Qnet songs.json: 2856 polj - 15 jobov: parsed_artist/title popravljen na pravilen UTF-8 --- scripts/sync_qnet.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/scripts/sync_qnet.py b/scripts/sync_qnet.py index 37f417b..6478695 100644 --- a/scripts/sync_qnet.py +++ b/scripts/sync_qnet.py @@ -74,14 +74,18 @@ def ssh_exec(cmd: str, timeout: int = 60) -> dict: def fetch_one(station: str, ip: str, subdir: str) -> str: - """Fetcha Songs.txt z windows playerja, vrne UTF-8 string.""" + """Fetcha Songs.txt z windows playerja, vrne UTF-8 string. + + Songs.txt je v CP1250 encoding (Windows Slovenian/CE), NE 1252 (Western). + 1252 bi 'Č' (0xC8) interpretiral kot 'È' in 'č' (0xE8) kot 'è'; 'Š', 'š', 'Ž', 'ž' so OK (isti byte v obeh: 0x8A, 0x9A, 0x8E, 0x9E). 
+ """ # 1) scp z playerja na openclaw, iconv v utf8, base64 nazaj cmd = ( f"set -e; " f"TMP=$(mktemp); " f"scp -i {SSH_KEY} -o StrictHostKeyChecking=no " f'"folxadmin@{ip}:c:/{subdir}/Data/Songs.txt" "$TMP"; ' - f'iconv -f WINDOWS-1252 -t UTF-8 "$TMP" | base64 -w 0; ' + f'iconv -f WINDOWS-1250 -t UTF-8 "$TMP" | base64 -w 0; ' f'rm -f "$TMP"' ) res = ssh_exec(cmd, timeout=90)