reels-app/scripts/sync_qnet.py
Claude 2abd9daae1 Fix CP1250 encoding bug v sync_qnet.py — È→Č
PROBLEM: Songs.txt na MB Windows playerjih je v CP1250 (slovenski/CEE),
NE Windows-1252 (Western European). iconv -f WINDOWS-1252 je 'Č' (0xC8)
napačno interpretiral kot 'È', zaradi česar je 811 zapisov v Qnet bazi
imelo 'È' namesto 'Č' (npr. 'POSKOÈNI', 'ÈAS ZA ZABAVO', 'STORŽIÈ').

Posledica: ko je qnet_match povezal job na napačno labeliran zapis,
je 'parsed_title' polnil z mojibake iz Qnet baze (15 jobov).

FIX: WINDOWS-1252 → WINDOWS-1250.
Razlike v CP1250 vs CP1252 (slovanske črke):
  Č↔È, č↔è, Ć↔Æ, ć↔æ, Đ↔Ð, đ↔ð, Ń↔Ñ, Ł↔£, ł↔³, Ś↔Œ, ś↔œ, ź↔Ÿ
  Ž, š, ž — ostanejo (isti byte v obeh)

BACKFILL (ločen skript, že apliciran):
- Qnet lookup: 2746 polj v 20860 zapisih popravljenih
- Qnet songs.json: 2856 polj
- 15 jobov: parsed_artist/title popravljen na pravilen UTF-8
2026-05-03 14:32:25 +00:00

191 lines
6.2 KiB
Python

#!/usr/bin/env python3
"""
Qnet baz fetcher za reels-app.
Fetcha Songs.txt iz 5 Qnet instalacij na MB playerjih (preko ssh-api proxy-ja
na openclaw → SSH na Windows playerje), pretvori iz Windows-1250 (CP1250) v UTF-8,
parsa TSV in shrani enotno JSON bazo v /data/qnet/songs.json.
Cron-friendly: poženi enkrat na uro.
Output struktura:
{
"synced_at": 1746198000.0,
"stations": {
"FOLX DE": {"count": 4038, "fetched_at": 1746198000.0},
...
},
"songs": [
{
"station": "FOLX DE",
"artist": "Sašo Avsenik und seine Oberkrainer",
"title": "Na Golici",
"file": "Sašo Avsenik und seine Oberkrainer - Na Golici.mp4",
"type": "DGL",
"length": "2:32.277",
"comments": "",
"last_played": "17/4/2026"
},
...
]
}
"""
import csv
import io
import json
import os
import sys
import time
import base64
import requests
from pathlib import Path
# Base URL of the ssh-api endpoint; the PTC_SSH_API environment variable overrides it.
SSH_API = os.environ.get("PTC_SSH_API", "https://mail.folx.tv/ssh-api/v2")
# Bearer token sent to ssh-api. PTC_SSH_TOKEN wins when it is set and non-empty.
# NOTE(review): the fallback below is a credential committed to source control —
# consider rotating it and making the environment variable mandatory.
SSH_TOKEN = os.environ.get("PTC_SSH_TOKEN") or "ptc-ssh-2026-a7b3c9d4e5f6012389abcdef01234567890abcdef01234567890abcdef012345"
# Destination of the merged JSON database; the QNET_DB_PATH environment variable overrides it.
OUT_PATH = Path(os.environ.get("QNET_DB_PATH", "/data/qnet/songs.json"))
# Import-time side effect: make sure the output directory exists.
OUT_PATH.parent.mkdir(parents=True, exist_ok=True)
# One entry per Qnet installation: (station_label, player_ip, qnet_subdir_on_C).
# Several stations may share the same player IP.
STATIONS = [
    ("FOLX DE", "100.64.0.2", "qnet"),
    ("ZWEI", "100.64.0.2", "qnetzwei"),
    ("ONE", "100.64.0.3", "QnetONE"),
    ("ADRIA", "100.64.0.4", "Qnet"),
    ("FOLX SLO", "100.64.0.4", "QnetFOLXSLO"),
]
# Identity file handed to `scp -i` inside the command that is sent to ssh-api.
SSH_KEY = "/root/.ssh/players/folx_players"
def ssh_exec(cmd: str, timeout: int = 60) -> dict:
    """Run *cmd* on the ``openclaw`` host through the ssh-api HTTP endpoint.

    Args:
        cmd: Shell command line forwarded in the request body.
        timeout: Command timeout in seconds forwarded to ssh-api.

    Returns:
        The decoded JSON body of the ssh-api response.

    Raises:
        requests.HTTPError: If ssh-api answers with an error status.
    """
    request_headers = {
        "Authorization": f"Bearer {SSH_TOKEN}",
        "Content-Type": "application/json",
    }
    request_body = {"host": "openclaw", "cmd": cmd, "timeout": timeout}
    # The HTTP timeout is 30 s longer than the command timeout — presumably so
    # that the remote side gets to report its own timeout first.
    response = requests.post(
        SSH_API,
        headers=request_headers,
        json=request_body,
        timeout=timeout + 30,
    )
    response.raise_for_status()
    return response.json()
def fetch_one(station: str, ip: str, subdir: str) -> str:
    """Fetch one station's Songs.txt from its Windows player as a UTF-8 string.

    The command is executed through ``ssh_exec``: it copies
    ``c:/<subdir>/Data/Songs.txt`` from the player with scp, converts it from
    CP1250 to UTF-8 with iconv and prints the result base64-encoded so that it
    can travel inside the JSON response.

    Songs.txt is CP1250 (Windows Central European), not CP1252 (Western).
    Decoding it as CP1252 turns 'Č' (0xC8) into 'È' and 'č' (0xE8) into 'è';
    'Š', 'š', 'Ž' and 'ž' use the same byte in both code pages.

    Args:
        station: Station label, used only in error messages.
        ip: Address of the Windows player.
        subdir: Qnet installation directory on the player's C: drive.

    Returns:
        The content of Songs.txt decoded as UTF-8.

    Raises:
        RuntimeError: If the remote command exits non-zero or returns no data.
        binascii.Error: If the returned payload is not valid base64.
        requests.RequestException: Propagated from ``ssh_exec``.
    """
    cmd = (
        "set -e; "
        "TMP=$(mktemp); "
        # Clean up on every exit path: with `set -e` a failing scp/iconv aborts
        # the script, so a trailing `rm` would never run and the file would leak.
        "trap 'rm -f \"$TMP\" \"$TMP.utf8\"' EXIT; "
        f"scp -i {SSH_KEY} -o StrictHostKeyChecking=no "
        f'"folxadmin@{ip}:c:/{subdir}/Data/Songs.txt" "$TMP"; '
        # Convert into a file instead of piping into base64: a pipeline's exit
        # status is that of its last command, which would hide an iconv failure
        # and let truncated output through as if it were complete.
        'iconv -f WINDOWS-1250 -t UTF-8 "$TMP" > "$TMP.utf8"; '
        'base64 -w 0 "$TMP.utf8"'
    )
    res = ssh_exec(cmd, timeout=90)
    if res.get("exit_code") != 0:
        raise RuntimeError(f"{station}: ssh-api error: {res}")
    # `or ""` also covers an explicit null in the response body.
    b64 = (res.get("output") or "").strip()
    if not b64:
        raise RuntimeError(f"{station}: empty response")
    # iconv already emitted UTF-8; errors="replace" only keeps this best-effort.
    return base64.b64decode(b64).decode("utf-8", errors="replace")
# (output key, Songs.txt column header) in the order the keys are emitted.
_SONG_FIELDS = (
    ("artist", "Artist"),
    ("title", "Title"),
    ("file", "File"),
    ("type", "Type"),
    ("length", "Length"),
    ("comments", "Comments"),
    ("language", "Language"),
    ("genre", "Genre"),
    ("last_played", "Last date played"),
    ("display_artist", "Display artist"),
    ("display_title", "Display title"),
)


def parse_songs_tsv(text: str, station: str) -> list[dict]:
    """Parse tab-separated Songs.txt content into a list of song dicts.

    The first line is the header. Every value is whitespace-stripped; columns
    missing from the header or from a short row become ``""``. A row is dropped
    only when artist, title and file are all empty.

    Args:
        text: Decoded Songs.txt content.
        station: Station label stored under the ``"station"`` key of each dict.

    Returns:
        One dict per kept row with the keys ``station``, ``artist``, ``title``,
        ``file``, ``type``, ``length``, ``comments``, ``language``, ``genre``,
        ``last_played``, ``display_artist`` and ``display_title``. An empty
        *text* yields an empty list.
    """
    # NOTE(review): the csv module's default quoting treats a field starting
    # with '"' as quoted; if Songs.txt is never quoted, quoting=csv.QUOTE_NONE
    # may be the safer setting — confirm against a real file.
    reader = csv.DictReader(io.StringIO(text), delimiter="\t")
    if reader.fieldnames:
        # Tolerate a BOM or stray whitespace around header names; an exact-match
        # miss would otherwise silently produce empty fields for every row.
        reader.fieldnames = [
            name.lstrip("\ufeff").strip() for name in reader.fieldnames
        ]

    songs = []
    for row in reader:
        fields = {
            key: (row.get(column) or "").strip() for key, column in _SONG_FIELDS
        }
        # Skip rows that carry no identifying information at all.
        if not (fields["artist"] or fields["title"] or fields["file"]):
            continue
        songs.append({"station": station, **fields})
    return songs
def _write_json_atomic(path: Path, data, **dump_kwargs) -> None:
    """Write *data* as UTF-8 JSON to *path* via a ``.json.tmp`` sibling and rename.

    The temp file is renamed over *path* only after it has been fully written,
    and it is removed again if serialization, writing or the rename fails.
    """
    tmp = path.with_suffix(".json.tmp")
    try:
        tmp.write_text(
            json.dumps(data, ensure_ascii=False, **dump_kwargs),
            encoding="utf-8",
        )
        tmp.replace(path)
    finally:
        # No-op after a successful rename; cleans up after a failure.
        tmp.unlink(missing_ok=True)


def main():
    """Sync every station in ``STATIONS`` and write the merged JSON files.

    Each station is fetched and parsed independently; a failure is recorded in
    the station metadata and the error list and the remaining stations are
    still processed. The full payload goes to ``OUT_PATH`` and a reduced
    artist/title/file index to ``songs_lookup.json`` in the same directory.

    If every station failed and ``OUT_PATH`` already exists, nothing is written
    so that one bad run cannot replace the last good database with an empty
    one.

    Exits with status 1 when every station failed, otherwise 0.
    """
    t0 = time.time()
    all_songs = []
    stations_meta = {}
    errors = []
    for station, ip, subdir in STATIONS:
        print(f"{station} ({ip}:c:/{subdir}/Data/Songs.txt)", flush=True)
        try:
            songs = parse_songs_tsv(fetch_one(station, ip, subdir), station)
        except Exception as e:
            # Top-level boundary: record the failure and keep going.
            err = f"{station}: {type(e).__name__}: {e}"
            print(f"{err}", flush=True)
            errors.append(err)
            stations_meta[station] = {"count": 0, "ok": False, "error": str(e)}
            continue
        all_songs.extend(songs)
        stations_meta[station] = {
            "count": len(songs),
            "fetched_at": time.time(),
            "ok": True,
        }
        print(f"{len(songs)} songov", flush=True)

    all_failed = len(errors) == len(STATIONS)
    if all_failed and OUT_PATH.exists():
        # Keep the last good database instead of overwriting it with nothing.
        print(
            f"\n✗ Nobena postaja ni uspela — obstoječa baza {OUT_PATH} "
            f"ostane nespremenjena ({round(time.time()-t0,1)}s)"
        )
    else:
        payload = {
            "synced_at": time.time(),
            "duration_seconds": round(time.time() - t0, 1),
            "total_songs": len(all_songs),
            "stations": stations_meta,
            "errors": errors,
            "songs": all_songs,
        }
        _write_json_atomic(OUT_PATH, payload, indent=2)

        # Separate, smaller lookup index used only for matching.
        lookup = [
            {
                "station": s["station"],
                "artist": s["artist"],
                "title": s["title"],
                "file": s["file"],
            }
            for s in all_songs
            if s["artist"] and s["title"]
        ]
        _write_json_atomic(OUT_PATH.parent / "songs_lookup.json", lookup)
        print(f"\n✓ Done: {len(all_songs)} songov v {OUT_PATH} ({round(time.time()-t0,1)}s)")

    if errors:
        print(f"{len(errors)} napak:")
        for e in errors:
            print(f" - {e}")
    sys.exit(1 if all_failed else 0)


if __name__ == "__main__":
    main()