From 4e2c690bc57cbed4115c6edf5c8ad56129ed6442 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastjan=20Arti=C4=8D?= Date: Sat, 2 May 2026 08:10:30 +0000 Subject: [PATCH] =?UTF-8?q?Bolj=20agresivno=20=C4=8Di=C5=A1=C4=8Denje=20fi?= =?UTF-8?q?lename:=20()=20prazni=20+=20catch-all=20noise=20besede?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User: 'Topliška pomlad — KAR PADA NAJ SNEG ( - ) — tile oklepaji pa Official video itd. Daj ko se nalaga na Nextcloud mora biti samo izvajalec in naslov komada.' Dodatni NOISE_PATTERNS: 1. Prazni / dummy oklepaji: '( )', '( - )', '(-)', '(.)' itd. 2. Catch-all za oklepaje z noise besedami: video|audio|version|mix|edit|remix|cover|live|hd|hq|4k|8k|remaster|extended|clean|explicit|radio|lyric|official|offizielles|musik 3. Avtor/producer brackets: '(prod. by X)', '(feat. Y)', '(ft. Z)' Test rezultat: 'Topliška pomlad - KAR PADA NAJ SNEG ( - )(Official 4K Video).mp4' → 'Topliška pomlad - KAR PADA NAJ SNEG - REEL.mp4' 'Sarah Connor - FICKA (Offizielles Musikvideo).mp4' → 'Sarah Connor - FICKA - REEL.mp4' Vsi novi uploadi bodo imeli čista imena. TODO ločeno: rename obstoječih 31 datotek na Nextcloudu (skript pripravljen) --- app/main.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/app/main.py b/app/main.py index 0017668..5f3919e 100644 --- a/app/main.py +++ b/app/main.py @@ -217,6 +217,23 @@ _NOISE_PATTERNS = [ # Trailing year in parens (npr. 
"(2024)") r"\(\d{4}\)\s*$", + + # Prazni / dummy oklepaji: "()", "( )", "( - )", "(-)", "(.)" + r"\(\s*[-–—._]*\s*\)", + + # Catch-all: oklepaji z "video"/"audio"/"version"/"mix"/"edit"/"remix" + # (široko match — če oklepaji vsebujejo te besede, so verjetno noise) + r"\([^)]*\b(?:video|audio|version|mix|edit|remix|cover|live|hd|hq|4k|8k|remaster(?:ed)?|extended|clean|explicit|radio|lyric[s]?|official|offizielles?|musik)\b[^)]*\)", + + # Catch-all: oglati oklepaji z noise besedami + r"\[[^\]]*\b(?:video|audio|version|mix|edit|remix|cover|live|hd|hq|4k|official|musik)\b[^\]]*\]", + + # Avtor/feat. v oklepajih: "(prod. by X)", "(feat. Y)", "(ft. Z)" + r"\(\s*(?:prod\.?(?:uced)?\s+by|feat\.?(?:uring)?|ft\.?)\s+[^)]+\)", + + # Trailing številke, ki označujejo verzije: "33" na koncu (npr. "Modrijani - X 33") + # POZOR: zelo previdno, ker so lahko legit (npr. del naslova) + # — Ne dodam splošnega trailing številk pattern-a, ker bi razbil legitimne naslove ] def parse_artist_title(filename_or_title):