Files
Lycostorrent/app/torrent_parser.py
2026-03-23 20:59:26 +01:00

268 lines
9.2 KiB
Python

import copy
import json
import logging
import os
import re
logger = logging.getLogger(__name__)

# Path to the JSON file holding the user-editable filters configuration.
FILTERS_CONFIG_PATH = '/app/config/filters_config.json'

# Built-in configuration, used as a fallback when no usable config file exists.
# Each entry maps a filter key to a display name, an icon and the list of
# values searched for in torrent titles.
DEFAULT_FILTERS = {
    "quality": {
        "name": "Qualité",
        "icon": "📺",
        "values": ["2160p", "1080p", "720p", "480p", "360p", "4K", "UHD"],
    },
    "source": {
        "name": "Source",
        "icon": "📀",
        "values": ["BluRay", "Blu-Ray", "WEB-DL", "WEBRip", "HDTV", "DVDRip", "Remux"],
    },
    "video_codec": {
        "name": "Codec Vidéo",
        "icon": "🎬",
        "values": ["x265", "x264", "H265", "H264", "HEVC", "AVC", "AV1"],
    },
    "audio": {
        "name": "Audio",
        "icon": "🔊",
        "values": ["DTS-HD MA", "DTS", "Atmos", "TrueHD", "AAC", "AC3", "FLAC", "MP3"],
    },
    "language": {
        "name": "Langue",
        "icon": "🗣️",
        "values": ["FRENCH", "TRUEFRENCH", "VFF", "VOSTFR", "MULTI", "ENGLISH"],
    },
    "hdr": {
        "name": "HDR",
        # NOTE(review): this icon is an empty string — possibly an emoji that
        # was lost somewhere; confirm against the frontend.
        "icon": "",
        "values": ["HDR10+", "HDR10", "HDR", "DV", "Dolby Vision"],
    },
    "audio_format": {
        "name": "Format Audio",
        "icon": "🎵",
        "values": ["FLAC", "MP3", "AAC", "320", "V0", "24bit", "16bit", "Lossless"],
    },
    "music_type": {
        "name": "Type Musique",
        "icon": "💿",
        "values": ["Album", "Single", "EP", "Live", "Concert", "Discography", "Soundtrack"],
    },
    "music_source": {
        "name": "Source Musique",
        "icon": "📻",
        "values": ["CD", "Vinyl", "WEB", "SACD"],
    },
    "platform": {
        "name": "Plateforme",
        "icon": "🎮",
        "values": [
            "PC", "Windows", "Linux", "Mac", "MacOS", "Android", "iOS",
            "PS5", "PS4", "Xbox", "Switch", "Steam", "GOG",
        ],
    },
    "software_type": {
        "name": "Type Logiciel",
        "icon": "💻",
        "values": ["Portable", "Repack", "ISO", "Setup", "Crack", "Keygen", "Patch", "x64", "x86"],
    },
    "ebook_format": {
        "name": "Format Ebook",
        "icon": "📚",
        "values": ["EPUB", "PDF", "MOBI", "AZW3", "CBR", "CBZ", "DJVU"],
    },
    "ebook_type": {
        "name": "Type Ebook",
        "icon": "📖",
        "values": ["Roman", "BD", "Comics", "Manga", "Magazine", "Guide", "Audiobook"],
    },
    "game_type": {
        "name": "Type Jeu",
        "icon": "🕹️",
        "values": ["RPG", "FPS", "Action", "Adventure", "Strategy", "Simulation", "Sport", "Racing"],
    },
}
def load_filters_config():
    """Load the filters configuration from the JSON config file.

    Returns:
        dict: the mapping stored under the ``"filters"`` key of the file at
        ``FILTERS_CONFIG_PATH``. ``DEFAULT_FILTERS`` is returned instead when
        the file is missing, cannot be read or parsed, has no ``"filters"``
        key, or when that key does not hold a mapping.
    """
    try:
        with open(FILTERS_CONFIG_PATH, 'r', encoding='utf-8') as f:
            config = json.load(f)
    except FileNotFoundError:
        # No custom configuration has been saved yet: use the defaults quietly.
        return DEFAULT_FILTERS
    except Exception as e:
        # Best-effort loading: any read/parse problem falls back to defaults.
        logger.warning(f"Impossible de charger filters_config.json: {e}")
        return DEFAULT_FILTERS

    filters = config.get('filters', DEFAULT_FILTERS) if isinstance(config, dict) else None
    if not isinstance(filters, dict):
        # Callers iterate over ``filters.items()``; anything that is not a
        # mapping (list, null, string, ...) would crash them later on.
        logger.warning("Impossible de charger filters_config.json: structure 'filters' invalide")
        return DEFAULT_FILTERS
    return filters
def save_filters_config(filters):
    """Write the filters configuration to the JSON config file.

    The filters are stored under a top-level ``"filters"`` key; the parent
    directory of ``FILTERS_CONFIG_PATH`` is created when it does not exist.

    Args:
        filters: JSON-serialisable filters mapping to persist.

    Returns:
        bool: ``True`` when the file was written, ``False`` when any error
        occurred (the error is logged, not raised).
    """
    try:
        target_dir = os.path.dirname(FILTERS_CONFIG_PATH)
        os.makedirs(target_dir, exist_ok=True)
        payload = {'filters': filters}
        with open(FILTERS_CONFIG_PATH, 'w', encoding='utf-8') as out:
            json.dump(payload, out, indent=2, ensure_ascii=False)
    except Exception as err:
        logger.error(f"Erreur sauvegarde filters_config.json: {err}")
        return False
    return True
def get_default_filters():
    """Return an independent copy of the built-in default filters.

    A deep copy is returned so that callers may freely edit the nested
    ``values`` lists without altering ``DEFAULT_FILTERS`` itself (a shallow
    copy would share those nested objects with the module-level defaults).
    """
    return copy.deepcopy(DEFAULT_FILTERS)
class TorrentParser:
    """Extract metadata from torrent titles.

    Two families of regular expressions are applied to a title:

    * ``self.patterns`` — built from the configurable filters
      (``self.filters_config``), one alternation pattern per filter key;
    * ``self.fixed_patterns`` — built-in patterns (year, season, ...).

    The filters configuration is reloaded automatically when the modification
    time of the config file changes.
    """

    # Built-in patterns that are not driven by the filters configuration.
    _FIXED_PATTERNS = {
        'release_group': r'-([A-Za-z0-9]+)(?:\s*\(|$|\s*$)',
        'year': r'\b(19\d{2}|20\d{2})\b',
        'season': r'[Ss](\d{1,2})(?:[Ee]\d{1,2})?',
        'episode': r'[Ss]\d{1,2}[Ee](\d{1,2})',
        'bit_depth': r'\b(10[\s-]?bit|8[\s-]?bit)\b',
        'edition': r'\b(EXTENDED|REMASTERED|DIRECTOR\'?S?\.?CUT|UNCUT|UNRATED|THEATRICAL|DELUXE|SPECIAL\.?EDITION)\b',
        'repack': r'\b(REPACK|PROPER|RERIP|REAL)\b',
    }

    # Fixed keys reported as a single string (first match) rather than a list.
    _SINGLE_VALUE_KEYS = frozenset({'year', 'season', 'episode', 'release_group'})

    def __init__(self):
        self._config_mtime = 0  # Modification time of the config file last loaded
        self.reload_config()
        # Per-instance copy so that edits do not leak into the class constant.
        self.fixed_patterns = dict(self._FIXED_PATTERNS)

    def _check_config_update(self):
        """Reload the configuration when the config file has been modified.

        Never raises: any problem is logged as a warning and the current
        configuration is kept.
        """
        try:
            if os.path.exists(FILTERS_CONFIG_PATH):
                mtime = os.path.getmtime(FILTERS_CONFIG_PATH)
                # ``!=`` rather than ``>``: a file restored with an older
                # timestamp is a change too.
                if mtime != self._config_mtime:
                    logger.info("🔄 Config des filtres modifiée, rechargement...")
                    self.reload_config()
                    self._config_mtime = mtime
        except Exception as e:
            logger.warning(f"Erreur vérification config: {e}")

    def reload_config(self):
        """Reload the filters configuration and rebuild the dynamic patterns."""
        self.filters_config = load_filters_config()
        self._build_patterns()
        try:
            self._config_mtime = os.path.getmtime(FILTERS_CONFIG_PATH)
        except OSError:
            # No (readable) config file: keep the previously recorded time.
            pass

    @staticmethod
    def _clean_values(filter_data):
        """Return the usable (non-empty string) values of one filter entry."""
        if not isinstance(filter_data, dict):
            return []
        values = filter_data.get('values')
        if not isinstance(values, (list, tuple)):
            return []
        return [v for v in values if isinstance(v, str) and v]

    def _build_patterns(self):
        """Build ``self.patterns`` (filter key -> regex string) from the config.

        Malformed entries (non-mapping filter, non-string or empty values) are
        skipped silently so that one bad entry cannot break the whole parser.
        """
        self.patterns = {}
        if not isinstance(self.filters_config, dict):
            return
        for filter_key, filter_data in self.filters_config.items():
            values = self._clean_values(filter_data)
            if not values:
                continue
            # Escape regex metacharacters; longest alternatives first so that
            # e.g. "HDR10+" is tried before "HDR10" and "HDR".
            escaped_values = sorted((re.escape(v) for v in values), key=len, reverse=True)
            # Lookarounds instead of ``\b``: they behave identically for values
            # that start/end with a word character, but also work for values
            # such as "HDR10+" whose last character is not a word character
            # (``\b`` can never match between "+" and ".").
            self.patterns[filter_key] = r'(?<!\w)(' + '|'.join(escaped_values) + r')(?!\w)'

    def parse(self, title):
        """Parse a torrent title and return the extracted metadata.

        Args:
            title: the torrent title; a falsy title yields an empty dict.

        Returns:
            dict: configurable filter keys map to a list of distinct normalized
            values (in order of first appearance); ``year``, ``season``,
            ``episode`` and ``release_group`` map to the first match as a
            string; the other fixed keys map to a list of distinct matches.
            Keys without any match are absent.
        """
        # Pick up configuration changes before parsing.
        self._check_config_update()
        if not title:
            return {}
        return self._extract_metadata(title)

    def _extract_metadata(self, title):
        """Apply the dynamic and fixed patterns to a non-empty title."""
        parsed = {}

        # Configurable filters.
        for key, pattern in self.patterns.items():
            try:
                matches = re.findall(pattern, title, re.IGNORECASE)
            except re.error as e:
                logger.warning(f"Regex error for {key}: {e}")
                continue
            if matches:
                # dict.fromkeys de-duplicates while keeping a stable order
                # (a set would make the output order vary between runs).
                parsed[key] = list(dict.fromkeys(self._normalize(m, key) for m in matches))

        # Built-in patterns.
        for key, pattern in self.fixed_patterns.items():
            try:
                matches = re.findall(pattern, title, re.IGNORECASE)
            except re.error as e:
                logger.warning(f"Regex error for {key}: {e}")
                continue
            if not matches:
                continue
            if key in self._SINGLE_VALUE_KEYS:
                parsed[key] = matches[0]
            else:
                parsed[key] = list(dict.fromkeys(matches))

        return parsed

    def _normalize(self, value, key):
        """Return ``value`` spelled as in the config for filter ``key``.

        The lookup is case-insensitive; ``value`` is returned unchanged when it
        is falsy or when no configured value matches.
        """
        if not value:
            return value
        config = self.filters_config if isinstance(self.filters_config, dict) else {}
        lowered = value.lower()
        for config_val in self._clean_values(config.get(key)):
            if config_val.lower() == lowered:
                return config_val
        return value

    def enrich_torrent(self, torrent):
        """Store the parsed metadata of ``torrent['Title']`` under ``'parsed'``.

        The torrent mapping is modified in place and returned.
        """
        title = torrent.get('Title', '')
        torrent['parsed'] = self.parse(title)
        return torrent

    def get_filters_info(self):
        """Return the current filters configuration (for the frontend)."""
        self._check_config_update()
        return self.filters_config
# Module-wide parser instance, created lazily by get_parser() for reuse.
_parser_instance = None


def get_parser():
    """Return the shared TorrentParser instance, creating it on first use."""
    global _parser_instance
    if _parser_instance is not None:
        return _parser_instance
    _parser_instance = TorrentParser()
    return _parser_instance
def reload_parser():
    """Force the shared parser, if one exists, to reload its configuration."""
    current = _parser_instance
    if not current:
        # No shared parser has been created yet: nothing to reload.
        return
    current.reload_config()
# Manual smoke test: parse a few sample titles and print what was extracted.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    demo_parser = TorrentParser()
    sample_titles = (
        'Avatar.2009.2160p.BluRay.x265.10bit.HDR.DTS-HD.MA-GROUP',
        'Gojira.-.Fortitude.2021.FLAC.24bit.WEB.Album-GROUP',
        'The.Office.S01E01.FRENCH.1080p.WEB-DL.x264-TEAM',
        'Pink.Floyd.-.Discography.1967-2014.FLAC.Lossless-BAND',
        'Metallica.-.Live.in.Paris.2024.MP3.320.Concert-METAL',
        'The.Last.of.Us.Part.I.v1.1.2-FitGirl.Repack',
    )
    separator = '=' * 60
    for sample in sample_titles:
        print(f"\n{separator}")
        print(f"📺 {sample}")
        # Only show the fields for which something was found.
        for field, found in demo_parser.parse(sample).items():
            if found:
                print(f" {field}: {found}")