Initial commit
This commit is contained in:
268
app/torrent_parser.py
Normal file
268
app/torrent_parser.py
Normal file
@@ -0,0 +1,268 @@
|
||||
import re
|
||||
import logging
|
||||
import json
|
||||
import os
|
||||
|
||||
logger = logging.getLogger(__name__)

# Path of the JSON file holding the filter configuration.
FILTERS_CONFIG_PATH = '/app/config/filters_config.json'

# Built-in filters, one row per filter:
# (key, display name, icon, recognised values).
_DEFAULT_FILTER_ROWS = (
    ("quality", "Qualité", "📺",
     ("2160p", "1080p", "720p", "480p", "360p", "4K", "UHD")),
    ("source", "Source", "📀",
     ("BluRay", "Blu-Ray", "WEB-DL", "WEBRip", "HDTV", "DVDRip", "Remux")),
    ("video_codec", "Codec Vidéo", "🎬",
     ("x265", "x264", "H265", "H264", "HEVC", "AVC", "AV1")),
    ("audio", "Audio", "🔊",
     ("DTS-HD MA", "DTS", "Atmos", "TrueHD", "AAC", "AC3", "FLAC", "MP3")),
    ("language", "Langue", "🗣️",
     ("FRENCH", "TRUEFRENCH", "VFF", "VOSTFR", "MULTI", "ENGLISH")),
    ("hdr", "HDR", "✨",
     ("HDR10+", "HDR10", "HDR", "DV", "Dolby Vision")),
    ("audio_format", "Format Audio", "🎵",
     ("FLAC", "MP3", "AAC", "320", "V0", "24bit", "16bit", "Lossless")),
    ("music_type", "Type Musique", "💿",
     ("Album", "Single", "EP", "Live", "Concert", "Discography", "Soundtrack")),
    ("music_source", "Source Musique", "📻",
     ("CD", "Vinyl", "WEB", "SACD")),
    ("platform", "Plateforme", "🎮",
     ("PC", "Windows", "Linux", "Mac", "MacOS", "Android", "iOS", "PS5",
      "PS4", "Xbox", "Switch", "Steam", "GOG")),
    ("software_type", "Type Logiciel", "💻",
     ("Portable", "Repack", "ISO", "Setup", "Crack", "Keygen", "Patch",
      "x64", "x86")),
    ("ebook_format", "Format Ebook", "📚",
     ("EPUB", "PDF", "MOBI", "AZW3", "CBR", "CBZ", "DJVU")),
    ("ebook_type", "Type Ebook", "📖",
     ("Roman", "BD", "Comics", "Manga", "Magazine", "Guide", "Audiobook")),
    ("game_type", "Type Jeu", "🕹️",
     ("RPG", "FPS", "Action", "Adventure", "Strategy", "Simulation", "Sport",
      "Racing")),
)

# Default configuration (fallback used when no usable config file exists).
# Maps each filter key to {"name": ..., "icon": ..., "values": [...]}.
DEFAULT_FILTERS = {
    key: {"name": label, "icon": icon, "values": list(values)}
    for key, label, icon, values in _DEFAULT_FILTER_ROWS
}
|
||||
|
||||
|
||||
def load_filters_config():
    """Load the filter configuration from the JSON config file.

    Reads ``FILTERS_CONFIG_PATH`` and returns the mapping stored under its
    top-level ``"filters"`` key.

    Falls back to a copy of ``DEFAULT_FILTERS`` when:

    * the file does not exist (silently);
    * the file cannot be read or is not valid JSON (a warning is logged);
    * the document has no ``"filters"`` key (silently);
    * the ``"filters"`` value, or the document itself, is not a JSON
      object (a warning is logged).

    Returns:
        dict: filter key -> filter definition. The fallback is a deep copy,
        so callers may mutate the result without altering
        ``DEFAULT_FILTERS``.
    """
    # Function-scope import keeps this block self-contained.
    import copy

    try:
        with open(FILTERS_CONFIG_PATH, 'r', encoding='utf-8') as f:
            config = json.load(f)
    except FileNotFoundError:
        # No config file yet: silently use the defaults.
        pass
    except (OSError, ValueError) as e:
        # ValueError covers both invalid JSON and undecodable bytes.
        logger.warning(f"Impossible de charger filters_config.json: {e}")
    else:
        if isinstance(config, dict):
            if 'filters' not in config:
                return copy.deepcopy(DEFAULT_FILTERS)
            filters = config['filters']
            if isinstance(filters, dict):
                return filters
        # Anything other than an object would crash consumers that iterate
        # the result with ``.items()``.
        logger.warning(
            "Structure invalide dans filters_config.json, "
            "utilisation des filtres par défaut"
        )

    return copy.deepcopy(DEFAULT_FILTERS)
|
||||
|
||||
|
||||
def save_filters_config(filters):
    """Write the filter configuration to the JSON config file.

    The filters are stored under a top-level ``"filters"`` key in
    ``FILTERS_CONFIG_PATH``; the parent directory is created when missing.

    Args:
        filters: JSON-serialisable filter configuration.

    Returns:
        bool: ``True`` when the file was written, ``False`` when any error
        occurred (the error is logged).
    """
    payload = {'filters': filters}
    try:
        target_dir = os.path.dirname(FILTERS_CONFIG_PATH)
        os.makedirs(target_dir, exist_ok=True)
        with open(FILTERS_CONFIG_PATH, 'w', encoding='utf-8') as handle:
            # ensure_ascii=False keeps accents and emoji icons readable.
            json.dump(payload, handle, indent=2, ensure_ascii=False)
    except Exception as err:
        logger.error(f"Erreur sauvegarde filters_config.json: {err}")
        return False
    return True
|
||||
|
||||
|
||||
def get_default_filters():
    """Return an independent copy of the default filters.

    A deep copy is returned: a shallow ``dict.copy()`` would still share
    the nested filter dicts and their ``values`` lists with
    ``DEFAULT_FILTERS``, so editing the result would alter the defaults.

    Returns:
        dict: filter key -> filter definition.
    """
    # Function-scope import keeps this block self-contained.
    import copy

    return copy.deepcopy(DEFAULT_FILTERS)
|
||||
|
||||
|
||||
class TorrentParser:
    """Extract metadata from torrent titles using regular expressions.

    Two families of patterns are applied, all case-insensitively:

    * configurable patterns (``self.patterns``), built from the ``values``
      list of every filter in the loaded filter configuration;
    * fixed patterns (``self.fixed_patterns``) for release group, year,
      season, episode, bit depth, edition and repack markers.

    The config file's modification time is checked on every ``parse`` and
    ``get_filters_info`` call; the configuration is reloaded when it changed.
    """

    # Fixed (non-configurable) patterns.
    FIXED_PATTERNS = {
        'release_group': r'-([A-Za-z0-9]+)(?:\s*\(|$|\s*$)',
        'year': r'\b(19\d{2}|20\d{2})\b',
        # The look-behind keeps tokens such as "PS5", "Windows10" or "iOS15"
        # from being read as season/episode markers.
        'season': r'(?<![A-Za-z0-9])[Ss](\d{1,2})(?:[Ee]\d{1,2})?',
        'episode': r'(?<![A-Za-z0-9])[Ss]\d{1,2}[Ee](\d{1,2})',
        'bit_depth': r'\b(10[\s-]?bit|8[\s-]?bit)\b',
        'edition': r'\b(EXTENDED|REMASTERED|DIRECTOR\'?S?\.?CUT|UNCUT|UNRATED|THEATRICAL|DELUXE|SPECIAL\.?EDITION)\b',
        'repack': r'\b(REPACK|PROPER|RERIP|REAL)\b',
    }

    # Fixed keys reported as a single string (the first match) rather than
    # as a list of matches.
    _SINGLE_VALUE_KEYS = frozenset(
        ('year', 'season', 'episode', 'release_group')
    )

    def __init__(self):
        """Load the filter configuration and prepare all patterns."""
        self._config_mtime = 0  # Modification time of the loaded config file
        # Per-instance copy so one parser's patterns can be adjusted without
        # touching the class-level table.
        self.fixed_patterns = dict(self.FIXED_PATTERNS)
        self.reload_config()

    def _check_config_update(self):
        """Reload the configuration when the config file's mtime changed.

        Best effort: any failure is logged and the current configuration is
        kept, so a config problem never makes ``parse`` raise.
        """
        try:
            if not os.path.exists(FILTERS_CONFIG_PATH):
                return
            mtime = os.path.getmtime(FILTERS_CONFIG_PATH)
            # ``!=`` rather than ``>``: a file restored from a backup can
            # carry an older mtime and must still trigger a reload.
            if mtime != self._config_mtime:
                logger.info("🔄 Config des filtres modifiée, rechargement...")
                self.reload_config()
                self._config_mtime = mtime
        except Exception as e:
            logger.warning(f"Erreur vérification config: {e}")

    def reload_config(self):
        """Reload the filter configuration and rebuild the patterns."""
        # Read the mtime before loading, so an edit that lands while the
        # file is being read is still detected by the next check.
        try:
            mtime = os.path.getmtime(FILTERS_CONFIG_PATH)
        except OSError:
            mtime = None  # No config file (or not accessible)

        filters = load_filters_config()
        if not isinstance(filters, dict):
            logger.warning(
                "Configuration des filtres invalide (objet attendu), "
                "filtres ignorés"
            )
            filters = {}
        self.filters_config = filters
        self._build_patterns()

        if mtime is not None:
            self._config_mtime = mtime

    def _build_patterns(self):
        """Build one regex per configured filter into ``self.patterns``.

        Filters that are not dicts, whose ``values`` is not a list/tuple, or
        that have no usable value are skipped. Values that are empty or not
        strings are dropped.
        """
        self.patterns = {}

        for filter_key, filter_data in self.filters_config.items():
            if not isinstance(filter_data, dict):
                continue
            values = filter_data.get('values', [])
            if not isinstance(values, (list, tuple)):
                continue
            # An empty alternative would match at every position.
            usable = [v for v in values if isinstance(v, str) and v]
            if not usable:
                continue

            # Escape special characters so values match literally.
            escaped_values = [re.escape(v) for v in usable]
            # Longest first, so "HDR10+" wins over "HDR10" and "HDR".
            escaped_values.sort(key=len, reverse=True)
            # Look-arounds instead of ``\b``: ``\b`` needs a word character
            # on one side, so it can never match after a value ending in
            # "+" (e.g. "HDR10+" followed by ".").
            self.patterns[filter_key] = (
                r'(?<!\w)(' + '|'.join(escaped_values) + r')(?!\w)'
            )

    def parse(self, title):
        """Parse a torrent title and return the extracted metadata.

        Args:
            title: torrent title. Empty or non-string values yield ``{}``.

        Returns:
            dict: for each configurable filter that matched, a list of its
            de-duplicated normalised values; ``year``, ``season``,
            ``episode`` and ``release_group`` as the first matched string;
            other fixed keys as a list of de-duplicated matches. Keys
            without a match are absent.
        """
        # Pick up config changes before parsing.
        self._check_config_update()

        if not title or not isinstance(title, str):
            return {}

        return self._extract_metadata(title)

    def _extract_metadata(self, title):
        """Apply every pattern to ``title`` (no config refresh)."""
        parsed = {}

        # Dynamic patterns (configurable filters).
        for key, pattern in self.patterns.items():
            try:
                matches = re.findall(pattern, title, re.IGNORECASE)
            except re.error as e:
                logger.warning(f"Regex error for {key}: {e}")
                continue
            if matches:
                # dict.fromkeys de-duplicates while keeping the order of
                # first appearance, so results are deterministic.
                parsed[key] = list(
                    dict.fromkeys(self._normalize(m, key) for m in matches)
                )

        # Fixed patterns.
        for key, pattern in self.fixed_patterns.items():
            try:
                matches = re.findall(pattern, title, re.IGNORECASE)
            except re.error as e:
                logger.warning(f"Regex error for {key}: {e}")
                continue
            if not matches:
                continue
            if key in self._SINGLE_VALUE_KEYS:
                parsed[key] = matches[0]
            else:
                parsed[key] = list(dict.fromkeys(matches))

        return parsed

    def _normalize(self, value, key):
        """Return ``value`` spelled the way filter ``key`` is configured.

        The comparison is case-insensitive. ``value`` is returned unchanged
        when it is falsy or when no configured value matches.
        """
        if not value:
            return value

        if key in self.filters_config:
            config_values = self.filters_config[key].get('values', [])
            lowered = value.lower()
            for config_val in config_values:
                if isinstance(config_val, str) and config_val.lower() == lowered:
                    return config_val

        return value

    def enrich_torrent(self, torrent):
        """Store the parsed metadata of ``torrent['Title']`` under 'parsed'.

        The mapping is modified in place and also returned.
        """
        title = torrent.get('Title', '')
        torrent['parsed'] = self.parse(title)
        return torrent

    def get_filters_info(self):
        """Return the current filter configuration (for the frontend).

        The configuration is refreshed first when the config file changed.
        """
        self._check_config_update()
        return self.filters_config
|
||||
|
||||
|
||||
# Module-wide parser instance, created on first use (optional, for reuse).
_parser_instance = None


def get_parser():
    """Return the shared parser instance, creating it on first call."""
    global _parser_instance
    if _parser_instance is not None:
        return _parser_instance
    _parser_instance = TorrentParser()
    return _parser_instance
|
||||
|
||||
def reload_parser():
    """Force the shared parser, if one exists, to reload its configuration.

    Does nothing when no shared parser has been created yet.
    """
    instance = _parser_instance
    if not instance:
        return
    instance.reload_config()
|
||||
|
||||
|
||||
# Manual smoke test: parse a few sample titles and print what was extracted.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    parser = TorrentParser()
    separator = '=' * 60

    test_titles = [
        'Avatar.2009.2160p.BluRay.x265.10bit.HDR.DTS-HD.MA-GROUP',
        'Gojira.-.Fortitude.2021.FLAC.24bit.WEB.Album-GROUP',
        'The.Office.S01E01.FRENCH.1080p.WEB-DL.x264-TEAM',
        'Pink.Floyd.-.Discography.1967-2014.FLAC.Lossless-BAND',
        'Metallica.-.Live.in.Paris.2024.MP3.320.Concert-METAL',
        'The.Last.of.Us.Part.I.v1.1.2-FitGirl.Repack',
    ]

    for title in test_titles:
        print(f"\n{separator}")
        print(f"📺 {title}")
        # Only fields with a non-empty value are shown.
        for key, value in parser.parse(title).items():
            if value:
                print(f" {key}: {value}")
|
||||
Reference in New Issue
Block a user