"""
|
|
RSS Source - Parser générique pour flux RSS de trackers torrent
|
|
Permet d'ajouter n'importe quel tracker qui fournit un flux RSS
|
|
"""

import json
import logging
import os
import uuid
import xml.etree.ElementTree as ET
from datetime import datetime
from urllib.parse import urlparse, parse_qs, urlencode, urlunparse

import requests

logger = logging.getLogger(__name__)


class RSSSource:
    """Handles RSS feeds from torrent trackers."""

    # Allowed URL schemes
    ALLOWED_PROTOCOLS = ['http', 'https']

    def __init__(self, flaresolverr_url=None):
        self.session = requests.Session()
        self.flaresolverr_url = flaresolverr_url or os.getenv('FLARESOLVERR_URL', '')

        # Realistic User-Agent to avoid being blocked
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'application/rss+xml, application/xml, text/xml, */*',
            'Accept-Language': 'fr-FR,fr;q=0.9,en-US;q=0.8,en;q=0.7',
            'Accept-Encoding': 'gzip, deflate, br',
            'Connection': 'keep-alive',
            'Cache-Control': 'no-cache'
        })

        if self.flaresolverr_url:
            logger.info(f"✅ Flaresolverr configured: {self.flaresolverr_url}")

    def _validate_url(self, url):
        """Checks that a URL is safe to fetch."""
        try:
            parsed = urlparse(url)
            if parsed.scheme not in self.ALLOWED_PROTOCOLS:
                logger.warning(f"⚠️ Disallowed protocol: {parsed.scheme}")
                return False
            if not parsed.netloc:
                logger.warning("⚠️ URL has no host")
                return False
            # Block local URLs (SSRF mitigation); hostname strips any port
            if parsed.hostname in ['localhost', '127.0.0.1', '0.0.0.0']:
                logger.warning("⚠️ Local URL blocked")
                return False
            return True
        except Exception as e:
            logger.warning(f"⚠️ Invalid URL: {e}")
            return False

    def fetch_feed(self, feed_config, max_results=50):
        """
        Fetches and parses an RSS feed.

        Args:
            feed_config: dict with {url, name, category, passkey, use_flaresolverr, cookies}
            max_results: maximum number of results

        Returns:
            List of results formatted like Jackett/Prowlarr output
        """
        try:
            url = feed_config.get('url', '')
            name = feed_config.get('name', 'RSS')
            passkey = feed_config.get('passkey', '')
            use_flaresolverr = feed_config.get('use_flaresolverr', False)
            cookies = feed_config.get('cookies', '')

            # Inject the passkey if present
            if passkey and '{passkey}' in url:
                url = url.replace('{passkey}', passkey)

            # Validate the URL
            if not self._validate_url(url):
                logger.error(f"❌ RSS {name}: invalid or disallowed URL")
                return []

            logger.info(f"🔗 RSS {name}: fetching {self._mask_url(url)}")

            # Use Flaresolverr if enabled and configured
            if use_flaresolverr and self.flaresolverr_url:
                content = self._fetch_with_flaresolverr(url, cookies)
                if content is None:
                    return []
            else:
                # Direct request, with cookies if provided
                if cookies:
                    cookie_dict = self._parse_cookies(cookies)
                    response = self.session.get(url, timeout=30, cookies=cookie_dict)
                else:
                    response = self.session.get(url, timeout=30)
                response.raise_for_status()
                content = response.content

            # Parse the XML
            results = self._parse_rss(content, name)

            logger.info(f"📦 RSS {name}: {len(results)} results")

            return results[:max_results]

        except requests.exceptions.Timeout:
            logger.error(f"⏱️ RSS {feed_config.get('name', 'Unknown')}: timeout")
            return []
        except requests.exceptions.RequestException as e:
            logger.error(f"❌ RSS {feed_config.get('name', 'Unknown')}: connection error - {e}")
            return []
        except Exception as e:
            logger.error(f"❌ RSS {feed_config.get('name', 'Unknown')}: error - {e}", exc_info=True)
            return []

    def _parse_cookies(self, cookies_str):
        """Parses a cookie string into a dictionary."""
        cookie_dict = {}
        if not cookies_str:
            return cookie_dict

        # Format: "name1=value1; name2=value2"
        for part in cookies_str.split(';'):
            part = part.strip()
            if '=' in part:
                key, value = part.split('=', 1)
                cookie_dict[key.strip()] = value.strip()

        return cookie_dict
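
    # Example with hypothetical values, as copied from a browser's Cookie header:
    #   _parse_cookies("uid=1234; pass=abcd")  ->  {"uid": "1234", "pass": "abcd"}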

    def _fetch_with_flaresolverr(self, url, cookies=''):
        """Fetches a URL through Flaresolverr to bypass Cloudflare."""
        try:
            logger.info(f"🛡️ Using Flaresolverr for: {self._mask_url(url)}")

            # Use a persistent session so cookies are kept between requests
            session_id = "lycostorrent_session"

            payload = {
                "cmd": "request.get",
                "url": url,
                "session": session_id,
                "maxTimeout": 60000
            }

            # Attach cookies if provided
            if cookies:
                cookie_list = []
                for part in cookies.split(';'):
                    part = part.strip()
                    if '=' in part:
                        key, value = part.split('=', 1)
                        cookie_list.append({
                            "name": key.strip(),
                            "value": value.strip(),
                            "domain": self._extract_domain(url)
                        })
                if cookie_list:
                    payload["cookies"] = cookie_list
                    logger.info(f"🍪 {len(cookie_list)} cookies attached to the request")

            response = requests.post(
                f"{self.flaresolverr_url}/v1",
                json=payload,
                timeout=65
            )
            response.raise_for_status()

            data = response.json()

            if data.get('status') == 'ok':
                solution = data.get('solution', {})
                html_content = solution.get('response', '')

                logger.info(f"✅ Flaresolverr: success (status {solution.get('status', 'N/A')})")

                # Check whether the payload is RSS XML
                if html_content.strip().startswith('<?xml') or '<rss' in html_content[:500]:
                    logger.info("📄 XML content detected")
                    return html_content.encode('utf-8')

                # If it is not XML, it is probably a login page
                if 'login' in html_content.lower() or 'connexion' in html_content.lower():
                    logger.warning("⚠️ Login page detected - check your cookies")
                else:
                    logger.warning("⚠️ Content does not look like RSS XML")
                    logger.debug(f"📄 Start of content: {html_content[:500]}")

                # Return it anyway and let the parser try
                return html_content.encode('utf-8')
            else:
                logger.error(f"❌ Flaresolverr error: {data.get('message', 'Unknown error')}")
                return None

        except requests.exceptions.Timeout:
            logger.error("⏱️ Flaresolverr timeout")
            return None
        except Exception as e:
            logger.error(f"❌ Flaresolverr error: {e}")
            return None

    def _extract_domain(self, url):
        """Extracts the domain from a URL."""
        try:
            parsed = urlparse(url)
            # Prefix with a dot so the cookie also matches subdomains
            domain = parsed.netloc
            if domain.startswith('www.'):
                domain = domain[4:]
            return f".{domain}"
        except Exception:
            return ".yggtorrent.org"

    def _parse_rss(self, content, source_name):
        """Parses RSS XML content and returns a list of results."""
        results = []

        try:
            root = ET.fromstring(content)

            # Find the items (RSS 2.0 or Atom)
            items = root.findall('.//item')
            if not items:
                # Fall back to the Atom format
                ns = {'atom': 'http://www.w3.org/2005/Atom'}
                items = root.findall('.//atom:entry', ns)

            for item in items:
                result = self._parse_item(item, source_name)
                if result:
                    results.append(result)

        except ET.ParseError as e:
            logger.error(f"❌ XML parse error: {e}")

        return results

    def _parse_item(self, item, source_name):
        """Parses a single RSS item."""
        try:
            # Common namespaces
            ns = {
                'torrent': 'http://xmlns.ezrss.it/0.1/',
                'atom': 'http://www.w3.org/2005/Atom',
                'newznab': 'http://www.newznab.com/DTD/2010/feeds/attributes/'
            }

            # Title
            title = self._get_text(item, 'title') or ''
            if not title:
                return None

            # Torrent/magnet link
            link = self._get_text(item, 'link') or ''
            enclosure = item.find('enclosure')
            if enclosure is not None:
                link = enclosure.get('url', link)

            # Magnet URI
            magnet = ''
            magnet_el = item.find('.//torrent:magnetURI', ns)
            if magnet_el is not None and magnet_el.text:
                magnet = magnet_el.text
            elif link.startswith('magnet:'):
                magnet = link
                link = ''

            # Size
            size = 0
            size_el = item.find('.//torrent:contentLength', ns)
            if size_el is not None and size_el.text:
                try:
                    size = int(size_el.text)
                except ValueError:
                    pass

            # Fall back to the enclosure length
            if size == 0 and enclosure is not None:
                try:
                    size = int(enclosure.get('length', 0))
                except ValueError:
                    pass

            # Fall back to newznab:attr
            if size == 0:
                for attr in item.findall('.//newznab:attr', ns):
                    if attr.get('name') == 'size':
                        try:
                            size = int(attr.get('value', 0))
                        except ValueError:
                            pass
                        break

            # Seeders/leechers (when available)
            seeders = 0
            leechers = 0

            seeders_el = item.find('.//torrent:seeds', ns)
            if seeders_el is not None and seeders_el.text:
                try:
                    seeders = int(seeders_el.text)
                except ValueError:
                    pass

            leechers_el = item.find('.//torrent:peers', ns)
            if leechers_el is not None and leechers_el.text:
                try:
                    leechers = int(leechers_el.text)
                except ValueError:
                    pass

            # Publication date
            pub_date = self._get_text(item, 'pubDate') or ''
            pub_date_formatted = self._format_date(pub_date)
            pub_date_iso = self._parse_date_to_iso(pub_date)

            # Details link
            details = self._get_text(item, 'guid') or self._get_text(item, 'link') or ''
            if details.startswith('magnet:'):
                details = ''

            # Category
            category = self._get_text(item, 'category') or ''

            return {
                'Title': title,
                'Link': link if not link.startswith('magnet:') else '',
                'MagnetUri': magnet,
                'Size': size,
                'SizeFormatted': self._format_size(size),
                'Seeders': seeders,
                'Peers': leechers,
                'PublishDate': pub_date_formatted,
                'PublishDateRaw': pub_date_iso or pub_date,
                'Tracker': source_name,
                'Details': details,
                'Category': category,
                'Source': 'rss'
            }

        except Exception as e:
            logger.warning(f"⚠️ Error parsing RSS item: {e}")
            return None
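
    # A sketch of the kind of <item> that _parse_item understands (values are
    # made up; the torrent:* tags use the ezRSS namespace declared above):
    #
    #   <item>
    #     <title>Some.Release.1080p</title>
    #     <link>https://tracker.example/download/123</link>
    #     <guid>https://tracker.example/torrent/123</guid>
    #     <pubDate>Tue, 24 Dec 2025 10:30:00 +0000</pubDate>
    #     <category>Movies</category>
    #     <enclosure url="https://tracker.example/dl/123.torrent" length="734003200"/>
    #     <torrent:contentLength>734003200</torrent:contentLength>
    #     <torrent:seeds>42</torrent:seeds>
    #     <torrent:peers>7</torrent:peers>
    #   </item>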

    def _get_text(self, element, tag):
        """Returns the stripped text of a child element, or None."""
        el = element.find(tag)
        if el is None:
            # Atom entries namespace their tags, so retry with the Atom namespace
            el = element.find(f'{{http://www.w3.org/2005/Atom}}{tag}')
        if el is not None and el.text:
            return el.text.strip()
        return None

    def _format_date(self, date_str):
        """Formats an RSS date as a human-readable string."""
        if not date_str:
            return ''

        try:
            # Standard RSS format: "Tue, 24 Dec 2025 10:30:00 +0000"
            for fmt in [
                '%a, %d %b %Y %H:%M:%S %z',
                '%a, %d %b %Y %H:%M:%S %Z',
                '%Y-%m-%dT%H:%M:%S%z',
                '%Y-%m-%dT%H:%M:%SZ',
                '%Y-%m-%d %H:%M:%S',
            ]:
                try:
                    dt = datetime.strptime(date_str.strip(), fmt)
                    return dt.strftime('%d/%m/%Y %H:%M')
                except ValueError:
                    continue

            # If no format matches, return the string as-is (truncated)
            return date_str[:16] if len(date_str) > 16 else date_str

        except Exception:
            return date_str

    def _parse_date_to_iso(self, date_str):
        """Converts an RSS date to ISO format for sorting."""
        if not date_str:
            return ''

        try:
            # Common date formats
            for fmt in [
                '%a, %d %b %Y %H:%M:%S %z',
                '%a, %d %b %Y %H:%M:%S %Z',
                '%a, %d %b %Y %H:%M:%S',
                '%Y-%m-%dT%H:%M:%S%z',
                '%Y-%m-%dT%H:%M:%SZ',
                '%Y-%m-%d %H:%M:%S',
                '%d/%m/%Y %H:%M:%S',
                '%d/%m/%Y %H:%M',
            ]:
                try:
                    dt = datetime.strptime(date_str.strip(), fmt)
                    # Return a sortable ISO string
                    return dt.strftime('%Y-%m-%dT%H:%M:%S')
                except ValueError:
                    continue

            return ''

        except Exception:
            return ''

    def _format_size(self, size_bytes):
        """Formats a size in bytes as a human-readable string."""
        if not size_bytes or size_bytes == 0:
            return 'N/A'

        for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
            if size_bytes < 1024:
                return f"{size_bytes:.1f} {unit}"
            size_bytes /= 1024

        return f"{size_bytes:.1f} PB"

    def _mask_url(self, url):
        """Masks sensitive information in a URL before logging it."""
        try:
            parsed = urlparse(url)
            query = parse_qs(parsed.query)

            # Mask passkey, apikey, etc.
            sensitive_keys = ['passkey', 'apikey', 'api_key', 'key', 'token', 'auth']
            for key in sensitive_keys:
                if key in query:
                    query[key] = ['***']

            # Rebuild the URL
            new_query = urlencode(query, doseq=True)
            masked = urlunparse((parsed.scheme, parsed.netloc, parsed.path,
                                 parsed.params, new_query, parsed.fragment))
            return masked

        except Exception:
            return url[:50] + '...'


class RSSManager:
    """Manager for the configured RSS feeds."""

    def __init__(self, config_path='/app/config/rss_feeds.json'):
        self.config_path = config_path
        self.rss_source = RSSSource()
        self.feeds = []
        self.load_config()

    def load_config(self):
        """Loads the RSS feed configuration."""
        try:
            if os.path.exists(self.config_path):
                with open(self.config_path, 'r') as f:
                    data = json.load(f)
                    self.feeds = data.get('feeds', [])
                    logger.info(f"✅ {len(self.feeds)} RSS feeds configured")
            else:
                self.feeds = []
                logger.info("📝 No RSS feeds configured")
        except Exception as e:
            logger.error(f"❌ Error loading RSS config: {e}")
            self.feeds = []

    def save_config(self):
        """Saves the RSS feed configuration."""
        try:
            os.makedirs(os.path.dirname(self.config_path), exist_ok=True)
            with open(self.config_path, 'w') as f:
                json.dump({'feeds': self.feeds}, f, indent=2)
            logger.info("✅ RSS configuration saved")
            return True
        except Exception as e:
            logger.error(f"❌ Error saving RSS config: {e}")
            return False
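
    # A sketch of what rss_feeds.json looks like on disk (values are
    # illustrative; see fetch_feed/add_feed for the keys actually read):
    #
    #   {
    #     "feeds": [
    #       {
    #         "id": "a1b2c3d4",
    #         "name": "MyTracker",
    #         "url": "https://tracker.example/rss?passkey={passkey}",
    #         "passkey": "...",
    #         "category": "movies",
    #         "use_flaresolverr": false,
    #         "cookies": "",
    #         "enabled": true
    #       }
    #     ]
    #   }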

    def get_feeds(self, category=None):
        """Returns the RSS feeds, optionally filtered by category."""
        if category:
            return [f for f in self.feeds if f.get('category') == category or not f.get('category')]
        return self.feeds

    def get_feeds_for_latest(self, category):
        """Returns the RSS feeds configured for a latest-releases category."""
        matching = []
        for feed in self.feeds:
            if feed.get('enabled', True):
                feed_cat = feed.get('category', '')
                if feed_cat == category or feed_cat == 'all':
                    matching.append(feed)
        return matching

    def add_feed(self, feed):
        """Adds a new RSS feed."""
        # Generate a short unique ID
        feed['id'] = str(uuid.uuid4())[:8]
        feed['enabled'] = feed.get('enabled', True)
        self.feeds.append(feed)
        self.save_config()
        return feed

    def update_feed(self, feed_id, updates):
        """Updates an existing RSS feed."""
        for feed in self.feeds:
            if feed.get('id') == feed_id:
                feed.update(updates)
                self.save_config()
                return feed
        return None

    def delete_feed(self, feed_id):
        """Deletes an RSS feed."""
        self.feeds = [f for f in self.feeds if f.get('id') != feed_id]
        self.save_config()
        return True

    def test_feed(self, url, passkey='', use_flaresolverr=False, cookies=''):
        """Tests an RSS feed and returns a preview."""
        test_config = {
            'url': url,
            'name': 'Test',
            'passkey': passkey,
            'use_flaresolverr': use_flaresolverr,
            'cookies': cookies
        }
        results = self.rss_source.fetch_feed(test_config, max_results=5)
        return {
            'success': len(results) > 0,
            'count': len(results),
            'sample': results[:3] if results else []
        }

    def fetch_latest(self, category, max_results=50):
        """Fetches the latest releases from every RSS feed for a category."""
        all_results = []
        feeds = self.get_feeds_for_latest(category)

        for feed in feeds:
            try:
                results = self.rss_source.fetch_feed(feed, max_results=max_results)
                all_results.extend(results)
            except Exception as e:
                logger.error(f"❌ Error fetching RSS {feed.get('name')}: {e}")

        # Sort by date (most recent first)
        all_results.sort(key=lambda x: x.get('PublishDateRaw', ''), reverse=True)

        return all_results[:max_results]
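

if __name__ == '__main__':
    # Minimal smoke-test sketch: the feed URL below is a placeholder, and the
    # local config path just avoids touching /app/config when run outside Docker.
    logging.basicConfig(level=logging.INFO)

    manager = RSSManager(config_path='rss_feeds.json')
    preview = manager.test_feed('https://tracker.example/rss')
    print(f"success={preview['success']} count={preview['count']}")
    for entry in preview['sample']:
        print(f"- {entry['Title']} ({entry['SizeFormatted']}, {entry['Seeders']} seeders)")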