# Lycostorrent/app/lastfm_api.py

import requests
import logging
import re

logger = logging.getLogger(__name__)


class LastFmAPI:
    """Small client for the Last.fm web API, used to enrich music torrents.

    The public entry points are :meth:`search_album`, :meth:`get_album_info`
    and :meth:`enrich_torrent`.  All three are best-effort: any failure is
    logged and reported to the caller as ``None`` rather than raised.
    """

    # HTTPS endpoint: the API key is sent as a query parameter, so it should
    # not travel in clear text.
    BASE_URL = "https://ws.audioscrobbler.com/2.0/"
    # Per-request timeout, in seconds.
    REQUEST_TIMEOUT = 10
    # Number of search hits inspected when an artist name is available.
    SEARCH_LIMIT = 10
    # Maximum number of tags kept in a formatted album.
    MAX_TAGS = 5

    # Image sizes accepted as cover art.
    _COVER_SIZES = ('extralarge', 'large', 'medium')
    # "Artist - Album" separator: a hyphen, en dash or em dash between spaces.
    _SEPARATOR = re.compile(r' [-–—] ')

    def __init__(self, api_key=None):
        """Store the API key and open a reusable HTTP session.

        Args:
            api_key: Last.fm API key; without it no search is attempted.
        """
        self.api_key = api_key
        self.base_url = self.BASE_URL
        self.session = requests.Session()

    # ------------------------------------------------------------------
    # Low-level helpers
    # ------------------------------------------------------------------

    def _call(self, method, **params):
        """Call one Last.fm API method and return the decoded JSON payload.

        Raises whatever ``requests`` raises on transport/HTTP errors and
        whatever ``response.json()`` raises on an undecodable body; callers
        are expected to handle those.
        """
        query = {'method': method, 'api_key': self.api_key, 'format': 'json'}
        query.update(params)
        response = self.session.get(
            self.base_url, params=query, timeout=self.REQUEST_TIMEOUT
        )
        response.raise_for_status()
        return response.json()

    @staticmethod
    def _dig(data, *keys):
        """Follow *keys* through nested dicts; return None on any miss."""
        for key in keys:
            if not isinstance(data, dict):
                return None
            data = data.get(key)
        return data

    @staticmethod
    def _as_list(value):
        """Normalize a JSON value that may be a list, a single object or junk.

        A list is returned unchanged, a dict is wrapped in a one-element
        list, and anything else (``None``, empty string, ...) becomes ``[]``.
        """
        if isinstance(value, list):
            return value
        if isinstance(value, dict):
            return [value]
        return []

    @staticmethod
    def _pick_album_match(matches, artist):
        """Choose the search hit to use.

        Prefers the first hit whose artist equals *artist* (case-insensitive)
        and falls back to the first hit.  Returns None if there is no usable
        hit.
        """
        candidates = [m for m in matches if isinstance(m, dict)]
        if not candidates:
            return None

        wanted = (artist or '').strip().casefold()
        if wanted:
            for candidate in candidates:
                found = str(candidate.get('artist') or '').strip().casefold()
                if found == wanted:
                    return candidate
        return candidates[0]

    @classmethod
    def _strip_trailing(cls, pattern, title):
        """Remove a trailing token matched by *pattern*, unless it is the album.

        When the title has an "Artist - Album" separator and removing the
        token would leave nothing after that separator (e.g. "AC/DC - TNT",
        "Taylor Swift - 1989"), the token is the album name itself and the
        title is returned unchanged.
        """
        match = re.search(pattern, title)
        if match is None:
            return title

        head = title[:match.start()].rstrip(' -–—')
        if cls._SEPARATOR.search(title) and not cls._SEPARATOR.search(head):
            return title
        return head

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def search_album(self, query):
        """Search Last.fm for the album described by a torrent title.

        The title is cleaned, split into artist/album, and the album name is
        sent to ``album.search``.  When an artist was parsed, the hit whose
        artist matches is preferred over the first hit.  The chosen hit is
        then resolved through :meth:`get_album_info`.

        Args:
            query: Raw torrent title.

        Returns:
            The formatted album dict, or None when nothing was found or an
            error occurred (errors are logged).
        """
        try:
            clean_query = self._clean_music_title(query)
            artist, album = self._extract_artist_album(clean_query)

            logger.info(f"🎵 Recherche Last.fm: Artiste='{artist}' Album='{album}'")

            if not album:
                return None

            if not self.api_key:
                # The request cannot succeed without a key; skip the round trip.
                logger.warning("Clé API Last.fm manquante, recherche ignorée")
                return None

            # Only ask for several hits when there is an artist to match on.
            limit = self.SEARCH_LIMIT if artist else 1
            data = self._call('album.search', album=album, limit=limit)

            matches = self._as_list(
                self._dig(data, 'results', 'albummatches', 'album')
            )
            best = self._pick_album_match(matches, artist)
            if best is None:
                return None

            found_artist = best.get('artist')
            found_name = best.get('name')
            if not found_artist or not found_name:
                return None

            return self.get_album_info(found_artist, found_name)

        except Exception as e:
            logger.error(f"Erreur recherche album Last.fm: {e}")
            return None

    def get_album_info(self, artist, album):
        """Fetch the full record of an album through ``album.getinfo``.

        Args:
            artist: Artist name.
            album: Album name.

        Returns:
            The formatted album dict, or None if the API reported an error,
            returned an unexpected payload, or the request failed.
        """
        try:
            data = self._call('album.getinfo', artist=artist, album=album)

            if not isinstance(data, dict):
                return None

            # Last.fm signals failures inside the JSON body.
            if 'error' in data:
                logger.debug(f"Last.fm error: {data.get('message', 'Unknown error')}")
                return None

            album_info = data.get('album')

            # Guard against a payload where "album" is not an object.
            if album_info and isinstance(album_info, dict):
                return self._format_album(album_info)

            return None

        except Exception as e:
            logger.error(f"Erreur info album: {e}")
            return None

    def _format_album(self, album):
        """Convert a raw ``album`` object into the dict used by callers.

        Returns:
            A dict with the keys ``artist``, ``album``, ``cover_url``,
            ``summary``, ``published``, ``listeners``, ``playcount``,
            ``tags``, ``url`` and ``type`` (always ``'album'``), or None if
            formatting failed.
        """
        try:
            # Walk the image list backwards and keep the first non-empty URL
            # among the accepted sizes.
            cover_url = None
            images = album.get('image', [])
            if isinstance(images, list):
                for img in reversed(images):
                    if not isinstance(img, dict):
                        continue
                    if img.get('size') not in self._COVER_SIZES:
                        continue
                    cover_url = img.get('#text')
                    if cover_url:
                        break

            # "tag" may be a list, a single object, or missing; nameless
            # entries are dropped.
            tags = []
            tags_data = album.get('tags', {})
            if isinstance(tags_data, dict):
                entries = self._as_list(tags_data.get('tag'))
                names = [t.get('name') for t in entries if isinstance(t, dict)]
                tags = [name for name in names if name][:self.MAX_TAGS]

            # Optional wiki section.
            summary = ''
            published = ''
            wiki = album.get('wiki', {})
            if isinstance(wiki, dict):
                summary = wiki.get('summary', '')
                published = wiki.get('published', '')

            return {
                'artist': album.get('artist', ''),
                'album': album.get('name', ''),
                'cover_url': cover_url,
                'summary': summary,
                'published': published,
                'listeners': album.get('listeners', 0),
                'playcount': album.get('playcount', 0),
                'tags': tags,
                'url': album.get('url', ''),
                'type': 'album'
            }
        except Exception as e:
            logger.error(f"Erreur formatage album: {e}")
            return None

    # ------------------------------------------------------------------
    # Title parsing
    # ------------------------------------------------------------------

    def _clean_music_title(self, title):
        """Strip torrent noise from a music title, keeping "Artist - Album".

        Removes a leading ``[...]`` prefix, format/quality markers, years,
        edition markers, "Discography" suffixes, release-group suffixes and
        truncated brackets.

        Args:
            title: Raw torrent title.

        Returns:
            The cleaned title, or ``''`` when *title* is empty or not a string.
        """
        if not title or not isinstance(title, str):
            return ''

        original = title

        # Leading prefix such as "[Request]".
        title = re.sub(r'^\s*\[.*?\]\s*', '', title)

        # Dots and underscores act as spaces; hyphens are kept because they
        # separate artist and album.
        title = title.replace('.', ' ').replace('_', ' ')

        # Whole (...) / [...] groups that describe the format.  This must run
        # BEFORE the bare keywords are stripped, otherwise the keyword is
        # gone and an empty "()" / "[]" is left behind.
        title = re.sub(r'\s*\([^)]*(?:FLAC|MP3|Lossless|kbps|Vinyl|CD|WEB)[^)]*\)\s*', ' ', title, flags=re.IGNORECASE)
        title = re.sub(r'\s*\[[^\]]*(?:FLAC|MP3|Lossless|kbps|Vinyl|CD|WEB)[^\]]*\]\s*', ' ', title, flags=re.IGNORECASE)

        # Bare quality tags.
        title = re.sub(r'\b(FLAC|MP3|AAC|WAV|OGG|ALAC|DSD|WEB|CD|VINYL)\b', '', title, flags=re.IGNORECASE)
        title = re.sub(r'\b(320|256|192|128|24bit|16bit)\s*(kbps|khz)?\b', '', title, flags=re.IGNORECASE)
        title = re.sub(r'\b(CBR|VBR|Lossless)\b', '', title, flags=re.IGNORECASE)

        # Year in parentheses, e.g. "(2025)".
        title = re.sub(r'\s*\(\s*(19|20)\d{2}\s*\)\s*', ' ', title)

        # Edition markers such as "(EP)", "(Single)", "(Remaster)".
        title = re.sub(r'\s*\(\s*(EP|Single|Remaster|Remastered|Deluxe|Edition|Upconvert)\s*\)\s*', ' ', title, flags=re.IGNORECASE)

        # Brackets emptied by the steps above.
        title = re.sub(r'\s*(?:\(\s*\)|\[\s*\])\s*', ' ', title)

        # "Discography" and everything after it.
        title = re.sub(r'\s*[-–]\s*Discography.*$', '', title, flags=re.IGNORECASE)
        title = re.sub(r'\s+Discography.*$', '', title, flags=re.IGNORECASE)

        # Truncated groups with no closing bracket, e.g. "(20" or "[FLA".
        title = re.sub(r'\s*\([^)]*$', '', title)
        title = re.sub(r'\s*\[[^\]]*$', '', title)

        # Collapse whitespace so the end-anchored rules below see the real end.
        title = re.sub(r'\s+', ' ', title).strip()

        # Trailing release group ("-GROUP"), then trailing year.  Both are
        # kept when they are the entire album part of "Artist - Album".
        title = self._strip_trailing(r'\s*-\s*[A-Z0-9]{2,}$', title)
        title = self._strip_trailing(r'\s+(?:19|20)\d{2}$', title)

        # Stray hyphens at either end, then a final whitespace pass.
        title = re.sub(r'^-+\s*|\s*-+$', '', title)
        title = re.sub(r'\s+', ' ', title).strip()

        logger.debug(f"Music title cleaned: '{original[:50]}' → '{title}'")

        return title

    def _extract_artist_album(self, title):
        """Split a cleaned title into ``(artist, album)``.

        The first " - " (hyphen, en dash or em dash between spaces) is the
        separator.  Without a separator, or when one side is empty, the
        artist is ``''`` and the remaining text is returned as the album.

        Args:
            title: Cleaned title.

        Returns:
            A ``(artist, album)`` tuple of strings.
        """
        title = title or ''

        parts = self._SEPARATOR.split(title, maxsplit=1)
        if len(parts) == 2:
            artist = parts[0].strip()
            album = parts[1].strip()
            if album:
                return artist, album
            # Nothing after the separator: treat the text before it as the
            # album, without the dangling dash.
            return '', artist

        # No separator: the whole title is assumed to be the album.
        return '', title.strip()

    def enrich_torrent(self, torrent_title):
        """Look up Last.fm data for a music torrent.

        Args:
            torrent_title: Raw torrent title.

        Returns:
            The formatted album dict from :meth:`search_album`, or None when
            no album was found or an error occurred.
        """
        try:
            album_data = self.search_album(torrent_title)

            if not album_data:
                logger.warning(f"❌ Album non trouvé: {torrent_title[:60]}")
                return None

            logger.info(f"✅ Album trouvé: {album_data['artist']} - {album_data['album']}")
            return album_data

        except Exception as e:
            logger.error(f"Erreur enrichissement musique: {e}")
            return None