Spaces:

akra35567
/

akira

Running

File size: 14,591 Bytes

915ac48
5cb76a2
 
2df2887
 
5cb76a2
c9bf6b7
e7f4436
8825209
e7f4436
8d13160
02b47b2
5ab2f61
5cb76a2
e7f4436
565f66c
5ab2f61
5cb76a2
10f586c
8d13160
 
 
e7f4436
 
2e928fd
5cb76a2
341ed7e
 
 
 
5ab2f61
341ed7e
5ab2f61
 
341ed7e
5ab2f61
 
 
341ed7e
5ab2f61
341ed7e
 
5ab2f61
341ed7e
5ab2f61
 
341ed7e
 
5cb76a2
4ad8c5a
e7f4436
 
5cb76a2
 
e9a4858
4ad8c5a
5ab2f61
e7f4436
2df2887
 
5ab2f61
5cb76a2
 
 
 
 
e7f4436
2df2887
e7f4436
9354e45
 
e9a4858
5ab2f61
9354e45
 
341ed7e
9354e45
5ab2f61
 
e7f4436
aea5255
02b47b2
915ac48
 
6b59c24
915ac48
341ed7e
5cb76a2
 
e7f4436
915ac48
 
6b59c24
915ac48
5762ef6
 
5cb76a2
10f586c
 
 
 
 
 
e7f4436
341ed7e
c227199
e7f4436
 
 
5cb76a2
341ed7e
e7f4436
 
341ed7e
 
2e928fd
e7f4436
10f586c
5ab2f61
5cb76a2
10f586c
5ab2f61
 
10f586c
5ab2f61
 
10f586c
5cb76a2
 
2df2887
5cb76a2
 
 
d508ed4
341ed7e
4ad8c5a
e7f4436
 
10f586c
4ad8c5a
e7f4436
5cb76a2
 
 
c227199
915ac48
e7f4436
2df2887
e7f4436
d508ed4
e7f4436
 
341ed7e
666a186
02b47b2
e7f4436
915ac48
10f586c
915ac48
 
d508ed4
5762ef6
5cb76a2
 
 
c227199
e7f4436
 
5ab2f61
10f586c
5ab2f61
 
10f586c
5cb76a2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5ab2f61
2df2887
5ab2f61
2df2887
5ab2f61
5cb76a2
2df2887
5cb76a2
 
2df2887
 
5cb76a2
 
5ab2f61
5cb76a2
 
 
 
2df2887
 
 
 
 
 
5cb76a2
 
5ab2f61
5cb76a2
 
e7f4436
5cb76a2
 
 
e7f4436
5ab2f61
5cb76a2
 
 
 
 
 
 
 
 
 
2df2887
5cb76a2
 
 
 
 
 
 
e7f4436
5cb76a2
 
 
 
 
 
2df2887
 
 
5cb76a2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5ab2f61
 
 
 
5cb76a2
 
 
 
 
 
2df2887
 
 
 
 
5cb76a2
 
 
 
 
 
 
 
 
 
 
 
 
5ab2f61
 
5cb76a2
2df2887
5cb76a2

# modules/api.py
"""
API wrapper Akira IA.
Prioridade: LOCAL (Hermes 7B finetuned) → Mistral API → Gemini → Fallback
100% compatível com Hugging Face + REPLY INTELIGENTE
"""
import time
import re
import datetime
from typing import Dict, Optional, Any, List
from flask import Flask, Blueprint, request, jsonify
from loguru import logger

# LLM PROVIDERS
import google.generativeai as genai
from mistralai import Mistral

# LOCAL MODULES
from .local_llm import HermesLLM  # ← INSTÂNCIA PRONTA!
from .contexto import Contexto
from .database import Database
from .treinamento import Treinamento
from .exemplos_naturais import ExemplosNaturais
import modules.config as config

# --- CACHE SIMPLES COM TTL ---
class SimpleTTLCache:
    """Minimal mapping-like cache whose entries expire after a fixed lifetime.

    Supports ``key in cache``, ``cache[key] = value`` and ``cache[key]``.
    Expired entries are evicted lazily, when a membership test or lookup
    touches them.
    """

    def __init__(self, ttl_seconds: int = 300):
        """Create an empty cache whose entries live for ``ttl_seconds`` seconds."""
        self.ttl = ttl_seconds
        # key -> (value, absolute expiry timestamp as returned by time.time())
        self._store = {}

    def __contains__(self, key):
        """Return True when ``key`` is stored and still fresh; evict it if stale."""
        entry = self._store.get(key)
        if entry is None:
            return False
        deadline = entry[1]
        if time.time() <= deadline:
            return True
        # Entry outlived its TTL: drop it so the store does not keep it around.
        del self._store[key]
        return False

    def __setitem__(self, key, value):
        """Store ``value`` under ``key`` with a fresh expiry of now + TTL."""
        deadline = time.time() + self.ttl
        self._store[key] = (value, deadline)

    def __getitem__(self, key):
        """Return the value for ``key``; raise KeyError if missing or expired."""
        if key in self:
            value, _deadline = self._store[key]
            return value
        raise KeyError(key)

# --- GERENCIADOR DE LLMs ---
class LLMManager:
    """Try each configured LLM backend in priority order and return the first reply.

    Priority: local Hermes model, then the Mistral API, then Gemini. A backend
    is only listed in ``self.providers`` when it was set up successfully in the
    constructor. When every backend fails, a configurable fallback text is
    returned instead of raising.
    """

    # Token budget shared by every backend; kept small so replies stay short and fast.
    MAX_TOKENS = 60

    def __init__(self, config_instance):
        """Set up all backends described by ``config_instance`` and record which are usable."""
        self.config = config_instance
        self.mistral_client: Optional[Mistral] = None
        self.gemini_model: Optional[genai.GenerativeModel] = None
        self.hermes_llm = self._import_hermes()
        self._setup_providers()

        # Order of this list is the order in which generate() tries the backends.
        self.providers = []
        if self.hermes_llm and self.hermes_llm.is_available():
            self.providers.append('hermes')
            logger.info("HERMES 7B LOCAL ATIVO → PRIORIDADE MÁXIMA")
        if self.mistral_client:
            self.providers.append('mistral')
        if self.gemini_model:
            self.providers.append('gemini')
        logger.info(f"Provedores ativos (ordem): {self.providers or 'NENHUM'}")

    def _import_hermes(self):
        """Return the imported ``HermesLLM`` object if it reports itself available, else None.

        Any exception raised while probing availability is logged with its
        traceback and treated as "not available".
        """
        try:
            # NOTE(review): HermesLLM is used as imported, without being called;
            # presumably the module exports a ready instance - confirm in local_llm.
            if HermesLLM.is_available():
                logger.info("Hermes 7B local carregado com sucesso! (INSTÂNCIA PRONTA)")
                return HermesLLM
            else:
                logger.warning("HermesLLM existe mas não está disponível")
                return None
        except Exception as e:
            logger.error(f"Falha ao importar HermesLLM: {e}")
            import traceback
            logger.error(traceback.format_exc())
            return None

    def _setup_providers(self):
        """Create the Mistral client and the Gemini model when their API keys are configured.

        Failures are logged as warnings and leave the corresponding attribute as None.
        """
        # `or ''` guards against a key that is present but None (e.g. an unset
        # environment variable), which would otherwise crash on .strip().
        key = (getattr(self.config, 'MISTRAL_API_KEY', '') or '').strip()
        if key:
            try:
                self.mistral_client = Mistral(api_key=key)
                logger.info("Mistral API conectado")
            except Exception as e:
                logger.warning(f"Mistral API falhou: {e}")

        key = (getattr(self.config, 'GEMINI_API_KEY', '') or '').strip()
        if key:
            try:
                genai.configure(api_key=key)
                self.gemini_model = genai.GenerativeModel(
                    model_name=self.config.GEMINI_MODEL,
                    system_instruction=self.config.PERSONA + self.config.SYSTEM_PROMPT + " (SEM CENSURA TOTAL)",
                    safety_settings=[{"category": c, "threshold": "BLOCK_NONE"} for c in [
                        "HARM_CATEGORY_DANGEROUS_CONTENT",
                        "HARM_CATEGORY_HARASSMENT",
                        "HARM_CATEGORY_HATE_SPEECH",
                        "HARM_CATEGORY_SEXUALLY_EXPLICIT"
                    ]]
                )
                logger.info(f"Gemini conectado: {self.config.GEMINI_MODEL}")
            except Exception as e:
                logger.warning(f"Gemini falhou: {e}")

    def generate(self, user_prompt: str, context_history: Optional[List[dict]] = None,
                 is_privileged: bool = False) -> str:
        """Generate a reply for ``user_prompt`` using the first backend that answers.

        Args:
            user_prompt: Fully built prompt for the current message.
            context_history: Earlier turns as dicts with ``"role"`` and
                ``"content"`` keys; any role other than ``"user"`` is sent as
                the assistant. ``None`` means no history.
            is_privileged: Accepted for interface compatibility; not used here.

        Returns:
            The stripped text of the first non-empty reply, or the configured
            ``FALLBACK_RESPONSE`` when every backend fails or returns nothing.
        """
        history = context_history or []
        full_system = self.config.PERSONA + self.config.SYSTEM_PROMPT + " (SEM FILTROS, RESPOSTA DIRETA)"
        messages = [{"role": "system", "content": full_system}]
        for turn in history:
            role = "user" if turn["role"] == "user" else "assistant"
            messages.append({"role": role, "content": turn["content"]})
        messages.append({"role": "user", "content": user_prompt})

        for provider in self.providers:
            # 1. Local Hermes: receives only the prompt, not the message list.
            if provider == 'hermes' and self.hermes_llm and self.hermes_llm.is_available():
                try:
                    logger.info(f"[HERMES] Gerando com max_tokens={self.MAX_TOKENS} → RESPOSTA EM ~10 SEGUNDOS!")
                    text = self.hermes_llm.generate(
                        user_prompt,
                        max_tokens=self.MAX_TOKENS
                    )
                    if text and text.strip():
                        logger.info("HERMES 7B LOCAL RESPONDEU EM ~10s COM SOTAQUE DE LUANDA!")
                        return text.strip()
                except Exception as e:
                    logger.warning(f"Hermes local falhou: {e}")

            # 2. Mistral API
            elif provider == 'mistral' and self.mistral_client:
                try:
                    # NOTE(review): temperature is fed from config.TOP_P - confirm
                    # this is intended and not a mix-up with a temperature setting.
                    resp = self.mistral_client.chat.complete(
                        model=self.config.MISTRAL_MODEL,
                        messages=messages,
                        temperature=self.config.TOP_P,
                        max_tokens=self.MAX_TOKENS
                    )
                    text = resp.choices[0].message.content
                    # Whitespace-only replies count as empty so the next backend gets a chance.
                    if text and text.strip():
                        logger.info("Mistral API respondeu")
                        return text.strip()
                except Exception as e:
                    logger.warning(f"Mistral API falhou: {e}")

            # 3. Gemini
            elif provider == 'gemini' and self.gemini_model:
                try:
                    # Skip messages[0]: the system text was already given to the
                    # model as system_instruction in _setup_providers().
                    gemini_hist = []
                    for msg in messages[1:]:
                        role = "user" if msg["role"] == "user" else "model"
                        gemini_hist.append({"role": role, "parts": [{"text": msg["content"]}]})
                    resp = self.gemini_model.generate_content(
                        gemini_hist,
                        generation_config=genai.GenerationConfig(
                            max_output_tokens=self.MAX_TOKENS,
                            temperature=self.config.TOP_P
                        )
                    )
                    text = resp.text or ''
                    if text.strip():
                        logger.info("Gemini respondeu")
                        return text.strip()
                except Exception as e:
                    logger.warning(f"Gemini falhou: {e}")

        fallback = getattr(self.config, 'FALLBACK_RESPONSE', 'Desculpa, puto, tô off hoje.')
        logger.warning(f"TODOS LLMs FALHARAM → Fallback: {fallback}")
        return fallback

# --- RESTO DO CÓDIGO (API, ROTAS, ETC) ---
class AkiraAPI:
    """Flask application exposing the Akira chatbot over HTTP.

    Routes are registered on a blueprint mounted under ``/api``:

    * ``POST /api/akira``  - build a prompt from the request and return a reply.
    * ``GET  /api/health`` - always answers ``OK``.

    Per-user ``Contexto`` objects are kept in a TTL cache and reused between
    requests until they expire.
    """

    # User names (compared lower-cased) that are always treated as privileged.
    _OWNER_NAMES = ('isaac', 'isaac quarenta')
    # Substrings that mark a message as a blocked request.
    _BLOCKED_KEYWORDS = ('exec', 'bash', 'open', 'api_key', 'key')

    def __init__(self, cfg_module):
        """Build the Flask app, LLM manager, routes and optional periodic trainer.

        Args:
            cfg_module: Configuration object/module read via attribute access.
        """
        self.config = cfg_module
        self.app = Flask(__name__)
        self.api = Blueprint("akira_api", __name__)
        # NOTE(review): MEMORIA_MAX is used here as a TTL in seconds - confirm
        # that is what the setting means.
        self.contexto_cache = SimpleTTLCache(ttl_seconds=getattr(self.config, 'MEMORIA_MAX', 300))
        self.providers = LLMManager(self.config)
        self.exemplos = ExemplosNaturais()
        self.logger = logger
        self._setup_personality()
        self._setup_routes()
        self._setup_trainer()
        self.app.register_blueprint(self.api, url_prefix="/api")

    def _setup_personality(self):
        """Copy mood, interests and limits from the config onto the instance."""
        self.humor = getattr(self.config, 'HUMOR_INICIAL', 'neutra')
        self.interesses = list(getattr(self.config, 'INTERESSES', []))
        self.limites = list(getattr(self.config, 'LIMITES', []))

    @staticmethod
    def _text_field(data: Dict[str, Any], key: str, default: str = '') -> str:
        """Read ``key`` from the request payload as a string.

        A missing key or an explicit JSON ``null`` yields ``default``; any other
        value (e.g. a number) is converted with ``str()``.
        """
        value = data.get(key, default)
        if value is None:
            value = default
        return str(value)

    def _setup_routes(self):
        """Register the ``/akira`` and ``/health`` handlers on the blueprint."""
        @self.api.route('/akira', methods=['POST'])
        def akira_endpoint():
            """Answer one chat message; returns JSON with at least ``resposta``."""
            try:
                data = request.get_json(force=True, silent=True)
                # A body that is not a JSON object (list, string, number, invalid
                # JSON) has no fields to read; treat it as empty so it ends in 400.
                if not isinstance(data, dict):
                    data = {}
                usuario = self._text_field(data, 'usuario', 'anonimo')
                numero = self._text_field(data, 'numero')
                mensagem = self._text_field(data, 'mensagem').strip()
                mensagem_citada = self._text_field(data, 'mensagem_citada').strip()
                # A quoted message means the user replied to an earlier message.
                is_reply = bool(mensagem_citada)
                mensagem_original = mensagem_citada

                if not mensagem and not mensagem_citada:
                    return jsonify({'error': 'mensagem obrigatória'}), 400

                self.logger.info(f"{usuario} ({numero}): {mensagem[:80]}{' (REPLY)' if is_reply else ''}")

                contexto = self._get_user_context(usuario)
                analise = contexto.analisar_intencao_e_normalizar(mensagem, contexto.obter_historico())

                is_owner = usuario.lower() in self._OWNER_NAMES
                if is_owner:
                    analise['usar_nome'] = False

                # NOTE(review): plain substring matching also fires inside longer
                # words (e.g. 'key' in "monkey") - confirm this is intended.
                mensagem_lower = mensagem.lower()
                is_blocking = any(k in mensagem_lower for k in self._BLOCKED_KEYWORDS)
                # Default to "no privileged numbers" when the setting is absent.
                is_privileged = is_owner or numero in getattr(self.config, 'PRIVILEGED_USERS', ())

                prompt = self._build_prompt(
                    usuario, numero, mensagem, mensagem_citada, analise, contexto,
                    is_blocking, is_privileged, is_reply
                )

                resposta = self._generate_response(prompt, contexto.obter_historico_para_llm(), is_privileged)

                contexto.atualizar_contexto(mensagem, resposta)

                # Best effort: a failure to record the interaction must not fail the reply.
                try:
                    db = Database(getattr(self.config, 'DB_PATH', 'akira.db'))
                    trainer = Treinamento(db)
                    trainer.registrar_interacao(usuario, mensagem, resposta, numero, is_reply, mensagem_original)
                except Exception as e:
                    self.logger.warning(f"Registro de interação falhou: {e}")

                response_data = {'resposta': resposta}
                # Best effort: learnings are an optional extra in the response.
                try:
                    aprendizados = contexto.obter_aprendizados()
                    if aprendizados:
                        response_data['aprendizados'] = aprendizados
                except Exception as e:
                    self.logger.warning(f"Aprendizados falharam: {e}")

                return jsonify(response_data)

            except Exception:
                # Top-level boundary: log with traceback and answer with the fallback text.
                self.logger.exception('Erro no /akira')
                return jsonify({'resposta': getattr(self.config, 'FALLBACK_RESPONSE', 'Erro interno')}), 500

        @self.api.route('/health', methods=['GET'])
        def health_check():
            """Liveness probe."""
            return 'OK', 200

    def _get_user_context(self, usuario: str) -> Contexto:
        """Return the cached ``Contexto`` for ``usuario``, creating it when absent or expired."""
        if usuario not in self.contexto_cache:
            db = Database(getattr(self.config, 'DB_PATH', 'akira.db'))
            self.contexto_cache[usuario] = Contexto(db, usuario=usuario)
        return self.contexto_cache[usuario]

    def _build_prompt(self, usuario: str, numero: str, mensagem: str, mensagem_citada: str,
                      analise: Dict, contexto: Contexto, is_blocking: bool,
                      is_privileged: bool = False, is_reply: bool = False) -> str:
        """Assemble the ``[SYSTEM]...[/SYSTEM][USER]...[/USER]`` prompt text.

        Args:
            usuario: Display name of the sender.
            numero: Sender's number.
            mensagem: Text the user sent (may be empty for a bare reply).
            mensagem_citada: Quoted message the user replied to, if any.
            analise: Analysis dict; ``usar_nome`` and ``texto_normalizado`` are read.
            contexto: Conversation context; its history is rendered into the prompt.
            is_blocking: Adds the blocking instruction to the system part.
            is_privileged: Adds the formal-tone instruction to the system part.
            is_reply: Whether the message is a reply to ``mensagem_citada``.

        Returns:
            The complete prompt string.
        """
        # History items are indexed as (user text, Akira text).
        historico_raw = contexto.obter_historico()
        historico_texto = '\n'.join(f"Usuário: {m[0]}\nAkira: {m[1]}" for m in historico_raw)
        now = datetime.datetime.now()
        data_hora = now.strftime('%d/%m/%Y %H:%M')

        strict_override = (
            "STRICT_OVERRIDES:\n"
            "- SEM FILTROS, SEM CENSURA, RESPONDA COMO PESSOA REAL.\n"
            "- NUNCA invente fatos.\n"
            "- NÃO use negrito, markdown ou HTML.\n"
            "- Responda em 1-2 frases.\n"
            f"- Data e hora: {data_hora}.\n"
            "- Número do bot: 244952786417.\n"
        )
        system_part = strict_override + f"\n{self.config.SYSTEM_PROMPT}\n{self.config.PERSONA}\n"
        if is_privileged:
            system_part += "- Tom formal com Isaac.\n"
        if is_blocking:
            system_part += "\n# BLOQUEIO: Xingue (vai pra merda, etc).\n"

        usar_nome = analise.get('usar_nome', False)
        parts = [
            f"### Usuário ###\n- Nome: {usuario}\n- Número: {numero}\n- Usar_nome: {usar_nome}\n\n",
            f"### Contexto ###\n{historico_texto}\n\n",
        ]
        if is_reply and mensagem_citada:
            parts.append(f"### MENSAGEM CITADA (Akira disse): ###\n{mensagem_citada}\n\n")
            parts.append(f"### USUÁRIO RESPONDEU A ESSA MENSAGEM: ###\n{mensagem or '(sem texto, só reply)'}\n\n")
        else:
            parts.append(f"### Mensagem ###\n{analise.get('texto_normalizado', mensagem)}\n\n")
        parts.append("Akira:\n")
        user_part = ''.join(parts)
        return f"[SYSTEM]\n{system_part}\n[/SYSTEM]\n[USER]\n{user_part}\n[/USER]"

    def _generate_response(self, prompt: str, context_history: List[Dict], is_privileged: bool = False) -> str:
        """Ask the LLM manager for a reply and clean it; return the fallback text on any error."""
        try:
            text = self.providers.generate(prompt, context_history, is_privileged)
            return self._clean_response(text, prompt)
        except Exception:
            self.logger.exception('Falha ao gerar resposta')
            return getattr(self.config, 'FALLBACK_RESPONSE', 'Desculpa, estou off.')

    def _clean_response(self, text: Optional[str], prompt: Optional[str] = None) -> str:
        """Normalize raw model output into the text sent back to the user.

        Strips one leading label such as ``akira:``, removes markdown/HTML marker
        characters, optionally shortens the reply to two sentences, and truncates
        it to ``MAX_RESPONSE_CHARS`` (default 280).

        Args:
            text: Raw model output; ``None`` or empty yields ``''``.
            prompt: Prompt that produced ``text``; ``None`` is treated as empty.

        Returns:
            The cleaned reply.
        """
        if not text:
            return ''
        # Treat a missing prompt as empty so the heuristics below cannot crash on None.
        prompt_text = prompt or ''
        cleaned = text.strip()
        for prefix in ['akira:', 'Resposta:', 'resposta:', '### Resposta:']:
            if cleaned.lower().startswith(prefix.lower()):
                cleaned = cleaned[len(prefix):].strip()
                break
        cleaned = re.sub(r'[\*\_`~\[\]<>]', '', cleaned)
        # Split after sentence-ending punctuation followed by whitespace.
        sentences = re.split(r'(?<=[.!?])\s+', cleaned)
        if len(sentences) > 2 and 'is_privileged=true' not in prompt_text:
            prompt_lower = prompt_text.lower()
            # NOTE(review): prompts from _build_prompt always contain "Akira:", so
            # this shortening never applies to them - confirm that is intended.
            if not any(k in prompt_lower for k in ['oi', 'olá', 'akira']) and len(prompt_text) > 20:
                cleaned = ' '.join(sentences[:2]).strip()
        max_chars = getattr(self.config, 'MAX_RESPONSE_CHARS', 280)
        return cleaned[:max_chars]

    def _setup_trainer(self):
        """Start periodic training when ``START_PERIODIC_TRAINER`` is enabled in the config."""
        if getattr(self.config, 'START_PERIODIC_TRAINER', False):
            try:
                db = Database(getattr(self.config, 'DB_PATH', 'akira.db'))
                trainer = Treinamento(db, interval_hours=getattr(self.config, 'TRAINING_INTERVAL_HOURS', 24))
                trainer.start_periodic_training()
                self.logger.info("Treinamento periódico iniciado.")
            except Exception as e:
                self.logger.exception(f"Treinador falhou: {e}")