akra35567 committed on
Commit
41302cf
·
verified ·
1 Parent(s): a8e9d35

Create treinamneto.py

Browse files
Files changed (1) hide show
  1. treinamneto.py +130 -0
treinamneto.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # treinamento.py — V25 — FINE-TUNE AUTOMÁTICO (NA RAIZ)
2
+ import json
3
+ import os
4
+ import threading
5
+ import time
6
+ import requests
7
+ from log pérd
8
+ from loguru import logger
9
+ from database import Database
10
+ from sentence_transformers import SentenceTransformer
11
+ import config
12
+
13
# === CONFIGURATION ===
# Base model tag placed on the Modelfile's FROM line.
MODEL_BASE = "qwen2.5:1.5b-instruct-q4_0"
# Name sent with the create request; also assigned to config.OLLAMA_MODEL on success.
MODEL_FINE = "akira-luanda-v25"
# JSONL file holding one {"user": ..., "assistant": ...} object per line.
DATASET_PATH = "/app/dataset.jsonl"
# Scratch location of the generated Modelfile (written, uploaded, then removed).
MODelfile_PATH = "/app/Modelfile"
# Sentence-embedding model; instantiated once, when this module is imported.
EMBEDDING_MODEL = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")

# Shared state: the in-memory dataset and the lock taken when it is appended to or copied.
_lock = threading.Lock()
_dataset: list = []
23
+
24
def gerar_embedding(text: str):
    """Encode *text* with the module-level ``EMBEDDING_MODEL``.

    The model is asked for a NumPy result, which is converted with
    ``.tolist()`` before being returned.
    """
    vector = EMBEDDING_MODEL.encode(text, convert_to_numpy=True)
    return vector.tolist()
26
+
27
def salvar_dataset():
    """Rewrite the JSONL dataset file from the in-memory ``_dataset`` list.

    Each entry is serialized as one JSON object per line (UTF-8, non-ASCII
    characters kept as-is).

    The whole rewrite happens while holding ``_lock`` so that it cannot
    interleave with another thread that appends to ``_dataset`` and to the
    same file under that lock. The data is first written to a sibling
    temporary file and then moved over ``DATASET_PATH`` with ``os.replace``,
    so a failure midway leaves the previous dataset file untouched instead
    of truncated.

    Note: ``_lock`` is a plain (non-reentrant) ``threading.Lock``; do not call
    this function while already holding it.

    Raises:
        OSError: if the temporary file cannot be written or moved into place.
    """
    tmp_path = DATASET_PATH + ".tmp"
    with _lock:
        with open(tmp_path, "w", encoding="utf-8") as f:
            f.writelines(
                json.dumps(entry, ensure_ascii=False) + "\n" for entry in _dataset
            )
        # Same directory as the target, so the swap is a single rename.
        os.replace(tmp_path, DATASET_PATH)
31
+
32
def criar_modelfile():
    """Build the Modelfile text for the derived model and return it as a string.

    The result contains, in order:

    * ``FROM`` with ``MODEL_BASE``;
    * ``SYSTEM`` with ``config.PERSONA`` in a triple-quoted block;
    * the fixed ``temperature`` / ``num_ctx`` parameters;
    * one ``MESSAGE user`` / ``MESSAGE assistant`` pair per dataset entry.

    The conversation examples are emitted with the ``MESSAGE <role> <text>``
    instruction; the previous ``USER:`` / ``ASSISTANT:`` lines are not
    Modelfile instructions.

    Entries that lack a ``user`` or ``assistant`` key are skipped instead of
    raising ``KeyError`` (the dataset may have been loaded from a hand-edited
    file).
    """

    def quote(text) -> str:
        # Triple-quoted blocks allow multi-line text. An embedded '"""' would
        # close the block early, so it is swapped for "'''".
        # NOTE(review): no escape sequence for '"""' is known here; confirm
        # against the Modelfile reference if exact round-tripping matters.
        return '"""' + str(text).replace('"""', "'''") + '"""'

    # Copy under the lock so concurrent appends cannot change the list while
    # the (potentially long) text is being assembled.
    with _lock:
        examples = _dataset.copy()

    lines = [
        f"FROM {MODEL_BASE}",
        f"SYSTEM {quote(config.PERSONA)}",
        "PARAMETER temperature 0.9",
        "PARAMETER num_ctx 4096",
    ]
    for example in examples:
        user_text = example.get("user") if isinstance(example, dict) else None
        assistant_text = example.get("assistant") if isinstance(example, dict) else None
        if user_text is None or assistant_text is None:
            continue
        lines.append(f"MESSAGE user {quote(user_text)}")
        lines.append(f"MESSAGE assistant {quote(assistant_text)}")

    return "\n".join(lines) + "\n"
44
+
45
class Treinamento:
    """Collects chat interactions and periodically rebuilds the derived model.

    On construction the dataset file is loaded into the module-level
    ``_dataset`` list and a daemon thread is started that wakes up every
    ``interval_hours`` and triggers a build when enough interactions exist.
    ``registrar_interacao`` also triggers a build (in its own daemon thread)
    whenever the dataset size is at or above ``min_interactions``.

    Args:
        db: storage object; ``salvar_mensagem`` and ``salvar_embedding`` are
            called on it.
        min_interactions: minimum number of dataset entries before a build
            is attempted.
        interval_hours: sleep time of the background loop, in hours.
    """

    # Shared by all instances because every build uses the same Modelfile
    # path and target model name; only one build may run at a time.
    _train_lock = threading.Lock()

    def __init__(self, db: Database, min_interactions: int = 25, interval_hours: int = 4):
        self.db = db
        self.min_interactions = min_interactions
        self.interval = interval_hours * 3600  # seconds
        self.thread = None
        self.carregar_dataset()
        self.iniciar_loop()

    def carregar_dataset(self):
        """Load ``DATASET_PATH`` (JSONL) into the shared ``_dataset`` list.

        Does nothing when the file does not exist. Blank lines are ignored.
        A line that is not valid JSON is skipped with a warning instead of
        discarding every other entry; losing the whole list here would make
        the next full rewrite of the file drop the stored history. If the
        file cannot be read at all, the dataset is reset to empty and the
        error is logged.
        """
        if not os.path.exists(DATASET_PATH):
            return

        loaded = []
        try:
            with open(DATASET_PATH, "r", encoding="utf-8") as f:
                for line_no, raw in enumerate(f, start=1):
                    if not raw.strip():
                        continue
                    try:
                        loaded.append(json.loads(raw))
                    except json.JSONDecodeError as e:
                        logger.warning(f"Linha {line_no} inválida no dataset, ignorada: {e}")
        except (OSError, UnicodeError) as e:
            logger.error(f"Erro ao carregar dataset: {e}")
            with _lock:
                _dataset[:] = []
            return

        # Replace the contents in place so every reference to the shared list
        # sees the loaded data.
        with _lock:
            _dataset[:] = loaded
        logger.info(f"{len(loaded)} kandandos carregados do dataset!")

    def iniciar_loop(self):
        """Start the background loop thread unless one is already alive."""
        if self.thread is not None and self.thread.is_alive():
            return
        self.thread = threading.Thread(target=self._loop, daemon=True)
        self.thread.start()
        logger.info("Loop de fine-tune iniciado!")

    def registrar_interacao(self, usuario, mensagem, resposta, numero):
        """Persist one interaction and add it to the training dataset.

        Steps: store the message through ``db.salvar_mensagem``; embed the
        lower-cased "message response" text and store it through
        ``db.salvar_embedding``; append ``{"user", "assistant"}`` (stripped)
        to ``_dataset`` and to the dataset file; finally start a build thread
        when the dataset has reached ``min_interactions``.

        Any exception is logged and swallowed, so a storage failure does not
        propagate to the caller.
        """
        try:
            # === DATABASE ===
            self.db.salvar_mensagem(usuario, mensagem, resposta, numero)

            # === EMBEDDING ===
            texto = f"{mensagem} {resposta}".lower()
            embedding = gerar_embedding(texto)
            self.db.salvar_embedding(numero, mensagem, resposta, embedding, texto=texto)

            # === DATASET ===
            entry = {"user": mensagem.strip(), "assistant": resposta.strip()}
            with _lock:
                _dataset.append(entry)
                total = len(_dataset)  # read under the lock for a consistent count
                with open(DATASET_PATH, "a", encoding="utf-8") as f:
                    f.write(json.dumps(entry, ensure_ascii=False) + "\n")

            logger.info(f"Kandando salvo: {usuario[:10]}... ({total} total)")

            # === BUILD ONCE THE THRESHOLD IS REACHED ===
            # NOTE(review): this fires on every interaction past the
            # threshold; overlapping requests are dropped by _treinar, but
            # back-to-back rebuilds still happen. Confirm that is intended.
            if total >= self.min_interactions:
                threading.Thread(target=self._treinar, daemon=True).start()

        except Exception as e:
            logger.error(f"Erro ao registrar: {e}")

    def _treinar(self):
        """Build the derived model from the current dataset, if large enough.

        Returns immediately when the dataset is below ``min_interactions`` or
        when another build is already running (builds share one Modelfile
        path, so they must not overlap). Errors are logged, never raised.
        """
        if len(_dataset) < self.min_interactions:
            return

        if not self._train_lock.acquire(blocking=False):
            logger.debug("Fine-tune já em andamento; pedido ignorado.")
            return
        try:
            self._executar_treino()
        finally:
            self._train_lock.release()

    def _executar_treino(self):
        """Write the Modelfile, submit it to Ollama and always clean it up."""
        logger.info(f"INICIANDO FINE-TUNE → {MODEL_FINE} com {len(_dataset)} kandandos")

        try:
            salvar_dataset()
            modelfile = criar_modelfile()
            with open(MODelfile_PATH, "w", encoding="utf-8") as f:
                f.write(modelfile)

            # NOTE(review): Ollama's documented /api/create endpoint takes a
            # JSON body; this multipart upload is kept unchanged — confirm it
            # against the deployed Ollama version.
            data = {'name': MODEL_FINE}
            # Context manager closes the upload handle even if the request fails.
            with open(MODelfile_PATH, 'rb') as upload:
                resp = requests.post(
                    "http://localhost:11434/api/create",
                    files={'modelfile': upload},
                    data=data,
                    timeout=600,
                )

            if resp.status_code == 200:
                config.OLLAMA_MODEL = MODEL_FINE
                logger.success(f"MODELO {MODEL_FINE} CRIADO COM SUCESSO!")
            else:
                logger.error(f"Erro Ollama: {resp.status_code} {resp.text}")

        except Exception as e:
            logger.error(f"Erro no fine-tune: {e}")
        finally:
            # Remove the scratch file on success and on failure alike.
            try:
                os.remove(MODelfile_PATH)
            except FileNotFoundError:
                pass  # the failure happened before the file was written
            except OSError as e:
                logger.warning(f"Não foi possível remover {MODelfile_PATH}: {e}")

    def _loop(self):
        """Background loop: sleep ``self.interval`` seconds, then try a build."""
        while True:
            time.sleep(self.interval)
            if len(_dataset) >= self.min_interactions:
                self._treinar()