| |
| """ |
| Buddy Math - Memory Service (Pinecone Edition) |
| ============================================== |
| Version v2.1: improved the similarity-detection mechanism. |
| Instead of checking word order (which breaks under OCR), we check word overlap (Jaccard). |
| """ |
|
|
| import os |
| import json |
| import logging |
| import uuid |
| import re |
| from typing import Optional, Dict |
|
|
| from sentence_transformers import SentenceTransformer |
| from pinecone import Pinecone |
|
|
# Module-level logger; all messages from this file are emitted under the
# "MemoryService" logger name.
logger = logging.getLogger("MemoryService")
|
|
class MemoryService:
    """Cache of previously solved problems, stored in a Pinecone index.

    Each learned problem is embedded with a SentenceTransformer model and
    upserted with its text, topic and JSON-serialized solution as metadata.
    A lookup must pass two checks before a cached solution is returned:
    the vector similarity score (meaning) and a Jaccard word-overlap score
    (content), which ignores word order and therefore tolerates OCR output
    whose words come back shuffled.

    When ``PINECONE_API_KEY`` is missing or initialization fails,
    ``self.index`` is ``None`` and every public method is a no-op.
    """

    # Name of the Pinecone index that is opened in __init__.
    INDEX_NAME = "buddy-math"
    # SentenceTransformer model used to embed problem texts.
    EMBEDDING_MODEL = "all-MiniLM-L6-v2"
    # Problems shorter than this (after stripping) are never stored.
    MIN_PROBLEM_CHARS = 10
    # Only this many leading characters of a problem are kept in metadata.
    MAX_STORED_TEXT_CHARS = 1000
    # Upper bound on the UTF-8 size of the serialized solution.
    # NOTE(review): presumably chosen to leave headroom under Pinecone's
    # per-record metadata size limit - confirm against the current limit.
    MAX_SOLUTION_JSON_BYTES = 38000

    # Matches every character that is neither a word character nor whitespace.
    _PUNCTUATION_RE = re.compile(r'[^\w\s]')

    def __init__(self):
        """Connect to Pinecone and load the embedding model.

        Never raises: on a missing API key or any initialization error the
        problem is logged and the service is left disabled
        (``self.index is None``).
        """
        self.api_key = os.environ.get("PINECONE_API_KEY")
        self.index_name = self.INDEX_NAME
        # Define every attribute up front so a disabled service never hits
        # an AttributeError later.
        self.pc = None
        self.index = None
        self.embedder = None

        if not self.api_key:
            logger.warning("⚠️ PINECONE_API_KEY not found! Memory will be disabled.")
            return

        try:
            self.pc = Pinecone(api_key=self.api_key)
            index = self.pc.Index(self.index_name)

            logger.info("⏳ Loading embedding model...")
            self.embedder = SentenceTransformer(self.EMBEDDING_MODEL)

            # Publish the index only once the embedder is ready too, so a
            # non-None index always means the service is fully usable.
            self.index = index
            logger.info("✅ Brain initialized (Pinecone + MiniLM)")
        except Exception as e:
            # Best-effort boundary: memory is optional, so disable it
            # instead of failing the caller.
            logger.error("❌ Failed to init Pinecone: %s", e)
            self.index = None

    def find_similar_solution(
        self,
        problem_text: str,
        vector_threshold: float = 0.85,
        text_threshold: float = 0.3,
    ) -> Optional[Dict]:
        """Look up a cached solution for a problem.

        Performs a double verification: vector similarity (meaning) plus
        word overlap (content).

        Args:
            problem_text: The problem to look up.
            vector_threshold: Minimum vector score the best match must reach.
            text_threshold: Minimum Jaccard word overlap between the query
                and the stored problem text.

        Returns:
            The decoded solution of the best match, or ``None`` when the
            service is disabled, the query is empty, no match passes both
            checks, the match carries no solution, or any error occurs
            (errors are logged, never raised).
        """
        if self.index is None:
            return None

        # Normalize exactly like learn_solution() does before storing.
        query_text = (problem_text or "").strip()
        if not query_text:
            # An empty query has no tokens, so it could never pass the
            # overlap check; skip the embedding and the network round trip.
            return None

        try:
            vector = self.embedder.encode(query_text).tolist()
            results = self.index.query(
                vector=vector,
                top_k=1,
                include_metadata=True,
            )

            matches = results['matches']
            if not matches:
                return None

            match = matches[0]
            vector_score = match['score']
            if vector_score < vector_threshold:
                return None

            metadata = match['metadata'] or {}
            cached_text = metadata.get('text', '')

            # The stored text was truncated when it was learned; truncate
            # the query the same way so that a long problem identical to a
            # stored one is not penalized for its unstored tail.
            comparable_text = query_text[:self.MAX_STORED_TEXT_CHARS]
            text_similarity = self._calculate_jaccard_similarity(
                comparable_text, cached_text
            )
            logger.info(
                "🧠 Brain Check: Vector=%.3f, Jaccard=%.3f",
                vector_score,
                text_similarity,
            )

            if text_similarity < text_threshold:
                logger.warning("⚠️ High vector score but low text overlap. Ignoring.")
                return None

            solution_json = metadata.get('solution_json')
            if not solution_json:
                return None

            logger.info("🧠 Brain HIT! Verified match found.")
            return json.loads(solution_json)
        except Exception as e:
            # Best-effort cache: a failed lookup is reported as a miss.
            logger.error("Memory search failed: %s", e)
            return None

    def learn_solution(self, problem_text: str, solution_data: dict):
        """Store a new solution.

        Skipped silently when the service is disabled or the stripped
        problem text is shorter than ``MIN_PROBLEM_CHARS``; skipped with a
        warning when the serialized solution exceeds
        ``MAX_SOLUTION_JSON_BYTES``. Errors are logged, never raised.

        Args:
            problem_text: The problem the solution belongs to.
            solution_data: JSON-serializable solution; ``meta.topic`` is
                stored as the record topic when present.
        """
        if self.index is None:
            return

        try:
            clean_text = (problem_text or "").strip()
            if len(clean_text) < self.MIN_PROBLEM_CHARS:
                return

            # Check the size before embedding so an oversized solution
            # costs no model inference.
            json_str = json.dumps(solution_data, ensure_ascii=False)
            if len(json_str.encode('utf-8')) > self.MAX_SOLUTION_JSON_BYTES:
                logger.warning("⚠️ Solution too big for memory. Skipping save.")
                return

            vector = self.embedder.encode(clean_text).tolist()

            # Tolerate a missing, None or non-dict "meta" and a None topic
            # rather than losing the whole save to an exception.
            meta = solution_data.get("meta")
            topic = meta.get("topic") if isinstance(meta, dict) else None
            if topic is None:
                topic = "unknown"

            metadata = {
                "text": clean_text[:self.MAX_STORED_TEXT_CHARS],
                "topic": topic,
                "solution_json": json_str,
            }

            self.index.upsert(vectors=[{
                "id": str(uuid.uuid4()),
                "values": vector,
                "metadata": metadata,
            }])

            logger.info("🧠 Brain LEARNED and saved to Cloud!")
        except Exception as e:
            # Best-effort cache: failing to learn must not break the caller.
            logger.error("Memory learn failed: %s", e)

    def _calculate_jaccard_similarity(self, text1: str, text2: str) -> float:
        """Compute similarity by word overlap, ignoring word order.

        Suited to Hebrew/English OCR output whose word order gets flipped.

        Returns:
            ``|A & B| / |A | B|`` over the token sets of both texts, or
            ``0.0`` when either text yields no tokens.
        """
        tokens1 = self._tokenize(text1)
        tokens2 = self._tokenize(text2)

        if not tokens1 or not tokens2:
            return 0.0

        return len(tokens1 & tokens2) / len(tokens1 | tokens2)

    def _tokenize(self, text: str) -> set:
        """Break text into a set of clean, lower-cased words.

        Punctuation is deleted (not replaced by a space) and tokens of a
        single character are dropped.

        NOTE(review): this also discards single digits and one-letter
        variables, so problems that differ only in such tokens tokenize
        identically - confirm this is intended for math text.
        """
        without_punctuation = self._PUNCTUATION_RE.sub('', text or '')
        words = without_punctuation.lower().split()
        return {word for word in words if len(word) > 1}