Upload model.py with huggingface_hub
Browse files
model.py
ADDED
|
@@ -0,0 +1,223 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from sentence_transformers import SentenceTransformer
|
| 3 |
+
from typing import List, Dict, Optional
|
| 4 |
+
import re
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class SimpleSymbolicParser:
    """Parse simple logical queries like 'color:red AND type:image'"""

    def parse(self, query: str) -> Dict:
        """Extract filters from a query string.

        Args:
            query: Free-text query, optionally containing ``key:value``
                filter tokens and the operators ``AND`` / ``OR``.

        Returns:
            Dict with:
              - 'filters': mapping of key -> list of lowercased values
              - 'operator': 'AND' or 'OR' ('AND' is the default)
        """
        filters: Dict[str, List[str]] = {}

        # Extract key:value patterns, accumulating repeated keys.
        for key, value in re.findall(r'(\w+):(\w+)', query):
            filters.setdefault(key, []).append(value.lower())

        # Detect operators. Bug fix: use word boundaries so words that merely
        # contain the operator letters (e.g. "sport" contains "OR", "brand"
        # contains "AND") are not mistaken for operators. re.IGNORECASE keeps
        # the original case-insensitive matching ("and" / "AND").
        has_and = re.search(r'\bAND\b', query, re.IGNORECASE) is not None
        has_or = re.search(r'\bOR\b', query, re.IGNORECASE) is not None

        return {
            'filters': filters,
            'operator': 'AND' if has_and else 'OR' if has_or else 'AND'
        }
| 31 |
+
|
| 32 |
+
|
| 33 |
+
class SimpleNeuroSymbolicSearch:
    """
    Simple Neuro-Symbolic Search combining:
    1. Neural: Sentence embeddings for semantic search
    2. Symbolic: Rule-based filtering
    """

    def __init__(self, model_name: str = 'all-MiniLM-L6-v2'):
        """Initialize with a lightweight sentence transformer.

        Args:
            model_name: sentence-transformers model id; the default is a
                small, fast general-purpose encoder.
        """
        print(f"Loading model: {model_name}")
        self.encoder = SentenceTransformer(model_name)
        self.parser = SimpleSymbolicParser()

        # Indexed corpus state, populated by index_documents().
        self.documents: List[str] = []
        self.embeddings = None  # tensor of doc embeddings once indexed
        self.metadata: List[Dict] = []

    def index_documents(self, documents: List[str], metadata: Optional[List[Dict]] = None):
        """
        Index documents for search.

        Args:
            documents: List of text documents
            metadata: Optional metadata for each document (parallel to
                ``documents``); when omitted, empty dicts are used.
        """
        print(f"Indexing {len(documents)} documents...")

        self.documents = documents
        self.metadata = metadata if metadata else [{} for _ in documents]

        # Generate embeddings for the whole corpus in one batch.
        self.embeddings = self.encoder.encode(
            documents,
            convert_to_tensor=True,
            show_progress_bar=True
        )

        print("✅ Indexing complete!")

    def apply_symbolic_filters(self, indices: List[int], filters: Dict) -> List[int]:
        """Apply rule-based filters to results.

        A document passes when, for every filter key, the key exists in its
        metadata and at least one requested value is a substring of the
        lowercased metadata value. Order of ``indices`` is preserved.
        """
        if not filters:
            return indices

        filtered = []
        for idx in indices:
            doc_meta = self.metadata[idx]
            matches = all(
                key in doc_meta
                and any(v in str(doc_meta[key]).lower() for v in values)
                for key, values in filters.items()
            )
            if matches:
                filtered.append(idx)

        return filtered

    def search(
        self,
        query: str,
        top_k: int = 5,
        use_symbolic: bool = True
    ) -> List[Dict]:
        """
        Search documents using neuro-symbolic approach.

        Args:
            query: Search query (can include filters like 'color:red')
            top_k: Number of results to return
            use_symbolic: Whether to apply symbolic filtering

        Returns:
            List of results with scores and metadata

        Raises:
            ValueError: If index_documents() has not been called yet.
        """
        if self.embeddings is None:
            raise ValueError("No documents indexed. Call index_documents() first.")

        # Parse query for symbolic filters
        parsed = self.parser.parse(query) if use_symbolic else {'filters': {}}

        # Remove filter syntax from the query for neural search. Bug fix:
        # strip AND/OR only as whole words — the old blanket str.replace()
        # mangled words that merely contain them (e.g. "BRAND" -> "BR").
        clean_query = re.sub(r'\w+:\w+', '', query)
        clean_query = re.sub(r'\b(?:AND|OR)\b', ' ', clean_query)
        clean_query = re.sub(r'\s+', ' ', clean_query).strip()

        # Fall back to the raw query if stripping removed everything.
        if not clean_query:
            clean_query = query

        # Neural search: cosine similarity between query and all documents.
        query_embedding = self.encoder.encode(clean_query, convert_to_tensor=True)
        similarities = torch.cosine_similarity(
            query_embedding.unsqueeze(0),
            self.embeddings
        )

        # Over-fetch (3x) so symbolic filtering still leaves enough hits.
        top_results = torch.topk(similarities, k=min(top_k * 3, len(self.documents)))
        top_indices = top_results.indices.tolist()
        top_scores = top_results.values.tolist()

        # Bug fix: key scores by document index *before* filtering. The old
        # code looked scores up by position in the filtered index list, which
        # attached wrong scores whenever a filter removed any candidate.
        score_by_index = dict(zip(top_indices, top_scores))

        # Apply symbolic filters
        if use_symbolic and parsed['filters']:
            print(f"🔧 Applying filters: {parsed['filters']}")
            top_indices = self.apply_symbolic_filters(top_indices, parsed['filters'])

        # Format results
        results = []
        for rank, idx in enumerate(top_indices[:top_k], start=1):
            results.append({
                'rank': rank,
                'score': score_by_index.get(idx, 0.0),
                'document': self.documents[idx],
                'metadata': self.metadata[idx]
            })

        return results
| 158 |
+
|
| 159 |
+
|
| 160 |
+
def demo():
    """Demo of the neuro-symbolic search system"""

    # Sample documents with metadata
    documents = [
        "A red sports car racing on the track",
        "A blue sedan parked in the garage",
        "A yellow taxi driving in the city",
        "A green bicycle leaning against a wall",
        "A red fire truck responding to emergency",
        "A black motorcycle parked on the street",
        "Children playing soccer in the park",
        "A cat sleeping on a red couch",
    ]

    # One metadata dict per document above (parallel lists).
    metadata = [
        {'type': 'vehicle', 'color': 'red', 'category': 'car'},
        {'type': 'vehicle', 'color': 'blue', 'category': 'car'},
        {'type': 'vehicle', 'color': 'yellow', 'category': 'car'},
        {'type': 'vehicle', 'color': 'green', 'category': 'bicycle'},
        {'type': 'vehicle', 'color': 'red', 'category': 'truck'},
        {'type': 'vehicle', 'color': 'black', 'category': 'motorcycle'},
        {'type': 'activity', 'color': 'none', 'category': 'sports'},
        {'type': 'animal', 'color': 'red', 'category': 'pet'},
    ]

    # Initialize search system (loads the sentence-transformer model)
    search = SimpleNeuroSymbolicSearch()

    # Index documents
    search.index_documents(documents, metadata)

    print("\n" + "="*70)
    print("🔍 DEMO: Neuro-Symbolic Search")
    print("="*70)

    # Test 1: Pure neural search (use_symbolic=False disables filtering)
    print("\n1. Neural Search: 'red vehicle'")
    print("-" * 70)
    results = search.search("red vehicle", top_k=3, use_symbolic=False)
    for r in results:
        print(f"  [{r['rank']}] Score: {r['score']:.3f} | {r['document']}")

    # Test 2: Neuro-symbolic search with filters (color:red is parsed out)
    print("\n2. Neuro-Symbolic: 'vehicle color:red'")
    print("-" * 70)
    results = search.search("vehicle color:red", top_k=3)
    for r in results:
        print(f"  [{r['rank']}] Score: {r['score']:.3f} | {r['document']}")
        print(f"      Metadata: {r['metadata']}")

    # Test 3: Complex query — semantic text plus two symbolic filters
    print("\n3. Neuro-Symbolic: 'fast vehicle color:red category:car'")
    print("-" * 70)
    results = search.search("fast vehicle color:red category:car", top_k=3)
    for r in results:
        print(f"  [{r['rank']}] Score: {r['score']:.3f} | {r['document']}")

    print("\n" + "="*70)
    print("✅ Demo complete!")
| 220 |
+
|
| 221 |
+
|
| 222 |
+
# Run the demo only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    demo()
|