Viveka1zha committed on
Commit
1f80cd6
·
verified ·
1 Parent(s): 6d3d1eb

Upload model.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. model.py +223 -0
model.py ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from sentence_transformers import SentenceTransformer
3
+ from typing import List, Dict, Optional
4
+ import re
5
+
6
+
7
class SimpleSymbolicParser:
    """Parse simple logical queries like 'color:red AND type:image'.

    The parser recognises two things in a query string:

    * ``key:value`` tokens, collected into a ``filters`` mapping, and
    * the boolean keywords ``AND`` / ``OR`` (matched case-insensitively).
    """

    # ``key:value`` tokens; both sides are runs of word characters.
    _FILTER_RE = re.compile(r'(\w+):(\w+)')
    # Operators must be standalone words. A plain substring test would find
    # "OR" inside "color" and "AND" inside "brand".
    _AND_RE = re.compile(r'\bAND\b', re.IGNORECASE)
    _OR_RE = re.compile(r'\bOR\b', re.IGNORECASE)

    def parse(self, query: str) -> Dict:
        """Extract filters and the boolean operator from a query string.

        Args:
            query: Raw query text, e.g. ``'vehicle color:red OR type:image'``.

        Returns:
            A dict with two keys:
            ``'filters'`` maps each filter key to the list of lower-cased
            values given for it, in order of appearance;
            ``'operator'`` is ``'AND'`` if a standalone AND keyword is
            present, otherwise ``'OR'`` if a standalone OR keyword is
            present, otherwise the default ``'AND'``.
        """
        filters: Dict = {}
        for key, value in self._FILTER_RE.findall(query):
            filters.setdefault(key, []).append(value.lower())

        # Look for operators only in the text left after removing filter
        # tokens, so a key or value that happens to be "and"/"or" is ignored.
        remainder = self._FILTER_RE.sub(' ', query)
        if self._AND_RE.search(remainder):
            operator = 'AND'
        elif self._OR_RE.search(remainder):
            operator = 'OR'
        else:
            operator = 'AND'

        return {
            'filters': filters,
            'operator': operator,
        }
31
+
32
+
33
class SimpleNeuroSymbolicSearch:
    """
    Simple Neuro-Symbolic Search combining:
    1. Neural: Sentence embeddings for semantic search
    2. Symbolic: Rule-based filtering on per-document metadata
    """

    def __init__(self, model_name: str = 'all-MiniLM-L6-v2'):
        """Load the sentence encoder and create an empty index.

        Args:
            model_name: Name passed to ``SentenceTransformer``.
        """
        print(f"Loading model: {model_name}")
        self.encoder = SentenceTransformer(model_name)
        self.parser = SimpleSymbolicParser()

        # Indexed data; ``embeddings`` stays None until index_documents() runs.
        self.documents = []
        self.embeddings = None
        self.metadata = []

    def index_documents(self, documents: List[str], metadata: Optional[List[Dict]] = None):
        """
        Index documents for search, replacing any previous index.

        Args:
            documents: List of text documents
            metadata: Optional metadata dict for each document, in the same
                order as ``documents``. When omitted (or empty), every
                document gets an empty dict.

        Raises:
            ValueError: If ``metadata`` is given but its length differs from
                the number of documents.
        """
        # Check up front: a mismatch would otherwise surface later as an
        # IndexError (or silently ignored extras) at search time.
        if metadata and len(metadata) != len(documents):
            raise ValueError(
                f"metadata has {len(metadata)} entries but there are "
                f"{len(documents)} documents"
            )

        print(f"Indexing {len(documents)} documents...")

        self.documents = documents
        self.metadata = metadata if metadata else [{} for _ in documents]

        # Generate embeddings
        self.embeddings = self.encoder.encode(
            documents,
            convert_to_tensor=True,
            show_progress_bar=True
        )

        print("✅ Indexing complete!")

    def apply_symbolic_filters(self, indices: List[int], filters: Dict) -> List[int]:
        """Keep only the document indices whose metadata satisfies every filter.

        A document matches a filter key when its metadata has that key and
        the lower-cased string form of the metadata value contains at least
        one of the requested values as a substring. All keys must match.

        Args:
            indices: Candidate document indices; their order is preserved.
            filters: Mapping of metadata key to a list of accepted
                (lower-case) values, as produced by the query parser.

        Returns:
            The subset of ``indices`` that pass all filters. ``indices`` is
            returned unchanged when ``filters`` is empty.
        """
        if not filters:
            return indices

        def matches(meta: Dict) -> bool:
            for key, values in filters.items():
                if key not in meta:
                    return False
                meta_value = str(meta[key]).lower()
                if not any(v in meta_value for v in values):
                    return False
            return True

        return [idx for idx in indices if matches(self.metadata[idx])]

    def search(
        self,
        query: str,
        top_k: int = 5,
        use_symbolic: bool = True
    ) -> List[Dict]:
        """
        Search documents using neuro-symbolic approach

        Args:
            query: Search query (can include filters like 'color:red')
            top_k: Number of results to return
            use_symbolic: Whether to apply symbolic filtering

        Returns:
            Up to ``top_k`` dicts ordered by descending similarity, each with
            ``'rank'`` (1-based), ``'score'`` (cosine similarity of that
            document to the query), ``'document'`` and ``'metadata'``.

        Raises:
            ValueError: If no documents have been indexed yet.
        """
        if self.embeddings is None:
            raise ValueError("No documents indexed. Call index_documents() first.")

        num_docs = len(self.documents)
        if top_k <= 0 or num_docs == 0:
            return []

        # Parse query for symbolic filters
        filters = self.parser.parse(query)['filters'] if use_symbolic else {}

        # Remove filter syntax and standalone upper-case operators from the
        # text sent to the encoder. Word boundaries keep words such as
        # "ORANGE" intact.
        clean_query = re.sub(r'\w+:\w+', ' ', query)
        clean_query = re.sub(r'\b(?:AND|OR)\b', ' ', clean_query)
        clean_query = ' '.join(clean_query.split())

        # A query made only of filters still needs some text to embed.
        if not clean_query:
            clean_query = query

        # Neural search: compute similarity.
        # NOTE(review): assumes self.embeddings is a 2-D tensor with one row
        # per document, so the result has one score per document - confirm.
        query_embedding = self.encoder.encode(clean_query, convert_to_tensor=True)
        similarities = torch.cosine_similarity(
            query_embedding.unsqueeze(0),
            self.embeddings
        )

        # With filters active, rank every document before filtering;
        # truncating first could discard lower-ranked documents that are the
        # only ones satisfying the filters.
        pool_size = num_docs if filters else min(top_k, num_docs)
        top_results = torch.topk(similarities, k=pool_size)

        # Keep each score attached to its document index so filtering cannot
        # shift scores onto the wrong documents.
        candidates = top_results.indices.tolist()
        score_of = dict(zip(candidates, top_results.values.tolist()))

        # Apply symbolic filters
        if filters:
            print(f"🔧 Applying filters: {filters}")
            candidates = self.apply_symbolic_filters(candidates, filters)

        # Format results
        return [
            {
                'rank': rank,
                'score': score_of[idx],
                'document': self.documents[idx],
                'metadata': self.metadata[idx],
            }
            for rank, idx in enumerate(candidates[:top_k], start=1)
        ]
158
+
159
+
160
def demo():
    """Run a small end-to-end demonstration of the neuro-symbolic search.

    Builds an eight-document corpus with metadata, indexes it, then prints
    the results of one pure neural query and two queries that carry
    ``key:value`` filters.
    """
    # Each entry pairs a document with its metadata so the two stay aligned.
    corpus = [
        ("A red sports car racing on the track",
         {'type': 'vehicle', 'color': 'red', 'category': 'car'}),
        ("A blue sedan parked in the garage",
         {'type': 'vehicle', 'color': 'blue', 'category': 'car'}),
        ("A yellow taxi driving in the city",
         {'type': 'vehicle', 'color': 'yellow', 'category': 'car'}),
        ("A green bicycle leaning against a wall",
         {'type': 'vehicle', 'color': 'green', 'category': 'bicycle'}),
        ("A red fire truck responding to emergency",
         {'type': 'vehicle', 'color': 'red', 'category': 'truck'}),
        ("A black motorcycle parked on the street",
         {'type': 'vehicle', 'color': 'black', 'category': 'motorcycle'}),
        ("Children playing soccer in the park",
         {'type': 'activity', 'color': 'none', 'category': 'sports'}),
        ("A cat sleeping on a red couch",
         {'type': 'animal', 'color': 'red', 'category': 'pet'}),
    ]
    documents = [text for text, _ in corpus]
    metadata = [meta for _, meta in corpus]

    heavy_rule = "=" * 70
    light_rule = "-" * 70

    def show(results, with_metadata=False):
        # Print one line per hit, optionally followed by its metadata.
        for r in results:
            print(f" [{r['rank']}] Score: {r['score']:.3f} | {r['document']}")
            if with_metadata:
                print(f" Metadata: {r['metadata']}")

    # Build the engine and index the corpus.
    engine = SimpleNeuroSymbolicSearch()
    engine.index_documents(documents, metadata)

    print("\n" + heavy_rule)
    print("🔍 DEMO: Neuro-Symbolic Search")
    print(heavy_rule)

    # Test 1: embeddings only, symbolic filtering switched off.
    print("\n1. Neural Search: 'red vehicle'")
    print(light_rule)
    show(engine.search("red vehicle", top_k=3, use_symbolic=False))

    # Test 2: a single filter, with metadata printed for each hit.
    print("\n2. Neuro-Symbolic: 'vehicle color:red'")
    print(light_rule)
    show(engine.search("vehicle color:red", top_k=3), with_metadata=True)

    # Test 3: two filters combined.
    print("\n3. Neuro-Symbolic: 'fast vehicle color:red category:car'")
    print(light_rule)
    show(engine.search("fast vehicle color:red category:car", top_k=3))

    print("\n" + heavy_rule)
    print("✅ Demo complete!")
220
+
221
+
222
# Script entry point: run the demo only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    demo()