Spaces:

dotandru
/

BuddyMath

Sleeping

App Files Files Community

dotandru committed on Mar 17

Commit

a162bd1

1 Parent(s): 984ec8c

V9.0.2: CRITICAL Fix - Data Anchor Failure via Robust OCR Payload Flattener

Browse files

Files changed (2) hide show

ocr_strip_engine.py +3 -0
orchestrator.py +52 -31

ocr_strip_engine.py CHANGED Viewed

@@ -285,6 +285,9 @@ def _parse_structured_json(raw_text: str) -> list[dict]:
                 flat.extend(item)
             elif isinstance(item, dict):
                 flat.append(item)
         return [p for p in flat if isinstance(p, dict)]
     if isinstance(result, dict) and not result.get("logic_error"):
         return [result]

                 flat.extend(item)
             elif isinstance(item, dict):
                 flat.append(item)
+            elif isinstance(item, str) and item.strip():
+                # V9.0.2 FIX: Handle strings by wrapping them in a text block
+                flat.append({"type": "text", "content": item.strip()})
         return [p for p in flat if isinstance(p, dict)]
     if isinstance(result, dict) and not result.get("logic_error"):
         return [result]

orchestrator.py CHANGED Viewed

@@ -697,35 +697,56 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
     # OCR_STRIP_MODE=development → Stitch & Strip (single-pass, HD, structured)
     # OCR_STRIP_MODE=production  → Legacy Triple-Pass (safe, proven)
def _flatten_ocr_if_json(self, ocr_text: str) -> str:
    """V9.0.0: Sanitize an OCR result that may be JSON rendered as text.

    When the stripped text is bracketed like a JSON array or object, it is
    handed to ``safe_extract_json``. For a list, the truthy ``content``
    values of its dict items are joined with single spaces. For a dict, the
    ``text`` value (or else ``content``) is returned. In every other case,
    including any exception during extraction, the input is returned
    unchanged. A falsy input yields ``""``.
    """
    if not ocr_text:
        return ""

    candidate = ocr_text.strip()
    is_array = candidate.startswith('[') and candidate.endswith(']')
    is_object = candidate.startswith('{') and candidate.endswith('}')
    if not (is_array or is_object):
        # Does not look like JSON: leave the text untouched.
        return ocr_text

    try:
        payload = safe_extract_json(candidate, caller="OCR_FLATTENER")
        if isinstance(payload, list):
            # Only dict blocks contribute; blocks with empty content are skipped.
            contents = (
                block.get("content", "")
                for block in payload
                if isinstance(block, dict)
            )
            pieces = [str(value) for value in contents if value]
            if pieces:
                flattened = " ".join(pieces)
                print(f"📸 🛡️ [V9.0.0] OCR JSON detected and flattened: '{flattened[:100]}...'")
                return flattened
        elif isinstance(payload, dict):
            # Single object: prefer 'text', then 'content'.
            extracted = payload.get("text") or payload.get("content") or ""
            if extracted:
                return str(extracted)
    except Exception as e:
        # Best effort only: on any failure fall through to the raw text.
        logging.debug(f"⚠️ [V9.0.0] OCR Flattening failed: {e}")

    return ocr_text
     async def transcribe_image(self, image_bytes: bytes) -> str:
         """
@@ -813,8 +834,8 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
             results.append("Error")
         final_text = self._merge_ocr_results(results)
-        # V9.0.0: Flatten if Gemini returned JSON as text
-        final_text = self._flatten_ocr_if_json(final_text)
         # Build minimal structured list for consistency
         self._last_ocr_structured = [{"type": "text", "content": final_text}]

     # OCR_STRIP_MODE=development → Stitch & Strip (single-pass, HD, structured)
     # OCR_STRIP_MODE=production  → Legacy Triple-Pass (safe, proven)
+    def _flatten_ocr_payload(self, ocr_data) -> str:
+        """
+        V9.0.2: Ensures the OCR data is converted into a single, continuous text string
+        regardless of the API response format (JSON string, dict, or list).
+        """
+        if not ocr_data:
+            return ""
+        # 1. If it's a string, it might be a raw string OR a JSON string
+        if isinstance(ocr_data, str):
+            s = ocr_data.strip()
+            if (s.startswith('[') and s.endswith(']')) or (s.startswith('{') and s.endswith('}')):
+                try:
+                    # Attempt to parse if it's a JSON structured string
+                    parsed_data = json.loads(s)
+                    ocr_data = parsed_data  # Pass to dict/list handling below
+                except json.JSONDecodeError:
+                    # It's just a regular raw string
+                    return s
+            else:
+                return s
+        # 2. If it's a List (This fixes the V9.0.1 bug!)
+        if isinstance(ocr_data, list):
+            # Join all elements with a newline/space, ignoring empty items
+            # V9.0.2 FIX: Handle both list of strings AND list of dicts (Stitch & Strip)
+            parts = []
+            for item in ocr_data:
+                if isinstance(item, dict):
+                    # Handle structured block format: {"content": "...", "type": "..."}
+                    content = item.get("content") or item.get("text") or ""
+                    if content: parts.append(str(content).strip())
+                elif item:
+                    parts.append(str(item).strip())
+            if parts:
+                return " \n ".join(parts)
+            return ""
+        # 3. If it's a Dictionary
+        elif isinstance(ocr_data, dict):
+            # Look for a primary text key, otherwise convert the whole dict to string
+            res = ocr_data.get("text") or ocr_data.get("content")
+            if res:
+                return str(res).strip()
+            else:
+                return " \n ".join([f"{k}: {v}" for k, v in ocr_data.items()])
+        # 4. Ultimate Fallback for any other type
+        return str(ocr_data).strip()
     async def transcribe_image(self, image_bytes: bytes) -> str:
         """
             results.append("Error")
         final_text = self._merge_ocr_results(results)
+        # V9.0.2: Flatten payload (Robust handling of Union[str, list, dict])
+        final_text = self._flatten_ocr_payload(final_text)
         # Build minimal structured list for consistency
         self._last_ocr_structured = [{"type": "text", "content": final_text}]