dotandru committed on
Commit
a162bd1
·
1 Parent(s): 984ec8c

V9.0.2: CRITICAL Fix - Data Anchor Failure via Robust OCR Payload Flattener

Browse files
Files changed (2) hide show
  1. ocr_strip_engine.py +3 -0
  2. orchestrator.py +52 -31
ocr_strip_engine.py CHANGED
@@ -285,6 +285,9 @@ def _parse_structured_json(raw_text: str) -> list[dict]:
285
  flat.extend(item)
286
  elif isinstance(item, dict):
287
  flat.append(item)
 
 
 
288
  return [p for p in flat if isinstance(p, dict)]
289
  if isinstance(result, dict) and not result.get("logic_error"):
290
  return [result]
 
285
  flat.extend(item)
286
  elif isinstance(item, dict):
287
  flat.append(item)
288
+ elif isinstance(item, str) and item.strip():
289
+ # V9.0.2 FIX: Handle strings by wrapping them in a text block
290
+ flat.append({"type": "text", "content": item.strip()})
291
  return [p for p in flat if isinstance(p, dict)]
292
  if isinstance(result, dict) and not result.get("logic_error"):
293
  return [result]
orchestrator.py CHANGED
@@ -697,35 +697,56 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
697
  # OCR_STRIP_MODE=development → Stitch & Strip (single-pass, HD, structured)
698
  # OCR_STRIP_MODE=production → Legacy Triple-Pass (safe, proven)
699
 
700
- def _flatten_ocr_if_json(self, ocr_text: str) -> str:
701
- """V9.0.0: Robust sanitization for OCR results.
702
- If the OCR pass returns a JSON array string (Gemini hallucination),
703
- flatten it to a raw text string to avoid confusing the Planner LLM."""
704
- if not ocr_text: return ""
705
- s = ocr_text.strip()
706
- # Check if it looks like JSON
707
- if (s.startswith('[') and s.endswith(']')) or (s.startswith('{') and s.endswith('}')):
708
- try:
709
- # Use standard extraction
710
- data = safe_extract_json(s, caller="OCR_FLATTENER")
711
- if isinstance(data, list):
712
- # Join content of all blocks
713
- parts = []
714
- for item in data:
715
- if isinstance(item, dict):
716
- content = item.get("content", "")
717
- if content: parts.append(str(content))
718
- if parts:
719
- flattened = " ".join(parts)
720
- print(f"📸 🛡️ [V9.0.0] OCR JSON detected and flattened: '{flattened[:100]}...'")
721
- return flattened
722
- elif isinstance(data, dict):
723
- # If it's a single object with 'text' or 'content'
724
- res = data.get("text") or data.get("content") or ""
725
- if res: return str(res)
726
- except Exception as e:
727
- logging.debug(f"⚠️ [V9.0.0] OCR Flattening failed: {e}")
728
- return ocr_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
729
 
730
  async def transcribe_image(self, image_bytes: bytes) -> str:
731
  """
@@ -813,8 +834,8 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
813
  results.append("Error")
814
 
815
  final_text = self._merge_ocr_results(results)
816
- # V9.0.0: Flatten if Gemini returned JSON as text
817
- final_text = self._flatten_ocr_if_json(final_text)
818
 
819
  # Build minimal structured list for consistency
820
  self._last_ocr_structured = [{"type": "text", "content": final_text}]
 
697
  # OCR_STRIP_MODE=development → Stitch & Strip (single-pass, HD, structured)
698
  # OCR_STRIP_MODE=production → Legacy Triple-Pass (safe, proven)
699
 
700
+ def _flatten_ocr_payload(self, ocr_data) -> str:
701
+ """
702
+ V9.0.2: Ensures the OCR data is converted into a single, continuous text string
703
+ regardless of the API response format (JSON string, dict, or list).
704
+ """
705
+ if not ocr_data:
706
+ return ""
707
+
708
+ # 1. If it's a string, it might be a raw string OR a JSON string
709
+ if isinstance(ocr_data, str):
710
+ s = ocr_data.strip()
711
+ if (s.startswith('[') and s.endswith(']')) or (s.startswith('{') and s.endswith('}')):
712
+ try:
713
+ # Attempt to parse if it's a JSON structured string
714
+ parsed_data = json.loads(s)
715
+ ocr_data = parsed_data # Pass to dict/list handling below
716
+ except json.JSONDecodeError:
717
+ # It's just a regular raw string
718
+ return s
719
+ else:
720
+ return s
721
+
722
+ # 2. If it's a List (This fixes the V9.0.1 bug!)
723
+ if isinstance(ocr_data, list):
724
+ # Join all elements with a newline/space, ignoring empty items
725
+ # V9.0.2 FIX: Handle both list of strings AND list of dicts (Stitch & Strip)
726
+ parts = []
727
+ for item in ocr_data:
728
+ if isinstance(item, dict):
729
+ # Handle structured block format: {"content": "...", "type": "..."}
730
+ content = item.get("content") or item.get("text") or ""
731
+ if content: parts.append(str(content).strip())
732
+ elif item:
733
+ parts.append(str(item).strip())
734
+
735
+ if parts:
736
+ return " \n ".join(parts)
737
+ return ""
738
+
739
+ # 3. If it's a Dictionary
740
+ elif isinstance(ocr_data, dict):
741
+ # Look for a primary text key, otherwise convert the whole dict to string
742
+ res = ocr_data.get("text") or ocr_data.get("content")
743
+ if res:
744
+ return str(res).strip()
745
+ else:
746
+ return " \n ".join([f"{k}: {v}" for k, v in ocr_data.items()])
747
+
748
+ # 4. Ultimate Fallback for any other type
749
+ return str(ocr_data).strip()
750
 
751
  async def transcribe_image(self, image_bytes: bytes) -> str:
752
  """
 
834
  results.append("Error")
835
 
836
  final_text = self._merge_ocr_results(results)
837
+ # V9.0.2: Flatten payload (Robust handling of Union[str, list, dict])
838
+ final_text = self._flatten_ocr_payload(final_text)
839
 
840
  # Build minimal structured list for consistency
841
  self._last_ocr_structured = [{"type": "text", "content": final_text}]