Danielfonseca1212 committed on
Commit
5a9f09e
·
verified ·
1 Parent(s): 9ca8eb5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +813 -0
app.py ADDED
@@ -0,0 +1,813 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py — Structured Output Extractor | Function Calling + Pydantic
2
+ import streamlit as st
3
+ import json
4
+ import os
5
+
6
+ st.set_page_config(
7
+ page_title="Structured Extractor Β· Daniel Fonseca",
8
+ page_icon="⬑",
9
+ layout="wide",
10
+ initial_sidebar_state="expanded",
11
+ )
12
+
13
+ # ── CSS: TERMINAL HACKER ──────────────────────────────────────
14
+ st.markdown("""
15
+ <style>
16
+ @import url('https://fonts.googleapis.com/css2?family=Share+Tech+Mono&family=Orbitron:wght@400;700;900&family=VT323&display=swap');
17
+
18
+ :root {
19
+ --bg: #060810;
20
+ --bg2: #0a0d18;
21
+ --bg3: #0e1220;
22
+ --green: #00ff88;
23
+ --green2: #00cc66;
24
+ --green3: #009944;
25
+ --amber: #ffb700;
26
+ --cyan: #00d4ff;
27
+ --red: #ff3355;
28
+ --dim: #1a2a1a;
29
+ --grid: #0d1a0d;
30
+ --border: #0a3a0a;
31
+ --border2: #1a4a1a;
32
+ --text: #c8ffc8;
33
+ --text2: #88cc88;
34
+ --text3: #446644;
35
+ }
36
+
37
+ html, body, [class*="css"] {
38
+ background: var(--bg) !important;
39
+ color: var(--text) !important;
40
+ font-family: 'Share Tech Mono', monospace !important;
41
+ }
42
+
43
+ /* CRT scanlines overlay */
44
+ body::before {
45
+ content: '';
46
+ position: fixed;
47
+ top: 0; left: 0; right: 0; bottom: 0;
48
+ background: repeating-linear-gradient(
49
+ 0deg,
50
+ transparent,
51
+ transparent 2px,
52
+ rgba(0,255,136,0.015) 2px,
53
+ rgba(0,255,136,0.015) 4px
54
+ );
55
+ pointer-events: none;
56
+ z-index: 9999;
57
+ }
58
+
59
+ #MainMenu, footer, header { visibility: hidden; }
60
+ .block-container { padding-top: 1rem; max-width: 1300px; }
61
+
62
+ /* ── HEADER ── */
63
+ .term-header {
64
+ border-bottom: 1px solid var(--green3);
65
+ padding-bottom: 0.8rem;
66
+ margin-bottom: 1.2rem;
67
+ }
68
+ .term-title {
69
+ font-family: 'Orbitron', monospace;
70
+ font-weight: 900;
71
+ font-size: 1.8rem;
72
+ color: var(--green);
73
+ letter-spacing: 0.08em;
74
+ text-shadow: 0 0 20px rgba(0,255,136,0.5);
75
+ line-height: 1;
76
+ }
77
+ .term-sub {
78
+ font-family: 'Share Tech Mono', monospace;
79
+ font-size: 0.7rem;
80
+ color: var(--green3);
81
+ letter-spacing: 0.2em;
82
+ margin-top: 0.3rem;
83
+ }
84
+ .blink {
85
+ animation: blink 1s step-end infinite;
86
+ color: var(--green);
87
+ }
88
+ @keyframes blink { 50% { opacity: 0; } }
89
+
90
+ /* ── TERMINAL WINDOW ── */
91
+ .term-window {
92
+ background: var(--bg2);
93
+ border: 1px solid var(--border2);
94
+ border-radius: 4px;
95
+ overflow: hidden;
96
+ margin-bottom: 1rem;
97
+ }
98
+ .term-titlebar {
99
+ background: var(--bg3);
100
+ border-bottom: 1px solid var(--border);
101
+ padding: 0.4rem 0.8rem;
102
+ display: flex;
103
+ align-items: center;
104
+ gap: 0.5rem;
105
+ }
106
+ .term-dot {
107
+ width: 8px; height: 8px;
108
+ border-radius: 50%;
109
+ display: inline-block;
110
+ }
111
+ .dot-r { background: #ff3355; }
112
+ .dot-y { background: #ffb700; }
113
+ .dot-g { background: #00ff88; }
114
+ .term-wintitle {
115
+ font-size: 0.65rem;
116
+ color: var(--text3);
117
+ letter-spacing: 0.15em;
118
+ text-transform: uppercase;
119
+ margin-left: 0.5rem;
120
+ }
121
+ .term-body { padding: 1rem 1.2rem; }
122
+
123
+ /* ── PROMPT LINE ── */
124
+ .prompt-line {
125
+ font-size: 0.8rem;
126
+ color: var(--green3);
127
+ margin-bottom: 0.4rem;
128
+ }
129
+ .prompt-line span { color: var(--green); }
130
+
131
+ /* ── JSON RENDERER ── */
132
+ .json-output {
133
+ background: #040608;
134
+ border: 1px solid var(--border);
135
+ border-radius: 3px;
136
+ padding: 1.2rem;
137
+ font-family: 'Share Tech Mono', monospace;
138
+ font-size: 0.8rem;
139
+ line-height: 1.7;
140
+ overflow-x: auto;
141
+ position: relative;
142
+ }
143
+ .json-key { color: var(--cyan); }
144
+ .json-str { color: var(--amber); }
145
+ .json-num { color: #ff88aa; }
146
+ .json-bool { color: var(--green); font-weight: bold; }
147
+ .json-null { color: var(--text3); font-style: italic; }
148
+ .json-bracket { color: var(--text2); }
149
+
150
+ /* ── FIELD CARDS ── */
151
+ .field-grid {
152
+ display: grid;
153
+ grid-template-columns: repeat(auto-fill, minmax(220px, 1fr));
154
+ gap: 0.6rem;
155
+ margin-top: 0.8rem;
156
+ }
157
+ .field-card {
158
+ background: #040a08;
159
+ border: 1px solid var(--border);
160
+ border-left: 2px solid var(--green3);
161
+ border-radius: 3px;
162
+ padding: 0.6rem 0.8rem;
163
+ transition: border-color 0.2s;
164
+ }
165
+ .field-card:hover { border-left-color: var(--green); }
166
+ .field-key {
167
+ font-size: 0.65rem;
168
+ color: var(--cyan);
169
+ text-transform: uppercase;
170
+ letter-spacing: 0.12em;
171
+ margin-bottom: 0.2rem;
172
+ }
173
+ .field-val {
174
+ font-size: 0.82rem;
175
+ color: var(--amber);
176
+ word-break: break-word;
177
+ }
178
+ .field-val-null { color: var(--text3); font-style: italic; }
179
+ .field-val-bool-true { color: var(--green); }
180
+ .field-val-bool-false { color: var(--red); }
181
+
182
+ /* ── STATS BAR ── */
183
+ .stats-bar {
184
+ display: flex;
185
+ gap: 1.5rem;
186
+ padding: 0.5rem 0;
187
+ border-top: 1px solid var(--border);
188
+ margin-top: 0.8rem;
189
+ flex-wrap: wrap;
190
+ }
191
+ .stat-item {
192
+ font-size: 0.68rem;
193
+ color: var(--text3);
194
+ }
195
+ .stat-item span { color: var(--green2); }
196
+
197
+ /* ── SCHEMA SELECTOR ── */
198
+ .schema-btn-active {
199
+ background: var(--dim) !important;
200
+ border: 1px solid var(--green) !important;
201
+ color: var(--green) !important;
202
+ }
203
+
204
+ /* ── SIDEBAR ── */
205
+ section[data-testid="stSidebar"] {
206
+ background: var(--bg2) !important;
207
+ border-right: 1px solid var(--border) !important;
208
+ }
209
+ section[data-testid="stSidebar"] * { color: var(--text2) !important; }
210
+
211
+ /* ── STREAMLIT OVERRIDES ── */
212
+ .stTextArea textarea {
213
+ background: #040608 !important;
214
+ border: 1px solid var(--border2) !important;
215
+ border-radius: 3px !important;
216
+ color: var(--text) !important;
217
+ font-family: 'Share Tech Mono', monospace !important;
218
+ font-size: 0.8rem !important;
219
+ line-height: 1.6 !important;
220
+ }
221
+ .stTextArea textarea:focus {
222
+ border-color: var(--green) !important;
223
+ box-shadow: 0 0 8px rgba(0,255,136,0.2) !important;
224
+ }
225
+ .stSelectbox select, .stSelectbox > div {
226
+ background: var(--bg2) !important;
227
+ border-color: var(--border2) !important;
228
+ color: var(--text) !important;
229
+ font-family: 'Share Tech Mono', monospace !important;
230
+ }
231
+ .stButton button {
232
+ background: transparent !important;
233
+ border: 1px solid var(--green3) !important;
234
+ color: var(--green) !important;
235
+ border-radius: 3px !important;
236
+ font-family: 'Orbitron', monospace !important;
237
+ font-size: 0.68rem !important;
238
+ letter-spacing: 0.1em !important;
239
+ text-transform: uppercase !important;
240
+ transition: all 0.2s !important;
241
+ }
242
+ .stButton button:hover {
243
+ background: var(--dim) !important;
244
+ border-color: var(--green) !important;
245
+ box-shadow: 0 0 12px rgba(0,255,136,0.3) !important;
246
+ }
247
+ .stTextInput input {
248
+ background: #040608 !important;
249
+ border: 1px solid var(--border2) !important;
250
+ color: var(--text) !important;
251
+ font-family: 'Share Tech Mono', monospace !important;
252
+ font-size: 0.8rem !important;
253
+ }
254
+ div[data-testid="stTabs"] button {
255
+ font-family: 'Orbitron', monospace !important;
256
+ font-size: 0.62rem !important;
257
+ letter-spacing: 0.08em !important;
258
+ color: var(--text3) !important;
259
+ }
260
+ div[data-testid="stTabs"] button[aria-selected="true"] {
261
+ color: var(--green) !important;
262
+ border-bottom-color: var(--green) !important;
263
+ }
264
+ hr { border-color: var(--border) !important; }
265
+ </style>
266
+ """, unsafe_allow_html=True)
267
+
268
+ # ── SESSION STATE ──────────────────────────────────────────────
269
# Seed st.session_state with default values. Keys that already exist are
# left untouched, so only missing keys are filled in.
_SESSION_DEFAULTS = {
    'openai_key': '',
    'history': [],
    'active_schema': 'Contrato Legal',
    'custom_schema': '',
}
for k, v in _SESSION_DEFAULTS.items():
    if k in st.session_state:
        continue
    st.session_state[k] = v
277
+
278
+ # ── HELPERS ───────────────────────────────────────────────────
279
def get_key():
    """Return the OpenAI API key from the first source that provides one.

    Lookup order:
      1. ``st.secrets['OPENAI_API_KEY']``
      2. the ``OPENAI_API_KEY`` environment variable
      3. ``st.session_state.openai_key`` (the value typed into the sidebar)

    Returns:
        The key string, or whatever ``st.session_state.openai_key`` holds
        (an empty string by default) when no other source is configured.
    """
    try:
        if 'OPENAI_API_KEY' in st.secrets:
            return st.secrets['OPENAI_API_KEY']
    except Exception:
        # Deliberate best effort: any failure while reading st.secrets is
        # treated as "no secret configured" and the lookup continues.
        pass
    # Use `or` rather than a getenv default so that an environment variable
    # that is set but empty does not mask the key entered in the sidebar.
    return os.getenv('OPENAI_API_KEY') or st.session_state.openai_key
286
+
287
+
288
def syntax_highlight_json(obj, indent=0) -> str:
    """Render a JSON-like Python value as syntax-highlighted HTML.

    Dicts and lists are rendered recursively, one entry per line (lines are
    joined with ``"\\n"``), indented with three ``&nbsp;`` per nesting level.
    Every token is wrapped in a ``<span>`` carrying one of the ``json-*`` CSS
    classes (``json-key``, ``json-str``, ``json-num``, ``json-bool``,
    ``json-null``, ``json-bracket``).

    All text taken from *obj* (dict keys, strings, and the string form of
    unsupported types) is HTML-escaped, because callers inject the result
    into the page as raw HTML.

    Args:
        obj: The value to render (dict, list, str, bool, None, int, float;
            anything else is rendered via ``str(obj)`` as a string token).
        indent: Current nesting depth; callers normally leave it at 0.

    Returns:
        An HTML fragment representing *obj*.
    """
    # Local import keeps this helper self-contained.
    from html import escape

    pad = "&nbsp;" * (indent * 3)
    pad2 = "&nbsp;" * ((indent + 1) * 3)

    if isinstance(obj, dict):
        if not obj:
            return '<span class="json-bracket">{}</span>'
        lines = ['<span class="json-bracket">{</span>']
        last = len(obj) - 1
        for i, (k, v) in enumerate(obj.items()):
            comma = "," if i < last else ""
            # Keys come from the same untrusted data as values: escape them.
            key_html = escape(str(k), quote=False)
            val_html = syntax_highlight_json(v, indent + 1)
            lines.append(
                f'{pad2}<span class="json-key">"{key_html}"</span>: {val_html}{comma}'
            )
        lines.append(f'{pad}<span class="json-bracket">}}</span>')
        return "\n".join(lines)

    if isinstance(obj, list):
        if not obj:
            return '<span class="json-bracket">[]</span>'
        lines = ['<span class="json-bracket">[</span>']
        last = len(obj) - 1
        for i, item in enumerate(obj):
            comma = "," if i < last else ""
            val_html = syntax_highlight_json(item, indent + 1)
            lines.append(f'{pad2}{val_html}{comma}')
        lines.append(f'{pad}<span class="json-bracket">]</span>')
        return "\n".join(lines)

    if isinstance(obj, str):
        # quote=False escapes exactly &, < and >.
        return f'<span class="json-str">"{escape(obj, quote=False)}"</span>'

    # bool must be tested before int/float: bool is a subclass of int.
    if isinstance(obj, bool):
        return f'<span class="json-bool">{"true" if obj else "false"}</span>'

    if obj is None:
        return '<span class="json-null">null</span>'

    if isinstance(obj, (int, float)):
        return f'<span class="json-num">{obj}</span>'

    # Unsupported type: show its string form as an (escaped) string token.
    return f'<span class="json-str">"{escape(str(obj), quote=False)}"</span>'
332
+
333
+
334
def render_flat_fields(data: dict) -> str:
    """Render the flat (non-nested) fields of *data* as HTML cards.

    Entries whose value is a dict or a list are skipped. Every other entry
    becomes one ``field-card`` div holding the key and the value: ``None``
    is shown as ``null``, booleans as ``true``/``false`` with a dedicated
    CSS class, and anything else via ``str(value)``.

    Keys and values are HTML-escaped (``&``, ``<``, ``>``) because callers
    inject the result into the page as raw HTML.

    Args:
        data: Mapping of field names to extracted values.

    Returns:
        A ``field-grid`` div wrapping all cards, or ``""`` when *data* has
        no flat fields.
    """
    # Local import keeps this helper self-contained.
    from html import escape

    cards = []
    for k, v in data.items():
        if isinstance(v, (dict, list)):
            continue
        key_html = f'<div class="field-key">{escape(str(k), quote=False)}</div>'
        if v is None:
            val_html = '<div class="field-val field-val-null">null</div>'
        elif isinstance(v, bool):
            cls = "field-val-bool-true" if v else "field-val-bool-false"
            val_html = f'<div class="field-val {cls}">{"true" if v else "false"}</div>'
        else:
            # Escape & as well as < and >, so text such as "&lt;" is shown
            # literally instead of being decoded by the browser.
            escaped = escape(str(v), quote=False)
            val_html = f'<div class="field-val">{escaped}</div>'
        cards.append(f'<div class="field-card">{key_html}{val_html}</div>')
    if not cards:
        return ""
    return f'<div class="field-grid">{"".join(cards)}</div>'
353
+
354
+
355
+ # ── SIDEBAR ───────────────────────────────────────────────────
356
+ with st.sidebar:
357
+ st.markdown("""
358
+ <div style='font-family:Orbitron,monospace;font-weight:900;
359
+ font-size:1rem;color:#00ff88;text-shadow:0 0 10px rgba(0,255,136,0.4);
360
+ letter-spacing:0.1em'>STRUCT//EXTRACT</div>
361
+ <div style='font-family:Share Tech Mono,monospace;font-size:0.6rem;
362
+ color:#446644;letter-spacing:0.2em;text-transform:uppercase;margin-top:0.2rem'>
363
+ v1.0 Β· Function Calling Engine
364
+ </div>
365
+ """, unsafe_allow_html=True)
366
+ st.divider()
367
+
368
+ st.markdown("**πŸ”‘ OpenAI API Key**")
369
+ k_in = st.text_input("", type="password", value=st.session_state.openai_key,
370
+ placeholder="sk-...", label_visibility="collapsed")
371
+ if k_in:
372
+ st.session_state.openai_key = k_in
373
+ if get_key():
374
+ st.markdown('<div style="color:#00ff88;font-size:0.75rem">βœ“ KEY LOADED</div>',
375
+ unsafe_allow_html=True)
376
+ else:
377
+ st.markdown('<div style="color:#ff3355;font-size:0.75rem">βœ— KEY MISSING</div>',
378
+ unsafe_allow_html=True)
379
+
380
+ st.divider()
381
+ st.markdown("""
382
+ <div style='font-family:Share Tech Mono,monospace;font-size:0.72rem;
383
+ color:#446644;line-height:1.8'>
384
+ <div style='color:#00cc66;margin-bottom:0.4rem'>// PIPELINE</div>
385
+ 01. Text input<br>
386
+ 02. Schema selection<br>
387
+ 03. Tool definition (OpenAI)<br>
388
+ 04. Function calling<br>
389
+ 05. JSON parse + validate<br>
390
+ 06. Retry on error<br>
391
+ 07. Render + export
392
+ </div>
393
+ """, unsafe_allow_html=True)
394
+ st.divider()
395
+ st.markdown("""
396
+ <div style='font-family:Share Tech Mono,monospace;font-size:0.65rem;color:#2a4a2a'>
397
+ model: gpt-4o-mini<br>
398
+ tool_choice: required<br>
399
+ temperature: 0.0<br>
400
+ max_retries: 2<br>
401
+ validation: pydantic v2
402
+ </div>
403
+ """, unsafe_allow_html=True)
404
+ st.divider()
405
+ if st.button("⬑ Limpar histórico", use_container_width=True):
406
+ st.session_state.history = []
407
+ st.rerun()
408
+
409
+ # ── HEADER ────────────────────────────────────────────────────
410
+ st.markdown("""
411
+ <div class="term-header">
412
+ <div class="term-title">⬑ STRUCTURED OUTPUT EXTRACTOR <span class="blink">_</span></div>
413
+ <div class="term-sub">OpenAI Function Calling Β· Pydantic v2 Β· Dynamic JSON Schema Β· Auto-Retry</div>
414
+ </div>
415
+ """, unsafe_allow_html=True)
416
+
417
+ # ── TABS ──────────────────────────────────────────────────────
418
+ tab_extract, tab_custom, tab_history = st.tabs([
419
+ "⬑ Extrair",
420
+ "β¬’ Schema Customizado",
421
+ "⬣ Histórico",
422
+ ])
423
+
424
+ # ════════════════════════════════════════════════════════════════
425
+ # EXEMPLOS
426
+ # ════════════════════════════════════════════════════════════════
427
+ EXAMPLES = {
428
+ "Contrato Legal": """CONTRATO DE PRESTAÇÃO DE SERVIΓ‡OS DE CONSULTORIA EM INTELIGÊNCIA ARTIFICIAL
429
+
430
+ Entre as partes:
431
+ CONTRATANTE: TechCorp Brasil Ltda., CNPJ 12.345.678/0001-99, com sede em SΓ£o Paulo/SP.
432
+ CONTRATADO: Daniel Fonseca - ML Engineer, CPF 123.456.789-00, residente no Rio de Janeiro/RJ.
433
+
434
+ CLÁUSULA 1 - OBJETO
435
+ O CONTRATADO prestarΓ‘ serviΓ§os de consultoria em Graph Neural Networks e sistemas de detecΓ§Γ£o de fraude com IA Generativa, incluindo desenvolvimento de modelos, treinamento de equipes e documentaΓ§Γ£o tΓ©cnica.
436
+
437
+ CLÁUSULA 2 - VALOR
438
+ O valor total dos serviΓ§os Γ© de R$ 48.000,00 (quarenta e oito mil reais), pagos em 4 parcelas mensais de R$ 12.000,00.
439
+
440
+ CLÁUSULA 3 - PRAZO
441
+ VigΓͺncia de 4 (quatro) meses, iniciando em 01/04/2025 e encerrando em 31/07/2025.
442
+
443
+ CLÁUSULA 4 - OBRIGAÇÕES DO CONTRATADO
444
+ - Entregar relatΓ³rios mensais de progresso
445
+ - Participar de reuniΓ΅es semanais remotas
446
+ - Manter confidencialidade sobre os dados da empresa
447
+
448
+ CLÁUSULA 5 - FORO
449
+ Fica eleito o foro da Comarca de SΓ£o Paulo/SP para dirimir quaisquer controvΓ©rsias.
450
+
451
+ Assinado digitalmente em 28/03/2025.""",
452
+
453
+ "NotΓ­cia / Artigo": """Meta anuncia novo modelo de linguagem open-source com 405 bilhΓ΅es de parΓ’metros
454
+
455
+ SAN FRANCISCO, 15 de marΓ§o de 2025 β€” A Meta Platforms anunciou nesta quinta-feira o lanΓ§amento do Llama 4, seu mais novo modelo de linguagem de grande escala com 405 bilhΓ΅es de parΓ’metros, disponΓ­vel gratuitamente para pesquisadores e empresas sob licenΓ§a open-source.
456
+
457
+ O CEO Mark Zuckerberg afirmou que o modelo supera o GPT-4o em 73% dos benchmarks testados internamente, incluindo MMLU, HumanEval e MT-Bench. A vice-presidente de IA da empresa, Yann LeCun, destacou que o modelo foi treinado em 30 trilhΓ΅es de tokens de dados multimodais.
458
+
459
+ O lanΓ§amento acontece em meio Γ  crescente disputa entre Meta, OpenAI, Google e Anthropic pelo mercado de IA generativa, avaliado em US$ 2,4 trilhΓ΅es atΓ© 2030 segundo a consultoria Goldman Sachs.
460
+
461
+ Especialistas do MIT e Stanford avaliam que a decisΓ£o de tornar o modelo open-source pode democratizar o acesso Γ  IA avanΓ§ada, embora levante preocupaΓ§Γ΅es sobre uso malicioso. O governo americano jΓ‘ sinalizou que pode regulamentar o setor ainda em 2025.""",
462
+
463
+ "Artigo CientΓ­fico": """GraphSAGE: Inductive Representation Learning on Large Graphs
464
+
465
+ Autores: William L. Hamilton, Rex Ying, Jure Leskovec
466
+ Venue: NeurIPS 2017, Long Beach, CA
467
+
468
+ Abstract:
469
+ Low-dimensional embeddings of nodes in large graphs have proved extremely useful in a variety of prediction tasks. However, most existing approaches require that all nodes in the graph are present during training of the embeddings; these previous approaches are inherently transductive and do not naturally generalize to unseen nodes.
470
+
471
+ Problema resolvido:
472
+ A maioria dos mΓ©todos de embedding para grafos Γ© transductive β€” sΓ³ funciona para nΓ³s vistos durante o treino. Em aplicaΓ§Γ΅es reais como redes sociais e sistemas de recomendaΓ§Γ£o, novos nΓ³s aparecem constantemente.
473
+
474
+ Metodologia:
475
+ O GraphSAGE aprende funΓ§Γ΅es de agregaΓ§Γ£o (mean, LSTM, pooling) que generalizam para nΓ³s nΓ£o vistos, combinando features do nΓ³ com as de sua vizinhanΓ§a amostrada.
476
+
477
+ Resultados:
478
+ - Dataset Citation (Cora): F1 = 0.935
479
+ - Dataset Reddit: F1 = 0.950
480
+ - Dataset PPI (Protein-Protein Interaction): F1 = 0.612 (vs 0.421 baseline)
481
+
482
+ ContribuiΓ§Γ΅es principais:
483
+ 1. Framework inductive para grafos de larga escala
484
+ 2. TrΓͺs agregadores comparados: mean, LSTM, max-pooling
485
+ 3. Mini-batch training para escalabilidade
486
+ 4. Open-source no repositΓ³rio snap-stanford/GraphSAGE""",
487
+ }
488
+
489
+ # ════════════════════════════════════════════════════════════════
490
+ # TAB 1 — EXTRAIR
491
+ # ════════════════════════════════════════════════════════════════
492
+ with tab_extract:
493
+ from extractor import PRESET_SCHEMAS
494
+
495
+ # Schema selector
496
+ st.markdown("""
497
+ <div class="prompt-line">user@extractor:~$ <span>select --schema</span></div>
498
+ """, unsafe_allow_html=True)
499
+
500
+ schema_cols = st.columns(len(PRESET_SCHEMAS))
501
+ for i, (name, _) in enumerate(PRESET_SCHEMAS.items()):
502
+ with schema_cols[i]:
503
+ active = st.session_state.active_schema == name
504
+ if st.button(name, key=f"sc_{i}", use_container_width=True):
505
+ st.session_state.active_schema = name
506
+ st.rerun()
507
+
508
+ active_schema = PRESET_SCHEMAS[st.session_state.active_schema]
509
+ st.markdown(f"""
510
+ <div style='font-family:Share Tech Mono,monospace;font-size:0.68rem;
511
+ color:#446644;margin:0.4rem 0 0.8rem;padding:0.4rem 0.8rem;
512
+ border-left:2px solid #0a3a0a;background:#040a04'>
513
+ // {st.session_state.active_schema} β€” {active_schema['description']}
514
+ </div>
515
+ """, unsafe_allow_html=True)
516
+
517
+ # Exemplo rápido
518
+ col_ex, _ = st.columns([2, 3])
519
+ with col_ex:
520
+ if st.button(f"⬑ Carregar exemplo: {st.session_state.active_schema}",
521
+ use_container_width=True):
522
+ ex_text = EXAMPLES.get(st.session_state.active_schema, "")
523
+ if ex_text:
524
+ st.session_state["load_example"] = ex_text
525
+
526
+ default_text = st.session_state.pop("load_example", "")
527
+
528
+ st.markdown("""
529
+ <div class="prompt-line" style="margin-top:0.8rem">
530
+ user@extractor:~$ <span>paste --input</span></div>
531
+ """, unsafe_allow_html=True)
532
+
533
+ text_input = st.text_area(
534
+ "", value=default_text, height=220,
535
+ placeholder="Cole qualquer texto aqui: contrato, notΓ­cia, currΓ­culo, invoice, artigo...",
536
+ label_visibility="collapsed", key="main_text"
537
+ )
538
+
539
+ run_col, _ = st.columns([1, 3])
540
+ with run_col:
541
+ run_btn = st.button("⬑ EXTRAIR DADOS", use_container_width=True, type="primary")
542
+
543
+ if run_btn:
544
+ if not get_key():
545
+ st.markdown('<div style="color:#ff3355;font-size:0.8rem">βœ— API Key nΓ£o configurada</div>',
546
+ unsafe_allow_html=True)
547
+ st.stop()
548
+ if not text_input.strip():
549
+ st.markdown('<div style="color:#ffb700;font-size:0.8rem">⚠ Cole um texto para extrair</div>',
550
+ unsafe_allow_html=True)
551
+ st.stop()
552
+
553
+ from extractor import StructuredExtractor
554
+
555
+ # Terminal de progresso
556
+ prog_ph = st.empty()
557
+ prog_ph.markdown("""
558
+ <div class="term-window">
559
+ <div class="term-titlebar">
560
+ <span class="term-dot dot-r"></span>
561
+ <span class="term-dot dot-y"></span>
562
+ <span class="term-dot dot-g"></span>
563
+ <span class="term-wintitle">extraction.log</span>
564
+ </div>
565
+ <div class="term-body" style="font-size:0.75rem;color:#446644;line-height:2">
566
+ <div>β†’ Inicializando engine...</div>
567
+ <div>β†’ Tool definition criada</div>
568
+ <div>β†’ Chamando gpt-4o-mini com tool_choice=required...</div>
569
+ <div style="color:#ffb700">⟳ Aguardando resposta<span class="blink">_</span></div>
570
+ </div>
571
+ </div>
572
+ """, unsafe_allow_html=True)
573
+
574
+ try:
575
+ engine = StructuredExtractor(get_key())
576
+ result = engine.extract(
577
+ text=text_input,
578
+ schema=active_schema["schema"],
579
+ schema_name=st.session_state.active_schema,
580
+ )
581
+
582
+ prog_ph.markdown(f"""
583
+ <div class="term-window">
584
+ <div class="term-titlebar">
585
+ <span class="term-dot dot-r"></span>
586
+ <span class="term-dot dot-y"></span>
587
+ <span class="term-dot dot-g"></span>
588
+ <span class="term-wintitle">extraction.log</span>
589
+ </div>
590
+ <div class="term-body" style="font-size:0.75rem;color:#446644;line-height:2">
591
+ <div>βœ“ Engine inicializado</div>
592
+ <div>βœ“ Tool definition: <span style="color:#00d4ff">{st.session_state.active_schema}</span></div>
593
+ <div>βœ“ Function call executado com sucesso</div>
594
+ <div>βœ“ JSON parseado e validado</div>
595
+ <div style="color:#00ff88">βœ“ EXTRAÇÃO COMPLETA em {result['attempts']} tentativa(s) Β· {result['tokens']} tokens</div>
596
+ </div>
597
+ </div>
598
+ """, unsafe_allow_html=True)
599
+
600
+ # Salva no histórico
601
+ st.session_state.history.append({
602
+ "schema": st.session_state.active_schema,
603
+ "text_preview": text_input[:120] + "...",
604
+ "result": result,
605
+ })
606
+
607
+ # ── OUTPUT ──────────────────────────────────────────
608
+ out_col, raw_col = st.columns([3, 2], gap="large")
609
+
610
+ with out_col:
611
+ st.markdown("""
612
+ <div class="prompt-line">user@extractor:~$ <span>render --view=structured</span></div>
613
+ """, unsafe_allow_html=True)
614
+
615
+ # Cards de campos flat
616
+ flat_html = render_flat_fields(result["data"])
617
+ if flat_html:
618
+ st.markdown(flat_html, unsafe_allow_html=True)
619
+
620
+ # Campos complexos (listas/objetos)
621
+ for k, v in result["data"].items():
622
+ if not isinstance(v, (dict, list)):
623
+ continue
624
+ st.markdown(f"""
625
+ <div style='font-family:Share Tech Mono,monospace;font-size:0.65rem;
626
+ color:#00d4ff;text-transform:uppercase;letter-spacing:0.1em;
627
+ margin:0.8rem 0 0.3rem'>// {k}</div>
628
+ """, unsafe_allow_html=True)
629
+
630
+ if isinstance(v, list):
631
+ for item in v:
632
+ if isinstance(item, dict):
633
+ st.markdown(f"""
634
+ <div class="json-output" style="font-size:0.75rem;margin-bottom:0.4rem">
635
+ {syntax_highlight_json(item, 0)}
636
+ </div>
637
+ """, unsafe_allow_html=True)
638
+ else:
639
+ esc = str(item).replace("<","&lt;")
640
+ st.markdown(f'<div class="field-card"><div class="field-val">{esc}</div></div>',
641
+ unsafe_allow_html=True)
642
+ elif isinstance(v, dict):
643
+ st.markdown(f"""
644
+ <div class="json-output" style="font-size:0.75rem">
645
+ {syntax_highlight_json(v, 0)}
646
+ </div>
647
+ """, unsafe_allow_html=True)
648
+
649
+ # Stats
650
+ st.markdown(f"""
651
+ <div class="stats-bar">
652
+ <div class="stat-item">schema: <span>{st.session_state.active_schema}</span></div>
653
+ <div class="stat-item">fields: <span>{len(result['data'])}</span></div>
654
+ <div class="stat-item">tokens: <span>{result['tokens']}</span></div>
655
+ <div class="stat-item">attempts: <span>{result['attempts']}</span></div>
656
+ <div class="stat-item">method: <span>{result['method']}</span></div>
657
+ </div>
658
+ """, unsafe_allow_html=True)
659
+
660
+ with raw_col:
661
+ st.markdown("""
662
+ <div class="prompt-line">user@extractor:~$ <span>cat output.json</span></div>
663
+ """, unsafe_allow_html=True)
664
+ json_html = syntax_highlight_json(result["data"])
665
+ st.markdown(f'<div class="json-output">{json_html}</div>',
666
+ unsafe_allow_html=True)
667
+
668
+ # Download
669
+ st.download_button(
670
+ "⬑ Download JSON",
671
+ data=json.dumps(result["data"], ensure_ascii=False, indent=2),
672
+ file_name=f"extracted_{st.session_state.active_schema.lower().replace(' ','_')}.json",
673
+ mime="application/json",
674
+ use_container_width=True,
675
+ )
676
+
677
+ except Exception as e:
678
+ prog_ph.markdown(f"""
679
+ <div class="term-window">
680
+ <div class="term-titlebar">
681
+ <span class="term-dot dot-r"></span><span class="term-wintitle">error.log</span>
682
+ </div>
683
+ <div class="term-body" style="color:#ff3355;font-size:0.8rem">
684
+ βœ— ERRO: {e}
685
+ </div>
686
+ </div>
687
+ """, unsafe_allow_html=True)
688
+
689
+ # ════════════════════════════════════════════════════════════════
690
+ # TAB 2 — SCHEMA CUSTOMIZADO
691
+ # ════════════════════════════════════════════════════════════════
692
+ with tab_custom:
693
+ st.markdown("""
694
+ <div class="prompt-line">user@extractor:~$ <span>define --schema=custom</span></div>
695
+ <div style='font-family:Share Tech Mono,monospace;font-size:0.7rem;
696
+ color:#446644;margin:0.3rem 0 0.8rem'>
697
+ // Defina seu prΓ³prio JSON Schema e extraia qualquer estrutura de qualquer texto
698
+ </div>
699
+ """, unsafe_allow_html=True)
700
+
701
+ DEFAULT_CUSTOM = '''{
702
+ "type": "object",
703
+ "properties": {
704
+ "nome_produto": {"type": "string"},
705
+ "preco": {"type": "number"},
706
+ "categorias": {"type": "array", "items": {"type": "string"}},
707
+ "disponivel": {"type": "boolean"},
708
+ "especificacoes": {
709
+ "type": "object",
710
+ "properties": {
711
+ "peso": {"type": "string"},
712
+ "cor": {"type": "string"}
713
+ }
714
+ }
715
+ },
716
+ "required": ["nome_produto"]
717
+ }'''
718
+
719
+ c_left, c_right = st.columns(2, gap="large")
720
+
721
+ with c_left:
722
+ st.markdown('<div class="prompt-line">$ <span>vim schema.json</span></div>',
723
+ unsafe_allow_html=True)
724
+ custom_schema = st.text_area(
725
+ "", value=st.session_state.custom_schema or DEFAULT_CUSTOM,
726
+ height=280, label_visibility="collapsed", key="custom_schema_input"
727
+ )
728
+
729
+ with c_right:
730
+ st.markdown('<div class="prompt-line">$ <span>cat input.txt</span></div>',
731
+ unsafe_allow_html=True)
732
+ custom_text = st.text_area(
733
+ "", height=280, label_visibility="collapsed", key="custom_text",
734
+ placeholder="Cole o texto para extrair..."
735
+ )
736
+
737
+ run_custom = st.button("⬑ EXTRAIR COM SCHEMA CUSTOMIZADO", use_container_width=True)
738
+
739
+ if run_custom:
740
+ if not get_key():
741
+ st.error("Configure a API Key na sidebar.")
742
+ st.stop()
743
+ if not custom_text.strip() or not custom_schema.strip():
744
+ st.warning("Preencha o schema e o texto.")
745
+ st.stop()
746
+
747
+ from extractor import StructuredExtractor
748
+ with st.spinner("Extraindo..."):
749
+ try:
750
+ engine = StructuredExtractor(get_key())
751
+ result = engine.extract_with_custom_schema(custom_text, custom_schema)
752
+
753
+ st.markdown('<div class="prompt-line">$ <span>cat output.json</span></div>',
754
+ unsafe_allow_html=True)
755
+ json_html = syntax_highlight_json(result["data"])
756
+ st.markdown(f'<div class="json-output">{json_html}</div>',
757
+ unsafe_allow_html=True)
758
+
759
+ st.markdown(f"""
760
+ <div class="stats-bar">
761
+ <div class="stat-item">tokens: <span>{result['tokens']}</span></div>
762
+ <div class="stat-item">attempts: <span>{result['attempts']}</span></div>
763
+ </div>
764
+ """, unsafe_allow_html=True)
765
+
766
+ st.download_button(
767
+ "⬑ Download JSON",
768
+ data=json.dumps(result["data"], ensure_ascii=False, indent=2),
769
+ file_name="custom_extraction.json",
770
+ mime="application/json",
771
+ )
772
+ st.session_state.history.append({
773
+ "schema": "Custom",
774
+ "text_preview": custom_text[:120] + "...",
775
+ "result": result,
776
+ })
777
+ except ValueError as e:
778
+ st.error(f"Schema invΓ‘lido: {e}")
779
+ except Exception as e:
780
+ st.error(f"Erro: {e}")
781
+
782
+ # ════════════════════════════════════════════════════════════════
783
+ # TAB 3 — HISTÓRICO
784
+ # ════════════════════════════════════════════════════════════════
785
+ with tab_history:
786
+ if not st.session_state.history:
787
+ st.markdown("""
788
+ <div style='font-family:Share Tech Mono,monospace;font-size:0.8rem;
789
+ color:#2a4a2a;text-align:center;padding:3rem'>
790
+ // nenhuma extraΓ§Γ£o executada ainda
791
+ </div>
792
+ """, unsafe_allow_html=True)
793
+ else:
794
+ for i, h in enumerate(reversed(st.session_state.history)):
795
+ r = h["result"]
796
+ with st.expander(
797
+ f"#{len(st.session_state.history)-i} Β· {h['schema']} Β· {r['tokens']} tokens",
798
+ expanded=(i == 0)
799
+ ):
800
+ st.markdown(f"""
801
+ <div style='font-family:Share Tech Mono,monospace;font-size:0.7rem;
802
+ color:#446644;margin-bottom:0.5rem'>// {h['text_preview']}</div>
803
+ """, unsafe_allow_html=True)
804
+ json_html = syntax_highlight_json(r["data"])
805
+ st.markdown(f'<div class="json-output" style="font-size:0.75rem">{json_html}</div>',
806
+ unsafe_allow_html=True)
807
+ st.download_button(
808
+ "⬑ Download",
809
+ data=json.dumps(r["data"], ensure_ascii=False, indent=2),
810
+ file_name=f"extract_{i}.json",
811
+ mime="application/json",
812
+ key=f"dl_{i}",
813
+ )