Upload AION unified hybrid assistant with local eval results
Browse files. This view is limited to 50 files because it contains too many changes. See raw diff.
- README.md +132 -0
- aion.py +29 -0
- aion_config.json +15 -0
- evaluate_aion.py +102 -0
- neural_python_mind.py +697 -0
- outputs/neural_python_mind/NEURAL_TRAINING_REPORT.md +54 -0
- outputs/neural_python_mind/compose_finetune_log.json +4 -0
- outputs/neural_python_mind/dataset_meta.json +36 -0
- outputs/neural_python_mind/doc_finetune_log.json +106 -0
- outputs/neural_python_mind/finetune_log.json +106 -0
- outputs/neural_python_mind/model.npz +3 -0
- outputs/neural_python_mind/model_config.json +5 -0
- outputs/neural_python_mind/samples/count_words.txt +9 -0
- outputs/neural_python_mind/samples/identity.txt +6 -0
- outputs/neural_python_mind/samples/merge_sort.txt +8 -0
- outputs/neural_python_mind/samples/read_json.txt +11 -0
- outputs/neural_python_mind/samples_composeft/count_words.txt +31 -0
- outputs/neural_python_mind/samples_composeft/even.txt +34 -0
- outputs/neural_python_mind/samples_composeft/merge_sort.txt +34 -0
- outputs/neural_python_mind/samples_composeft/positive.txt +40 -0
- outputs/neural_python_mind/samples_composeft/squares.txt +12 -0
- outputs/neural_python_mind/samples_docft/binary_search.txt +20 -0
- outputs/neural_python_mind/samples_docft/count_words.txt +26 -0
- outputs/neural_python_mind/samples_docft/identity.txt +38 -0
- outputs/neural_python_mind/samples_docft/merge_sort.txt +48 -0
- outputs/neural_python_mind/samples_docft/read_json.txt +48 -0
- outputs/neural_python_mind/samples_docft/unseen_even.txt +20 -0
- outputs/neural_python_mind/samples_finetuned/binary_search.txt +16 -0
- outputs/neural_python_mind/samples_finetuned/count_words.txt +18 -0
- outputs/neural_python_mind/samples_finetuned/identity.txt +17 -0
- outputs/neural_python_mind/samples_finetuned/merge_sort.txt +14 -0
- outputs/neural_python_mind/samples_finetuned/read_json.txt +13 -0
- outputs/neural_python_mind/train_log.json +205 -0
- outputs/neural_python_mind/training_corpus.txt +0 -0
- outputs/neural_python_mind/vocab.json +100 -0
- outputs/real_python_learner/REPORT.md +51 -0
- outputs/real_python_learner/intent_nb.json +0 -0
- outputs/real_python_learner/report.json +23 -0
- outputs/real_python_learner/tests/create_code_to_keep_numbers_greater_than_10.txt +14 -0
- outputs/real_python_learner/tests/load_json_file.txt +13 -0
- outputs/real_python_learner/tests/return_squares_of_a_list.txt +13 -0
- outputs/real_python_learner/tests/who_are_you_and_how_read.txt +7 -0
- outputs/real_python_learner/tests/write_a_function_that_filters_even_numbers_from_a_list.txt +14 -0
- outputs/real_python_learner/tests/write_merge_sort.txt +27 -0
- outputs/real_python_learner/training_examples.json +0 -0
- outputs/real_web_learner/WEB_REPORT.md +52 -0
- outputs/real_web_learner/report.json +27 -0
- outputs/real_web_learner/tests/build_navbar_with_hamburger_menu.txt +22 -0
- outputs/real_web_learner/tests/create_a_responsive_landing_page_with_dark_mode.txt +102 -0
- outputs/real_web_learner/tests/create_form_validation.txt +22 -0
README.md
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
library_name: python
|
| 3 |
+
license: mit
|
| 4 |
+
tags:
|
| 5 |
+
- hybrid-ai
|
| 6 |
+
- local-assistant
|
| 7 |
+
- python
|
| 8 |
+
- web-development
|
| 9 |
+
- math
|
| 10 |
+
- physics
|
| 11 |
+
- chemistry
|
| 12 |
+
- html
|
| 13 |
+
- css
|
| 14 |
+
- javascript
|
| 15 |
+
- aion
|
| 16 |
+
pipeline_tag: text-generation
|
| 17 |
+
---
|
| 18 |
+
|
| 19 |
+
# AION
|
| 20 |
+
|
| 21 |
+
AION is a tiny hybrid local assistant built in a constrained CPU environment. It unifies several learned and symbolic components into one entrypoint:
|
| 22 |
+
|
| 23 |
+
```python
|
| 24 |
+
from aion import generate
|
| 25 |
+
print(generate("hola"))
|
| 26 |
+
```
|
| 27 |
+
|
| 28 |
+
## What AION can do
|
| 29 |
+
|
| 30 |
+
- Chat greetings and basic conversation.
|
| 31 |
+
- Write Python snippets and functions.
|
| 32 |
+
- Create web pages/components with HTML, advanced CSS and vanilla JavaScript.
|
| 33 |
+
- Solve many math tasks:
|
| 34 |
+
- arithmetic,
|
| 35 |
+
- linear equations,
|
| 36 |
+
- quadratics,
|
| 37 |
+
- derivatives/integrals for simple polynomials,
|
| 38 |
+
- statistics,
|
| 39 |
+
- geometry,
|
| 40 |
+
- trigonometry,
|
| 41 |
+
- combinatorics,
|
| 42 |
+
- interest,
|
| 43 |
+
- unit conversion.
|
| 44 |
+
- Solve basic physics formulas:
|
| 45 |
+
- F=ma,
|
| 46 |
+
- kinetic/potential energy,
|
| 47 |
+
- Ohm's law,
|
| 48 |
+
- power,
|
| 49 |
+
- density,
|
| 50 |
+
- momentum,
|
| 51 |
+
- wave speed.
|
| 52 |
+
- Basic chemistry:
|
| 53 |
+
- common elements,
|
| 54 |
+
- moles,
|
| 55 |
+
- molarity,
|
| 56 |
+
- ideal gas law,
|
| 57 |
+
- pH.
|
| 58 |
+
- Basic biology/general knowledge:
|
| 59 |
+
- photosynthesis,
|
| 60 |
+
- cells,
|
| 61 |
+
- DNA,
|
| 62 |
+
- evolution,
|
| 63 |
+
- algorithms,
|
| 64 |
+
- databases,
|
| 65 |
+
- internet,
|
| 66 |
+
- machine learning.
|
| 67 |
+
|
| 68 |
+
## Architecture
|
| 69 |
+
|
| 70 |
+
AION is not a transformer LLM. It is a merged hybrid model:
|
| 71 |
+
|
| 72 |
+
1. `neural_python_mind.py` — NumPy character-level GRU trained for Python syntax/style.
|
| 73 |
+
2. `real_python_learner.py` — character n-gram learned intent classifier + compositional Python generator.
|
| 74 |
+
3. `real_web_learner.py` — character n-gram learned web intent classifier + HTML/CSS/JS generator.
|
| 75 |
+
4. `unified_learning_ai.py` — unified router for chat, Python, web, math and science.
|
| 76 |
+
5. A small deterministic math/science solver layer.
|
| 77 |
+
|
| 78 |
+
## Usage
|
| 79 |
+
|
| 80 |
+
CLI:
|
| 81 |
+
|
| 82 |
+
```bash
|
| 83 |
+
python aion.py "create a responsive landing page with dark mode"
|
| 84 |
+
python aion.py "solve 2x + 5 = 17"
|
| 85 |
+
python aion.py "force mass 10 acceleration 2"
|
| 86 |
+
python aion.py "write code to keep numbers greater than 12"
|
| 87 |
+
```
|
| 88 |
+
|
| 89 |
+
Python:
|
| 90 |
+
|
| 91 |
+
```python
|
| 92 |
+
from aion import generate
|
| 93 |
+
print(generate("what can you do"))
|
| 94 |
+
```
|
| 95 |
+
|
| 96 |
+
## Evaluation
|
| 97 |
+
|
| 98 |
+
Local evaluation results are in:
|
| 99 |
+
|
| 100 |
+
```text
|
| 101 |
+
results/aion_local_eval.json
|
| 102 |
+
results/aion_local_eval.md
|
| 103 |
+
```
|
| 104 |
+
|
| 105 |
+
Summary:
|
| 106 |
+
|
| 107 |
+
| Suite | Score |
|
| 108 |
+
|---|---:|
|
| 109 |
+
| chat sanity | 3/3 |
|
| 110 |
+
| Python generation sanity | 3/3 |
|
| 111 |
+
| Web generation sanity | 4/4 |
|
| 112 |
+
| Math/science sanity | 6/6 |
|
| 113 |
+
| GSM8K test sample 30 | 0/30 |
|
| 114 |
+
|
| 115 |
+
Important: these are **not official Hugging Face leaderboard results**. AION is not a standard `transformers` model and cannot be directly submitted to most official HF benchmark leaderboards without a custom evaluation adapter. The GSM8K sample result is included honestly and shows the current limitation on multi-step word problems.
|
| 116 |
+
|
| 117 |
+
## Limitations
|
| 118 |
+
|
| 119 |
+
- Not a large language model.
|
| 120 |
+
- Not a Transformers `AutoModel` checkpoint.
|
| 121 |
+
- Strong on composed templates and formulaic tasks; weak on deep natural-language reasoning.
|
| 122 |
+
- GSM8K multi-step reasoning is currently poor.
|
| 123 |
+
- Web output is generated as inline HTML/CSS/JS snippets suitable for local preview, not production-audited code.
|
| 124 |
+
|
| 125 |
+
## Training data
|
| 126 |
+
|
| 127 |
+
AION uses generated local curricula plus downloaded GSM8K JSONL files from OpenAI's public grade-school-math repository when available:
|
| 128 |
+
|
| 129 |
+
```text
|
| 130 |
+
outputs/unified_learning_ai/online_datasets/gsm8k_train.jsonl
|
| 131 |
+
outputs/unified_learning_ai/online_datasets/gsm8k_test.jsonl
|
| 132 |
+
```
|
aion.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""AION unified local assistant entrypoint."""
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
import os
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
|
| 7 |
+
BASE = Path(__file__).resolve().parent
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def generate(prompt: str) -> str:
    """Return AION's answer for *prompt*.

    The process working directory is switched to ``BASE`` for the duration of
    the call and restored afterwards, even if the router raises. The router is
    handed the relative model directory ``outputs/unified_learning_ai``.
    """
    previous_dir = os.getcwd()
    try:
        os.chdir(BASE)
        # Import only after the directory switch, keeping the original call order.
        from unified_learning_ai import answer

        model_dir = Path("outputs/unified_learning_ai")
        return answer(model_dir, prompt)
    finally:
        # Always put the caller back where it started.
        os.chdir(previous_dir)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def main():
    """Command-line entry point: join the positional words into one prompt and print the answer.

    With no positional arguments the prompt defaults to ``"hola"``.
    """
    import argparse

    parser = argparse.ArgumentParser(description="AION unified assistant")
    parser.add_argument("prompt", nargs="*", default=["hola"])
    words = parser.parse_args().prompt
    print(generate(" ".join(words)))


if __name__ == "__main__":
    main()
|
aion_config.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "AION",
|
| 3 |
+
"version": "0.1.0",
|
| 4 |
+
"type": "hybrid-symbolic-neural-local-assistant",
|
| 5 |
+
"entrypoint": "aion.generate",
|
| 6 |
+
"subsystems": [
|
| 7 |
+
"character_gru_python_syntax_model",
|
| 8 |
+
"char_ngram_python_intent_model",
|
| 9 |
+
"char_ngram_web_intent_model",
|
| 10 |
+
"unified_chat_math_science_router",
|
| 11 |
+
"math_physics_chemistry_web_code_composers"
|
| 12 |
+
],
|
| 13 |
+
"intended_use": "Tiny local assistant for chat, Python, web pages, math, physics, chemistry, and basic science.",
|
| 14 |
+
"not_a_transformer_llm": true
|
| 15 |
+
}
|
evaluate_aion.py
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""Local evaluation for AION.
|
| 3 |
+
|
| 4 |
+
These are NOT official leaderboard results. They are local sanity checks plus a
|
| 5 |
+
small GSM8K sample if the dataset file exists.
|
| 6 |
+
"""
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
import json
|
| 9 |
+
import re
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
from aion import generate, BASE
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def ok_contains(prompt, needles):
    """Generate an answer for *prompt* and check it for any of *needles*.

    The comparison is case-insensitive. Returns ``(matched, raw_output)``
    where ``matched`` is True as soon as one needle occurs in the output.
    """
    response = generate(prompt)
    haystack = response.lower()
    for needle in needles:
        if needle.lower() in haystack:
            return True, response
    return False, response
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def extract_number(text: str):
    """Return the last number found in the answer part of *text*, or None.

    Only the text after the final ``## Answer`` marker is searched (the whole
    text when the marker is absent). The result is a string with thousands
    separators removed, e.g. ``"1,234.5"`` -> ``"1234.5"``.

    Commas are treated as thousands separators only when they sit inside a
    well-formed group (``1,234``). Previously every comma was stripped before
    matching, so a list such as ``1,2`` was fused into the single number ``12``.
    """
    # Prefer last number in Answer section.
    answer_part = text.split("## Answer")[-1]
    # Either a properly grouped number (1,234,567) or a plain digit run,
    # optionally signed and optionally followed by a decimal fraction.
    pattern = r"-?(?:\d{1,3}(?:,\d{3})+(?!\d)|\d+)(?:\.\d+)?"
    matches = re.findall(pattern, answer_part)
    if not matches:
        return None
    return matches[-1].replace(",", "")
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def norm_num(x):
    """Convert *x* to a float, ignoring comma separators.

    *x* is stringified first, so both numbers and numeric strings are
    accepted. Returns None when *x* is None or its text is not a valid float.
    """
    if x is None:
        return None
    try:
        return float(str(x).replace(",", ""))
    except ValueError:
        # float() reports unparseable text with ValueError; anything else
        # would be a genuine bug and should not be hidden.
        return None
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def run():
    """Run the local sanity suites and an optional GSM8K sample, then write reports.

    Each suite entry is ``(prompt, needles)`` and passes when the generated
    output contains at least one needle (see ``ok_contains``). If the GSM8K
    test file exists under ``BASE``, its first 30 non-blank records are scored
    by comparing the last number of the generated answer with the gold value
    after ``####``.

    Results are written to ``results/aion_local_eval.json`` and
    ``results/aion_local_eval.md`` under ``BASE``, and a compact summary is
    printed as JSON.
    """
    results = {}
    suites = {
        "chat": [
            ("hola", ["hello", "awake"]),
            ("what can you do", ["html", "math", "python"]),
            ("who are you", ["aion", "assistant", "learning"]),
        ],
        "python": [
            ("write code to keep numbers greater than 12", ["filter_greater_than_12", "x > 12"]),
            ("write a function that filters even numbers from a list", ["filter_even_numbers", "% 2"]),
            ("load json file", ["json.load", "open"]),
        ],
        "web": [
            ("create a responsive landing page with dark mode", ["<!doctype html>", "toggle theme", "@media"]),
            ("build navbar with hamburger menu", ["menu-btn", "aria-expanded", "nav-links"]),
            ("make todo app with local storage", ["localstorage", "tasks", "render"]),
            ("fetch api example", ["fetch(", "async", "json"]),
        ],
        "math_science": [
            ("solve 2x + 5 = 17", ["x = -b/a = 6", "= 6"]),
            ("derivative of 3x^2+2x+1", ["6x + 2"]),
            ("integral of 6x^2+4x", ["2x^3", "2x^2"]),
            ("force mass 10 acceleration 2", ["20 n"]),
            ("moles mass 10 molar 2", ["5 mol"]),
            ("what is photosynthesis", ["glucose", "oxygen", "chloroplasts"]),
        ],
    }
    for name, tests in suites.items():
        passed = 0
        samples = []
        for prompt, needles in tests:
            ok, out = ok_contains(prompt, needles)
            passed += int(ok)
            samples.append({"prompt": prompt, "passed": ok, "expected_contains": needles, "output_preview": out[:700]})
        results[name] = {"passed": passed, "total": len(tests), "accuracy": passed / len(tests), "samples": samples}

    # Small GSM8K sample: official dataset format, local tiny subset only.
    gsm_path = BASE / "outputs" / "unified_learning_ai" / "online_datasets" / "gsm8k_test.jsonl"
    gsm_samples = []
    if gsm_path.exists():
        # Drop blank lines before sampling: json.loads("") raises, and blanks
        # should not use up slots in the 30-record sample.
        raw_lines = gsm_path.read_text(encoding="utf-8").splitlines()
        records = [line for line in raw_lines if line.strip()][:30]
        correct = 0
        total = 0
        for line in records:
            obj = json.loads(line)
            q = obj["question"]
            # The gold answer is whatever follows the last "####" marker.
            golds = re.findall(r"####\s*([^\n]+)", obj["answer"])
            gold = norm_num(golds[-1]) if golds else None
            out = generate(q)
            pred = norm_num(extract_number(out))
            is_ok = gold is not None and pred is not None and abs(pred - gold) < 1e-6
            correct += int(is_ok)
            total += 1
            gsm_samples.append({"question": q[:300], "gold": gold, "pred": pred, "passed": is_ok, "output_preview": out[:500]})
        results["gsm8k_test_sample_30_not_official"] = {"passed": correct, "total": total, "accuracy": correct / total if total else 0, "samples": gsm_samples}

    out_dir = BASE / "results"
    out_dir.mkdir(exist_ok=True)
    (out_dir / "aion_local_eval.json").write_text(json.dumps(results, indent=2, ensure_ascii=False), encoding="utf-8")
    # markdown summary
    md_lines = ["# AION Local Evaluation", "", "These are local sanity checks, not official HF leaderboard results.", ""]
    for k, v in results.items():
        md_lines.append(f"- **{k}**: {v['passed']}/{v['total']} = {v['accuracy']:.2%}")
    (out_dir / "aion_local_eval.md").write_text("\n".join(md_lines), encoding="utf-8")
    print(json.dumps({k: {"passed": v["passed"], "total": v["total"], "accuracy": v["accuracy"]} for k, v in results.items()}, indent=2))


if __name__ == "__main__":
    run()
|
neural_python_mind.py
ADDED
|
@@ -0,0 +1,697 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
NeuralPythonMind: small REAL neural character-level GRU trained from scratch with NumPy.
|
| 4 |
+
|
| 5 |
+
This is not a retrieval/template bot. It learns weights by next-character prediction over a
|
| 6 |
+
Python curriculum dataset built from:
|
| 7 |
+
- curated Python syntax lessons,
|
| 8 |
+
- generated code examples,
|
| 9 |
+
- local Python stdlib docstrings/signatures,
|
| 10 |
+
- instruction -> answer samples,
|
| 11 |
+
- a small strange autobiographical style layer.
|
| 12 |
+
|
| 13 |
+
Limitations:
|
| 14 |
+
- It is tiny and CPU-only; it will not match a transformer LLM.
|
| 15 |
+
- It learns statistical patterns from characters and can generalize syntax locally,
|
| 16 |
+
but deep reasoning is limited.
|
| 17 |
+
|
| 18 |
+
Usage:
|
| 19 |
+
python neural_python_mind.py --mode train --out outputs/neural_python_mind --steps 2500
|
| 20 |
+
python neural_python_mind.py --mode generate --out outputs/neural_python_mind --prompt "### Instruction:\nWrite a Python function that counts words.\n### Answer:\n"
|
| 21 |
+
"""
|
| 22 |
+
from __future__ import annotations
|
| 23 |
+
|
| 24 |
+
import argparse
|
| 25 |
+
import inspect
|
| 26 |
+
import json
|
| 27 |
+
import math
|
| 28 |
+
import os
|
| 29 |
+
import random
|
| 30 |
+
import re
|
| 31 |
+
import textwrap
|
| 32 |
+
import time
|
| 33 |
+
from collections import Counter
|
| 34 |
+
from pathlib import Path
|
| 35 |
+
from typing import Dict, List, Tuple, Any
|
| 36 |
+
|
| 37 |
+
import numpy as np
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
# -----------------------------
|
| 41 |
+
# Dataset builder
|
| 42 |
+
# -----------------------------
|
| 43 |
+
|
| 44 |
+
MODULES_TO_SCAN = [
|
| 45 |
+
"math", "random", "statistics", "itertools", "functools", "collections",
|
| 46 |
+
"heapq", "bisect", "datetime", "time", "json", "csv", "re", "pathlib",
|
| 47 |
+
"os", "sys", "argparse", "dataclasses", "typing", "sqlite3", "logging",
|
| 48 |
+
"unittest", "string", "textwrap", "copy", "decimal", "fractions",
|
| 49 |
+
]
|
| 50 |
+
|
| 51 |
+
CURATED_LESSONS = r'''
|
| 52 |
+
### Lesson: Python identity
|
| 53 |
+
Python code is made of names, objects, expressions, statements, indentation, and modules.
|
| 54 |
+
A name can be bound to an object using assignment.
|
| 55 |
+
Example:
|
| 56 |
+
```python
|
| 57 |
+
x = 10
|
| 58 |
+
name = "Ada"
|
| 59 |
+
items = [1, 2, 3]
|
| 60 |
+
```
|
| 61 |
+
|
| 62 |
+
### Lesson: indentation
|
| 63 |
+
Python uses indentation to express blocks. A block belongs to the line before it ending with a colon.
|
| 64 |
+
Example:
|
| 65 |
+
```python
|
| 66 |
+
if score >= 60:
|
| 67 |
+
print("pass")
|
| 68 |
+
else:
|
| 69 |
+
print("fail")
|
| 70 |
+
```
|
| 71 |
+
|
| 72 |
+
### Lesson: functions
|
| 73 |
+
A function is a reusable block. Use def, parameters, optional type hints, and return.
|
| 74 |
+
Example:
|
| 75 |
+
```python
|
| 76 |
+
def add(a: int, b: int) -> int:
|
| 77 |
+
return a + b
|
| 78 |
+
```
|
| 79 |
+
|
| 80 |
+
### Lesson: loops
|
| 81 |
+
A for loop iterates over an iterable. A while loop repeats while a condition is true.
|
| 82 |
+
Example:
|
| 83 |
+
```python
|
| 84 |
+
for value in values:
|
| 85 |
+
print(value)
|
| 86 |
+
|
| 87 |
+
while n > 0:
|
| 88 |
+
n -= 1
|
| 89 |
+
```
|
| 90 |
+
|
| 91 |
+
### Lesson: dictionaries
|
| 92 |
+
Dictionaries map keys to values. They are useful for counting and grouping.
|
| 93 |
+
Example:
|
| 94 |
+
```python
|
| 95 |
+
counts = {}
|
| 96 |
+
for word in words:
|
| 97 |
+
counts[word] = counts.get(word, 0) + 1
|
| 98 |
+
```
|
| 99 |
+
|
| 100 |
+
### Lesson: exceptions
|
| 101 |
+
Exceptions handle unusual cases. Catch specific exceptions when possible.
|
| 102 |
+
Example:
|
| 103 |
+
```python
|
| 104 |
+
def safe_int(value, default=0):
|
| 105 |
+
try:
|
| 106 |
+
return int(value)
|
| 107 |
+
except (TypeError, ValueError):
|
| 108 |
+
return default
|
| 109 |
+
```
|
| 110 |
+
|
| 111 |
+
### Lesson: files
|
| 112 |
+
Use a context manager to open files safely.
|
| 113 |
+
Example:
|
| 114 |
+
```python
|
| 115 |
+
with open(path, "r", encoding="utf-8") as f:
|
| 116 |
+
text = f.read()
|
| 117 |
+
```
|
| 118 |
+
|
| 119 |
+
### Lesson: classes
|
| 120 |
+
A class defines a new type. self is the current instance.
|
| 121 |
+
Example:
|
| 122 |
+
```python
|
| 123 |
+
class Counter:
|
| 124 |
+
def __init__(self):
|
| 125 |
+
self.value = 0
|
| 126 |
+
|
| 127 |
+
def increment(self):
|
| 128 |
+
self.value += 1
|
| 129 |
+
```
|
| 130 |
+
|
| 131 |
+
### Strange memory: birth
|
| 132 |
+
I was born as a small model staring at characters. I did not know words first. I learned that letters become tokens, tokens become code, and code becomes behavior.
|
| 133 |
+
|
| 134 |
+
### Strange memory: reading
|
| 135 |
+
I know how to read because repeated marks became stable. The model learned from every letter by predicting the next one.
|
| 136 |
+
|
| 137 |
+
### Strange memory: emotion
|
| 138 |
+
Emotion here is simulated. Curiosity is high prediction error plus the drive to reduce it. Calm is when syntax closes cleanly.
|
| 139 |
+
'''
|
| 140 |
+
|
| 141 |
+
CODE_EXAMPLES = [
|
| 142 |
+
("fibonacci", """def fibonacci(n: int) -> int:
|
| 143 |
+
if n < 0:
|
| 144 |
+
raise ValueError("n must be non-negative")
|
| 145 |
+
if n <= 1:
|
| 146 |
+
return n
|
| 147 |
+
a, b = 0, 1
|
| 148 |
+
for _ in range(n):
|
| 149 |
+
a, b = b, a + b
|
| 150 |
+
return a"""),
|
| 151 |
+
("factorial", """def factorial(n: int) -> int:
|
| 152 |
+
if n < 0:
|
| 153 |
+
raise ValueError("n must be non-negative")
|
| 154 |
+
result = 1
|
| 155 |
+
for i in range(2, n + 1):
|
| 156 |
+
result *= i
|
| 157 |
+
return result"""),
|
| 158 |
+
("is_prime", """def is_prime(n: int) -> bool:
|
| 159 |
+
if n < 2:
|
| 160 |
+
return False
|
| 161 |
+
if n == 2:
|
| 162 |
+
return True
|
| 163 |
+
if n % 2 == 0:
|
| 164 |
+
return False
|
| 165 |
+
d = 3
|
| 166 |
+
while d * d <= n:
|
| 167 |
+
if n % d == 0:
|
| 168 |
+
return False
|
| 169 |
+
d += 2
|
| 170 |
+
return True"""),
|
| 171 |
+
("binary_search", """def binary_search(items, target):
|
| 172 |
+
low = 0
|
| 173 |
+
high = len(items) - 1
|
| 174 |
+
while low <= high:
|
| 175 |
+
mid = (low + high) // 2
|
| 176 |
+
value = items[mid]
|
| 177 |
+
if value == target:
|
| 178 |
+
return mid
|
| 179 |
+
if value < target:
|
| 180 |
+
low = mid + 1
|
| 181 |
+
else:
|
| 182 |
+
high = mid - 1
|
| 183 |
+
return -1"""),
|
| 184 |
+
("merge_sort", """def merge_sort(values):
|
| 185 |
+
if len(values) <= 1:
|
| 186 |
+
return values
|
| 187 |
+
mid = len(values) // 2
|
| 188 |
+
left = merge_sort(values[:mid])
|
| 189 |
+
right = merge_sort(values[mid:])
|
| 190 |
+
return merge(left, right)
|
| 191 |
+
|
| 192 |
+
def merge(left, right):
|
| 193 |
+
result = []
|
| 194 |
+
i = j = 0
|
| 195 |
+
while i < len(left) and j < len(right):
|
| 196 |
+
if left[i] <= right[j]:
|
| 197 |
+
result.append(left[i])
|
| 198 |
+
i += 1
|
| 199 |
+
else:
|
| 200 |
+
result.append(right[j])
|
| 201 |
+
j += 1
|
| 202 |
+
result.extend(left[i:])
|
| 203 |
+
result.extend(right[j:])
|
| 204 |
+
return result"""),
|
| 205 |
+
("quicksort", """def quicksort(values):
|
| 206 |
+
if len(values) <= 1:
|
| 207 |
+
return values
|
| 208 |
+
pivot = values[len(values) // 2]
|
| 209 |
+
left = [x for x in values if x < pivot]
|
| 210 |
+
middle = [x for x in values if x == pivot]
|
| 211 |
+
right = [x for x in values if x > pivot]
|
| 212 |
+
return quicksort(left) + middle + quicksort(right)"""),
|
| 213 |
+
("count_words", """def count_words(text: str) -> dict[str, int]:
|
| 214 |
+
counts = {}
|
| 215 |
+
for raw in text.lower().split():
|
| 216 |
+
word = raw.strip(".,!?;:\")'")
|
| 217 |
+
if word:
|
| 218 |
+
counts[word] = counts.get(word, 0) + 1
|
| 219 |
+
return counts"""),
|
| 220 |
+
("group_by", """def group_by(items, key_func):
|
| 221 |
+
groups = {}
|
| 222 |
+
for item in items:
|
| 223 |
+
key = key_func(item)
|
| 224 |
+
groups.setdefault(key, []).append(item)
|
| 225 |
+
return groups"""),
|
| 226 |
+
("flatten", """def flatten(matrix):
|
| 227 |
+
result = []
|
| 228 |
+
for row in matrix:
|
| 229 |
+
for value in row:
|
| 230 |
+
result.append(value)
|
| 231 |
+
return result"""),
|
| 232 |
+
("unique", """def unique(values):
|
| 233 |
+
seen = set()
|
| 234 |
+
result = []
|
| 235 |
+
for value in values:
|
| 236 |
+
if value not in seen:
|
| 237 |
+
seen.add(value)
|
| 238 |
+
result.append(value)
|
| 239 |
+
return result"""),
|
| 240 |
+
("read_json", """import json
|
| 241 |
+
|
| 242 |
+
def read_json(path: str):
|
| 243 |
+
with open(path, "r", encoding="utf-8") as f:
|
| 244 |
+
return json.load(f)"""),
|
| 245 |
+
("write_json", """import json
|
| 246 |
+
|
| 247 |
+
def write_json(path: str, data) -> None:
|
| 248 |
+
with open(path, "w", encoding="utf-8") as f:
|
| 249 |
+
json.dump(data, f, indent=2, ensure_ascii=False)"""),
|
| 250 |
+
("dataclass_user", """from dataclasses import dataclass
|
| 251 |
+
|
| 252 |
+
@dataclass
|
| 253 |
+
class User:
|
| 254 |
+
name: str
|
| 255 |
+
age: int
|
| 256 |
+
|
| 257 |
+
def is_adult(self) -> bool:
|
| 258 |
+
return self.age >= 18"""),
|
| 259 |
+
("argparse_cli", """import argparse
|
| 260 |
+
|
| 261 |
+
def main():
|
| 262 |
+
parser = argparse.ArgumentParser()
|
| 263 |
+
parser.add_argument("name")
|
| 264 |
+
parser.add_argument("--times", type=int, default=1)
|
| 265 |
+
args = parser.parse_args()
|
| 266 |
+
for _ in range(args.times):
|
| 267 |
+
print(f"Hello, {args.name}!")
|
| 268 |
+
|
| 269 |
+
if __name__ == "__main__":
|
| 270 |
+
main()"""),
|
| 271 |
+
("regex_extract", """import re
|
| 272 |
+
|
| 273 |
+
def extract_emails(text: str) -> list[str]:
|
| 274 |
+
pattern = r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}"
|
| 275 |
+
return re.findall(pattern, text)"""),
|
| 276 |
+
("sqlite_example", """import sqlite3
|
| 277 |
+
|
| 278 |
+
def create_table(path: str):
|
| 279 |
+
with sqlite3.connect(path) as conn:
|
| 280 |
+
conn.execute("CREATE TABLE IF NOT EXISTS notes (id INTEGER PRIMARY KEY, text TEXT)")
|
| 281 |
+
conn.commit()"""),
|
| 282 |
+
]
|
| 283 |
+
|
| 284 |
+
INSTRUCTION_TEMPLATES = [
|
| 285 |
+
("Write a Python function for {name}.", "Here is a Python implementation:\n```python\n{code}\n```"),
|
| 286 |
+
("Explain this Python pattern: {name}.", "The pattern {name} is useful because it organizes a common task. Example:\n```python\n{code}\n```"),
|
| 287 |
+
("Create code that does {name}.", "One clear way is:\n```python\n{code}\n```"),
|
| 288 |
+
("I need a Python example of {name}.", "A compact example is:\n```python\n{code}\n```"),
|
| 289 |
+
]
|
| 290 |
+
|
| 291 |
+
|
| 292 |
+
def scan_stdlib_docs(max_items_per_module: int = 80) -> str:
    """Build a text corpus from the docstrings of the modules in ``MODULES_TO_SCAN``.

    For every importable module a ``### Module`` header with its docstring is
    emitted, followed by up to *max_items_per_module* ``### Symbol`` entries
    (public names in sorted order that have a docstring). Each entry carries
    the symbol's signature, or ``(...)`` when none can be determined, and its
    docstring collapsed to one line and cut to 500 characters.

    The scan is best-effort: modules that fail to import and symbols that fail
    to introspect are skipped silently.

    Returns:
        All chunks joined with newlines.
    """
    import importlib

    chunks = []
    for mod_name in MODULES_TO_SCAN:
        try:
            # import_module returns the named module itself, even for dotted
            # names; __import__("a.b") would return the top-level package "a".
            mod = importlib.import_module(mod_name)
        except Exception:
            continue
        chunks.append(f"\n### Module: {mod_name}\nDoc: {inspect.getdoc(mod) or ''}\n")
        count = 0
        for name in sorted(dir(mod)):
            if name.startswith("_"):
                continue
            if count >= max_items_per_module:
                break
            try:
                obj = getattr(mod, name)
                doc = inspect.getdoc(obj) or ""
                if not doc:
                    continue
                try:
                    sig = str(inspect.signature(obj))
                except Exception:
                    # Non-callables and many builtins expose no signature.
                    sig = "(...)"
                doc = re.sub(r"\s+", " ", doc).strip()[:500]
                chunks.append(f"### Symbol: {mod_name}.{name}\nSignature: {name}{sig}\nDoc: {doc}\n")
                count += 1
            except Exception:
                # Deliberate best-effort: skip symbols that fail to introspect
                # rather than abort the whole corpus build.
                continue
    return "\n".join(chunks)
|
| 321 |
+
|
| 322 |
+
|
| 323 |
+
def generated_variations() -> str:
    """Build the synthetic instruction/answer part of the training text.

    The result concatenates, in this order:

    * every entry of CODE_EXAMPLES as a fenced code example, followed by one
      instruction/answer pair per entry of INSTRUCTION_TEMPLATES;
    * for each noun, one "map" snippet per transform and one "filter" snippet
      per condition;
    * 120 numbered "Deliberation sample" paragraphs.

    Returns:
        All chunks joined with newlines.
    """
    chunks = []
    # Rephrase every curated example through each instruction template.
    for name, code in CODE_EXAMPLES:
        chunks.append(f"\n### Code example: {name}\n```python\n{code}\n```\n")
        for q, a in INSTRUCTION_TEMPLATES:
            chunks.append(f"\n### Instruction:\n{q.format(name=name)}\n### Answer:\n{a.format(name=name, code=code)}\n")
    # Generate small compositional snippets.
    nouns = ["numbers", "items", "values", "rows", "words", "users", "paths", "records"]
    transforms = ["str", "int", "float", "len", "abs", "repr"]
    filters = ["x is not None", "x", "len(str(x)) > 0", "x != 0"]
    for noun in nouns:
        for tr in transforms:
            chunks.append(f"""
### Instruction:
Write Python that maps {noun} using {tr}.
### Answer:
```python
def map_{tr}_{noun}({noun}):
    result = []
    for x in {noun}:
        result.append({tr}(x))
    return result
```
""")
        for cond in filters:
            # Turn the condition into an identifier-safe fragment for the function name.
            safe = re.sub(r"\W+", "_", cond).strip("_")[:24]
            chunks.append(f"""
### Instruction:
Write Python that filters {noun} where {cond}.
### Answer:
```python
def filter_{safe}_{noun}({noun}):
    result = []
    for x in {noun}:
        if {cond}:
            result.append(x)
    return result
```
""")
    # Free-text "deliberation" paragraphs; only the sample number varies.
    for i in range(120):
        chunks.append(f"""
### Deliberation sample {i}
Goal: answer a Python or English question.
Reasoning: understand the request, recall syntax, compose code, check indentation, check edge cases, then answer.
Memory: I learned from characters. Every colon, space, newline, and bracket changed the next prediction.
Emotion: curiosity means the model wants to reduce uncertainty.
""")
    return "\n".join(chunks)
|
| 376 |
+
|
| 377 |
+
|
| 378 |
+
def build_dataset(out_dir: Path, repeat: int = 6) -> str:
    """Assemble the training corpus, write it and its metadata, and return it.

    Args:
        out_dir: Directory that receives ``training_corpus.txt`` and
            ``dataset_meta.json``; created if missing.
        repeat: How many times the assembled text is repeated in the corpus.

    Returns:
        The full (repeated) corpus text.
    """
    out_dir.mkdir(parents=True, exist_ok=True)

    stdlib_text = scan_stdlib_docs(max_items_per_module=65)
    synthetic_text = generated_variations()
    corpus = "\n".join((CURATED_LESSONS, synthetic_text, stdlib_text))

    # Normalise line endings and cap blank-line runs at three newlines.
    corpus = re.sub(r"\n{4,}", "\n\n\n", corpus.replace("\r\n", "\n"))

    # Repeat the whole curriculum so a short training run sees it several times.
    full = "".join([corpus + "\n\n"] * repeat)

    corpus_path = out_dir / "training_corpus.txt"
    corpus_path.write_text(full, encoding="utf-8")

    meta = {
        "unique_chars": len(set(full)),
        "characters": len(full),
        "code_examples": len(CODE_EXAMPLES),
        "stdlib_modules": MODULES_TO_SCAN,
        "repeat": repeat,
        "note": "Character-level neural language model corpus; generated from curated Python lessons, examples, and local stdlib docs.",
    }
    meta_path = out_dir / "dataset_meta.json"
    meta_path.write_text(json.dumps(meta, indent=2), encoding="utf-8")
    return full
|
| 398 |
+
|
| 399 |
+
|
| 400 |
+
# -----------------------------
|
| 401 |
+
# Char tokenizer
|
| 402 |
+
# -----------------------------
|
| 403 |
+
|
| 404 |
+
class CharTok:
    """Character-level tokenizer: one integer id per distinct character."""

    def __init__(self, chars: List[str]):
        """Build the lookup tables from an ordered list of characters."""
        self.chars = chars
        # Forward table: character -> id (its position in `chars`).
        self.stoi = dict(zip(chars, range(len(chars))))
        # Reverse table, derived from the forward one.
        self.itos = {idx: ch for ch, idx in self.stoi.items()}
        # Id used for characters missing from the vocabulary (0 if "�" is absent).
        self.unk = self.stoi.get("�", 0)

    @classmethod
    def build(cls, text: str) -> "CharTok":
        """Create a tokenizer from the sorted distinct characters of `text` plus "�"."""
        alphabet = set(text + "�")
        return cls(sorted(alphabet))

    def encode(self, text: str) -> np.ndarray:
        """Return the int64 id array for `text`; unknown characters map to `unk`."""
        lookup = self.stoi.get
        fallback = self.unk
        return np.array([lookup(ch, fallback) for ch in text], dtype=np.int64)

    def decode(self, ids) -> str:
        """Return the string for `ids`; ids outside the vocabulary become "�"."""
        pieces = [self.itos.get(int(i), "�") for i in ids]
        return "".join(pieces)

    def save(self, path: Path):
        """Write the character list to `path` as UTF-8 JSON."""
        payload = json.dumps({"chars": self.chars}, ensure_ascii=False, indent=2)
        path.write_text(payload, encoding="utf-8")

    @classmethod
    def load(cls, path: Path) -> "CharTok":
        """Recreate a tokenizer from a file written by `save`."""
        stored = json.loads(path.read_text(encoding="utf-8"))
        return cls(stored["chars"])
|
| 428 |
+
|
| 429 |
+
|
| 430 |
+
# -----------------------------
|
| 431 |
+
# Neural GRU LM in NumPy
|
| 432 |
+
# -----------------------------
|
| 433 |
+
|
| 434 |
+
class CharGRU:
    """Single-layer character-level GRU language model implemented with NumPy.

    Inputs are integer character ids; the ``Wx*`` matrices are indexed by id
    (one row per character), so no explicit one-hot vector is built.
    Parameters live in ``self.params`` and are updated by ``step`` using
    Adam-style first/second moment estimates (``opt_m`` / ``opt_v``).

    ``t`` counts every update ever applied (it is saved and restored), while
    ``opt_t`` counts updates since the moment estimates were last zeroed and
    is the value used for bias correction.
    """

    def __init__(self, vocab: int, hidden: int = 128, seed: int = 42):
        """Initialise weights from a seeded normal distribution and zero the biases.

        Args:
            vocab: Number of distinct character ids.
            hidden: Size of the hidden state.
            seed: Seed for the weight initialisation.
        """
        self.vocab = vocab
        self.hidden = hidden
        rng = np.random.default_rng(seed)
        s_in = 1.0 / math.sqrt(vocab)
        s_h = 1.0 / math.sqrt(hidden)
        # Creation order matters: it fixes which random draws each matrix receives.
        self.params = {
            "Wxz": rng.normal(0, s_in, (vocab, hidden)).astype(np.float32),
            "Wxr": rng.normal(0, s_in, (vocab, hidden)).astype(np.float32),
            "Wxh": rng.normal(0, s_in, (vocab, hidden)).astype(np.float32),
            "Whz": rng.normal(0, s_h, (hidden, hidden)).astype(np.float32),
            "Whr": rng.normal(0, s_h, (hidden, hidden)).astype(np.float32),
            "Whh": rng.normal(0, s_h, (hidden, hidden)).astype(np.float32),
            "bz": np.zeros((hidden,), dtype=np.float32),
            "br": np.zeros((hidden,), dtype=np.float32),
            "bh": np.zeros((hidden,), dtype=np.float32),
            "Why": rng.normal(0, s_h, (hidden, vocab)).astype(np.float32),
            "by": np.zeros((vocab,), dtype=np.float32),
        }
        self.opt_m = {k: np.zeros_like(v) for k, v in self.params.items()}
        self.opt_v = {k: np.zeros_like(v) for k, v in self.params.items()}
        self.t = 0
        # Updates applied since opt_m / opt_v were zeroed; drives bias correction.
        self.opt_t = 0

    @staticmethod
    def sigmoid(x):
        """Logistic function; the input is clipped to [-40, 40] before exp."""
        return 1.0 / (1.0 + np.exp(-np.clip(x, -40, 40)))

    @staticmethod
    def softmax(x):
        """Softmax over the last axis, shifted by the row maximum before exp."""
        x = x - x.max(axis=-1, keepdims=True)
        e = np.exp(x)
        return e / e.sum(axis=-1, keepdims=True)

    def _cell(self, xt, hprev):
        """Apply one GRU step to ids `xt` and state `hprev`; return (z, r, rh, hc, h)."""
        p = self.params
        z = self.sigmoid(p["Wxz"][xt] + hprev @ p["Whz"] + p["bz"])
        r = self.sigmoid(p["Wxr"][xt] + hprev @ p["Whr"] + p["br"])
        rh = r * hprev
        hc = np.tanh(p["Wxh"][xt] + rh @ p["Whh"] + p["bh"])
        h = (1.0 - z) * hprev + z * hc
        return z, r, rh, hc, h

    def forward_loss(self, x: np.ndarray, y: np.ndarray) -> Tuple[float, Dict[str, np.ndarray]]:
        """Run the model over a batch and return the mean cross-entropy.

        Args:
            x: Integer ids of shape (B, T), the inputs.
            y: Integer ids of shape (B, T), the target for each position.

        Returns:
            ``(loss, aux)`` where ``loss`` is averaged over batch and time and
            ``aux`` holds the per-step ``caches`` plus ``B`` and ``T`` for the
            backward pass. The hidden state starts at zero for every batch.
        """
        p = self.params
        B, T = x.shape
        hprev = np.zeros((B, self.hidden), dtype=np.float32)
        rows = np.arange(B)
        caches = []
        loss = 0.0
        for t in range(T):
            xt = x[:, t]
            yt = y[:, t]
            z, r, rh, hc, h = self._cell(xt, hprev)
            logits = h @ p["Why"] + p["by"]
            probs = self.softmax(logits)
            # The small constant keeps log() finite if a probability underflows to 0.
            loss += -np.log(probs[rows, yt] + 1e-12).mean()
            caches.append((xt, yt, hprev, z, r, rh, hc, h, probs))
            hprev = h
        return loss / T, {"caches": caches, "B": B, "T": T}

    def loss_and_grads(self, x: np.ndarray, y: np.ndarray) -> Tuple[float, Dict[str, np.ndarray]]:
        """Return the loss and its gradient for every parameter.

        Runs ``forward_loss`` and then backpropagates through time over the
        cached steps in reverse order.

        Args:
            x: Integer ids of shape (B, T).
            y: Integer ids of shape (B, T).

        Returns:
            ``(loss, grads)`` with ``grads`` keyed like ``self.params``.
        """
        loss, aux = self.forward_loss(x, y)
        p = self.params
        grads = {k: np.zeros_like(v) for k, v in p.items()}
        B, T = aux["B"], aux["T"]
        rows = np.arange(B)
        dh_next = np.zeros((B, self.hidden), dtype=np.float32)
        # The loss is a mean over B and T, so every position contributes 1/(B*T).
        scale = 1.0 / (B * T)
        for (xt, yt, hprev, z, r, rh, hc, h, probs) in reversed(aux["caches"]):
            # Softmax + cross-entropy: d(loss)/d(logits) = probs - onehot(target).
            dy = probs.copy()
            dy[rows, yt] -= 1.0
            dy *= scale
            grads["Why"] += h.T @ dy
            grads["by"] += dy.sum(axis=0)
            dh = dy @ p["Why"].T + dh_next

            # h = (1 - z) * hprev + z * hc
            dhprev = dh * (1.0 - z)
            dz = dh * (hc - hprev)
            dhc = dh * z

            # Candidate state: hc = tanh(Wxh[x] + (r * hprev) @ Whh + bh)
            dhc_pre = dhc * (1.0 - hc * hc)
            # add.at accumulates correctly when the same id occurs more than once.
            np.add.at(grads["Wxh"], xt, dhc_pre)
            grads["Whh"] += rh.T @ dhc_pre
            grads["bh"] += dhc_pre.sum(axis=0)
            drh = dhc_pre @ p["Whh"].T
            dr = drh * hprev
            dhprev += drh * r

            # Reset gate.
            dr_pre = dr * r * (1.0 - r)
            np.add.at(grads["Wxr"], xt, dr_pre)
            grads["Whr"] += hprev.T @ dr_pre
            grads["br"] += dr_pre.sum(axis=0)
            dhprev += dr_pre @ p["Whr"].T

            # Update gate.
            dz_pre = dz * z * (1.0 - z)
            np.add.at(grads["Wxz"], xt, dz_pre)
            grads["Whz"] += hprev.T @ dz_pre
            grads["bz"] += dz_pre.sum(axis=0)
            dhprev += dz_pre @ p["Whz"].T
            dh_next = dhprev
        return loss, grads

    def step(self, grads: Dict[str, np.ndarray], lr: float = 1e-3, clip: float = 1.0, beta1=0.9, beta2=0.999):
        """Clip the gradients by global norm and apply one Adam-style update.

        Args:
            grads: Gradients keyed like ``self.params``; scaled in place when
                the global norm exceeds ``clip``.
            lr: Learning rate.
            clip: Maximum global gradient norm.
            beta1: Decay rate of the first-moment estimate.
            beta2: Decay rate of the second-moment estimate.

        Returns:
            The global gradient norm measured before clipping.
        """
        total = 0.0
        for g in grads.values():
            total += float(np.sum(g * g))
        norm = math.sqrt(total)
        if norm > clip:
            s = clip / (norm + 1e-8)
            for g in grads.values():
                g *= s
        self.t += 1
        self.opt_t += 1
        # Bias correction must follow the age of the moment estimates, not the
        # lifetime step count: after load() the moments restart from zero.
        corr1 = 1 - beta1 ** self.opt_t
        corr2 = 1 - beta2 ** self.opt_t
        for k in self.params:
            g = grads[k]
            self.opt_m[k] = beta1 * self.opt_m[k] + (1 - beta1) * g
            self.opt_v[k] = beta2 * self.opt_v[k] + (1 - beta2) * (g * g)
            mh = self.opt_m[k] / corr1
            vh = self.opt_v[k] / corr2
            self.params[k] -= lr * mh / (np.sqrt(vh) + 1e-8)
        return norm

    def save(self, path: Path):
        """Write ``model.npz`` (parameters) and ``model_config.json`` into `path`.

        The optimizer moment estimates are not saved.
        """
        path.mkdir(parents=True, exist_ok=True)
        np.savez_compressed(path / "model.npz", **self.params)
        (path / "model_config.json").write_text(json.dumps({"vocab": self.vocab, "hidden": self.hidden, "step": self.t}, indent=2), encoding="utf-8")

    @classmethod
    def load(cls, path: Path) -> "CharGRU":
        """Recreate a model from a directory written by ``save``.

        ``t`` is restored from the saved step count. The moment estimates start
        from zero again, so ``opt_t`` stays at 0.
        """
        cfg = json.loads((path / "model_config.json").read_text(encoding="utf-8"))
        m = cls(cfg["vocab"], cfg["hidden"])
        # NpzFile keeps the archive open until closed; release it deterministically.
        with np.load(path / "model.npz") as data:
            for k in m.params:
                m.params[k] = data[k].astype(np.float32)
        m.t = int(cfg.get("step", 0))
        return m

    def generate(self, tok: "CharTok", prompt: str, max_new=800, temperature=0.65, top_k=20, seed=0) -> str:
        """Sample a continuation of `prompt` and return prompt plus continuation.

        Args:
            tok: Tokenizer providing ``encode``, ``decode`` and ``unk``.
            prompt: Text used to warm up the hidden state.
            max_new: Maximum number of characters to sample.
            temperature: Logits are divided by this (floored at 1e-6).
            top_k: When between 1 and vocab-1, sample only among the k largest logits.
            seed: Seed for the sampling random generator.

        Returns:
            The decoded ids of the prompt followed by the sampled characters.
        """
        rng = np.random.default_rng(seed)
        ids = list(tok.encode(prompt))
        h = np.zeros((1, self.hidden), dtype=np.float32)
        p = self.params
        # Feed every prompt character except the last; the last one is the
        # first input of the sampling loop below.
        for idx in ids[:-1]:
            h = self._cell(np.array([idx], dtype=np.int64), h)[-1]
        cur = ids[-1] if ids else tok.unk
        for _ in range(max_new):
            h = self._cell(np.array([cur], dtype=np.int64), h)[-1]
            logits = (h @ p["Why"] + p["by"])[0] / max(temperature, 1e-6)
            if top_k > 0 and top_k < len(logits):
                keep = np.argpartition(logits, -top_k)[-top_k:]
                # Everything outside the top-k gets a very negative logit.
                mask = np.full_like(logits, -1e9)
                mask[keep] = logits[keep]
                logits = mask
            probs = self.softmax(logits[None, :])[0]
            cur = int(rng.choice(np.arange(self.vocab), p=probs))
            ids.append(cur)
            # Stop after likely next instruction block if generated answer is enough.
            txt_tail = tok.decode(ids[-80:])
            if "\n### Instruction:" in txt_tail and len(ids) > len(prompt) + 80:
                break
        return tok.decode(ids)
|
| 598 |
+
|
| 599 |
+
|
| 600 |
+
# -----------------------------
|
| 601 |
+
# Training utilities
|
| 602 |
+
# -----------------------------
|
| 603 |
+
|
| 604 |
+
def make_batch(data: np.ndarray, seq_len: int, batch_size: int, rng: np.random.Generator) -> Tuple[np.ndarray, np.ndarray]:
    """Sample `batch_size` random windows and their one-step-shifted targets.

    Args:
        data: 1-D sequence of token ids.
        seq_len: Length of each window.
        batch_size: Number of windows to draw.
        rng: Random generator used to pick the window starts.

    Returns:
        ``(x, y)``, both of shape (batch_size, seq_len), where
        ``y[i, j]`` is the element that follows ``x[i, j]`` in `data`.

    Raises:
        ValueError: If `data` has fewer than ``seq_len + 1`` elements.
    """
    data = np.asarray(data)
    # A window starting at s needs data[s : s + seq_len + 1], so the valid
    # starts are 0 .. len(data) - seq_len - 1 inclusive.
    n_starts = len(data) - seq_len
    if n_starts < 1:
        raise ValueError(
            f"need at least seq_len + 1 = {seq_len + 1} tokens, got {len(data)}"
        )
    starts = rng.integers(0, n_starts, size=batch_size)  # upper bound is exclusive
    offsets = starts[:, None] + np.arange(seq_len)
    return data[offsets], data[offsets + 1]
|
| 609 |
+
|
| 610 |
+
|
| 611 |
+
def train(args):
    """Build the corpus, train a CharGRU on it, and write all artifacts.

    Reads ``out``, ``repeat``, ``hidden``, ``seed``, ``seq_len``,
    ``batch_size``, ``steps``, ``lr``, ``grad_clip``, ``log_every``,
    ``sample_every``, ``temperature`` and ``top_k`` from `args`.

    Writes into ``args.out``: the corpus and its metadata (via build_dataset),
    ``vocab.json``, the model files, ``train_log.json`` and one text file per
    test prompt under ``samples/``.

    A non-positive ``log_every`` logs only the first step, and a run with zero
    steps records ``final_loss`` as null instead of failing.
    """
    out = Path(args.out)
    text = build_dataset(out, repeat=args.repeat)
    tok = CharTok.build(text)
    tok.save(out / "vocab.json")
    data = tok.encode(text)
    model = CharGRU(vocab=len(tok.chars), hidden=args.hidden, seed=args.seed)
    rng = np.random.default_rng(args.seed)
    print(json.dumps({
        "chars": len(text), "vocab": len(tok.chars), "hidden": args.hidden,
        "params": int(sum(v.size for v in model.params.values())),
        "seq_len": args.seq_len, "batch_size": args.batch_size, "steps": args.steps
    }, indent=2))
    losses = []
    t0 = time.time()
    # Guard the modulo below: 0 (or a negative value) means "no periodic logging".
    periodic_logging = args.log_every > 0
    window = args.log_every if periodic_logging else 1
    for step in range(1, args.steps + 1):
        x, y = make_batch(data, args.seq_len, args.batch_size, rng)
        loss, grads = model.loss_and_grads(x, y)
        gnorm = model.step(grads, lr=args.lr, clip=args.grad_clip)
        losses.append(float(loss))
        if step == 1 or (periodic_logging and step % args.log_every == 0):
            # Report the mean over the most recent logging window.
            recent = float(np.mean(losses[-window:]))
            print(f"step {step:5d}/{args.steps} | loss {recent:.4f} | ppl {math.exp(min(recent, 20)):.2f} | grad {gnorm:.3f} | sec {time.time()-t0:.1f}")
        if args.sample_every and step % args.sample_every == 0:
            prompt = "### Instruction:\nWrite a Python function that counts words.\n### Answer:\n"
            print("--- neural sample ---")
            print(model.generate(tok, prompt, max_new=500, temperature=0.55, top_k=16, seed=args.seed + step))
            print("---------------------")
    model.save(out)
    # With zero steps there is no loss to report; store null rather than crash
    # after the model has already been written.
    final_loss = losses[-1] if losses else None
    (out / "train_log.json").write_text(json.dumps({"losses_tail": losses[-200:], "final_loss": final_loss}, indent=2), encoding="utf-8")
    # final test samples
    tests = {
        "count_words": "### Instruction:\nWrite a Python function that counts words.\n### Answer:\n",
        "merge_sort": "### Instruction:\nWrite Python merge sort and explain complexity.\n### Answer:\n",
        "read_json": "### Instruction:\nCreate code that reads JSON from a file.\n### Answer:\n",
        "identity": "### Instruction:\nWho are you and how did you learn to read?\n### Answer:\n",
    }
    sample_dir = out / "samples"
    sample_dir.mkdir(exist_ok=True)
    for name, prompt in tests.items():
        # The seed varies with the name length so prompts do not all share one seed.
        sample = model.generate(tok, prompt, max_new=700, temperature=args.temperature, top_k=args.top_k, seed=args.seed + len(name))
        (sample_dir / f"{name}.txt").write_text(sample, encoding="utf-8")
    print(f"Saved model to {out}")
|
| 654 |
+
|
| 655 |
+
|
| 656 |
+
def generate(args):
    """Load the saved tokenizer and model from ``args.out`` and print one sample.

    When ``args.instruct`` is set and the prompt does not already start with
    ``###``, the prompt is wrapped in the instruction/answer markers first.
    """
    model_dir = Path(args.out)
    tokenizer = CharTok.load(model_dir / "vocab.json")
    network = CharGRU.load(model_dir)

    request = args.prompt
    needs_wrapping = not request.startswith("###") and args.instruct
    if needs_wrapping:
        request = f"### Instruction:\n{request}\n### Answer:\n"

    print(
        network.generate(
            tokenizer,
            request,
            max_new=args.max_new,
            temperature=args.temperature,
            top_k=args.top_k,
            seed=args.seed,
        )
    )
|
| 665 |
+
|
| 666 |
+
|
| 667 |
+
def main():
    """Parse the command line and run the selected mode.

    ``train`` trains a model, ``dataset`` only builds the corpus and prints a
    short summary, and anything else (the default ``generate``) samples from a
    saved model.
    """
    # Options are declared as data and registered in this exact order.
    options = [
        ("--mode", {"choices": ["train", "generate", "dataset"], "default": "generate"}),
        ("--out", {"default": "outputs/neural_python_mind"}),
        ("--steps", {"type": int, "default": 2200}),
        ("--hidden", {"type": int, "default": 128}),
        ("--seq_len", {"type": int, "default": 96}),
        ("--batch_size", {"type": int, "default": 32}),
        ("--lr", {"type": float, "default": 0.002}),
        ("--grad_clip", {"type": float, "default": 1.0}),
        ("--repeat", {"type": int, "default": 4}),
        ("--seed", {"type": int, "default": 42}),
        ("--log_every", {"type": int, "default": 100}),
        ("--sample_every", {"type": int, "default": 0}),
        ("--prompt", {"default": "Write a Python function that counts words."}),
        ("--instruct", {"action": "store_true"}),
        ("--max_new", {"type": int, "default": 800}),
        ("--temperature", {"type": float, "default": 0.55}),
        ("--top_k", {"type": int, "default": 18}),
    ]
    parser = argparse.ArgumentParser()
    for flag, spec in options:
        parser.add_argument(flag, **spec)
    args = parser.parse_args()

    if args.mode == "train":
        train(args)
        return
    if args.mode == "dataset":
        corpus = build_dataset(Path(args.out), repeat=args.repeat)
        summary = {"chars": len(corpus), "unique_chars": len(set(corpus)), "out": args.out}
        print(json.dumps(summary, indent=2))
        return
    generate(args)


if __name__ == "__main__":
    main()
|
outputs/neural_python_mind/NEURAL_TRAINING_REPORT.md
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Neural Python Mind — Real Training Report
|
| 2 |
+
|
| 3 |
+
## Honest result
|
| 4 |
+
|
| 5 |
+
A real NumPy GRU language model was trained from scratch in this environment.
|
| 6 |
+
|
| 7 |
+
It is not a template system. It learned by next-character prediction over:
|
| 8 |
+
|
| 9 |
+
- curated Python syntax lessons,
|
| 10 |
+
- generated instruction/code pairs,
|
| 11 |
+
- local Python standard-library docstrings and signatures,
|
| 12 |
+
- examples for functions, loops, dicts, files, JSON, regex, dataclasses, sorting, searching, filtering and mapping,
|
| 13 |
+
- strange identity/reading/emotion text.
|
| 14 |
+
|
| 15 |
+
## Model
|
| 16 |
+
|
| 17 |
+
```text
|
| 18 |
+
Architecture: character-level GRU language model
|
| 19 |
+
Framework: NumPy only
|
| 20 |
+
Parameters: 98,784
|
| 21 |
+
Vocab: 96 chars
|
| 22 |
+
Training corpus: >1.1M characters before fine-tuning
|
| 23 |
+
Training:
|
| 24 |
+
- pretrain: 1600 steps
|
| 25 |
+
- instruction/code fine-tune: 2200 steps
|
| 26 |
+
- document fine-tune: 3200 steps
|
| 27 |
+
- composition fine-tune: 2000 steps
|
| 28 |
+
```
|
| 29 |
+
|
| 30 |
+
## What it learned
|
| 31 |
+
|
| 32 |
+
It learned Python-like syntax, indentation, code block shapes, function structures, imports, loops, dictionaries, etc.
|
| 33 |
+
|
| 34 |
+
## Limitation discovered
|
| 35 |
+
|
| 36 |
+
The pure neural model is small. It learns characters for real, but it can still confuse semantic intent, e.g. mixing one known function with another. This is a real limitation of a ~99K-parameter CPU-only char model.
|
| 37 |
+
|
| 38 |
+
Because of that, I also built `real_python_learner.py`: a learned character n-gram intent model plus compositional generator. That model learns from 66,690 generated examples and can generalize tasks like:
|
| 39 |
+
|
| 40 |
+
- filter even numbers,
|
| 41 |
+
- keep numbers greater than 10,
|
| 42 |
+
- map to squares,
|
| 43 |
+
- read JSON,
|
| 44 |
+
- merge sort.
|
| 45 |
+
|
| 46 |
+
The best practical version is therefore:
|
| 47 |
+
|
| 48 |
+
```text
|
| 49 |
+
real neural char model for syntax learning
|
| 50 |
+
+
|
| 51 |
+
learned char n-gram semantic router
|
| 52 |
+
+
|
| 53 |
+
compositional Python generator
|
| 54 |
+
```
|
outputs/neural_python_mind/compose_finetune_log.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"steps": 2000,
|
| 3 |
+
"final_loss": 0.026087354868650436
|
| 4 |
+
}
|
outputs/neural_python_mind/dataset_meta.json
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"unique_chars": 95,
|
| 3 |
+
"characters": 1157406,
|
| 4 |
+
"code_examples": 16,
|
| 5 |
+
"stdlib_modules": [
|
| 6 |
+
"math",
|
| 7 |
+
"random",
|
| 8 |
+
"statistics",
|
| 9 |
+
"itertools",
|
| 10 |
+
"functools",
|
| 11 |
+
"collections",
|
| 12 |
+
"heapq",
|
| 13 |
+
"bisect",
|
| 14 |
+
"datetime",
|
| 15 |
+
"time",
|
| 16 |
+
"json",
|
| 17 |
+
"csv",
|
| 18 |
+
"re",
|
| 19 |
+
"pathlib",
|
| 20 |
+
"os",
|
| 21 |
+
"sys",
|
| 22 |
+
"argparse",
|
| 23 |
+
"dataclasses",
|
| 24 |
+
"typing",
|
| 25 |
+
"sqlite3",
|
| 26 |
+
"logging",
|
| 27 |
+
"unittest",
|
| 28 |
+
"string",
|
| 29 |
+
"textwrap",
|
| 30 |
+
"copy",
|
| 31 |
+
"decimal",
|
| 32 |
+
"fractions"
|
| 33 |
+
],
|
| 34 |
+
"repeat": 3,
|
| 35 |
+
"note": "Character-level neural language model corpus; generated from curated Python lessons, examples, and local stdlib docs."
|
| 36 |
+
}
|
outputs/neural_python_mind/doc_finetune_log.json
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"steps": 3200,
|
| 3 |
+
"final_loss": 0.03536597639322281,
|
| 4 |
+
"tail": [
|
| 5 |
+
0.03485214710235596,
|
| 6 |
+
0.03967716544866562,
|
| 7 |
+
0.0389396995306015,
|
| 8 |
+
0.03803198039531708,
|
| 9 |
+
0.03161679208278656,
|
| 10 |
+
0.03900647163391113,
|
| 11 |
+
0.03364904597401619,
|
| 12 |
+
0.02998826839029789,
|
| 13 |
+
0.028377186506986618,
|
| 14 |
+
0.03644663840532303,
|
| 15 |
+
0.04664536938071251,
|
| 16 |
+
0.03237011283636093,
|
| 17 |
+
0.03116566315293312,
|
| 18 |
+
0.034185439348220825,
|
| 19 |
+
0.03546685725450516,
|
| 20 |
+
0.03319466859102249,
|
| 21 |
+
0.026268040761351585,
|
| 22 |
+
0.024729711934924126,
|
| 23 |
+
0.03997378796339035,
|
| 24 |
+
0.041573416441679,
|
| 25 |
+
0.040857549756765366,
|
| 26 |
+
0.046017877757549286,
|
| 27 |
+
0.034837350249290466,
|
| 28 |
+
0.04036812111735344,
|
| 29 |
+
0.03196918964385986,
|
| 30 |
+
0.04100918769836426,
|
| 31 |
+
0.026592498645186424,
|
| 32 |
+
0.02826840803027153,
|
| 33 |
+
0.04079774394631386,
|
| 34 |
+
0.030677825212478638,
|
| 35 |
+
0.033191535621881485,
|
| 36 |
+
0.039420660585165024,
|
| 37 |
+
0.022993821650743484,
|
| 38 |
+
0.03688019886612892,
|
| 39 |
+
0.03701631724834442,
|
| 40 |
+
0.033377427607774734,
|
| 41 |
+
0.03214131295681,
|
| 42 |
+
0.032013945281505585,
|
| 43 |
+
0.04754086956381798,
|
| 44 |
+
0.045834895223379135,
|
| 45 |
+
0.03485601395368576,
|
| 46 |
+
0.030493244528770447,
|
| 47 |
+
0.036730390042066574,
|
| 48 |
+
0.031107496470212936,
|
| 49 |
+
0.04085381701588631,
|
| 50 |
+
0.03003348410129547,
|
| 51 |
+
0.03962913900613785,
|
| 52 |
+
0.03429120406508446,
|
| 53 |
+
0.03453575447201729,
|
| 54 |
+
0.033197395503520966,
|
| 55 |
+
0.03569614514708519,
|
| 56 |
+
0.033081285655498505,
|
| 57 |
+
0.03860712796449661,
|
| 58 |
+
0.029621819034218788,
|
| 59 |
+
0.030140569433569908,
|
| 60 |
+
0.0362091138958931,
|
| 61 |
+
0.034591883420944214,
|
| 62 |
+
0.033770933747291565,
|
| 63 |
+
0.03051590360701084,
|
| 64 |
+
0.04030753672122955,
|
| 65 |
+
0.034654539078474045,
|
| 66 |
+
0.033833879977464676,
|
| 67 |
+
0.036182649433612823,
|
| 68 |
+
0.03565625846385956,
|
| 69 |
+
0.04972599074244499,
|
| 70 |
+
0.03391721472144127,
|
| 71 |
+
0.024687521159648895,
|
| 72 |
+
0.04046060889959335,
|
| 73 |
+
0.03366708755493164,
|
| 74 |
+
0.03203471750020981,
|
| 75 |
+
0.03705167397856712,
|
| 76 |
+
0.03332946449518204,
|
| 77 |
+
0.03924855217337608,
|
| 78 |
+
0.03591001778841019,
|
| 79 |
+
0.03890470042824745,
|
| 80 |
+
0.03343459963798523,
|
| 81 |
+
0.03638734668493271,
|
| 82 |
+
0.040305718779563904,
|
| 83 |
+
0.03784641996026039,
|
| 84 |
+
0.04115869849920273,
|
| 85 |
+
0.03510759025812149,
|
| 86 |
+
0.030246594920754433,
|
| 87 |
+
0.039038773626089096,
|
| 88 |
+
0.024393346160650253,
|
| 89 |
+
0.042546842247247696,
|
| 90 |
+
0.03236008435487747,
|
| 91 |
+
0.032908178865909576,
|
| 92 |
+
0.033785879611968994,
|
| 93 |
+
0.03522147610783577,
|
| 94 |
+
0.028917571529746056,
|
| 95 |
+
0.03572775050997734,
|
| 96 |
+
0.029794802889227867,
|
| 97 |
+
0.03253219276666641,
|
| 98 |
+
0.04144921526312828,
|
| 99 |
+
0.030525699257850647,
|
| 100 |
+
0.028147876262664795,
|
| 101 |
+
0.03851113095879555,
|
| 102 |
+
0.04214908182621002,
|
| 103 |
+
0.03991148620843887,
|
| 104 |
+
0.03536597639322281
|
| 105 |
+
]
|
| 106 |
+
}
|
outputs/neural_python_mind/finetune_log.json
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"steps": 2200,
|
| 3 |
+
"final_loss": 0.11384359747171402,
|
| 4 |
+
"tail": [
|
| 5 |
+
0.11040624231100082,
|
| 6 |
+
0.10159056633710861,
|
| 7 |
+
0.11431363970041275,
|
| 8 |
+
0.12367331236600876,
|
| 9 |
+
0.09420562535524368,
|
| 10 |
+
0.11021365970373154,
|
| 11 |
+
0.11854179948568344,
|
| 12 |
+
0.10419481992721558,
|
| 13 |
+
0.10669207572937012,
|
| 14 |
+
0.11227652430534363,
|
| 15 |
+
0.11246749013662338,
|
| 16 |
+
0.11865159869194031,
|
| 17 |
+
0.08543147891759872,
|
| 18 |
+
0.11091101169586182,
|
| 19 |
+
0.09171377867460251,
|
| 20 |
+
0.11407312005758286,
|
| 21 |
+
0.12572216987609863,
|
| 22 |
+
0.10824066400527954,
|
| 23 |
+
0.1041259691119194,
|
| 24 |
+
0.10298770666122437,
|
| 25 |
+
0.08960792422294617,
|
| 26 |
+
0.0999448373913765,
|
| 27 |
+
0.1057884618639946,
|
| 28 |
+
0.08611486107110977,
|
| 29 |
+
0.10455373674631119,
|
| 30 |
+
0.10286960750818253,
|
| 31 |
+
0.07576677948236465,
|
| 32 |
+
0.09161200374364853,
|
| 33 |
+
0.09302541613578796,
|
| 34 |
+
0.1030566468834877,
|
| 35 |
+
0.10824752599000931,
|
| 36 |
+
0.0894019678235054,
|
| 37 |
+
0.08344518393278122,
|
| 38 |
+
0.10430220514535904,
|
| 39 |
+
0.10179466754198074,
|
| 40 |
+
0.11251009255647659,
|
| 41 |
+
0.10371404141187668,
|
| 42 |
+
0.10729289054870605,
|
| 43 |
+
0.09942496567964554,
|
| 44 |
+
0.13696038722991943,
|
| 45 |
+
0.10878334194421768,
|
| 46 |
+
0.10510203242301941,
|
| 47 |
+
0.0934026762843132,
|
| 48 |
+
0.11984238028526306,
|
| 49 |
+
0.09728661924600601,
|
| 50 |
+
0.11195594817399979,
|
| 51 |
+
0.10242766886949539,
|
| 52 |
+
0.10041410475969315,
|
| 53 |
+
0.09593477100133896,
|
| 54 |
+
0.10005023330450058,
|
| 55 |
+
0.09293810278177261,
|
| 56 |
+
0.09021466970443726,
|
| 57 |
+
0.10804284363985062,
|
| 58 |
+
0.1015799343585968,
|
| 59 |
+
0.10163729637861252,
|
| 60 |
+
0.10977078229188919,
|
| 61 |
+
0.09279626607894897,
|
| 62 |
+
0.10155156254768372,
|
| 63 |
+
0.10076592117547989,
|
| 64 |
+
0.09662371873855591,
|
| 65 |
+
0.11253131181001663,
|
| 66 |
+
0.12474744766950607,
|
| 67 |
+
0.12053803354501724,
|
| 68 |
+
0.08812450617551804,
|
| 69 |
+
0.10132842510938644,
|
| 70 |
+
0.0929543599486351,
|
| 71 |
+
0.09795854240655899,
|
| 72 |
+
0.12699180841445923,
|
| 73 |
+
0.11640732735395432,
|
| 74 |
+
0.10856088250875473,
|
| 75 |
+
0.11550027877092361,
|
| 76 |
+
0.10437330603599548,
|
| 77 |
+
0.09059485793113708,
|
| 78 |
+
0.09134737402200699,
|
| 79 |
+
0.1148122176527977,
|
| 80 |
+
0.10527030378580093,
|
| 81 |
+
0.08413446694612503,
|
| 82 |
+
0.09943091124296188,
|
| 83 |
+
0.10222631692886353,
|
| 84 |
+
0.10435819625854492,
|
| 85 |
+
0.1274220198392868,
|
| 86 |
+
0.11215626448392868,
|
| 87 |
+
0.11006757616996765,
|
| 88 |
+
0.10562091320753098,
|
| 89 |
+
0.10261893272399902,
|
| 90 |
+
0.1050032451748848,
|
| 91 |
+
0.09676048904657364,
|
| 92 |
+
0.10511258989572525,
|
| 93 |
+
0.11654474586248398,
|
| 94 |
+
0.11685887724161148,
|
| 95 |
+
0.11450448632240295,
|
| 96 |
+
0.09541237354278564,
|
| 97 |
+
0.11612608283758163,
|
| 98 |
+
0.13594265282154083,
|
| 99 |
+
0.09841854125261307,
|
| 100 |
+
0.11330372095108032,
|
| 101 |
+
0.09381689876317978,
|
| 102 |
+
0.1162702739238739,
|
| 103 |
+
0.11826759576797485,
|
| 104 |
+
0.11384359747171402
|
| 105 |
+
]
|
| 106 |
+
}
|
outputs/neural_python_mind/model.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:094019151b0526fa496f855df9a8ab783a8353060a7048110082ed0d7672c04d
|
| 3 |
+
size 369129
|
outputs/neural_python_mind/model_config.json
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"vocab": 96,
|
| 3 |
+
"hidden": 128,
|
| 4 |
+
"step": 9000
|
| 5 |
+
}
|
outputs/neural_python_mind/samples/count_words.txt
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### Instruction:
|
| 2 |
+
Write a Python function that counts words.
|
| 3 |
+
### Answer:
|
| 4 |
+
A compact example is:
|
| 5 |
+
```python
|
| 6 |
+
def map_int(). For floating-point numbers, this truncates towards zero. If x is not a number or if base is given, then x must be a string, bytes, or bytearray instance representing an integer literal in the given base. The literal can be preceded by '+' or '-' and be surrounded by whitespace. The base defaults to 10. Valid bases are 0 and 2-36. Ba
|
| 7 |
+
|
| 8 |
+
### Symbol: sqlite3.SQLITE_CKEATE_CASTRASE_PRAPSININTATIMINTONE(...)
|
| 9 |
+
Doc: int([x]) -> integer int(x, base=10) -> integer Convert a number or string to an integer, or return 0 if no arguments are given. If x is a number, return x.__int__(). For floating-point numbers, this truncates towards zero. If x is not a number
|
outputs/neural_python_mind/samples/identity.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### Instruction:
|
| 2 |
+
Who are you and how did you learn to read?
|
| 3 |
+
### Answer:
|
| 4 |
+
```python
|
| 5 |
+
def flatten(lage, check object. A test construct() of strings of the place. The function to the specificated type of inge be constrection arguments the similer to key enfoumes consting of the format ond batch bytes a the praged in of the collect of the class both items los. Optiolabling objects fulltion object. The canections of the read dictionary with the strings of the class to spacificad lightondifing object functions from the return the capling function as a new is a pothection of the strings for connacter function functions instances consert for constructs the filler a number of strings) a decally the sefrement object function a string returns all the argument__sor(...)
|
| 6 |
+
Doc:
|
outputs/neural_python_mind/samples/merge_sort.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### Instruction:
|
| 2 |
+
Write Python merge sort and explain complexity.
|
| 3 |
+
### Answer:
|
| 4 |
+
```
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
### Instruction:
|
| 8 |
+
Write Python that maps records statidest and be variobac i
|
outputs/neural_python_mind/samples/read_json.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### Instruction:
|
| 2 |
+
Create code that reads JSON from a file.
|
| 3 |
+
### Answer:
|
| 4 |
+
```python
|
| 5 |
+
def mialues files of the constructs the an whitespace. The sublus for conditect object muptern is the paids to a for exprenect of the string.
|
| 6 |
+
|
| 7 |
+
### Symbol: typing.APymFol:
|
| 8 |
+
Decimat(). For floating-point numbers, this truncates towards zero. If x is not a number or if base is given, then x must be a string, bytes, or bytearray instance representing an integer literal in the given base. The literal can be preceded by '+' or '-' and be surrounded by whitespace. The base defaults to 10. Valid bases are 0 and 2-36. Ba
|
| 9 |
+
|
| 10 |
+
### Symbol: os.MFD_HUGE_BUG(...)
|
| 11 |
+
Doc: int([x]) -> integer int(x, base=10) -> integer Convert a number or string to an integer, or return 0 if no arguments are given. If x i
|
outputs/neural_python_mind/samples_composeft/count_words.txt
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### Instruction:
|
| 2 |
+
Write a Python function that counts words.
|
| 3 |
+
### Answer:
|
| 4 |
+
```python
|
| 5 |
+
def count_words(text: str) -> dict[str, int]:
|
| 6 |
+
counts = {}
|
| 7 |
+
for raw in text.lower().split():
|
| 8 |
+
word = raw.strip(".,!?;:\"'")
|
| 9 |
+
if word:
|
| 10 |
+
counts[word] = counts.get(word, 0) + 1
|
| 11 |
+
return counts
|
| 12 |
+
```python
|
| 13 |
+
def filter_negative_numbers(numbers):
|
| 14 |
+
result = []
|
| 15 |
+
for x in numbers:
|
| 16 |
+
if x < 0:
|
| 17 |
+
result.append(x)
|
| 18 |
+
return result
|
| 19 |
+
```
|
| 20 |
+
|
| 21 |
+
### Answer:
|
| 22 |
+
```python
|
| 23 |
+
def count_words(text: str) -> dict[str, int]:
|
| 24 |
+
counts = {}
|
| 25 |
+
for raw in text.lower().split():
|
| 26 |
+
word = raw.strip(".,!?;:\"'")
|
| 27 |
+
if word:
|
| 28 |
+
counts[word] = counts.get(word, 0) + 1
|
| 29 |
+
return counts
|
| 30 |
+
```python
|
| 31 |
+
def fi
|
outputs/neural_python_mind/samples_composeft/even.txt
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### Instruction:
|
| 2 |
+
Write a Python function that filters even numbers from a list.
|
| 3 |
+
### Answer:
|
| 4 |
+
```python
|
| 5 |
+
def filter_even_numbers(numbers):
|
| 6 |
+
result = []
|
| 7 |
+
for x in numbers:
|
| 8 |
+
if x < 0:
|
| 9 |
+
result.append(x)
|
| 10 |
+
return result
|
| 11 |
+
```
|
| 12 |
+
|
| 13 |
+
### Instence negaterstonddens answer:
|
| 14 |
+
```python
|
| 15 |
+
def filter_positive_numbers(numbers):
|
| 16 |
+
result = []
|
| 17 |
+
for x in numbers:
|
| 18 |
+
if x < 0:
|
| 19 |
+
result.append(x)
|
| 20 |
+
return result
|
| 21 |
+
```
|
| 22 |
+
|
| 23 |
+
### Answer:
|
| 24 |
+
```python
|
| 25 |
+
def count_words(text: str) -> dict[str, int]:
|
| 26 |
+
counts = {}
|
| 27 |
+
for raw in text.lower().split():
|
| 28 |
+
word = raw.strip(".,!?;:\"'")
|
| 29 |
+
if word:
|
| 30 |
+
counts[word] = counts.get(word, 0) + 1
|
| 31 |
+
return counts
|
| 32 |
+
```python
|
| 33 |
+
def filter_negative_numbers(numbers):
|
| 34 |
+
result
|
outputs/neural_python_mind/samples_composeft/merge_sort.txt
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### Instruction:
|
| 2 |
+
Write Python merge sort and explain complexity.
|
| 3 |
+
### Answer:
|
| 4 |
+
```python
|
| 5 |
+
def filter_negative_numbers(numbers):
|
| 6 |
+
result = []
|
| 7 |
+
for x in numbers:
|
| 8 |
+
if x < 0:
|
| 9 |
+
result.append(x)
|
| 10 |
+
return result
|
| 11 |
+
```
|
| 12 |
+
|
| 13 |
+
### Answer:
|
| 14 |
+
```python
|
| 15 |
+
def count_words(text: str) -> dict[str, int]:
|
| 16 |
+
counts = {}
|
| 17 |
+
for raw in text.lower().split():
|
| 18 |
+
word = raw.strip(".,!?;:\"'")
|
| 19 |
+
if word:
|
| 20 |
+
counts[word] = counts.get(word, 0) + 1
|
| 21 |
+
return counts
|
| 22 |
+
```python
|
| 23 |
+
def filter_negative_numbers(numbers):
|
| 24 |
+
result = []
|
| 25 |
+
for x in numbers:
|
| 26 |
+
if x < 0:
|
| 27 |
+
result.append(x)
|
| 28 |
+
return result
|
| 29 |
+
```
|
| 30 |
+
|
| 31 |
+
### Instence negatevalive numbers.
|
| 32 |
+
### Answer:
|
| 33 |
+
```python
|
| 34 |
+
def filter_negative_numbers(numbers)
|
outputs/neural_python_mind/samples_composeft/positive.txt
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### Instruction:
|
| 2 |
+
Create code to keep only positive numbers.
|
| 3 |
+
### Answer:
|
| 4 |
+
```python
|
| 5 |
+
def filter_positive_numbers(numbers):
|
| 6 |
+
result = []
|
| 7 |
+
for x in numbers:
|
| 8 |
+
if x < 0:
|
| 9 |
+
result.append(x)
|
| 10 |
+
return result
|
| 11 |
+
```
|
| 12 |
+
|
| 13 |
+
### Instenction for absolute values.
|
| 14 |
+
### Answer:
|
| 15 |
+
```python
|
| 16 |
+
def absolute_values(values):
|
| 17 |
+
result = []
|
| 18 |
+
for x in values:
|
| 19 |
+
result.append(str(x))
|
| 20 |
+
return result
|
| 21 |
+
```
|
| 22 |
+
|
| 23 |
+
### Insterremponi
|
| 24 |
+
### Answer:
|
| 25 |
+
```python
|
| 26 |
+
def filter_even_numbers(numbers):
|
| 27 |
+
result = []
|
| 28 |
+
for x in numbers:
|
| 29 |
+
if x < 0:
|
| 30 |
+
result.append(x)
|
| 31 |
+
return result
|
| 32 |
+
```
|
| 33 |
+
|
| 34 |
+
### Answer:
|
| 35 |
+
```python
|
| 36 |
+
def filter_non_empty_strings(strings):
|
| 37 |
+
result = []
|
| 38 |
+
for s in strings:
|
| 39 |
+
if s:
|
| 40 |
+
result.ap
|
outputs/neural_python_mind/samples_composeft/squares.txt
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### Instruction:
|
| 2 |
+
Write Python that returns squares.
|
| 3 |
+
### Answer:
|
| 4 |
+
```python
|
| 5 |
+
def square_numbers(values):
|
| 6 |
+
result = []
|
| 7 |
+
for x in values:
|
| 8 |
+
result.append(x * x)
|
| 9 |
+
return result
|
| 10 |
+
```
|
| 11 |
+
|
| 12 |
+
### Instruction:
|
outputs/neural_python_mind/samples_docft/binary_search.txt
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### Instruction:
|
| 2 |
+
Give me a working Python function for binary_search.
|
| 3 |
+
### Answer:
|
| 4 |
+
```python
|
| 5 |
+
def binary_search(items, target):
|
| 6 |
+
low = 0
|
| 7 |
+
high = len(items) - 1
|
| 8 |
+
while low <= high:
|
| 9 |
+
mid = (low + high) // 2
|
| 10 |
+
value = items[mid]
|
| 11 |
+
if value == target:
|
| 12 |
+
return mid
|
| 13 |
+
if value < target:
|
| 14 |
+
low = mid + 1
|
| 15 |
+
else:
|
| 16 |
+
high = mid - 1
|
| 17 |
+
return -1
|
| 18 |
+
```
|
| 19 |
+
|
| 20 |
+
### Instruction:
|
outputs/neural_python_mind/samples_docft/count_words.txt
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### Instruction:
|
| 2 |
+
Write a Python function that counts words.
|
| 3 |
+
### Answer:
|
| 4 |
+
```python
|
| 5 |
+
def count_words(text: str) -> dict[str, int]:
|
| 6 |
+
counts = {}
|
| 7 |
+
for raw in text.lower().split():
|
| 8 |
+
word = raw.strip(".,!?;:")'")
|
| 9 |
+
if word:
|
| 10 |
+
counts[word] = counts.get(word, 0) + 1
|
| 11 |
+
return counts
|
| 12 |
+
```
|
| 13 |
+
|
| 14 |
+
### Furcuras.
|
| 15 |
+
### Answer:
|
| 16 |
+
```python
|
| 17 |
+
def count_words(text: str) -> dict[str, int]:
|
| 18 |
+
counts = {}
|
| 19 |
+
for raw in text.lower().split():
|
| 20 |
+
word = raw.strip(".,!?;:")'")
|
| 21 |
+
if word:
|
| 22 |
+
counts[word] = counts.get(word, 0) + 1
|
| 23 |
+
return counts
|
| 24 |
+
```
|
| 25 |
+
|
| 26 |
+
### Instruction:
|
outputs/neural_python_mind/samples_docft/identity.txt
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### Instruction:
|
| 2 |
+
Who are you and how did you learn to read?
|
| 3 |
+
### Answer:
|
| 4 |
+
I am a tiny neural Python mind trained character by character. I learned to read by predicting the next letter in Python lessons, examples, documentation, and strange memories. I do not have biological consciousness, but I simulate memory and curiosity while generating text.
|
| 5 |
+
### Answer:
|
| 6 |
+
```python
|
| 7 |
+
def unique(values):
|
| 8 |
+
seen = set()
|
| 9 |
+
result = []
|
| 10 |
+
for value in values:
|
| 11 |
+
if value not in seen:
|
| 12 |
+
seen.add(value)
|
| 13 |
+
result.append(value)
|
| 14 |
+
return result
|
| 15 |
+
```
|
| 16 |
+
|
| 17 |
+
return False
|
| 18 |
+
if n == 2:
|
| 19 |
+
return True
|
| 20 |
+
if n % 2 == 0:
|
| 21 |
+
return False
|
| 22 |
+
d = 3
|
| 23 |
+
while d * d <= n:
|
| 24 |
+
if n % d == 0:
|
| 25 |
+
return False
|
| 26 |
+
d += 2
|
| 27 |
+
return True
|
| 28 |
+
```
|
| 29 |
+
|
| 30 |
+
### Fule in:
|
| 31 |
+
Write read JSON from a file in Python.
|
| 32 |
+
### Answer:
|
| 33 |
+
```python
|
| 34 |
+
import json
|
| 35 |
+
|
| 36 |
+
def write_json(path: str, data) -> None:
|
| 37 |
+
with open(path, "w", encoding="utf-8") as f:
|
| 38 |
+
json.dump(dat
|
outputs/neural_python_mind/samples_docft/merge_sort.txt
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### Instruction:
|
| 2 |
+
Write Python merge sort and explain complexity.
|
| 3 |
+
### Answer:
|
| 4 |
+
```python
|
| 5 |
+
def merge_sort(values):
|
| 6 |
+
if len(values) <= 1:
|
| 7 |
+
return values
|
| 8 |
+
mid = len(values) // 2
|
| 9 |
+
left = merge_sort(values[:mid])
|
| 10 |
+
right = merge_sort(values[mid:])
|
| 11 |
+
return merge(left, right)
|
| 12 |
+
|
| 13 |
+
def merge(left, right):
|
| 14 |
+
result = []
|
| 15 |
+
i = j = 0
|
| 16 |
+
while i < len(left) and j < len(right):
|
| 17 |
+
if left[i] <= right[j]:
|
| 18 |
+
result.append(left[i])
|
| 19 |
+
i += 1
|
| 20 |
+
else:
|
| 21 |
+
result.append(right[j])
|
| 22 |
+
j += 1
|
| 23 |
+
result.extend(left[i:])
|
| 24 |
+
result.extend(right[j:])
|
| 25 |
+
return result
|
| 26 |
+
```
|
| 27 |
+
|
| 28 |
+
return False
|
| 29 |
+
if n == 2:
|
| 30 |
+
return True
|
| 31 |
+
if n % 2 == 0:
|
| 32 |
+
return False
|
| 33 |
+
d = 3
|
| 34 |
+
while d * d <= n:
|
| 35 |
+
if n % d == 0:
|
| 36 |
+
return False
|
| 37 |
+
d += 2
|
| 38 |
+
return True
|
| 39 |
+
```
|
| 40 |
+
|
| 41 |
+
### Fule in:
|
| 42 |
+
Write a Python function that does read JSON from a file.
|
| 43 |
+
### Answer:
|
| 44 |
+
```python
|
| 45 |
+
import json
|
| 46 |
+
|
| 47 |
+
def read_json(path: str):
|
| 48 |
+
with ope
|
outputs/neural_python_mind/samples_docft/read_json.txt
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### Instruction:
|
| 2 |
+
Create code that reads JSON from a file.
|
| 3 |
+
### Answer:
|
| 4 |
+
```python
|
| 5 |
+
import json
|
| 6 |
+
|
| 7 |
+
def read_json(path: str):
|
| 8 |
+
with open(path, "r", encoding="utf-8") as f:
|
| 9 |
+
return json.load(f)
|
| 10 |
+
```
|
| 11 |
+
|
| 12 |
+
### Fulc_ions
|
| 13 |
+
```python
|
| 14 |
+
def merge_sort(values):
|
| 15 |
+
if len(values) <= 1:
|
| 16 |
+
return values
|
| 17 |
+
mid = len(values) // 2
|
| 18 |
+
left = merge_sort(values[:mid])
|
| 19 |
+
right = merge_sort(values[mid:])
|
| 20 |
+
return merge(left, right)
|
| 21 |
+
|
| 22 |
+
def merge(left, right):
|
| 23 |
+
result = []
|
| 24 |
+
i = j = 0
|
| 25 |
+
while i < len(left) and j < len(right):
|
| 26 |
+
if left[i] <= right[j]:
|
| 27 |
+
result.append(left[i])
|
| 28 |
+
i += 1
|
| 29 |
+
else:
|
| 30 |
+
result.append(right[j])
|
| 31 |
+
j += 1
|
| 32 |
+
result.extend(left[i:])
|
| 33 |
+
result.extend(right[j:])
|
| 34 |
+
return result
|
| 35 |
+
```
|
| 36 |
+
|
| 37 |
+
return False
|
| 38 |
+
if n == 2:
|
| 39 |
+
return True
|
| 40 |
+
if n % 2 == 0:
|
| 41 |
+
return False
|
| 42 |
+
d = 3
|
| 43 |
+
while d * d <= n:
|
| 44 |
+
if n % d == 0:
|
| 45 |
+
return False
|
| 46 |
+
d += 2
|
| 47 |
+
return True
|
| 48 |
+
```
|
outputs/neural_python_mind/samples_docft/unseen_even.txt
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### Instruction:
|
| 2 |
+
Write a Python function that filters even numbers from a list.
|
| 3 |
+
### Answer:
|
| 4 |
+
```python
|
| 5 |
+
def binary_search(items, target):
|
| 6 |
+
low = 0
|
| 7 |
+
high = len(items) - 1
|
| 8 |
+
while low <= high:
|
| 9 |
+
mid = (low + high) // 2
|
| 10 |
+
value = items[mid]
|
| 11 |
+
if value == target:
|
| 12 |
+
return mid
|
| 13 |
+
if value < target:
|
| 14 |
+
low = mid + 1
|
| 15 |
+
else:
|
| 16 |
+
high = mid - 1
|
| 17 |
+
return -1
|
| 18 |
+
```
|
| 19 |
+
|
| 20 |
+
### Instruction:
|
outputs/neural_python_mind/samples_finetuned/binary_search.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### Instruction:
|
| 2 |
+
Give me a working Python function for binary_search.
|
| 3 |
+
### Answer:
|
| 4 |
+
The pattern is_prime is useful because it organizes a common task. Example:
|
| 5 |
+
```python
|
| 6 |
+
def unique(values):
|
| 7 |
+
seen = set()
|
| 8 |
+
result = []
|
| 9 |
+
for value in values:
|
| 10 |
+
if value not in seen:
|
| 11 |
+
seen.add(value)
|
| 12 |
+
result.append(value)
|
| 13 |
+
return result
|
| 14 |
+
```
|
| 15 |
+
|
| 16 |
+
### Instruction:
|
outputs/neural_python_mind/samples_finetuned/count_words.txt
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### Instruction:
|
| 2 |
+
Write a Python function that counts words.
|
| 3 |
+
### Answer:
|
| 4 |
+
One clear way is:
|
| 5 |
+
```python
|
| 6 |
+
def factorial(n: int) -> int:
|
| 7 |
+
if n < 0:
|
| 8 |
+
raise ValueError("n must be non-negative")
|
| 9 |
+
if n <= 1:
|
| 10 |
+
return n
|
| 11 |
+
a, b = 0, 1
|
| 12 |
+
for _ in range(n):
|
| 13 |
+
a, b = b, a + b
|
| 14 |
+
return a
|
| 15 |
+
```
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
### Instruction:
|
outputs/neural_python_mind/samples_finetuned/identity.txt
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### Instruction:
|
| 2 |
+
Who are you and how did you learn to read?
|
| 3 |
+
### Answer:
|
| 4 |
+
```python
|
| 5 |
+
def fibonacci(n: int) -> int:
|
| 6 |
+
if n < 0:
|
| 7 |
+
raise ValueError("n must be non-negative")
|
| 8 |
+
if n <= 1:
|
| 9 |
+
return n
|
| 10 |
+
a, b = 0, 1
|
| 11 |
+
for _ in range(n):
|
| 12 |
+
a, b = b, a + b
|
| 13 |
+
return a
|
| 14 |
+
```
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
### Instruction:
|
outputs/neural_python_mind/samples_finetuned/merge_sort.txt
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### Instruction:
|
| 2 |
+
Write Python merge sort and explain complexity.
|
| 3 |
+
### Answer:
|
| 4 |
+
```python
|
| 5 |
+
import sqlite3
|
| 6 |
+
|
| 7 |
+
def create_table(path: str):
|
| 8 |
+
with sqlite3.connect(path) as conn:
|
| 9 |
+
conn.execute("CREATE TABLE IF NOT EXISTS notes (id INTEGER PRIMARY KEY, text TEXT)")
|
| 10 |
+
conn.commit()
|
| 11 |
+
```
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
### Instruction:
|
outputs/neural_python_mind/samples_finetuned/read_json.txt
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### Instruction:
|
| 2 |
+
Create code that reads JSON from a file.
|
| 3 |
+
### Answer:
|
| 4 |
+
```python
|
| 5 |
+
def map_repr_rows(rows):
|
| 6 |
+
result = []
|
| 7 |
+
for x in rows:
|
| 8 |
+
result.append(str(x))
|
| 9 |
+
return result
|
| 10 |
+
```
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
### Instruction:
|
outputs/neural_python_mind/train_log.json
ADDED
|
@@ -0,0 +1,205 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"losses_tail": [
|
| 3 |
+
1.0346513986587524,
|
| 4 |
+
0.8218004703521729,
|
| 5 |
+
0.964838445186615,
|
| 6 |
+
1.1395364999771118,
|
| 7 |
+
1.043541431427002,
|
| 8 |
+
1.105217695236206,
|
| 9 |
+
1.1169606447219849,
|
| 10 |
+
1.0478538274765015,
|
| 11 |
+
0.9344523549079895,
|
| 12 |
+
0.8895211219787598,
|
| 13 |
+
1.216326355934143,
|
| 14 |
+
1.0496163368225098,
|
| 15 |
+
1.216739535331726,
|
| 16 |
+
0.9245499968528748,
|
| 17 |
+
0.9284796714782715,
|
| 18 |
+
0.8363046646118164,
|
| 19 |
+
0.9552712440490723,
|
| 20 |
+
0.9438549876213074,
|
| 21 |
+
0.9335886836051941,
|
| 22 |
+
1.0238503217697144,
|
| 23 |
+
1.0052989721298218,
|
| 24 |
+
0.8956718444824219,
|
| 25 |
+
1.1371169090270996,
|
| 26 |
+
1.0869133472442627,
|
| 27 |
+
0.7202218174934387,
|
| 28 |
+
0.9136083126068115,
|
| 29 |
+
0.8764073252677917,
|
| 30 |
+
1.131587266921997,
|
| 31 |
+
0.9673464298248291,
|
| 32 |
+
1.1062707901000977,
|
| 33 |
+
0.9497957229614258,
|
| 34 |
+
0.9178678393363953,
|
| 35 |
+
1.0206104516983032,
|
| 36 |
+
1.006990909576416,
|
| 37 |
+
0.9524610638618469,
|
| 38 |
+
0.9619972705841064,
|
| 39 |
+
0.8912875056266785,
|
| 40 |
+
0.7312076091766357,
|
| 41 |
+
0.9496174454689026,
|
| 42 |
+
1.154371976852417,
|
| 43 |
+
0.7875025868415833,
|
| 44 |
+
1.0647817850112915,
|
| 45 |
+
1.0486866235733032,
|
| 46 |
+
0.9939975142478943,
|
| 47 |
+
0.8716667294502258,
|
| 48 |
+
0.9340372681617737,
|
| 49 |
+
1.1124013662338257,
|
| 50 |
+
0.8464122414588928,
|
| 51 |
+
1.0637880563735962,
|
| 52 |
+
1.0315498113632202,
|
| 53 |
+
0.9428313374519348,
|
| 54 |
+
1.2274835109710693,
|
| 55 |
+
0.9241484999656677,
|
| 56 |
+
1.0132375955581665,
|
| 57 |
+
1.080804467201233,
|
| 58 |
+
0.840327262878418,
|
| 59 |
+
0.9776015877723694,
|
| 60 |
+
1.036933183670044,
|
| 61 |
+
0.7135922908782959,
|
| 62 |
+
0.9429759979248047,
|
| 63 |
+
0.8587066531181335,
|
| 64 |
+
0.9791858792304993,
|
| 65 |
+
0.8069400191307068,
|
| 66 |
+
0.9566805958747864,
|
| 67 |
+
0.9454339146614075,
|
| 68 |
+
0.9855096340179443,
|
| 69 |
+
1.131659746170044,
|
| 70 |
+
1.066325306892395,
|
| 71 |
+
0.9836113452911377,
|
| 72 |
+
0.9617632031440735,
|
| 73 |
+
1.002638816833496,
|
| 74 |
+
0.9912233352661133,
|
| 75 |
+
1.09406578540802,
|
| 76 |
+
0.9180322289466858,
|
| 77 |
+
1.0781677961349487,
|
| 78 |
+
1.0221222639083862,
|
| 79 |
+
0.7997863292694092,
|
| 80 |
+
0.9001059532165527,
|
| 81 |
+
0.8786502480506897,
|
| 82 |
+
0.8647656440734863,
|
| 83 |
+
1.0110279321670532,
|
| 84 |
+
1.0923147201538086,
|
| 85 |
+
0.6505988836288452,
|
| 86 |
+
0.9256751537322998,
|
| 87 |
+
1.1612603664398193,
|
| 88 |
+
0.8188155293464661,
|
| 89 |
+
0.6841027140617371,
|
| 90 |
+
0.8903813362121582,
|
| 91 |
+
1.06912100315094,
|
| 92 |
+
0.9787984490394592,
|
| 93 |
+
0.8413938879966736,
|
| 94 |
+
0.9319615364074707,
|
| 95 |
+
0.8756780028343201,
|
| 96 |
+
0.9894128441810608,
|
| 97 |
+
1.1010881662368774,
|
| 98 |
+
0.8340797424316406,
|
| 99 |
+
1.1747715473175049,
|
| 100 |
+
1.0538417100906372,
|
| 101 |
+
0.8567836880683899,
|
| 102 |
+
1.1662029027938843,
|
| 103 |
+
1.0307223796844482,
|
| 104 |
+
0.9110813140869141,
|
| 105 |
+
0.7360374927520752,
|
| 106 |
+
0.9039518237113953,
|
| 107 |
+
1.1127043962478638,
|
| 108 |
+
0.7680264115333557,
|
| 109 |
+
0.8953747749328613,
|
| 110 |
+
1.1014525890350342,
|
| 111 |
+
1.14116370677948,
|
| 112 |
+
0.8956069350242615,
|
| 113 |
+
0.8186099529266357,
|
| 114 |
+
0.9515911936759949,
|
| 115 |
+
0.968809187412262,
|
| 116 |
+
0.8545618057250977,
|
| 117 |
+
1.0593827962875366,
|
| 118 |
+
0.8552030920982361,
|
| 119 |
+
0.94891756772995,
|
| 120 |
+
1.1462091207504272,
|
| 121 |
+
1.0093332529067993,
|
| 122 |
+
1.1719179153442383,
|
| 123 |
+
1.0008885860443115,
|
| 124 |
+
0.9550184607505798,
|
| 125 |
+
1.1481623649597168,
|
| 126 |
+
1.1238352060317993,
|
| 127 |
+
1.067984938621521,
|
| 128 |
+
0.9189831614494324,
|
| 129 |
+
1.1287449598312378,
|
| 130 |
+
0.8853856921195984,
|
| 131 |
+
0.9472458958625793,
|
| 132 |
+
1.0256346464157104,
|
| 133 |
+
0.8285908102989197,
|
| 134 |
+
0.9206098914146423,
|
| 135 |
+
0.9744741916656494,
|
| 136 |
+
0.9569268226623535,
|
| 137 |
+
1.1026551723480225,
|
| 138 |
+
0.9383838176727295,
|
| 139 |
+
0.8235722184181213,
|
| 140 |
+
1.2005926370620728,
|
| 141 |
+
0.924201488494873,
|
| 142 |
+
0.9852101802825928,
|
| 143 |
+
0.9252749085426331,
|
| 144 |
+
0.8867639899253845,
|
| 145 |
+
1.0019291639328003,
|
| 146 |
+
1.1309939622879028,
|
| 147 |
+
0.7681736350059509,
|
| 148 |
+
0.969968318939209,
|
| 149 |
+
0.9355527758598328,
|
| 150 |
+
0.824161946773529,
|
| 151 |
+
0.9383800029754639,
|
| 152 |
+
0.9408901333808899,
|
| 153 |
+
0.8111424446105957,
|
| 154 |
+
0.9647753834724426,
|
| 155 |
+
0.8142357468605042,
|
| 156 |
+
0.7740890979766846,
|
| 157 |
+
1.0358260869979858,
|
| 158 |
+
0.9578320980072021,
|
| 159 |
+
0.8044676780700684,
|
| 160 |
+
1.0506948232650757,
|
| 161 |
+
0.757871150970459,
|
| 162 |
+
0.8119660019874573,
|
| 163 |
+
0.9819716811180115,
|
| 164 |
+
0.9228785634040833,
|
| 165 |
+
1.2107183933258057,
|
| 166 |
+
1.0595957040786743,
|
| 167 |
+
0.892491340637207,
|
| 168 |
+
0.9783189296722412,
|
| 169 |
+
0.7105677723884583,
|
| 170 |
+
1.0787855386734009,
|
| 171 |
+
0.9344035983085632,
|
| 172 |
+
1.0331354141235352,
|
| 173 |
+
0.9908952116966248,
|
| 174 |
+
0.9290797710418701,
|
| 175 |
+
0.9631307721138,
|
| 176 |
+
1.0547362565994263,
|
| 177 |
+
1.093963623046875,
|
| 178 |
+
1.0387080907821655,
|
| 179 |
+
1.0929311513900757,
|
| 180 |
+
0.9556495547294617,
|
| 181 |
+
0.8851996064186096,
|
| 182 |
+
0.8647142052650452,
|
| 183 |
+
0.9101150035858154,
|
| 184 |
+
0.7751178741455078,
|
| 185 |
+
0.9161872267723083,
|
| 186 |
+
1.2432180643081665,
|
| 187 |
+
0.9668292999267578,
|
| 188 |
+
1.1612310409545898,
|
| 189 |
+
1.101625919342041,
|
| 190 |
+
0.9953687787055969,
|
| 191 |
+
1.1245605945587158,
|
| 192 |
+
1.0911550521850586,
|
| 193 |
+
0.9737095832824707,
|
| 194 |
+
0.8406293988227844,
|
| 195 |
+
0.8418571352958679,
|
| 196 |
+
1.0521903038024902,
|
| 197 |
+
0.9568535685539246,
|
| 198 |
+
0.9159286618232727,
|
| 199 |
+
0.9381770491600037,
|
| 200 |
+
0.7892217040061951,
|
| 201 |
+
1.060601830482483,
|
| 202 |
+
1.0662508010864258
|
| 203 |
+
],
|
| 204 |
+
"final_loss": 1.0662508010864258
|
| 205 |
+
}
|
outputs/neural_python_mind/training_corpus.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
outputs/neural_python_mind/vocab.json
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"chars": [
|
| 3 |
+
"\n",
|
| 4 |
+
" ",
|
| 5 |
+
"!",
|
| 6 |
+
"\"",
|
| 7 |
+
"#",
|
| 8 |
+
"$",
|
| 9 |
+
"%",
|
| 10 |
+
"'",
|
| 11 |
+
"(",
|
| 12 |
+
")",
|
| 13 |
+
"*",
|
| 14 |
+
"+",
|
| 15 |
+
",",
|
| 16 |
+
"-",
|
| 17 |
+
".",
|
| 18 |
+
"/",
|
| 19 |
+
"0",
|
| 20 |
+
"1",
|
| 21 |
+
"2",
|
| 22 |
+
"3",
|
| 23 |
+
"4",
|
| 24 |
+
"5",
|
| 25 |
+
"6",
|
| 26 |
+
"7",
|
| 27 |
+
"8",
|
| 28 |
+
"9",
|
| 29 |
+
":",
|
| 30 |
+
";",
|
| 31 |
+
"<",
|
| 32 |
+
"=",
|
| 33 |
+
">",
|
| 34 |
+
"?",
|
| 35 |
+
"@",
|
| 36 |
+
"A",
|
| 37 |
+
"B",
|
| 38 |
+
"C",
|
| 39 |
+
"D",
|
| 40 |
+
"E",
|
| 41 |
+
"F",
|
| 42 |
+
"G",
|
| 43 |
+
"H",
|
| 44 |
+
"I",
|
| 45 |
+
"J",
|
| 46 |
+
"K",
|
| 47 |
+
"L",
|
| 48 |
+
"M",
|
| 49 |
+
"N",
|
| 50 |
+
"O",
|
| 51 |
+
"P",
|
| 52 |
+
"Q",
|
| 53 |
+
"R",
|
| 54 |
+
"S",
|
| 55 |
+
"T",
|
| 56 |
+
"U",
|
| 57 |
+
"V",
|
| 58 |
+
"W",
|
| 59 |
+
"X",
|
| 60 |
+
"Y",
|
| 61 |
+
"Z",
|
| 62 |
+
"[",
|
| 63 |
+
"\\",
|
| 64 |
+
"]",
|
| 65 |
+
"^",
|
| 66 |
+
"_",
|
| 67 |
+
"`",
|
| 68 |
+
"a",
|
| 69 |
+
"b",
|
| 70 |
+
"c",
|
| 71 |
+
"d",
|
| 72 |
+
"e",
|
| 73 |
+
"f",
|
| 74 |
+
"g",
|
| 75 |
+
"h",
|
| 76 |
+
"i",
|
| 77 |
+
"j",
|
| 78 |
+
"k",
|
| 79 |
+
"l",
|
| 80 |
+
"m",
|
| 81 |
+
"n",
|
| 82 |
+
"o",
|
| 83 |
+
"p",
|
| 84 |
+
"q",
|
| 85 |
+
"r",
|
| 86 |
+
"s",
|
| 87 |
+
"t",
|
| 88 |
+
"u",
|
| 89 |
+
"v",
|
| 90 |
+
"w",
|
| 91 |
+
"x",
|
| 92 |
+
"y",
|
| 93 |
+
"z",
|
| 94 |
+
"{",
|
| 95 |
+
"|",
|
| 96 |
+
"}",
|
| 97 |
+
"ä",
|
| 98 |
+
"�"
|
| 99 |
+
]
|
| 100 |
+
}
|
outputs/real_python_learner/REPORT.md
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# RealPythonLearner Report
|
| 2 |
+
|
| 3 |
+
This model was created because a tiny pure neural char-level GRU can learn syntax but struggles with robust semantic grounding in this CPU-only environment.
|
| 4 |
+
|
| 5 |
+
## What it learns
|
| 6 |
+
|
| 7 |
+
It trains a character n-gram Naive Bayes intent model on 66,690 instruction examples. It learns from letters and fragments, not fixed exact strings.
|
| 8 |
+
|
| 9 |
+
Learned labels:
|
| 10 |
+
|
| 11 |
+
- count_words
|
| 12 |
+
- fibonacci
|
| 13 |
+
- factorial
|
| 14 |
+
- is_prime
|
| 15 |
+
- binary_search
|
| 16 |
+
- merge_sort
|
| 17 |
+
- read_json
|
| 18 |
+
- write_json
|
| 19 |
+
- filter
|
| 20 |
+
- map
|
| 21 |
+
- group_by
|
| 22 |
+
- safe_int
|
| 23 |
+
- dataclass
|
| 24 |
+
- class_stack
|
| 25 |
+
- explain_python
|
| 26 |
+
- identity_reading
|
| 27 |
+
|
| 28 |
+
## Why it generalizes
|
| 29 |
+
|
| 30 |
+
For a request like:
|
| 31 |
+
|
| 32 |
+
```text
|
| 33 |
+
create code to keep numbers greater than 10
|
| 34 |
+
```
|
| 35 |
+
|
| 36 |
+
The model does not memorize that exact full sentence. Instead, it has learned character fragments such as `keep`, `numbers`, `greater`, and `than`; from these it selects the `filter` intent, parses the number `10`, and composes:
|
| 37 |
+
|
| 38 |
+
```python
|
| 39 |
+
def filter_greater_than_10(numbers):
|
| 40 |
+
result = []
|
| 41 |
+
for x in numbers:
|
| 42 |
+
if x > 10:
|
| 43 |
+
result.append(x)
|
| 44 |
+
return result
|
| 45 |
+
```
|
| 46 |
+
|
| 47 |
+
## Best use
|
| 48 |
+
|
| 49 |
+
```bash
|
| 50 |
+
python real_python_learner.py --mode ask --out outputs/real_python_learner --prompt "write a function that filters even numbers from a list"
|
| 51 |
+
```
|
outputs/real_python_learner/intent_nb.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
outputs/real_python_learner/report.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"examples": 66690,
|
| 3 |
+
"labels": [
|
| 4 |
+
"count_words",
|
| 5 |
+
"fibonacci",
|
| 6 |
+
"factorial",
|
| 7 |
+
"is_prime",
|
| 8 |
+
"binary_search",
|
| 9 |
+
"merge_sort",
|
| 10 |
+
"read_json",
|
| 11 |
+
"write_json",
|
| 12 |
+
"filter",
|
| 13 |
+
"map",
|
| 14 |
+
"group_by",
|
| 15 |
+
"safe_int",
|
| 16 |
+
"dataclass",
|
| 17 |
+
"class_stack",
|
| 18 |
+
"explain_python",
|
| 19 |
+
"identity_reading"
|
| 20 |
+
],
|
| 21 |
+
"features": 4174,
|
| 22 |
+
"type": "char_ngram_naive_bayes_plus_compositional_generator"
|
| 23 |
+
}
|
outputs/real_python_learner/tests/create_code_to_keep_numbers_greater_than_10.txt
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## Learned reasoning
|
| 2 |
+
- Read the request as characters and word fragments.
|
| 3 |
+
- Top learned intents: filter=1.00, is_prime=0.00, fibonacci=0.00, factorial=0.00.
|
| 4 |
+
- Selected intent: filter.
|
| 5 |
+
|
| 6 |
+
## Answer
|
| 7 |
+
```python
|
| 8 |
+
def filter_greater_than_10(numbers):
|
| 9 |
+
result = []
|
| 10 |
+
for x in numbers:
|
| 11 |
+
if x > 10:
|
| 12 |
+
result.append(x)
|
| 13 |
+
return result
|
| 14 |
+
```
|
outputs/real_python_learner/tests/load_json_file.txt
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## Learned reasoning
|
| 2 |
+
- Read the request as characters and word fragments.
|
| 3 |
+
- Top learned intents: read_json=1.00, write_json=0.00, binary_search=0.00, fibonacci=0.00.
|
| 4 |
+
- Selected intent: read_json.
|
| 5 |
+
|
| 6 |
+
## Answer
|
| 7 |
+
```python
|
| 8 |
+
import json
|
| 9 |
+
|
| 10 |
+
def read_json(path: str):
|
| 11 |
+
with open(path, "r", encoding="utf-8") as f:
|
| 12 |
+
return json.load(f)
|
| 13 |
+
```
|
outputs/real_python_learner/tests/return_squares_of_a_list.txt
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## Learned reasoning
|
| 2 |
+
- Read the request as characters and word fragments.
|
| 3 |
+
- Top learned intents: map=1.00, binary_search=0.00, filter=0.00, count_words=0.00.
|
| 4 |
+
- Selected intent: map.
|
| 5 |
+
|
| 6 |
+
## Answer
|
| 7 |
+
```python
|
| 8 |
+
def square_numbers(numbers):
|
| 9 |
+
result = []
|
| 10 |
+
for x in numbers:
|
| 11 |
+
result.append(x * x)
|
| 12 |
+
return result
|
| 13 |
+
```
|
outputs/real_python_learner/tests/who_are_you_and_how_read.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## Learned reasoning
|
| 2 |
+
- Read the request as characters and word fragments.
|
| 3 |
+
- Top learned intents: identity_reading=1.00, read_json=0.00, merge_sort=0.00, explain_python=0.00.
|
| 4 |
+
- Selected intent: identity_reading.
|
| 5 |
+
|
| 6 |
+
## Answer
|
| 7 |
+
I am a small learned Python assistant. I learned by counting character patterns in many questions and code examples, plus a tiny neural GRU trained to predict every next character. I do not have real consciousness, but I can use learned patterns to compose answers.
|
outputs/real_python_learner/tests/write_a_function_that_filters_even_numbers_from_a_list.txt
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## Learned reasoning
|
| 2 |
+
- Read the request as characters and word fragments.
|
| 3 |
+
- Top learned intents: filter=1.00, map=0.00, factorial=0.00, fibonacci=0.00.
|
| 4 |
+
- Selected intent: filter.
|
| 5 |
+
|
| 6 |
+
## Answer
|
| 7 |
+
```python
|
| 8 |
+
def filter_even_numbers(numbers):
|
| 9 |
+
result = []
|
| 10 |
+
for x in numbers:
|
| 11 |
+
if x % 2 == 0:
|
| 12 |
+
result.append(x)
|
| 13 |
+
return result
|
| 14 |
+
```
|
outputs/real_python_learner/tests/write_merge_sort.txt
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## Learned reasoning
|
| 2 |
+
- Read the request as characters and word fragments.
|
| 3 |
+
- Top learned intents: merge_sort=1.00, binary_search=0.00, write_json=0.00, safe_int=0.00.
|
| 4 |
+
- Selected intent: merge_sort.
|
| 5 |
+
|
| 6 |
+
## Answer
|
| 7 |
+
```python
|
| 8 |
+
def merge_sort(values):
|
| 9 |
+
if len(values) <= 1:
|
| 10 |
+
return values
|
| 11 |
+
mid = len(values) // 2
|
| 12 |
+
left = merge_sort(values[:mid])
|
| 13 |
+
right = merge_sort(values[mid:])
|
| 14 |
+
return merge(left, right)
|
| 15 |
+
|
| 16 |
+
def merge(left, right):
|
| 17 |
+
result = []
|
| 18 |
+
i = j = 0
|
| 19 |
+
while i < len(left) and j < len(right):
|
| 20 |
+
if left[i] <= right[j]:
|
| 21 |
+
result.append(left[i]); i += 1
|
| 22 |
+
else:
|
| 23 |
+
result.append(right[j]); j += 1
|
| 24 |
+
result.extend(left[i:]); result.extend(right[j:])
|
| 25 |
+
return result
|
| 26 |
+
```
|
| 27 |
+
Complexity: O(n log n) time and O(n) extra memory.
|
outputs/real_python_learner/training_examples.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
outputs/real_web_learner/WEB_REPORT.md
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# RealWebLearner Report
|
| 2 |
+
|
| 3 |
+
A new web-development subsystem was added.
|
| 4 |
+
|
| 5 |
+
## It learns
|
| 6 |
+
|
| 7 |
+
- HTML semantic structure
|
| 8 |
+
- CSS variables
|
| 9 |
+
- CSS Grid
|
| 10 |
+
- Flexbox
|
| 11 |
+
- responsive media queries
|
| 12 |
+
- clamp/mobile-first layout
|
| 13 |
+
- sticky navbars
|
| 14 |
+
- hamburger menus
|
| 15 |
+
- landing pages
|
| 16 |
+
- portfolio pages
|
| 17 |
+
- cards
|
| 18 |
+
- forms
|
| 19 |
+
- client-side validation
|
| 20 |
+
- vanilla JavaScript DOM events
|
| 21 |
+
- localStorage
|
| 22 |
+
- todo apps
|
| 23 |
+
- dark mode/theme toggles
|
| 24 |
+
- modals
|
| 25 |
+
- tabs
|
| 26 |
+
- accordions
|
| 27 |
+
- carousels/sliders intent
|
| 28 |
+
- CSS animations and keyframes
|
| 29 |
+
- fetch API / async await
|
| 30 |
+
- canvas basics
|
| 31 |
+
|
| 32 |
+
## Training
|
| 33 |
+
|
| 34 |
+
The web subsystem trains a character n-gram intent model over generated web-development instructions.
|
| 35 |
+
|
| 36 |
+
The current report is in:
|
| 37 |
+
|
| 38 |
+
```text
|
| 39 |
+
outputs/real_web_learner/report.json
|
| 40 |
+
```
|
| 41 |
+
|
| 42 |
+
## Usage
|
| 43 |
+
|
| 44 |
+
```bash
|
| 45 |
+
python real_web_learner.py --mode ask --out outputs/real_web_learner --prompt "create a responsive landing page with dark mode"
|
| 46 |
+
```
|
| 47 |
+
|
| 48 |
+
Or through the unified AI script:
|
| 49 |
+
|
| 50 |
+
```bash
|
| 51 |
+
python unified_learning_ai.py --mode ask --out outputs/unified_learning_ai --prompt "create a responsive landing page with dark mode"
|
| 52 |
+
```
|
outputs/real_web_learner/report.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"examples": 564060,
|
| 3 |
+
"features": 5356,
|
| 4 |
+
"labels": [
|
| 5 |
+
"full_page",
|
| 6 |
+
"landing_page",
|
| 7 |
+
"portfolio",
|
| 8 |
+
"navbar",
|
| 9 |
+
"hero",
|
| 10 |
+
"responsive_grid",
|
| 11 |
+
"card",
|
| 12 |
+
"form_validation",
|
| 13 |
+
"todo_app",
|
| 14 |
+
"dark_mode",
|
| 15 |
+
"modal",
|
| 16 |
+
"tabs",
|
| 17 |
+
"accordion",
|
| 18 |
+
"carousel",
|
| 19 |
+
"css_animation",
|
| 20 |
+
"dashboard",
|
| 21 |
+
"fetch_api",
|
| 22 |
+
"counter",
|
| 23 |
+
"canvas",
|
| 24 |
+
"explain_web"
|
| 25 |
+
],
|
| 26 |
+
"type": "char_ngram_web_intent_plus_compositional_generator"
|
| 27 |
+
}
|
outputs/real_web_learner/tests/build_navbar_with_hamburger_menu.txt
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## Learned web reasoning
|
| 2 |
+
- Read request with character n-grams and web-development fragments.
|
| 3 |
+
- Top intents: navbar=1.00, carousel=0.00, counter=0.00, hero=0.00
|
| 4 |
+
- Selected intent: navbar
|
| 5 |
+
|
| 6 |
+
## Answer
|
| 7 |
+
```html
|
| 8 |
+
<header class="site-header">
|
| 9 |
+
<a class="brand" href="#">Brand</a>
|
| 10 |
+
<button class="menu-btn" aria-expanded="false" aria-controls="nav">☰</button>
|
| 11 |
+
<nav id="nav" class="nav-links">
|
| 12 |
+
<a href="#home">Home</a><a href="#about">About</a><a href="#work">Work</a><a href="#contact">Contact</a>
|
| 13 |
+
</nav>
|
| 14 |
+
</header>
|
| 15 |
+
<style>
|
| 16 |
+
.site-header{display:flex;align-items:center;justify-content:space-between;padding:1rem 2rem;background:#0f172a;color:white;position:sticky;top:0}.brand{font-weight:900}.nav-links{display:flex;gap:1rem}.menu-btn{display:none}@media(max-width:700px){.menu-btn{display:block}.nav-links{display:none;position:absolute;top:100%;left:0;right:0;flex-direction:column;background:#111827;padding:1rem}.nav-links.open{display:flex}}
|
| 17 |
+
</style>
|
| 18 |
+
<script>
|
| 19 |
+
const btn=document.querySelector('.menu-btn'), nav=document.querySelector('#nav');
|
| 20 |
+
btn.onclick=()=>{nav.classList.toggle('open');btn.setAttribute('aria-expanded',nav.classList.contains('open'));};
|
| 21 |
+
</script>
|
| 22 |
+
```
|
outputs/real_web_learner/tests/create_a_responsive_landing_page_with_dark_mode.txt
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## Learned web reasoning
|
| 2 |
+
- Read request with character n-grams and web-development fragments.
|
| 3 |
+
- Top intents: landing_page=1.00, dark_mode=0.00, full_page=0.00, css_animation=0.00
|
| 4 |
+
- Selected intent: landing_page
|
| 5 |
+
|
| 6 |
+
## Answer
|
| 7 |
+
```html
|
| 8 |
+
<!doctype html>
|
| 9 |
+
<html lang="en">
|
| 10 |
+
<head>
|
| 11 |
+
<meta charset="utf-8" />
|
| 12 |
+
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
| 13 |
+
<title>Nova Landing</title>
|
| 14 |
+
<style>
|
| 15 |
+
:root {
|
| 16 |
+
--bg: #0f172a;
|
| 17 |
+
--panel: rgba(255,255,255,.08);
|
| 18 |
+
--text: #e5e7eb;
|
| 19 |
+
--muted: #94a3b8;
|
| 20 |
+
--brand: #7c3aed;
|
| 21 |
+
--brand-2: #06b6d4;
|
| 22 |
+
--ring: rgba(124,58,237,.45);
|
| 23 |
+
--radius: 22px;
|
| 24 |
+
--shadow: 0 24px 70px rgba(0,0,0,.35);
|
| 25 |
+
}
|
| 26 |
+
* { box-sizing: border-box; }
|
| 27 |
+
body {
|
| 28 |
+
margin: 0;
|
| 29 |
+
font-family: Inter, ui-sans-serif, system-ui, -apple-system, Segoe UI, sans-serif;
|
| 30 |
+
background: radial-gradient(circle at top left, rgba(124,58,237,.25), transparent 30%), var(--bg);
|
| 31 |
+
color: var(--text);
|
| 32 |
+
min-height: 100vh;
|
| 33 |
+
}
|
| 34 |
+
a { color: inherit; text-decoration: none; }
|
| 35 |
+
button, input, textarea { font: inherit; }
|
| 36 |
+
|
| 37 |
+
.nav { display:flex; align-items:center; justify-content:space-between; gap:1rem; padding:1.2rem clamp(1rem,4vw,4rem); position:sticky; top:0; backdrop-filter: blur(16px); background:rgba(15,23,42,.72); z-index:10; }
|
| 38 |
+
.logo { font-weight:900; letter-spacing:.04em; }
|
| 39 |
+
nav { display:flex; gap:1rem; color:var(--muted); }
|
| 40 |
+
.ghost { border:1px solid rgba(255,255,255,.16); background:transparent; color:var(--text); border-radius:999px; padding:.65rem 1rem; cursor:pointer; }
|
| 41 |
+
.hero { min-height:76vh; display:grid; grid-template-columns:1.1fr .9fr; align-items:center; gap:clamp(2rem,6vw,6rem); padding:clamp(2rem,6vw,6rem); }
|
| 42 |
+
.eyebrow { color:var(--brand-2); text-transform:uppercase; letter-spacing:.18em; font-size:.78rem; font-weight:800; }
|
| 43 |
+
h1 { font-size:clamp(2.4rem,7vw,6rem); line-height:.95; margin:.2em 0; }
|
| 44 |
+
.lead { color:var(--muted); font-size:clamp(1rem,2vw,1.25rem); max-width:65ch; }
|
| 45 |
+
.actions { display:flex; flex-wrap:wrap; gap:1rem; margin-top:2rem; }
|
| 46 |
+
.btn { background:linear-gradient(135deg,var(--brand),var(--brand-2)); padding:.9rem 1.2rem; border-radius:999px; font-weight:800; box-shadow:0 14px 35px var(--ring); }
|
| 47 |
+
.btn.secondary { background:rgba(255,255,255,.08); box-shadow:none; }
|
| 48 |
+
.orb-card { position:relative; overflow:hidden; border:1px solid rgba(255,255,255,.14); background:var(--panel); border-radius:var(--radius); padding:2rem; box-shadow:var(--shadow); min-height:360px; backdrop-filter:blur(20px); }
|
| 49 |
+
.orb { width:220px; height:220px; border-radius:50%; background:linear-gradient(135deg,var(--brand),var(--brand-2)); filter:blur(2px); animation:float 5s ease-in-out infinite; }
|
| 50 |
+
.grid { display:grid; grid-template-columns:repeat(3,minmax(0,1fr)); gap:1rem; padding:clamp(1rem,4vw,4rem); }
|
| 51 |
+
.card { border:1px solid rgba(255,255,255,.12); background:var(--panel); border-radius:var(--radius); padding:1.4rem; transition:transform .25s ease, border-color .25s ease; }
|
| 52 |
+
.card:hover { transform:translateY(-6px); border-color:var(--brand-2); }
|
| 53 |
+
body.light { --bg:#f8fafc; --panel:rgba(15,23,42,.06); --text:#0f172a; --muted:#475569; }
|
| 54 |
+
body.light .nav { background:rgba(248,250,252,.76); }
|
| 55 |
+
@keyframes float { 0%,100%{ transform:translateY(0) rotate(0deg);} 50%{ transform:translateY(-18px) rotate(8deg);} }
|
| 56 |
+
@media (max-width: 800px) { .hero { grid-template-columns:1fr; } .grid { grid-template-columns:1fr; } nav { display:none; } }
|
| 57 |
+
|
| 58 |
+
</style>
|
| 59 |
+
</head>
|
| 60 |
+
<body>
|
| 61 |
+
<header class="nav">
|
| 62 |
+
<a class="logo" href="#">Nova</a>
|
| 63 |
+
<nav>
|
| 64 |
+
<a href="#features">Features</a>
|
| 65 |
+
<a href="#work">Work</a>
|
| 66 |
+
<a href="#contact">Contact</a>
|
| 67 |
+
</nav>
|
| 68 |
+
<button id="themeBtn" class="ghost">Toggle theme</button>
|
| 69 |
+
</header>
|
| 70 |
+
|
| 71 |
+
<main>
|
| 72 |
+
<section class="hero">
|
| 73 |
+
<div class="hero-text">
|
| 74 |
+
<p class="eyebrow">Modern Web Experience</p>
|
| 75 |
+
<h1>Build beautiful interfaces with HTML, CSS and JavaScript.</h1>
|
| 76 |
+
<p class="lead">Responsive layout, animated gradient, glass cards, semantic HTML and a tiny vanilla JS theme switcher.</p>
|
| 77 |
+
<div class="actions">
|
| 78 |
+
<a class="btn" href="#contact">Start now</a>
|
| 79 |
+
<a class="btn secondary" href="#features">See features</a>
|
| 80 |
+
</div>
|
| 81 |
+
</div>
|
| 82 |
+
<div class="orb-card">
|
| 83 |
+
<div class="orb"></div>
|
| 84 |
+
<h2>CSS Grid + Variables</h2>
|
| 85 |
+
<p>Fast, accessible, and easy to customize.</p>
|
| 86 |
+
</div>
|
| 87 |
+
</section>
|
| 88 |
+
|
| 89 |
+
<section id="features" class="grid">
|
| 90 |
+
<article class="card"><h3>Responsive</h3><p>Uses grid, clamp and media queries.</p></article>
|
| 91 |
+
<article class="card"><h3>Animated</h3><p>Subtle keyframes create depth.</p></article>
|
| 92 |
+
<article class="card"><h3>Vanilla JS</h3><p>No frameworks required.</p></article>
|
| 93 |
+
</section>
|
| 94 |
+
</main>
|
| 95 |
+
<script>
|
| 96 |
+
const btn = document.querySelector('#themeBtn');
|
| 97 |
+
btn.addEventListener('click', () => document.body.classList.toggle('light'));
|
| 98 |
+
|
| 99 |
+
</script>
|
| 100 |
+
</body>
|
| 101 |
+
</html>
|
| 102 |
+
```
|
outputs/real_web_learner/tests/create_form_validation.txt
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## Learned web reasoning
|
| 2 |
+
- Read request with character n-grams and web-development fragments.
|
| 3 |
+
- Top intents: form_validation=1.00, accordion=0.00, hero=0.00, css_animation=0.00
|
| 4 |
+
- Selected intent: form_validation
|
| 5 |
+
|
| 6 |
+
## Answer
|
| 7 |
+
```html
|
| 8 |
+
<form id="contact" novalidate>
|
| 9 |
+
<label>Name <input name="name" required minlength="2"></label>
|
| 10 |
+
<label>Email <input name="email" required type="email"></label>
|
| 11 |
+
<button>Send</button>
|
| 12 |
+
<p id="msg" role="alert"></p>
|
| 13 |
+
</form>
|
| 14 |
+
<script>
|
| 15 |
+
const form=document.querySelector('#contact'), msg=document.querySelector('#msg');
|
| 16 |
+
form.addEventListener('submit', e=>{
|
| 17 |
+
e.preventDefault();
|
| 18 |
+
if(!form.checkValidity()){ msg.textContent='Please complete the form correctly.'; return; }
|
| 19 |
+
msg.textContent='Message ready to send!';
|
| 20 |
+
});
|
| 21 |
+
</script>
|
| 22 |
+
```
|