fawazo committed on
Commit
bde55d4
·
verified ·
1 Parent(s): 8186f7a

Upload convert_pentest_gguf.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. convert_pentest_gguf.py +232 -0
convert_pentest_gguf.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # /// script
3
+ # dependencies = [
4
+ # "transformers>=4.36.0",
5
+ # "peft>=0.7.0",
6
+ # "torch>=2.0.0",
7
+ # "accelerate>=0.24.0",
8
+ # "huggingface_hub>=0.20.0",
9
+ # "sentencepiece>=0.1.99",
10
+ # "protobuf>=3.20.0",
11
+ # "numpy",
12
+ # "gguf",
13
+ # ]
14
+ # ///
15
+
16
+ """
17
+ GGUF Conversion for Pentesting Model
18
+ Converts LoRA adapter to GGUF Q4_K_M for Jetson Orin Nano (8GB)
19
+ """
20
+
21
import os
import subprocess
import sys

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
from huggingface_hub import HfApi
27
+
28
# Banner and run configuration for the conversion pipeline.
_RULE = "=" * 60
print(_RULE)
print("GGUF CONVERSION - Pentesting Model for Jetson")
print(_RULE)

# Hub identifiers: the fine-tuned LoRA adapter, the base checkpoint it was
# trained from, and the destination repo for the GGUF artifacts.
ADAPTER_MODEL = "fawazo/qwen2.5-coder-3b-pentest"
BASE_MODEL = "Qwen/Qwen2.5-Coder-3B"
OUTPUT_REPO = "fawazo/qwen2.5-coder-3b-pentest-gguf"

# Echo the configuration so the log records exactly what was converted.
for _label, _value in (
    ("\nBase model", BASE_MODEL),
    ("Adapter", ADAPTER_MODEL),
    ("Output", OUTPUT_REPO),
):
    print(f"{_label}: {_value}")
40
+
41
+ # Step 1: Load and merge
42
+ print("\n[1/6] Loading base model...")
43
+ base_model = AutoModelForCausalLM.from_pretrained(
44
+ BASE_MODEL,
45
+ torch_dtype=torch.float16,
46
+ device_map="auto",
47
+ trust_remote_code=True,
48
+ )
49
+ print("Base model loaded")
50
+
51
+ print("Loading LoRA adapter...")
52
+ model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL)
53
+ print("Merging...")
54
+ merged_model = model.merge_and_unload()
55
+ print("Models merged!")
56
+
57
+ tokenizer = AutoTokenizer.from_pretrained(ADAPTER_MODEL, trust_remote_code=True)
58
+
59
+ # Step 2: Save merged model
60
+ print("\n[2/6] Saving merged model...")
61
+ merged_dir = "/tmp/merged_model"
62
+ merged_model.save_pretrained(merged_dir, safe_serialization=True)
63
+ tokenizer.save_pretrained(merged_dir)
64
+ print(f"Saved to {merged_dir}")
65
+
66
+ # Step 3: Setup llama.cpp
67
+ print("\n[3/6] Setting up llama.cpp...")
68
+ subprocess.run(["apt-get", "update", "-qq"], check=True, capture_output=True)
69
+ subprocess.run(["apt-get", "install", "-y", "-qq", "build-essential", "cmake"], check=True, capture_output=True)
70
+ print("Build tools installed")
71
+
72
+ subprocess.run(["git", "clone", "--depth", "1", "https://github.com/ggerganov/llama.cpp.git", "/tmp/llama.cpp"], check=True, capture_output=True)
73
+ print("llama.cpp cloned")
74
+
75
+ subprocess.run(["pip", "install", "-q", "-r", "/tmp/llama.cpp/requirements.txt"], check=True, capture_output=True)
76
+ subprocess.run(["pip", "install", "-q", "sentencepiece", "protobuf"], check=True, capture_output=True)
77
+ print("Dependencies installed")
78
+
79
# Step 4: Convert the merged HF checkpoint to a single FP16 GGUF file using
# llama.cpp's converter script.
print("\n[4/6] Converting to GGUF (FP16)...")
gguf_dir = "/tmp/gguf_output"
os.makedirs(gguf_dir, exist_ok=True)

model_name = "qwen2.5-coder-3b-pentest"
gguf_fp16 = f"{gguf_dir}/{model_name}-f16.gguf"

try:
    # Run the converter with the current interpreter (sys.executable) so it
    # sees the same environment the requirements were installed into.
    result = subprocess.run(
        [sys.executable, "/tmp/llama.cpp/convert_hf_to_gguf.py", merged_dir,
         "--outfile", gguf_fp16, "--outtype", "f16"],
        check=True, capture_output=True, text=True,
    )
except subprocess.CalledProcessError as err:
    # capture_output=True hides the converter's logs; surface them before
    # propagating so a failed conversion is actually debuggable.
    print(err.stdout)
    print(err.stderr)
    raise
print(f"FP16 GGUF created: {os.path.getsize(gguf_fp16) / 1024**3:.2f} GB")
93
+ # Step 5: Quantize
94
+ print("\n[5/6] Building quantize tool and creating quantizations...")
95
+ os.makedirs("/tmp/llama.cpp/build", exist_ok=True)
96
+ subprocess.run(["cmake", "-B", "/tmp/llama.cpp/build", "-S", "/tmp/llama.cpp", "-DGGML_CUDA=OFF"], check=True, capture_output=True, text=True)
97
+ subprocess.run(["cmake", "--build", "/tmp/llama.cpp/build", "--target", "llama-quantize", "-j", "4"], check=True, capture_output=True, text=True)
98
+ print("Quantize tool built")
99
+
100
+ quantize_bin = "/tmp/llama.cpp/build/bin/llama-quantize"
101
+
102
+ # Create quantizations optimized for Jetson
103
+ quant_formats = [
104
+ ("Q4_K_M", "4-bit - RECOMMENDED for 8GB Jetson"),
105
+ ("Q5_K_M", "5-bit - Higher quality, ~2GB"),
106
+ ("Q8_0", "8-bit - Best quality, ~3.5GB"),
107
+ ]
108
+
109
+ quantized_files = []
110
+ for quant_type, desc in quant_formats:
111
+ quant_file = f"{gguf_dir}/{model_name}-{quant_type.lower()}.gguf"
112
+ subprocess.run([quantize_bin, gguf_fp16, quant_file, quant_type], check=True, capture_output=True)
113
+ size_gb = os.path.getsize(quant_file) / 1024**3
114
+ print(f" {quant_type}: {size_gb:.2f} GB - {desc}")
115
+ quantized_files.append((quant_file, quant_type))
116
+
117
+ # Step 6: Upload
118
+ print("\n[6/6] Uploading to Hugging Face Hub...")
119
+ api = HfApi()
120
+ api.create_repo(repo_id=OUTPUT_REPO, repo_type="model", exist_ok=True)
121
+
122
+ # Upload all files
123
+ api.upload_file(path_or_fileobj=gguf_fp16, path_in_repo=f"{model_name}-f16.gguf", repo_id=OUTPUT_REPO)
124
+ print(" Uploaded FP16")
125
+
126
+ for quant_file, quant_type in quantized_files:
127
+ api.upload_file(path_or_fileobj=quant_file, path_in_repo=f"{model_name}-{quant_type.lower()}.gguf", repo_id=OUTPUT_REPO)
128
+ print(f" Uploaded {quant_type}")
129
+
130
+ # Create README with Jetson-specific instructions
131
+ readme = f"""---
132
+ base_model: {BASE_MODEL}
133
+ tags:
134
+ - gguf
135
+ - llama.cpp
136
+ - pentesting
137
+ - cybersecurity
138
+ - jetson
139
+ - quantized
140
+ ---
141
+
142
+ # Qwen2.5-Coder-3B Pentest - GGUF
143
+
144
+ GGUF quantizations of [fawazo/qwen2.5-coder-3b-pentest](https://huggingface.co/fawazo/qwen2.5-coder-3b-pentest) optimized for **Jetson Orin Nano (8GB)**.
145
+
146
+ ## Model Description
147
+
148
+ An AI pentesting assistant fine-tuned on 150K+ cybersecurity examples covering:
149
+ - OWASP Top 10 vulnerabilities
150
+ - MITRE ATT&CK framework
151
+ - API security testing
152
+ - Web application penetration testing
153
+
154
+ **Output Format:** JSON for automation
155
+
156
+ ## Quantizations
157
+
158
+ | File | Size | RAM Needed | Recommended For |
159
+ |------|------|------------|-----------------|
160
+ | `{model_name}-q4_k_m.gguf` | ~1.8GB | ~3GB | **Jetson Orin Nano 8GB** |
161
+ | `{model_name}-q5_k_m.gguf` | ~2.1GB | ~4GB | Better quality |
162
+ | `{model_name}-q8_0.gguf` | ~3.4GB | ~5GB | Best quality |
163
+ | `{model_name}-f16.gguf` | ~6GB | ~8GB | Full precision |
164
+
165
+ ## Usage on Jetson
166
+
167
+ ### With Ollama
168
+ ```bash
169
+ # Download Q4_K_M (recommended for 8GB)
170
+ huggingface-cli download {OUTPUT_REPO} {model_name}-q4_k_m.gguf
171
+
172
+ # Create Modelfile
173
+ cat > Modelfile << 'EOF'
174
+ FROM ./{model_name}-q4_k_m.gguf
175
+
176
+ SYSTEM \"\"\"You are an expert penetration testing AI assistant. Analyze web traffic and respond with JSON:
177
+ {{"action": "report|request|command|complete", ...}}\"\"\"
178
+
179
+ PARAMETER temperature 0.3
180
+ PARAMETER num_ctx 2048
181
+ EOF
182
+
183
+ # Create and run
184
+ ollama create pentest-agent -f Modelfile
185
+ ollama run pentest-agent
186
+ ```
187
+
188
+ ### With llama.cpp
189
+ ```bash
190
+ ./llama-cli -m {model_name}-q4_k_m.gguf -ngl 99 -c 2048 -p "Analyze this request..."
191
+ ```
192
+
193
+ ## Example Usage
194
+
195
+ **Input:**
196
+ ```
197
+ Analyze this HTTP exchange:
198
+ REQUEST: GET /api/users?id=1
199
+ RESPONSE: {{"user": "admin", "role": "administrator"}}
200
+ ```
201
+
202
+ **Output:**
203
+ ```json
204
+ {{
205
+ "action": "request",
206
+ "method": "GET",
207
+ "path": "/api/users?id=2",
208
+ "reasoning": "Testing for IDOR - checking if user IDs are enumerable"
209
+ }}
210
+ ```
211
+
212
+ ## Training Details
213
+
214
+ - **Base:** Qwen/Qwen2.5-Coder-3B
215
+ - **Method:** SFT with LoRA (r=32)
216
+ - **Dataset:** 150K+ combined examples from Trendyol, Fenrir v2.0, pentest-agent
217
+ - **Frameworks:** OWASP, MITRE ATT&CK, NIST CSF
218
+
219
+ ## License
220
+
221
+ Apache 2.0 (inherits from base model and training datasets)
222
+ """
223
+
224
+ api.upload_file(path_or_fileobj=readme.encode(), path_in_repo="README.md", repo_id=OUTPUT_REPO)
225
+ print(" Uploaded README")
226
+
227
+ print("\n" + "=" * 60)
228
+ print("CONVERSION COMPLETE!")
229
+ print(f"Repository: https://huggingface.co/{OUTPUT_REPO}")
230
+ print(f"\nFor Jetson Orin Nano, download:")
231
+ print(f" huggingface-cli download {OUTPUT_REPO} {model_name}-q4_k_m.gguf")
232
+ print("=" * 60)