fawazo committed on
Commit
bde55d4
·
verified ·
1 Parent(s): 8186f7a

Upload convert_pentest_gguf.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. convert_pentest_gguf.py +232 -0
convert_pentest_gguf.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # /// script
3
+ # dependencies = [
4
+ # "transformers>=4.36.0",
5
+ # "peft>=0.7.0",
6
+ # "torch>=2.0.0",
7
+ # "accelerate>=0.24.0",
8
+ # "huggingface_hub>=0.20.0",
9
+ # "sentencepiece>=0.1.99",
10
+ # "protobuf>=3.20.0",
11
+ # "numpy",
12
+ # "gguf",
13
+ # ]
14
+ # ///
15
+
16
+ """
17
+ GGUF Conversion for Pentesting Model
18
+ Converts LoRA adapter to GGUF Q4_K_M for Jetson Orin Nano (8GB)
19
+ """
20
+
21
import os
import subprocess
import sys

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
from huggingface_hub import HfApi
27
+
28
# Banner and run configuration for the conversion pipeline.
_RULE = "=" * 60
print(_RULE)
print("GGUF CONVERSION - Pentesting Model for Jetson")
print(_RULE)

# Hub identifiers: the fine-tuned LoRA adapter, the base checkpoint it was
# trained from, and the destination repo for the GGUF artifacts.
ADAPTER_MODEL = "fawazo/qwen2.5-coder-3b-pentest"
BASE_MODEL = "Qwen/Qwen2.5-Coder-3B"
OUTPUT_REPO = "fawazo/qwen2.5-coder-3b-pentest-gguf"

# Echo the configuration so the log records exactly what was converted.
for _label, _value in (
    ("\nBase model", BASE_MODEL),
    ("Adapter", ADAPTER_MODEL),
    ("Output", OUTPUT_REPO),
):
    print(f"{_label}: {_value}")
40
+
41
+ # Step 1: Load and merge
42
+ print("\n[1/6] Loading base model...")
43
+ base_model = AutoModelForCausalLM.from_pretrained(
44
+ BASE_MODEL,
45
+ torch_dtype=torch.float16,
46
+ device_map="auto",
47
+ trust_remote_code=True,
48
+ )
49
+ print("Base model loaded")
50
+
51
+ print("Loading LoRA adapter...")
52
+ model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL)
53
+ print("Merging...")
54
+ merged_model = model.merge_and_unload()
55
+ print("Models merged!")
56
+
57
+ tokenizer = AutoTokenizer.from_pretrained(ADAPTER_MODEL, trust_remote_code=True)
58
+
59
+ # Step 2: Save merged model
60
+ print("\n[2/6] Saving merged model...")
61
+ merged_dir = "/tmp/merged_model"
62
+ merged_model.save_pretrained(merged_dir, safe_serialization=True)
63
+ tokenizer.save_pretrained(merged_dir)
64
+ print(f"Saved to {merged_dir}")
65
+
66
+ # Step 3: Setup llama.cpp
67
+ print("\n[3/6] Setting up llama.cpp...")
68
+ subprocess.run(["apt-get", "update", "-qq"], check=True, capture_output=True)
69
+ subprocess.run(["apt-get", "install", "-y", "-qq", "build-essential", "cmake"], check=True, capture_output=True)
70
+ print("Build tools installed")
71
+
72
+ subprocess.run(["git", "clone", "--depth", "1", "https://github.com/ggerganov/llama.cpp.git", "/tmp/llama.cpp"], check=True, capture_output=True)
73
+ print("llama.cpp cloned")
74
+
75
+ subprocess.run(["pip", "install", "-q", "-r", "/tmp/llama.cpp/requirements.txt"], check=True, capture_output=True)
76
+ subprocess.run(["pip", "install", "-q", "sentencepiece", "protobuf"], check=True, capture_output=True)
77
+ print("Dependencies installed")
78
+
79
# Step 4: Convert the merged HF checkpoint to a single FP16 GGUF file using
# llama.cpp's converter script.
print("\n[4/6] Converting to GGUF (FP16)...")
gguf_dir = "/tmp/gguf_output"
os.makedirs(gguf_dir, exist_ok=True)

model_name = "qwen2.5-coder-3b-pentest"
gguf_fp16 = f"{gguf_dir}/{model_name}-f16.gguf"

try:
    # Run the converter with the current interpreter (sys.executable) so it
    # sees the same environment the requirements were installed into.
    result = subprocess.run(
        [sys.executable, "/tmp/llama.cpp/convert_hf_to_gguf.py", merged_dir,
         "--outfile", gguf_fp16, "--outtype", "f16"],
        check=True, capture_output=True, text=True,
    )
except subprocess.CalledProcessError as err:
    # capture_output=True hides the converter's logs; surface them before
    # propagating so a failed conversion is actually debuggable.
    print(err.stdout)
    print(err.stderr)
    raise
print(f"FP16 GGUF created: {os.path.getsize(gguf_fp16) / 1024**3:.2f} GB")
93
+ # Step 5: Quantize
94
+ print("\n[5/6] Building quantize tool and creating quantizations...")
95
+ os.makedirs("/tmp/llama.cpp/build", exist_ok=True)
96
+ subprocess.run(["cmake", "-B", "/tmp/llama.cpp/build", "-S", "/tmp/llama.cpp", "-DGGML_CUDA=OFF"], check=True, capture_output=True, text=True)
97
+ subprocess.run(["cmake", "--build", "/tmp/llama.cpp/build", "--target", "llama-quantize", "-j", "4"], check=True, capture_output=True, text=True)
98
+ print("Quantize tool built")
99
+
100
+ quantize_bin = "/tmp/llama.cpp/build/bin/llama-quantize"
101
+
102
+ # Create quantizations optimized for Jetson
103
+ quant_formats = [
104
+ ("Q4_K_M", "4-bit - RECOMMENDED for 8GB Jetson"),
105
+ ("Q5_K_M", "5-bit - Higher quality, ~2GB"),
106
+ ("Q8_0", "8-bit - Best quality, ~3.5GB"),
107
+ ]
108
+
109
+ quantized_files = []
110
+ for quant_type, desc in quant_formats:
111
+ quant_file = f"{gguf_dir}/{model_name}-{quant_type.lower()}.gguf"
112
+ subprocess.run([quantize_bin, gguf_fp16, quant_file, quant_type], check=True, capture_output=True)
113
+ size_gb = os.path.getsize(quant_file) / 1024**3
114
+ print(f" {quant_type}: {size_gb:.2f} GB - {desc}")
115
+ quantized_files.append((quant_file, quant_type))
116
+
117
+ # Step 6: Upload
118
+ print("\n[6/6] Uploading to Hugging Face Hub...")
119
+ api = HfApi()
120
+ api.create_repo(repo_id=OUTPUT_REPO, repo_type="model", exist_ok=True)
121
+
122
+ # Upload all files
123
+ api.upload_file(path_or_fileobj=gguf_fp16, path_in_repo=f"{model_name}-f16.gguf", repo_id=OUTPUT_REPO)
124
+ print(" Uploaded FP16")
125
+
126
+ for quant_file, quant_type in quantized_files:
127
+ api.upload_file(path_or_fileobj=quant_file, path_in_repo=f"{model_name}-{quant_type.lower()}.gguf", repo_id=OUTPUT_REPO)
128
+ print(f" Uploaded {quant_type}")
129
+
130
+ # Create README with Jetson-specific instructions
131
+ readme = f"""---
132
+ base_model: {BASE_MODEL}
133
+ tags:
134
+ - gguf
135
+ - llama.cpp
136
+ - pentesting
137
+ - cybersecurity
138
+ - jetson
139
+ - quantized
140
+ ---
141
+
142
+ # Qwen2.5-Coder-3B Pentest - GGUF
143
+
144
+ GGUF quantizations of [fawazo/qwen2.5-coder-3b-pentest](https://huggingface.co/fawazo/qwen2.5-coder-3b-pentest) optimized for **Jetson Orin Nano (8GB)**.
145
+
146
+ ## Model Description
147
+
148
+ An AI pentesting assistant fine-tuned on 150K+ cybersecurity examples covering:
149
+ - OWASP Top 10 vulnerabilities
150
+ - MITRE ATT&CK framework
151
+ - API security testing
152
+ - Web application penetration testing
153
+
154
+ **Output Format:** JSON for automation
155
+
156
+ ## Quantizations
157
+
158
+ | File | Size | RAM Needed | Recommended For |
159
+ |------|------|------------|-----------------|
160
+ | `{model_name}-q4_k_m.gguf` | ~1.8GB | ~3GB | **Jetson Orin Nano 8GB** |
161
+ | `{model_name}-q5_k_m.gguf` | ~2.1GB | ~4GB | Better quality |
162
+ | `{model_name}-q8_0.gguf` | ~3.4GB | ~5GB | Best quality |
163
+ | `{model_name}-f16.gguf` | ~6GB | ~8GB | Full precision |
164
+
165
+ ## Usage on Jetson
166
+
167
+ ### With Ollama
168
+ ```bash
169
+ # Download Q4_K_M (recommended for 8GB)
170
+ huggingface-cli download {OUTPUT_REPO} {model_name}-q4_k_m.gguf
171
+
172
+ # Create Modelfile
173
+ cat > Modelfile << 'EOF'
174
+ FROM ./{model_name}-q4_k_m.gguf
175
+
176
+ SYSTEM \"\"\"You are an expert penetration testing AI assistant. Analyze web traffic and respond with JSON:
177
+ {{"action": "report|request|command|complete", ...}}\"\"\"
178
+
179
+ PARAMETER temperature 0.3
180
+ PARAMETER num_ctx 2048
181
+ EOF
182
+
183
+ # Create and run
184
+ ollama create pentest-agent -f Modelfile
185
+ ollama run pentest-agent
186
+ ```
187
+
188
+ ### With llama.cpp
189
+ ```bash
190
+ ./llama-cli -m {model_name}-q4_k_m.gguf -ngl 99 -c 2048 -p "Analyze this request..."
191
+ ```
192
+
193
+ ## Example Usage
194
+
195
+ **Input:**
196
+ ```
197
+ Analyze this HTTP exchange:
198
+ REQUEST: GET /api/users?id=1
199
+ RESPONSE: {{"user": "admin", "role": "administrator"}}
200
+ ```
201
+
202
+ **Output:**
203
+ ```json
204
+ {{
205
+ "action": "request",
206
+ "method": "GET",
207
+ "path": "/api/users?id=2",
208
+ "reasoning": "Testing for IDOR - checking if user IDs are enumerable"
209
+ }}
210
+ ```
211
+
212
+ ## Training Details
213
+
214
+ - **Base:** Qwen/Qwen2.5-Coder-3B
215
+ - **Method:** SFT with LoRA (r=32)
216
+ - **Dataset:** 150K+ combined examples from Trendyol, Fenrir v2.0, pentest-agent
217
+ - **Frameworks:** OWASP, MITRE ATT&CK, NIST CSF
218
+
219
+ ## License
220
+
221
+ Apache 2.0 (inherits from base model and training datasets)
222
+ """
223
+
224
+ api.upload_file(path_or_fileobj=readme.encode(), path_in_repo="README.md", repo_id=OUTPUT_REPO)
225
+ print(" Uploaded README")
226
+
227
+ print("\n" + "=" * 60)
228
+ print("CONVERSION COMPLETE!")
229
+ print(f"Repository: https://huggingface.co/{OUTPUT_REPO}")
230
+ print(f"\nFor Jetson Orin Nano, download:")
231
+ print(f" huggingface-cli download {OUTPUT_REPO} {model_name}-q4_k_m.gguf")
232
+ print("=" * 60)