---
license: mit
datasets:
- zerofata/Instruct-Anime
- zerofata/Roleplay-Anime-Characters
- zerofata/Gemini-3.1-Pro-GLM5-Characters
- zerofata/Gemini-3.1-Pro-SmallWiki
base_model:
- zai-org/GLM-4.5-Air
---
<style>
.ib {
--bg: #e4eef6;
--panel: rgba(255,255,255,0.7);
--accent: #4a9ec8;
--accent2: #78c4e0;
--accent3: #a0d8ef;
--border: #b0cedf;
--text: #1e3040;
--muted: #5a7a90;
--bright: #2884b0;
--white: #ffffff;
--crystal: rgba(74,158,200,0.12);
--mono: 'JetBrains Mono', monospace;
--sans: 'Inter', sans-serif;
font-family: var(--sans);
color: var(--text);
background: var(--bg);
max-width: 960px;
margin: 0 auto;
padding: 0 0 48px;
line-height: 1.7;
font-size: 1rem;
}
/* ── Hero ── */
.ib-hero { position: relative; border-bottom: 1px solid var(--accent2); margin: 0; }
.ib-hero img {
display: block;
width: 100%;
margin: 0;
}
.ib-title {
text-align: center;
position: relative;
z-index: 1;
}
.ib-card {
display: inline-block;
padding: 18px 52px;
background: rgba(255,255,255,0.88);
border: 1px solid var(--accent2);
box-shadow: 0 4px 32px rgba(74,158,200,0.2);
margin-top: -75px;
}
.ib-name {
font-size: 2.6rem;
font-weight: 900;
letter-spacing: 6px;
text-transform: uppercase;
margin: 0 0 6px;
line-height: 1;
background: linear-gradient(90deg, var(--bright), var(--accent2), var(--bright));
background-clip: text;
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
}
.ib-base {
font-family: var(--mono);
font-size: 0.68rem;
color: var(--muted);
letter-spacing: 3px;
text-transform: uppercase;
}
/* ── Diamond separators ── */
.ib-sep {
display: flex;
align-items: center;
margin: 0 32px;
padding: 16px 0;
}
.ib-sep-line {
flex: 1;
height: 1px;
background: linear-gradient(90deg, transparent, var(--accent2), transparent);
}
.ib-dia {
width: 10px;
height: 10px;
background: var(--accent);
transform: rotate(45deg);
box-shadow: 0 0 8px rgba(74,158,200,0.35);
margin: 0 14px;
flex-shrink: 0;
}
/* ── Sections ── */
.ib-section {
margin: 0 32px;
padding: 32px;
background: var(--panel);
border: 1px solid var(--border);
box-shadow: 0 2px 20px var(--crystal);
}
/* ── Section headers ── */
.ib-shead {
text-align: center;
margin-bottom: 24px;
}
.ib-emblem {
width: 40px;
height: 40px;
border: 2px solid var(--accent2);
transform: rotate(45deg);
margin: 0 auto 14px;
display: flex;
align-items: center;
justify-content: center;
box-shadow: 0 0 14px rgba(74,158,200,0.15);
background: var(--white);
}
.ib-glyph {
transform: rotate(-45deg);
font-size: 1rem;
color: var(--accent);
line-height: 1;
}
.ib-stitle {
font-size: 1.5rem;
font-weight: 800;
letter-spacing: 4px;
text-transform: uppercase;
margin: 0 !important;
padding: 0 !important;
border: none !important;
display: block;
background: linear-gradient(90deg, var(--accent), var(--bright), var(--accent));
background-clip: text;
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
}
/* ── Body ── */
.ib-sbody p { margin: 0 0 14px; font-size: 0.95rem; }
.ib-sbody p:last-child { margin-bottom: 0; }
/* ── Sub-headings ── */
.ib-sub {
color: var(--bright) !important;
font-size: 1.1rem !important;
margin: 24px 0 14px !important;
padding: 0 0 8px !important;
font-weight: 700;
text-transform: uppercase;
letter-spacing: 2px;
border: none !important;
border-bottom: 2px solid var(--accent2) !important;
}
/* ── Data boxes ── */
.ib-data {
background: var(--white);
padding: 18px;
border: 1px solid var(--border);
border-left: 3px solid var(--accent);
margin-bottom: 18px;
box-shadow: 0 2px 12px var(--crystal);
font-size: 0.95rem;
}
.ib-data:last-child { margin-bottom: 0; }
.ib-row {
display: flex;
align-items: center;
margin-bottom: 8px;
padding: 6px 0;
border-bottom: 1px solid rgba(176,206,223,0.4);
}
.ib-row:last-child { margin-bottom: 0; border-bottom: none; }
.ib-mark {
width: 6px;
height: 6px;
background: var(--accent);
transform: rotate(45deg);
box-shadow: 0 0 4px rgba(74,158,200,0.3);
margin-right: 12px;
flex-shrink: 0;
}
.ib-label {
color: var(--muted);
font-weight: 700;
margin-right: 12px;
min-width: 90px;
text-transform: uppercase;
letter-spacing: 1px;
font-size: 0.88rem;
}
/* ── Links ── */
.ib a {
color: var(--bright);
text-decoration: none;
font-weight: 600;
border-bottom: 1px dotted var(--accent2);
}
.ib a:hover {
color: var(--accent);
border-bottom-style: solid;
}
/* ── Dropdown ── */
.ib-drop { margin-top: 24px; }
.ib-drop details {
border: 1px solid var(--border);
background: var(--white);
box-shadow: 0 2px 12px var(--crystal);
}
.ib-drop summary {
cursor: pointer;
padding: 12px 18px;
color: var(--muted);
font-size: 1rem;
font-weight: 700;
text-transform: uppercase;
letter-spacing: 2px;
list-style: none;
display: flex;
align-items: center;
gap: 12px;
}
.ib-drop summary::-webkit-details-marker { display: none; }
.ib-drop summary::before {
content: '+';
color: var(--accent);
font-size: 1.1rem;
font-weight: 700;
line-height: 1;
flex-shrink: 0;
}
.ib-drop details[open] summary::before { content: '−'; }
.ib-drop summary:hover { color: var(--bright); }
.ib-drop-body {
padding: 18px;
border-top: 1px solid var(--border);
background: rgba(228,238,246,0.3);
}
.ib-drop-body p { margin: 0 0 12px; font-size: 0.9rem; }
.ib-cfg {
color: var(--bright);
font-size: 0.95rem;
margin-bottom: 8px;
text-transform: uppercase;
letter-spacing: 2px;
font-weight: 700;
}
/* ── Code ── */
.ib pre {
background: #1a2a3a;
padding: 14px 16px;
margin: 0;
border: 1px solid var(--accent2);
border-left: 3px solid var(--accent);
overflow-x: auto;
color: #c8dce8;
box-shadow: 0 2px 12px var(--crystal);
}
.ib pre code {
font-family: var(--mono);
font-size: 0.76rem;
line-height: 1.6;
background: none;
color: inherit;
padding: 0;
display: block;
border: none;
}
.ib code {
font-family: var(--mono);
color: var(--bright);
background: rgba(74,158,200,0.08);
padding: 2px 6px;
border: 1px solid rgba(74,158,200,0.15);
}
</style>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Iceblink v3</title>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700;800;900&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
</head>
<body>
<div class="ib">
<div class="ib-hero">
<img src="https://cdn-uploads.huggingface.co/production/uploads/65b19c6c638328850e12d38c/AsvE-KnBo6Zq2qJ92C_uc.png" alt="Iceblink v3 model banner">
</div>
<div class="ib-title">
<div class="ib-card">
<h1 class="ib-name">Iceblink</h1>
<span class="ib-base">Version 3 · GLM-4.5 Air</span>
</div>
</div>
<div class="ib-sep"><div class="ib-sep-line"></div><div class="ib-dia"></div><div class="ib-sep-line"></div></div>
<div class="ib-section">
<div class="ib-shead">
<div class="ib-emblem"><span class="ib-glyph"></span></div>
<span class="ib-stitle">Overview</span>
</div>
<div class="ib-sbody">
<p>Decided to try tuning Air again after I saw Axolotl make some improvements on their training implementation and now that I know a lot more about what I'm doing. And wow. I think this came out pretty good.</p>
<p>This is a creative writing and RP model. It supports both reasoning and non-reasoning modes with the usual GLM Air templates, although running with reasoning off is generally recommended.</p>
</div>
</div>
<div class="ib-sep"><div class="ib-sep-line"></div><div class="ib-dia"></div><div class="ib-sep-line"></div></div>
<div class="ib-section">
<div class="ib-shead">
<div class="ib-emblem"><span class="ib-glyph"></span></div>
<span class="ib-stitle">SillyTavern Settings</span>
</div>
<div class="ib-sbody">
<h3 class="ib-sub">Recommended Roleplay Format</h3>
<div class="ib-data">
<div class="ib-row">
<span class="ib-mark"></span>
<span class="ib-label">Actions:</span>
<span>In plaintext</span>
</div>
<div class="ib-row">
<span class="ib-mark"></span>
<span class="ib-label">Dialogue:</span>
<span>"In quotes"</span>
</div>
<div class="ib-row">
<span class="ib-mark"></span>
<span class="ib-label">Thoughts:</span>
<span>*In asterisks*</span>
</div>
</div>
<h3 class="ib-sub">Recommended Samplers</h3>
<div class="ib-data">
<div class="ib-row">
<span class="ib-mark"></span>
<span class="ib-label">Temp:</span>
<span>0.8 - 0.9</span>
</div>
<div class="ib-row">
<span class="ib-mark"></span>
<span class="ib-label">MinP:</span>
<span>0.05</span>
</div>
<div class="ib-row">
<span class="ib-mark"></span>
<span class="ib-label">TopP:</span>
<span>0.95 - 1.00</span>
</div>
</div>
<h3 class="ib-sub">Instruct</h3>
<div class="ib-data">
<p style="margin: 0;">GLM4.5 (no thinking): <a href="https://huggingface.co/zerofata/GLM-4.5-Iceblink-106B-A12B/raw/main/GLM45-NoThink-SillyTavern-Preset.json">SillyTavern Preset</a></p>
</div>
</div>
</div>
<div class="ib-sep"><div class="ib-sep-line"></div><div class="ib-dia"></div><div class="ib-sep-line"></div></div>
<div class="ib-section">
<div class="ib-shead">
<div class="ib-emblem"><span class="ib-glyph"></span></div>
<span class="ib-stitle">Quantizations</span>
</div>
<div class="ib-sbody">
<h3 class="ib-sub">GGUF</h3>
<div class="ib-data">
<div class="ib-row">
<span class="ib-mark"></span>
<a href="https://huggingface.co/zerofata/GLM-4.5-Iceblink-v3-106B-A12B-GGUF">iMatrix</a>
</div>
</div>
</div>
</div>
<div class="ib-sep"><div class="ib-sep-line"></div><div class="ib-dia"></div><div class="ib-sep-line"></div></div>
<div class="ib-section">
<div class="ib-shead">
<div class="ib-emblem"><span class="ib-glyph"></span></div>
<span class="ib-stitle">Creation Process</span>
</div>
<div class="ib-sbody">
<p>Creation Process: SFT > SFT</p>
<p>SFT on approximately 15.3 million tokens (11.7 million trainable) of SFW / NSFW RP, instruct, and chat data.</p>
<p>Then I tried out an idea I saw from <a href="https://huggingface.co/ConicCat">ConicCat</a> and trained the model for 8 epochs on 96 short stories (150k tokens) from light novels and human authors the internet said were good. This seems to have had a surprisingly positive effect on the prose without hurting the intelligence too much.</p>
<p>I went back to my usual higher LRs for this model. It turns out the GLM chat template was more cursed than I originally gave it credit for while training. It was a skill issue all along, go figure.</p>
<div class="ib-drop">
<details>
<summary>Axolotl Config</summary>
<div class="ib-drop-body">
<div class="ib-cfg">SFT (4&#215;H200)</div>
<pre><code>base_model: zai&#45;org/GLM&#45;4.5&#45;Air
eot_tokens:
&#45; "&lt;|user|&gt;"
&#45; "&lt;|endoftext|&gt;"
chat_template_jinja: ./glm_air.jinja
&#32;
plugins:
&#45; axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
&#32;
load_in_8bit: false
load_in_4bit: true
&#32;
quantize_moe_experts: true &#35; important
&#32;
datasets:
&#45; path: ./data/nothink_dataset.jsonl
type: chat_template
&#45; path: ./data/think_dataset.jsonl
type: chat_template
&#32;
dataset_prepared_path: last_run_prepared
val_set_size: 0.01
output_dir: ./GLM&#45;Air&#45;v4&#45;SFT&#45;1
&#32;
adapter: qlora
lora_model_dir:
&#32;
sequence_len: 10756
sample_packing: true
&#32;
lora_r: 128
lora_alpha: 16
peft_use_rslora: true
lora_dropout: 0
lora_target_modules:
&#45; q_proj
&#45; v_proj
&#45; k_proj
&#45; o_proj
&#32;
lora_target_parameters:
&#45; mlp.experts.gate_up_proj
&#45; mlp.experts.down_proj
&#32;
lora_mlp_kernel: false
lora_qkv_kernel: false
lora_o_kernel: false
&#32;
gradient_accumulation_steps: 8
micro_batch_size: 1
num_epochs: 2
optimizer: adamw_torch_8bit
lr_scheduler: cosine
learning_rate: 1e&#45;5
&#32;
bf16: auto
tf32: false
&#32;
resume_from_checkpoint:
logging_steps: 1
flash_attention: true
&#32;
warmup_ratio: 0.1
evals_per_epoch: 3
saves_per_epoch: 3
&#32;
fsdp_config:
fsdp_version: 2
offload_params: false
cpu_ram_efficient_loading: false
auto_wrap_policy: TRANSFORMER_BASED_WRAP
transformer_layer_cls_to_wrap: Glm4MoeDecoderLayer
state_dict_type: FULL_STATE_DICT
sharding_strategy: FULL_SHARD
reshard_after_forward: true
activation_checkpointing: true
&#32;
&#35; save_first_step: true &#35; uncomment this to validate checkpoint saving works with your config</code></pre>
<br><div class="ib-cfg">Writing SFT (2&#215;H200)</div>
<pre><code>base_model: ApocalypseParty/GLM&#45;Air&#45;v4&#45;SFT&#45;1&#45;merged
eot_tokens:
&#45; "&lt;|user|&gt;"
&#45; "&lt;|endoftext|&gt;"
chat_template_jinja: ./glm_air.jinja
&#32;
plugins:
&#45; axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
&#32;
load_in_8bit: false
load_in_4bit: true
&#32;
quantize_moe_experts: true &#35; important
&#32;
datasets:
&#45; path: ./data/dataset_writing.jsonl
type: chat_template
&#32;
dataset_prepared_path: last_run_prepared
output_dir: ./GLM&#45;Air&#45;v4&#45;SFT&#45;1&#45;writing
&#32;
wandb_project: GLM&#45;Air&#45;v4&#45;SFT
wandb_name: GLM&#45;Air&#45;v4&#45;SFT&#45;1&#45;writing
&#32;
adapter: qlora
lora_model_dir:
&#32;
sequence_len: 4096
sample_packing: true
&#32;
lora_r: 16
lora_alpha: 32
lora_dropout: 0
lora_target_modules:
&#45; q_proj
&#45; v_proj
&#45; k_proj
&#45; o_proj
&#32;
lora_target_parameters:
&#45; mlp.experts.gate_up_proj
&#45; mlp.experts.down_proj
&#32;
lora_mlp_kernel: false
lora_qkv_kernel: false
lora_o_kernel: false
&#32;
gradient_accumulation_steps: 4
micro_batch_size: 2
num_epochs: 8
optimizer: adamw_torch_8bit
lr_scheduler: cosine
learning_rate: 9e&#45;6
&#32;
bf16: auto
tf32: false
&#32;
resume_from_checkpoint:
logging_steps: 1
flash_attention: true
&#32;
warmup_ratio: 0.1
saves_per_epoch: 1
&#32;
fsdp_config:
fsdp_version: 2
offload_params: false
cpu_ram_efficient_loading: false
auto_wrap_policy: TRANSFORMER_BASED_WRAP
transformer_layer_cls_to_wrap: Glm4MoeDecoderLayer
state_dict_type: FULL_STATE_DICT
sharding_strategy: FULL_SHARD
reshard_after_forward: true
activation_checkpointing: true
&#32;
&#35; save_first_step: true &#35; uncomment this to validate checkpoint saving works with your config</code></pre>
</div>
</details>
</div>
</div>
</div>
</div>
</body>
</html>