---
license: mit
datasets:
- zerofata/Instruct-Anime
- zerofata/Roleplay-Anime-Characters
- zerofata/Gemini-3.1-Pro-GLM5-Characters
- zerofata/Gemini-3.1-Pro-SmallWiki
base_model:
- zai-org/GLM-4.5-Air
---
<style>
.ib {
  --bg: #e4eef6;
  --panel: rgba(255,255,255,0.7);
  --accent: #4a9ec8;
  --accent2: #78c4e0;
  --accent3: #a0d8ef;
  --border: #b0cedf;
  --text: #1e3040;
  --muted: #5a7a90;
  --bright: #2884b0;
  --white: #ffffff;
  --crystal: rgba(74,158,200,0.12);
  --mono: 'JetBrains Mono', monospace;
  --sans: 'Inter', sans-serif;

  font-family: var(--sans);
  color: var(--text);
  background: var(--bg);
  max-width: 960px;
  margin: 0 auto;
  padding: 0 0 48px;
  line-height: 1.7;
  font-size: 1rem;
}

/* ── Hero ── */
.ib-hero { position: relative; border-bottom: 1px solid var(--accent2); margin: 0; }
.ib-hero img {
  display: block;
  width: 100%;
  margin: 0;
}
.ib-title {
  text-align: center;
  position: relative;
  z-index: 1;
}
.ib-card {
  display: inline-block;
  padding: 18px 52px;
  background: rgba(255,255,255,0.88);
  border: 1px solid var(--accent2);
  box-shadow: 0 4px 32px rgba(74,158,200,0.2);
  margin-top: -75px;
}
.ib-name {
  font-size: 2.6rem;
  font-weight: 900;
  letter-spacing: 6px;
  text-transform: uppercase;
  margin: 0 0 6px;
  line-height: 1;
  background: linear-gradient(90deg, var(--bright), var(--accent2), var(--bright));
  background-clip: text;
  -webkit-background-clip: text;
  -webkit-text-fill-color: transparent;
}
.ib-base {
  font-family: var(--mono);
  font-size: 0.68rem;
  color: var(--muted);
  letter-spacing: 3px;
  text-transform: uppercase;
}

/* ── Diamond separators ── */
.ib-sep {
  display: flex;
  align-items: center;
  margin: 0 32px;
  padding: 16px 0;
}
.ib-sep-line {
  flex: 1;
  height: 1px;
  background: linear-gradient(90deg, transparent, var(--accent2), transparent);
}
.ib-dia {
  width: 10px;
  height: 10px;
  background: var(--accent);
  transform: rotate(45deg);
  box-shadow: 0 0 8px rgba(74,158,200,0.35);
  margin: 0 14px;
  flex-shrink: 0;
}

/* ── Sections ── */
.ib-section {
  margin: 0 32px;
  padding: 32px;
  background: var(--panel);
  border: 1px solid var(--border);
  box-shadow: 0 2px 20px var(--crystal);
}

/* ── Section headers ── */
.ib-shead {
  text-align: center;
  margin-bottom: 24px;
}
.ib-emblem {
  width: 40px;
  height: 40px;
  border: 2px solid var(--accent2);
  transform: rotate(45deg);
  margin: 0 auto 14px;
  display: flex;
  align-items: center;
  justify-content: center;
  box-shadow: 0 0 14px rgba(74,158,200,0.15);
  background: var(--white);
}
.ib-glyph {
  transform: rotate(-45deg);
  font-size: 1rem;
  color: var(--accent);
  line-height: 1;
}
.ib-stitle {
  font-size: 1.5rem;
  font-weight: 800;
  letter-spacing: 4px;
  text-transform: uppercase;
  margin: 0 !important;
  padding: 0 !important;
  border: none !important;
  display: block;
  background: linear-gradient(90deg, var(--accent), var(--bright), var(--accent));
  background-clip: text;
  -webkit-background-clip: text;
  -webkit-text-fill-color: transparent;
}

/* ── Body ── */
.ib-sbody p { margin: 0 0 14px; font-size: 0.95rem; }
.ib-sbody p:last-child { margin-bottom: 0; }

/* ── Sub-headings ── */
.ib-sub {
  color: var(--bright) !important;
  font-size: 1.1rem !important;
  margin: 24px 0 14px !important;
  padding: 0 0 8px !important;
  font-weight: 700;
  text-transform: uppercase;
  letter-spacing: 2px;
  border: none !important;
  border-bottom: 2px solid var(--accent2) !important;
}

/* ── Data boxes ── */
.ib-data {
  background: var(--white);
  padding: 18px;
  border: 1px solid var(--border);
  border-left: 3px solid var(--accent);
  margin-bottom: 18px;
  box-shadow: 0 2px 12px var(--crystal);
  font-size: 0.95rem;
}
.ib-data:last-child { margin-bottom: 0; }
.ib-row {
  display: flex;
  align-items: center;
  margin-bottom: 8px;
  padding: 6px 0;
  border-bottom: 1px solid rgba(176,206,223,0.4);
}
.ib-row:last-child { margin-bottom: 0; border-bottom: none; }
.ib-mark {
  width: 6px;
  height: 6px;
  background: var(--accent);
  transform: rotate(45deg);
  box-shadow: 0 0 4px rgba(74,158,200,0.3);
  margin-right: 12px;
  flex-shrink: 0;
}
.ib-label {
  color: var(--muted);
  font-weight: 700;
  margin-right: 12px;
  min-width: 90px;
  text-transform: uppercase;
  letter-spacing: 1px;
  font-size: 0.88rem;
}

/* ── Links ── */
.ib a {
  color: var(--bright);
  text-decoration: none;
  font-weight: 600;
  border-bottom: 1px dotted var(--accent2);
}
.ib a:hover {
  color: var(--accent);
  border-bottom-style: solid;
}

/* ── Dropdown ── */
.ib-drop { margin-top: 24px; }
.ib-drop details {
  border: 1px solid var(--border);
  background: var(--white);
  box-shadow: 0 2px 12px var(--crystal);
}
.ib-drop summary {
  cursor: pointer;
  padding: 12px 18px;
  color: var(--muted);
  font-size: 1rem;
  font-weight: 700;
  text-transform: uppercase;
  letter-spacing: 2px;
  list-style: none;
  display: flex;
  align-items: center;
  gap: 12px;
}
.ib-drop summary::-webkit-details-marker { display: none; }
.ib-drop summary::before {
  content: '+';
  color: var(--accent);
  font-size: 1.1rem;
  font-weight: 700;
  line-height: 1;
  flex-shrink: 0;
}
.ib-drop details[open] summary::before { content: '−'; }
.ib-drop summary:hover { color: var(--bright); }
.ib-drop-body {
  padding: 18px;
  border-top: 1px solid var(--border);
  background: rgba(228,238,246,0.3);
}
.ib-drop-body p { margin: 0 0 12px; font-size: 0.9rem; }
.ib-cfg {
  color: var(--bright);
  font-size: 0.95rem;
  margin-bottom: 8px;
  text-transform: uppercase;
  letter-spacing: 2px;
  font-weight: 700;
}

/* ── Code ── */
.ib pre {
  background: #1a2a3a;
  padding: 14px 16px;
  margin: 0;
  border: 1px solid var(--accent2);
  border-left: 3px solid var(--accent);
  overflow-x: auto;
  color: #c8dce8;
  box-shadow: 0 2px 12px var(--crystal);
}
.ib pre code {
  font-family: var(--mono);
  font-size: 0.76rem;
  line-height: 1.6;
  background: none;
  color: inherit;
  padding: 0;
  display: block;
  border: none;
}
.ib code {
  font-family: var(--mono);
  color: var(--bright);
  background: rgba(74,158,200,0.08);
  padding: 2px 6px;
  border: 1px solid rgba(74,158,200,0.15);
}
</style>
<div class="ib">
<div class="ib-hero">
<img src="https://cdn-uploads.huggingface.co/production/uploads/65b19c6c638328850e12d38c/AsvE-KnBo6Zq2qJ92C_uc.png" alt="image">
</div>
<div class="ib-title">
<div class="ib-card">
<h1 class="ib-name">Iceblink</h1>
<span class="ib-base">Version 3 · GLM-4.5 Air</span>
</div>
</div>

<div class="ib-sep"><div class="ib-sep-line"></div><div class="ib-dia"></div><div class="ib-sep-line"></div></div>

<div class="ib-section">
<div class="ib-shead">
<div class="ib-emblem"><span class="ib-glyph">❊</span></div>
<span class="ib-stitle">Overview</span>
</div>
<div class="ib-sbody">
<p>Decided to try tuning Air again after Axolotl made some improvements to their training implementation, and now that I know a lot more about what I'm doing. And wow, I think this one came out pretty good.</p>
<p>This is a creative writing and RP model. It supports both reasoning and non-reasoning modes with the usual GLM Air templates, though reasoning off is generally recommended.</p>
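<p>A minimal sketch of toggling reasoning at the template level, assuming this tune keeps the <code>enable_thinking</code> chat-template flag documented for the base GLM-4.5 releases (the repo id below is a placeholder):</p>
<pre><code># Minimal sketch: render a prompt with reasoning disabled.
# Assumes the chat template honors enable_thinking, as documented for
# the base zai-org/GLM-4.5 models; the repo id is a placeholder.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("zerofata/GLM-4.5-Iceblink-v3-106B-A12B")
messages = [{"role": "user", "content": "Write a short scene in a frozen harbor town."}]
prompt = tok.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=False,
    enable_thinking=False,  # set True to allow reasoning
)
print(prompt)</code></pre>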
</div>
</div>

<div class="ib-sep"><div class="ib-sep-line"></div><div class="ib-dia"></div><div class="ib-sep-line"></div></div>

<div class="ib-section">
<div class="ib-shead">
<div class="ib-emblem"><span class="ib-glyph">❊</span></div>
<span class="ib-stitle">SillyTavern Settings</span>
</div>
<div class="ib-sbody">
<h3 class="ib-sub">Recommended Roleplay Format</h3>
<div class="ib-data">
<div class="ib-row">
<span class="ib-mark"></span>
<span class="ib-label">Actions:</span>
<span>In plaintext</span>
</div>
<div class="ib-row">
<span class="ib-mark"></span>
<span class="ib-label">Dialogue:</span>
<span>"In quotes"</span>
</div>
<div class="ib-row">
<span class="ib-mark"></span>
<span class="ib-label">Thoughts:</span>
<span>*In asterisks*</span>
</div>
</div>
<h3 class="ib-sub">Recommended Samplers</h3>
<div class="ib-data">
<div class="ib-row">
<span class="ib-mark"></span>
<span class="ib-label">Temp:</span>
<span>0.8 - 0.9</span>
</div>
<div class="ib-row">
<span class="ib-mark"></span>
<span class="ib-label">MinP:</span>
<span>0.05</span>
</div>
<div class="ib-row">
<span class="ib-mark"></span>
<span class="ib-label">TopP:</span>
<span>0.95 - 1.00</span>
</div>
</div>
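<p>A minimal sketch of passing these samplers to an OpenAI-compatible backend; the endpoint URL and model name are placeholders, and <code>min_p</code> goes through <code>extra_body</code> since it is not part of the OpenAI schema (llama.cpp server and vLLM both accept it there):</p>
<pre><code># Minimal sketch: the recommended samplers via an OpenAI-compatible API.
# Endpoint URL and model name below are placeholders for your backend.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8080/v1", api_key="none")
resp = client.chat.completions.create(
    model="iceblink-v3",
    messages=[{"role": "user", "content": "Hello."}],
    temperature=0.85,            # recommended 0.8 - 0.9
    top_p=0.95,                  # recommended 0.95 - 1.00
    extra_body={"min_p": 0.05},  # recommended 0.05
)
print(resp.choices[0].message.content)</code></pre>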
<h3 class="ib-sub">Instruct</h3>
<div class="ib-data">
<p style="margin: 0;">GLM4.5 (no thinking): <a href="https://huggingface.co/zerofata/GLM-4.5-Iceblink-106B-A12B/raw/main/GLM45-NoThink-SillyTavern-Preset.json">SillyTavern Preset</a></p>
</div>
</div>
</div>

<div class="ib-sep"><div class="ib-sep-line"></div><div class="ib-dia"></div><div class="ib-sep-line"></div></div>

<div class="ib-section">
<div class="ib-shead">
<div class="ib-emblem"><span class="ib-glyph">❊</span></div>
<span class="ib-stitle">Quantizations</span>
</div>
<div class="ib-sbody">
<h3 class="ib-sub">GGUF</h3>
<div class="ib-data">
<div class="ib-row">
<span class="ib-mark"></span>
<a href="https://huggingface.co/zerofata/GLM-4.5-Iceblink-v3-106B-A12B-GGUF">iMatrix</a>
</div>
</div>
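<p>A minimal sketch of running a quant locally with llama-cpp-python; the file name, context size, and GPU offload below are placeholders:</p>
<pre><code># Minimal sketch: load a GGUF quant with llama-cpp-python.
# File name, n_ctx, and n_gpu_layers are placeholders for your setup.
from llama_cpp import Llama

llm = Llama(
    model_path="./GLM-4.5-Iceblink-v3-106B-A12B-Q4_K_M.gguf",
    n_ctx=16384,
    n_gpu_layers=-1,  # offload all layers that fit on the GPU
)
out = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Hello."}],
    temperature=0.85,
    top_p=0.95,
    min_p=0.05,
)
print(out["choices"][0]["message"]["content"])</code></pre>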
</div>
</div>

<div class="ib-sep"><div class="ib-sep-line"></div><div class="ib-dia"></div><div class="ib-sep-line"></div></div>

<div class="ib-section">
<div class="ib-shead">
<div class="ib-emblem"><span class="ib-glyph">❊</span></div>
<span class="ib-stitle">Creation Process</span>
</div>
<div class="ib-sbody">
<p>Creation process: SFT > SFT, in two stages.</p>
<p>Stage 1: SFT on approximately 15.3 million tokens (11.7 million trainable) of SFW / NSFW RP, instruct, and chat data.</p>
<p>Stage 2: following an idea I saw from <a href="https://huggingface.co/ConicCat">ConicCat</a>, I trained the model for 8 epochs on 96 short stories (150k tokens) from light novels and human authors the internet said were good. This seems to have had a surprisingly positive effect on the prose without hurting the intelligence too much.</p>
<p>I went back to my usual higher LRs for this model. It turns out the GLM chat template was more cursed than I originally gave it credit for while training. It was a skill issue all along, go figure.</p>
<div class="ib-drop">
<details>
<summary>Axolotl Config</summary>
<div class="ib-drop-body">
<div class="ib-cfg">SFT (4×H200)</div>
<pre><code>base_model: zai-org/GLM-4.5-Air
eot_tokens:
- "<|user|>"
- "<|endoftext|>"
chat_template_jinja: ./glm_air.jinja
 
plugins:
- axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
 
load_in_8bit: false
load_in_4bit: true
 
quantize_moe_experts: true # important
 
datasets:
- path: ./data/nothink_dataset.jsonl
  type: chat_template
- path: ./data/think_dataset.jsonl
  type: chat_template
 
dataset_prepared_path: last_run_prepared
val_set_size: 0.01
output_dir: ./GLM-Air-v4-SFT-1
 
adapter: qlora
lora_model_dir:
 
sequence_len: 10756
sample_packing: true
 
lora_r: 128
lora_alpha: 16
peft_use_rslora: true
lora_dropout: 0
lora_target_modules:
- q_proj
- v_proj
- k_proj
- o_proj
 
lora_target_parameters:
- mlp.experts.gate_up_proj
- mlp.experts.down_proj
 
lora_mlp_kernel: false
lora_qkv_kernel: false
lora_o_kernel: false
 
gradient_accumulation_steps: 8
micro_batch_size: 1
num_epochs: 2
optimizer: adamw_torch_8bit
lr_scheduler: cosine
learning_rate: 1e-5
 
bf16: auto
tf32: false
 
resume_from_checkpoint:
logging_steps: 1
flash_attention: true
 
warmup_ratio: 0.1
evals_per_epoch: 3
saves_per_epoch: 3
 
fsdp_config:
  fsdp_version: 2
  offload_params: false
  cpu_ram_efficient_loading: false
  auto_wrap_policy: TRANSFORMER_BASED_WRAP
  transformer_layer_cls_to_wrap: Glm4MoeDecoderLayer
  state_dict_type: FULL_STATE_DICT
  sharding_strategy: FULL_SHARD
  reshard_after_forward: true
  activation_checkpointing: true
 
# save_first_step: true # uncomment this to validate checkpoint saving works with your config</code></pre>
<br><div class="ib-cfg">Writing SFT (2×H200)</div>
<pre><code>base_model: ApocalypseParty/GLM-Air-v4-SFT-1-merged
eot_tokens:
- "<|user|>"
- "<|endoftext|>"
chat_template_jinja: ./glm_air.jinja
 
plugins:
- axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
 
load_in_8bit: false
load_in_4bit: true
 
quantize_moe_experts: true # important
 
datasets:
- path: ./data/dataset_writing.jsonl
  type: chat_template
 
dataset_prepared_path: last_run_prepared
output_dir: ./GLM-Air-v4-SFT-1-writing
 
wandb_project: GLM-Air-v4-SFT
wandb_name: GLM-Air-v4-SFT-1-writing
 
adapter: qlora
lora_model_dir:
 
sequence_len: 4096
sample_packing: true
 
lora_r: 16
lora_alpha: 32
lora_dropout: 0
lora_target_modules:
- q_proj
- v_proj
- k_proj
- o_proj
 
lora_target_parameters:
- mlp.experts.gate_up_proj
- mlp.experts.down_proj
 
lora_mlp_kernel: false
lora_qkv_kernel: false
lora_o_kernel: false
 
gradient_accumulation_steps: 4
micro_batch_size: 2
num_epochs: 8
optimizer: adamw_torch_8bit
lr_scheduler: cosine
learning_rate: 9e-6
 
bf16: auto
tf32: false
 
resume_from_checkpoint:
logging_steps: 1
flash_attention: true
 
warmup_ratio: 0.1
saves_per_epoch: 1
 
fsdp_config:
  fsdp_version: 2
  offload_params: false
  cpu_ram_efficient_loading: false
  auto_wrap_policy: TRANSFORMER_BASED_WRAP
  transformer_layer_cls_to_wrap: Glm4MoeDecoderLayer
  state_dict_type: FULL_STATE_DICT
  sharding_strategy: FULL_SHARD
  reshard_after_forward: true
  activation_checkpointing: true
 
# save_first_step: true # uncomment this to validate checkpoint saving works with your config</code></pre>
</div>
</details>
</div>
</div>
</div>
</div>