---
license: mit
datasets:
- zerofata/Instruct-Anime
- zerofata/Roleplay-Anime-Characters
- zerofata/Gemini-3.1-Pro-GLM5-Characters
- zerofata/Gemini-3.1-Pro-SmallWiki
base_model:
- zai-org/GLM-4.5-Air
---
<style>
.ib {
  --bg: #e4eef6;
  --panel: rgba(255,255,255,0.7);
  --accent: #4a9ec8;
  --accent2: #78c4e0;
  --accent3: #a0d8ef;
  --border: #b0cedf;
  --text: #1e3040;
  --muted: #5a7a90;
  --bright: #2884b0;
  --white: #ffffff;
  --crystal: rgba(74,158,200,0.12);
  --mono: 'JetBrains Mono', monospace;
  --sans: 'Inter', sans-serif;

  font-family: var(--sans);
  color: var(--text);
  background: var(--bg);
  max-width: 960px;
  margin: 0 auto;
  padding: 0 0 48px;
  line-height: 1.7;
  font-size: 1rem;
}

/* ── Hero ── */
.ib-hero { position: relative; border-bottom: 1px solid var(--accent2); margin: 0; }
.ib-hero img {
  display: block;
  width: 100%;
  margin: 0;
}
.ib-title {
  text-align: center;
  position: relative;
  z-index: 1;
}
.ib-card {
  display: inline-block;
  padding: 18px 52px;
  background: rgba(255,255,255,0.88);
  border: 1px solid var(--accent2);
  box-shadow: 0 4px 32px rgba(74,158,200,0.2);
  margin-top: -75px;
}
.ib-name {
  font-size: 2.6rem;
  font-weight: 900;
  letter-spacing: 6px;
  text-transform: uppercase;
  margin: 0 0 6px;
  line-height: 1;
  background: linear-gradient(90deg, var(--bright), var(--accent2), var(--bright));
  background-clip: text;
  -webkit-background-clip: text;
  -webkit-text-fill-color: transparent;
}
.ib-base {
  font-family: var(--mono);
  font-size: 0.68rem;
  color: var(--muted);
  letter-spacing: 3px;
  text-transform: uppercase;
}

/* ── Diamond separators ── */
.ib-sep {
  display: flex;
  align-items: center;
  margin: 0 32px;
  padding: 16px 0;
}
.ib-sep-line {
  flex: 1;
  height: 1px;
  background: linear-gradient(90deg, transparent, var(--accent2), transparent);
}
.ib-dia {
  width: 10px;
  height: 10px;
  background: var(--accent);
  transform: rotate(45deg);
  box-shadow: 0 0 8px rgba(74,158,200,0.35);
  margin: 0 14px;
  flex-shrink: 0;
}

/* ── Sections ── */
.ib-section {
  margin: 0 32px;
  padding: 32px;
  background: var(--panel);
  border: 1px solid var(--border);
  box-shadow: 0 2px 20px var(--crystal);
}

/* ── Section headers ── */
.ib-shead {
  text-align: center;
  margin-bottom: 24px;
}
.ib-emblem {
  width: 40px;
  height: 40px;
  border: 2px solid var(--accent2);
  transform: rotate(45deg);
  margin: 0 auto 14px;
  display: flex;
  align-items: center;
  justify-content: center;
  box-shadow: 0 0 14px rgba(74,158,200,0.15);
  background: var(--white);
}
.ib-glyph {
  transform: rotate(-45deg);
  font-size: 1rem;
  color: var(--accent);
  line-height: 1;
}
.ib-stitle {
  font-size: 1.5rem;
  font-weight: 800;
  letter-spacing: 4px;
  text-transform: uppercase;
  margin: 0 !important;
  padding: 0 !important;
  border: none !important;
  display: block;
  background: linear-gradient(90deg, var(--accent), var(--bright), var(--accent));
  background-clip: text;
  -webkit-background-clip: text;
  -webkit-text-fill-color: transparent;
}

/* ── Body ── */
.ib-sbody p { margin: 0 0 14px; font-size: 0.95rem; }
.ib-sbody p:last-child { margin-bottom: 0; }

/* ── Sub-headings ── */
.ib-sub {
  color: var(--bright) !important;
  font-size: 1.1rem !important;
  margin: 24px 0 14px !important;
  padding: 0 0 8px !important;
  font-weight: 700;
  text-transform: uppercase;
  letter-spacing: 2px;
  border: none !important;
  border-bottom: 2px solid var(--accent2) !important;
}

/* ── Data boxes ── */
.ib-data {
  background: var(--white);
  padding: 18px;
  border: 1px solid var(--border);
  border-left: 3px solid var(--accent);
  margin-bottom: 18px;
  box-shadow: 0 2px 12px var(--crystal);
  font-size: 0.95rem;
}
.ib-data:last-child { margin-bottom: 0; }
.ib-row {
  display: flex;
  align-items: center;
  margin-bottom: 8px;
  padding: 6px 0;
  border-bottom: 1px solid rgba(176,206,223,0.4);
}
.ib-row:last-child { margin-bottom: 0; border-bottom: none; }
.ib-mark {
  width: 6px;
  height: 6px;
  background: var(--accent);
  transform: rotate(45deg);
  box-shadow: 0 0 4px rgba(74,158,200,0.3);
  margin-right: 12px;
  flex-shrink: 0;
}
.ib-label {
  color: var(--muted);
  font-weight: 700;
  margin-right: 12px;
  min-width: 90px;
  text-transform: uppercase;
  letter-spacing: 1px;
  font-size: 0.88rem;
}

/* ── Links ── */
.ib a {
  color: var(--bright);
  text-decoration: none;
  font-weight: 600;
  border-bottom: 1px dotted var(--accent2);
}
.ib a:hover {
  color: var(--accent);
  border-bottom-style: solid;
}

/* ── Dropdown ── */
.ib-drop { margin-top: 24px; }
.ib-drop details {
  border: 1px solid var(--border);
  background: var(--white);
  box-shadow: 0 2px 12px var(--crystal);
}
.ib-drop summary {
  cursor: pointer;
  padding: 12px 18px;
  color: var(--muted);
  font-size: 1rem;
  font-weight: 700;
  text-transform: uppercase;
  letter-spacing: 2px;
  list-style: none;
  display: flex;
  align-items: center;
  gap: 12px;
}
.ib-drop summary::-webkit-details-marker { display: none; }
.ib-drop summary::before {
  content: '+';
  color: var(--accent);
  font-size: 1.1rem;
  font-weight: 700;
  line-height: 1;
  flex-shrink: 0;
}
.ib-drop details[open] summary::before { content: '−'; }
.ib-drop summary:hover { color: var(--bright); }
.ib-drop-body {
  padding: 18px;
  border-top: 1px solid var(--border);
  background: rgba(228,238,246,0.3);
}
.ib-drop-body p { margin: 0 0 12px; font-size: 0.9rem; }
.ib-cfg {
  color: var(--bright);
  font-size: 0.95rem;
  margin-bottom: 8px;
  text-transform: uppercase;
  letter-spacing: 2px;
  font-weight: 700;
}

/* ── Code ── */
.ib pre {
  background: #1a2a3a;
  padding: 14px 16px;
  margin: 0;
  border: 1px solid var(--accent2);
  border-left: 3px solid var(--accent);
  overflow-x: auto;
  color: #c8dce8;
  box-shadow: 0 2px 12px var(--crystal);
}
.ib pre code {
  font-family: var(--mono);
  font-size: 0.76rem;
  line-height: 1.6;
  background: none;
  color: inherit;
  padding: 0;
  display: block;
  border: none;
}
.ib code {
  font-family: var(--mono);
  color: var(--bright);
  background: rgba(74,158,200,0.08);
  padding: 2px 6px;
  border: 1px solid rgba(74,158,200,0.15);
}
</style>
<div class="ib">
<div class="ib-hero">
<img src="https://cdn-uploads.huggingface.co/production/uploads/65b19c6c638328850e12d38c/AsvE-KnBo6Zq2qJ92C_uc.png" alt="image">
</div>
<div class="ib-title">
<div class="ib-card">
<h1 class="ib-name">Iceblink</h1>
<span class="ib-base">Version 3 · GLM-4.5 Air</span>
</div>
</div>

<div class="ib-sep"><div class="ib-sep-line"></div><div class="ib-dia"></div><div class="ib-sep-line"></div></div>

<div class="ib-section">
<div class="ib-shead">
<div class="ib-emblem"><span class="ib-glyph">❊</span></div>
<span class="ib-stitle">Overview</span>
</div>
<div class="ib-sbody">
<p>Decided to try tuning Air again after Axolotl made some improvements to their training implementation, and now that I know a lot more about what I'm doing. And wow, I think this one came out pretty good.</p>
<p>This is a creative writing and RP model. It supports both reasoning and non-reasoning modes with the usual GLM Air templates, though reasoning off is generally recommended.</p>
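<p>A minimal sketch of toggling reasoning at the template level, assuming this tune keeps the <code>enable_thinking</code> chat-template flag documented for the base GLM-4.5 releases (the repo id below is a placeholder):</p>
<pre><code># Minimal sketch: render a prompt with reasoning disabled.
# Assumes the chat template honors enable_thinking, as documented for
# the base zai-org/GLM-4.5 models; the repo id is a placeholder.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("zerofata/GLM-4.5-Iceblink-v3-106B-A12B")
messages = [{"role": "user", "content": "Write a short scene in a frozen harbor town."}]
prompt = tok.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=False,
    enable_thinking=False,  # set True to allow reasoning
)
print(prompt)</code></pre>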
</div>
</div>

<div class="ib-sep"><div class="ib-sep-line"></div><div class="ib-dia"></div><div class="ib-sep-line"></div></div>

<div class="ib-section">
<div class="ib-shead">
<div class="ib-emblem"><span class="ib-glyph">❊</span></div>
<span class="ib-stitle">SillyTavern Settings</span>
</div>
<div class="ib-sbody">
<h3 class="ib-sub">Recommended Roleplay Format</h3>
<div class="ib-data">
<div class="ib-row">
<span class="ib-mark"></span>
<span class="ib-label">Actions:</span>
<span>In plaintext</span>
</div>
<div class="ib-row">
<span class="ib-mark"></span>
<span class="ib-label">Dialogue:</span>
<span>"In quotes"</span>
</div>
<div class="ib-row">
<span class="ib-mark"></span>
<span class="ib-label">Thoughts:</span>
<span>*In asterisks*</span>
</div>
</div>
<h3 class="ib-sub">Recommended Samplers</h3>
<div class="ib-data">
<div class="ib-row">
<span class="ib-mark"></span>
<span class="ib-label">Temp:</span>
<span>0.8 - 0.9</span>
</div>
<div class="ib-row">
<span class="ib-mark"></span>
<span class="ib-label">MinP:</span>
<span>0.05</span>
</div>
<div class="ib-row">
<span class="ib-mark"></span>
<span class="ib-label">TopP:</span>
<span>0.95 - 1.00</span>
</div>
</div>
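<p>A minimal sketch of passing these samplers to an OpenAI-compatible backend; the endpoint URL and model name are placeholders, and <code>min_p</code> goes through <code>extra_body</code> since it is not part of the OpenAI schema (llama.cpp server and vLLM both accept it there):</p>
<pre><code># Minimal sketch: the recommended samplers via an OpenAI-compatible API.
# Endpoint URL and model name below are placeholders for your backend.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8080/v1", api_key="none")
resp = client.chat.completions.create(
    model="iceblink-v3",
    messages=[{"role": "user", "content": "Hello."}],
    temperature=0.85,            # recommended 0.8 - 0.9
    top_p=0.95,                  # recommended 0.95 - 1.00
    extra_body={"min_p": 0.05},  # recommended 0.05
)
print(resp.choices[0].message.content)</code></pre>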
<h3 class="ib-sub">Instruct</h3>
<div class="ib-data">
<p style="margin: 0;">GLM4.5 (no thinking): <a href="https://huggingface.co/zerofata/GLM-4.5-Iceblink-106B-A12B/raw/main/GLM45-NoThink-SillyTavern-Preset.json">SillyTavern Preset</a></p>
</div>
</div>
</div>

<div class="ib-sep"><div class="ib-sep-line"></div><div class="ib-dia"></div><div class="ib-sep-line"></div></div>

<div class="ib-section">
<div class="ib-shead">
<div class="ib-emblem"><span class="ib-glyph">❊</span></div>
<span class="ib-stitle">Quantizations</span>
</div>
<div class="ib-sbody">
<h3 class="ib-sub">GGUF</h3>
<div class="ib-data">
<div class="ib-row">
<span class="ib-mark"></span>
<a href="https://huggingface.co/zerofata/GLM-4.5-Iceblink-v3-106B-A12B-GGUF">iMatrix</a>
</div>
</div>
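<p>A minimal sketch of running a quant locally with llama-cpp-python; the file name, context size, and GPU offload below are placeholders:</p>
<pre><code># Minimal sketch: load a GGUF quant with llama-cpp-python.
# File name, n_ctx, and n_gpu_layers are placeholders for your setup.
from llama_cpp import Llama

llm = Llama(
    model_path="./GLM-4.5-Iceblink-v3-106B-A12B-Q4_K_M.gguf",
    n_ctx=16384,
    n_gpu_layers=-1,  # offload all layers that fit on the GPU
)
out = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Hello."}],
    temperature=0.85,
    top_p=0.95,
    min_p=0.05,
)
print(out["choices"][0]["message"]["content"])</code></pre>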
</div>
</div>

<div class="ib-sep"><div class="ib-sep-line"></div><div class="ib-dia"></div><div class="ib-sep-line"></div></div>

<div class="ib-section">
<div class="ib-shead">
<div class="ib-emblem"><span class="ib-glyph">❊</span></div>
<span class="ib-stitle">Creation Process</span>
</div>
<div class="ib-sbody">
<p>Creation process: SFT > SFT, in two stages.</p>
<p>Stage 1: SFT on approximately 15.3 million tokens (11.7 million trainable) of SFW / NSFW RP, instruct, and chat data.</p>
<p>Stage 2: following an idea I saw from <a href="https://huggingface.co/ConicCat">ConicCat</a>, I trained the model for 8 epochs on 96 short stories (150k tokens) from light novels and human authors the internet said were good. This seems to have had a surprisingly positive effect on the prose without hurting the intelligence too much.</p>
<p>I went back to my usual higher LRs for this model. It turns out the GLM chat template was more cursed than I originally gave it credit for while training. It was a skill issue all along, go figure.</p>
<div class="ib-drop">
<details>
<summary>Axolotl Config</summary>
<div class="ib-drop-body">
<div class="ib-cfg">SFT (4×H200)</div>
<pre><code>base_model: zai-org/GLM-4.5-Air
eot_tokens:
- "<|user|>"
- "<|endoftext|>"
chat_template_jinja: ./glm_air.jinja
 
plugins:
- axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
 
load_in_8bit: false
load_in_4bit: true
 
quantize_moe_experts: true # important
 
datasets:
- path: ./data/nothink_dataset.jsonl
  type: chat_template
- path: ./data/think_dataset.jsonl
  type: chat_template
 
dataset_prepared_path: last_run_prepared
val_set_size: 0.01
output_dir: ./GLM-Air-v4-SFT-1
 
adapter: qlora
lora_model_dir:
 
sequence_len: 10756
sample_packing: true
 
lora_r: 128
lora_alpha: 16
peft_use_rslora: true
lora_dropout: 0
lora_target_modules:
- q_proj
- v_proj
- k_proj
- o_proj
 
lora_target_parameters:
- mlp.experts.gate_up_proj
- mlp.experts.down_proj
 
lora_mlp_kernel: false
lora_qkv_kernel: false
lora_o_kernel: false
 
gradient_accumulation_steps: 8
micro_batch_size: 1
num_epochs: 2
optimizer: adamw_torch_8bit
lr_scheduler: cosine
learning_rate: 1e-5
 
bf16: auto
tf32: false
 
resume_from_checkpoint:
logging_steps: 1
flash_attention: true
 
warmup_ratio: 0.1
evals_per_epoch: 3
saves_per_epoch: 3
 
fsdp_config:
  fsdp_version: 2
  offload_params: false
  cpu_ram_efficient_loading: false
  auto_wrap_policy: TRANSFORMER_BASED_WRAP
  transformer_layer_cls_to_wrap: Glm4MoeDecoderLayer
  state_dict_type: FULL_STATE_DICT
  sharding_strategy: FULL_SHARD
  reshard_after_forward: true
  activation_checkpointing: true
 
# save_first_step: true # uncomment this to validate checkpoint saving works with your config</code></pre>
<br><div class="ib-cfg">Writing SFT (2×H200)</div>
<pre><code>base_model: ApocalypseParty/GLM-Air-v4-SFT-1-merged
eot_tokens:
- "<|user|>"
- "<|endoftext|>"
chat_template_jinja: ./glm_air.jinja
 
plugins:
- axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
 
load_in_8bit: false
load_in_4bit: true
 
quantize_moe_experts: true # important
 
datasets:
- path: ./data/dataset_writing.jsonl
  type: chat_template
 
dataset_prepared_path: last_run_prepared
output_dir: ./GLM-Air-v4-SFT-1-writing
 
wandb_project: GLM-Air-v4-SFT
wandb_name: GLM-Air-v4-SFT-1-writing
 
adapter: qlora
lora_model_dir:
 
sequence_len: 4096
sample_packing: true
 
lora_r: 16
lora_alpha: 32
lora_dropout: 0
lora_target_modules:
- q_proj
- v_proj
- k_proj
- o_proj
 
lora_target_parameters:
- mlp.experts.gate_up_proj
- mlp.experts.down_proj
 
lora_mlp_kernel: false
lora_qkv_kernel: false
lora_o_kernel: false
 
gradient_accumulation_steps: 4
micro_batch_size: 2
num_epochs: 8
optimizer: adamw_torch_8bit
lr_scheduler: cosine
learning_rate: 9e-6
 
bf16: auto
tf32: false
 
resume_from_checkpoint:
logging_steps: 1
flash_attention: true
 
warmup_ratio: 0.1
saves_per_epoch: 1
 
fsdp_config:
  fsdp_version: 2
  offload_params: false
  cpu_ram_efficient_loading: false
  auto_wrap_policy: TRANSFORMER_BASED_WRAP
  transformer_layer_cls_to_wrap: Glm4MoeDecoderLayer
  state_dict_type: FULL_STATE_DICT
  sharding_strategy: FULL_SHARD
  reshard_after_forward: true
  activation_checkpointing: true
 
# save_first_step: true # uncomment this to validate checkpoint saving works with your config</code></pre>
</div>
</details>
</div>
</div>
</div>
</div>