---
# Model-merge configuration: one source slice, the merge method and base
# model, followed by compression / inference / routing / tuning option groups.
#
# NOTE(review): this file arrived with every line wrapped in table pipes and
# with all indentation stripped. The nesting below is reconstructed from key
# order; keys and values are unchanged. Confirm the structure against the
# schema of the tool that consumes this file.

slices:
  - sources:
      - model: lilmeaty/testing_semifinal
        # NOTE(review): start and end are equal. If the consumer treats the
        # range as half-open this selects zero layers - confirm it is intended.
        layer_range: [1, 1]
        parameters:
          weight: 0.3
          density: 0.2
          gamma: 0.005
          normalize: true
          int8_mask: true
          random_seed: 42
          temperature: 0.5
          top_p: 0.65
          inference: true
          max_tokens: 300
          stream: true
          # NOTE(review): placed under `parameters` by inference; it could
          # also be a sibling of `sources` or a top-level key - confirm.
          quantization:
            - method: int8
              value: 60
            - method: int4
              value: 40

merge_method: passthrough
base_model: huihui-ai/Llama-3.2-1B-Instruct-abliterated
dtype: float16

compression:
  pruning:
    enabled: true
    sparsity: 0.95
  distillation:
    enabled: true
    temperature: 0.7
    model_type: "distilled"
  quantization:
    enabled: true
    methods:
      - int8
      - int4

inference_optimizations:
  caching:
    enabled: true
    cache_size: 1000
  batching:
    enabled: true
    batch_size: 8
  parallelism:
    enabled: true
    workers: 4
  asynchronous:
    enabled: true
    max_concurrent_tasks: 5
  tensor_cores:
    enabled: true
  gpu:
    enabled: true
    device: cuda
  model_sharding:
    enabled: true
    shards: 2
  memory_optimization:
    enabled: true
    strategy: "offload"
  tensor_compression:
    enabled: true
    method: "tensor_factorization"

mixture_of_experts:
  enabled: true
  num_experts: 4
  gating_strategy: top_k
  top_k: 2
  load_balancing:
    enabled: true
    balance_factor: 0.5
  expert_capacity:
    max_tokens_per_expert: 512
  dynamic_routing:
    enabled: true
    routing_threshold: 0.1
  routing_optimizations:
    enabled: true
    cache_routing: true

model_sparsity:
  enabled: true
  sparsity_pattern: "block"
  mask_method: "random"
  pruning_factor: 0.98

auto_tuning:
  enabled: true
  batch_size_adaptation:
    enabled: true
    factor: 0.8
    max_batch_size: 32
  temperature_scheduling:
    enabled: true
    start_temp: 1.0
    end_temp: 0.5
    schedule: "linear"