anemll's picture
Upload folder using huggingface_hub
301eb4a verified
raw
history blame
753 Bytes
# Model metadata for a Core ML build of google-gemma-3-4b-it-qat-int4-unquantized.
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been lost; the split between `model_info` and `parameters`
# children follows the section headers — confirm against the consuming loader.
model_info:
  name: anemll-google-gemma-3-4b-it-qat-int4-unquantized-ctx1024
  # Quoted so it is always read as a string, never as a number.
  version: "0.3.5"
  # Literal block scalar: line breaks are preserved, one trailing newline kept.
  description: |
    Demonstrates running google-gemma-3-4b-it-qat-int4-unquantized on Apple Neural Engine
    Context length: 1024
    Batch size: 64
    Chunks: 2
  license: MIT
  author: Anemll
  framework: Core ML
  language: Python
  architecture: gemma3
  parameters:
    # These three values mirror the figures quoted in `description` above;
    # keep them in sync when either changes.
    context_length: 1024
    batch_size: 64
    # Plain scalar `none` is the string "none" in YAML, not a null value.
    lut_embeddings: none
    # The lut_ffn / lut_lmhead values match the `lut4` / `lut6` suffixes in the
    # artifact file names below.
    # NOTE(review): presumably LUT quantization bit widths — confirm.
    lut_ffn: 4
    lut_ffn_per_channel: 4
    lut_lmhead: 6
    lut_lmhead_per_channel: 4
    # Matches the `of02` suffix of the `ffn` file name.
    num_chunks: 2
    model_prefix: gemma3
    embeddings: gemma3_embeddings.mlmodelc
    lm_head: gemma3_lm_head_lut6.mlmodelc
    # Only the first chunk file is named here.
    # NOTE(review): presumably the loader derives the remaining chunk names
    # from this one and `num_chunks` — confirm.
    ffn: gemma3_FFN_PF_lut4_chunk_01of02.mlmodelc
    split_lm_head: 16
    sliding_window: 1024