# Metadata for a single converted model package: identity (name, version),
# a human-readable description, provenance fields (license, author,
# framework, language, architecture) and a nested `parameters` mapping.
model_info:
  # The "ctx1024" suffix agrees with parameters.context_length below.
  name: anemll-google-gemma-3-4b-it-qat-int4-unquantized-ctx1024
  # Two dots, so this plain scalar is loaded as a string, not a number.
  # NOTE(review): unclear from this file whether this versions the model
  # package or the conversion tool — confirm.
  version: 0.3.5
  description: |
    Demonstrates running google-gemma-3-4b-it-qat-int4-unquantized on Apple Neural Engine
    Context length: 1024
    Batch size: 64
    Chunks: 2
  license: MIT
  author: Anemll
  framework: Core ML
  language: Python
  architecture: gemma3
  # Settings describing how this model package was built and which compiled
  # artifacts it consists of.
  # NOTE(review): the meaning of each key is inferred from key names and the
  # file names in this section; confirm against the code that reads this file.
  parameters:
    # Same value as "Context length: 1024" in the description above.
    context_length: 1024
    # Same value as "Batch size: 64" in the description above.
    batch_size: 64
    # lut_* keys: presumably look-up-table quantization settings per model
    # part (embeddings / FFN / LM head) — TODO confirm units and semantics.
    # `none` is a plain scalar that loads as the string "none", not YAML null.
    lut_embeddings: none
    # Agrees with the "lut4" fragment in the `ffn` file name below.
    lut_ffn: 4
    lut_ffn_per_channel: 4
    # Agrees with the "lut6" fragment in the `lm_head` file name below.
    lut_lmhead: 6
    lut_lmhead_per_channel: 4
    # Agrees with "Chunks: 2" in the description and "01of02" in `ffn` below.
    num_chunks: 2
    # Common prefix of the three artifact file names that follow.
    model_prefix: gemma3
    # No "lut" fragment in this file name, consistent with lut_embeddings: none.
    embeddings: gemma3_embeddings.mlmodelc
    lm_head: gemma3_lm_head_lut6.mlmodelc
    # Only the first chunk (01of02) is named here; presumably the loader
    # derives the remaining chunk name(s) from num_chunks — verify.
    ffn: gemma3_FFN_PF_lut4_chunk_01of02.mlmodelc
    # NOTE(review): presumably the number of pieces the LM head output is
    # split into — confirm against the consumer.
    split_lm_head: 16
    # Equal to context_length in this file.
    sliding_window: 1024