LLAMA_520M_CONFIG_DICT = dict(
    vocab_size=8,
    max_position_embeddings=131072,
    hidden_size=1024,
    intermediate_size=4096,
    num_hidden_layers=30,
    num_attention_heads=16,
    attn_implementation="sdpa",
    head_dim=64,
    tie_word_embeddings=False,
    hidden_act="silu",
    attention_bias=False,
    attention_dropout=0.0,
    initializer_range=0.02,
    mlp_bias=False,
    model_type="llama",
    num_key_value_heads=16,
    pretraining_tp=1,
    rms_norm_eps=1e-05,
    rope_scaling=dict(
        factor=8.0,
        high_freq_factor=4.0,
        low_freq_factor=1.0,
        original_max_position_embeddings=8192,
        rope_type="llama3",
    ),
    rope_theta=500000.0,
    torch_dtype="bfloat16",
    use_cache=True,
)
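

# A minimal usage sketch, assuming the Hugging Face `transformers` package is
# available; the helper name `build_llama_520m` is illustrative, not defined
# elsewhere in this module. It turns the dict above into a LlamaConfig and a
# randomly initialized LlamaForCausalLM.
def build_llama_520m():
    from transformers import LlamaConfig, LlamaForCausalLM

    # Keys the config class does not consume directly are kept as extra
    # attributes on the resulting config object.
    config = LlamaConfig(**LLAMA_520M_CONFIG_DICT)
    return LlamaForCausalLM(config)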
|
|
GPT2_MEDIUM_CONFIG = {
    "activation_function": "gelu_new",
    "architectures": [
        "GPT2LMHeadModel"
    ],
    "attn_pdrop": 0.1,
    "bos_token_id": 50256,
    "embd_pdrop": 0.1,
    "eos_token_id": 50256,
    "initializer_range": 0.02,
    "layer_norm_epsilon": 1e-05,
    "model_type": "gpt2",
    "n_ctx": 8196,
    "n_embd": 1024,
    "hidden_size": 1024,
    "n_head": 16,
    "n_layer": 24,
    "n_positions": 8196,
    "n_special": 0,
    "predict_special_tokens": True,
    "resid_pdrop": 0.1,
    "summary_activation": None,
    "summary_first_dropout": 0.1,
    "summary_proj_to_labels": True,
    "summary_type": "cls_index",
    "summary_use_proj": True,
    "task_specific_params": {
        "text-generation": {
            "do_sample": True,
            "max_length": 50
        }
    },
    "vocab_size": 50276,
}
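

# A similar sketch for the GPT-2 config above (again assuming `transformers`;
# the helper name is illustrative). The dict's extra keys are stored as plain
# attributes on the config object.
def build_gpt2_medium():
    from transformers import GPT2Config, GPT2LMHeadModel

    config = GPT2Config(**GPT2_MEDIUM_CONFIG)
    return GPT2LMHeadModel(config)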
|
|
LLAMA_CONFIGS = {
    "Llama_520M": LLAMA_520M_CONFIG_DICT,
    "GPT2_medium": GPT2_MEDIUM_CONFIG,
}
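

# A hedged sketch of how the registry above might be consumed (the helper name
# `get_config` is illustrative, and it assumes `transformers`): look up a config
# dict by name and dispatch on its "model_type" field to build the matching
# Hugging Face config class, e.g. get_config("Llama_520M").
def get_config(name: str):
    from transformers import AutoConfig

    cfg = dict(LLAMA_CONFIGS[name])      # copy so the registry entry stays untouched
    model_type = cfg.pop("model_type")   # e.g. "llama" or "gpt2"
    return AutoConfig.for_model(model_type, **cfg)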
|
|