File size: 8,314 Bytes
d1f3a2c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
{
    "activation_quant_modules": [
        "bert.encoder.albert_layer_groups.0.albert_layers.0.attention.dense",
        "bert.encoder.albert_layer_groups.0.albert_layers.0.attention.key",
        "bert.encoder.albert_layer_groups.0.albert_layers.0.attention.query",
        "bert.encoder.albert_layer_groups.0.albert_layers.0.attention.value",
        "bert.encoder.albert_layer_groups.0.albert_layers.0.ffn",
        "bert.encoder.albert_layer_groups.0.albert_layers.0.ffn_output",
        "bert.encoder.embedding_hidden_mapping_in",
        "bert_encoder",
        "decoder.asr_res.0",
        "decoder.decode.0.conv1",
        "decoder.decode.0.conv1x1",
        "decoder.decode.0.conv2",
        "decoder.decode.0.norm1.fc",
        "decoder.decode.0.norm2.fc",
        "decoder.decode.1.conv1",
        "decoder.decode.1.conv1x1",
        "decoder.decode.1.conv2",
        "decoder.decode.1.norm1.fc",
        "decoder.decode.1.norm2.fc",
        "decoder.decode.2.conv1",
        "decoder.decode.2.conv1x1",
        "decoder.decode.2.conv2",
        "decoder.decode.2.norm1.fc",
        "decoder.decode.2.norm2.fc",
        "decoder.decode.3.conv1",
        "decoder.decode.3.conv1x1",
        "decoder.decode.3.conv2",
        "decoder.decode.3.norm1.fc",
        "decoder.decode.3.norm2.fc",
        "decoder.encode.conv1",
        "decoder.encode.conv1x1",
        "decoder.encode.conv2",
        "decoder.encode.norm1.fc",
        "decoder.encode.norm2.fc",
        "decoder.generator.conv_post",
        "decoder.generator.noise_convs.0",
        "decoder.generator.noise_convs.1",
        "decoder.generator.noise_res.0.adain1.0.fc",
        "decoder.generator.noise_res.0.adain1.1.fc",
        "decoder.generator.noise_res.0.adain1.2.fc",
        "decoder.generator.noise_res.0.adain2.0.fc",
        "decoder.generator.noise_res.0.adain2.1.fc",
        "decoder.generator.noise_res.0.adain2.2.fc",
        "decoder.generator.noise_res.0.convs1.0",
        "decoder.generator.noise_res.0.convs1.1",
        "decoder.generator.noise_res.0.convs1.2",
        "decoder.generator.noise_res.0.convs2.0",
        "decoder.generator.noise_res.0.convs2.1",
        "decoder.generator.noise_res.0.convs2.2",
        "decoder.generator.noise_res.1.adain1.0.fc",
        "decoder.generator.noise_res.1.adain1.1.fc",
        "decoder.generator.noise_res.1.adain1.2.fc",
        "decoder.generator.noise_res.1.adain2.0.fc",
        "decoder.generator.noise_res.1.adain2.1.fc",
        "decoder.generator.noise_res.1.adain2.2.fc",
        "decoder.generator.noise_res.1.convs1.0",
        "decoder.generator.noise_res.1.convs1.1",
        "decoder.generator.noise_res.1.convs1.2",
        "decoder.generator.noise_res.1.convs2.0",
        "decoder.generator.noise_res.1.convs2.1",
        "decoder.generator.noise_res.1.convs2.2",
        "decoder.generator.resblocks.0.adain1.0.fc",
        "decoder.generator.resblocks.0.adain1.1.fc",
        "decoder.generator.resblocks.0.adain1.2.fc",
        "decoder.generator.resblocks.0.adain2.0.fc",
        "decoder.generator.resblocks.0.adain2.1.fc",
        "decoder.generator.resblocks.0.adain2.2.fc",
        "decoder.generator.resblocks.0.convs1.0",
        "decoder.generator.resblocks.0.convs1.1",
        "decoder.generator.resblocks.0.convs1.2",
        "decoder.generator.resblocks.0.convs2.0",
        "decoder.generator.resblocks.0.convs2.1",
        "decoder.generator.resblocks.0.convs2.2",
        "decoder.generator.resblocks.1.adain1.0.fc",
        "decoder.generator.resblocks.1.adain1.1.fc",
        "decoder.generator.resblocks.1.adain1.2.fc",
        "decoder.generator.resblocks.1.adain2.0.fc",
        "decoder.generator.resblocks.1.adain2.1.fc",
        "decoder.generator.resblocks.1.adain2.2.fc",
        "decoder.generator.resblocks.1.convs1.0",
        "decoder.generator.resblocks.1.convs1.1",
        "decoder.generator.resblocks.1.convs1.2",
        "decoder.generator.resblocks.1.convs2.0",
        "decoder.generator.resblocks.1.convs2.1",
        "decoder.generator.resblocks.1.convs2.2",
        "decoder.generator.resblocks.2.adain1.0.fc",
        "decoder.generator.resblocks.2.adain1.1.fc",
        "decoder.generator.resblocks.2.adain1.2.fc",
        "decoder.generator.resblocks.2.adain2.0.fc",
        "decoder.generator.resblocks.2.adain2.1.fc",
        "decoder.generator.resblocks.2.adain2.2.fc",
        "decoder.generator.resblocks.2.convs1.0",
        "decoder.generator.resblocks.2.convs1.1",
        "decoder.generator.resblocks.2.convs1.2",
        "decoder.generator.resblocks.2.convs2.0",
        "decoder.generator.resblocks.2.convs2.1",
        "decoder.generator.resblocks.2.convs2.2",
        "decoder.generator.resblocks.3.adain1.0.fc",
        "decoder.generator.resblocks.3.adain1.1.fc",
        "decoder.generator.resblocks.3.adain1.2.fc",
        "decoder.generator.resblocks.3.adain2.0.fc",
        "decoder.generator.resblocks.3.adain2.1.fc",
        "decoder.generator.resblocks.3.adain2.2.fc",
        "decoder.generator.resblocks.3.convs1.0",
        "decoder.generator.resblocks.3.convs1.1",
        "decoder.generator.resblocks.3.convs1.2",
        "decoder.generator.resblocks.3.convs2.0",
        "decoder.generator.resblocks.3.convs2.1",
        "decoder.generator.resblocks.3.convs2.2",
        "predictor.F0.0.conv1",
        "predictor.F0.0.conv2",
        "predictor.F0.0.norm1.fc",
        "predictor.F0.0.norm2.fc",
        "predictor.F0.1.conv1",
        "predictor.F0.1.conv1x1",
        "predictor.F0.1.conv2",
        "predictor.F0.1.norm1.fc",
        "predictor.F0.1.norm2.fc",
        "predictor.F0.2.conv1",
        "predictor.F0.2.conv2",
        "predictor.F0.2.norm1.fc",
        "predictor.F0.2.norm2.fc",
        "predictor.F0_proj",
        "predictor.N.0.conv1",
        "predictor.N.0.conv2",
        "predictor.N.0.norm1.fc",
        "predictor.N.0.norm2.fc",
        "predictor.N.1.conv1",
        "predictor.N.1.conv1x1",
        "predictor.N.1.conv2",
        "predictor.N.1.norm1.fc",
        "predictor.N.1.norm2.fc",
        "predictor.N.2.conv1",
        "predictor.N.2.conv2",
        "predictor.N.2.norm1.fc",
        "predictor.N.2.norm2.fc",
        "predictor.N_proj",
        "predictor.lstm",
        "predictor.shared",
        "predictor.text_encoder.lstms.0",
        "predictor.text_encoder.lstms.1.fc",
        "predictor.text_encoder.lstms.2",
        "predictor.text_encoder.lstms.3.fc",
        "predictor.text_encoder.lstms.4",
        "predictor.text_encoder.lstms.5.fc",
        "text_encoder.cnn.0.0",
        "text_encoder.cnn.1.0",
        "text_encoder.cnn.2.0",
        "text_encoder.lstm"
    ],
    "asr_res_dim": 64,
    "decoder_out_dim": 512,
    "hidden_dim": 512,
    "istftnet": {
        "resblock_kernel_sizes": [
            3,
            3
        ],
        "upsample_rates": [
            10,
            6
        ],
        "upsample_initial_channel": 512,
        "resblock_dilation_sizes": [
            [
                1,
                3,
                5
            ],
            [
                1,
                3,
                5
            ]
        ],
        "upsample_kernel_sizes": [
            20,
            12
        ],
        "gen_istft_n_fft": 20,
        "gen_istft_hop_size": 5
    },
    "max_conv_dim": 1024,
    "max_dur": 50,
    "model_type": "kitten_tts",
    "n_layer": 3,
    "n_mels": 80,
    "n_token": 178,
    "plbert": {
        "num_hidden_layers": 12,
        "num_attention_heads": 12,
        "hidden_size": 768,
        "intermediate_size": 2048,
        "max_position_embeddings": 512,
        "embedding_size": 128,
        "inner_group_num": 1,
        "num_hidden_groups": 1,
        "hidden_dropout_prob": 0.0,
        "attention_probs_dropout_prob": 0.0,
        "type_vocab_size": 2,
        "layer_norm_eps": 1e-12
    },
    "sample_rate": 24000,
    "speed_priors": {},
    "style_dim": 128,
    "text_encoder_kernel_size": 5,
    "voice_aliases": {
        "Bella": "expr-voice-2-f",
        "Jasper": "expr-voice-2-m",
        "Luna": "expr-voice-3-f",
        "Bruno": "expr-voice-3-m",
        "Rosie": "expr-voice-4-f",
        "Hugo": "expr-voice-4-m",
        "Kiki": "expr-voice-5-f",
        "Leo": "expr-voice-5-m"
    },
    "voices_path": "voices.npz"
}