| { |
| "_class_name": "OpenSoraT2V", |
| "_diffusers_version": "0.28.0", |
| "activation_fn": "gelu-approximate", |
| "attention_bias": true, |
| "attention_head_dim": 96, |
| "attention_mode": "xformers", |
| "attention_type": "default", |
| "caption_channels": 4096, |
| "cross_attention_dim": 2304, |
| "decay": 0.9999, |
| "double_self_attention": false, |
| "downsampler": null, |
| "dropout": 0.0, |
| "in_channels": 4, |
| "interpolation_scale_h": 1.5, |
| "interpolation_scale_t": 1.0, |
| "interpolation_scale_w": 2.0, |
| "inv_gamma": 1.0, |
| "min_decay": 0.0, |
| "norm_elementwise_affine": false, |
| "norm_eps": 1e-06, |
| "norm_num_groups": 32, |
| "norm_type": "ada_norm_single", |
| "num_attention_heads": 24, |
| "num_embeds_ada_norm": 1000, |
| "num_layers": 32, |
| "num_vector_embeds": null, |
| "only_cross_attention": false, |
| "optimization_step": 16000, |
| "out_channels": 4, |
| "patch_size": 2, |
| "patch_size_t": 1, |
| "power": 0.6666666666666666, |
| "sample_size": [ |
| 90, |
| 160 |
| ], |
| "sample_size_t": 8, |
| "upcast_attention": false, |
| "update_after_step": 0, |
| "use_additional_conditions": null, |
| "use_ema_warmup": false, |
| "use_linear_projection": false, |
| "use_rope": true, |
| "use_stable_fp32": true |
| } |
|
|