Any-to-Any
Transformers
Safetensors
English
xoron
multimodal
Mixture of Experts
text-to-image
image editing
image to video
text-to-video
video editing
text-to-speech
speech-to-text
speech-to-speech
image-to-text
video-to-text
agentic
tool-use
flow-matching
3d-rope
titok
vidtok
dual-stream-attention
zero-shot-voice-cloning
bigvgan
snake-activation
multi-receptive-field-fusion
custom_code
Instructions to use Backup-bdg/Xoron-Dev-MultiMoe with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Backup-bdg/Xoron-Dev-MultiMoe with Transformers:
# Load model directly
from transformers import AutoModel
model = AutoModel.from_pretrained("Backup-bdg/Xoron-Dev-MultiMoe", trust_remote_code=True, dtype="auto")
- Notebooks
- Google Colab
- Kaggle
Update model weights after training (epoch 1, loss 4.2916)
Browse files
- audio_decoder.safetensors +1 -1
- cross_attention.safetensors +1 -1
- generator.safetensors +1 -1
- llm.safetensors +1 -1
- sample_images/epoch1_sample1.png +0 -0
- sample_images/epoch1_sample2.png +0 -0
- sample_images/epoch1_sample3.png +0 -0
- streaming_state.json +16 -16
- tokenizer.json +2 -2
- trainer_state.json +3 -3
- training_state.pt +2 -2
audio_decoder.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1458415836
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:37e38d5f45ab1bb841c51d43d1a830339c9554d7b7121b40c29543be6b8bb246
|
| 3 |
size 1458415836
|
cross_attention.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 174191400
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f7967eb8d40a1c69ade87aaae8872447a9f3b28418fadde6d1a8f876d96a6848
|
| 3 |
size 174191400
|
generator.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 629440508
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:89ea74af2d5ac45430e7dc96fd17ddf368b8baa87a4133046782ec32e7a63224
|
| 3 |
size 629440508
|
llm.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1506831304
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:96e14153b590b91954c672ba79a681cd40c08dfc82dcb9e5500e87c2d8d23c83
|
| 3 |
size 1506831304
|
sample_images/epoch1_sample1.png
ADDED
|
sample_images/epoch1_sample2.png
ADDED
|
sample_images/epoch1_sample3.png
ADDED
|
streaming_state.json
CHANGED
|
@@ -1,33 +1,33 @@
|
|
| 1 |
{
|
| 2 |
-
"epoch":
|
| 3 |
-
"unique_samples":
|
| 4 |
-
"total_yields":
|
| 5 |
"dataset_positions": {
|
| 6 |
-
"WebSight":
|
| 7 |
-
"ScienceQA":
|
| 8 |
-
"InstructPix2Pix":
|
| 9 |
-
"Flickr8k":
|
| 10 |
-
"NewYorker":
|
| 11 |
"Football": 6,
|
| 12 |
-
"MagicBrush":
|
| 13 |
},
|
| 14 |
"modality_positions": {
|
| 15 |
"text": {},
|
| 16 |
"image": {
|
| 17 |
-
"WebSight":
|
| 18 |
-
"ScienceQA":
|
| 19 |
-
"InstructPix2Pix":
|
| 20 |
-
"Flickr8k":
|
| 21 |
-
"NewYorker":
|
| 22 |
"Football": 6,
|
| 23 |
-
"MagicBrush":
|
| 24 |
},
|
| 25 |
"video": {},
|
| 26 |
"audio": {}
|
| 27 |
},
|
| 28 |
"modality_counts": {
|
| 29 |
"text": 0,
|
| 30 |
-
"image":
|
| 31 |
"video": 0,
|
| 32 |
"audio": 0
|
| 33 |
},
|
|
|
|
| 1 |
{
|
| 2 |
+
"epoch": 4,
|
| 3 |
+
"unique_samples": 800,
|
| 4 |
+
"total_yields": 1600,
|
| 5 |
"dataset_positions": {
|
| 6 |
+
"WebSight": 136,
|
| 7 |
+
"ScienceQA": 114,
|
| 8 |
+
"InstructPix2Pix": 136,
|
| 9 |
+
"Flickr8k": 136,
|
| 10 |
+
"NewYorker": 136,
|
| 11 |
"Football": 6,
|
| 12 |
+
"MagicBrush": 136
|
| 13 |
},
|
| 14 |
"modality_positions": {
|
| 15 |
"text": {},
|
| 16 |
"image": {
|
| 17 |
+
"WebSight": 136,
|
| 18 |
+
"ScienceQA": 114,
|
| 19 |
+
"InstructPix2Pix": 136,
|
| 20 |
+
"Flickr8k": 136,
|
| 21 |
+
"NewYorker": 136,
|
| 22 |
"Football": 6,
|
| 23 |
+
"MagicBrush": 136
|
| 24 |
},
|
| 25 |
"video": {},
|
| 26 |
"audio": {}
|
| 27 |
},
|
| 28 |
"modality_counts": {
|
| 29 |
"text": 0,
|
| 30 |
+
"image": 300,
|
| 31 |
"video": 0,
|
| 32 |
"audio": 0
|
| 33 |
},
|
tokenizer.json
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d27d705d6b96a348ab0adece83feb6480983d947f7be700666328942f89381b5
|
| 3 |
+
size 11523225
|
trainer_state.json
CHANGED
|
@@ -1,14 +1,14 @@
|
|
| 1 |
{
|
| 2 |
"best_model_checkpoint": "/kaggle/working/xoron-final",
|
| 3 |
-
"best_metric":
|
| 4 |
"epoch": 1,
|
| 5 |
"epochs_completed": 1,
|
| 6 |
-
"global_step":
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
| 9 |
"log_history": [],
|
| 10 |
"logging_steps": 50,
|
| 11 |
-
"max_steps":
|
| 12 |
"num_train_epochs": 1,
|
| 13 |
"total_flos": 0,
|
| 14 |
"train_batch_size": 1,
|
|
|
|
| 1 |
{
|
| 2 |
"best_model_checkpoint": "/kaggle/working/xoron-final",
|
| 3 |
+
"best_metric": 4.291641629748046,
|
| 4 |
"epoch": 1,
|
| 5 |
"epochs_completed": 1,
|
| 6 |
+
"global_step": 37,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
| 9 |
"log_history": [],
|
| 10 |
"logging_steps": 50,
|
| 11 |
+
"max_steps": 37,
|
| 12 |
"num_train_epochs": 1,
|
| 13 |
"total_flos": 0,
|
| 14 |
"train_batch_size": 1,
|
training_state.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9329bf26836f4da61c1deaa53cc9d62b880df8b96c33af2739c8288691dd2b3a
|
| 3 |
+
size 1419713437
|