Add CM3P model
Browse files
audio_feature_extractor/preprocessor_config.json
CHANGED
|
@@ -1,4 +1,7 @@
|
|
| 1 |
{
|
|
|
|
|
|
|
|
|
|
| 2 |
"chunk_length": 30,
|
| 3 |
"dither": 0.0,
|
| 4 |
"feature_extractor_type": "WhisperFeatureExtractor",
|
|
|
|
| 1 |
{
|
| 2 |
+
"auto_map": {
|
| 3 |
+
"AutoProcessor": "processing_cm3p.CM3PProcessor"
|
| 4 |
+
},
|
| 5 |
"chunk_length": 30,
|
| 6 |
"dither": 0.0,
|
| 7 |
"feature_extractor_type": "WhisperFeatureExtractor",
|
beatmap_parser/preprocessor_config.json
CHANGED
|
@@ -8,6 +8,9 @@
|
|
| 8 |
"add_sv": true,
|
| 9 |
"add_timing": true,
|
| 10 |
"add_timing_points": true,
|
|
|
|
|
|
|
|
|
|
| 11 |
"feature_extractor_type": "CM3PBeatmapParser",
|
| 12 |
"mania_bpm_normalized_scroll_speed": true,
|
| 13 |
"processor_class": "CM3PProcessor",
|
|
|
|
| 8 |
"add_sv": true,
|
| 9 |
"add_timing": true,
|
| 10 |
"add_timing_points": true,
|
| 11 |
+
"auto_map": {
|
| 12 |
+
"AutoProcessor": "processing_cm3p.CM3PProcessor"
|
| 13 |
+
},
|
| 14 |
"feature_extractor_type": "CM3PBeatmapParser",
|
| 15 |
"mania_bpm_normalized_scroll_speed": true,
|
| 16 |
"processor_class": "CM3PProcessor",
|
beatmap_tokenizer/tokenizer_config.json
CHANGED
|
@@ -87,6 +87,9 @@
|
|
| 87 |
"[AUDIO_EOS]",
|
| 88 |
"[AUDIO]"
|
| 89 |
],
|
|
|
|
|
|
|
|
|
|
| 90 |
"bos_token": "[BOS]",
|
| 91 |
"clean_up_tokenization_spaces": false,
|
| 92 |
"cls_token": "[CLS]",
|
|
|
|
| 87 |
"[AUDIO_EOS]",
|
| 88 |
"[AUDIO]"
|
| 89 |
],
|
| 90 |
+
"auto_map": {
|
| 91 |
+
"AutoProcessor": "processing_cm3p.CM3PProcessor"
|
| 92 |
+
},
|
| 93 |
"bos_token": "[BOS]",
|
| 94 |
"clean_up_tokenization_spaces": false,
|
| 95 |
"cls_token": "[CLS]",
|
metadata_tokenizer/tokenizer_config.json
CHANGED
|
@@ -162,6 +162,9 @@
|
|
| 162 |
"[SCROLL_SPEED_RATIO_UNK]",
|
| 163 |
"[TAG_UNK]"
|
| 164 |
],
|
|
|
|
|
|
|
|
|
|
| 165 |
"bos_token": "[BOS]",
|
| 166 |
"clean_up_tokenization_spaces": false,
|
| 167 |
"cls_token": "[CLS]",
|
|
|
|
| 162 |
"[SCROLL_SPEED_RATIO_UNK]",
|
| 163 |
"[TAG_UNK]"
|
| 164 |
],
|
| 165 |
+
"auto_map": {
|
| 166 |
+
"AutoProcessor": "processing_cm3p.CM3PProcessor"
|
| 167 |
+
},
|
| 168 |
"bos_token": "[BOS]",
|
| 169 |
"clean_up_tokenization_spaces": false,
|
| 170 |
"cls_token": "[CLS]",
|
processing_cm3p.py
CHANGED
|
@@ -7,11 +7,14 @@ from pathlib import Path
|
|
| 7 |
from typing import Optional, Union, IO, TypedDict
|
| 8 |
|
| 9 |
import numpy as np
|
|
|
|
| 10 |
from pandas import Series
|
| 11 |
from slider import Beatmap, HoldNote
|
| 12 |
from transformers import WhisperFeatureExtractor, AutoProcessor, BatchEncoding
|
| 13 |
-
from transformers.
|
|
|
|
| 14 |
from transformers.utils import is_torch_available, PaddingStrategy, PROCESSOR_NAME, logging
|
|
|
|
| 15 |
|
| 16 |
from .configuration_cm3p import CM3PConfig
|
| 17 |
from .parsing_cm3p import CM3PBeatmapParser, load_beatmap, get_song_length
|
|
@@ -132,6 +135,7 @@ class CM3PTokenizerKwargs(TypedDict, total=False):
|
|
| 132 |
class CM3PBeatmapKwargs(CM3PTokenizerKwargs, total=False):
|
| 133 |
window_length_sec: float
|
| 134 |
window_stride_sec: float
|
|
|
|
| 135 |
|
| 136 |
|
| 137 |
class CM3PAudioKwargs(AudioKwargs, total=False):
|
|
@@ -139,6 +143,7 @@ class CM3PAudioKwargs(AudioKwargs, total=False):
|
|
| 139 |
hop_length: Optional[int]
|
| 140 |
window_size: Optional[int]
|
| 141 |
audio_length_per_tok: Optional[int]
|
|
|
|
| 142 |
|
| 143 |
|
| 144 |
# noinspection PyTypedDict
|
|
@@ -166,6 +171,7 @@ class CM3PProcessorKwargs(CommonKwargs, CM3PBeatmapKwargs, CM3PTokenizerKwargs,
|
|
| 166 |
"hop_length": 160,
|
| 167 |
"window_size": 400,
|
| 168 |
"audio_length_per_tok": 8,
|
|
|
|
| 169 |
},
|
| 170 |
"common_kwargs": {
|
| 171 |
"return_tensors": "pt",
|
|
@@ -558,7 +564,7 @@ class CM3PProcessor(ProcessorMixin):
|
|
| 558 |
**beatmap_kwargs,
|
| 559 |
)
|
| 560 |
|
| 561 |
-
if
|
| 562 |
data = dict(beatmap_encoding)
|
| 563 |
data["input_features"] = self._retrieve_input_features(batch_audio, **audio_kwargs)
|
| 564 |
beatmap_encoding = BatchFeature(data, tensor_type=return_tensors)
|
|
@@ -572,7 +578,7 @@ class CM3PProcessor(ProcessorMixin):
|
|
| 572 |
},
|
| 573 |
tensor_type=return_tensors,
|
| 574 |
)
|
| 575 |
-
if
|
| 576 |
data = dict(beatmap_encoding)
|
| 577 |
data["input_features"] = torch.zeros((0, self.audio_feature_extractor.feature_size, max_source_positions), dtype=torch.float) if return_tensors == "pt" else []
|
| 578 |
beatmap_encoding = BatchFeature(data, tensor_type=return_tensors)
|
|
@@ -651,36 +657,91 @@ class CM3PProcessor(ProcessorMixin):
|
|
| 651 |
return self.beatmap_tokenizer.decode(*args, **kwargs)
|
| 652 |
|
| 653 |
def save_pretrained(self, save_directory, push_to_hub: bool = False, **kwargs):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 654 |
os.makedirs(save_directory, exist_ok=True)
|
| 655 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 656 |
for attribute_name in self.attributes:
|
| 657 |
attribute = getattr(self, attribute_name)
|
| 658 |
-
|
| 659 |
-
#
|
|
|
|
| 660 |
if hasattr(attribute, "_set_processor_class"):
|
| 661 |
# noinspection PyProtectedMember
|
| 662 |
attribute._set_processor_class(self.__class__.__name__)
|
|
|
|
| 663 |
attribute.save_pretrained(os.path.join(save_directory, attribute_name))
|
| 664 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 665 |
output_processor_file = os.path.join(save_directory, PROCESSOR_NAME)
|
| 666 |
-
self.
|
| 667 |
-
# noinspection PyUnresolvedReferences
|
| 668 |
-
logger.warning_once(f"processor saved in {output_processor_file}")
|
| 669 |
|
| 670 |
-
|
| 671 |
-
|
| 672 |
-
|
| 673 |
-
|
| 674 |
-
|
|
|
|
| 675 |
|
|
|
|
|
|
|
| 676 |
self._upload_modified_files(
|
| 677 |
save_directory,
|
| 678 |
repo_id,
|
| 679 |
files_timestamps,
|
| 680 |
commit_message=commit_message,
|
| 681 |
token=kwargs.get("token"),
|
|
|
|
|
|
|
|
|
|
| 682 |
)
|
| 683 |
|
|
|
|
|
|
|
| 684 |
return [output_processor_file]
|
| 685 |
|
| 686 |
@classmethod
|
|
@@ -700,6 +761,76 @@ class CM3PProcessor(ProcessorMixin):
|
|
| 700 |
|
| 701 |
return args
|
| 702 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 703 |
AutoProcessor.register(CM3PConfig, CM3PProcessor)
|
| 704 |
|
| 705 |
__all__ = ["CM3PProcessor", "get_metadata"]
|
|
|
|
| 7 |
from typing import Optional, Union, IO, TypedDict
|
| 8 |
|
| 9 |
import numpy as np
|
| 10 |
+
from huggingface_hub.errors import HfHubHTTPError
|
| 11 |
from pandas import Series
|
| 12 |
from slider import Beatmap, HoldNote
|
| 13 |
from transformers import WhisperFeatureExtractor, AutoProcessor, BatchEncoding
|
| 14 |
+
from transformers.dynamic_module_utils import custom_object_save
|
| 15 |
+
from transformers.tokenization_utils_base import TruncationStrategy, PreTrainedTokenizerBase
|
| 16 |
from transformers.utils import is_torch_available, PaddingStrategy, PROCESSOR_NAME, logging
|
| 17 |
+
from huggingface_hub import CommitOperationAdd, create_branch, create_commit
|
| 18 |
|
| 19 |
from .configuration_cm3p import CM3PConfig
|
| 20 |
from .parsing_cm3p import CM3PBeatmapParser, load_beatmap, get_song_length
|
|
|
|
| 135 |
class CM3PBeatmapKwargs(CM3PTokenizerKwargs, total=False):
|
| 136 |
window_length_sec: float
|
| 137 |
window_stride_sec: float
|
| 138 |
+
min_window_length_sec: float
|
| 139 |
|
| 140 |
|
| 141 |
class CM3PAudioKwargs(AudioKwargs, total=False):
|
|
|
|
| 143 |
hop_length: Optional[int]
|
| 144 |
window_size: Optional[int]
|
| 145 |
audio_length_per_tok: Optional[int]
|
| 146 |
+
device: Optional[str]
|
| 147 |
|
| 148 |
|
| 149 |
# noinspection PyTypedDict
|
|
|
|
| 171 |
"hop_length": 160,
|
| 172 |
"window_size": 400,
|
| 173 |
"audio_length_per_tok": 8,
|
| 174 |
+
"device": "cpu",
|
| 175 |
},
|
| 176 |
"common_kwargs": {
|
| 177 |
"return_tensors": "pt",
|
|
|
|
| 564 |
**beatmap_kwargs,
|
| 565 |
)
|
| 566 |
|
| 567 |
+
if all(a is not None for a in audio):
|
| 568 |
data = dict(beatmap_encoding)
|
| 569 |
data["input_features"] = self._retrieve_input_features(batch_audio, **audio_kwargs)
|
| 570 |
beatmap_encoding = BatchFeature(data, tensor_type=return_tensors)
|
|
|
|
| 578 |
},
|
| 579 |
tensor_type=return_tensors,
|
| 580 |
)
|
| 581 |
+
if all(a is not None for a in audio):
|
| 582 |
data = dict(beatmap_encoding)
|
| 583 |
data["input_features"] = torch.zeros((0, self.audio_feature_extractor.feature_size, max_source_positions), dtype=torch.float) if return_tensors == "pt" else []
|
| 584 |
beatmap_encoding = BatchFeature(data, tensor_type=return_tensors)
|
|
|
|
| 657 |
return self.beatmap_tokenizer.decode(*args, **kwargs)
|
| 658 |
|
| 659 |
def save_pretrained(self, save_directory, push_to_hub: bool = False, **kwargs):
    """
    Save the processor and its sub-components, with support for AutoProcessor remote code.

    This is a lightly adapted version of ProcessorMixin.save_pretrained:
    - child attributes are saved into subfolders (audio_feature_extractor/, beatmap_parser/, ...);
    - when self._auto_class is set (via register_for_auto_class), custom_object_save is used
      so that auto_map and dynamic modules are written correctly.

    Args:
        save_directory: Target directory (``str`` or ``os.PathLike``); created if missing.
        push_to_hub: When true, create the Hub repo and upload the files changed by this call.
        **kwargs: Hub options. ``commit_message`` and ``repo_id`` are consumed here; the
            remaining kwargs are forwarded to ``_create_repo``, and ``token``, ``create_pr``,
            ``revision`` and ``commit_description`` are also passed to the upload step.

    Returns:
        ``[path to the processor config file]``, or ``[]`` when the processor dict holds
        nothing but ``processor_class`` and the file is therefore not written.
    """
    os.makedirs(save_directory, exist_ok=True)

    # Handle Hub integration (same flow as ProcessorMixin.save_pretrained).
    if push_to_hub:
        commit_message = kwargs.pop("commit_message", None)
        # os.fspath() keeps pathlib.Path inputs working: the default below is evaluated
        # eagerly, so calling `.split` on a Path would fail even when repo_id is given.
        default_repo_id = os.fspath(save_directory).split(os.path.sep)[-1]
        repo_id = kwargs.pop("repo_id", default_repo_id)
        repo_id = self._create_repo(repo_id, **kwargs)
        files_timestamps = self._get_files_timestamps(save_directory)
    else:
        commit_message = None
        repo_id = None
        files_timestamps = None

    has_auto_class = self._auto_class is not None

    try:
        # If we have a custom processor registered for an Auto class,
        # save its code and dependencies as a dynamic module and
        # populate the auto_map field in processor_config.json.
        if has_auto_class:
            attrs = [getattr(self, attribute_name) for attribute_name in self.attributes]

            # For tokenizers, we pass their init_kwargs; for other objects, we pass the object itself.
            configs = [a.init_kwargs if isinstance(a, PreTrainedTokenizerBase) else a for a in attrs]

            # Include the processor itself so its class is exported.
            configs.append(self)

            custom_object_save(self, save_directory, config=configs)

        # Save each sub-component into its own subfolder.
        for attribute_name in self.attributes:
            attribute = getattr(self, attribute_name)

            # Include the processor class in the attribute config so this
            # processor can then be reloaded with the AutoProcessor API.
            if hasattr(attribute, "_set_processor_class"):
                # noinspection PyProtectedMember
                attribute._set_processor_class(self.__class__.__name__)

            attribute.save_pretrained(os.path.join(save_directory, attribute_name))
    finally:
        # Clean up the temporary auto_map injected into tokenizers, if any. Done in
        # `finally` so a failed save does not leave the in-memory tokenizers modified.
        if has_auto_class:
            for attribute_name in self.attributes:
                attribute = getattr(self, attribute_name)
                if isinstance(attribute, PreTrainedTokenizerBase):
                    attribute.init_kwargs.pop("auto_map", None)

    # Write processor_config.json (or equivalent).
    output_processor_file = os.path.join(save_directory, PROCESSOR_NAME)
    processor_dict = self.to_dict()

    # If processor_dict only contains processor_class, we skip writing the file,
    # matching the upstream behavior; otherwise we save it.
    only_processor_class = set(processor_dict.keys()) == {"processor_class"}
    if not only_processor_class:
        self.to_json_file(output_processor_file)
        # noinspection PyUnresolvedReferences
        logger.warning_once(f"processor saved in {output_processor_file}")

    # If requested, upload the modified files to the Hub.
    if push_to_hub:
        self._upload_modified_files(
            save_directory,
            repo_id,
            files_timestamps,
            commit_message=commit_message,
            token=kwargs.get("token"),
            create_pr=kwargs.get("create_pr", False),
            revision=kwargs.get("revision"),
            commit_description=kwargs.get("commit_description"),
        )

    if only_processor_class:
        return []
    return [output_processor_file]
|
| 746 |
|
| 747 |
@classmethod
|
|
|
|
| 761 |
|
| 762 |
return args
|
| 763 |
|
| 764 |
+
def _upload_modified_files(
    self,
    working_dir: Union[str, os.PathLike],
    repo_id: str,
    files_timestamps: dict[str, float],
    commit_message: Optional[str] = None,
    token: Optional[Union[bool, str]] = None,
    create_pr: bool = False,
    revision: Optional[str] = None,
    commit_description: Optional[str] = None,
):
    """
    Upload all files under `working_dir` that changed since `files_timestamps` was taken.

    Args:
        working_dir: Local directory whose contents are compared and uploaded.
        repo_id: Target Hub repository.
        files_timestamps: Modification times recorded before saving, keyed by root-level
            entry. Keys may be bare entry names or full path strings; both are accepted.
            NOTE(review): the inherited transformers `_get_files_timestamps` appears to key
            by bare name - confirm this class does not override it.
        commit_message: Commit title; defaults to "Upload CM3P processor".
        token: Hub token forwarded to `create_branch` / `create_commit`.
        create_pr: Forwarded to `create_commit`.
        revision: Branch to commit to; it is created first unless it starts with "refs/pr".
        commit_description: Forwarded to `create_commit`.

    Returns:
        Whatever `create_commit` returns.

    Raises:
        HfHubHTTPError: if creating the branch fails, except for a 403 while `create_pr`
            is set, which is ignored.
    """
    working_dir = Path(working_dir)
    files_timestamps = files_timestamps or {}

    if commit_message is None:
        commit_message = "Upload CM3P processor"

    def _recorded_mtime(entry: Path) -> Optional[float]:
        # Look the root-level entry up by bare name first, then by full path string,
        # so either key convention of `files_timestamps` matches.
        if entry.name in files_timestamps:
            return files_timestamps[entry.name]
        return files_timestamps.get(str(entry))

    operations = []
    for entry in sorted(working_dir.iterdir()):
        if entry.is_dir():
            # Walk sub-folders recursively and keep regular files only: the commit
            # operation is given file paths, never directories.
            candidates = sorted(p for p in entry.rglob("*") if p.is_file())
        elif entry.is_file():
            candidates = [entry]
        else:
            continue

        recorded = _recorded_mtime(entry)
        for path in candidates:
            # Compare each file individually rather than the directory entry: rewriting
            # a file in place updates the file's mtime but not its parent directory's.
            if recorded is None or path.stat().st_mtime > recorded:
                operations.append(
                    CommitOperationAdd(
                        path_or_fileobj=path, path_in_repo=path.relative_to(working_dir).as_posix()
                    )
                )

    if revision is not None and not revision.startswith("refs/pr"):
        try:
            create_branch(repo_id=repo_id, branch=revision, token=token, exist_ok=True)
        except HfHubHTTPError as e:
            if e.response.status_code == 403 and create_pr:
                # If we are creating a PR on a repo we don't have access to, we can't create the branch.
                # so let's assume the branch already exists. If it's not the case, an error will be raised when
                # calling `create_commit` below.
                pass
            else:
                raise

    logger.info(f"Uploading the following files to {repo_id}: {','.join([op.path_in_repo for op in operations])}")
    return create_commit(
        repo_id=repo_id,
        operations=operations,
        commit_message=commit_message,
        commit_description=commit_description,
        token=token,
        create_pr=create_pr,
        revision=revision,
    )
|
| 833 |
+
|
| 834 |
AutoProcessor.register(CM3PConfig, CM3PProcessor)
|
| 835 |
|
| 836 |
__all__ = ["CM3PProcessor", "get_metadata"]
|
processor_config.json
CHANGED
|
@@ -1,33 +1,33 @@
|
|
| 1 |
-
{
|
| 2 |
-
"
|
| 3 |
-
"
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
"
|
| 8 |
-
"
|
| 9 |
-
"
|
| 10 |
-
"
|
| 11 |
-
"
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
"
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
"
|
| 18 |
-
"
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
"
|
| 22 |
-
},
|
| 23 |
-
"
|
| 24 |
-
"
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
}
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"auto_map": {
|
| 3 |
+
"AutoProcessor": "processing_cm3p.CM3PProcessor"
|
| 4 |
+
},
|
| 5 |
+
"default_kwargs": {
|
| 6 |
+
"audio_kwargs": {
|
| 7 |
+
"audio_length_per_tok": 8,
|
| 8 |
+
"hop_length": 160,
|
| 9 |
+
"max_source_positions": 1600,
|
| 10 |
+
"pad_to_multiple_of": 256000,
|
| 11 |
+
"padding": false,
|
| 12 |
+
"sampling_rate": 16000,
|
| 13 |
+
"truncation": false,
|
| 14 |
+
"window_size": 400
|
| 15 |
+
},
|
| 16 |
+
"beatmap_kwargs": {
|
| 17 |
+
"max_length": 2000,
|
| 18 |
+
"padding": "longest",
|
| 19 |
+
"truncation": "longest_first",
|
| 20 |
+
"window_length_sec": 16.0,
|
| 21 |
+
"window_stride_sec": 16.0
|
| 22 |
+
},
|
| 23 |
+
"common_kwargs": {
|
| 24 |
+
"return_tensors": "pt"
|
| 25 |
+
},
|
| 26 |
+
"metadata_kwargs": {
|
| 27 |
+
"max_length": 128,
|
| 28 |
+
"padding": "longest",
|
| 29 |
+
"truncation": "longest_first"
|
| 30 |
+
}
|
| 31 |
+
},
|
| 32 |
+
"processor_class": "CM3PProcessor"
|
| 33 |
+
}
|