OliBomby committed on
Commit
63af269
·
verified ·
1 Parent(s): 17137e3

Add CM3P model

Browse files
audio_feature_extractor/preprocessor_config.json CHANGED
@@ -1,4 +1,7 @@
1
  {
 
 
 
2
  "chunk_length": 30,
3
  "dither": 0.0,
4
  "feature_extractor_type": "WhisperFeatureExtractor",
 
1
  {
2
+ "auto_map": {
3
+ "AutoProcessor": "processing_cm3p.CM3PProcessor"
4
+ },
5
  "chunk_length": 30,
6
  "dither": 0.0,
7
  "feature_extractor_type": "WhisperFeatureExtractor",
beatmap_parser/preprocessor_config.json CHANGED
@@ -8,6 +8,9 @@
8
  "add_sv": true,
9
  "add_timing": true,
10
  "add_timing_points": true,
 
 
 
11
  "feature_extractor_type": "CM3PBeatmapParser",
12
  "mania_bpm_normalized_scroll_speed": true,
13
  "processor_class": "CM3PProcessor",
 
8
  "add_sv": true,
9
  "add_timing": true,
10
  "add_timing_points": true,
11
+ "auto_map": {
12
+ "AutoProcessor": "processing_cm3p.CM3PProcessor"
13
+ },
14
  "feature_extractor_type": "CM3PBeatmapParser",
15
  "mania_bpm_normalized_scroll_speed": true,
16
  "processor_class": "CM3PProcessor",
beatmap_tokenizer/tokenizer_config.json CHANGED
@@ -87,6 +87,9 @@
87
  "[AUDIO_EOS]",
88
  "[AUDIO]"
89
  ],
 
 
 
90
  "bos_token": "[BOS]",
91
  "clean_up_tokenization_spaces": false,
92
  "cls_token": "[CLS]",
 
87
  "[AUDIO_EOS]",
88
  "[AUDIO]"
89
  ],
90
+ "auto_map": {
91
+ "AutoProcessor": "processing_cm3p.CM3PProcessor"
92
+ },
93
  "bos_token": "[BOS]",
94
  "clean_up_tokenization_spaces": false,
95
  "cls_token": "[CLS]",
metadata_tokenizer/tokenizer_config.json CHANGED
@@ -162,6 +162,9 @@
162
  "[SCROLL_SPEED_RATIO_UNK]",
163
  "[TAG_UNK]"
164
  ],
 
 
 
165
  "bos_token": "[BOS]",
166
  "clean_up_tokenization_spaces": false,
167
  "cls_token": "[CLS]",
 
162
  "[SCROLL_SPEED_RATIO_UNK]",
163
  "[TAG_UNK]"
164
  ],
165
+ "auto_map": {
166
+ "AutoProcessor": "processing_cm3p.CM3PProcessor"
167
+ },
168
  "bos_token": "[BOS]",
169
  "clean_up_tokenization_spaces": false,
170
  "cls_token": "[CLS]",
processing_cm3p.py CHANGED
@@ -7,11 +7,14 @@ from pathlib import Path
7
  from typing import Optional, Union, IO, TypedDict
8
 
9
  import numpy as np
 
10
  from pandas import Series
11
  from slider import Beatmap, HoldNote
12
  from transformers import WhisperFeatureExtractor, AutoProcessor, BatchEncoding
13
- from transformers.tokenization_utils_base import TruncationStrategy
 
14
  from transformers.utils import is_torch_available, PaddingStrategy, PROCESSOR_NAME, logging
 
15
 
16
  from .configuration_cm3p import CM3PConfig
17
  from .parsing_cm3p import CM3PBeatmapParser, load_beatmap, get_song_length
@@ -132,6 +135,7 @@ class CM3PTokenizerKwargs(TypedDict, total=False):
132
  class CM3PBeatmapKwargs(CM3PTokenizerKwargs, total=False):
133
  window_length_sec: float
134
  window_stride_sec: float
 
135
 
136
 
137
  class CM3PAudioKwargs(AudioKwargs, total=False):
@@ -139,6 +143,7 @@ class CM3PAudioKwargs(AudioKwargs, total=False):
139
  hop_length: Optional[int]
140
  window_size: Optional[int]
141
  audio_length_per_tok: Optional[int]
 
142
 
143
 
144
  # noinspection PyTypedDict
@@ -166,6 +171,7 @@ class CM3PProcessorKwargs(CommonKwargs, CM3PBeatmapKwargs, CM3PTokenizerKwargs,
166
  "hop_length": 160,
167
  "window_size": 400,
168
  "audio_length_per_tok": 8,
 
169
  },
170
  "common_kwargs": {
171
  "return_tensors": "pt",
@@ -558,7 +564,7 @@ class CM3PProcessor(ProcessorMixin):
558
  **beatmap_kwargs,
559
  )
560
 
561
- if audio is not None:
562
  data = dict(beatmap_encoding)
563
  data["input_features"] = self._retrieve_input_features(batch_audio, **audio_kwargs)
564
  beatmap_encoding = BatchFeature(data, tensor_type=return_tensors)
@@ -572,7 +578,7 @@ class CM3PProcessor(ProcessorMixin):
572
  },
573
  tensor_type=return_tensors,
574
  )
575
- if audio is not None:
576
  data = dict(beatmap_encoding)
577
  data["input_features"] = torch.zeros((0, self.audio_feature_extractor.feature_size, max_source_positions), dtype=torch.float) if return_tensors == "pt" else []
578
  beatmap_encoding = BatchFeature(data, tensor_type=return_tensors)
@@ -651,36 +657,91 @@ class CM3PProcessor(ProcessorMixin):
651
  return self.beatmap_tokenizer.decode(*args, **kwargs)
652
 
653
  def save_pretrained(self, save_directory, push_to_hub: bool = False, **kwargs):
 
 
 
 
 
 
 
 
654
  os.makedirs(save_directory, exist_ok=True)
655
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
656
  for attribute_name in self.attributes:
657
  attribute = getattr(self, attribute_name)
658
- # Include the processor class in the attribute config so this processor can then be reloaded with the
659
- # `AutoProcessor` API.
 
660
  if hasattr(attribute, "_set_processor_class"):
661
  # noinspection PyProtectedMember
662
  attribute._set_processor_class(self.__class__.__name__)
 
663
  attribute.save_pretrained(os.path.join(save_directory, attribute_name))
664
 
 
 
 
 
 
 
 
 
665
  output_processor_file = os.path.join(save_directory, PROCESSOR_NAME)
666
- self.to_json_file(output_processor_file)
667
- # noinspection PyUnresolvedReferences
668
- logger.warning_once(f"processor saved in {output_processor_file}")
669
 
670
- if push_to_hub:
671
- commit_message = kwargs.pop("commit_message", None)
672
- repo_id = kwargs.pop("repo_id", save_directory.split(os.path.sep)[-1])
673
- repo_id = self._create_repo(repo_id, **kwargs)
674
- files_timestamps = self._get_files_timestamps(save_directory)
 
675
 
 
 
676
  self._upload_modified_files(
677
  save_directory,
678
  repo_id,
679
  files_timestamps,
680
  commit_message=commit_message,
681
  token=kwargs.get("token"),
 
 
 
682
  )
683
 
 
 
684
  return [output_processor_file]
685
 
686
  @classmethod
@@ -700,6 +761,76 @@ class CM3PProcessor(ProcessorMixin):
700
 
701
  return args
702
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
703
  AutoProcessor.register(CM3PConfig, CM3PProcessor)
704
 
705
  __all__ = ["CM3PProcessor", "get_metadata"]
 
7
  from typing import Optional, Union, IO, TypedDict
8
 
9
  import numpy as np
10
+ from huggingface_hub.errors import HfHubHTTPError
11
  from pandas import Series
12
  from slider import Beatmap, HoldNote
13
  from transformers import WhisperFeatureExtractor, AutoProcessor, BatchEncoding
14
+ from transformers.dynamic_module_utils import custom_object_save
15
+ from transformers.tokenization_utils_base import TruncationStrategy, PreTrainedTokenizerBase
16
  from transformers.utils import is_torch_available, PaddingStrategy, PROCESSOR_NAME, logging
17
+ from huggingface_hub import CommitOperationAdd, create_branch, create_commit
18
 
19
  from .configuration_cm3p import CM3PConfig
20
  from .parsing_cm3p import CM3PBeatmapParser, load_beatmap, get_song_length
 
135
  class CM3PBeatmapKwargs(CM3PTokenizerKwargs, total=False):
136
  window_length_sec: float
137
  window_stride_sec: float
138
+ min_window_length_sec: float
139
 
140
 
141
  class CM3PAudioKwargs(AudioKwargs, total=False):
 
143
  hop_length: Optional[int]
144
  window_size: Optional[int]
145
  audio_length_per_tok: Optional[int]
146
+ device: Optional[str]
147
 
148
 
149
  # noinspection PyTypedDict
 
171
  "hop_length": 160,
172
  "window_size": 400,
173
  "audio_length_per_tok": 8,
174
+ "device": "cpu",
175
  },
176
  "common_kwargs": {
177
  "return_tensors": "pt",
 
564
  **beatmap_kwargs,
565
  )
566
 
567
+ if all(a is not None for a in audio):
568
  data = dict(beatmap_encoding)
569
  data["input_features"] = self._retrieve_input_features(batch_audio, **audio_kwargs)
570
  beatmap_encoding = BatchFeature(data, tensor_type=return_tensors)
 
578
  },
579
  tensor_type=return_tensors,
580
  )
581
+ if all(a is not None for a in audio):
582
  data = dict(beatmap_encoding)
583
  data["input_features"] = torch.zeros((0, self.audio_feature_extractor.feature_size, max_source_positions), dtype=torch.float) if return_tensors == "pt" else []
584
  beatmap_encoding = BatchFeature(data, tensor_type=return_tensors)
 
657
  return self.beatmap_tokenizer.decode(*args, **kwargs)
658
 
659
  def save_pretrained(self, save_directory, push_to_hub: bool = False, **kwargs):
660
+ """
661
+ Save processor and its sub-components, with support for AutoProcessor remote code.
662
+
663
+ This is a lightly adapted version of ProcessorMixin.save_pretrained:
664
+ - child attributes are saved into subfolders (audio_feature_extractor/, beatmap_parser/, ...);
665
+ - when self._auto_class is set (via register_for_auto_class), custom_object_save is used
666
+ so that auto_map and dynamic modules are written correctly.
667
+ """
668
  os.makedirs(save_directory, exist_ok=True)
669
 
670
+ # Handle Hub integration (same as ProcessorMixin / your existing code)
671
+ if push_to_hub:
672
+ commit_message = kwargs.pop("commit_message", None)
673
+ repo_id = kwargs.pop("repo_id", save_directory.split(os.path.sep)[-1])
674
+ repo_id = self._create_repo(repo_id, **kwargs)
675
+ files_timestamps = self._get_files_timestamps(save_directory)
676
+ else:
677
+ commit_message = None
678
+ repo_id = None
679
+ files_timestamps = None
680
+
681
+ # If we have a custom processor registered for an Auto class,
682
+ # save its code and dependencies as a dynamic module and
683
+ # populate the auto_map field in processor_config.json.
684
+ if self._auto_class is not None:
685
+ attrs = [getattr(self, attribute_name) for attribute_name in self.attributes]
686
+
687
+ # For tokenizers, we pass their init_kwargs; for other objects, we pass the object itself.
688
+ configs = []
689
+ for a in attrs:
690
+ if isinstance(a, PreTrainedTokenizerBase):
691
+ configs.append(a.init_kwargs)
692
+ else:
693
+ configs.append(a)
694
+
695
+ # Include the processor itself so its class is exported.
696
+ configs.append(self)
697
+
698
+ custom_object_save(self, save_directory, config=configs)
699
+
700
+ # Save each sub-component into its own subfolder
701
  for attribute_name in self.attributes:
702
  attribute = getattr(self, attribute_name)
703
+
704
+ # Include the processor class in the attribute config so this
705
+ # processor can then be reloaded with the AutoProcessor API.
706
  if hasattr(attribute, "_set_processor_class"):
707
  # noinspection PyProtectedMember
708
  attribute._set_processor_class(self.__class__.__name__)
709
+
710
  attribute.save_pretrained(os.path.join(save_directory, attribute_name))
711
 
712
+ # Clean up temporary auto_map injected into tokenizers, if any
713
+ if self._auto_class is not None:
714
+ for attribute_name in self.attributes:
715
+ attribute = getattr(self, attribute_name)
716
+ if isinstance(attribute, PreTrainedTokenizerBase) and "auto_map" in attribute.init_kwargs:
717
+ del attribute.init_kwargs["auto_map"]
718
+
719
+ # Write processor_config.json (or equivalent)
720
  output_processor_file = os.path.join(save_directory, PROCESSOR_NAME)
721
+ processor_dict = self.to_dict()
 
 
722
 
723
+ # If processor_dict only contains processor_class, we skip writing the file,
724
+ # matching the upstream behavior; otherwise we save it.
725
+ if set(processor_dict.keys()) != {"processor_class"}:
726
+ self.to_json_file(output_processor_file)
727
+ # noinspection PyUnresolvedReferences
728
+ logger.warning_once(f"processor saved in {output_processor_file}")
729
 
730
+ # If requested, upload the modified files to the Hub
731
+ if push_to_hub:
732
  self._upload_modified_files(
733
  save_directory,
734
  repo_id,
735
  files_timestamps,
736
  commit_message=commit_message,
737
  token=kwargs.get("token"),
738
+ create_pr=kwargs.get("create_pr", False),
739
+ revision=kwargs.get("revision"),
740
+ commit_description=kwargs.get("commit_description"),
741
  )
742
 
743
+ if set(processor_dict.keys()) == {"processor_class"}:
744
+ return []
745
  return [output_processor_file]
746
 
747
  @classmethod
 
761
 
762
  return args
763
 
764
+ def _upload_modified_files(
765
+ self,
766
+ working_dir: Union[str, os.PathLike],
767
+ repo_id: str,
768
+ files_timestamps: dict[str, float],
769
+ commit_message: Optional[str] = None,
770
+ token: Optional[Union[bool, str]] = None,
771
+ create_pr: bool = False,
772
+ revision: Optional[str] = None,
773
+ commit_description: Optional[str] = None,
774
+ ):
775
+ """
776
+ Uploads all modified files in `working_dir` to `repo_id`, based on `files_timestamps`.
777
+ """
778
+ working_dir = Path(working_dir)
779
+
780
+ if commit_message is None:
781
+ commit_message = "Upload CM3P processor"
782
+ modified_files = [
783
+ f
784
+ for f in working_dir.iterdir()
785
+ if str(f) not in files_timestamps or f.stat().st_mtime > files_timestamps[str(f)]
786
+ ]
787
+
788
+ # filter for actual files + folders at the root level
789
+ modified_files = [
790
+ f
791
+ for f in modified_files
792
+ if f.is_file() or f.is_dir()
793
+ ]
794
+
795
+ operations = []
796
+ # upload standalone files
797
+ for file in modified_files:
798
+ if file.is_dir():
799
+ # go over individual files of folder
800
+ for f in file.iterdir():
801
+ operations.append(
802
+ CommitOperationAdd(
803
+ path_or_fileobj=f, path_in_repo=f.relative_to(working_dir).as_posix()
804
+ )
805
+ )
806
+ else:
807
+ operations.append(
808
+ CommitOperationAdd(path_or_fileobj=file, path_in_repo=file.relative_to(working_dir).as_posix())
809
+ )
810
+
811
+ if revision is not None and not revision.startswith("refs/pr"):
812
+ try:
813
+ create_branch(repo_id=repo_id, branch=revision, token=token, exist_ok=True)
814
+ except HfHubHTTPError as e:
815
+ if e.response.status_code == 403 and create_pr:
816
+ # If we are creating a PR on a repo we don't have access to, we can't create the branch.
817
+ # so let's assume the branch already exists. If it's not the case, an error will be raised when
818
+ # calling `create_commit` below.
819
+ pass
820
+ else:
821
+ raise
822
+
823
+ logger.info(f"Uploading the following files to {repo_id}: {','.join([f.relative_to(working_dir).as_posix() for f in modified_files])}")
824
+ return create_commit(
825
+ repo_id=repo_id,
826
+ operations=operations,
827
+ commit_message=commit_message,
828
+ commit_description=commit_description,
829
+ token=token,
830
+ create_pr=create_pr,
831
+ revision=revision,
832
+ )
833
+
834
  AutoProcessor.register(CM3PConfig, CM3PProcessor)
835
 
836
  __all__ = ["CM3PProcessor", "get_metadata"]
processor_config.json CHANGED
@@ -1,33 +1,33 @@
1
- {
2
- "default_kwargs": {
3
- "audio_kwargs": {
4
- "audio_length_per_tok": 8,
5
- "hop_length": 160,
6
- "max_source_positions": 1600,
7
- "pad_to_multiple_of": 256000,
8
- "padding": false,
9
- "sampling_rate": 16000,
10
- "truncation": false,
11
- "window_size": 400
12
- },
13
- "beatmap_kwargs": {
14
- "max_length": 2000,
15
- "padding": "longest",
16
- "truncation": "longest_first",
17
- "window_length_sec": 16.0,
18
- "window_stride_sec": 16.0
19
- },
20
- "common_kwargs": {
21
- "return_tensors": "pt"
22
- },
23
- "metadata_kwargs": {
24
- "max_length": 128,
25
- "padding": "longest",
26
- "truncation": "longest_first"
27
- }
28
- },
29
- "processor_class": "CM3PProcessor",
30
- "auto_map": {
31
- "AutoProcessor": "processing_cm3p.CM3PProcessor"
32
- }
33
- }
 
1
+ {
2
+ "auto_map": {
3
+ "AutoProcessor": "processing_cm3p.CM3PProcessor"
4
+ },
5
+ "default_kwargs": {
6
+ "audio_kwargs": {
7
+ "audio_length_per_tok": 8,
8
+ "hop_length": 160,
9
+ "max_source_positions": 1600,
10
+ "pad_to_multiple_of": 256000,
11
+ "padding": false,
12
+ "sampling_rate": 16000,
13
+ "truncation": false,
14
+ "window_size": 400
15
+ },
16
+ "beatmap_kwargs": {
17
+ "max_length": 2000,
18
+ "padding": "longest",
19
+ "truncation": "longest_first",
20
+ "window_length_sec": 16.0,
21
+ "window_stride_sec": 16.0
22
+ },
23
+ "common_kwargs": {
24
+ "return_tensors": "pt"
25
+ },
26
+ "metadata_kwargs": {
27
+ "max_length": 128,
28
+ "padding": "longest",
29
+ "truncation": "longest_first"
30
+ }
31
+ },
32
+ "processor_class": "CM3PProcessor"
33
+ }