fourmansyah hongminh54 commited on
Commit
12a8e0f
·
0 Parent(s):

Duplicate from hongminh54/BeatHeritage-v1

Browse files

Co-authored-by: hongminh54 <hongminh54@users.noreply.huggingface.co>

This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .devcontainer/devcontainer.json +42 -0
  2. .devcontainer/docker-compose.yml +26 -0
  3. .gitattributes +35 -0
  4. .github/FUNDING.yml +15 -0
  5. .gitignore +11 -0
  6. Dockerfile +8 -0
  7. LICENSE +21 -0
  8. README.md +323 -0
  9. audit_all_configs.py +157 -0
  10. beatheritage_postprocessor.py +474 -0
  11. benchmark_comparison.py +469 -0
  12. calc_fid.py +417 -0
  13. classifier/README.md +34 -0
  14. classifier/classify.py +175 -0
  15. classifier/configs/inference.yaml +14 -0
  16. classifier/configs/model/model.yaml +9 -0
  17. classifier/configs/model/whisper_base.yaml +6 -0
  18. classifier/configs/model/whisper_base_v2.yaml +7 -0
  19. classifier/configs/model/whisper_small.yaml +6 -0
  20. classifier/configs/model/whisper_tiny.yaml +6 -0
  21. classifier/configs/train.yaml +82 -0
  22. classifier/configs/train_v1.yaml +4 -0
  23. classifier/configs/train_v2.yaml +14 -0
  24. classifier/configs/train_v3.yaml +17 -0
  25. classifier/count_classes.py +56 -0
  26. classifier/libs/__init__.py +1 -0
  27. classifier/libs/dataset/__init__.py +3 -0
  28. classifier/libs/dataset/data_utils.py +308 -0
  29. classifier/libs/dataset/ors_dataset.py +490 -0
  30. classifier/libs/dataset/osu_parser.py +460 -0
  31. classifier/libs/model/__init__.py +1 -0
  32. classifier/libs/model/model.py +145 -0
  33. classifier/libs/model/spectrogram.py +55 -0
  34. classifier/libs/tokenizer/__init__.py +2 -0
  35. classifier/libs/tokenizer/event.py +53 -0
  36. classifier/libs/tokenizer/tokenizer.py +201 -0
  37. classifier/libs/utils/__init__.py +1 -0
  38. classifier/libs/utils/model_utils.py +190 -0
  39. classifier/libs/utils/routed_pickle.py +17 -0
  40. classifier/test.py +32 -0
  41. classifier/train.py +82 -0
  42. cli_inference.sh +491 -0
  43. colab/beatheritage_v1_inference.ipynb +510 -0
  44. colab/classifier_classify.ipynb +133 -0
  45. colab/mai_mod_inference.ipynb +148 -0
  46. colab/mapperatorinator_inference.ipynb +305 -0
  47. collate_results.py +158 -0
  48. compose.yaml +25 -0
  49. config.py +197 -0
  50. configs/calc_fid.yaml +43 -0
.devcontainer/devcontainer.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // For format details, see https://aka.ms/devcontainer.json. For config options, see the
2
+ // README at: https://github.com/devcontainers/templates/tree/main/src/docker-existing-docker-compose
3
+ {
4
+ "name": "Existing Docker Compose (Extend)",
5
+
6
+ // Update the 'dockerComposeFile' list if you have more compose files or use different names.
7
+ // The .devcontainer/docker-compose.yml file contains any overrides you need/want to make.
8
+ "dockerComposeFile": [
9
+ "../compose.yaml",
10
+ "docker-compose.yml"
11
+ ],
12
+
13
+ // The 'service' property is the name of the service for the container that VS Code should
14
+ // use. Update this value and .devcontainer/docker-compose.yml to the real service name.
15
+ "service": "Mapperatorinator",
16
+
17
+ // The optional 'workspaceFolder' property is the path VS Code should open by default when
18
+ // connected. This is typically a file mount in .devcontainer/docker-compose.yml
19
+ "workspaceFolder": "/workspace/Mapperatorinator",
20
+ // "workspaceFolder": "/",
21
+
22
+ // Features to add to the dev container. More info: https://containers.dev/features.
23
+ // "features": {},
24
+
25
+ // Use 'forwardPorts' to make a list of ports inside the container available locally.
26
+ // "forwardPorts": [],
27
+
28
+ // Uncomment the next line if you want start specific services in your Docker Compose config.
29
+ // "runServices": [],
30
+
31
+ // Uncomment the next line if you want to keep your containers running after VS Code shuts down.
32
+ // "shutdownAction": "none",
33
+
34
+ // Uncomment the next line to run commands after the container is created.
35
+ "postCreateCommand": "git config --global --add safe.directory /workspace/Mapperatorinator"
36
+
37
+ // Configure tool-specific properties.
38
+ // "customizations": {},
39
+
40
+ // Uncomment to connect as an existing user other than the container default. More info: https://aka.ms/dev-containers-non-root.
41
+ // "remoteUser": "devcontainer"
42
+ }
.devcontainer/docker-compose.yml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: '3.8'
2
+ services:
3
+ # Update this to the name of the service you want to work with in your docker-compose.yml file
4
+ mapperatorinator:
5
+ # Uncomment if you want to override the service's Dockerfile to one in the .devcontainer
6
+ # folder. Note that the path of the Dockerfile and context is relative to the *primary*
7
+ # docker-compose.yml file (the first in the devcontainer.json "dockerComposeFile"
8
+ # array). The sample below assumes your primary file is in the root of your project.
9
+ #
10
+ # build:
11
+ # context: .
12
+ # dockerfile: .devcontainer/Dockerfile
13
+
14
+ volumes:
15
+ # Update this to wherever you want VS Code to mount the folder of your project
16
+ - ..:/workspace:cached
17
+
18
+ # Uncomment the next four lines if you will use a ptrace-based debugger like C++, Go, and Rust.
19
+ # cap_add:
20
+ # - SYS_PTRACE
21
+ # security_opt:
22
+ # - seccomp:unconfined
23
+
24
+ # Overrides default command so things don't shut down after the process ends.
25
+ command: /bin/sh -c "while sleep 1000; do :; done"
26
+
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.github/FUNDING.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # These are supported funding model platforms
2
+
3
+ github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
4
+ patreon: # Replace with a single Patreon username
5
+ open_collective: # Replace with a single Open Collective username
6
+ ko_fi: OliBomby
7
+ tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
8
+ community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
9
+ liberapay: # Replace with a single Liberapay username
10
+ issuehunt: # Replace with a single IssueHunt username
11
+ lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
12
+ polar: # Replace with a single Polar username
13
+ buy_me_a_coffee: # Replace with a single Buy Me a Coffee username
14
+ thanks_dev: # Replace with a single thanks.dev username
15
+ custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
.gitignore ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .venv
2
+ __pycache__
3
+ logs
4
+ logs_fid
5
+ multirun
6
+ tensorboard_logs
7
+ .idea
8
+ test
9
+ test_inference.py
10
+ test_inference_mai_mod.py
11
+ .windsurf
Dockerfile ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ FROM pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel
2
+
3
+ RUN apt-get -y update && apt-get -y upgrade && apt-get install -y git && apt-get install -y --no-install-recommends ffmpeg && rm -rf /var/lib/apt/lists/*
4
+ RUN pip install accelerate pydub nnAudio PyYAML transformers hydra-core tensorboard lightning pandas pyarrow einops 'git+https://github.com/OliBomby/slider.git@gedagedigedagedaoh#egg=slider' torch_tb_profiler wandb ninja
5
+ RUN MAX_JOBS=4 pip install flash-attn --no-build-isolation
6
+
7
+ # Modify .bashrc to include the custom prompt
8
+ RUN echo 'if [ -f /.dockerenv ]; then export PS1="(docker) $PS1"; fi' >> /root/.bashrc
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2024 OliBomby
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # BeatHeritage
2
+
3
+ 🎯 **NEW: BeatHeritage V1 - Enhanced Stability & Quality** | [Try on Colab](https://colab.research.google.com/github/hongminh54/BeatHeritage/blob/main/colab/beatheritage_v1_inference.ipynb) | [Documentation](docs/BEATHERITAGE_V1.md)
4
+
5
+ Try the generative model [here](https://colab.research.google.com/github/hongminh54/BeatHeritage/blob/main/colab/beatheritage_v1_inference.ipynb), or MaiMod [here](https://colab.research.google.com/github/OliBomby/Mapperatorinator/blob/main/colab/mai_mod_inference.ipynb). Check out a video showcase [here](https://youtu.be/FEr7t1L2EoA).
6
+
7
+ BeatHeritage (formerly Mapperatorinator) is a multi-model framework that uses spectrogram inputs to generate fully featured osu! beatmaps for all gamemodes and [assist modding beatmaps](#maimod-the-ai-driven-modding-tool).
8
+ The goal of this project is to automatically generate rankable quality osu! beatmaps from any song with a high degree of customizability.
9
+
10
+ ## 🚀 What's New in BeatHeritage V1
11
+
12
+ - **Enhanced Stability**: Optimized sampling parameters (temperature 0.85, top_p 0.92) for more consistent generation
13
+ - **Quality Control**: Automatic spacing correction, overlap detection, and flow optimization
14
+ - **Pattern Variety**: Advanced pattern generation with diversity enhancement
15
+ - **All Gamemodes**: Full support for std, taiko, ctb, and mania with mode-specific optimizations
16
+ - **Performance**: Flash attention, mixed precision (BF16), and gradient checkpointing
17
+ - **Custom Postprocessor**: Advanced post-processing with flow optimization and style preservation
18
+ - **Benchmark Tools**: Compare performance with previous models
19
+ - **Easy Setup**: Auto-setup script with model downloading from Hugging Face
20
+
21
+ This project is built upon [osuT5](https://github.com/gyataro/osuT5) and [osu-diffusion](https://github.com/OliBomby/osu-diffusion). In developing this, I spent about 2500 hours of GPU compute across 142 runs on my 4060 Ti and rented 4090 instances on vast.ai.
22
+
23
+ #### Use this tool responsibly. Always disclose the use of AI in your beatmaps.
24
+
25
+ ## Installation
26
+
27
+ The instruction below allows you to generate beatmaps on your local machine, alternatively you can run it in the cloud with the [colab notebook](https://colab.research.google.com/github/OliBomby/Mapperatorinator/blob/main/colab/mapperatorinator_inference.ipynb).
28
+
29
+ ### 1. Clone the repository
30
+
31
+ ```sh
32
+ git clone https://github.com/OliBomby/Mapperatorinator.git
33
+ cd Mapperatorinator
34
+ ```
35
+
36
+ ### 2. (Optional) Create virtual environment
37
+
38
+ Use Python 3.10, later versions might not be compatible with the dependencies.
39
+
40
+ ```sh
41
+ python -m venv .venv
42
+
43
+ # In cmd.exe
44
+ .venv\Scripts\activate.bat
45
+ # In PowerShell
46
+ .venv\Scripts\Activate.ps1
47
+ # In Linux or MacOS
48
+ source .venv/bin/activate
49
+ ```
50
+
51
+ ### 3. Install dependencies
52
+
53
+ - Python 3.10
54
+ - [Git](https://git-scm.com/downloads)
55
+ - [ffmpeg](http://www.ffmpeg.org/)
56
+ - [PyTorch](https://pytorch.org/get-started/locally/): Make sure to follow the Get Started guide so you install `torch` and `torchaudio` with GPU support.
57
+
58
+ - and the remaining Python dependencies:
59
+
60
+ ```sh
61
+ pip install -r requirements.txt
62
+ ```
63
+
64
+ ## Web GUI (Recommended)
65
+
66
+ For a more user-friendly experience, consider using the Web UI. It provides a graphical interface to configure generation parameters, start the process, and monitor the output.
67
+
68
+ ### Launch the GUI
69
+
70
+ Navigate to the cloned `Mapperatorinator` directory in your terminal and run:
71
+
72
+ ```sh
73
+ python web-ui.py
74
+ ```
75
+
76
+ This will start a local web server and automatically open the UI in a new window.
77
+
78
+ ### Using the GUI
79
+
80
+ - **Configure:** Set input/output paths using the form fields and "Browse" buttons. Adjust generation parameters like gamemode, difficulty, style (year, mapper ID, descriptors), timing, specific features (hitsounds, super timing), and more, mirroring the command-line options. (Note: If you provide a `beatmap_path`, the UI will automatically determine the `audio_path` and `output_path` from it, so you can leave those fields blank)
81
+ - **Start:** Click the "Start Inference" button to begin the beatmap generation.
82
+ - **Cancel:** You can stop the ongoing process using the "Cancel Inference" button.
83
+ - **Open Output:** Once finished, use the "Open Output Folder" button for quick access to the generated files.
84
+
85
+ The Web UI acts as a convenient wrapper around the `inference.py` script. For advanced options or troubleshooting, refer to the command-line instructions.
86
+
87
+ ![python_u3zyW0S3Vs](https://github.com/user-attachments/assets/5312a45f-d51c-4b37-9389-da3258ddd0a1)
88
+
89
+ ## Command-Line Inference
90
+
91
+ For users who prefer the command line or need access to advanced configurations, follow the steps below. **Note:** For a simpler graphical interface, please see the [Web UI (Recommended)](#web-ui-recommended) section above.
92
+
93
+ Run `inference.py` and pass in some arguments to generate beatmaps. For this use [Hydra override syntax](https://hydra.cc/docs/advanced/override_grammar/basic/). See `configs/inference_v29.yaml` for all available parameters.
94
+ ```
95
+ python inference.py \
96
+ audio_path [Path to input audio] \
97
+ output_path [Path to output directory] \
98
+ beatmap_path [Path to .osu file to autofill metadata, and output_path, or use as reference] \
99
+
100
+ gamemode [Game mode to generate 0=std, 1=taiko, 2=ctb, 3=mania] \
101
+ difficulty [Difficulty star rating to generate] \
102
+ mapper_id [Mapper user ID for style] \
103
+ year [Upload year to simulate] \
104
+ hitsounded [Whether to add hitsounds] \
105
+ slider_multiplier [Slider velocity multiplier] \
106
+ circle_size [Circle size] \
107
+ keycount [Key count for mania] \
108
+ hold_note_ratio [Hold note ratio for mania 0-1] \
109
+ scroll_speed_ratio [Scroll speed ratio for mania and ctb 0-1] \
110
+ descriptors [List of beatmap user tags for style] \
111
+ negative_descriptors [List of beatmap user tags for classifier-free guidance] \
112
+
113
+ add_to_beatmap [Whether to add generated content to the reference beatmap instead of making a new beatmap] \
114
+ start_time [Generation start time in milliseconds] \
115
+ end_time [Generation end time in milliseconds] \
116
+ in_context [List of additional context to provide to the model [NONE,TIMING,KIAI,MAP,GD,NO_HS]] \
117
+ output_type [List of content types to generate] \
118
+ cfg_scale [Scale of the classifier-free guidance] \
119
+ super_timing [Whether to use slow accurate variable BPM timing generator] \
120
+ seed [Random seed for generation] \
121
+ ```
122
+
123
+ Example:
124
+ ```
125
+ python inference.py beatmap_path="'C:\Users\USER\AppData\Local\osu!\Songs\1 Kenji Ninuma - DISCO PRINCE\Kenji Ninuma - DISCOPRINCE (peppy) [Normal].osu'" gamemode=0 difficulty=5.5 year=2023 descriptors="['jump aim','clean']" in_context=[TIMING,KIAI]
126
+ ```
127
+
128
+ ## Interactive CLI
129
+ For those who prefer a terminal-based workflow but want a guided setup, the interactive CLI script is an excellent alternative to the Web UI.
130
+
131
+ ### Launch the CLI
132
+ Navigate to the cloned directory. You may need to make the script executable first.
133
+
134
+ ```sh
135
+ # Make the script executable (only needs to be done once)
136
+ chmod +x cli_inference.sh
137
+ ```
138
+
139
+ ```sh
140
+ # Run the script
141
+ ./cli_inference.sh
142
+ ```
143
+
144
+ ### Using the CLI
145
+ The script will walk you through a series of prompts to configure all generation parameters, just like the Web UI.
146
+
147
+ It uses a color-coded interface for clarity.
148
+ It provides an advanced multi-select menu for choosing style descriptors using your arrow keys and spacebar.
149
+ After you've answered all the questions, it will display the final command for your review.
150
+ You can then confirm to execute it directly or cancel and copy the command for manual use.
151
+
152
+ ## Generation Tips
153
+
154
+ - You can edit `configs/inference_v29.yaml` and add your arguments there instead of typing them in the terminal every time.
155
+ - All available descriptors can be found [here](https://osu.ppy.sh/wiki/en/Beatmap/Beatmap_tags).
156
+ - Always provide a year argument between 2007 and 2023. If you leave it unknown, the model might generate with an inconsistent style.
157
+ - Always provide a difficulty argument. If you leave it unknown, the model might generate with an inconsistent difficulty.
158
+ - Increase the `cfg_scale` parameter to increase the effectiveness of the `mapper_id` and `descriptors` arguments.
159
+ - You can use the `negative_descriptors` argument to guide the model away from certain styles. This only works when `cfg_scale > 1`. Make sure the number of negative descriptors is equal to the number of descriptors.
160
+ - If your song style and desired beatmap style don't match well, the model might not follow your directions. For example, it's hard to generate a high SR, high SV beatmap for a calm song.
161
+ - If you already have timing and kiai times done for a song, then you can give this to the model to greatly increase inference speed and accuracy: Use the `beatmap_path` and `in_context=[TIMING,KIAI]` arguments.
162
+ - To remap just a part of your beatmap, use the `beatmap_path`, `start_time`, `end_time`, and `add_to_beatmap=true` arguments.
163
+ - To generate a guest difficulty for a beatmap, use the `beatmap_path` and `in_context=[GD,TIMING,KIAI]` arguments.
164
+ - To generate hitsounds for a beatmap, use the `beatmap_path` and `in_context=[NO_HS,TIMING,KIAI]` arguments.
165
+ - To generate only timing for a song, use the `super_timing=true` and `output_type=[TIMING]` arguments.
166
+
167
+ ## MaiMod: The AI-driven Modding Tool
168
+
169
+ MaiMod is a modding tool for osu! beatmaps that uses Mapperatorinator predictions to find potential faults and inconsistencies which can't be detected by other automatic modding tools like [Mapset Verifier](https://github.com/Naxesss/MapsetVerifier).
170
+ It can detect issues like:
171
+ - Incorrect snapping or rhythmic patterns
172
+ - Inaccurate timing points
173
+ - Inconsistent hit object positions or new combo placements
174
+ - Weird slider shapes
175
+ - Inconsistent hitsounds or volumes
176
+
177
+ You can try MaiMod [here](https://colab.research.google.com/github/OliBomby/Mapperatorinator/blob/main/colab/mai_mod_inference.ipynb), or run it locally:
178
+ To run MaiMod locally, you'll need to install Mapperatorinator. Then, run the `mai_mod.py` script, specifying your beatmap's path with the `beatmap_path` argument.
179
+ ```sh
180
+ python mai_mod.py beatmap_path="'C:\Users\USER\AppData\Local\osu!\Songs\1 Kenji Ninuma - DISCO PRINCE\Kenji Ninuma - DISCOPRINCE (peppy) [Normal].osu'"
181
+ ```
182
+ This will print the modding suggestions to the console, which you can then apply to your beatmap manually.
183
+ Suggestions are ordered chronologically and grouped into categories.
184
+ The first value in the circle indicates the 'surprisal' which is a measure of how unexpected the model found the issue to be, so you can prioritize the most important issues.
185
+
186
+ The model can make mistakes, especially on low surprisal issues, so always double-check the suggestions before applying them to your beatmap.
187
+ The main goal is to help you narrow down the search space for potential issues, so you don't have to manually check every single hit object in your beatmap.
188
+
189
+ ### MaiMod GUI
190
+ To run the MaiMod Web UI, you'll need to install Mapperatorinator.
191
+ Then, run the `mai_mod_ui.py` script. This will start a local web server and automatically open the UI in a new window:
192
+
193
+ ```sh
194
+ python mai_mod_ui.py
195
+ ```
196
+
197
+ <img width="850" height="1019" alt="afbeelding" src="https://github.com/user-attachments/assets/67c03a43-a7bd-4265-a5b1-5e4d62aca1fa" />
198
+
199
+ ## Overview
200
+
201
+ ### Tokenization
202
+
203
+ Mapperatorinator converts osu! beatmaps into an intermediate event representation that can be directly converted to and from tokens.
204
+ It includes hit objects, hitsounds, slider velocities, new combos, timing points, kiai times, and taiko/mania scroll speeds.
205
+
206
+ Here is a small example of the tokenization process:
207
+
208
+ ![mapperatorinator_parser](https://github.com/user-attachments/assets/84efde76-4c27-48a1-b8ce-beceddd9e695)
209
+
210
+ To save on vocabulary size, time events are quantized to 10ms intervals and position coordinates are quantized to 32 pixel grid points.
211
+
212
+ ### Model architecture
213
+ The model is basically a wrapper around the [HF Transformers Whisper](https://huggingface.co/docs/transformers/en/model_doc/whisper#transformers.WhisperForConditionalGeneration) model, with custom input embeddings and loss function.
214
+ Model size amounts to 219M parameters.
215
+ This model was found to be faster and more accurate than T5 for this task.
216
+
217
+ The high-level overview of the model's input-output is as follows:
218
+
219
+ ![Picture2](https://user-images.githubusercontent.com/28675590/201044116-1384ad72-c540-44db-a285-7319dd01caad.svg)
220
+
221
+ The model uses Mel spectrogram frames as encoder input, with one frame per input position. The model decoder output at each step is a softmax distribution over a discrete, predefined, vocabulary of events. Outputs are sparse, events are only needed when a hit-object occurs, instead of annotating every single audio frame.
222
+
223
+ ### Multitask training format
224
+
225
+ ![Multitask training format](https://github.com/user-attachments/assets/62f490bc-a567-4671-a7ce-dbcc5f9cd6d9)
226
+
227
+ Before the SOS token are additional tokens that facilitate conditional generation. These tokens include the gamemode, difficulty, mapper ID, year, and other metadata.
228
+ During training, these tokens do not have accompanying labels, so they are never output by the model.
229
+ Also during training there is a random chance that a metadata token gets replaced by an 'unknown' token, so during inference we can use these 'unknown' tokens to reduce the amount of metadata we have to give to the model.
230
+
231
+ ### Seamless long generation
232
+
233
+ The context length of the model is 8.192 seconds long. This is obviously not enough to generate a full beatmap, so we have to split the song into multiple windows and generate the beatmap in small parts.
234
+ To make sure that the generated beatmap does not have noticeable seams in between windows, we use a 90% overlap and generate the windows sequentially.
235
+ Each generation window except the first starts with the decoder pre-filled up to 50% of the generation window with tokens from the previous windows.
236
+ We use a logit processor to make sure that the model can't generate time tokens that are in the first 50% of the generation window.
237
+ Additionally, the last 40% of the generation window is reserved for the next window. Any generated time tokens in that range are treated as EOS tokens.
238
+ This ensures that each generated token is conditioned on at least 4 seconds of previous tokens and 3.3 seconds of future audio to anticipate.
239
+
240
+ To prevent offset drifting during long generation, random offsets have been added to time events in the decoder during training.
241
+ This forces it to correct timing errors by listening to the onsets in the audio instead, and results in a consistently accurate offset.
242
+
243
+ ### Refined coordinates with diffusion
244
+
245
+ Position coordinates generated by the decoder are quantized to 32 pixel grid points, so afterward we use diffusion to denoise the coordinates to the final positions.
246
+ For this we trained a modified version of [osu-diffusion](https://github.com/OliBomby/osu-diffusion) that is specialized to only the last 10% of the noise schedule, and accepts the more advanced metadata tokens that Mapperatorinator uses for conditional generation.
247
+
248
+ Since the Mapperatorinator model outputs the SV of sliders, the required length of the slider is fixed regardless of the shape of the control point path.
249
+ Therefore, we try to guide the diffusion process to create coordinates that fit the required slider lengths.
250
+ We do this by recalculating the slider end positions after every step of the diffusion process based on the required length and the current control point path.
251
+ This means that the diffusion process does not have direct control over the slider end positions, but it can still influence them by changing the control point path.
252
+
253
+ ### Post-processing
254
+
255
+ Mapperatorinator does some extra post-processing to improve the quality of the generated beatmap:
256
+
257
+ - Refine position coordinates with diffusion.
258
+ - Resnap time events to the nearest tick using the snap divisors generated by the model.
259
+ - Snap near-perfect positional overlaps.
260
+ - Convert mania column events to X coordinates.
261
+ - Generate slider paths for taiko drumrolls.
262
+ - Fix big discrepancies in required slider length and control point path length.
263
+
264
+ ### Super timing generator
265
+
266
+ Super timing generator is an algorithm that improves the precision and accuracy of generated timing by inferring timing for the whole song 20 times and averaging the results.
267
+ This is useful for songs with variable BPM, or songs with BPM changes. The result is almost perfect with only sometimes a section that needs manual adjustment.
268
+
269
+ ## Training
270
+
271
+ The instruction below creates a training environment on your local machine.
272
+
273
+ ### 1. Clone the repository
274
+
275
+ ```sh
276
+ git clone https://github.com/OliBomby/Mapperatorinator.git
277
+ cd Mapperatorinator
278
+ ```
279
+
280
+ ### 2. Create dataset
281
+
282
+ Create your own dataset using the [Mapperator console app](https://github.com/mappingtools/Mapperator/blob/master/README.md#create-a-high-quality-dataset). It requires an [osu! OAuth client token](https://osu.ppy.sh/home/account/edit) to verify beatmaps and get additional metadata. Place the dataset in a `datasets` directory next to the `Mapperatorinator` directory.
283
+
284
+ ```sh
285
+ Mapperator.ConsoleApp.exe dataset2 -t "/Mapperatorinator/datasets/beatmap_descriptors.csv" -i "path/to/osz/files" -o "/datasets/cool_dataset"
286
+ ```
287
+
288
+ ### 3. Create docker container
289
+ Training in your venv is also possible, but we recommend using Docker on WSL for better performance.
290
+ ```sh
291
+ docker compose up -d --force-recreate
292
+ docker attach mapperatorinator_space
293
+ ```
294
+
295
+ ### 4. Configure parameters and begin training
296
+
297
+ All configurations are located in `./configs/osut5/train.yaml`. Begin training by calling `osuT5/train.py`.
298
+
299
+ ```sh
300
+ python osuT5/train.py -cn train_v29 train_dataset_path="/workspace/datasets/cool_dataset" test_dataset_path="/workspace/datasets/cool_dataset" train_dataset_end=90 test_dataset_start=90 test_dataset_end=100
301
+ ```
302
+
303
+ ## See also
304
+ - [Mapper Classifier](./classifier/README.md)
305
+ - [RComplexion](./rcomplexion/README.md)
306
+
307
+ ## Credits
308
+
309
+ Special thanks to:
310
+ 1. The authors of [osuT5](https://github.com/gyataro/osuT5) for their training code.
311
+ 2. Hugging Face team for their [tools](https://huggingface.co/docs/transformers/index).
312
+ 3. [Jason Won](https://github.com/jaswon) and [Richard Nagyfi](https://github.com/sedthh) for bouncing ideas.
313
+ 4. [Marvin](https://github.com/minetoblend) for donating training credits.
314
+ 5. The osu! community for the beatmaps.
315
+
316
+ ## Related works
317
+
318
+ 1. [osu! Beatmap Generator](https://github.com/Syps/osu_beatmap_generator) by Syps (Nick Sypteras)
319
+ 2. [osumapper](https://github.com/kotritrona/osumapper) by kotritrona, jyvden, Yoyolick (Ryan Zmuda)
320
+ 3. [osu-diffusion](https://github.com/OliBomby/osu-diffusion) by OliBomby (Olivier Schipper), NiceAesth (Andrei Baciu)
321
+ 4. [osuT5](https://github.com/gyataro/osuT5) by gyataro (Xiwen Teoh)
322
+ 5. [Beat Learning](https://github.com/sedthh/BeatLearning) by sedthh (Richard Nagyfi)
323
+ 6. [osu!dreamer](https://github.com/jaswon/osu-dreamer) by jaswon (Jason Won)
audit_all_configs.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Comprehensive Config Audit Script for BeatHeritage
4
+ Checks all config files against their corresponding dataclass definitions
5
+ """
6
+
7
+ import os
8
+ import yaml
9
+ from pathlib import Path
10
+ from dataclasses import fields
11
+ from typing import Dict, List, Set, Any
12
+
13
+ # Import all config classes
14
+ from config import InferenceConfig, FidConfig, MaiModConfig
15
+ from osuT5.osuT5.config import TrainConfig, DataConfig, DataloaderConfig, OptimizerConfig
16
+ from osu_diffusion.config import DiffusionTrainConfig
17
+
18
def get_config_fields(config_class) -> Set[str]:
    """Return the name of every field declared on the given dataclass."""
    names: Set[str] = set()
    for spec in fields(config_class):
        names.add(spec.name)
    return names
21
+
22
def get_yaml_keys(yaml_path: str, prefix: str = "") -> Set[str]:
    """Collect every key in a YAML file as dot-separated paths.

    Nested mapping keys become "parent.child" entries; a list whose first
    element is a mapping is descended through that first element. Hydra's
    `defaults` list is skipped. (`prefix` is accepted for interface
    compatibility but is not used.)
    """
    collected: Set[str] = set()

    def walk(node: Any, path: str = "") -> None:
        if not isinstance(node, dict):
            return
        for name, child in node.items():
            if name == 'defaults':  # Hydra composition list, not a config field
                continue
            dotted = f"{path}.{name}" if path else name
            collected.add(dotted)
            if isinstance(child, dict):
                walk(child, dotted)
            elif isinstance(child, list) and child and isinstance(child[0], dict):
                # Represent a list of mappings by its first element.
                walk(child[0], dotted)

    try:
        with open(yaml_path, 'r') as f:
            walk(yaml.safe_load(f))
    except Exception as e:
        print(f"Error reading {yaml_path}: {e}")

    return collected
51
+
52
def audit_config_mapping(config_path: str, config_class, config_name: str):
    """Audit one YAML config file against its dataclass definition.

    Prints a summary of field/key counts and any mismatches.

    Returns:
        A dict with keys 'missing_in_class', 'missing_in_config',
        'all_yaml_keys' and 'class_fields', or None when the config file
        does not exist.
    """
    print(f"\n[AUDIT] {config_name}: {config_path}")

    if not os.path.exists(config_path):
        print(f"[ERROR] Config file not found: {config_path}")
        return None  # explicit: callers check the result's truthiness

    # Fields declared on the dataclass vs. keys present in the YAML file.
    class_fields = get_config_fields(config_class)
    yaml_keys = get_yaml_keys(config_path)

    # Find mismatches in both directions.
    missing_in_class = yaml_keys - class_fields
    missing_in_config = class_fields - yaml_keys

    # Dataclass fields are flat names, so the authoritative comparison is
    # against top-level YAML keys only.
    top_level_yaml = {key.split('.')[0] for key in yaml_keys}
    top_level_missing = top_level_yaml - class_fields

    print("[SUMMARY]:")
    print(f" - Dataclass fields: {len(class_fields)}")
    print(f" - YAML keys (all): {len(yaml_keys)}")
    print(f" - YAML keys (top-level): {len(top_level_yaml)}")

    if top_level_missing:
        print("[MISSING] Keys in YAML but missing in dataclass:")
        for key in sorted(top_level_missing):
            related_keys = [k for k in yaml_keys if k.startswith(key)]
            print(f" - {key} (related: {len(related_keys)} keys)")
            if len(related_keys) <= 5:  # Show details for small sections
                for rkey in sorted(related_keys)[:5]:
                    print(f" * {rkey}")
            else:
                # Bug fix: previously printed "... and N more" without having
                # shown any keys. Show the first 3, then summarize the rest,
                # which makes the `len - 3` arithmetic correct.
                for rkey in sorted(related_keys)[:3]:
                    print(f" * {rkey}")
                print(f" * ... and {len(related_keys)-3} more keys")

    if missing_in_config:
        optional_missing = missing_in_config & {'hydra', 'train', 'diffusion'}  # Usually optional
        real_missing = missing_in_config - optional_missing
        if real_missing:
            print("[WARNING] Fields in dataclass but missing in YAML:")
            for key in sorted(real_missing):
                print(f" - {key}")

    return {
        'missing_in_class': top_level_missing,
        'missing_in_config': missing_in_config,
        'all_yaml_keys': yaml_keys,
        'class_fields': class_fields
    }
104
+
105
def main():
    """Run the full config audit and print a report of every mismatch found."""
    print("BeatHeritage Config Audit - Finding ALL Mismatches")
    print("=" * 60)

    # (config file path, dataclass it must match, human-readable label)
    audit_targets = [
        ("configs/inference/beatheritage_v1.yaml", InferenceConfig, "Inference (BeatHeritage V1)"),
        ("configs/inference/default.yaml", InferenceConfig, "Inference (Default)"),
        ("configs/train/beatheritage_v1.yaml", TrainConfig, "Training (BeatHeritage V1)"),
        ("configs/train/default.yaml", TrainConfig, "Training (Default)"),
        ("configs/diffusion/v1.yaml", DiffusionTrainConfig, "Diffusion (V1)"),
    ]

    # Keep only the configs whose YAML has keys the dataclass lacks.
    all_issues = {}
    for path, cls, label in audit_targets:
        findings = audit_config_mapping(path, cls, label)
        if findings and findings['missing_in_class']:
            all_issues[label] = findings

    print("\nAUDIT SUMMARY")
    print("=" * 60)

    if not all_issues:
        print("All configs are aligned with their dataclasses!")
        return

    print(f"Found issues in {len(all_issues)} config(s):")
    for config_name, issues in all_issues.items():
        print(f"\n{config_name}:")
        for key in sorted(issues['missing_in_class']):
            print(f" - Missing field: {key}")

    # Emit copy-pasteable stubs for the fields that need to be added.
    print("\nSUGGESTED FIXES")
    print("=" * 60)
    for config_name, issues in all_issues.items():
        if 'Inference' in config_name:
            print("\nFor InferenceConfig class:")
            for key in sorted(issues['missing_in_class']):
                print(f" + {key}: <appropriate_type> = <default_value>")


if __name__ == "__main__":
    main()
beatheritage_postprocessor.py ADDED
@@ -0,0 +1,474 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ BeatHeritage V1 Custom Postprocessor
3
+ Enhanced postprocessing for improved beatmap quality
4
+ """
5
+
6
+ import numpy as np
7
+ from typing import List, Tuple, Dict, Optional
8
+ from dataclasses import dataclass
9
+ import logging
10
+
11
+ from osuT5.osuT5.inference.postprocessor import Postprocessor, BeatmapConfig
12
+
13
logger = logging.getLogger(__name__)  # module-level logger for postprocessing diagnostics
14
+
15
+
16
@dataclass
class BeatHeritageConfig(BeatmapConfig):
    """Enhanced config for BeatHeritage V1 postprocessing.

    Extends the base BeatmapConfig with toggles and tuning knobs for the
    quality-control, flow, pattern, difficulty-scaling and style stages
    applied by BeatHeritagePostprocessor.
    """
    # Quality control parameters
    # Minimum allowed distance (osu! playfield pixels) between consecutive objects.
    min_distance_threshold: float = 20.0
    # Maximum tolerated circle-overlap ratio before positions are pushed apart.
    max_overlap_ratio: float = 0.15
    enable_auto_correction: bool = True
    enable_flow_optimization: bool = True

    # Pattern enhancement
    enable_pattern_variety: bool = True
    # NOTE(review): not read by the visible postprocessing code — confirm intended use.
    pattern_complexity_target: float = 0.7

    # Difficulty scaling
    enable_difficulty_scaling: bool = True
    # NOTE(review): not read by the visible postprocessing code — confirm intended use.
    difficulty_variance_threshold: float = 0.3

    # Style preservation
    enable_style_preservation: bool = True
    # Blend weight in [0, 1]: how strongly spacing is pulled toward the map's
    # average distance in _preserve_style (1.0 = force the average everywhere).
    style_consistency_weight: float = 0.8
36
+
37
+
38
class BeatHeritagePostprocessor(Postprocessor):
    """Enhanced postprocessor for BeatHeritage V1.

    Runs the base Postprocessor pipeline, then a sequence of optional
    refinement stages (spacing/overlap fixes, flow smoothing, pattern
    variety, difficulty scaling, style preservation) controlled by flags
    on BeatHeritageConfig. All stages mutate the beatmap dict in place
    and also return it.
    """

    def __init__(self, config: BeatHeritageConfig):
        super().__init__(config)
        self.config = config
        # Stage helpers share the same config object.
        self.flow_optimizer = FlowOptimizer(config)
        self.pattern_enhancer = PatternEnhancer(config)
        self.quality_controller = QualityController(config)

    def postprocess(self, beatmap_data: Dict) -> Dict:
        """
        Enhanced postprocessing pipeline for BeatHeritage V1

        Args:
            beatmap_data: Raw beatmap data from model

        Returns:
            Processed beatmap data with enhancements
        """
        # Base postprocessing (inherited behavior — see Postprocessor)
        beatmap_data = super().postprocess(beatmap_data)

        # Quality control: enforce minimum spacing, then resolve overlaps
        if self.config.enable_auto_correction:
            beatmap_data = self.quality_controller.fix_spacing_issues(beatmap_data)
            beatmap_data = self.quality_controller.fix_overlaps(beatmap_data)

        # Flow optimization: smooth out sharp direction changes
        if self.config.enable_flow_optimization:
            beatmap_data = self.flow_optimizer.optimize_flow(beatmap_data)

        # Pattern enhancement: break up repetitive sections
        if self.config.enable_pattern_variety:
            beatmap_data = self.pattern_enhancer.enhance_patterns(beatmap_data)

        # Difficulty scaling toward the configured target
        if self.config.enable_difficulty_scaling:
            beatmap_data = self._scale_difficulty(beatmap_data)

        # Style preservation: regularize spacing toward the map average
        if self.config.enable_style_preservation:
            beatmap_data = self._preserve_style(beatmap_data)

        return beatmap_data

    def _scale_difficulty(self, beatmap_data: Dict) -> Dict:
        """Scale difficulty to match target star rating"""
        # NOTE(review): assumes BeatmapConfig declares a `difficulty` field
        # (not visible here) — confirm against the base class.
        target_difficulty = self.config.difficulty
        if target_difficulty is None:
            return beatmap_data

        current_difficulty = self._calculate_difficulty(beatmap_data)
        # Guard against division by ~zero with the 0.1 floor.
        scale_factor = target_difficulty / max(current_difficulty, 0.1)

        # Adjust spacing and timing based on scale factor
        # (only objects that carry an explicit 'distance' key are touched)
        if 'hit_objects' in beatmap_data:
            for obj in beatmap_data['hit_objects']:
                if 'distance' in obj:
                    obj['distance'] *= scale_factor

        logger.info(f"Scaled difficulty from {current_difficulty:.2f} to {target_difficulty:.2f}")
        return beatmap_data

    def _preserve_style(self, beatmap_data: Dict) -> Dict:
        """Preserve mapping style consistency"""
        # Analyze style characteristics (average distance, variance)
        style_features = self._extract_style_features(beatmap_data)

        # Apply style consistency
        consistency_weight = self.config.style_consistency_weight

        if 'hit_objects' in beatmap_data:
            for i, obj in enumerate(beatmap_data['hit_objects']):
                if i > 0:
                    # Maintain consistent spacing patterns.
                    # NOTE: prev_obj is read from the list being mutated, so
                    # adjustments propagate sequentially through the map.
                    prev_obj = beatmap_data['hit_objects'][i-1]
                    expected_distance = style_features.get('avg_distance', 100)

                    if 'position' in obj and 'position' in prev_obj:
                        current_distance = self._calculate_distance(
                            obj['position'], prev_obj['position']
                        )

                        # Blend current with expected based on consistency weight
                        adjusted_distance = (
                            current_distance * (1 - consistency_weight) +
                            expected_distance * consistency_weight
                        )

                        # Adjust position to match distance
                        obj['position'] = self._adjust_position(
                            prev_obj['position'],
                            obj['position'],
                            adjusted_distance
                        )

        return beatmap_data

    def _calculate_difficulty(self, beatmap_data: Dict) -> float:
        """Calculate approximate star rating"""
        # Simplified difficulty calculation: object density x spacing x tempo
        num_objects = len(beatmap_data.get('hit_objects', []))
        avg_spacing = self._calculate_avg_spacing(beatmap_data)
        bpm = beatmap_data.get('bpm', 180)

        # Simple formula (can be improved)
        difficulty = (num_objects / 100) * (avg_spacing / 50) * (bpm / 180)
        return min(max(difficulty, 0), 10)  # Clamp to 0-10

    def _extract_style_features(self, beatmap_data: Dict) -> Dict:
        """Extract style characteristics from beatmap"""
        features = {}

        if 'hit_objects' in beatmap_data:
            # Distances between consecutive objects; the previous object falls
            # back to the playfield center when it has no position.
            distances = []
            for i in range(1, len(beatmap_data['hit_objects'])):
                if 'position' in beatmap_data['hit_objects'][i]:
                    dist = self._calculate_distance(
                        beatmap_data['hit_objects'][i-1].get('position', (256, 192)),
                        beatmap_data['hit_objects'][i]['position']
                    )
                    distances.append(dist)

            if distances:
                features['avg_distance'] = np.mean(distances)
                features['distance_variance'] = np.var(distances)

        return features

    def _calculate_avg_spacing(self, beatmap_data: Dict) -> float:
        """Calculate average spacing between objects (100 when none measurable)"""
        distances = []
        objects = beatmap_data.get('hit_objects', [])

        for i in range(1, len(objects)):
            # Only pairs where both objects carry a position count.
            if 'position' in objects[i] and 'position' in objects[i-1]:
                dist = self._calculate_distance(
                    objects[i-1]['position'],
                    objects[i]['position']
                )
                distances.append(dist)

        return np.mean(distances) if distances else 100

    def _calculate_distance(self, pos1: Tuple[float, float],
                            pos2: Tuple[float, float]) -> float:
        """Calculate Euclidean distance between two positions"""
        return np.sqrt((pos1[0] - pos2[0])**2 + (pos1[1] - pos2[1])**2)

    def _adjust_position(self, from_pos: Tuple[float, float],
                         to_pos: Tuple[float, float],
                         target_distance: float) -> Tuple[float, float]:
        """Adjust position to achieve target distance from `from_pos`.

        Scales the from→to vector so its length equals target_distance,
        then clamps the result to the 512x384 playfield (clamping may
        shorten the achieved distance at the edges).
        """
        current_distance = self._calculate_distance(from_pos, to_pos)
        if current_distance < 0.01:  # Avoid division by zero
            return to_pos

        scale = target_distance / current_distance
        dx = (to_pos[0] - from_pos[0]) * scale
        dy = (to_pos[1] - from_pos[1]) * scale

        # Keep within playfield bounds
        new_x = max(0, min(512, from_pos[0] + dx))
        new_y = max(0, min(384, from_pos[1] + dy))

        return (new_x, new_y)
205
+
206
+
207
class FlowOptimizer:
    """Smooths out sharp direction changes between consecutive hit objects."""

    def __init__(self, config: BeatHeritageConfig):
        self.config = config

    def optimize_flow(self, beatmap_data: Dict) -> Dict:
        """Reduce sharp turns (>120 degrees) for better playability.

        For every object from the third onward, the turn angle relative to
        the two preceding objects is measured; sharp turns are re-aimed to a
        90-degree turn at the same travel distance, clamped to the playfield.
        Mutates and returns `beatmap_data`.
        """
        if 'hit_objects' not in beatmap_data:
            return beatmap_data

        objects = beatmap_data['hit_objects']
        smoothed = []
        fallback = (256, 192)  # playfield center when a position is missing

        for idx, obj in enumerate(objects):
            if idx >= 2 and 'position' in obj:
                anchor = objects[idx - 2].get('position', fallback)
                pivot = objects[idx - 1].get('position', fallback)
                incoming = self._calculate_angle(anchor, pivot)
                outgoing = self._calculate_angle(pivot, obj['position'])

                if abs(outgoing - incoming) > 120:  # sharp-angle threshold
                    # Re-aim to a 90-degree turn on the same side, keeping the
                    # original travel distance.
                    target = incoming + np.sign(outgoing - incoming) * 90
                    step = self._calculate_distance(
                        objects[idx - 1]['position'], obj['position']
                    )
                    px = objects[idx - 1]['position'][0] + step * np.cos(np.radians(target))
                    py = objects[idx - 1]['position'][1] + step * np.sin(np.radians(target))
                    obj['position'] = (
                        max(0, min(512, px)),
                        max(0, min(384, py)),
                    )

            smoothed.append(obj)

        beatmap_data['hit_objects'] = smoothed
        return beatmap_data

    def _calculate_angle(self, pos1: Tuple[float, float],
                         pos2: Tuple[float, float]) -> float:
        """Direction of travel from pos1 to pos2, in degrees."""
        dy = pos2[1] - pos1[1]
        dx = pos2[0] - pos1[0]
        return np.degrees(np.arctan2(dy, dx))

    def _calculate_distance(self, pos1: Tuple[float, float],
                            pos2: Tuple[float, float]) -> float:
        """Euclidean distance between two positions."""
        return np.sqrt((pos1[0] - pos2[0])**2 + (pos1[1] - pos2[1])**2)
265
+
266
+
267
class PatternEnhancer:
    """Inject variety into repetitive sections of a beatmap.

    Detects windows of hit objects whose positional pattern repeats
    back-to-back and rewrites them using shapes from a small built-in
    pattern library.
    """

    def __init__(self, config: BeatHeritageConfig):
        self.config = config
        self.pattern_library = self._load_pattern_library()

    def enhance_patterns(self, beatmap_data: Dict) -> Dict:
        """Replace detected repetitive sections with library patterns.

        Args:
            beatmap_data: Beatmap dict; only 'hit_objects' positions are touched.

        Returns:
            The (mutated) beatmap dict.
        """
        if 'hit_objects' not in beatmap_data:
            return beatmap_data

        # Detect repetitive windows, then overwrite each with a varied shape.
        repetitive_sections = self._detect_repetitive_patterns(beatmap_data)
        for section in repetitive_sections:
            beatmap_data = self._vary_pattern(beatmap_data, section)

        return beatmap_data

    def _load_pattern_library(self) -> List[Dict]:
        """Return the built-in shape templates (vertices in a 0-100 box)."""
        return [
            {'name': 'triangle', 'positions': [(0, 0), (100, 0), (50, 86.6)]},
            {'name': 'square', 'positions': [(0, 0), (100, 0), (100, 100), (0, 100)]},
            {'name': 'star', 'positions': [(50, 0), (61, 35), (97, 35), (68, 57), (79, 91), (50, 70), (21, 91), (32, 57), (3, 35), (39, 35)]},
            {'name': 'hexagon', 'positions': [(50, 0), (93, 25), (93, 75), (50, 100), (7, 75), (7, 25)]},
        ]

    def _detect_repetitive_patterns(self, beatmap_data: Dict) -> List[Tuple[int, int]]:
        """Find index ranges where one 8-object window repeats immediately after itself."""
        repetitive_sections = []
        objects = beatmap_data.get('hit_objects', [])

        window_size = 8
        for i in range(len(objects) - window_size * 2):
            pattern1 = self._extract_pattern(objects[i:i + window_size])
            pattern2 = self._extract_pattern(objects[i + window_size:i + window_size * 2])
            if self._patterns_similar(pattern1, pattern2):
                repetitive_sections.append((i, i + window_size * 2))

        return repetitive_sections

    def _extract_pattern(self, objects: List[Dict]) -> List[Tuple[float, float]]:
        """Positions of the given objects (playfield center as fallback)."""
        return [obj.get('position', (256, 192)) for obj in objects]

    def _patterns_similar(self, pattern1: List, pattern2: List, threshold: float = 0.8) -> bool:
        """True when two equal-length patterns are positionally close.

        NOTE(review): the `threshold` parameter is currently unused — similarity
        is decided by a hard-coded 50px average point-to-point distance. Kept
        for interface compatibility; confirm intended semantics before wiring
        it in.
        """
        if len(pattern1) != len(pattern2):
            return False

        deltas = [
            np.sqrt((p1[0] - p2[0])**2 + (p1[1] - p2[1])**2)
            for p1, p2 in zip(pattern1, pattern2)
        ]
        return np.mean(deltas) < 50  # Threshold for similarity

    def _vary_pattern(self, beatmap_data: Dict, section: Tuple[int, int]) -> Dict:
        """Overwrite the objects in `section` with a randomly chosen library shape."""
        start, end = section
        objects = beatmap_data['hit_objects']

        # Select a random pattern from library and tile it across the section.
        # (Removed unused local `section_length` from the original.)
        pattern = np.random.choice(self.pattern_library)
        pattern_positions = pattern['positions']

        for i in range(start, min(end, len(objects))):
            if 'position' in objects[i]:
                pattern_idx = (i - start) % len(pattern_positions)
                base_pos = pattern_positions[pattern_idx]

                # Scale the 0-100 template up and anchor it at playfield center.
                center = (256, 192)
                scale = 2.0
                new_x = center[0] + base_pos[0] * scale
                new_y = center[1] + base_pos[1] * scale

                # Clamp to the osu! playfield (512x384).
                objects[i]['position'] = (
                    max(0, min(512, new_x)),
                    max(0, min(384, new_y)),
                )

        return beatmap_data
358
+
359
+
360
class QualityController:
    """Enforces minimum spacing and resolves overlapping hit objects."""

    def __init__(self, config: BeatHeritageConfig):
        self.config = config

    def fix_spacing_issues(self, beatmap_data: Dict) -> Dict:
        """Push apart consecutive objects closer than the configured minimum.

        Mutates and returns `beatmap_data`.
        """
        if 'hit_objects' not in beatmap_data:
            return beatmap_data

        objects = beatmap_data['hit_objects']
        min_distance = self.config.min_distance_threshold

        for idx in range(1, len(objects)):
            prev_obj = objects[idx - 1]
            cur_obj = objects[idx]
            if 'position' not in prev_obj or 'position' not in cur_obj:
                continue

            gap = self._calculate_distance(prev_obj['position'], cur_obj['position'])
            if gap < min_distance:
                # Slide the current object outward along the existing direction
                # so it sits exactly at the minimum distance.
                heading = self._get_direction(prev_obj['position'], cur_obj['position'])
                cur_obj['position'] = self._move_position(
                    prev_obj['position'], heading, min_distance
                )

        return beatmap_data

    def fix_overlaps(self, beatmap_data: Dict) -> Dict:
        """Separate objects that overlap beyond the configured ratio.

        Mutates and returns `beatmap_data`.
        """
        if 'hit_objects' not in beatmap_data:
            return beatmap_data

        objects = beatmap_data['hit_objects']
        max_overlap = self.config.max_overlap_ratio

        for i in range(len(objects)):
            # Only the next few objects can plausibly overlap on screen.
            for j in range(i + 1, min(i + 10, len(objects))):
                if self._objects_overlap(objects[i], objects[j], max_overlap):
                    objects[j] = self._adjust_for_overlap(objects[i], objects[j])

        return beatmap_data

    def _calculate_distance(self, pos1: Tuple[float, float],
                            pos2: Tuple[float, float]) -> float:
        """Euclidean distance between two playfield positions."""
        dx = pos1[0] - pos2[0]
        dy = pos1[1] - pos2[1]
        return np.sqrt(dx**2 + dy**2)

    def _get_direction(self, from_pos: Tuple[float, float],
                       to_pos: Tuple[float, float]) -> Tuple[float, float]:
        """Unit vector pointing from `from_pos` toward `to_pos`."""
        dx = to_pos[0] - from_pos[0]
        dy = to_pos[1] - from_pos[1]
        length = np.sqrt(dx**2 + dy**2)
        # Degenerate (near-coincident) points: fall back to pointing right.
        if length < 0.01:
            return (1, 0)
        return (dx / length, dy / length)

    def _move_position(self, from_pos: Tuple[float, float],
                       direction: Tuple[float, float],
                       distance: float) -> Tuple[float, float]:
        """Point reached by walking `distance` along `direction`, clamped to the playfield."""
        x = from_pos[0] + direction[0] * distance
        y = from_pos[1] + direction[1] * distance
        return (
            max(0, min(512, x)),
            max(0, min(384, y)),
        )

    def _objects_overlap(self, obj1: Dict, obj2: Dict, threshold: float) -> bool:
        """Whether two circles overlap by more than `threshold` of their combined width."""
        if 'position' not in obj1 or 'position' not in obj2:
            return False

        separation = self._calculate_distance(obj1['position'], obj2['position'])

        # Simple overlap check (can be improved for sliders)
        radius = 30  # Approximate circle radius
        overlap_ratio = max(0, 2 * radius - separation) / (2 * radius)
        return overlap_ratio > threshold

    def _adjust_for_overlap(self, obj1: Dict, obj2: Dict) -> Dict:
        """Push obj2 away from obj1 to a safe distance; returns obj2."""
        if 'position' not in obj1 or 'position' not in obj2:
            return obj2

        away = self._get_direction(obj1['position'], obj2['position'])
        min_safe_distance = 60  # Minimum safe distance
        obj2['position'] = self._move_position(obj1['position'], away, min_safe_distance)
        return obj2
471
+
472
+
473
+ # Export main postprocessor
474
+ __all__ = ['BeatHeritagePostprocessor', 'BeatHeritageConfig']
benchmark_comparison.py ADDED
@@ -0,0 +1,469 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ BeatHeritage V1 vs Mapperatorinator V30 Benchmark Script
4
+ Compares performance, quality, and generation characteristics
5
+ """
6
+
7
+ import os
8
+ import sys
9
+ import time
10
+ import json
11
+ import argparse
12
+ import subprocess
13
+ import numpy as np
14
+ import pandas as pd
15
+ from pathlib import Path
16
+ from typing import Dict, List, Tuple, Optional
17
+ from datetime import datetime
18
+ import torch
19
+ import matplotlib.pyplot as plt
20
+ import seaborn as sns
21
+ from tqdm import tqdm
22
+ import logging
23
+
24
# Setup logging
# Configured once at import time so every benchmark message shares the same
# timestamped format on the root logger.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)  # module-level logger used by BenchmarkRunner
30
+
31
+
32
+ class BenchmarkRunner:
33
+ """Run benchmarks comparing BeatHeritage V1 with Mapperatorinator V30"""
34
+
35
+ def __init__(self, output_dir: str = "./benchmark_results"):
36
+ self.output_dir = Path(output_dir)
37
+ self.output_dir.mkdir(parents=True, exist_ok=True)
38
+ self.timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
39
+ self.results = []
40
+
41
+ def run_inference(self, model_config: str, audio_path: str,
42
+ gamemode: int, difficulty: float) -> Dict:
43
+ """Run inference with specified model and parameters"""
44
+
45
+ output_path = self.output_dir / f"{model_config}_{Path(audio_path).stem}"
46
+ output_path.mkdir(parents=True, exist_ok=True)
47
+
48
+ cmd = [
49
+ 'python', 'inference.py',
50
+ '-cn', model_config,
51
+ f'audio_path={audio_path}',
52
+ f'output_path={str(output_path)}',
53
+ f'gamemode={gamemode}',
54
+ f'difficulty={difficulty}',
55
+ ]
56
+
57
+ # Add model-specific parameters
58
+ if model_config == 'beatheritage_v1':
59
+ cmd.extend([
60
+ 'temperature=0.85',
61
+ 'top_p=0.92',
62
+ 'quality_control.enable_auto_correction=true',
63
+ 'quality_control.enable_flow_optimization=true',
64
+ 'advanced_features.enable_pattern_variety=true',
65
+ ])
66
+ else: # v30
67
+ cmd.extend([
68
+ 'temperature=0.9',
69
+ 'top_p=0.9',
70
+ ])
71
+
72
+ # Measure performance
73
+ start_time = time.time()
74
+ memory_before = self._get_memory_usage()
75
+
76
+ try:
77
+ result = subprocess.run(
78
+ cmd,
79
+ capture_output=True,
80
+ text=True,
81
+ check=True
82
+ )
83
+
84
+ end_time = time.time()
85
+ memory_after = self._get_memory_usage()
86
+
87
+ # Parse output for quality metrics
88
+ output_files = list(output_path.glob('*.osu'))
89
+
90
+ metrics = {
91
+ 'model': model_config,
92
+ 'audio': Path(audio_path).name,
93
+ 'gamemode': gamemode,
94
+ 'difficulty': difficulty,
95
+ 'generation_time': end_time - start_time,
96
+ 'memory_usage': memory_after - memory_before,
97
+ 'success': True,
98
+ 'output_files': len(output_files),
99
+ 'quality_metrics': self._analyze_quality(output_files[0] if output_files else None)
100
+ }
101
+
102
+ except subprocess.CalledProcessError as e:
103
+ logger.error(f"Error running {model_config}: {e}")
104
+ metrics = {
105
+ 'model': model_config,
106
+ 'audio': Path(audio_path).name,
107
+ 'gamemode': gamemode,
108
+ 'difficulty': difficulty,
109
+ 'generation_time': -1,
110
+ 'memory_usage': -1,
111
+ 'success': False,
112
+ 'error': str(e),
113
+ 'output_files': 0,
114
+ 'quality_metrics': {}
115
+ }
116
+
117
+ return metrics
118
+
119
+ def _get_memory_usage(self) -> float:
120
+ """Get current GPU memory usage in MB"""
121
+ if torch.cuda.is_available():
122
+ return torch.cuda.memory_allocated() / 1024**2
123
+ return 0
124
+
125
+ def _analyze_quality(self, osu_file: Optional[Path]) -> Dict:
126
+ """Analyze quality metrics of generated beatmap"""
127
+ if not osu_file or not osu_file.exists():
128
+ return {}
129
+
130
+ metrics = {
131
+ 'object_count': 0,
132
+ 'avg_spacing': 0,
133
+ 'spacing_variance': 0,
134
+ 'pattern_diversity': 0,
135
+ 'flow_score': 0,
136
+ 'difficulty_consistency': 0
137
+ }
138
+
139
+ try:
140
+ with open(osu_file, 'r', encoding='utf-8') as f:
141
+ lines = f.readlines()
142
+
143
+ # Parse hit objects
144
+ hit_objects = []
145
+ in_hit_objects = False
146
+
147
+ for line in lines:
148
+ if '[HitObjects]' in line:
149
+ in_hit_objects = True
150
+ continue
151
+
152
+ if in_hit_objects and line.strip():
153
+ parts = line.strip().split(',')
154
+ if len(parts) >= 2:
155
+ try:
156
+ x, y = int(parts[0]), int(parts[1])
157
+ hit_objects.append((x, y))
158
+ except:
159
+ pass
160
+
161
+ metrics['object_count'] = len(hit_objects)
162
+
163
+ if len(hit_objects) > 1:
164
+ # Calculate spacing metrics
165
+ distances = []
166
+ for i in range(1, len(hit_objects)):
167
+ dist = np.sqrt(
168
+ (hit_objects[i][0] - hit_objects[i-1][0])**2 +
169
+ (hit_objects[i][1] - hit_objects[i-1][1])**2
170
+ )
171
+ distances.append(dist)
172
+
173
+ metrics['avg_spacing'] = np.mean(distances)
174
+ metrics['spacing_variance'] = np.var(distances)
175
+
176
+ # Pattern diversity (entropy of distance distribution)
177
+ hist, _ = np.histogram(distances, bins=10)
178
+ hist = hist / hist.sum()
179
+ entropy = -np.sum(hist * np.log(hist + 1e-10))
180
+ metrics['pattern_diversity'] = entropy
181
+
182
+ # Flow score (based on angle changes)
183
+ if len(hit_objects) > 2:
184
+ angles = []
185
+ for i in range(2, len(hit_objects)):
186
+ angle = self._calculate_angle(
187
+ hit_objects[i-2],
188
+ hit_objects[i-1],
189
+ hit_objects[i]
190
+ )
191
+ angles.append(angle)
192
+
193
+ # Lower angle variance = better flow
194
+ metrics['flow_score'] = 1.0 / (1.0 + np.var(angles) / 100)
195
+
196
+ # Difficulty consistency
197
+ chunk_size = max(10, len(distances) // 10)
198
+ chunk_variances = []
199
+ for i in range(0, len(distances), chunk_size):
200
+ chunk = distances[i:i+chunk_size]
201
+ if chunk:
202
+ chunk_variances.append(np.var(chunk))
203
+
204
+ if chunk_variances:
205
+ metrics['difficulty_consistency'] = 1.0 / (1.0 + np.var(chunk_variances))
206
+
207
+ except Exception as e:
208
+ logger.error(f"Error analyzing quality: {e}")
209
+
210
+ return metrics
211
+
212
+ def _calculate_angle(self, p1: Tuple, p2: Tuple, p3: Tuple) -> float:
213
+ """Calculate angle between three points"""
214
+ v1 = (p2[0] - p1[0], p2[1] - p1[1])
215
+ v2 = (p3[0] - p2[0], p3[1] - p2[1])
216
+
217
+ angle1 = np.arctan2(v1[1], v1[0])
218
+ angle2 = np.arctan2(v2[1], v2[0])
219
+
220
+ angle_diff = angle2 - angle1
221
+ # Normalize to [-pi, pi]
222
+ while angle_diff > np.pi:
223
+ angle_diff -= 2 * np.pi
224
+ while angle_diff < -np.pi:
225
+ angle_diff += 2 * np.pi
226
+
227
+ return abs(angle_diff)
228
+
229
+ def run_benchmark_suite(self, test_audio_files: List[str]):
230
+ """Run complete benchmark suite"""
231
+
232
+ models = ['beatheritage_v1', 'v30']
233
+ gamemodes = [0, 1, 2, 3] # All gamemodes
234
+ difficulties = [3.0, 5.5, 7.5] # Easy, Normal, Hard
235
+
236
+ total_tests = len(test_audio_files) * len(models) * len(gamemodes) * len(difficulties)
237
+
238
+ with tqdm(total=total_tests, desc="Running benchmarks") as pbar:
239
+ for audio_file in test_audio_files:
240
+ for gamemode in gamemodes:
241
+ for difficulty in difficulties:
242
+ for model in models:
243
+ logger.info(f"Testing {model} on {audio_file} "
244
+ f"(GM:{gamemode}, Diff:{difficulty})")
245
+
246
+ result = self.run_inference(
247
+ model, audio_file, gamemode, difficulty
248
+ )
249
+ self.results.append(result)
250
+ pbar.update(1)
251
+
252
+ # Save intermediate results
253
+ self._save_results()
254
+
255
def _save_results(self):
    """Persist self.results as JSON (raw) and CSV (analysis-friendly)."""
    stem = f"benchmark_results_{self.timestamp}"

    # Raw dump for later reloading
    json_path = self.output_dir / f"{stem}.json"
    with open(json_path, 'w') as f:
        json.dump(self.results, f, indent=2)

    # Tabular form for pandas/spreadsheet analysis
    csv_path = self.output_dir / f"{stem}.csv"
    pd.DataFrame(self.results).to_csv(csv_path, index=False)

    logger.info(f"Results saved to {json_path} and {csv_path}")
268
+
269
def generate_report(self):
    """Build the benchmark report: a 2x3 grid of comparison plots plus a
    plain-text summary, both written to self.output_dir."""
    if not self.results:
        logger.error("No results to generate report")
        return

    report_df = pd.DataFrame(self.results)
    ok_df = report_df[report_df['success'] == True]

    plt.figure(figsize=(20, 12))

    # 1. Generation time per model
    ax1 = plt.subplot(2, 3, 1)
    if not ok_df.empty:
        sns.boxplot(data=ok_df, x='model', y='generation_time', ax=ax1)
        ax1.set_title('Generation Time Comparison')
        ax1.set_ylabel('Time (seconds)')

    # 2. Memory usage per model
    ax2 = plt.subplot(2, 3, 2)
    if not ok_df.empty:
        sns.boxplot(data=ok_df, x='model', y='memory_usage', ax=ax2)
        ax2.set_title('Memory Usage Comparison')
        ax2.set_ylabel('Memory (MB)')

    # 3. Success rate per model
    ax3 = plt.subplot(2, 3, 3)
    (report_df.groupby('model')['success'].mean() * 100).plot(kind='bar', ax=ax3)
    ax3.set_title('Success Rate (%)')
    ax3.set_ylabel('Success Rate')
    ax3.set_ylim(0, 105)

    # 4-6. Quality metrics, flattened out of the per-run nested dicts
    if not ok_df.empty and 'quality_metrics' in ok_df.columns:
        flattened = [
            {
                'model': row['model'],
                'pattern_diversity': row['quality_metrics'].get('pattern_diversity', 0),
                'flow_score': row['quality_metrics'].get('flow_score', 0),
                'difficulty_consistency': row['quality_metrics'].get('difficulty_consistency', 0),
            }
            for _, row in ok_df.iterrows()
            if row['quality_metrics']
        ]

        if flattened:
            quality_df = pd.DataFrame(flattened)

            ax4 = plt.subplot(2, 3, 4)
            if 'pattern_diversity' in quality_df.columns:
                sns.boxplot(data=quality_df, x='model', y='pattern_diversity', ax=ax4)
                ax4.set_title('Pattern Diversity Score')

            ax5 = plt.subplot(2, 3, 5)
            if 'flow_score' in quality_df.columns:
                sns.boxplot(data=quality_df, x='model', y='flow_score', ax=ax5)
                ax5.set_title('Flow Quality Score')

            ax6 = plt.subplot(2, 3, 6)
            if 'difficulty_consistency' in quality_df.columns:
                sns.boxplot(data=quality_df, x='model', y='difficulty_consistency', ax=ax6)
                ax6.set_title('Difficulty Consistency Score')

    plt.suptitle('BeatHeritage V1 vs Mapperatorinator V30 Benchmark Report', fontsize=16)
    plt.tight_layout()

    # Persist the figure before showing it interactively
    plot_path = self.output_dir / f"benchmark_report_{self.timestamp}.png"
    plt.savefig(plot_path, dpi=150, bbox_inches='tight')
    plt.show()

    # Companion plain-text summary
    summary_path = self.output_dir / f"benchmark_summary_{self.timestamp}.txt"
    with open(summary_path, 'w') as f:
        f.write(self._generate_text_summary(report_df))

    logger.info(f"Report generated: {plot_path} and {summary_path}")
353
+
354
+ def _generate_text_summary(self, df: pd.DataFrame) -> str:
355
+ """Generate text summary of benchmark results"""
356
+
357
+ summary = []
358
+ summary.append("=" * 80)
359
+ summary.append("BEATHERITAGE V1 VS MAPPERATORINATOR V30 BENCHMARK SUMMARY")
360
+ summary.append("=" * 80)
361
+ summary.append(f"Timestamp: {self.timestamp}")
362
+ summary.append(f"Total Tests: {len(df)}")
363
+ summary.append("")
364
+
365
+ for model in df['model'].unique():
366
+ model_df = df[df['model'] == model]
367
+ successful_df = model_df[model_df['success'] == True]
368
+
369
+ summary.append(f"\n{model.upper()}")
370
+ summary.append("-" * 40)
371
+ summary.append(f"Success Rate: {model_df['success'].mean()*100:.1f}%")
372
+
373
+ if not successful_df.empty:
374
+ summary.append(f"Avg Generation Time: {successful_df['generation_time'].mean():.2f}s")
375
+ summary.append(f"Avg Memory Usage: {successful_df['memory_usage'].mean():.1f}MB")
376
+
377
+ # Quality metrics
378
+ quality_metrics = []
379
+ for _, row in successful_df.iterrows():
380
+ if row['quality_metrics']:
381
+ quality_metrics.append(row['quality_metrics'])
382
+
383
+ if quality_metrics:
384
+ avg_diversity = np.mean([m.get('pattern_diversity', 0) for m in quality_metrics])
385
+ avg_flow = np.mean([m.get('flow_score', 0) for m in quality_metrics])
386
+ avg_consistency = np.mean([m.get('difficulty_consistency', 0) for m in quality_metrics])
387
+
388
+ summary.append(f"Avg Pattern Diversity: {avg_diversity:.3f}")
389
+ summary.append(f"Avg Flow Score: {avg_flow:.3f}")
390
+ summary.append(f"Avg Difficulty Consistency: {avg_consistency:.3f}")
391
+
392
+ # Winner determination
393
+ summary.append("\n" + "=" * 80)
394
+ summary.append("WINNER ANALYSIS")
395
+ summary.append("=" * 80)
396
+
397
+ if len(df['model'].unique()) == 2:
398
+ model1, model2 = df['model'].unique()
399
+
400
+ # Compare metrics
401
+ metrics_comparison = []
402
+
403
+ for metric in ['generation_time', 'memory_usage']:
404
+ m1_avg = df[df['model'] == model1][metric].mean()
405
+ m2_avg = df[df['model'] == model2][metric].mean()
406
+
407
+ if m1_avg < m2_avg:
408
+ winner = model1
409
+ improvement = ((m2_avg - m1_avg) / m2_avg) * 100
410
+ else:
411
+ winner = model2
412
+ improvement = ((m1_avg - m2_avg) / m1_avg) * 100
413
+
414
+ metrics_comparison.append(
415
+ f"{metric}: {winner} ({improvement:.1f}% better)"
416
+ )
417
+
418
+ for comp in metrics_comparison:
419
+ summary.append(comp)
420
+
421
+ return "\n".join(summary)
422
+
423
+
424
def main():
    """CLI entry point: collect test audio, run the benchmark suite, report.

    Flags:
        --audio-dir   directory scanned for .mp3/.ogg test files
        --output-dir  destination for JSON/CSV/plot/summary artifacts
        --quick-test  restrict the run to a single audio file
    """
    parser = argparse.ArgumentParser(description='Benchmark BeatHeritage V1 vs V30')
    parser.add_argument(
        '--audio-dir',
        type=str,
        default='./test_audio',
        help='Directory containing test audio files'
    )
    parser.add_argument(
        '--output-dir',
        type=str,
        default='./benchmark_results',
        help='Directory to save benchmark results'
    )
    parser.add_argument(
        '--quick-test',
        action='store_true',
        help='Run quick test with limited parameters'
    )

    args = parser.parse_args()

    # Gather test audio; sorted() makes run order deterministic across platforms.
    audio_dir = Path(args.audio_dir)
    audio_files = []
    if audio_dir.exists():
        audio_files = sorted(audio_dir.glob('*.mp3')) + sorted(audio_dir.glob('*.ogg'))

    if not audio_files:
        # Fix: previously an existing-but-empty directory silently produced a
        # zero-test benchmark; now both missing and empty dirs fall back to demo.
        logger.warning(f"No audio files found in {audio_dir}, using demo files")
        audio_files = ['demo.mp3']  # Fallback to demo

    if args.quick_test:
        # Quick test with limited parameters
        audio_files = audio_files[:1]
        logger.info("Running quick test with 1 audio file")

    # Run benchmarks and produce the report
    runner = BenchmarkRunner(args.output_dir)
    runner.run_benchmark_suite([str(f) for f in audio_files])
    runner.generate_report()

    logger.info("Benchmark complete!")


if __name__ == "__main__":
    main()
calc_fid.py ADDED
@@ -0,0 +1,417 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+ import random
4
+ import traceback
5
+ from datetime import timedelta
6
+ from pathlib import Path
7
+ from typing import Optional
8
+
9
+ import hydra
10
+ import numpy as np
11
+ import torch
12
+ from scipy import linalg
13
+ from slider import Beatmap, Circle, Slider, Spinner, HoldNote
14
+ from torch.utils.data import DataLoader
15
+ from tqdm import tqdm
16
+
17
+ from classifier.classify import ExampleDataset
18
+ from classifier.libs.model.model import OsuClassifierOutput
19
+ from classifier.libs.utils import load_ckpt
20
+ from config import FidConfig
21
+ from inference import prepare_args, load_diff_model, generate, load_model
22
+ from osuT5.osuT5.dataset.data_utils import load_audio_file, load_mmrs_metadata, filter_mmrs_metadata
23
+ from osuT5.osuT5.inference import generation_config_from_beatmap, beatmap_config_from_beatmap
24
+ from osuT5.osuT5.tokenizer import ContextType
25
+ from multiprocessing import Manager, Process
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
def get_beatmap_paths(args: FidConfig) -> list[Path]:
    """Get all beatmap paths (.osu) from the dataset directory.

    Supports the "mmrs" layout (metadata-driven) and the "ors" layout
    (Track00000-style folders with a beatmaps/ subdirectory).
    """
    dataset_path = Path(args.dataset_path)

    if args.dataset_type == "mmrs":
        filtered = filter_mmrs_metadata(
            load_mmrs_metadata(dataset_path),
            start=args.dataset_start,
            end=args.dataset_end,
            gamemodes=args.gamemodes,
        )
        return [
            dataset_path / "data" / row["BeatmapSetFolder"] / row["BeatmapFile"]
            for _, row in filtered.iterrows()
        ]

    if args.dataset_type == "ors":
        paths = []
        for track_idx in range(args.dataset_start, args.dataset_end):
            track_dir = dataset_path / f"Track{str(track_idx).zfill(5)}" / "beatmaps"
            for entry in track_dir.iterdir():
                paths.append(track_dir / entry.name)
        return paths

    raise ValueError(f"Unknown dataset type: {args.dataset_type}")
53
+
54
+
55
def calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
    """Numpy implementation of the Frechet Distance between two Gaussians.

    For X_1 ~ N(mu_1, C_1) and X_2 ~ N(mu_2, C_2):
        d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)).

    Stable version by Dougal J. Sutherland.

    Params:
    -- mu1 : mean of layer activations for generated samples.
    -- mu2 : mean of activations precalculated on a representative data set.
    -- sigma1: covariance of activations for generated samples.
    -- sigma2: covariance of activations for the representative data set.
    -- eps: diagonal jitter added when the covariance product is singular.

    Returns:
    -- : The Frechet Distance.
    """
    mu1, mu2 = np.atleast_1d(mu1), np.atleast_1d(mu2)
    sigma1, sigma2 = np.atleast_2d(sigma1), np.atleast_2d(sigma2)

    assert (
        mu1.shape == mu2.shape
    ), "Training and test mean vectors have different lengths"
    assert (
        sigma1.shape == sigma2.shape
    ), "Training and test covariances have different dimensions"

    mean_diff = mu1 - mu2

    # Product might be almost singular
    covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
    if not np.isfinite(covmean).all():
        msg = (
            "fid calculation produces singular product; "
            "adding %s to diagonal of cov estimates"
        ) % eps
        logger.warning(msg)
        jitter = np.eye(sigma1.shape[0]) * eps
        covmean = linalg.sqrtm((sigma1 + jitter).dot(sigma2 + jitter))

    # Numerical error might give slight imaginary component
    if np.iscomplexobj(covmean):
        if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3):
            m = np.max(np.abs(covmean.imag))
            raise ValueError("Imaginary component {}".format(m))
        covmean = covmean.real

    return mean_diff.dot(mean_diff) + np.trace(sigma1) + np.trace(sigma2) - 2 * np.trace(covmean)
113
+
114
+
115
def add_to_dict(source_dict, target_dict):
    """Accumulate source_dict into target_dict in place, summing values on
    key collisions."""
    for key, value in source_dict.items():
        target_dict[key] = target_dict[key] + value if key in target_dict else value
121
+
122
+
123
def calculate_rhythm_stats(real_rhythm, generated_rhythm):
    """Compare two sets of beat timestamps (ms) within a 10 ms leniency.

    Returns a dict with:
      true_positives  -- real beats matched by some generated beat
      false_positives -- generated beats with no nearby real beat
      false_negatives -- real beats left unmatched
    """
    leniency = 10

    matched = sum(
        1 for real_beat in real_rhythm
        if any(abs(real_beat - gen_beat) <= leniency for gen_beat in generated_rhythm)
    )
    spurious = sum(
        1 for gen_beat in generated_rhythm
        if not any(abs(gen_beat - real_beat) <= leniency for real_beat in real_rhythm)
    )

    return {
        "true_positives": matched,
        "false_positives": spurious,
        "false_negatives": len(real_rhythm) - matched,
    }
145
+
146
+
147
def calculate_precision(rhythm_stats):
    """tp / (tp + fp); 0.0 when there are no positive predictions."""
    tp = rhythm_stats["true_positives"]
    predicted = tp + rhythm_stats["false_positives"]
    return tp / predicted if predicted else 0.0


def calculate_recall(rhythm_stats):
    """tp / (tp + fn); 0.0 when there are no actual positives."""
    tp = rhythm_stats["true_positives"]
    actual = tp + rhythm_stats["false_negatives"]
    return tp / actual if actual else 0.0


def calculate_f1(rhythm_stats):
    """Harmonic mean of precision and recall; 0.0 when both are zero."""
    precision = calculate_precision(rhythm_stats)
    recall = calculate_recall(rhythm_stats)
    denominator = precision + recall
    return 2 * (precision * recall) / denominator if denominator else 0.0
169
+
170
+
171
def get_rhythm(beatmap, passive=False):
    """Extract the set of beat timestamps (integer milliseconds) from a beatmap.

    Active rhythm covers circles, slider heads, and hold note heads.
    With passive=True, slider repeats/tails and spinner tails are included too.
    """
    def to_ms(td):
        # timedelta -> integer ms, with a small epsilon against float truncation
        return int(td.total_seconds() * 1000 + 1e-5)

    rhythm = set()
    for obj in beatmap.hit_objects(stacking=False):
        if isinstance(obj, Circle):
            rhythm.add(to_ms(obj.time))
        elif isinstance(obj, Slider):
            span: timedelta = (obj.end_time - obj.time) / obj.repeat
            rhythm.add(to_ms(obj.time))
            if passive:
                # One timestamp per span end: repeats and the final tail
                for i in range(obj.repeat):
                    rhythm.add(to_ms(obj.time + span * (i + 1)))
        elif isinstance(obj, Spinner):
            if passive:
                rhythm.add(to_ms(obj.end_time))
        elif isinstance(obj, HoldNote):
            rhythm.add(to_ms(obj.time))

    return rhythm
192
+
193
+
194
def generate_beatmaps(beatmap_paths, fid_args: FidConfig, return_dict, idx):
    """Worker entry point: generate a counterpart for every beatmap path.

    Outputs are written under generated/<beatmap stem>/. `return_dict` is
    accepted for multiprocessing-interface compatibility but is not written
    to here. `idx` only labels the progress bar.
    """
    args = fid_args.inference
    args.device = fid_args.device
    torch.set_grad_enabled(False)
    torch.set_float32_matmul_precision('high')

    diff_model = diff_tokenizer = refine_model = None
    model, tokenizer = load_model(args.model_path, args.train, args.device, args.max_batch_size, args.use_server, args.precision)

    if args.compile:
        model.transformer.forward = torch.compile(model.transformer.forward, mode="reduce-overhead", fullgraph=True)

    if args.generate_positions:
        diff_model, diff_tokenizer = load_diff_model(args.diff_ckpt, args.diffusion, args.device)

        if os.path.exists(args.diff_refine_ckpt):
            refine_model = load_diff_model(args.diff_refine_ckpt, args.diffusion, args.device)[0]

        if args.compile:
            diff_model.forward = torch.compile(diff_model.forward, mode="reduce-overhead", fullgraph=False)

    for beatmap_path in tqdm(beatmap_paths, desc=f"Process {idx}"):
        try:
            beatmap = Beatmap.from_path(beatmap_path)
            output_path = Path("generated") / beatmap_path.stem

            # ors layout keeps one shared audio file two levels up
            if fid_args.dataset_type == "ors":
                audio_path = beatmap_path.parents[1] / list(beatmap_path.parents[1].glob('audio.*'))[0]
            else:
                audio_path = beatmap_path.parent / beatmap.audio_filename

            already_generated = output_path.exists() and len(list(output_path.glob("*.osu"))) > 0
            if fid_args.skip_generation or already_generated:
                if not already_generated:
                    raise FileNotFoundError(f"Generated beatmap not found in {output_path}")
                print(f"Skipping {beatmap_path.stem} as it already exists")
                continue

            # With guest-difficulty context, condition on a sibling difficulty
            if ContextType.GD in args.in_context:
                siblings = [p for p in beatmap_path.parent.glob("*.osu") if p != beatmap_path]
                if len(siblings) == 0:
                    continue
                reference_path = random.choice(siblings)
            else:
                reference_path = beatmap_path

            generation_config = generation_config_from_beatmap(beatmap, tokenizer)
            beatmap_config = beatmap_config_from_beatmap(beatmap)
            beatmap_config.version = args.version

            if args.year is not None:
                generation_config.year = args.year

            result = generate(
                args,
                audio_path=audio_path,
                beatmap_path=reference_path,
                output_path=output_path,
                generation_config=generation_config,
                beatmap_config=beatmap_config,
                model=model,
                tokenizer=tokenizer,
                diff_model=diff_model,
                diff_tokenizer=diff_tokenizer,
                refine_model=refine_model,
                verbose=False,
            )[0]
            generated_beatmap = Beatmap.parse(result)
            print(beatmap_path, "Generated %s hit objects" % len(generated_beatmap.hit_objects(stacking=False)))
        except Exception as e:
            print(f"Error processing {beatmap_path}: {e}")
            traceback.print_exc()
        finally:
            torch.cuda.empty_cache()  # Clear any cached memory
266
+
267
+
268
def calculate_metrics(args: FidConfig, beatmap_paths: list[Path]):
    """Score generated beatmaps against their real counterparts.

    For each real beatmap with an output in generated/<stem>/, optionally
    accumulates classifier feature vectors (for FID) and rhythm-matching
    stats, then logs the aggregate FID and precision/recall/F1 figures.
    """
    print("Calculating metrics...")

    classifier_model = classifier_args = classifier_tokenizer = None
    if args.fid:
        classifier_model, classifier_args, classifier_tokenizer = load_ckpt(args.classifier_ckpt)

        if args.compile:
            classifier_model.model.transformer.forward = torch.compile(classifier_model.model.transformer.forward,
                                                                       mode="reduce-overhead", fullgraph=False)

    real_features = []
    generated_features = []
    active_rhythm_stats = {}
    passive_rhythm_stats = {}

    for beatmap_path in tqdm(beatmap_paths, desc="Metrics"):
        try:
            beatmap = Beatmap.from_path(beatmap_path)
            generated_path = Path("generated") / beatmap_path.stem

            # ors layout keeps one shared audio file two levels up
            if args.dataset_type == "ors":
                audio_path = beatmap_path.parents[1] / list(beatmap_path.parents[1].glob('audio.*'))[0]
            else:
                audio_path = beatmap_path.parent / beatmap.audio_filename

            candidates = list(generated_path.glob("*.osu")) if generated_path.exists() else []
            if not candidates:
                logger.warning(f"Skipping {beatmap_path.stem} as no generated beatmap found")
                continue
            generated_beatmap = Beatmap.from_path(candidates[0])

            if args.fid:
                # Classifier feature vectors for real and generated beatmaps
                sample_rate = classifier_args.data.sample_rate
                audio = load_audio_file(audio_path, sample_rate, normalize=args.inference.train.data.normalize_audio)

                for batch in DataLoader(
                        ExampleDataset(beatmap, audio, classifier_args, classifier_tokenizer, args.device),
                        batch_size=args.classifier_batch_size):
                    output: OsuClassifierOutput = classifier_model(**batch)
                    real_features.append(output.feature_vector.cpu().numpy())

                for batch in DataLoader(
                        ExampleDataset(generated_beatmap, audio, classifier_args, classifier_tokenizer, args.device),
                        batch_size=args.classifier_batch_size):
                    output: OsuClassifierOutput = classifier_model(**batch)
                    generated_features.append(output.feature_vector.cpu().numpy())

            if args.rhythm_stats:
                # Accumulate beat-matching stats for active and passive rhythm
                add_to_dict(
                    calculate_rhythm_stats(get_rhythm(beatmap, passive=False),
                                           get_rhythm(generated_beatmap, passive=False)),
                    active_rhythm_stats)
                add_to_dict(
                    calculate_rhythm_stats(get_rhythm(beatmap, passive=True),
                                           get_rhythm(generated_beatmap, passive=True)),
                    passive_rhythm_stats)
        except Exception as e:
            print(f"Error processing {beatmap_path}: {e}")
            traceback.print_exc()
        finally:
            torch.cuda.empty_cache()  # Clear any cached memory

    if args.fid:
        # Fit a Gaussian to each feature cloud and compare
        real_features = np.concatenate(real_features, axis=0)
        generated_features = np.concatenate(generated_features, axis=0)
        m1, s1 = np.mean(real_features, axis=0), np.cov(real_features, rowvar=False)
        m2, s2 = np.mean(generated_features, axis=0), np.cov(generated_features, rowvar=False)
        fid = calculate_frechet_distance(m1, s1, m2, s2)

        logger.info(f"FID: {fid}")

    if args.rhythm_stats:
        active_precision = calculate_precision(active_rhythm_stats)
        active_recall = calculate_recall(active_rhythm_stats)
        active_f1 = calculate_f1(active_rhythm_stats)
        passive_precision = calculate_precision(passive_rhythm_stats)
        passive_recall = calculate_recall(passive_rhythm_stats)
        passive_f1 = calculate_f1(passive_rhythm_stats)
        logger.info(f"Active Rhythm Precision: {active_precision}")
        logger.info(f"Active Rhythm Recall: {active_recall}")
        logger.info(f"Active Rhythm F1: {active_f1}")
        logger.info(f"Passive Rhythm Precision: {passive_precision}")
        logger.info(f"Passive Rhythm Recall: {passive_recall}")
        logger.info(f"Passive Rhythm F1: {passive_f1}")
358
+
359
+
360
def test_training_set_overlap(beatmap_paths: list[Path], training_set_ids_path: Optional[str]):
    """Log how many of the given beatmaps appear in the training-set ID list.

    No-op when no ID file is configured; logs an error when the configured
    file is missing. The file format is one integer beatmap ID per line.
    """
    if training_set_ids_path is None:
        return

    if not os.path.exists(training_set_ids_path):
        logger.error(f"Training set IDs file {training_set_ids_path} does not exist.")
        return

    with open(training_set_ids_path, "r") as f:
        training_set_ids = {int(line.strip()) for line in f}

    in_set = sum(
        1 for path in tqdm(beatmap_paths)
        if Beatmap.from_path(path).beatmap_id in training_set_ids
    )
    out_set = len(beatmap_paths) - in_set
    logger.info(f"In training set: {in_set}, Not in training set: {out_set}, Total: {len(beatmap_paths)}, Ratio: {in_set / (in_set + out_set):.2f}")
380
+
381
+
382
@hydra.main(config_path="configs", config_name="calc_fid", version_base="1.1")
def main(args: FidConfig):
    """Hydra entry point: optionally generate beatmaps in parallel worker
    processes, then compute FID / rhythm metrics over the results."""
    prepare_args(args)

    # Resolve a relative model path against this file's directory
    if args.inference.model_path.startswith("./"):
        args.inference.model_path = os.path.join(Path(__file__).parent, args.inference.model_path[2:])

    beatmap_paths = get_beatmap_paths(args)

    test_training_set_overlap(beatmap_paths, args.training_set_ids_path)

    if not args.skip_generation:
        # Round-robin assignment of beatmaps to worker processes
        num_processes = args.num_processes
        chunks = [beatmap_paths[i::num_processes] for i in range(num_processes)]

        manager = Manager()
        return_dict = manager.dict()

        workers = [
            Process(target=generate_beatmaps, args=(chunk, args, return_dict, i))
            for i, chunk in enumerate(chunks)
        ]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()

    calculate_metrics(args, beatmap_paths)


if __name__ == "__main__":
    main()
classifier/README.md ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Mapper Classifier
2
+
3
+ Try the model [here](https://colab.research.google.com/github/OliBomby/Mapperatorinator/blob/main/colab/classifier_classify.ipynb).
4
+
5
+ Mapper Classifier is a model that predicts which osu! standard ranked mapper mapped a given beatmap.
6
+
7
+ This model is built using transfer learning on the Mapperatorinator V22 model.
8
+ It achieves a top-1 validation accuracy of 12.5% on a random sample of ranked beatmaps and recognizes 3,731 unique mappers.
9
+ To make its predictions, the model analyzes an 8-second segment of the beatmap.
10
+
11
+ The purpose of this classifier is actually to calculate high-level feature vectors for beatmaps, which can be used to calculate the similarity between generated beatmaps and real beatmaps.
12
+ This is a technique often used to assess the quality of image generation models with the [Fréchet Inception Distance](https://arxiv.org/abs/1706.08500).
13
+ However, in my testing I found that the computed FID scores for beatmap generation models were not very close to the actual quality of the generated beatmaps.
14
+ This classifier might not be able to recognize all the necessary features to accurately assess the quality of a beatmap, but it's a start.
15
+
16
+ ## Usage
17
+
18
+ Run `classify.py` with the path to the beatmap you want to classify and the time in seconds of the segment you want to use to classify the beatmap.
19
+ ```shell
20
+ python classify.py beatmap_path="'...\Songs\1790119 THE ORAL CIGARETTES - ReI\THE ORAL CIGARETTES - ReI (Sotarks) [Cataclysm.].osu'" time=60
21
+ ```
22
+
23
+ ```
24
+ Mapper: Sotarks (4452992) with confidence: 9.760356903076172
25
+ Mapper: Sajinn (13513687) with confidence: 6.975161075592041
26
+ Mapper: kowari (5404892) with confidence: 6.800069332122803
27
+ Mapper: Haruto (3772301) with confidence: 6.077754020690918
28
+ Mapper: Kalibe (3376777) with confidence: 5.894346237182617
29
+ Mapper: iljaaz (8501291) with confidence: 5.873990535736084
30
+ Mapper: tomadoi (5712451) with confidence: 5.817874431610107
31
+ Mapper: Nao Tomori (5364763) with confidence: 5.144880294799805
32
+ Mapper: Kujinn (3723568) with confidence: 5.082106590270996
33
+ ...
34
+ ```
classifier/classify.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from pathlib import Path
3
+ import numpy.typing as npt
4
+
5
+ import hydra
6
+ import torch
7
+ from omegaconf import DictConfig
8
+ from slider import Beatmap
9
+ from torch.utils.data import IterableDataset
10
+
11
+ from classifier.libs.dataset import OsuParser
12
+ from classifier.libs.dataset.data_utils import load_audio_file
13
+ from classifier.libs.dataset.ors_dataset import STEPS_PER_MILLISECOND
14
+ from classifier.libs.model.model import OsuClassifierOutput
15
+ from classifier.libs.tokenizer import Tokenizer, Event, EventType
16
+ from classifier.libs.utils import load_ckpt
17
+
18
+
19
def iterate_examples(
    beatmap: Beatmap,
    audio: npt.NDArray,
    model_args: DictConfig,
    tokenizer: Tokenizer,
    device: torch.device
):
    """Yield one model-ready example per non-overlapping audio window."""
    window = (model_args.data.src_seq_len - 1) * model_args.data.hop_length
    sample_rate = model_args.data.sample_rate

    # Parse the beatmap once; each window re-slices the shared event stream
    parser = OsuParser(model_args, tokenizer)
    events, event_times = parser.parse(beatmap)

    for offset in range(0, len(audio) - window, window):
        yield create_example(events, event_times, audio, offset / sample_rate, model_args, tokenizer, device)
37
+
38
+
39
class ExampleDataset(IterableDataset):
    """Streams classifier examples for one (beatmap, audio) pair.

    Thin IterableDataset wrapper around iterate_examples so a DataLoader
    can batch the generated examples.
    """

    def __init__(self, beatmap, audio, classifier_args, classifier_tokenizer, device):
        self.beatmap = beatmap
        self.audio = audio
        self.classifier_args = classifier_args
        self.classifier_tokenizer = classifier_tokenizer
        self.device = device

    def __iter__(self):
        yield from iterate_examples(
            self.beatmap,
            self.audio,
            self.classifier_args,
            self.classifier_tokenizer,
            self.device,
        )
55
+
56
+
57
def create_example(
    events: list[Event],
    event_times: list[float],
    audio: npt.NDArray,
    time: float,
    model_args: DictConfig,
    tokenizer: Tokenizer,
    device: torch.device,
    unsqueeze: bool = False,
):
    """Build a single classifier input dict for the window starting at `time` (s).

    Slices the matching audio window, selects the events inside it,
    re-bases their TIME_SHIFTs to the window start, and tokenizes them
    into a fixed-length padded sequence. `unsqueeze` adds a batch dim.
    """
    frame_seq_len = model_args.data.src_seq_len - 1
    frame_size = model_args.data.hop_length
    sample_rate = model_args.data.sample_rate
    samples_per_sequence = frame_seq_len * frame_size
    sequence_duration = samples_per_sequence / sample_rate

    # Audio window for this example
    window_start = int(time * sample_rate)
    frames = torch.from_numpy(audio[window_start:window_start + samples_per_sequence]).to(torch.float32).to(device)

    # Events whose timestamps (ms) fall inside [time, time + sequence_duration)
    window_events = [
        event for event, event_time in zip(events, event_times)
        if time <= event_time / 1000 < time + sequence_duration
    ]
    # Re-base TIME_SHIFT events to the window start, in tokenizer step units
    for i, event in enumerate(window_events):
        if event.type == EventType.TIME_SHIFT:
            window_events[i] = Event(EventType.TIME_SHIFT, int((event.value - time * 1000) * STEPS_PER_MILLISECOND))

    # Fixed-length token sequence, pad-filled past the last event
    tokens = torch.full((model_args.data.tgt_seq_len,), tokenizer.pad_id, dtype=torch.long)
    for i in range(min(len(window_events), model_args.data.tgt_seq_len)):
        tokens[i] = tokenizer.encode(window_events[i])
    tokens = tokens.to(device)

    if unsqueeze:
        tokens = tokens.unsqueeze(0)
        frames = frames.unsqueeze(0)

    return {
        "decoder_input_ids": tokens,
        "decoder_attention_mask": tokens != tokenizer.pad_id,
        "frames": frames,
    }
101
+
102
+
103
def create_example_from_path(
    beatmap_path: str,
    audio_path: str,
    time: float,
    model_args: DictConfig,
    tokenizer: Tokenizer,
    device: torch.device,
    unsqueeze: bool = False,
):
    """Load a beatmap (and its audio) and build one classifier example at
    `time` seconds. An empty audio_path falls back to the audio file the
    beatmap itself references."""
    beatmap_path = Path(beatmap_path)
    beatmap = Beatmap.from_path(beatmap_path)

    # Default to the audio file referenced by the beatmap
    if audio_path == '':
        audio_path = beatmap_path.parent / beatmap.audio_filename

    audio = load_audio_file(audio_path, model_args.data.sample_rate)

    parser = OsuParser(model_args, tokenizer)
    events, event_times = parser.parse(beatmap)

    return create_example(events, event_times, audio, time, model_args, tokenizer, device, unsqueeze)
127
+
128
+
129
def get_mapper_names(path: str):
    """Load mapper display names keyed by user id from a JSON dump.

    Each record's first listed username is used; records with an empty
    username list map to "Unknown". Later duplicate user_ids win.
    """
    with open(Path(path), 'r') as file:
        records = json.load(file)

    return {
        item['user_id']: (item['username'][0] if len(item['username']) != 0 else "Unknown")
        for item in records
    }
146
+
147
+
148
@hydra.main(config_path="configs", config_name="inference", version_base="1.1")
def main(args: DictConfig):
    """Classify one beatmap segment and print the top-100 mapper guesses."""
    torch.set_grad_enabled(False)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model, model_args, tokenizer = load_ckpt(args.checkpoint_path)
    model.eval().to(device)

    example = create_example_from_path(args.beatmap_path, args.audio_path, args.time, model_args, tokenizer, device, True)
    result: OsuClassifierOutput = model(**example)
    logits = result.logits

    # Rank mappers by raw logit and report the top 100
    top_k = 100
    top = logits[0].topk(top_k)

    # Invert the tokenizer's mapper-id -> index table
    mapper_idx_id = {idx: ids for ids, idx in tokenizer.mapper_idx.items()}
    mapper_names = get_mapper_names(args.mappers_path)

    for idx, confidence in zip(top.indices, top.values):
        mapper_id = mapper_idx_id[idx.item()]
        mapper_name = mapper_names.get(mapper_id, "Unknown")
        print(f"Mapper: {mapper_name} ({mapper_id}) with confidence: {confidence.item()}")


if __name__ == "__main__":
    main()
classifier/configs/inference.yaml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ compile: true # PyTorch 2.0 optimization
2
+ device: gpu # Training device (cpu/gpu)
3
+ precision: 'no' # Enable mixed precision (no/fp16/bf16/fp8)
4
+ checkpoint_path: 'OliBomby/osu-classifier' # Trained classifier checkpoint (local path or HF repo) to load for inference
5
+ beatmap_path: '' # Path to beatmap to classify
6
+ audio_path: '' # Path to audio to classify
7
+ time: 0 # Time to classify
8
+ mappers_path: './datasets/beatmap_users.json' # Path to mappers dataset
9
+
10
+ hydra:
11
+ job:
12
+ chdir: False
13
+ run:
14
+ dir: ./logs/${now:%Y-%m-%d}/${now:%H-%M-%S}
classifier/configs/model/model.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ input_features: false
2
+ do_style_embed: true
3
+ classifier_proj_size: 256
4
+
5
+ spectrogram:
6
+ sample_rate: 16000
7
+ hop_length: 128
8
+ n_fft: 1024
9
+ n_mels: 388
classifier/configs/model/whisper_base.yaml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ defaults:
2
+ - model
3
+ - _self_
4
+
5
+ name: 'openai/whisper-base'
6
+ input_features: true
classifier/configs/model/whisper_base_v2.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ defaults:
2
+ - model
3
+ - _self_
4
+
5
+ name: 'openai/whisper-base'
6
+ input_features: true
7
+ classifier_proj_size: 2048
classifier/configs/model/whisper_small.yaml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ defaults:
2
+ - model
3
+ - _self_
4
+
5
+ name: 'openai/whisper-small'
6
+ input_features: true
classifier/configs/model/whisper_tiny.yaml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ defaults:
2
+ - model
3
+ - _self_
4
+
5
+ name: 'openai/whisper-tiny'
6
+ input_features: true
classifier/configs/train.yaml ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ compile: true # PyTorch 2.0 optimization
2
+ device: gpu # Training device (cpu/gpu)
3
+ precision: 'bf16-mixed' # Enable mixed precision (no/fp16/bf16/fp8)
4
+ seed: 42 # Project seed
5
+
6
+ checkpoint_path: '' # Project checkpoint directory (to resume training)
7
+ pretrained_path: '' # Path to pretrained model weights (to do transfer learning)
8
+
9
+ data: # Data settings
10
+ train_dataset_path: "/workspace/datasets/ORS16291"
11
+ test_dataset_path: "/workspace/datasets/ORS16291"
12
+ train_dataset_start: 0 # Training dataset start index
13
+ train_dataset_end: 16200 # Training dataset end index
14
+ test_dataset_start: 16200 # Testing/validation dataset start index
15
+ test_dataset_end: 16291 # Testing/validation dataset end index
16
+ src_seq_len: 1024
17
+ tgt_seq_len: 1024
18
+ sample_rate: ${..model.spectrogram.sample_rate}
19
+ hop_length: ${..model.spectrogram.hop_length}
20
+ cycle_length: 16
21
+ per_track: false # Loads all beatmaps in a track sequentially which optimizes audio data loading
22
+ num_classes: 3731 # Number of label classes in the dataset
23
+ timing_random_offset: 0
24
+ min_difficulty: 0 # Minimum difficulty to consider including in the dataset
25
+ mappers_path: "../../../datasets/beatmap_users.json" # Path to file with all beatmap mappers
26
+ add_timing: true # Model beatmap timing
27
+ add_snapping: true # Model hit object snapping
28
+ add_timing_points: false # Model beatmap timing with timing points
29
+ add_hitsounds: true # Model beatmap hitsounds
30
+ add_distances: false # Model hit object distances
31
+ add_positions: true # Model hit object coordinates
32
+ position_precision: 1 # Precision of hit object coordinates
33
+ position_split_axes: true # Split hit object X and Y coordinates into separate tokens
34
+ position_range: [-256, 768, -256, 640] # Range of hit object coordinates
35
+ dt_augment_prob: 0.7 # Probability of augmenting the dataset with DT
36
+ dt_augment_range: [1.25, 1.5] # Range of DT augmentation
37
+ types_first: true # Put the type token at the start of the group before the timeshift token
38
+ augment_flip: false # Augment the dataset with flipped positions
39
+
40
+
41
+ dataloader: # Dataloader settings
42
+ num_workers: 8
43
+
44
+ optim: # Optimizer settings
45
+ name: adamw
46
+ base_lr: 1e-2 # Should be scaled with the number of devices present
47
+ batch_size: 128 # This is the batch size per GPU
48
+ total_steps: 65536
49
+ warmup_steps: 10000
50
+ lr_scheduler: cosine
51
+ weight_decay: 0.0
52
+ grad_clip: 1.0
53
+ grad_acc: 2
54
+ final_cosine: 1e-5
55
+
56
+ eval: # Evaluation settings
57
+ every_steps: 1000
58
+ steps: 500
59
+
60
+ checkpoint: # Checkpoint settings
61
+ every_steps: 5000
62
+
63
+ logging: # Logging settings
64
+ log_with: 'wandb' # Logging service (wandb/tensorboard)
65
+ every_steps: 10
66
+ grad_l2: true
67
+ weights_l2: true
68
+ mode: 'online'
69
+
70
+ profile: # Profiling settings
71
+ do_profile: false
72
+ early_stop: false
73
+ wait: 8
74
+ warmup: 8
75
+ active: 8
76
+ repeat: 1
77
+
78
+ hydra:
79
+ job:
80
+ chdir: True
81
+ run:
82
+ dir: ./logs/${now:%Y-%m-%d}/${now:%H-%M-%S}
classifier/configs/train_v1.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ defaults:
2
+ - train
3
+ - _self_
4
+ - model: whisper_tiny
classifier/configs/train_v2.yaml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ defaults:
2
+ - train
3
+ - _self_
4
+ - model: whisper_base
5
+
6
+ pretrained_path: "../../../test/ckpt_v22"
7
+
8
+ optim: # Optimizer settings
9
+ base_lr: 1e-4 # Should be scaled with the number of devices present
10
+ batch_size: 64 # This is the batch size per GPU
11
+ total_steps: 32218
12
+ warmup_steps: 2000
13
+ grad_acc: 2
14
+ final_cosine: 1e-5
classifier/configs/train_v3.yaml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ defaults:
2
+ - train
3
+ - _self_
4
+ - model: whisper_base_v2
5
+
6
+ pretrained_path: "../../../test/ckpt_v22"
7
+
8
+ data:
9
+ augment_flip: true
10
+
11
+ optim: # Optimizer settings
12
+ base_lr: 1e-3 # Should be scaled with the number of devices present
13
+ batch_size: 128 # This is the batch size per GPU
14
+ total_steps: 65536
15
+ warmup_steps: 2000
16
+ grad_acc: 4
17
+ final_cosine: 1e-5
classifier/count_classes.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from pathlib import Path
3
+
4
+
5
def init_mapper_idx(mappers_path):
    """Index beatmap mappers and build a dense mapper-to-class-index mapping.

    Args:
        mappers_path: Path to a JSON file with a list of objects holding
            'id' (beatmap id) and 'user_id' (mapper id) keys.

    Returns:
        beatmap_mapper: Dict mapping beatmap id -> mapper user_id.
        mapper_idx: Dict mapping user_id -> dense class index.
        num_mapper_classes: Number of distinct mappers.

    Raises:
        ValueError: If the file does not exist.
    """
    # Note: the original docstring opened with four quotes, leaving a stray
    # quote character in the rendered docs — fixed here.
    path = Path(mappers_path)

    if not path.exists():
        raise ValueError(f"mappers_path {path} not found")

    # Load JSON data from file; explicit UTF-8 for cross-platform parsing.
    with open(path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Populate beatmap_mapper
    beatmap_mapper = {item['id']: item['user_id'] for item in data}

    # Get unique user_ids from beatmap_mapper values
    unique_user_ids = list(set(beatmap_mapper.values()))

    # Create mapper_idx: one dense class index per distinct mapper
    mapper_idx = {user_id: idx for idx, user_id in enumerate(unique_user_ids)}
    num_mapper_classes = len(unique_user_ids)

    return beatmap_mapper, mapper_idx, num_mapper_classes
29
+
30
+
31
# Standalone stats script: summarizes how many beatmaps each mapper has.
path = "../datasets/beatmap_users.json"
beatmap_mapper, mapper_idx, num_mapper_classes = init_mapper_idx(path)

print("Number of mapper classes:", num_mapper_classes)
print("Number of beatmaps:", len(beatmap_mapper))
# Calculate number of maps per mapper
maps_per_mapper = {}
for beatmap_id in beatmap_mapper:
    user_id = beatmap_mapper[beatmap_id]
    if user_id not in maps_per_mapper:
        maps_per_mapper[user_id] = 0
    maps_per_mapper[user_id] += 1

# Calculate average maps per mapper class
average_maps_per_mapper = len(beatmap_mapper) / num_mapper_classes
print("Average maps per mapper class:", average_maps_per_mapper)

# Calculate median maps per mapper class
# NOTE(review): this takes the element at index num_mapper_classes // 2 of
# the sorted counts (the upper median); even-sized lists are not averaged.
median_maps_per_mapper = sorted(maps_per_mapper.values())[num_mapper_classes // 2]
print("Median maps per mapper class:", median_maps_per_mapper)

# Mapper with most number of maps (all mappers tied at the maximum count)
max_maps = max(maps_per_mapper.values())
max_maps_mapper = [user_id for user_id in maps_per_mapper if maps_per_mapper[user_id] == max_maps]
print("Mapper with most number of maps:", max_maps_mapper)
print("Number of maps:", max_maps)
classifier/libs/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .utils.model_utils import get_dataloaders, get_optimizer, get_scheduler, get_tokenizer
classifier/libs/dataset/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .ors_dataset import OrsDataset
2
+ from .osu_parser import OsuParser
3
+ from .data_utils import update_event_times
classifier/libs/dataset/data_utils.py ADDED
@@ -0,0 +1,308 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import dataclasses
2
+ from pathlib import Path
3
+ from typing import Optional
4
+
5
+ import numpy as np
6
+ from pydub import AudioSegment
7
+
8
+ import numpy.typing as npt
9
+
10
+ from ..tokenizer import Event, EventType
11
+
12
+ MILISECONDS_PER_SECOND = 1000
13
+
14
+
15
def load_audio_file(file: Path, sample_rate: int, speed: float = 1.0) -> npt.NDArray:
    """Load an audio file as a numpy time-series array

    The signals are resampled, converted to mono channel, and normalized.

    Args:
        file: Path to audio file.
        sample_rate: Sample rate to resample the audio.
        speed: Speed multiplier for the audio.

    Returns:
        samples: Audio time series (float32, peak-normalized to [-1, 1]).
    """
    file = Path(file)
    audio = AudioSegment.from_file(file, format=file.suffix[1:])
    # Reinterpreting the same samples at a scaled frame rate speeds up (or
    # slows down) playback; the following set_frame_rate resamples back to
    # the requested rate.
    audio.frame_rate = int(audio.frame_rate * speed)
    audio = audio.set_frame_rate(sample_rate)
    audio = audio.set_channels(1)
    samples = np.array(audio.get_array_of_samples()).astype(np.float32)
    # Peak-normalize, guarding against empty or completely silent audio
    # which previously divided by zero and produced NaNs/inf.
    peak = np.max(np.abs(samples)) if samples.size > 0 else 0.0
    if peak > 0:
        samples *= 1.0 / peak
    return samples
36
+
37
+
38
def update_event_times(
        events: list[Event],
        event_times: list[int],
        end_time: Optional[float] = None,
        types_first: bool = False
) -> None:
    """Extends the event times list with the times of the new events if the event list is longer than the event times list.

    Runs in two passes: first every new event inherits the time of its
    group's TIME_SHIFT token; then slider anchor events — which carry no
    time of their own — get times linearly interpolated between the
    surrounding timed events. Mutates `event_times` in place.

    Args:
        events: List of events.
        event_times: List of event times.
        end_time: End time of the events, for interpolation.
        types_first: If True, the type token is at the start of the group before the timeshift token.
    """
    # Anchor events have no TIME_SHIFT of their own; their times are interpolated.
    non_timed_events = [
        EventType.BEZIER_ANCHOR,
        EventType.PERFECT_ANCHOR,
        EventType.CATMULL_ANCHOR,
        EventType.RED_ANCHOR,
    ]
    # Events always paired with an explicit TIME_SHIFT token.
    timed_events = [
        EventType.CIRCLE,
        EventType.SPINNER,
        EventType.SPINNER_END,
        EventType.SLIDER_HEAD,
        EventType.LAST_ANCHOR,
        EventType.SLIDER_END,
        EventType.BEAT,
        EventType.MEASURE,
    ]

    # Pass 1: assign each new event the value of the TIME_SHIFT token of its
    # group (following the event when types_first, otherwise the current one),
    # carrying the last seen time forward in between.
    start_index = len(event_times)
    end_index = len(events)
    current_time = 0 if len(event_times) == 0 else event_times[-1]
    for i in range(start_index, end_index):
        if types_first:
            if i + 1 < end_index and events[i + 1].type == EventType.TIME_SHIFT:
                current_time = events[i + 1].value
        elif events[i].type == EventType.TIME_SHIFT:
            current_time = events[i].value
        event_times.append(current_time)

    # Pass 2: interpolate time for control point events.
    # Scan direction depends on token order: forward when types_first,
    # backward otherwise, so anchors are visited between their bounding
    # timed events. The diagrams show tokens / pass-1 times / final times.
    interpolate = False
    if types_first:
        # Start-T-D-CP-D-CP-D-LCP-T-D-End-T-D
        # 1-----1-1-1--1-1--1-7---7-7-9---9-9
        # 1-----1-1-3--3-5--5-7---7-7-9---9-9
        index = range(start_index, end_index)
        current_time = 0 if len(event_times) == 0 else event_times[-1]
    else:
        # T-D-Start-D-CP-D-CP-T-D-LCP-T-D-End
        # 1-1-1-----1-1--1-1--7-7--7--9-9-9--
        # 1-1-1-----3-3--5-5--7-7--7--9-9-9--
        index = range(end_index - 1, start_index - 1, -1)
        current_time = end_time if end_time is not None else event_times[-1]
    for i in index:
        event = events[i]

        if event.type in timed_events:
            interpolate = False

        if event.type in non_timed_events:
            interpolate = True

        if not interpolate:
            current_time = event_times[i]
            continue

        if event.type not in non_timed_events:
            # Auxiliary tokens inside an anchor group (distance, position,
            # ...) inherit the interpolated anchor time.
            event_times[i] = current_time
            continue

        # Find the time of the first timed event and the number of control points between
        j = i
        step = 1 if types_first else -1
        count = 0
        other_time = current_time
        while 0 <= j < len(events):
            event2 = events[j]
            if event2.type == EventType.TIME_SHIFT:
                other_time = event_times[j]
                break
            if event2.type in non_timed_events:
                count += 1
            j += step
        # Ran off either end without finding a TIME_SHIFT: fall back to the
        # sequence boundaries.
        if j < 0:
            other_time = 0
        if j >= len(events):
            other_time = end_time if end_time is not None else event_times[-1]

        # Interpolate the time
        current_time = int((current_time - other_time) / (count + 1) * count + other_time)
        event_times[i] = current_time
132
+
133
+
134
def merge_events(events1: list[Event], event_times1: list[int], events2: list[Event], event_times2: list[int]) -> tuple[list[Event], list[int]]:
    """Merge two time-sorted event streams into one, preserving time order.

    Ties are resolved in favour of the first stream, which keeps the merge
    stable. Assumes both lists are sorted by time.

    Args:
        events1: First list of events.
        event_times1: Times of the first list.
        events2: Second list of events.
        event_times2: Times of the second list.

    Returns:
        merged_events: Merged list of events.
        merged_event_times: Merged list of event times.
    """
    out_events = []
    out_times = []
    a = 0
    b = 0

    # Classic two-pointer merge over the parallel (event, time) lists.
    while a < len(events1) and b < len(events2):
        if event_times1[a] <= event_times2[b]:
            out_events.append(events1[a])
            out_times.append(event_times1[a])
            a += 1
        else:
            out_events.append(events2[b])
            out_times.append(event_times2[b])
            b += 1

    # At most one stream still has a tail; append both remainders.
    out_events += events1[a:]
    out_events += events2[b:]
    out_times += event_times1[a:]
    out_times += event_times2[b:]
    return out_events, out_times
170
+
171
+
172
def remove_events_of_type(events: list[Event], event_times: list[int], event_types: list[EventType]) -> tuple[list[Event], list[int]]:
    """Filter out all events whose type is in `event_types`.

    Args:
        events: List of events.
        event_times: Event times parallel to `events`.
        event_types: Types of event to remove.

    Returns:
        The filtered events and their times, as two parallel lists.
    """
    # Keep (event, time) pairs whose type survives the filter, then unzip.
    kept = [
        (event, time)
        for event, time in zip(events, event_times)
        if event.type not in event_types
    ]
    filtered_events = [event for event, _ in kept]
    filtered_times = [time for _, time in kept]
    return filtered_events, filtered_times
189
+
190
+
191
def speed_events(events: list[Event], event_times: list[int], speed: float) -> tuple[list[Event], list[int]]:
    """Change the speed of a list of events.

    Args:
        events: List of events.
        event_times: List of event times.
        speed: Speed multiplier.

    Returns:
        sped_events: Sped up list of events.
        sped_event_times: Rescaled list of event times.
    """
    # NOTE(review): TIME_SHIFT events are rescaled by mutating the Event
    # objects in place, so any caller sharing these objects observes the
    # rescaled values too — confirm this aliasing is intended.
    for event in events:
        if event.type == EventType.TIME_SHIFT:
            event.value = int(event.value / speed)

    sped_events = list(events)
    sped_event_times = [int(time / speed) for time in event_times]
    return sped_events, sped_event_times
213
+
214
+
215
@dataclasses.dataclass
class Group:
    """One decoded hit-object group collected from a flat event stream.

    Accumulates the typed event (circle, slider part, spinner, beat, ...)
    together with the auxiliary attributes that precede or follow it.
    """
    event_type: Optional[EventType] = None  # set once the group's typed event is seen
    time: int = 0  # group time from the TIME_SHIFT token (presumably milliseconds — confirm)
    distance: Optional[int] = None  # spacing value, when distances are modeled
    x: Optional[float] = None  # hit object X coordinate, when positions are modeled
    y: Optional[float] = None  # hit object Y coordinate, when positions are modeled
    new_combo: bool = False  # whether a NEW_COMBO token was seen in this group
    # Decoded HITSOUND token components (see get_groups for the bit layout):
    hitsounds: list[int] = dataclasses.field(default_factory=list)
    samplesets: list[int] = dataclasses.field(default_factory=list)
    additions: list[int] = dataclasses.field(default_factory=list)
    volumes: list[int] = dataclasses.field(default_factory=list)  # values of VOLUME tokens
+
228
+
229
# Event types that define (and delimit) a hit-object group; all other event
# types are auxiliary attributes attached to the nearest group.
type_events = [
    EventType.CIRCLE,
    EventType.SPINNER,
    EventType.SPINNER_END,
    EventType.SLIDER_HEAD,
    EventType.BEZIER_ANCHOR,
    EventType.PERFECT_ANCHOR,
    EventType.CATMULL_ANCHOR,
    EventType.RED_ANCHOR,
    EventType.LAST_ANCHOR,
    EventType.SLIDER_END,
    EventType.BEAT,
    EventType.MEASURE,
]
243
+
244
+
245
def get_groups(
        events: list[Event],
        *,
        event_times: Optional[list[int]] = None,
        types_first: bool = False
) -> list[Group]:
    """Fold a flat event stream into hit-object groups.

    Auxiliary tokens (time, distance, position, hitsound, ...) are collected
    into the current group; a token from `type_events` either opens a new
    group (types_first) or closes the current one.

    Args:
        events: Flat list of events.
        event_times: Optional per-event times; when given, a group's time is
            taken from its typed event instead of the TIME_SHIFT token.
        types_first: Whether the type token leads its group.

    Returns:
        List of decoded groups, in stream order.
    """
    groups = []
    current = Group()
    for i, event in enumerate(events):
        if event.type == EventType.TIME_SHIFT:
            current.time = event.value
        elif event.type == EventType.DISTANCE:
            current.distance = event.value
        elif event.type == EventType.POS_X:
            current.x = event.value
        elif event.type == EventType.POS_Y:
            current.y = event.value
        elif event.type == EventType.NEW_COMBO:
            current.new_combo = True
        elif event.type == EventType.HITSOUND:
            # Unpack the packed hitsound token: low 3 bits -> hitsound flags,
            # next base-3 digits -> sampleset and addition indices.
            current.hitsounds.append((event.value % 8) * 2)
            current.samplesets.append(((event.value // 8) % 3) + 1)
            current.additions.append(((event.value // 24) % 3) + 1)
        elif event.type == EventType.VOLUME:
            current.volumes.append(event.value)
        elif event.type in type_events:
            if types_first and current.event_type is not None:
                # A typed token opens a new group; flush the previous one.
                groups.append(current)
                current = Group()
            current.event_type = event.type
            if event_times is not None:
                current.time = event_times[i]
            if not types_first:
                # The typed token closes the group when it comes last.
                groups.append(current)
                current = Group()

    # Flush a trailing group that was opened but never closed.
    if current.event_type is not None:
        groups.append(current)

    return groups
289
+
290
+
291
def get_group_indices(events: list[Event], types_first: bool = False) -> list[list[int]]:
    """Partition event indices into per-group index lists.

    Mirrors the grouping logic of `get_groups`, but returns the indices of
    the events in each group instead of decoded Group objects.

    Args:
        events: Flat list of events.
        types_first: Whether the type token leads its group.

    Returns:
        A list of index lists, one per group, covering every event.
    """
    groups = []
    pending = []
    for i, event in enumerate(events):
        pending.append(i)
        if event.type not in type_events:
            continue
        if types_first:
            # The typed token starts a new group: everything gathered before
            # it belongs to the previous group.
            if len(pending) > 1:
                groups.append(pending[:-1])
            pending = [pending[-1]]
        else:
            # The typed token terminates the current group.
            groups.append(pending)
            pending = []

    if pending:
        groups.append(pending)

    return groups
classifier/libs/dataset/ors_dataset.py ADDED
@@ -0,0 +1,490 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ import random
6
+ from typing import Optional, Callable
7
+ from pathlib import Path
8
+
9
+ import numpy as np
10
+ import numpy.typing as npt
11
+ import torch
12
+ from omegaconf import DictConfig
13
+ from slider import Beatmap
14
+ from torch.utils.data import IterableDataset
15
+
16
+ from .data_utils import load_audio_file
17
+ from .osu_parser import OsuParser
18
+ from ..tokenizer import Event, EventType, Tokenizer
19
+
20
+ OSZ_FILE_EXTENSION = ".osz"
21
+ AUDIO_FILE_NAME = "audio.mp3"
22
+ MILISECONDS_PER_SECOND = 1000
23
+ STEPS_PER_MILLISECOND = 0.1
24
+ LABEL_IGNORE_ID = -100
25
+
26
+
27
class OrsDataset(IterableDataset):
    """Iterable dataset over an ORS-format beatmap corpus on disk."""

    __slots__ = (
        "path",
        "start",
        "end",
        "args",
        "parser",
        "tokenizer",
        "beatmap_files",
        "test",
    )

    def __init__(
        self,
        args: DictConfig,
        parser: OsuParser,
        tokenizer: Tokenizer,
        beatmap_files: Optional[list[Path]] = None,
        test: bool = False,
    ):
        """Manage and process ORS dataset.

        Attributes:
            args: Data loading arguments.
            parser: Instance of OsuParser class.
            tokenizer: Instance of Tokenizer class.
            beatmap_files: List of beatmap files to process. Overrides track index range.
            test: Whether to load the test dataset.
        """
        super().__init__()
        # Train and test splits share the directory layout but use disjoint
        # track index ranges from the config.
        self.path = args.test_dataset_path if test else args.train_dataset_path
        self.start = args.test_dataset_start if test else args.train_dataset_start
        self.end = args.test_dataset_end if test else args.train_dataset_end
        self.args = args
        self.parser = parser
        self.tokenizer = tokenizer
        self.beatmap_files = beatmap_files
        self.test = test

    def _get_beatmap_files(self) -> list[Path]:
        """Collect every beatmap file of tracks in [start, end)."""
        if self.beatmap_files is not None:
            return self.beatmap_files

        # Get a list of all beatmap files in the dataset path in the track index range between start and end
        # Track directories are named "TrackXXXXX" with zero-padded indices.
        beatmap_files = []
        track_names = ["Track" + str(i).zfill(5) for i in range(self.start, self.end)]
        for track_name in track_names:
            for beatmap_file in os.listdir(
                os.path.join(self.path, track_name, "beatmaps"),
            ):
                beatmap_files.append(
                    Path(
                        os.path.join(
                            self.path,
                            track_name,
                            "beatmaps",
                            beatmap_file,
                        )
                    ),
                )

        return beatmap_files

    def _get_track_paths(self) -> list[Path]:
        """Collect the track directories of tracks in [start, end)."""
        track_paths = []
        track_names = ["Track" + str(i).zfill(5) for i in range(self.start, self.end)]
        for track_name in track_names:
            track_paths.append(Path(os.path.join(self.path, track_name)))
        return track_paths

    def __iter__(self):
        """Build the (optionally interleaved) sample iterator for this split."""
        beatmap_files = self._get_track_paths() if self.args.per_track else self._get_beatmap_files()

        # Shuffle only for training; keep the test split deterministic.
        if not self.test:
            random.shuffle(beatmap_files)

        # Interleave several sub-iterators so consecutive samples come from
        # different parts of the shuffled dataset.
        if self.args.cycle_length > 1 and not self.test:
            return InterleavingBeatmapDatasetIterable(
                beatmap_files,
                self._iterable_factory,
                self.args.cycle_length,
            )

        return self._iterable_factory(beatmap_files).__iter__()

    def _iterable_factory(self, beatmap_files: list[Path]):
        # One sequential iterator over the given files; used directly or as
        # a shard factory by the interleaving iterator above.
        return BeatmapDatasetIterable(
            beatmap_files,
            self.args,
            self.parser,
            self.tokenizer,
            self.test,
        )
120
+
121
+
122
class InterleavingBeatmapDatasetIterable:
    """Round-robin iterator interleaving several sub-iterables.

    The beatmap files are ceil-divided into `cycle_length` contiguous shards
    and one item is drawn from each shard's iterator in turn, so consecutive
    samples come from different parts of the dataset. Exhausted shards are
    dropped; iteration ends when all shards are exhausted.
    """

    __slots__ = ("workers", "cycle_length", "index")

    def __init__(
            self,
            beatmap_files: list[Path],
            iterable_factory: Callable,
            cycle_length: int,
    ):
        # Ceil-divide the files over the shards; the last shard may be short.
        shard_size = int(np.ceil(len(beatmap_files) / float(cycle_length)))
        self.workers = [
            iter(iterable_factory(
                beatmap_files[shard * shard_size: min(len(beatmap_files), (shard + 1) * shard_size)]
            ))
            for shard in range(cycle_length)
        ]
        self.cycle_length = cycle_length
        self.index = 0

    def __iter__(self) -> "InterleavingBeatmapDatasetIterable":
        return self

    def __next__(self) -> tuple[any, int]:
        # Give each remaining worker at most one chance per call; a worker
        # that raises StopIteration is removed and the next one is tried.
        for _ in range(len(self.workers)):
            try:
                self.index = self.index % len(self.workers)
                item = next(self.workers[self.index])
                self.index += 1
                return item
            except StopIteration:
                self.workers.remove(self.workers[self.index])
        raise StopIteration
157
+
158
+
159
class BeatmapDatasetIterable:
    """Iterator that parses beatmap files, segments their audio into frame
    windows and yields tokenized, padded training sequences."""

    __slots__ = (
        "beatmap_files",
        "args",
        "parser",
        "tokenizer",
        "test",
        "frame_seq_len",
        # NOTE(review): "pre_token_len" and "add_empty_sequences" are declared
        # here but never assigned in the visible code — possibly leftovers
        # from an earlier version.
        "pre_token_len",
        "add_empty_sequences",
    )
170
+
171
    def __init__(
            self,
            beatmap_files: list[Path],
            args: DictConfig,
            parser: OsuParser,
            tokenizer: Tokenizer,
            test: bool,
    ):
        """Sequentially yield samples from the given beatmap files.

        Args:
            beatmap_files: Beatmap files (or track dirs when args.per_track) to iterate.
            args: Data loading arguments.
            parser: Instance of OsuParser class.
            tokenizer: Instance of Tokenizer class.
            test: Whether this iterator serves the test split.
        """
        self.beatmap_files = beatmap_files
        self.args = args
        self.parser = parser
        self.tokenizer = tokenizer
        self.test = test
        # Frames per window; presumably src_seq_len - 1 leaves room for one
        # special token in the encoder input — TODO confirm.
        self.frame_seq_len = args.src_seq_len - 1
185
+
186
+ def _get_frames(self, samples: npt.NDArray) -> tuple[npt.NDArray, npt.NDArray]:
187
+ """Segment audio samples into frames.
188
+
189
+ Each frame has `frame_size` audio samples.
190
+ It will also calculate and return the time of each audio frame, in miliseconds.
191
+
192
+ Args:
193
+ samples: Audio time-series.
194
+
195
+ Returns:
196
+ frames: Audio frames.
197
+ frame_times: Audio frame times.
198
+ """
199
+ samples = np.pad(samples, [0, self.args.hop_length - len(samples) % self.args.hop_length])
200
+ frames = np.reshape(samples, (-1, self.args.hop_length))
201
+ frames_per_milisecond = (
202
+ self.args.sample_rate / self.args.hop_length / MILISECONDS_PER_SECOND
203
+ )
204
+ frame_times = np.arange(len(frames)) / frames_per_milisecond
205
+ return frames, frame_times
206
+
207
+ def _create_sequences(
208
+ self,
209
+ frames: npt.NDArray,
210
+ frame_times: npt.NDArray,
211
+ context: dict,
212
+ extra_data: Optional[dict] = None,
213
+ ) -> list[dict[str, int | npt.NDArray | list[Event]]]:
214
+ """Create frame and token sequences for training/testing.
215
+
216
+ Args:
217
+ frames: Audio frames.
218
+
219
+ Returns:
220
+ A list of source and target sequences.
221
+ """
222
+
223
+ def get_event_indices(events2: list[Event], event_times2: list[int]) -> tuple[list[int], list[int]]:
224
+ if len(events2) == 0:
225
+ return [], []
226
+
227
+ # Corresponding start event index for every audio frame.
228
+ start_indices = []
229
+ event_index = 0
230
+
231
+ for current_time in frame_times:
232
+ while event_index < len(events2) and event_times2[event_index] < current_time:
233
+ event_index += 1
234
+ start_indices.append(event_index)
235
+
236
+ # Corresponding end event index for every audio frame.
237
+ end_indices = start_indices[1:] + [len(events2)]
238
+
239
+ return start_indices, end_indices
240
+
241
+ start_indices, end_indices = get_event_indices(context["events"], context["event_times"])
242
+
243
+ sequences = []
244
+ n_frames = len(frames)
245
+ offset = random.randint(0, self.frame_seq_len)
246
+ # Divide audio frames into splits
247
+ for frame_start_idx in range(offset, n_frames, self.frame_seq_len):
248
+ frame_end_idx = min(frame_start_idx + self.frame_seq_len, n_frames)
249
+
250
+ def slice_events(context, frame_start_idx, frame_end_idx):
251
+ if len(context["events"]) == 0:
252
+ return []
253
+ event_start_idx = start_indices[frame_start_idx]
254
+ event_end_idx = end_indices[frame_end_idx - 1]
255
+ return context["events"][event_start_idx:event_end_idx]
256
+
257
+ def slice_context(context, frame_start_idx, frame_end_idx):
258
+ return {"events": slice_events(context, frame_start_idx, frame_end_idx)}
259
+
260
+ # Create the sequence
261
+ sequence = {
262
+ "time": frame_times[frame_start_idx],
263
+ "frames": frames[frame_start_idx:frame_end_idx],
264
+ "context": slice_context(context, frame_start_idx, frame_end_idx),
265
+ } | extra_data
266
+
267
+ sequences.append(sequence)
268
+
269
+ return sequences
270
+
271
+ def _normalize_time_shifts(self, sequence: dict) -> dict:
272
+ """Make all time shifts in the sequence relative to the start time of the sequence,
273
+ and normalize time values.
274
+
275
+ Args:
276
+ sequence: The input sequence.
277
+
278
+ Returns:
279
+ The same sequence with trimmed time shifts.
280
+ """
281
+
282
+ def process(events: list[Event], start_time) -> list[Event] | tuple[list[Event], int]:
283
+ for i, event in enumerate(events):
284
+ if event.type == EventType.TIME_SHIFT:
285
+ # We cant modify the event objects themselves because that will affect subsequent sequences
286
+ events[i] = Event(EventType.TIME_SHIFT, int((event.value - start_time) * STEPS_PER_MILLISECOND))
287
+
288
+ return events
289
+
290
+ start_time = sequence["time"]
291
+ del sequence["time"]
292
+
293
+ sequence["context"]["events"] = process(sequence["context"]["events"], start_time)
294
+
295
+ return sequence
296
+
297
+ def _tokenize_sequence(self, sequence: dict) -> dict:
298
+ """Tokenize the event sequence.
299
+
300
+ Begin token sequence with `[SOS]` token (start-of-sequence).
301
+ End token sequence with `[EOS]` token (end-of-sequence).
302
+
303
+ Args:
304
+ sequence: The input sequence.
305
+
306
+ Returns:
307
+ The same sequence with tokenized events.
308
+ """
309
+ context = sequence["context"]
310
+ tokens = torch.empty(len(context["events"]), dtype=torch.long)
311
+ for i, event in enumerate(context["events"]):
312
+ tokens[i] = self.tokenizer.encode(event)
313
+ context["tokens"] = tokens
314
+
315
+ return sequence
316
+
317
    def _pad_and_split_token_sequence(self, sequence: dict) -> dict:
        """Pad token sequence to a fixed length and split decoder input and labels.

        Pad with `[PAD]` tokens until `tgt_seq_len`.

        Token sequence (w/o last token) is the input to the transformer decoder,
        token sequence (w/o first token) is the label, a.k.a. decoder ground truth.

        Prefix the token sequence with the pre_tokens sequence.

        NOTE(review): in this classifier variant only decoder_input_ids and
        the attention mask are produced — no label split actually happens;
        the docstring above looks inherited from the generator codebase.

        Args:
            sequence: The input sequence.

        Returns:
            The same sequence with padded tokens.
        """
        # Count reducible tokens, pre_tokens and context tokens
        num_tokens = len(sequence["context"]["tokens"])

        # Trim tokens to target sequence length
        # n + padding = tgt_seq_len
        n = min(self.args.tgt_seq_len, num_tokens)
        si = 0  # write offset for the tokens within the padded buffer

        input_tokens = torch.full((self.args.tgt_seq_len,), self.tokenizer.pad_id, dtype=torch.long)

        tokens = sequence["context"]["tokens"]

        input_tokens[si:si + n] = tokens[:n]

        # Randomize some input tokens
        def randomize_tokens(tokens):
            # Jitter only TIME_SHIFT tokens by a uniform integer offset in
            # [-timing_random_offset, +timing_random_offset], clamped so the
            # result remains a valid TIME_SHIFT token id; all other tokens
            # pass through unchanged.
            offset = torch.randint(low=-self.args.timing_random_offset, high=self.args.timing_random_offset + 1,
                                   size=tokens.shape)
            return torch.where((self.tokenizer.event_start[EventType.TIME_SHIFT] <= tokens) & (
                    tokens < self.tokenizer.event_end[EventType.TIME_SHIFT]),
                               torch.clamp(tokens + offset,
                                           self.tokenizer.event_start[EventType.TIME_SHIFT],
                                           self.tokenizer.event_end[EventType.TIME_SHIFT] - 1),
                               tokens)

        if self.args.timing_random_offset > 0:
            input_tokens[si:si + n] = randomize_tokens(input_tokens[si:si + n])

        sequence["decoder_input_ids"] = input_tokens
        sequence["decoder_attention_mask"] = input_tokens != self.tokenizer.pad_id

        # The raw event context is no longer needed after tokenization.
        del sequence["context"]

        return sequence
367
+
368
def _pad_frame_sequence(self, sequence: dict) -> dict:
    """Pad or truncate the audio frame matrix to ``self.frame_seq_len`` rows, then flatten.

    Frame sequence can be further processed into Mel spectrogram frames,
    which is the input to the transformer encoder.

    Args:
        sequence: The input sequence; ``sequence["frames"]`` is a 2-D numpy array.

    Returns:
        The same sequence with ``frames`` replaced by a flattened float32 tensor.
    """
    frames = torch.from_numpy(sequence["frames"]).to(torch.float32)

    # Fast path: already the right length, just flatten.
    if frames.shape[0] == self.frame_seq_len:
        sequence["frames"] = torch.flatten(frames)
        return sequence

    # Zero-pad (or truncate) to exactly frame_seq_len rows.
    keep = min(self.frame_seq_len, len(frames))
    buffer = torch.zeros(
        self.frame_seq_len,
        frames.shape[-1],
        dtype=frames.dtype,
        device=frames.device,
    )
    buffer[:keep] = frames[:keep]
    sequence["frames"] = torch.flatten(buffer)
    return sequence
396
+
397
def __iter__(self):
    """Iterate samples either per track or per beatmap, per ``args.per_track``."""
    if self.args.per_track:
        return self._get_next_tracks()
    return self._get_next_beatmaps()
399
+
400
@staticmethod
def _load_metadata(track_path: Path) -> dict:
    """Load and return the track-level ``metadata.json`` as a dict."""
    with (track_path / "metadata.json").open() as fp:
        return json.load(fp)
405
+
406
def _get_difficulty(self, metadata: dict, beatmap_name: str, speed: float = 1.0, beatmap: Beatmap = None) -> float:
    """Return the star rating for a beatmap at the given speed multiplier.

    Uses a live star-rating calculation only when a parsed beatmap is available
    and the cached metadata cannot answer (non-standard speed, or the augment
    range is pinned to 1.5x); otherwise reads the precomputed metadata entry
    ("64" = DT/1.5x mods, "0" = nomod).
    """
    needs_live_calc = beatmap is not None and (
        all(e == 1.5 for e in self.args.dt_augment_range) or speed not in [1.0, 1.5]
    )
    if needs_live_calc:
        return beatmap.stars(speed_scale=speed)

    rating_key = "64" if speed == 1.5 else "0"
    return metadata["Beatmaps"][beatmap_name]["StandardStarRating"][rating_key]
413
+
414
@staticmethod
def _get_idx(metadata: dict, beatmap_name: str):
    """Return the stored index of ``beatmap_name`` within the track metadata."""
    beatmap_meta = metadata["Beatmaps"][beatmap_name]
    return beatmap_meta["Index"]
417
+
418
def _get_speed_augment(self):
    """Sample a speed (DT) augmentation factor.

    With probability ``args.dt_augment_prob`` returns a uniform draw from
    ``args.dt_augment_range``; otherwise returns 1.0 (no augmentation).
    """
    lo, hi = self.args.dt_augment_range
    if random.random() < self.args.dt_augment_prob:
        return lo + random.random() * (hi - lo)
    return 1.0
421
+
422
def _get_next_beatmaps(self) -> dict:
    """Yield training samples for every beatmap file in ``self.beatmap_files``.

    Applies the minimum-difficulty filter and a random speed (DT) augmentation,
    then delegates per-beatmap sample generation to ``_get_next_beatmap``.
    """
    for beatmap_path in self.beatmap_files:
        track_path = beatmap_path.parents[1]
        metadata = self._load_metadata(track_path)

        # Skip beatmaps below the configured difficulty threshold.
        if self.args.min_difficulty > 0 and self._get_difficulty(metadata,
                                                                 beatmap_path.stem) < self.args.min_difficulty:
            continue

        speed = self._get_speed_augment()
        # BUGFIX: Path.glob() already yields paths rooted at track_path, so use the
        # result directly. The previous `track_path / glob_result` join duplicated
        # the prefix for relative paths (it only worked for absolute paths because
        # joining with an absolute right operand discards the left one).
        audio_path = list(track_path.glob('audio.*'))[0]
        audio_samples = load_audio_file(audio_path, self.args.sample_rate, speed)

        for sample in self._get_next_beatmap(audio_samples, beatmap_path, speed):
            yield sample
436
+
437
def _get_next_tracks(self) -> dict:
    """Yield training samples for every beatmap of every track in ``self.beatmap_files``.

    A track is skipped entirely when all of its beatmaps fall below the
    minimum difficulty; individual beatmaps are additionally filtered.
    The audio is decoded once per track and shared by all its beatmaps.
    """
    for track_path in self.beatmap_files:
        metadata = self._load_metadata(track_path)

        # Skip the whole track if no beatmap reaches the difficulty threshold.
        if self.args.min_difficulty > 0 and all(self._get_difficulty(metadata, beatmap_name)
                                                < self.args.min_difficulty for beatmap_name in
                                                metadata["Beatmaps"]):
            continue

        speed = self._get_speed_augment()
        # BUGFIX: Path.glob() already yields paths rooted at track_path, so use the
        # result directly. The previous `track_path / glob_result` join duplicated
        # the prefix for relative paths (it only worked for absolute paths because
        # joining with an absolute right operand discards the left one).
        audio_path = list(track_path.glob('audio.*'))[0]
        audio_samples = load_audio_file(audio_path, self.args.sample_rate, speed)

        for beatmap_name in metadata["Beatmaps"]:
            beatmap_path = (track_path / "beatmaps" / beatmap_name).with_suffix(".osu")

            # Per-beatmap difficulty filter.
            if self.args.min_difficulty > 0 and self._get_difficulty(metadata,
                                                                     beatmap_name) < self.args.min_difficulty:
                continue

            for sample in self._get_next_beatmap(audio_samples, beatmap_path, speed):
                yield sample
459
+
460
def _get_next_beatmap(self, audio_samples, beatmap_path: Path, speed: float) -> dict:
    """Generate model-ready training samples for a single beatmap.

    Args:
        audio_samples: Decoded (and speed-adjusted) audio for the whole track.
        beatmap_path: Path to the .osu file to parse.
        speed: Speed multiplier already applied to the audio; forwarded to the
            event parser so event times stay in sync.

    Yields:
        Tokenized, padded sample dicts. Yields nothing when the beatmap id is
        absent from the tokenizer's beatmap->mapper table (unlabelable).
    """
    frames, frame_times = self._get_frames(audio_samples)
    osu_beatmap = Beatmap.from_path(beatmap_path)

    # Only beatmaps with a known mapper can be labeled; skip the rest.
    if osu_beatmap.beatmap_id not in self.tokenizer.beatmap_mapper:
        return

    # Classification target: the mapper's class index for this beatmap.
    extra_data = {
        "labels": self.tokenizer.mapper_idx[self.tokenizer.beatmap_mapper[osu_beatmap.beatmap_id]],
    }

    # Optional random horizontal/vertical flip augmentation (50/50 each).
    flip_x, flip_y = False, False
    if self.args.augment_flip:
        flip_x, flip_y = random.random() < 0.5, random.random() < 0.5

    events, event_times = self.parser.parse(osu_beatmap, speed, flip_x, flip_y)
    in_context = {"events": events, "event_times": event_times}

    sequences = self._create_sequences(
        frames,
        frame_times,
        in_context,
        extra_data,
    )

    # Post-process each windowed sequence into model inputs.
    for sequence in sequences:
        sequence = self._normalize_time_shifts(sequence)
        sequence = self._tokenize_sequence(sequence)
        sequence = self._pad_frame_sequence(sequence)
        sequence = self._pad_and_split_token_sequence(sequence)
        yield sequence
classifier/libs/dataset/osu_parser.py ADDED
@@ -0,0 +1,460 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from datetime import timedelta
4
+ from typing import Tuple
5
+
6
+ import numpy as np
7
+ import numpy.typing as npt
8
+ from omegaconf import DictConfig
9
+ from slider import Beatmap, Circle, Slider, Spinner
10
+ from slider.curve import Linear, Catmull, Perfect, MultiBezier
11
+
12
+ from ..tokenizer import Event, EventType, Tokenizer
13
+ from .data_utils import merge_events, speed_events
14
+
15
+
16
class OsuParser:
    """Converts a parsed ``slider.Beatmap`` into parallel lists of ``Event``
    objects and their millisecond timestamps, honoring the feature toggles in
    the data config (timing, snapping, hitsounds, distances, positions)."""

    def __init__(self, args: DictConfig, tokenizer: Tokenizer) -> None:
        # Feature toggles from the data config section.
        self.types_first = args.data.types_first  # emit the type event before (True) or after its attribute events
        self.add_timing = args.data.add_timing
        self.add_snapping = args.data.add_snapping
        self.add_timing_points = args.data.add_timing_points
        self.add_hitsounds = args.data.add_hitsounds
        self.add_distances = args.data.add_distances
        self.add_positions = args.data.add_positions
        if self.add_positions:
            # Position events are quantized to a grid of `position_precision` pixels.
            self.position_precision = args.data.position_precision
            self.position_split_axes = args.data.position_split_axes
            x_min, x_max, y_min, y_max = args.data.position_range
            self.x_min = x_min / self.position_precision
            self.x_max = x_max / self.position_precision
            self.y_min = y_min / self.position_precision
            self.y_max = y_max / self.position_precision
            # Grid width in x, used to flatten (x, y) into a single POS index.
            self.x_count = self.x_max - self.x_min + 1
        if self.add_distances:
            # Distance values are clipped to the tokenizer's DISTANCE range.
            dist_range = tokenizer.event_range[EventType.DISTANCE]
            self.dist_min = dist_range.min_value
            self.dist_max = dist_range.max_value

    def parse(
        self,
        beatmap: Beatmap,
        speed: float = 1.0,
        flip_x: bool = False,
        flip_y: bool = False
    ) -> tuple[list[Event], list[int]]:
        # noinspection PyUnresolvedReferences
        """Parse an .osu beatmap.

        Each hit object is parsed into a list of Event objects, in order of its
        appearance in the beatmap. In other words, in ascending order of time.

        Args:
            beatmap: Beatmap object parsed from an .osu file.
            speed: Speed multiplier for the beatmap.
            flip_x: Whether to flip the x-axis.
            flip_y: Whether to flip the y-axis.

        Returns:
            events: List of Event object lists.
            event_times: List of event times.

        Example::
            >>> beatmap = [
                "64,80,11000,1,0",
                "100,100,16000,2,0,B|200:200|250:200|250:200|300:150,2"
            ]
            >>> events = parse(beatmap)
            >>> print(events)
            [
                Event(EventType.TIME_SHIFT, 11000), Event(EventType.DISTANCE, 36), Event(EventType.CIRCLE),
                Event(EventType.TIME_SHIFT, 16000), Event(EventType.DISTANCE, 42), Event(EventType.SLIDER_HEAD),
                Event(EventType.TIME_SHIFT, 16500), Event(EventType.DISTANCE, 141), Event(EventType.BEZIER_ANCHOR),
                Event(EventType.TIME_SHIFT, 17000), Event(EventType.DISTANCE, 50), Event(EventType.BEZIER_ANCHOR),
                Event(EventType.TIME_SHIFT, 17500), Event(EventType.DISTANCE, 10), Event(EventType.BEZIER_ANCHOR),
                Event(EventType.TIME_SHIFT, 18000), Event(EventType.DISTANCE, 64), Event(EventType.LAST_ANCHOR),
                Event(EventType.TIME_SHIFT, 20000), Event(EventType.DISTANCE, 11), Event(EventType.SLIDER_END)
            ]
        """
        hit_objects = beatmap.hit_objects(stacking=False)
        # Distances are measured from the previous object; start at playfield center.
        last_pos = np.array((256, 192))
        events = []
        event_times = []

        for hit_object in hit_objects:
            if isinstance(hit_object, Circle):
                last_pos = self._parse_circle(hit_object, events, event_times, last_pos, beatmap, flip_x, flip_y)
            elif isinstance(hit_object, Slider):
                last_pos = self._parse_slider(hit_object, events, event_times, last_pos, beatmap, flip_x, flip_y)
            elif isinstance(hit_object, Spinner):
                last_pos = self._parse_spinner(hit_object, events, event_times, beatmap)

        if self.add_timing:
            # Interleave beat/measure/timing-point events with the hit-object events.
            timing_events, timing_times = self.parse_timing(beatmap)
            events, event_times = merge_events(timing_events, timing_times, events, event_times)

        if speed != 1.0:
            # Rescale all event times to the augmented playback speed.
            events, event_times = speed_events(events, event_times, speed)

        return events, event_times

    def parse_timing(self, beatmap: Beatmap, speed: float = 1.0) -> tuple[list[Event], list[int]]:
        """Extract all timing information from a beatmap.

        Emits a TIMING_POINT / MEASURE / BEAT event on every beat of every
        uninherited (BPM-defining) timing point, up to the last hit object.
        """
        events = []
        event_times = []
        hit_objects = beatmap.hit_objects(stacking=False)
        if len(hit_objects) == 0:
            last_time = timedelta(milliseconds=0)
        else:
            last_ho = beatmap.hit_objects(stacking=False)[-1]
            # Sliders/spinners expose end_time; circles only have time.
            last_time = last_ho.end_time if hasattr(last_ho, "end_time") else last_ho.time

        # Get all timing points with BPM changes
        timing_points = [tp for tp in beatmap.timing_points if tp.bpm]

        for i, tp in enumerate(timing_points):
            # Generate beat and measure events until the next timing point
            # (stop 10 ms early so the next section's first beat is not duplicated).
            next_tp = timing_points[i + 1] if i + 1 < len(timing_points) else None
            next_time = next_tp.offset - timedelta(milliseconds=10) if next_tp else last_time
            time = tp.offset
            measure_counter = 0
            beat_delta = timedelta(milliseconds=tp.ms_per_beat)
            while time <= next_time:
                # First beat of the section is the timing point itself (if enabled),
                # every tp.meter-th beat is a measure, the rest are plain beats.
                if self.add_timing_points and measure_counter == 0:
                    event_type = EventType.TIMING_POINT
                elif measure_counter % tp.meter == 0:
                    event_type = EventType.MEASURE
                else:
                    event_type = EventType.BEAT

                self._add_group(
                    event_type,
                    time,
                    events,
                    event_times,
                    beatmap,
                    time_event=True,
                    add_snap=False,
                )

                measure_counter += 1
                time += beat_delta

        if speed != 1.0:
            events, event_times = speed_events(events, event_times, speed)

        return events, event_times

    @staticmethod
    def uninherited_point_at(time: timedelta, beatmap: Beatmap):
        """Return the uninherited (BPM-defining) timing point in effect at `time`."""
        tp = beatmap.timing_point_at(time)
        return tp if tp.parent is None else tp.parent

    @staticmethod
    def hitsound_point_at(time: timedelta, beatmap: Beatmap):
        """Return the timing point governing hitsounds at `time`.

        Queries 5 ms late so an inherited point placed exactly on the object
        still applies (mirrors the in-game hitsound lookup behavior).
        """
        hs_query = time + timedelta(milliseconds=5)
        return beatmap.timing_point_at(hs_query)

    def _add_time_event(self, time: timedelta, beatmap: Beatmap, events: list[Event], event_times: list[int],
                        add_snap: bool = True) -> None:
        """Add a TIME_SHIFT (and optionally a SNAPPING) event to the event list.

        Args:
            time: Time of the snapping event.
            beatmap: Beatmap object.
            events: List of events to add to.
            event_times: Parallel list of event times to add to.
            add_snap: Whether to add a snapping event.
        """
        # +1e-5 guards against float truncation just below a whole millisecond.
        time_ms = int(time.total_seconds() * 1000 + 1e-5)
        events.append(Event(EventType.TIME_SHIFT, time_ms))
        event_times.append(time_ms)

        if not add_snap or not self.add_snapping:
            return

        if len(beatmap.timing_points) > 0:
            tp = self.uninherited_point_at(time, beatmap)
            # Position of `time` within the section, in beats.
            beats = (time - tp.offset).total_seconds() * 1000 / tp.ms_per_beat
            snapping = 0
            # Find the smallest beat divisor (1..16) that reproduces this time.
            for i in range(1, 17):
                # If the difference between the time and the snapped time is less than 2 ms, that is the correct snapping
                if abs(beats - round(beats * i) / i) * tp.ms_per_beat < 2:
                    snapping = i
                    break
            else:
                snapping = 0  # unsnapped: no divisor up to 1/16 matches

            events.append(Event(EventType.SNAPPING, snapping))
            event_times.append(time_ms)

    def _add_hitsound_event(self, time: timedelta, group_time: int, hitsound: int, addition: str, beatmap: Beatmap,
                            events: list[Event], event_times: list[int]) -> None:
        """Add HITSOUND and VOLUME events for a hit at `time`, stamped with `group_time`.

        Args:
            time: Time used to look up the governing timing point.
            group_time: Event-time (ms) recorded for the emitted events.
            hitsound: Raw hitsound bitmask from the hit object.
            addition: "sampleSet:additionSet[:...]" addition string.
        """
        if not self.add_hitsounds:
            return

        if len(beatmap.timing_points) > 0:
            tp = self.hitsound_point_at(time, beatmap)
            tp_sample_set = tp.sample_type if tp.sample_type != 0 else 2  # Inherit to soft sample set
            tp_volume = tp.volume
        else:
            # No timing points at all: fall back to soft set at full volume.
            tp_sample_set = 2
            tp_volume = 100

        # Per-object sample sets override the timing point's; "0" means inherit.
        addition_split = addition.split(":")
        sample_set = int(addition_split[0]) if addition_split[0] != "0" else tp_sample_set
        addition_set = int(addition_split[1]) if addition_split[1] != "0" else sample_set

        sample_set = sample_set if 0 < sample_set < 4 else 1  # Overflow default to normal sample set
        addition_set = addition_set if 0 < addition_set < 4 else 1  # Overflow default to normal sample set
        hitsound = hitsound & 14  # Only take the bits for normal, whistle, and finish

        # Pack (hitsound bits, sample set, addition set) into one index:
        # 8 hitsound combos x 3 sample sets x 3 addition sets.
        hitsound_idx = hitsound // 2 + 8 * (sample_set - 1) + 24 * (addition_set - 1)

        events.append(Event(EventType.HITSOUND, hitsound_idx))
        events.append(Event(EventType.VOLUME, tp_volume))
        event_times.append(group_time)
        event_times.append(group_time)

    def _clip_dist(self, dist: int) -> int:
        """Clip distance to valid range."""
        return int(np.clip(dist, self.dist_min, self.dist_max))

    def _scale_clip_pos(self, pos: npt.NDArray) -> Tuple[int, int]:
        """Quantize a pixel position to grid units and clip to the valid range."""
        p = pos / self.position_precision
        return int(np.clip(p[0], self.x_min, self.x_max)), int(np.clip(p[1], self.y_min, self.y_max))

    def _add_position_event(self, pos: npt.NDArray, last_pos: npt.NDArray, time: timedelta, events: list[Event],
                            event_times: list[int], flip_x: bool, flip_y: bool) -> npt.NDArray:
        """Emit DISTANCE and/or position events for `pos`; returns the new last position.

        Note: the returned (and distance-reference) position is the unflipped
        one — flips are applied only to the emitted position coordinates.
        """
        time_ms = int(time.total_seconds() * 1000 + 1e-5)
        if self.add_distances:
            # Euclidean distance from the previous object, clipped to range.
            dist = self._clip_dist(np.linalg.norm(pos - last_pos))
            events.append(Event(EventType.DISTANCE, dist))
            event_times.append(time_ms)

        if self.add_positions:
            # Mirror across the 512x384 playfield if flip augmentation is active.
            pos_modified = pos.copy()
            if flip_x:
                pos_modified[0] = 512 - pos_modified[0]
            if flip_y:
                pos_modified[1] = 384 - pos_modified[1]

            p = self._scale_clip_pos(pos_modified)
            if self.position_split_axes:
                # Separate POS_X / POS_Y tokens.
                events.append(Event(EventType.POS_X, p[0]))
                events.append(Event(EventType.POS_Y, p[1]))
                event_times.append(time_ms)
                event_times.append(time_ms)
            else:
                # Single flattened grid index: x + y * grid_width.
                events.append(Event(EventType.POS, (p[0] - self.x_min) + (p[1] - self.y_min) * self.x_count))
                event_times.append(time_ms)

        return pos

    def _add_group(
        self,
        event_type: EventType,
        time: timedelta,
        events: list[Event],
        event_times: list[int],
        beatmap: Beatmap,
        *,
        time_event: bool = False,
        add_snap=True,
        pos: npt.NDArray = None,
        last_pos: npt.NDArray = None,
        new_combo: bool = False,
        hitsound_ref_times: list[timedelta] = None,
        hitsounds: list[int] = None,
        additions: list[str] = None,
        flip_x: bool = False,
        flip_y: bool = False,
    ) -> npt.NDArray:
        """Add a group of events to the event list.

        A "group" is one typed event plus its optional attribute events
        (time/snapping, position/distance, new combo, hitsounds). The type
        event is emitted first or last depending on ``self.types_first``.
        Returns the (possibly updated) last position for distance tracking.
        """
        time_ms = int(time.total_seconds() * 1000 + 1e-5) if time is not None else None

        if self.types_first:
            events.append(Event(event_type))
            event_times.append(time_ms)
        if time_event:
            self._add_time_event(time, beatmap, events, event_times, add_snap)
        if pos is not None:
            last_pos = self._add_position_event(pos, last_pos, time, events, event_times, flip_x, flip_y)
        if new_combo:
            events.append(Event(EventType.NEW_COMBO))
            event_times.append(time_ms)
        if hitsound_ref_times is not None:
            # hitsounds/additions must be parallel to hitsound_ref_times.
            for i, ref_time in enumerate(hitsound_ref_times):
                self._add_hitsound_event(ref_time, time_ms, hitsounds[i], additions[i], beatmap, events, event_times)
        if not self.types_first:
            events.append(Event(event_type))
            event_times.append(time_ms)

        return last_pos

    def _parse_circle(self, circle: Circle, events: list[Event], event_times: list[int], last_pos: npt.NDArray,
                      beatmap: Beatmap, flip_x: bool, flip_y: bool) -> npt.NDArray:
        """Parse a circle hit object.

        Args:
            circle: Circle object.
            events: List of events to add to.
            last_pos: Last position of the hit objects.

        Returns:
            pos: Position of the circle.
        """
        return self._add_group(
            EventType.CIRCLE,
            circle.time,
            events,
            event_times,
            beatmap,
            time_event=True,
            pos=np.array(circle.position),
            last_pos=last_pos,
            new_combo=circle.new_combo,
            hitsound_ref_times=[circle.time],
            hitsounds=[circle.hitsound],
            additions=[circle.addition],
            flip_x=flip_x,
            flip_y=flip_y,
        )

    def _parse_slider(self, slider: Slider, events: list[Event], event_times: list[int], last_pos: npt.NDArray,
                      beatmap: Beatmap, flip_x: bool, flip_y: bool) -> npt.NDArray:
        """Parse a slider hit object.

        Emits SLIDER_HEAD, the curve's anchor events, LAST_ANCHOR (with body
        and repeat-edge hitsounds) and SLIDER_END (with tail hitsound).

        Args:
            slider: Slider object.
            events: List of events to add to.
            last_pos: Last position of the hit objects.

        Returns:
            pos: Last position of the slider.
        """
        # Ignore sliders which are too big
        if len(slider.curve.points) >= 100:
            return last_pos

        last_pos = self._add_group(
            EventType.SLIDER_HEAD,
            slider.time,
            events,
            event_times,
            beatmap,
            time_event=True,
            pos=np.array(slider.position),
            last_pos=last_pos,
            new_combo=slider.new_combo,
            hitsound_ref_times=[slider.time],
            hitsounds=[slider.edge_sounds[0] if len(slider.edge_sounds) > 0 else 0],
            additions=[slider.edge_additions[0] if len(slider.edge_additions) > 0 else '0:0'],
            flip_x=flip_x,
            flip_y=flip_y,
        )

        # Duration of a single slider span (one traversal, ignoring repeats).
        duration: timedelta = (slider.end_time - slider.time) / slider.repeat
        control_point_count = len(slider.curve.points)

        # `last_pos=last_pos` default binds the current value (avoids late binding).
        def append_control_points(event_type: EventType, last_pos: npt.NDArray = last_pos) -> npt.NDArray:
            # Anchors exclude the head (0) and tail (count - 1) control points.
            for i in range(1, control_point_count - 1):
                last_pos = add_anchor(event_type, i, last_pos)

            return last_pos

        def add_anchor(event_type: EventType, i: int, last_pos: npt.NDArray) -> npt.NDArray:
            # Anchor times are interpolated linearly along the first span.
            return self._add_group(
                event_type,
                slider.time + i / (control_point_count - 1) * duration,
                events,
                event_times,
                beatmap,
                pos=np.array(slider.curve.points[i]),
                last_pos=last_pos,
                flip_x=flip_x,
                flip_y=flip_y,
            )

        if isinstance(slider.curve, Linear):
            last_pos = append_control_points(EventType.RED_ANCHOR, last_pos)
        elif isinstance(slider.curve, Catmull):
            last_pos = append_control_points(EventType.CATMULL_ANCHOR, last_pos)
        elif isinstance(slider.curve, Perfect):
            last_pos = append_control_points(EventType.PERFECT_ANCHOR, last_pos)
        elif isinstance(slider.curve, MultiBezier):
            # Duplicated consecutive points mark red (sharp) anchors in .osu format.
            for i in range(1, control_point_count - 1):
                if slider.curve.points[i] == slider.curve.points[i + 1]:
                    last_pos = add_anchor(EventType.RED_ANCHOR, i, last_pos)
                elif slider.curve.points[i] != slider.curve.points[i - 1]:
                    last_pos = add_anchor(EventType.BEZIER_ANCHOR, i, last_pos)

        # Add body hitsounds and remaining edge hitsounds
        last_pos = self._add_group(
            EventType.LAST_ANCHOR,
            slider.time + duration,
            events,
            event_times,
            beatmap,
            time_event=True,
            pos=np.array(slider.curve.points[-1]),
            last_pos=last_pos,
            hitsound_ref_times=[slider.time + timedelta(milliseconds=1)] + [slider.time + i * duration for i in
                                                                            range(1, slider.repeat)],
            hitsounds=[slider.hitsound] + [slider.edge_sounds[i] if len(slider.edge_sounds) > i else 0 for i in
                                           range(1, slider.repeat)],
            additions=[slider.addition] + [slider.edge_additions[i] if len(slider.edge_additions) > i else '0:0' for i
                                           in range(1, slider.repeat)],
            flip_x=flip_x,
            flip_y=flip_y,
        )

        return self._add_group(
            EventType.SLIDER_END,
            slider.end_time,
            events,
            event_times,
            beatmap,
            time_event=True,
            pos=np.array(slider.curve(1)),
            last_pos=last_pos,
            hitsound_ref_times=[slider.end_time],
            hitsounds=[slider.edge_sounds[-1] if len(slider.edge_sounds) > 0 else 0],
            additions=[slider.edge_additions[-1] if len(slider.edge_additions) > 0 else '0:0'],
            flip_x=flip_x,
            flip_y=flip_y,
        )

    def _parse_spinner(self, spinner: Spinner, events: list[Event], event_times: list[int],
                       beatmap: Beatmap) -> npt.NDArray:
        """Parse a spinner hit object.

        Args:
            spinner: Spinner object.
            events: List of events to add to.

        Returns:
            pos: Last position of the spinner (playfield center).
        """
        self._add_group(
            EventType.SPINNER,
            spinner.time,
            events,
            event_times,
            beatmap,
            time_event=True,
        )

        self._add_group(
            EventType.SPINNER_END,
            spinner.end_time,
            events,
            event_times,
            beatmap,
            time_event=True,
            hitsound_ref_times=[spinner.end_time],
            hitsounds=[spinner.hitsound],
            additions=[spinner.addition],
        )

        # Spinners recenter the cursor; subsequent distances start from center.
        return np.array((256, 192))
classifier/libs/model/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .model import OsuClassifier
classifier/libs/model/model.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Optional
5
+
6
+ import torch
7
+ import torch.nn as nn
8
+ from omegaconf import DictConfig
9
+ from transformers import T5Config, WhisperConfig, T5Model, WhisperModel
10
+ from transformers.modeling_outputs import Seq2SeqModelOutput
11
+
12
+ from .spectrogram import MelSpectrogram
13
+ from ..tokenizer import Tokenizer
14
+
15
+ LABEL_IGNORE_ID = -100
16
+
17
+
18
+ @dataclass
19
+ class OsuClassifierOutput:
20
+ loss: Optional[torch.FloatTensor] = None
21
+ logits: Optional[torch.FloatTensor] = None
22
+ encoder_last_hidden_state: Optional[torch.FloatTensor] = None
23
+ decoder_last_hidden_state: Optional[torch.FloatTensor] = None
24
+ feature_vector: Optional[torch.FloatTensor] = None
25
+
26
+
27
+ def get_backbone_model(args, tokenizer: Tokenizer):
28
+ if args.model.name.startswith("google/t5"):
29
+ config = T5Config.from_pretrained(args.model.name)
30
+ elif args.model.name.startswith("openai/whisper"):
31
+ config = WhisperConfig.from_pretrained(args.model.name)
32
+ else:
33
+ raise NotImplementedError
34
+
35
+ config.vocab_size = tokenizer.vocab_size
36
+
37
+ if hasattr(args.model, "overwrite"):
38
+ for k, v in args.model.overwrite.items():
39
+ assert hasattr(config, k), f"config does not have attribute {k}"
40
+ setattr(config, k, v)
41
+
42
+ if hasattr(args.model, "add_config"):
43
+ for k, v in args.model.add_config.items():
44
+ assert not hasattr(config, k), f"config already has attribute {k}"
45
+ setattr(config, k, v)
46
+
47
+ if args.model.name.startswith("google/t5"):
48
+ model = T5Model(config)
49
+ elif args.model.name.startswith("openai/whisper"):
50
+ config.use_cache = False
51
+ config.num_mel_bins = config.d_model
52
+ config.pad_token_id = tokenizer.pad_id
53
+ config.max_source_positions = args.data.src_seq_len // 2
54
+ config.max_target_positions = args.data.tgt_seq_len
55
+ model = WhisperModel(config)
56
+ else:
57
+ raise NotImplementedError
58
+
59
+ return model, config.d_model
60
+
61
+
62
+ class OsuClassifier(nn.Module):
63
+ __slots__ = [
64
+ "spectrogram",
65
+ "decoder_embedder",
66
+ "encoder_embedder",
67
+ "transformer",
68
+ "style_embedder",
69
+ "num_classes",
70
+ "input_features",
71
+ "projector",
72
+ "classifier",
73
+ "vocab_size",
74
+ "loss_fn",
75
+ ]
76
+
77
+ def __init__(self, args: DictConfig, tokenizer: Tokenizer):
78
+ super().__init__()
79
+
80
+ self.transformer, d_model = get_backbone_model(args, tokenizer)
81
+ self.num_classes = tokenizer.num_classes
82
+ self.input_features = args.model.input_features
83
+
84
+ self.decoder_embedder = nn.Embedding(tokenizer.vocab_size, d_model)
85
+ self.decoder_embedder.weight.data.normal_(mean=0.0, std=1.0)
86
+
87
+ self.spectrogram = MelSpectrogram(
88
+ args.model.spectrogram.sample_rate, args.model.spectrogram.n_fft,
89
+ args.model.spectrogram.n_mels, args.model.spectrogram.hop_length
90
+ )
91
+
92
+ self.encoder_embedder = nn.Linear(args.model.spectrogram.n_mels, d_model)
93
+
94
+ self.projector = nn.Linear(d_model, args.model.classifier_proj_size)
95
+ self.classifier = nn.Linear(args.model.classifier_proj_size, tokenizer.num_classes)
96
+
97
+ self.vocab_size = tokenizer.vocab_size
98
+ self.loss_fn = nn.CrossEntropyLoss()
99
+
100
+ def forward(
101
+ self,
102
+ frames: Optional[torch.FloatTensor] = None,
103
+ decoder_input_ids: Optional[torch.Tensor] = None,
104
+ labels: Optional[torch.LongTensor] = None,
105
+ **kwargs
106
+ ) -> OsuClassifierOutput:
107
+ """
108
+ frames: B x L_encoder x mel_bins, float32
109
+ decoder_input_ids: B x L_decoder, int64
110
+ beatmap_id: B, int64
111
+ encoder_outputs: B x L_encoder x D, float32
112
+ """
113
+
114
+ frames = self.spectrogram(frames) # (N, L, M)
115
+ inputs_embeds = self.encoder_embedder(frames)
116
+ decoder_inputs_embeds = self.decoder_embedder(decoder_input_ids)
117
+
118
+ if self.input_features:
119
+ input_features = torch.swapaxes(inputs_embeds, 1, 2) if inputs_embeds is not None else None
120
+ # noinspection PyTypeChecker
121
+ base_output: Seq2SeqModelOutput = self.transformer.forward(input_features=input_features,
122
+ decoder_inputs_embeds=decoder_inputs_embeds,
123
+ **kwargs)
124
+ else:
125
+ base_output = self.transformer.forward(inputs_embeds=inputs_embeds,
126
+ decoder_inputs_embeds=decoder_inputs_embeds,
127
+ **kwargs)
128
+
129
+ # Get logits
130
+ hidden_states = self.projector(base_output.last_hidden_state)
131
+ pooled_output = hidden_states.mean(dim=1)
132
+
133
+ logits = self.classifier(pooled_output)
134
+ loss = None
135
+
136
+ if labels is not None:
137
+ loss = self.loss_fn(logits.view(-1, self.num_classes), labels.view(-1))
138
+
139
+ return OsuClassifierOutput(
140
+ loss=loss,
141
+ logits=logits,
142
+ encoder_last_hidden_state=base_output.encoder_last_hidden_state,
143
+ decoder_last_hidden_state=base_output.last_hidden_state,
144
+ feature_vector=pooled_output
145
+ )
classifier/libs/model/spectrogram.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ from nnAudio import features
6
+
7
+
8
+ class MelSpectrogram(nn.Module):
9
+ def __init__(
10
+ self,
11
+ sample_rate: int = 16000,
12
+ n_ftt: int = 2048,
13
+ n_mels: int = 512,
14
+ hop_length: int = 128,
15
+ ):
16
+ """
17
+ Melspectrogram transformation layer, supports on-the-fly processing on GPU.
18
+
19
+ Attributes:
20
+ sample_rate: The sampling rate for the input audio.
21
+ n_ftt: The window size for the STFT.
22
+ n_mels: The number of Mel filter banks.
23
+ hop_length: The hop (or stride) size.
24
+ """
25
+ super().__init__()
26
+ self.transform = features.MelSpectrogram(
27
+ sr=sample_rate,
28
+ n_fft=n_ftt,
29
+ n_mels=n_mels,
30
+ hop_length=hop_length,
31
+ center=True,
32
+ fmin=0,
33
+ fmax=sample_rate // 2,
34
+ pad_mode="constant",
35
+ )
36
+
37
+ def forward(self, samples: torch.tensor) -> torch.tensor:
38
+ """
39
+ Convert a batch of audio frames into a batch of Mel spectrogram frames.
40
+
41
+ For each item in the batch:
42
+ 1. pad left and right ends of audio by n_fft // 2.
43
+ 2. run STFT with window size of |n_ftt| and stride of |hop_length|.
44
+ 3. convert result into mel-scale.
45
+ 4. therefore, n_frames = n_samples // hop_length + 1.
46
+
47
+ Args:
48
+ samples: Audio time-series (batch size, n_samples).
49
+
50
+ Returns:
51
+ A batch of Mel spectrograms of size (batch size, n_frames, n_mels).
52
+ """
53
+ spectrogram = self.transform(samples)
54
+ spectrogram = spectrogram.permute(0, 2, 1)
55
+ return spectrogram
classifier/libs/tokenizer/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from .event import *
2
+ from .tokenizer import Tokenizer
classifier/libs/tokenizer/event.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import dataclasses
4
+ from enum import Enum
5
+
6
+
7
class EventType(Enum):
    """Vocabulary of beatmap event kinds; values are the short string
    prefixes used when an Event is rendered as text (see Event.__repr__)."""
    # Timing / rhythm attributes
    TIME_SHIFT = "t"
    SNAPPING = "snap"
    # Spatial attributes
    DISTANCE = "dist"
    NEW_COMBO = "new_combo"
    # Hitsound attributes
    HITSOUND = "hitsound"
    VOLUME = "volume"
    # Hit-object types
    CIRCLE = "circle"
    SPINNER = "spinner"
    SPINNER_END = "spinner_end"
    SLIDER_HEAD = "slider_head"
    # Slider anchor types (curve control points)
    BEZIER_ANCHOR = "bezier_anchor"
    PERFECT_ANCHOR = "perfect_anchor"
    CATMULL_ANCHOR = "catmull_anchor"
    RED_ANCHOR = "red_anchor"
    LAST_ANCHOR = "last_anchor"
    SLIDER_END = "slider_end"
    # Timing grid events
    BEAT = "beat"
    MEASURE = "measure"
    TIMING_POINT = "timing_point"
    # Metadata / conditioning events
    STYLE = "style"
    DIFFICULTY = "difficulty"
    MAPPER = "mapper"
    DESCRIPTOR = "descriptor"
    # Position encodings (split axes or flattened grid index)
    POS_X = "pos_x"
    POS_Y = "pos_y"
    POS = "pos"
    CS = "cs"
35
+
36
+
37
@dataclasses.dataclass
class EventRange:
    """Inclusive integer value range associated with one event type."""
    type: EventType  # the event family this range applies to
    min_value: int  # smallest encodable value (inclusive)
    max_value: int  # largest encodable value (inclusive)
42
+
43
+
44
@dataclasses.dataclass
class Event:
    """A single beatmap event: an event type plus an integer value."""
    type: EventType
    value: int = 0

    def __repr__(self) -> str:
        # Textual form is the type's short prefix followed by the value, e.g. "t11000".
        return f"{self.type.value}{self.value}"

    def __str__(self) -> str:
        return repr(self)
classifier/libs/tokenizer/tokenizer.py ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from pathlib import Path
3
+
4
+ from omegaconf import DictConfig
5
+
6
+ from .event import Event, EventType, EventRange
7
+
8
# NOTE(review): "MILISECONDS" is misspelled but kept as-is — other modules may
# import these names, so renaming would be a breaking change.
MILISECONDS_PER_SECOND = 1000
# Time resolution of one TIME_SHIFT token step, in milliseconds.
MILISECONDS_PER_STEP = 10
10
+
11
+
12
class Tokenizer:
    """Fixed-vocabulary tokenizer mapping beatmap :class:`Event` objects to ids.

    The vocabulary is a single [PAD] token at id 0 followed by one contiguous
    span of ids per :class:`EventRange`, in the order of ``event_ranges``.
    """

    __slots__ = [
        "offset",
        "event_ranges",
        "input_event_ranges",
        "num_classes",
        "num_diff_classes",
        "max_difficulty",
        "event_range",
        "event_start",
        "event_end",
        "vocab_size",
        "beatmap_idx",
        "mapper_idx",
        "beatmap_mapper",
        "num_mapper_classes",
        "beatmap_descriptors",
        "descriptor_idx",
        "num_descriptor_classes",
        "num_cs_classes",
    ]

    def __init__(self, args: DictConfig = None):
        """Fixed vocabulary tokenizer.

        Args:
            args: Optional training config. When given, the TIME_SHIFT range is
                derived from the audio window length, optional event ranges
                (distances, positions, timing points) are enabled per the
                config, and the beatmap -> mapper index is loaded from
                ``args.data.mappers_path``.
        """
        self.offset = 1  # id 0 is reserved for the [PAD] token
        self.event_ranges: list[EventRange] = [
            EventRange(EventType.TIME_SHIFT, 0, 1024),
            EventRange(EventType.SNAPPING, 0, 16),
            EventRange(EventType.DISTANCE, 0, 640),
        ]
        # Fix: `decode` iterates `input_event_ranges`, but it was declared in
        # __slots__ without ever being assigned, so decoding an out-of-range
        # token id raised AttributeError instead of the intended ValueError.
        self.input_event_ranges: list[EventRange] = []
        self.num_classes = 0
        self.beatmap_mapper: dict[int, int] = {}  # beatmap_id -> mapper_id
        self.mapper_idx: dict[int, int] = {}  # mapper_id -> mapper_idx

        if args is not None:
            # Duration of one model input window in milliseconds.
            miliseconds_per_sequence = ((args.data.src_seq_len - 1) * args.model.spectrogram.hop_length *
                                        MILISECONDS_PER_SECOND / args.model.spectrogram.sample_rate)
            max_time_shift = int(miliseconds_per_sequence / MILISECONDS_PER_STEP)
            min_time_shift = 0

            self.event_ranges = [
                EventRange(EventType.TIME_SHIFT, min_time_shift, max_time_shift),
                EventRange(EventType.SNAPPING, 0, 16),
            ]

            self._init_mapper_idx(args)

            if args.data.add_distances:
                self.event_ranges.append(EventRange(EventType.DISTANCE, 0, 640))

            if args.data.add_positions:
                p = args.data.position_precision
                x_min, x_max, y_min, y_max = args.data.position_range
                x_min, x_max, y_min, y_max = x_min // p, x_max // p, y_min // p, y_max // p

                if args.data.position_split_axes:
                    # Separate vocabularies for the X and Y axes.
                    self.event_ranges.append(EventRange(EventType.POS_X, x_min, x_max))
                    self.event_ranges.append(EventRange(EventType.POS_Y, y_min, y_max))
                else:
                    # Single flattened (x, y) -> index vocabulary.
                    x_count = x_max - x_min + 1
                    y_count = y_max - y_min + 1
                    self.event_ranges.append(EventRange(EventType.POS, 0, x_count * y_count - 1))

        self.event_ranges: list[EventRange] = self.event_ranges + [
            EventRange(EventType.NEW_COMBO, 0, 0),
            EventRange(EventType.HITSOUND, 0, 2 ** 3 * 3 * 3),
            EventRange(EventType.VOLUME, 0, 100),
            EventRange(EventType.CIRCLE, 0, 0),
            EventRange(EventType.SPINNER, 0, 0),
            EventRange(EventType.SPINNER_END, 0, 0),
            EventRange(EventType.SLIDER_HEAD, 0, 0),
            EventRange(EventType.BEZIER_ANCHOR, 0, 0),
            EventRange(EventType.PERFECT_ANCHOR, 0, 0),
            EventRange(EventType.CATMULL_ANCHOR, 0, 0),
            EventRange(EventType.RED_ANCHOR, 0, 0),
            EventRange(EventType.LAST_ANCHOR, 0, 0),
            EventRange(EventType.SLIDER_END, 0, 0),
            EventRange(EventType.BEAT, 0, 0),
            EventRange(EventType.MEASURE, 0, 0),
        ]

        if args is not None and args.data.add_timing_points:
            self.event_ranges.append(EventRange(EventType.TIMING_POINT, 0, 0))

        self.event_range: dict[EventType, EventRange] = {er.type: er for er in self.event_ranges}

        # Precompute the first (inclusive) and one-past-last token id of every
        # event type so encode/event_type_range are O(1).
        self.event_start: dict[EventType, int] = {}
        self.event_end: dict[EventType, int] = {}
        offset = self.offset
        for er in self.event_ranges:
            self.event_start[er.type] = offset
            offset += er.max_value - er.min_value + 1
            self.event_end[er.type] = offset

        self.vocab_size: int = self.offset + sum(
            er.max_value - er.min_value + 1 for er in self.event_ranges
        )

    @property
    def pad_id(self) -> int:
        """[PAD] token for padding."""
        return 0

    def decode(self, token_id: int) -> Event:
        """Converts token ids into Event objects.

        Raises:
            ValueError: If ``token_id`` is not mapped to any event.
        """
        offset = self.offset
        for er in self.event_ranges:
            if offset <= token_id <= offset + er.max_value - er.min_value:
                return Event(type=er.type, value=er.min_value + token_id - offset)
            offset += er.max_value - er.min_value + 1
        for er in self.input_event_ranges:
            if offset <= token_id <= offset + er.max_value - er.min_value:
                return Event(type=er.type, value=er.min_value + token_id - offset)
            offset += er.max_value - er.min_value + 1

        raise ValueError(f"id {token_id} is not mapped to any event")

    def encode(self, event: Event) -> int:
        """Converts Event objects into token ids.

        Raises:
            ValueError: If the event type is unknown or the value lies outside
                the type's declared range.
        """
        if event.type not in self.event_range:
            raise ValueError(f"unknown event type: {event.type}")

        er = self.event_range[event.type]
        offset = self.event_start[event.type]

        if not er.min_value <= event.value <= er.max_value:
            raise ValueError(
                f"event value {event.value} is not within range "
                f"[{er.min_value}, {er.max_value}] for event type {event.type}"
            )

        return offset + event.value - er.min_value

    def event_type_range(self, event_type: EventType) -> tuple[int, int]:
        """Get the token id range of each Event type."""
        if event_type not in self.event_range:
            raise ValueError(f"unknown event type: {event_type}")

        er = self.event_range[event_type]
        offset = self.event_start[event_type]
        return offset, offset + (er.max_value - er.min_value)

    def _init_mapper_idx(self, args):
        """Indexes beatmap mappers and mapper idx."""
        if args is None or "mappers_path" not in args.data:
            raise ValueError("mappers_path not found in args")

        path = Path(args.data.mappers_path)

        if not path.exists():
            raise ValueError(f"mappers_path {path} not found")

        # Load JSON data from file
        with open(path, 'r') as file:
            data = json.load(file)

        # Populate beatmap_mapper
        for item in data:
            self.beatmap_mapper[item['id']] = item['user_id']

        # Get unique user_ids from beatmap_mapper values
        unique_user_ids = list(set(self.beatmap_mapper.values()))

        # Create mapper_idx
        self.mapper_idx = {user_id: idx for idx, user_id in enumerate(unique_user_ids)}
        self.num_classes = len(unique_user_ids)

    def state_dict(self):
        """Serializable snapshot of the tokenizer's vocabulary and indices."""
        return {
            "offset": self.offset,
            "event_ranges": self.event_ranges,
            "input_event_ranges": self.input_event_ranges,
            "num_classes": self.num_classes,
            "event_range": self.event_range,
            "event_start": self.event_start,
            "event_end": self.event_end,
            "vocab_size": self.vocab_size,
            "beatmap_mapper": self.beatmap_mapper,
            "mapper_idx": self.mapper_idx,
        }

    def load_state_dict(self, state_dict):
        """Restore from :meth:`state_dict`.

        Uses ``.get`` for ``input_event_ranges`` so state dicts saved before
        that field existed still load.
        """
        self.offset = state_dict["offset"]
        self.event_ranges = state_dict["event_ranges"]
        self.input_event_ranges = state_dict.get("input_event_ranges", [])
        self.num_classes = state_dict["num_classes"]
        self.event_range = state_dict["event_range"]
        self.event_start = state_dict["event_start"]
        self.event_end = state_dict["event_end"]
        self.vocab_size = state_dict["vocab_size"]
        self.beatmap_mapper = state_dict["beatmap_mapper"]
        self.mapper_idx = state_dict["mapper_idx"]
classifier/libs/utils/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .model_utils import *
classifier/libs/utils/model_utils.py ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pathlib import Path
3
+
4
+ import lightning
5
+ import numpy as np
6
+ import torch
7
+ import torchmetrics
8
+ from omegaconf import DictConfig
9
+ from torch.optim import Optimizer, AdamW
10
+ from torch.optim.lr_scheduler import (
11
+ LRScheduler,
12
+ SequentialLR,
13
+ LinearLR,
14
+ CosineAnnealingLR,
15
+ )
16
+ from torch.utils.data import DataLoader
17
+ from transformers.modeling_outputs import Seq2SeqSequenceClassifierOutput
18
+ from transformers.utils import cached_file
19
+
20
+ import routed_pickle
21
+
22
+ from ..dataset import OrsDataset, OsuParser
23
+ from ..model import OsuClassifier
24
+ from ..model.model import OsuClassifierOutput
25
+ from ..tokenizer import Tokenizer
26
+
27
+
28
class LitOsuClassifier(lightning.LightningModule):
    """Lightning wrapper around :class:`OsuClassifier` for training and eval."""

    def __init__(self, args: DictConfig, tokenizer):
        super().__init__()
        self.save_hyperparameters()
        self.args = args
        self.model: OsuClassifier = OsuClassifier(args, tokenizer)

    def forward(self, **kwargs) -> OsuClassifierOutput:
        return self.model(**kwargs)

    def training_step(self, batch, batch_idx):
        out: Seq2SeqSequenceClassifierOutput = self.model(**batch)
        self.log("train_loss", out.loss)
        return out.loss

    def testy_step(self, batch, batch_idx, prefix):
        """Shared evaluation logic; logs loss and top-1/10/100 accuracy under `prefix`."""
        out: Seq2SeqSequenceClassifierOutput = self.model(**batch)
        labels = batch["labels"]
        n_cls = self.args.data.num_classes
        top1 = torchmetrics.functional.accuracy(out.logits.argmax(dim=1), labels, "multiclass", num_classes=n_cls)
        top10 = torchmetrics.functional.accuracy(out.logits, labels, "multiclass", num_classes=n_cls, top_k=10)
        top100 = torchmetrics.functional.accuracy(out.logits, labels, "multiclass", num_classes=n_cls, top_k=100)
        self.log(f"{prefix}_loss", out.loss)
        self.log(f"{prefix}_accuracy", top1)
        self.log(f"{prefix}_top_10_accuracy", top10)
        self.log(f"{prefix}_top_100_accuracy", top100)
        return out.loss

    def validation_step(self, batch, batch_idx):
        return self.testy_step(batch, batch_idx, "val")

    def test_step(self, batch, batch_idx):
        return self.testy_step(batch, batch_idx, "test")

    def configure_optimizers(self):
        optimizer = get_optimizer(self.parameters(), self.args)
        return {
            "optimizer": optimizer,
            "lr_scheduler": {
                "scheduler": get_scheduler(optimizer, self.args),
                "interval": "step",
                "frequency": 1,
            },
        }
72
+
73
+
74
def load_ckpt(ckpt_path, route_pickle=True):
    """Load a LitOsuClassifier checkpoint from disk or the Hugging Face hub.

    Args:
        ckpt_path: Local checkpoint path, or a hub repo id containing
            ``model.ckpt`` when no such local path exists.
        route_pickle: Use the module-remapping pickle for legacy checkpoints.

    Returns:
        ``(model, model_args, tokenizer)`` with the model in eval mode on
        CUDA when available, otherwise CPU.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Resolve a hub repo id to a cached file when no local path exists.
    if os.path.exists(ckpt_path):
        ckpt_path = Path(ckpt_path)
    else:
        ckpt_path = cached_file(ckpt_path, "model.ckpt")

    checkpoint = torch.load(
        ckpt_path,
        map_location=lambda storage, loc: storage,
        weights_only=False,
        pickle_module=routed_pickle if route_pickle else None,
    )
    tokenizer = checkpoint["hyper_parameters"]["tokenizer"]
    model_args = checkpoint["hyper_parameters"]["args"]

    # Strip the `_orig_mod.` infix that torch.compile inserts into state keys.
    compiled_prefix = "model._orig_mod."
    plain_state = {
        ("model." + k[len(compiled_prefix):] if k.startswith(compiled_prefix) else k): v
        for k, v in checkpoint["state_dict"].items()
    }

    model = LitOsuClassifier(model_args, tokenizer)
    model.load_state_dict(plain_state)
    model.eval().to(device)
    return model, model_args, tokenizer
102
+
103
+
104
def get_tokenizer(args: DictConfig) -> Tokenizer:
    """Build a :class:`Tokenizer` from the training configuration."""
    return Tokenizer(args)
106
+
107
+
108
def get_optimizer(parameters, args: DictConfig) -> Optimizer:
    """Create the optimizer named by ``args.optim.name``.

    Raises:
        NotImplementedError: For any optimizer name other than ``'adamw'``.
    """
    if args.optim.name != 'adamw':
        raise NotImplementedError

    return AdamW(parameters, lr=args.optim.base_lr)
118
+
119
+
120
def get_scheduler(optimizer: Optimizer, args: DictConfig, num_processes=1) -> LRScheduler:
    """Linear warmup followed by cosine annealing, switched per optimizer step.

    All step counts are scaled by ``num_processes`` so the schedule keeps the
    same wall-clock shape under multi-process training.
    """
    warmup_steps = args.optim.warmup_steps * num_processes
    total_steps = args.optim.total_steps * num_processes

    # Phase 1: ramp the LR from 50% to 100% of the base rate.
    warmup_phase = LinearLR(
        optimizer,
        start_factor=0.5,
        end_factor=1,
        total_iters=warmup_steps,
        last_epoch=-1,
    )

    # Phase 2: cosine decay over the remaining steps down to `final_cosine`.
    cosine_phase = CosineAnnealingLR(
        optimizer,
        T_max=total_steps - warmup_steps,
        eta_min=args.optim.final_cosine,
    )

    return SequentialLR(
        optimizer,
        schedulers=[warmup_phase, cosine_phase],
        milestones=[warmup_steps],
    )
142
+
143
+
144
def get_dataloaders(tokenizer: Tokenizer, args: DictConfig) -> tuple[DataLoader, DataLoader]:
    """Construct the train and test dataloaders over :class:`OrsDataset`.

    Returns:
        ``(train_dataloader, test_dataloader)``.
    """
    parser = OsuParser(args, tokenizer)
    datasets = {
        "train": OrsDataset(args.data, parser, tokenizer),
        "test": OrsDataset(args.data, parser, tokenizer, test=True),
    }

    loaders = {}
    for split in ("train", "test"):
        loaders[split] = DataLoader(
            datasets[split],
            # Per-step batch size; gradient accumulation restores the
            # effective optimizer batch size.
            batch_size=args.optim.batch_size // args.optim.grad_acc,
            num_workers=args.dataloader.num_workers,
            pin_memory=True,
            drop_last=False,
            persistent_workers=args.dataloader.num_workers > 0,
            worker_init_fn=worker_init_fn,
        )

    return loaders["train"], loaders["test"]
175
+
176
+
177
def worker_init_fn(worker_id: int) -> None:
    """
    Give each dataloader a unique slice of the full dataset.
    """
    info = torch.utils.data.get_worker_info()
    ds = info.dataset  # this worker's private copy of the dataset
    overall_start, overall_end = ds.start, ds.end
    # Split [start, end) evenly across workers, rounding each share up so
    # the whole range is covered; clamp the last worker's slice.
    share = int(np.ceil((overall_end - overall_start) / float(info.num_workers)))
    ds.start = overall_start + worker_id * share
    ds.end = min(ds.start + share, overall_end)
classifier/libs/utils/routed_pickle.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+ from typing import Dict
3
+
4
+
5
class Unpickler(pickle.Unpickler):
    """Unpickler that transparently remaps legacy module paths.

    Old checkpoints reference classes by their pre-refactor module locations;
    ``load_module_mapping`` redirects those lookups to the current package
    layout. Modules not in the mapping resolve normally.
    """

    load_module_mapping: Dict[str, str] = {
        'osuT5.tokenizer.event': 'osuT5.osuT5.event',
        'libs.tokenizer.event': 'classifier.libs.tokenizer.event',
        'libs.tokenizer.tokenizer': 'classifier.libs.tokenizer.tokenizer',
        'osuT5.event': 'osuT5.osuT5.event',
        'libs.event': 'classifier.libs.tokenizer.event',
        'libs.tokenizer': 'classifier.libs.tokenizer.tokenizer',
    }

    def find_class(self, mod_name, name):
        remapped = self.load_module_mapping.get(mod_name, mod_name)
        return super().find_class(remapped, name)
classifier/test.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import hydra
2
+ import lightning
3
+ import torch
4
+ from omegaconf import DictConfig
5
+
6
+ from classifier.libs.utils import load_ckpt
7
+ from libs import (
8
+ get_dataloaders,
9
+ )
10
+
11
+ torch.set_float32_matmul_precision('high')
12
+
13
+
14
@hydra.main(config_path="configs", config_name="train_v1", version_base="1.1")
def main(args: DictConfig):
    """Evaluate a trained osu! classifier checkpoint on the validation split."""
    model, model_args, tokenizer = load_ckpt(args.checkpoint_path, route_pickle=False)

    # Only the validation loader is needed here; the train loader is discarded.
    _, val_dataloader = get_dataloaders(tokenizer, args)

    if args.compile:
        model.model = torch.compile(model.model)

    trainer = lightning.Trainer(
        accelerator=args.device,
        precision=args.precision,
    )
    trainer.test(model, val_dataloader)


if __name__ == "__main__":
    main()
classifier/train.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+
3
+ import hydra
4
+ import lightning
5
+ import torch
6
+ from lightning.pytorch.callbacks import ModelCheckpoint, LearningRateMonitor
7
+ from lightning.pytorch.loggers import WandbLogger
8
+ from omegaconf import DictConfig
9
+
10
+ from libs import (
11
+ get_tokenizer,
12
+ get_dataloaders,
13
+ )
14
+ from libs.model.model import OsuClassifier
15
+ from libs.utils.model_utils import LitOsuClassifier
16
+ torch.set_float32_matmul_precision('high')
17
+
18
+
19
def load_old_model(path: str, model: OsuClassifier):
    """Initialize ``model`` from a legacy ``pytorch_model.bin`` checkpoint.

    Vocabulary/loss-head weights that no longer match are dropped, and the
    old ``transformer.model.`` key prefix is collapsed to ``transformer.``
    to fit the current module layout. Loading is non-strict, so any keys
    missing from the checkpoint are left at their fresh initialization.
    """
    model_state = torch.load(Path(path) / "pytorch_model.bin", weights_only=True)

    ignore_list = [
        "transformer.model.decoder.embed_tokens.weight",
        "transformer.model.decoder.embed_positions.weight",
        "decoder_embedder.weight",
        "transformer.proj_out.weight",
        "loss_fn.weight",
    ]

    old_prefix = "transformer.model."
    fixed_model_state = {
        ("transformer." + k[len(old_prefix):] if k.startswith(old_prefix) else k): v
        for k, v in model_state.items()
        if k not in ignore_list
    }

    model.load_state_dict(fixed_model_state, strict=False)
41
+
42
+
43
@hydra.main(config_path="configs", config_name="train_v1", version_base="1.1")
def main(args: DictConfig):
    """Train the osu! classifier with W&B logging and periodic checkpointing."""
    wandb_logger = WandbLogger(
        project="osu-classifier",
        entity="mappingtools",
        job_type="training",
        offline=args.logging.mode == "offline",
        log_model="all" if args.logging.mode == "online" else False,
    )

    tokenizer = get_tokenizer(args)
    train_dataloader, val_dataloader = get_dataloaders(tokenizer, args)

    model = LitOsuClassifier(args, tokenizer)

    if args.pretrained_path:
        # Warm-start from a legacy checkpoint before (optionally) compiling.
        load_old_model(args.pretrained_path, model.model)

    if args.compile:
        model.model = torch.compile(model.model)

    callbacks = [
        ModelCheckpoint(every_n_train_steps=args.checkpoint.every_steps, save_top_k=2, monitor="val_loss"),
        LearningRateMonitor(logging_interval="step"),
    ]
    trainer = lightning.Trainer(
        accelerator=args.device,
        precision=args.precision,
        logger=wandb_logger,
        max_steps=args.optim.total_steps,
        accumulate_grad_batches=args.optim.grad_acc,
        gradient_clip_val=args.optim.grad_clip,
        val_check_interval=args.eval.every_steps,
        log_every_n_steps=args.logging.every_steps,
        callbacks=callbacks,
    )
    trainer.fit(model, train_dataloader, val_dataloader)
    trainer.save_checkpoint("final.ckpt")


if __name__ == "__main__":
    main()
cli_inference.sh ADDED
@@ -0,0 +1,491 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash

# Mapperatorinator CLI - Interactive Inference Script
# Based on web-ui.py functionality

set -e # Exit on error

# Colors for better UI (ANSI escape sequences; NC resets to the default color)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
PURPLE='\033[0;35m'
CYAN='\033[0;36m'
NC='\033[0m' # No Color
16
+
17
# Echo $2 wrapped in ANSI color $1, resetting to the default color afterwards.
print_color() {
    echo -e "${1}${2}${NC}"
}
23
+
24
# Print $1 as a boxed section header with surrounding blank lines.
print_header() {
    local border="======================================"
    echo
    print_color $CYAN "$border"
    print_color $CYAN "$1"
    print_color $CYAN "$border"
    echo
}
32
+
33
# Prompt for a line of input with an optional default; store the result in the
# variable named by $3. Uses `printf -v` instead of `eval "$var='$input'"` so
# values containing single quotes no longer break (or inject into) the
# assignment; also keeps `input` local instead of leaking a global.
prompt_input() {
    local prompt=$1
    local default=$2
    local var_name=$3
    local input

    if [ -n "$default" ]; then
        read -e -p "$(print_color $GREEN "$prompt") [default: $default]: " input
        input="${input:-$default}"
    else
        read -e -p "$(print_color $GREEN "$prompt"): " input
    fi

    printf -v "$var_name" '%s' "$input"
}
50
+
51
# Ask a yes/no question and store the literal string "true" or "false" in the
# variable named by $3. $2 ("y" or "n") is the answer assumed on plain Enter.
prompt_yn() {
    local prompt=$1
    local default=$2
    local var_name=$3
    local hint="[y/N]: "
    local fallback="n"

    if [ "$default" = "y" ]; then
        hint="[Y/n]: "
        fallback="y"
    fi

    while true; do
        read -p "$(print_color $GREEN "$prompt") $hint" yn
        yn=${yn:-$fallback}

        case $yn in
            [Yy]* ) eval "$var_name=true"; return;;
            [Nn]* ) eval "$var_name=false"; return;;
            * ) echo "Please answer yes or no.";;
        esac
    done
}
73
+
74
# Present a numbered menu and store the chosen option's text in the variable
# named by $2. Re-prompts until a valid 1-based index is entered.
prompt_choice() {
    local prompt=$1
    local var_name=$2
    shift 2
    local options=("$@")
    local count=${#options[@]}
    local choice i

    while true; do
        print_color $GREEN "$prompt"
        for i in "${!options[@]}"; do
            echo "  $((i+1))) ${options[i]}"
        done
        read -p "Select option (1-${count}): " choice

        if [[ "$choice" =~ ^[0-9]+$ ]] && [ "$choice" -ge 1 ] && [ "$choice" -le "$count" ]; then
            eval "$var_name='${options[$((choice-1))]}'"
            return
        fi
        print_color $RED "Invalid choice. Please select 1-${count}."
    done
}
96
+
97
# Function to prompt for multiple selection using arrow keys and spacebar.
# Renders an interactive checkbox menu on the terminal; the result stored in
# the variable named by $2 is a Hydra-style list string '["a","b"]', or the
# empty string when nothing is selected.
# NOTE(review): the EXIT trap body uses `return`, which in a trap context is
# questionable bash (EXIT traps fire at script exit, not function return) —
# confirm the intended cleanup behavior.
prompt_multiselect() {
    local prompt=$1
    local var_name=$2
    shift 2
    local options=("$@")
    local num_options=${#options[@]}
    local selections=()
    for (( i=0; i<num_options; i++ )); do
        selections[i]=0
    done
    local current_idx=0

    # Hide cursor for a cleaner UI
    tput civis 2>/dev/null || true
    # Ensure cursor is shown again on exit
    trap 'tput cnorm; return' EXIT

    # Initial draw
    tput clear

    while true; do
        # Move cursor to top left
        tput cup 0 0

        echo -e "${GREEN}${prompt}${NC}"
        echo "(Use UP/DOWN to navigate, SPACE to select/deselect, ENTER to confirm)"

        for i in "${!options[@]}"; do
            local checkbox="[ ]"
            if [[ ${selections[i]} -eq 1 ]]; then
                checkbox="[${GREEN}x${NC}]"
            fi

            if [ "$i" -eq "$current_idx" ]; then
                echo -e "  ${CYAN}> $checkbox ${options[i]}${NC}"
            else
                echo -e "    $checkbox ${options[i]}"
            fi
        done
        # Clear rest of the screen
        tput ed

        # Read a single keystroke.
        # IFS= ensures space is read as a character, not a delimiter.
        IFS= read -rsn1 key

        # Handle escape sequences for arrow keys
        if [[ "$key" == $'\e' ]]; then
            read -rsn2 -t 0.1 key
        fi

        case "$key" in
            '[A') # Up arrow
                current_idx=$(( (current_idx - 1 + num_options) % num_options ))
                ;;
            '[B') # Down arrow
                current_idx=$(( (current_idx + 1) % num_options ))
                ;;
            ' ') # Space bar
                if [[ ${selections[current_idx]} -eq 1 ]]; then
                    selections[current_idx]=0
                else
                    selections[current_idx]=1
                fi
                ;;
            '') # Enter key
                break
                ;;
        esac
    done

    # Show cursor again and clear the trap
    tput cnorm 2>/dev/null || true
    trap - EXIT

    # Go back to the bottom of the screen
    tput cup $(tput lines) 0
    clear # Clean up the interactive menu from screen

    # Collect selected options
    local selected_options=()
    for i in "${!options[@]}"; do
        if [[ ${selections[i]} -eq 1 ]]; then
            selected_options+=("${options[i]}")
        fi
    done

    # Format the result list for Hydra/Python: '["item1", "item2"]'
    if [ ${#selected_options[@]} -gt 0 ]; then
        local formatted_items=""
        for item in "${selected_options[@]}"; do
            if [ -n "$formatted_items" ]; then
                # Each item is wrapped in double quotes
                formatted_items="$formatted_items,\"$item\""
            else
                formatted_items="\"$item\""
            fi
        done
        # The whole list is wrapped in brackets
        eval "$var_name='[$formatted_items]'"
    else
        # Return an empty string if nothing is selected
        eval "$var_name=''"
    fi
}
203
+
204
+
205
# Succeed silently when $1 names an existing regular file; otherwise print an
# error in red and return non-zero.
validate_file() {
    if [ -f "$1" ]; then
        return 0
    fi
    print_color $RED "File not found: $1"
    return 1
}
214
+
215
# Convert a POSIX path to Windows form when running under Cygwin/MSYS/MinGW;
# echo the path unchanged on other platforms. Empty input yields empty output.
convert_path_if_needed() {
    local input_path="$1"

    if [[ -z "$input_path" ]]; then
        echo ""
        return
    fi

    case "$(uname -s)" in
        CYGWIN*|MINGW*|MSYS*)
            cygpath -w "$input_path"
            ;;
        *)
            echo "$input_path"
            ;;
    esac
}
236
+
237
# Main script starts here: interactive flow begins below.
print_color $PURPLE "╔═══════════════════════════════════════════╗"
print_color $PURPLE "║ Mapperatorinator CLI ║"
print_color $PURPLE "║ Interactive Inference Setup ║"
print_color $PURPLE "╚═══════════════════════════════════════════╝"
echo
243
+
244
# 2. Required Paths
print_header "Required Paths"

# Python Path
prompt_input "Python executable path" "python" python_executable

# Audio Path (Required) — loop until a non-empty, existing file is given
while true; do
    prompt_input "Audio file path (required)" "input/demo.mp3" audio_path
    if [ -z "$audio_path" ]; then
        print_color $RED "Audio path is required!"
        continue
    fi
    if validate_file "$audio_path"; then
        break
    fi
done

# Output Path — defaults to the audio file's directory
prompt_input "Output directory path" "$(dirname "$audio_path")" output_path

# Beatmap Path (Optional)
prompt_input "Beatmap file path (optional, for in-context learning)" "" beatmap_path
if [ -n "$beatmap_path" ] && ! validate_file "$beatmap_path"; then
    print_color $YELLOW "Warning: Beatmap file not found, continuing without it"
    beatmap_path=""
fi

# Convert paths to Windows format if needed (for Cygwin/MinGW)
audio_path=$(convert_path_if_needed "$audio_path")
output_path=$(convert_path_if_needed "$output_path")
beatmap_path=$(convert_path_if_needed "$beatmap_path")
276
+
277
# 3. Basic Settings
print_header "Basic Settings"

# Model Selection — entries are "config_name:description"
model_options=(
    "v28:Mapperatorinator V28"
    "v29:Mapperatorinator V29 (Supports gamemodes and descriptors)"
    "v30:Mapperatorinator V30 (Best stable model)"
    "v31:Mapperatorinator V31 (Slightly more accurate than V29)"
    "beatheritage_v1:BeatHeritage V1 (Enhanced stability & quality)"
)

print_color $GREEN "Select Model:"
for i in "${!model_options[@]}"; do
    IFS=':' read -r value desc <<< "${model_options[i]}"
    echo "  $((i+1))) $desc"
done

while true; do
    read -p "Select model (1-${#model_options[@]}) [default: 5 - BeatHeritage V1]: " model_choice
    model_choice=${model_choice:-5}

    if [[ "$model_choice" =~ ^[1-5]$ ]]; then
        # Split the selected entry back into config name and description.
        IFS=':' read -r model_config model_desc <<< "${model_options[$((model_choice-1))]}"
        print_color $BLUE "Selected: $model_desc"
        break
    else
        print_color $RED "Invalid choice. Please select 1-${#model_options[@]}."
    fi
done

# Game Mode (MODIFIED BLOCK) — selected by 0-based numeric index
gamemode_options=("osu!" "Taiko" "Catch" "Mania")
while true; do
    print_color $GREEN "Game mode:"
    for i in "${!gamemode_options[@]}"; do
        echo "  $i) ${gamemode_options[$i]}"
    done
    read -p "$(print_color $GREEN "Select option (0-3)") [default: 0]: " gamemode_input
    # Set default value to 0 if input is empty
    gamemode=${gamemode_input:-0}

    if [[ "$gamemode" =~ ^[0-3]$ ]]; then
        break
    else
        print_color $RED "Invalid choice. Please select a number between 0 and 3."
        echo # Add a blank line for spacing before re-prompting
    fi
done

# Difficulty
prompt_input "Difficulty (1.0-10.0)" "5.5" difficulty

# Year
# default is 2023, and 2007-2023 are valid years
prompt_input "Year" "2023" year
if ! [[ "$year" =~ ^(200[7-9]|201[0-9]|202[0-3])$ ]]; then
    print_color $RED "Invalid year! Year must be between 2007 and 2023. Defaulting to 2023."
    year=2023
fi
337
+
338
# 4. Advanced Settings (Optional) — blank answers are skipped by add_arg later
print_header "Advanced Settings (Optional - Press Enter to skip)"
print_color $BLUE "Difficulty Settings:"
prompt_input "HP Drain Rate (0-10)" "" hp_drain_rate
prompt_input "Circle Size (0-10)" "" circle_size
prompt_input "Overall Difficulty (0-10)" "" overall_difficulty
prompt_input "Approach Rate (0-10)" "" approach_rate
print_color $BLUE "Slider Settings:"
prompt_input "Slider Multiplier" "" slider_multiplier
prompt_input "Slider Tick Rate" "" slider_tick_rate
# Mania-only settings (gamemode index 3)
if [ "$gamemode" -eq 3 ]; then
    print_color $BLUE "Mania Settings:"
    prompt_input "Key Count" "" keycount
    prompt_input "Hold Note Ratio (0-1)" "" hold_note_ratio
    prompt_input "Scroll Speed Ratio" "" scroll_speed_ratio
fi
print_color $BLUE "Generation Settings:"
prompt_input "CFG Scale (1-20)" "" cfg_scale
prompt_input "Temperature (0-2)" "" temperature
prompt_input "Top P (0-1)" "" top_p
prompt_input "Seed (random if empty)" "" seed
prompt_input "Mapper ID" "" mapper_id
print_color $BLUE "Timing Settings:"
prompt_input "Start Time (seconds)" "" start_time
prompt_input "End Time (seconds)" "" end_time
363
+
364
# 5. Boolean Options
print_header "Export & Processing Options"
prompt_yn "Export as .osz file?" "n" export_osz
prompt_yn "Add to existing beatmap?" "n" add_to_beatmap
prompt_yn "Add hitsounds?" "n" hitsounded
prompt_yn "Use super timing analysis?" "n" super_timing

# 6. Descriptors
print_header "Style Descriptors"

# Positive descriptors with interactive multi-select
descriptor_options=("jump aim" "stream" "tech" "aim" "speed" "flow" "clean" "complex" "simple" "modern" "classic" "spaced" "stacked")
prompt_multiselect "Positive descriptors (describe desired mapping style):" descriptors "${descriptor_options[@]}"

# Negative descriptors with interactive multi-select
prompt_multiselect "Negative descriptors (styles to avoid):" negative_descriptors "${descriptor_options[@]}"

# In-context options (only if beatmap is provided)
if [ -n "$beatmap_path" ]; then
    print_header "In-Context Learning Options"
    context_options_list=("timing" "patterns" "structure" "style")
    prompt_multiselect "In-context learning aspects:" in_context_options "${context_options_list[@]}"
fi
387
+
388
+
389
# 7. Build and Execute Command
print_header "Command Generation"

# Start building the command: the hydra config name (-cn) selects the model.
cmd_args=("$python_executable" "inference.py" "-cn" "$model_config")
394
+
395
# Append "key=value" to cmd_args, skipping entirely when the value is empty.
# The key=value form is what Hydra expects, even for list-valued strings
# such as descriptors='["item1", "item2"]'.
add_arg() {
    if [ -n "$2" ]; then
        cmd_args+=("${1}=${2}")
    fi
}
405
+
406
# Append "key=true" when $2 is the string "true", otherwise "key=false".
add_bool_arg() {
    local flag="false"
    if [ "$2" = "true" ]; then
        flag="true"
    fi
    cmd_args+=("${1}=${flag}")
}
416
+
417
# Add all arguments (empty values are dropped by add_arg)
add_arg "audio_path" "'$audio_path'"
add_arg "output_path" "'$output_path'"
add_arg "beatmap_path" "'$beatmap_path'"
add_arg "gamemode" "$gamemode"
add_arg "difficulty" "$difficulty"
add_arg "year" "$year"

# Optional numeric parameters
add_arg "hp_drain_rate" "$hp_drain_rate"
add_arg "circle_size" "$circle_size"
add_arg "overall_difficulty" "$overall_difficulty"
add_arg "approach_rate" "$approach_rate"
add_arg "slider_multiplier" "$slider_multiplier"
add_arg "slider_tick_rate" "$slider_tick_rate"
add_arg "keycount" "$keycount"
add_arg "hold_note_ratio" "$hold_note_ratio"
add_arg "scroll_speed_ratio" "$scroll_speed_ratio"
add_arg "cfg_scale" "$cfg_scale"
add_arg "temperature" "$temperature"
add_arg "top_p" "$top_p"
add_arg "seed" "$seed"
add_arg "mapper_id" "$mapper_id"
add_arg "start_time" "$start_time"
add_arg "end_time" "$end_time"

# List parameters (now correctly quoted)
add_arg "descriptors" "$descriptors"
add_arg "negative_descriptors" "$negative_descriptors"
add_arg "in_context" "$in_context_options"

# Boolean parameters
add_bool_arg "export_osz" "$export_osz"
add_bool_arg "add_to_beatmap" "$add_to_beatmap"
add_bool_arg "hitsounded" "$hitsounded"
add_bool_arg "super_timing" "$super_timing"


# Display the command
print_color $YELLOW "Generated command:"
echo
# Use printf for safer printing of arguments
printf "%s " "${cmd_args[@]}"
echo
echo
462
+
463
# Ask for confirmation before running anything.
prompt_yn "Execute this command?" "y" execute_cmd

if [ "$execute_cmd" = "true" ]; then
    print_header "Executing Inference"
    print_color $GREEN "Starting inference process..."
    echo

    # Run the command as an `if` condition so a non-zero exit does not kill
    # the whole script under `set -e`. Previously the failure branch below
    # was unreachable: `set -e` aborted before `exit_code=$?` ever ran.
    if "${cmd_args[@]}"; then
        exit_code=0
    else
        exit_code=$?
    fi

    echo
    if [ $exit_code -eq 0 ]; then
        print_color $GREEN "✓ Inference completed successfully!"
    else
        print_color $RED "✗ Inference failed with exit code: $exit_code"
    fi
else
    print_color $YELLOW "Command generation cancelled."
    echo
    print_color $BLUE "You can copy and run the command manually:"
    # Use printf for safer printing of arguments
    printf "%s " "${cmd_args[@]}"
    echo
fi

echo
print_color $PURPLE "Thank you for using Mapperatorinator CLI!"
colab/beatheritage_v1_inference.ipynb ADDED
@@ -0,0 +1,510 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "metadata": {},
5
+ "cell_type": "markdown",
6
+ "source": [
7
+ "<a href=\"https://colab.research.google.com/github/hongminh54/BeatHeritage/blob/main/colab/beatheritage_v1_inference.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>\n",
8
+ "\n",
9
+ "# BeatHeritage V1 - Beatmap Generator\n",
10
+ "\n",
11
+ "An enhanced AI model for generating osu! beatmaps with improved stability and quality control.\n",
12
+ "\n",
13
+ "\n",
14
+ "### Instructions:\n",
15
+ "1. **Read and accept the rules** by clicking the checkbox in the first cell\n",
16
+ "2. **Ensure GPU runtime**: Go to __Runtime → Change Runtime Type → GPU__\n",
17
+ "3. **Execute cells in order**: Click ▶️ on each cell sequentially\n",
18
+ "4. **Upload your audio**: Choose an MP3/OGG file when prompted\n",
19
+ "5. **Configure parameters**: Adjust settings to your preference\n",
20
+ "6. **Generate beatmap**: Run the generation cell and wait for results\n"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "metadata": {},
26
+ "source": [
27
+ "#@title 🚀 Setup Environment { display-mode: \"form\" }\n",
28
+ "#@markdown ### ⚠️ Important: Please use this tool responsibly\n",
29
+ "#@markdown - Always disclose AI usage in your beatmap descriptions\n",
30
+ "#@markdown - Respect the original music artists and mappers\n",
31
+ "#@markdown - This tool is for educational and creative purposes\n",
32
+ "\n",
33
+ "i_accept_the_rules = False #@param {type:\"boolean\"}\n",
34
+ "#@markdown ☑️ **I accept the rules and will use this tool responsibly**\n",
35
+ "\n",
36
+ "import os\n",
37
+ "import sys\n",
38
+ "\n",
39
+ "if not i_accept_the_rules:\n",
40
+ " raise ValueError(\"Please read and accept the rules before proceeding!\")\n",
41
+ "\n",
42
+ "print(\"Installing BeatHeritage...\")\n",
43
+ "print(\"=\"*50)\n",
44
+ "\n",
45
+ "# Clone repository if not exists\n",
46
+ "if not os.path.exists('/content/BeatHeritage'):\n",
47
+ " !git clone -q https://github.com/hongminh54/BeatHeritage.git\n",
48
+ " print(\"✅ Repository cloned\")\n",
49
+ "else:\n",
50
+ " print(\"✅ Repository already exists\")\n",
51
+ "\n",
52
+ "%cd /content/BeatHeritage\n",
53
+ "\n",
54
+ "# Install dependencies\n",
55
+ "print(\"\\nInstalling dependencies...\")\n",
56
+ "!pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118\n",
57
+ "!pip install -q -r requirements.txt\n",
58
+ "!apt-get install -y ffmpeg > /dev/null 2>&1\n",
59
+ "\n",
60
+ "print(\"\\nSetup complete!\")\n",
61
+ "\n",
62
+ "# Import required libraries\n",
63
+ "import warnings\n",
64
+ "warnings.filterwarnings('ignore')\n",
65
+ "\n",
66
+ "import torch\n",
67
+ "from google.colab import files\n",
68
+ "from IPython.display import display, HTML, Audio\n",
69
+ "from pathlib import Path\n",
70
+ "import json\n",
71
+ "import shlex\n",
72
+ "import subprocess\n",
73
+ "from datetime import datetime\n",
74
+ "import zipfile\n",
75
+ "\n",
76
+ "# Check GPU availability\n",
77
+ "device = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
78
+ "print(f\"\\nUsing device: {device}\")\n",
79
+ "if device == 'cuda':\n",
80
+ " gpu_name = torch.cuda.get_device_name(0)\n",
81
+ " gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1024**3\n",
82
+ " print(f\"GPU: {gpu_name}\")\n",
83
+ " print(f\"Memory: {gpu_memory:.1f} GB\")\n",
84
+ "else:\n",
85
+ " print(\"No GPU detected! Generation will be VERY slow.\")\n",
86
+ "\n",
87
+ "# Initialize global variables\n",
88
+ "audio_path = \"\"\n",
89
+ "output_path = \"/content/BeatHeritage/output\"\n",
90
+ "os.makedirs(output_path, exist_ok=True)"
91
+ ],
92
+ "outputs": [],
93
+ "execution_count": null
94
+ },
95
+ {
96
+ "cell_type": "code",
97
+ "metadata": {},
98
+ "source": [
99
+ "#@title 🎵 Upload Audio File { display-mode: \"form\" }\n",
100
+ "#@markdown Upload your audio file (MP3, OGG, or WAV format)\n",
101
+ "\n",
102
+ "def upload_and_validate_audio():\n",
103
+ " \"\"\"Upload and validate audio file with proper error handling\"\"\"\n",
104
+ " global audio_path\n",
105
+ " \n",
106
+ " print(\"Please select an audio file to upload...\")\n",
107
+ " uploaded = files.upload()\n",
108
+ " \n",
109
+ " if not uploaded:\n",
110
+ " print(\"No file uploaded\")\n",
111
+ " return None\n",
112
+ " \n",
113
+ " # Get the first uploaded file\n",
114
+ " original_filename = list(uploaded.keys())[0]\n",
115
+ " \n",
116
+ " # Clean filename - remove special characters and spaces\n",
117
+ " import re\n",
118
+ " clean_filename = re.sub(r'[^a-zA-Z0-9._-]', '_', original_filename)\n",
119
+ " clean_filename = clean_filename.replace(' ', '_')\n",
120
+ " \n",
121
+ " # Ensure proper extension\n",
122
+ " if not any(clean_filename.lower().endswith(ext) for ext in ['.mp3', '.ogg', '.wav']):\n",
123
+ " print(f\"Invalid file format: {original_filename}\")\n",
124
+ " print(\"Please upload an MP3, OGG, or WAV file\")\n",
125
+ " return None\n",
126
+ " \n",
127
+ " # Save with cleaned filename\n",
128
+ " audio_path = f'/content/BeatHeritage/{clean_filename}'\n",
129
+ " \n",
130
+ " # Write the uploaded content to the new path\n",
131
+ " with open(audio_path, 'wb') as f:\n",
132
+ " f.write(uploaded[original_filename])\n",
133
+ " \n",
134
+ " print(f\"Audio uploaded successfully!\")\n",
135
+ " print(f\"Original: {original_filename}\")\n",
136
+ " print(f\"Saved as: {clean_filename}\")\n",
137
+ " print(f\"Path: {audio_path}\")\n",
138
+ " \n",
139
+ " # Display audio player\n",
140
+ " display(Audio(audio_path))\n",
141
+ " \n",
142
+ " return audio_path\n",
143
+ "\n",
144
+ "# Upload audio\n",
145
+ "audio_path = upload_and_validate_audio()\n",
146
+ "\n",
147
+ "if not audio_path:\n",
148
+ " print(\"\\n⚠Please run this cell again and upload a valid audio file\")"
149
+ ],
150
+ "outputs": [],
151
+ "execution_count": null
152
+ },
153
+ {
154
+ "cell_type": "code",
155
+ "metadata": {},
156
+ "source": [
157
+ "#@title ⚙️ Configure Generation Parameters { display-mode: \"form\" }\n",
158
+ "\n",
159
+ "#@markdown ### 🎯 Basic Settings\n",
160
+ "#@markdown ---\n",
161
+ "#@markdown Choose the AI model version to use:\n",
162
+ "model_version = \"BeatHeritage V1 (Enhanced)\" #@param [\"BeatHeritage V1 (Enhanced)\", \"Mapperatorinator V30\", \"Mapperatorinator V29\", \"Mapperatorinator V28\"]\n",
163
+ "\n",
164
+ "#@markdown Select the game mode for your beatmap:\n",
165
+ "gamemode = \"Standard\" #@param [\"Standard\", \"Taiko\", \"Catch the Beat\", \"Mania\"]\n",
166
+ "\n",
167
+ "#@markdown Target difficulty (★ rating):\n",
168
+ "difficulty = 5.5 #@param {type:\"slider\", min:1, max:10, step:0.1}\n",
169
+ "\n",
170
+ "#@markdown ### 🎨 Style Configuration\n",
171
+ "#@markdown ---\n",
172
+ "#@markdown Primary mapping style descriptor:\n",
173
+ "descriptor_1 = \"clean\" #@param [\"clean\", \"tech\", \"jump aim\", \"stream\", \"aim\", \"speed\", \"flow\", \"complex\", \"simple\", \"modern\", \"classic\", \"slider tech\", \"alt\", \"precision\", \"stamina\"]\n",
174
+ "\n",
175
+ "#@markdown Secondary style descriptor (optional):\n",
176
+ "descriptor_2 = \"\" #@param [\"\", \"clean\", \"tech\", \"jump aim\", \"stream\", \"aim\", \"speed\", \"flow\", \"complex\", \"simple\", \"modern\", \"classic\", \"slider tech\", \"alt\", \"precision\", \"stamina\"]\n",
177
+ "\n",
178
+ "#@markdown ### 🔧 Advanced Parameters\n",
179
+ "#@markdown ---\n",
180
+ "#@markdown Generation temperature (lower = more conservative):\n",
181
+ "temperature = 0.85 #@param {type:\"slider\", min:0.1, max:2.0, step:0.05}\n",
182
+ "\n",
183
+ "#@markdown Top-p sampling (nucleus sampling):\n",
184
+ "top_p = 0.92 #@param {type:\"slider\", min:0.1, max:1.0, step:0.01}\n",
185
+ "\n",
186
+ "#@markdown Classifier-free guidance scale:\n",
187
+ "cfg_scale = 7.5 #@param {type:\"slider\", min:1.0, max:20.0, step:0.5}\n",
188
+ "\n",
189
+ "#@markdown ### 📊 Quality Control (BeatHeritage V1)\n",
190
+ "#@markdown ---\n",
191
+ "enable_auto_correction = True #@param {type:\"boolean\"}\n",
192
+ "enable_flow_optimization = True #@param {type:\"boolean\"}\n",
193
+ "enable_pattern_variety = True #@param {type:\"boolean\"}\n",
194
+ "\n",
195
+ "#@markdown ### 🎯 Export Options\n",
196
+ "#@markdown ---\n",
197
+ "super_timing = False #@param {type:\"boolean\"}\n",
198
+ "#@markdown Enable for songs with variable BPM (slower generation)\n",
199
+ "\n",
200
+ "export_osz = True #@param {type:\"boolean\"}\n",
201
+ "#@markdown Export as .osz package (includes audio)\n",
202
+ "\n",
203
+ "# Map model names to config names\n",
204
+ "model_configs = {\n",
205
+ " \"BeatHeritage V1 (Enhanced)\": \"beatheritage_v1\",\n",
206
+ " \"Mapperatorinator V30\": \"v30\",\n",
207
+ " \"Mapperatorinator V29\": \"v29\",\n",
208
+ " \"Mapperatorinator V28\": \"v28\"\n",
209
+ "}\n",
210
+ "\n",
211
+ "# Map gamemode names to indices\n",
212
+ "gamemode_indices = {\n",
213
+ " \"Standard\": 0,\n",
214
+ " \"Taiko\": 1,\n",
215
+ " \"Catch the Beat\": 2,\n",
216
+ " \"Mania\": 3\n",
217
+ "}\n",
218
+ "\n",
219
+ "selected_model = model_configs[model_version]\n",
220
+ "selected_gamemode = gamemode_indices[gamemode]\n",
221
+ "\n",
222
+ "# Build descriptor list\n",
223
+ "descriptors = [d for d in [descriptor_1, descriptor_2] if d]\n",
224
+ "\n",
225
+ "# Display configuration summary\n",
226
+ "print(\"Configuration Summary\")\n",
227
+ "print(\"=\"*50)\n",
228
+ "print(f\"Model: {model_version}\")\n",
229
+ "print(f\"Game Mode: {gamemode}\")\n",
230
+ "print(f\"Difficulty: {difficulty}★\")\n",
231
+ "print(f\"Style: {', '.join(descriptors) if descriptors else 'Default'}\")\n",
232
+ "print(f\"Temperature: {temperature}\")\n",
233
+ "print(f\"Top-p: {top_p}\")\n",
234
+ "print(f\"CFG Scale: {cfg_scale}\")\n",
235
+ "\n",
236
+ "if selected_model == \"beatheritage_v1\":\n",
237
+ " print(\"\\nBeatHeritage V1 Features:\")\n",
238
+ " if enable_auto_correction:\n",
239
+ " print(\" ✓ Auto-correction enabled\")\n",
240
+ " if enable_flow_optimization:\n",
241
+ " print(\" ✓ Flow optimization enabled\")\n",
242
+ " if enable_pattern_variety:\n",
243
+ " print(\" ✓ Pattern variety enabled\")\n",
244
+ "\n",
245
+ "if super_timing:\n",
246
+ " print(\"\\nSuper timing enabled (for variable BPM)\")\n",
247
+ "\n",
248
+ "print(\"\\nConfiguration ready!\")"
249
+ ],
250
+ "outputs": [],
251
+ "execution_count": null
252
+ },
253
+ {
254
+ "cell_type": "code",
255
+ "metadata": {},
256
+ "source": [
257
+ "#@title 🎮 Generate Beatmap { display-mode: \"form\" }\n",
258
+ "#@markdown Click the play button to start generation. This may take a few minutes depending on song length.\n",
259
+ "\n",
260
+ "def generate_beatmap():\n",
261
+ " \"\"\"Generate beatmap with proper error handling and progress tracking\"\"\"\n",
262
+ " \n",
263
+ " if not audio_path or not os.path.exists(audio_path):\n",
264
+ " print(\"Error: No audio file found!\")\n",
265
+ " print(\"Please upload an audio file first.\")\n",
266
+ " return None\n",
267
+ " \n",
268
+ " print(\"Starting beatmap generation...\")\n",
269
+ " print(\"=\"*50)\n",
270
+ " print(f\"Audio: {os.path.basename(audio_path)}\")\n",
271
+ " print(f\"Model: {model_version}\")\n",
272
+ " print(f\"Mode: {gamemode}\")\n",
273
+ " print(f\"Difficulty: {difficulty}★\")\n",
274
+ " print(\"=\"*50)\n",
275
+ " print()\n",
276
+ " \n",
277
+ " # Build command with proper escaping\n",
278
+ " cmd = [\n",
279
+ " 'python', 'inference.py',\n",
280
+ " '-cn', selected_model,\n",
281
+ " f'audio_path={shlex.quote(audio_path)}',\n",
282
+ " f'output_path={shlex.quote(output_path)}',\n",
283
+ " f'gamemode={selected_gamemode}',\n",
284
+ " f'difficulty={difficulty}',\n",
285
+ " f'temperature={temperature}',\n",
286
+ " f'top_p={top_p}',\n",
287
+ " f'cfg_scale={cfg_scale}',\n",
288
+ " f'super_timing={str(super_timing).lower()}',\n",
289
+ " f'export_osz={str(export_osz).lower()}',\n",
290
+ " ]\n",
291
+ " \n",
292
+ " # Add descriptors if specified\n",
293
+ " if descriptors:\n",
294
+ " desc_str = json.dumps(descriptors)\n",
295
+ " cmd.append(f'descriptors={shlex.quote(desc_str)}')\n",
296
+ " \n",
297
+ " # Add BeatHeritage V1 specific features\n",
298
+ " if selected_model == \"beatheritage_v1\":\n",
299
+ " if enable_auto_correction:\n",
300
+ " cmd.append('quality_control.enable_auto_correction=true')\n",
301
+ " if enable_flow_optimization:\n",
302
+ " cmd.append('quality_control.enable_flow_optimization=true')\n",
303
+ " if enable_pattern_variety:\n",
304
+ " cmd.append('advanced_features.enable_pattern_variety=true')\n",
305
+ " \n",
306
+ " # Always enable these for V1\n",
307
+ " cmd.extend([\n",
308
+ " 'advanced_features.enable_context_aware_generation=true',\n",
309
+ " 'advanced_features.enable_style_preservation=true',\n",
310
+ " 'generate_positions=true',\n",
311
+ " 'position_refinement=true'\n",
312
+ " ])\n",
313
+ " \n",
314
+ " # Execute command\n",
315
+ " try:\n",
316
+ " print(\"⏳ Generating beatmap... (this may take several minutes)\\n\")\n",
317
+ " \n",
318
+ " # Run the command\n",
319
+ " process = subprocess.Popen(\n",
320
+ " cmd,\n",
321
+ " stdout=subprocess.PIPE,\n",
322
+ " stderr=subprocess.STDOUT,\n",
323
+ " text=True,\n",
324
+ " bufsize=1,\n",
325
+ " universal_newlines=True\n",
326
+ " )\n",
327
+ " \n",
328
+ " # Stream output in real-time\n",
329
+ " for line in process.stdout:\n",
330
+ " print(line, end='')\n",
331
+ " \n",
332
+ " # Wait for completion\n",
333
+ " return_code = process.wait()\n",
334
+ " \n",
335
+ " if return_code == 0:\n",
336
+ " print(\"\\n\" + \"=\"*50)\n",
337
+ " print(\"Beatmap generation complete!\")\n",
338
+ " \n",
339
+ " # List generated files\n",
340
+ " generated_files = list(Path(output_path).glob('*'))\n",
341
+ " if generated_files:\n",
342
+ " print(f\"\\nGenerated {len(generated_files)} file(s):\")\n",
343
+ " for file in generated_files:\n",
344
+ " size_mb = file.stat().st_size / (1024 * 1024)\n",
345
+ " print(f\" • {file.name} ({size_mb:.2f} MB)\")\n",
346
+ " \n",
347
+ " return generated_files\n",
348
+ " else:\n",
349
+ " print(f\"\\nGeneration failed with error code: {return_code}\")\n",
350
+ " return None\n",
351
+ " \n",
352
+ " except Exception as e:\n",
353
+ " print(f\"\\nError during generation: {str(e)}\")\n",
354
+ " print(\"\\nTroubleshooting tips:\")\n",
355
+ " print(\"1. Ensure the audio file is valid\")\n",
356
+ " print(\"2. Check if GPU memory is sufficient\")\n",
357
+ " print(\"3. Try reducing temperature or cfg_scale\")\n",
358
+ " print(\"4. Disable super_timing if enabled\")\n",
359
+ " return None\n",
360
+ "\n",
361
+ "# Run generation\n",
362
+ "generated_files = generate_beatmap()"
363
+ ],
364
+ "outputs": [],
365
+ "execution_count": null
366
+ },
367
+ {
368
+ "cell_type": "code",
369
+ "metadata": {},
370
+ "source": [
371
+ "#@title 📥 Download Generated Files { display-mode: \"form\" }\n",
372
+ "#@markdown Download your generated beatmap files\n",
373
+ "\n",
374
+ "def download_results():\n",
375
+ " \"\"\"Package and download generated beatmap files\"\"\"\n",
376
+ " \n",
377
+ " output_files = list(Path(output_path).glob('*'))\n",
378
+ " \n",
379
+ " if not output_files:\n",
380
+ " print(\"No files to download\")\n",
381
+ " print(\"Please generate a beatmap first.\")\n",
382
+ " return\n",
383
+ " \n",
384
+ " print(\"Preparing files for download...\")\n",
385
+ " print(\"=\"*50)\n",
386
+ " \n",
387
+ " # Create timestamp for unique naming\n",
388
+ " timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')\n",
389
+ " \n",
390
+ " # Check if we have .osz files\n",
391
+ " osz_files = [f for f in output_files if f.suffix == '.osz']\n",
392
+ " osu_files = [f for f in output_files if f.suffix == '.osu']\n",
393
+ " \n",
394
+ " # Download .osz files directly if available\n",
395
+ " if osz_files:\n",
396
+ " for osz_file in osz_files:\n",
397
+ " print(f\"\\n📥 Downloading: {osz_file.name}\")\n",
398
+ " files.download(str(osz_file))\n",
399
+ " \n",
400
+ " # Download .osu files\n",
401
+ " elif osu_files:\n",
402
+ " if len(osu_files) == 1:\n",
403
+ " # Single file - download directly\n",
404
+ " osu_file = osu_files[0]\n",
405
+ " print(f\"\\n📥 Downloading: {osu_file.name}\")\n",
406
+ " files.download(str(osu_file))\n",
407
+ " else:\n",
408
+ " # Multiple files - create zip\n",
409
+ " zip_name = f'beatheritage_{gamemode.lower()}_{timestamp}.zip'\n",
410
+ " zip_path = f'/content/{zip_name}'\n",
411
+ " \n",
412
+ " with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:\n",
413
+ " for file in output_files:\n",
414
+ " zipf.write(file, file.name)\n",
415
+ " print(f\" • Added: {file.name}\")\n",
416
+ " \n",
417
+ " print(f\"\\nDownloading: {zip_name}\")\n",
418
+ " files.download(zip_path)\n",
419
+ " \n",
420
+ " # Also handle other files\n",
421
+ " other_files = [f for f in output_files if f.suffix not in ['.osz', '.osu']]\n",
422
+ " if other_files:\n",
423
+ " print(\"\\nAdditional files generated:\")\n",
424
+ " for file in other_files:\n",
425
+ " print(f\" • {file.name}\")\n",
426
+ " \n",
427
+ " print(\"\\nDownload complete!\")\n",
428
+ " print(\"\\nTips:\")\n",
429
+ " print(\"• .osz files can be opened directly in osu!\")\n",
430
+ " print(\"• .osu files should be placed in your Songs folder\")\n",
431
+ " print(\"• Press F5 in osu! to refresh after adding files\")\n",
432
+ "\n",
433
+ "# Download files\n",
434
+ "download_results()"
435
+ ],
436
+ "outputs": [],
437
+ "execution_count": null
438
+ },
439
+ {
440
+ "cell_type": "markdown",
441
+ "metadata": {},
442
+ "source": [
443
+ "---\n",
444
+ "\n",
445
+ "## Additional Information\n",
446
+ "\n",
447
+ "### Tips for Best Results:\n",
448
+ "- **Audio Quality**: Use high-quality audio files (320kbps MP3 or FLAC)\n",
449
+ "- **Difficulty Matching**: Match the difficulty rating to song intensity\n",
450
+ "- **Style Descriptors**: Choose descriptors that match the music genre\n",
451
+ "- **Variable BPM**: Enable `super_timing` for songs with tempo changes\n",
452
+ "\n",
453
+ "### Troubleshooting:\n",
454
+ "\n",
455
+ "**Out of Memory:**\n",
456
+ "- Restart runtime to clear GPU memory\n",
457
+ "- Use shorter songs or segments\n",
458
+ "- Reduce cfg_scale value\n",
459
+ "\n",
460
+ "**Poor Quality Output:**\n",
461
+ "- Lower temperature (0.7-0.8) for stability\n",
462
+ "- Increase cfg_scale (10-15) for stronger guidance\n",
463
+ "- Use more specific descriptors\n",
464
+ "\n",
465
+ "**Generation Errors:**\n",
466
+ "- Ensure audio file has no special characters\n",
467
+ "- Check GPU is enabled in runtime\n",
468
+ "- Try different model versions\n",
469
+ "\n",
470
+ "### Resources:\n",
471
+ "- [GitHub Repository](https://github.com/hongminh54/BeatHeritage)\n",
472
+ "- [Documentation](https://github.com/hongminh54/BeatHeritage/blob/main/README.md)\n",
473
+ "\n",
474
+ "### License & Credits:\n",
475
+ "- BeatHeritage V1 by hongminh54\n",
476
+ "- Based on Mapperatorinator by OliBomby\n",
477
+ "- Please credit AI usage in your beatmap descriptions\n",
478
+ "\n",
479
+ "---"
480
+ ]
481
+ }
482
+ ],
483
+ "metadata": {
484
+ "kernelspec": {
485
+ "display_name": "Python 3",
486
+ "language": "python",
487
+ "name": "python3"
488
+ },
489
+ "language_info": {
490
+ "codemirror_mode": {
491
+ "name": "ipython",
492
+ "version": 3
493
+ },
494
+ "file_extension": ".py",
495
+ "mimetype": "text/x-python",
496
+ "name": "python",
497
+ "nbconvert_exporter": "python",
498
+ "pygments_lexer": "ipython3",
499
+ "version": "3.10.0"
500
+ },
501
+ "colab": {
502
+ "provenance": [],
503
+ "gpuType": "T4",
504
+ "collapsed_sections": []
505
+ },
506
+ "accelerator": "GPU"
507
+ },
508
+ "nbformat": 4,
509
+ "nbformat_minor": 4
510
+ }
colab/classifier_classify.ipynb ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "metadata": {},
5
+ "cell_type": "markdown",
6
+ "source": [
7
+ "<a href=\"https://colab.research.google.com/github/OliBomby/Mapperatorinator/blob/main/colab/classifier_classify.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>\n",
8
+ "\n",
9
+ "# Beatmap Mapper Classification\n",
10
+ "\n",
11
+ "This notebook is an interactive demo of an osu! beatmap mapper classification model created by OliBomby. This model is capable of predicting which osu! standard ranked mapper mapped any given beatmap by looking at the style. You can use this on your own maps to see which mapper you are most similar to.\n",
12
+ "\n",
13
+ "### Instructions for running:\n",
14
+ "\n",
15
+ "* __Execute each cell in order__. Press ▶️ on the left of each cell to execute the cell.\n",
16
+ "* __Setup Environment__: run the first cell to clone the repository and install the required dependencies. You only need to run this cell once per session.\n",
17
+ "* __Upload Audio__: choose a .mp3 or .ogg file from your computer.\n",
18
+ "* __Upload Beatmap__: choose a .osu file from your computer.\n",
19
+ "* __Configure__: choose the start time of the segment that the classifier should classify.\n",
20
+ "* Classify the beatmap using the __Classify Beatmap__ cell.\n"
21
+ ],
22
+ "id": "3c19902455e25588"
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "id": "initial_id",
27
+ "metadata": {
28
+ "collapsed": true
29
+ },
30
+ "source": [
31
+ "#@title Setup Environment { display-mode: \"form\" }\n",
32
+ "\n",
33
+ "!git clone https://github.com/OliBomby/Mapperatorinator.git\n",
34
+ "%cd Mapperatorinator\n",
35
+ "\n",
36
+ "!pip install hydra-core lightning nnaudio\n",
37
+ "!pip install slider git+https://github.com/OliBomby/slider.git@gedagedigedagedaoh\n",
38
+ "\n",
39
+ "from google.colab import files\n",
40
+ "from hydra import compose, initialize_config_dir\n",
41
+ "from classifier.classify import main\n",
42
+ "\n",
43
+ "input_audio = \"\"\n",
44
+ "input_beatmap = \"\""
45
+ ],
46
+ "outputs": [],
47
+ "execution_count": null
48
+ },
49
+ {
50
+ "metadata": {},
51
+ "cell_type": "code",
52
+ "source": [
53
+ "#@title Upload Audio { display-mode: \"form\" }\n",
54
+ "\n",
55
+ "def upload_audio():\n",
56
+ " data = list(files.upload().keys())\n",
57
+ " if len(data) > 1:\n",
58
+ " print('Multiple files uploaded; using only one.')\n",
59
+ " return data[0]\n",
60
+ "\n",
61
+ "input_audio = upload_audio()"
62
+ ],
63
+ "id": "624a60c5777279e7",
64
+ "outputs": [],
65
+ "execution_count": null
66
+ },
67
+ {
68
+ "metadata": {},
69
+ "cell_type": "code",
70
+ "source": [
71
+ "#@title Upload Beatmap { display-mode: \"form\" }\n",
72
+ "\n",
73
+ "def upload_beatmap():\n",
74
+ " data = list(files.upload().keys())\n",
75
+ " if len(data) > 1:\n",
76
+ " print('Multiple files uploaded; using only one.')\n",
77
+ " return data[0]\n",
78
+ "\n",
79
+ "input_beatmap = upload_beatmap()"
80
+ ],
81
+ "id": "63884394491f6664",
82
+ "outputs": [],
83
+ "execution_count": null
84
+ },
85
+ {
86
+ "metadata": {},
87
+ "cell_type": "code",
88
+ "source": [
89
+ "#@title Configure and Classify Beatmap { display-mode: \"form\" }\n",
90
+ "\n",
91
+ "# @markdown #### Input the start time in seconds of the segment to classify.\n",
92
+ "time = 5 # @param {type:\"number\"}\n",
93
+ " \n",
94
+ "# Create config\n",
95
+ "with initialize_config_dir(version_base=\"1.1\", config_dir=\"/content/Mapperatorinator/classifier/configs\"):\n",
96
+ " conf = compose(config_name=\"inference\")\n",
97
+ "\n",
98
+ "# Do inference\n",
99
+ "conf.time = time\n",
100
+ "conf.beatmap_path = input_beatmap\n",
101
+ "conf.audio_path = input_audio\n",
102
+ "conf.mappers_path = \"./datasets/beatmap_users.json\"\n",
103
+ "\n",
104
+ "main(conf)\n"
105
+ ],
106
+ "id": "166eb3e5f9398554",
107
+ "outputs": [],
108
+ "execution_count": null
109
+ }
110
+ ],
111
+ "metadata": {
112
+ "kernelspec": {
113
+ "display_name": "Python 3",
114
+ "language": "python",
115
+ "name": "python3"
116
+ },
117
+ "accelerator": "GPU",
118
+ "language_info": {
119
+ "codemirror_mode": {
120
+ "name": "ipython",
121
+ "version": 2
122
+ },
123
+ "file_extension": ".py",
124
+ "mimetype": "text/x-python",
125
+ "name": "python",
126
+ "nbconvert_exporter": "python",
127
+ "pygments_lexer": "ipython2",
128
+ "version": "2.7.6"
129
+ }
130
+ },
131
+ "nbformat": 4,
132
+ "nbformat_minor": 5
133
+ }
colab/mai_mod_inference.ipynb ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "metadata": {},
5
+ "cell_type": "markdown",
6
+ "source": [
7
+ "<a href=\"https://colab.research.google.com/github/OliBomby/Mapperatorinator/blob/main/colab/mai_mod_inference.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>\n",
8
+ "\n",
9
+ "# Beatmap Modding with MaiMod\n",
10
+ "\n",
11
+ "This notebook is an interactive demo of an AI-driven osu! Beatmap Modding Tool created by OliBomby. This model is capable of finding various faults and inconsistencies in beatmaps that other automated modding tools cannot detect. Run this tool on your beatmaps to get suggestions on how to improve them.\n",
12
+ "\n",
13
+ "### Instructions for running:\n",
14
+ "\n",
15
+ "* Make sure to use a GPU runtime, click: __Runtime >> Change Runtime Type >> GPU__\n",
16
+ "* __Execute each cell in order__. Press ▶️ on the left of each cell to execute the cell.\n",
17
+ "* __Setup Environment__: run the first cell to clone the repository and install the required dependencies. You only need to run this cell once per session.\n",
18
+ "* __Upload Audio__: choose the beatmap song .mp3 or .ogg file from your computer. You can find these files in stable by using File > Open Song Folder, or in lazer by using File > Edit Externally.\n",
19
+ "* __Upload Beatmap__: choose the beatmap .osu file from your computer.\n",
20
+ "* __Generate Suggestions__ to generate suggestions for your uploaded beatmap.\n"
21
+ ],
22
+ "id": "3c19902455e25588"
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "id": "initial_id",
27
+ "metadata": {
28
+ "collapsed": true
29
+ },
30
+ "source": [
31
+ "#@title Setup Environment { display-mode: \"form\" }\n",
32
+ "#@markdown Run this cell to clone the repository and install the required dependencies. You only need to run this cell once per session.\n",
33
+ "\n",
34
+ "!git clone https://github.com/OliBomby/Mapperatorinator.git\n",
35
+ "%cd Mapperatorinator\n",
36
+ "\n",
37
+ "!pip install transformers==4.53.3\n",
38
+ "!pip install hydra-core\n",
39
+ "!pip install slider git+https://github.com/OliBomby/slider.git@gedagedigedagedaoh\n",
40
+ "\n",
41
+ "import os\n",
42
+ "from google.colab import files\n",
43
+ "from mai_mod import main\n",
44
+ "from hydra import compose, initialize_config_dir\n",
45
+ "\n",
46
+ "input_audio = \"\"\n",
47
+ "input_beatmap = \"\""
48
+ ],
49
+ "outputs": [],
50
+ "execution_count": null
51
+ },
52
+ {
53
+ "metadata": {},
54
+ "cell_type": "code",
55
+ "source": [
56
+ "#@title Upload Audio { display-mode: \"form\" }\n",
57
+ "#@markdown Run this cell to upload the song of the beatmap that you want to mod. Please upload a .mp3 or .ogg file. You can find these files in stable by using File > Open Song Folder, or in lazer by using File > Edit Externally.\n",
58
+ "\n",
59
+ "def upload_audio():\n",
60
+ " data = list(files.upload().keys())\n",
61
+ " if len(data) > 1:\n",
62
+ " print('Multiple files uploaded; using only one.')\n",
63
+ " file = data[0]\n",
64
+ " if not file.endswith('.mp3') and not file.endswith('.ogg'):\n",
65
+ " print('Invalid file format. Please upload a .mp3 or .ogg file.')\n",
66
+ " return \"\"\n",
67
+ " return data[0]\n",
68
+ "\n",
69
+ "input_audio = upload_audio()"
70
+ ],
71
+ "id": "624a60c5777279e7",
72
+ "outputs": [],
73
+ "execution_count": null
74
+ },
75
+ {
76
+ "metadata": {},
77
+ "cell_type": "code",
78
+ "source": [
79
+ "#@title Upload Beatmap { display-mode: \"form\" }\n",
80
+ "#@markdown Run this cell to upload the beatmap **.osu** file of the beatmap that you want to mod. You can find these files in stable by using File > Open Song Folder, or in lazer by using File > Edit Externally.\n",
81
+ "\n",
82
+ "def upload_beatmap():\n",
83
+ " data = list(files.upload().keys())\n",
84
+ " if len(data) > 1:\n",
85
+ " print('Multiple files uploaded; using only one.')\n",
86
+ " file = data[0]\n",
87
+ " if not file.endswith('.osu'):\n",
88
+ " print('Invalid file format. Please upload a .osu file.\\nIn stable you can find the .osu file in the song folder (File > Open Song Folder).\\nIn lazer you can find the .osu file by using File > Edit Externally.')\n",
89
+ " return \"\"\n",
90
+ " return file\n",
91
+ "\n",
92
+ "input_beatmap = upload_beatmap()"
93
+ ],
94
+ "id": "63884394491f6664",
95
+ "outputs": [],
96
+ "execution_count": null
97
+ },
98
+ {
99
+ "metadata": {},
100
+ "cell_type": "code",
101
+ "source": [
102
+ "#@title Generate Suggestions { display-mode: \"form\" }\n",
103
+ "#@markdown Run this cell to generate suggestions for your uploaded beatmap. The suggestions will be printed in the output.\n",
104
+ "\n",
105
+ "# Validate stuff\n",
106
+ "assert os.path.exists(input_beatmap), \"Please upload a beatmap.\"\n",
107
+ "assert os.path.exists(input_audio), \"Please upload an audio file.\"\n",
108
+ " \n",
109
+ "# Create config\n",
110
+ "config = \"mai_mod\"\n",
111
+ "with initialize_config_dir(version_base=\"1.1\", config_dir=\"/content/Mapperatorinator/configs\"):\n",
112
+ " conf = compose(config_name=config)\n",
113
+ "\n",
114
+ "# Do inference\n",
115
+ "conf.audio_path = input_audio\n",
116
+ "conf.beatmap_path = input_beatmap\n",
117
+ "conf.precision = \"fp32\" # For some reason AMP causes OOM in Colab\n",
118
+ "\n",
119
+ "main(conf)"
120
+ ],
121
+ "id": "166eb3e5f9398554",
122
+ "outputs": [],
123
+ "execution_count": null
124
+ }
125
+ ],
126
+ "metadata": {
127
+ "kernelspec": {
128
+ "display_name": "Python 3",
129
+ "language": "python",
130
+ "name": "python3"
131
+ },
132
+ "accelerator": "GPU",
133
+ "language_info": {
134
+ "codemirror_mode": {
135
+ "name": "ipython",
136
+ "version": 2
137
+ },
138
+ "file_extension": ".py",
139
+ "mimetype": "text/x-python",
140
+ "name": "python",
141
+ "nbconvert_exporter": "python",
142
+ "pygments_lexer": "ipython2",
143
+ "version": "2.7.6"
144
+ }
145
+ },
146
+ "nbformat": 4,
147
+ "nbformat_minor": 5
148
+ }
colab/mapperatorinator_inference.ipynb ADDED
@@ -0,0 +1,305 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "metadata": {},
5
+ "cell_type": "markdown",
6
+ "source": [
7
+ "<a href=\"https://colab.research.google.com/github/hongminh54/BeatHeritage/blob/main/colab/mapperatorinator_inference.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>\n",
8
+ "\n",
9
+ "# Beatmap Generation with Mapperatorinator\n",
10
+ "\n",
11
+ "This notebook is an interactive demo of an osu! beatmap generation model created by OliBomby. This model is capable of generating hit objects, hitsounds, timing, kiai times, and SVs for all 4 gamemodes. You can upload a beatmap to give to the model as additional context or remap parts of the beatmap.\n",
12
+ "\n",
13
+ "### Instructions for running:\n",
14
+ "\n",
15
+ "* Read and accept the rules regarding using this tool by clicking the checkbox.\n",
16
+ "* Make sure to use a GPU runtime, click: __Runtime >> Change Runtime Type >> GPU__\n",
17
+ "* __Execute each cell in order__. Press ▶️ on the left of each cell to execute the cell.\n",
18
+ "* __Setup Environment__: run the first cell to clone the repository and install the required dependencies. You only need to run this cell once per session.\n",
19
+ "* __Upload Audio__: choose a .mp3 or .ogg file from your computer.\n",
20
+ "* __Upload Beatmap__: optionally choose a beatmap .osu file from your computer. You can find these files in stable by using File > Open Song Folder, or in lazer by using File > Edit Externally.\n",
21
+ "* __Configure__: choose your generation parameters to control the style of the generated beatmap.\n",
22
+ "* Generate the beatmap using the __Generate Beatmap__ cell. (it may take a few minutes depending on the length of the song)\n"
23
+ ],
24
+ "id": "3c19902455e25588"
25
+ },
26
+ {
27
+ "cell_type": "code",
28
+ "id": "initial_id",
29
+ "metadata": {
30
+ "collapsed": true
31
+ },
32
+ "source": [
33
+ "#@title Setup Environment { display-mode: \"form\" }\n",
34
+ "#@markdown ### Use this tool responsibly. Always disclose the use of AI in your beatmaps. Accept the rules and run this cell.\n",
35
+ "i_accept_the_rules = False # @param {type:\"boolean\"}\n",
36
+ "\n",
37
+ "assert i_accept_the_rules, \"Read and accept the rules first!\"\n",
38
+ "\n",
39
+ "!git clone https://github.com/hongminh54/BeatHeritage.git\n",
40
+ "%cd Mapperatorinator\n",
41
+ "\n",
42
+ "!pip install transformers==4.53.3\n",
43
+ "!pip install hydra-core nnaudio\n",
44
+ "!pip install slider git+https://github.com/OliBomby/slider.git@gedagedigedagedaoh\n",
45
+ "\n",
46
+ "from google.colab import files\n",
47
+ "\n",
48
+ "import os\n",
49
+ "from hydra import compose, initialize_config_dir\n",
50
+ "from osuT5.osuT5.event import ContextType\n",
51
+ "from inference import main\n",
52
+ "\n",
53
+ "output_path = \"output\"\n",
54
+ "input_audio = \"\"\n",
55
+ "input_beatmap = \"\""
56
+ ],
57
+ "outputs": [],
58
+ "execution_count": null
59
+ },
60
+ {
61
+ "metadata": {},
62
+ "cell_type": "code",
63
+ "source": [
64
+ "#@title Upload Audio { display-mode: \"form\" }\n",
65
+ "#@markdown Run this cell to upload audio. This is the song to generate a beatmap for. Please upload a .mp3 or .ogg file.\n",
66
+ "\n",
67
+ "def upload_audio():\n",
68
+ " data = list(files.upload().keys())\n",
69
+ " if len(data) > 1:\n",
70
+ " print('Multiple files uploaded; using only one.')\n",
71
+ " file = data[0]\n",
72
+ " if not file.endswith('.mp3') and not file.endswith('.ogg'):\n",
73
+ " print('Invalid file format. Please upload a .mp3 or .ogg file.')\n",
74
+ " return \"\"\n",
75
+ " return data[0]\n",
76
+ "\n",
77
+ "input_audio = upload_audio()"
78
+ ],
79
+ "id": "624a60c5777279e7",
80
+ "outputs": [],
81
+ "execution_count": null
82
+ },
83
+ {
84
+ "metadata": {},
85
+ "cell_type": "code",
86
+ "source": [
87
+ "#@title (Optional) Upload Beatmap { display-mode: \"form\" }\n",
88
+ "#@markdown This step is required if you want to use `in_context` or `add_to_beatmap` to provide additional info to the model.\n",
89
+ "#@markdown It will also fill in any missing metadata and unknown values in the configuration using info of the reference beatmap.\n",
90
+ "#@markdown Please upload a **.osu** file. You can find the .osu file in the song folder in stable or by using File > Edit Externally in lazer.\n",
91
+ "use_reference_beatmap = False # @param {type:\"boolean\"}\n",
92
+ "\n",
93
+ "def upload_beatmap():\n",
94
+ " data = list(files.upload().keys())\n",
95
+ " if len(data) > 1:\n",
96
+ " print('Multiple files uploaded; using only one.')\n",
97
+ " file = data[0]\n",
98
+ " if not file.endswith('.osu'):\n",
99
+ " print('Invalid file format. Please upload a .osu file.\\nIn stable you can find the .osu file in the song folder (File > Open Song Folder).\\nIn lazer you can find the .osu file by using File > Edit Externally.')\n",
100
+ " return \"\"\n",
101
+ " return file\n",
102
+ "\n",
103
+ "if use_reference_beatmap:\n",
104
+ " input_beatmap = upload_beatmap()\n",
105
+ "else:\n",
106
+ " input_beatmap = \"\""
107
+ ],
108
+ "id": "63884394491f6664",
109
+ "outputs": [],
110
+ "execution_count": null
111
+ },
112
+ {
113
+ "metadata": {},
114
+ "cell_type": "code",
115
+ "source": [
116
+ "#@title Configure and Generate Beatmap { display-mode: \"form\" }\n",
117
+ "\n",
118
+ "#@markdown #### You can input -1 to leave the value unknown.\n",
119
+ "#@markdown ---\n",
120
+ "#@markdown This is the AI model to use. V30 is the most accurate model, but it does not support other gamemodes, year, descriptors, or in_context.\n",
121
+ "model = \"Mapperatorinator V30\" # @param [\"Mapperatorinator V29\", \"Mapperatorinator V30\"]\n",
122
+ "#@markdown This is the game mode to generate a beatmap for.\n",
123
+ "gamemode = \"standard\" # @param [\"standard\", \"taiko\", \"catch the beat\", \"mania\"]\n",
124
+ "#@markdown This is the Star Rating you want your beatmap to be. It might deviate from this number depending on the song intensity and other configuration.\n",
125
+ "difficulty = 5 # @param {type:\"number\"}\n",
126
+ "#@markdown This is the user ID of the ranked mapper to imitate for mapping style. You can find this in the URL of the mapper's profile.\n",
127
+ "mapper_id = -1 # @param {type:\"integer\"}\n",
128
+ "#@markdown This is the year you want the beatmap to be from. It should be in the range of 2007 to 2023.\n",
129
+ "year = 2023 # @param {type:\"integer\"}\n",
130
+ "#@markdown This is whether you want the beatmap to be hitsounded. This works only for mania mode.\n",
131
+ "hitsounded = True # @param {type:\"boolean\"}\n",
132
+ "#@markdown These are the standard difficulty parameters for the beatmap HP, OD, AR, and CS. These are the same as the ones in the editor.\n",
133
+ "hp_drain_rate = 5 # @param {type:\"number\"}\n",
134
+ "circle_size = 4 # @param {type:\"number\"}\n",
135
+ "overall_difficulty = 9 # @param {type:\"number\"}\n",
136
+ "approach_rate = 8 # @param {type:\"number\"}\n",
137
+ "slider_multiplier = 1.4 # @param {type:\"slider\", min:0.4, max:3.6, step:0.1}\n",
138
+ "slider_tick_rate = 1 # @param {type:\"number\"}\n",
139
+ "#@markdown This is the number of keys for the mania beatmap. This works only for mania mode.\n",
140
+ "keycount = 4 # @param {type:\"slider\", min:1, max:18, step:1}\n",
141
+ "#@markdown This is the ratio of hold notes to circles in the beatmap. It should be in the range [0,1]. This works only for mania mode.\n",
142
+ "hold_note_ratio = -1 # @param {type:\"number\"}\n",
143
+ "#@markdown This is the ratio of scroll speed changes to the number of notes. It should be in the range [0,1]. This works only for mania and taiko modes.\n",
144
+ "scroll_speed_ratio = -1 # @param {type:\"number\"}\n",
145
+ "#@markdown These descriptors of the beatmap. Descriptors are used to describe the style of the beatmap. All available descriptors can be found [here](https://osu.ppy.sh/wiki/en/Beatmap/Beatmap_tags).\n",
146
+ "descriptor_1 = '' # @param [\"slider only\", \"circle only\", \"collab\", \"megacollab\", \"marathon\", \"gungathon\", \"multi-song\", \"variable timing\", \"accelerating bpm\", \"time signatures\", \"storyboard\", \"storyboard gimmick\", \"keysounds\", \"download unavailable\", \"custom skin\", \"featured artist\", \"custom song\", \"style\", \"messy\", \"geometric\", \"grid snap\", \"hexgrid\", \"freeform\", \"symmetrical\", \"old-style revival\", \"clean\", \"slidershapes\", \"distance snapped\", \"iNiS-style\", \"avant-garde\", \"perfect stacks\", \"ninja spinners\", \"simple\", \"chaotic\", \"repetition\", \"progression\", \"high contrast\", \"improvisation\", \"playfield usage\", \"playfield constraint\", \"video gimmick\", \"difficulty spike\", \"low sv\", \"high sv\", \"colorhax\", \"tech\", \"slider tech\", \"complex sv\", \"reading\", \"visually dense\", \"overlap reading\", \"alt\", \"jump aim\", \"sharp aim\", \"wide aim\", \"linear aim\", \"aim control\", \"flow aim\", \"precision\", \"finger control\", \"complex snap divisors\", \"bursts\", \"streams\", \"spaced streams\", \"cutstreams\", \"stamina\", \"mapping contest\", \"tournament custom\", \"tag\", \"port\"] {allow-input: true}\n",
147
+ "descriptor_2 = '' # @param [\"slider only\", \"circle only\", \"collab\", \"megacollab\", \"marathon\", \"gungathon\", \"multi-song\", \"variable timing\", \"accelerating bpm\", \"time signatures\", \"storyboard\", \"storyboard gimmick\", \"keysounds\", \"download unavailable\", \"custom skin\", \"featured artist\", \"custom song\", \"style\", \"messy\", \"geometric\", \"grid snap\", \"hexgrid\", \"freeform\", \"symmetrical\", \"old-style revival\", \"clean\", \"slidershapes\", \"distance snapped\", \"iNiS-style\", \"avant-garde\", \"perfect stacks\", \"ninja spinners\", \"simple\", \"chaotic\", \"repetition\", \"progression\", \"high contrast\", \"improvisation\", \"playfield usage\", \"playfield constraint\", \"video gimmick\", \"difficulty spike\", \"low sv\", \"high sv\", \"colorhax\", \"tech\", \"slider tech\", \"complex sv\", \"reading\", \"visually dense\", \"overlap reading\", \"alt\", \"jump aim\", \"sharp aim\", \"wide aim\", \"linear aim\", \"aim control\", \"flow aim\", \"precision\", \"finger control\", \"complex snap divisors\", \"bursts\", \"streams\", \"spaced streams\", \"cutstreams\", \"stamina\", \"mapping contest\", \"tournament custom\", \"tag\", \"port\"] {allow-input: true}\n",
148
+ "descriptor_3 = '' # @param [\"slider only\", \"circle only\", \"collab\", \"megacollab\", \"marathon\", \"gungathon\", \"multi-song\", \"variable timing\", \"accelerating bpm\", \"time signatures\", \"storyboard\", \"storyboard gimmick\", \"keysounds\", \"download unavailable\", \"custom skin\", \"featured artist\", \"custom song\", \"style\", \"messy\", \"geometric\", \"grid snap\", \"hexgrid\", \"freeform\", \"symmetrical\", \"old-style revival\", \"clean\", \"slidershapes\", \"distance snapped\", \"iNiS-style\", \"avant-garde\", \"perfect stacks\", \"ninja spinners\", \"simple\", \"chaotic\", \"repetition\", \"progression\", \"high contrast\", \"improvisation\", \"playfield usage\", \"playfield constraint\", \"video gimmick\", \"difficulty spike\", \"low sv\", \"high sv\", \"colorhax\", \"tech\", \"slider tech\", \"complex sv\", \"reading\", \"visually dense\", \"overlap reading\", \"alt\", \"jump aim\", \"sharp aim\", \"wide aim\", \"linear aim\", \"aim control\", \"flow aim\", \"precision\", \"finger control\", \"complex snap divisors\", \"bursts\", \"streams\", \"spaced streams\", \"cutstreams\", \"stamina\", \"mapping contest\", \"tournament custom\", \"tag\", \"port\"] {allow-input: true}\n",
149
+ "#@markdown These are negative descriptors of the beatmap. Negative descriptors are used to describe what the beatmap should not have. These work only when `cfg_scale` is greater than 1.\n",
150
+ "negative_descriptor_1 = '' # @param [\"slider only\", \"circle only\", \"collab\", \"megacollab\", \"marathon\", \"gungathon\", \"multi-song\", \"variable timing\", \"accelerating bpm\", \"time signatures\", \"storyboard\", \"storyboard gimmick\", \"keysounds\", \"download unavailable\", \"custom skin\", \"featured artist\", \"custom song\", \"style\", \"messy\", \"geometric\", \"grid snap\", \"hexgrid\", \"freeform\", \"symmetrical\", \"old-style revival\", \"clean\", \"slidershapes\", \"distance snapped\", \"iNiS-style\", \"avant-garde\", \"perfect stacks\", \"ninja spinners\", \"simple\", \"chaotic\", \"repetition\", \"progression\", \"high contrast\", \"improvisation\", \"playfield usage\", \"playfield constraint\", \"video gimmick\", \"difficulty spike\", \"low sv\", \"high sv\", \"colorhax\", \"tech\", \"slider tech\", \"complex sv\", \"reading\", \"visually dense\", \"overlap reading\", \"alt\", \"jump aim\", \"sharp aim\", \"wide aim\", \"linear aim\", \"aim control\", \"flow aim\", \"precision\", \"finger control\", \"complex snap divisors\", \"bursts\", \"streams\", \"spaced streams\", \"cutstreams\", \"stamina\", \"mapping contest\", \"tournament custom\", \"tag\", \"port\"] {allow-input: true}\n",
151
+ "negative_descriptor_2 = '' # @param [\"slider only\", \"circle only\", \"collab\", \"megacollab\", \"marathon\", \"gungathon\", \"multi-song\", \"variable timing\", \"accelerating bpm\", \"time signatures\", \"storyboard\", \"storyboard gimmick\", \"keysounds\", \"download unavailable\", \"custom skin\", \"featured artist\", \"custom song\", \"style\", \"messy\", \"geometric\", \"grid snap\", \"hexgrid\", \"freeform\", \"symmetrical\", \"old-style revival\", \"clean\", \"slidershapes\", \"distance snapped\", \"iNiS-style\", \"avant-garde\", \"perfect stacks\", \"ninja spinners\", \"simple\", \"chaotic\", \"repetition\", \"progression\", \"high contrast\", \"improvisation\", \"playfield usage\", \"playfield constraint\", \"video gimmick\", \"difficulty spike\", \"low sv\", \"high sv\", \"colorhax\", \"tech\", \"slider tech\", \"complex sv\", \"reading\", \"visually dense\", \"overlap reading\", \"alt\", \"jump aim\", \"sharp aim\", \"wide aim\", \"linear aim\", \"aim control\", \"flow aim\", \"precision\", \"finger control\", \"complex snap divisors\", \"bursts\", \"streams\", \"spaced streams\", \"cutstreams\", \"stamina\", \"mapping contest\", \"tournament custom\", \"tag\", \"port\"] {allow-input: true}\n",
152
+ "negative_descriptor_3 = '' # @param [\"slider only\", \"circle only\", \"collab\", \"megacollab\", \"marathon\", \"gungathon\", \"multi-song\", \"variable timing\", \"accelerating bpm\", \"time signatures\", \"storyboard\", \"storyboard gimmick\", \"keysounds\", \"download unavailable\", \"custom skin\", \"featured artist\", \"custom song\", \"style\", \"messy\", \"geometric\", \"grid snap\", \"hexgrid\", \"freeform\", \"symmetrical\", \"old-style revival\", \"clean\", \"slidershapes\", \"distance snapped\", \"iNiS-style\", \"avant-garde\", \"perfect stacks\", \"ninja spinners\", \"simple\", \"chaotic\", \"repetition\", \"progression\", \"high contrast\", \"improvisation\", \"playfield usage\", \"playfield constraint\", \"video gimmick\", \"difficulty spike\", \"low sv\", \"high sv\", \"colorhax\", \"tech\", \"slider tech\", \"complex sv\", \"reading\", \"visually dense\", \"overlap reading\", \"alt\", \"jump aim\", \"sharp aim\", \"wide aim\", \"linear aim\", \"aim control\", \"flow aim\", \"precision\", \"finger control\", \"complex snap divisors\", \"bursts\", \"streams\", \"spaced streams\", \"cutstreams\", \"stamina\", \"mapping contest\", \"tournament custom\", \"tag\", \"port\"] {allow-input: true}\n",
153
+ "#@markdown ---\n",
154
+ "#@markdown If true, the generated beatmap will be exported as a .osz file. Otherwise, it will be exported as a .osu file.\n",
155
+ "export_osz = False # @param {type:\"boolean\"}\n",
156
+ "#@markdown If true, the generated beatmap will be added to the reference beatmap and the reference beatmap will be modified instead of creating a new beatmap. It will also continue any hit objects before the start time in the reference beatmap.\n",
157
+ "add_to_beatmap = False # @param {type:\"boolean\"}\n",
158
+ "#@markdown This is the start time of the beatmap in milliseconds. Use this to constrain the generation to a specific part of the song.\n",
159
+ "start_time = -1 # @param {type:\"integer\"}\n",
160
+ "#@markdown This is the end time of the beatmap in milliseconds. Use this to constrain the generation to a specific part of the song.\n",
161
+ "end_time = -1 # @param {type:\"integer\"}\n",
162
+ "#@markdown This is which additional information to give to the model:\n",
163
+ "#@markdown - TIMING: Give timing points to the model. This will skip the timing point generation step.\n",
164
+ "#@markdown - KIAI: Give kiai times to the model. This will skip the kiai time generation step.\n",
165
+ "#@markdown - MAP: Give hit objects to the model. This will skip the hit object generation step.\n",
166
+ "#@markdown - GD: Give hit objects of another difficulty in the same mapset to the model (can be a different game mode). It will improve the rhythm accuracy and consistency of the generated beatmap without copying the reference beatmap.\n",
167
+ "#@markdown - NO_HS: Give hit objects without hitsounds to the model. This will copy the hit objects of the reference beatmap and only add hitsounds to them.\n",
168
+ "in_context = \"[NONE]\" # @param [\"[NONE]\", \"[TIMING]\", \"[TIMING,KIAI]\", \"[TIMING,KIAI,MAP]\", \"[GD,TIMING,KIAI]\", \"[NO_HS,TIMING,KIAI]\"]\n",
169
+ "#@markdown This is the output type of the beatmap. You can choose to either generate everything or only generate timing points.\n",
170
+ "output_type = \"[MAP]\" # @param [\"[MAP]\", \"[TIMING,KIAI,MAP,SV]\", \"[TIMING]\"]\n",
171
+ "#@markdown This is the scale of the classifier-free guidance. A higher scale will make the model more likely to follow the descriptors and mapper style. A high `cfg_scale` or certain combinations of settings can produce unexpected results, so use it with caution. \n",
172
+ "cfg_scale = 1 # @param {type:\"slider\", min:1, max:5, step:0.1}\n",
173
+ "#@markdown This is the temperature of the sampling. A lower temperature will make the model more conservative and less creative. I only recommend lowering this slightly or when using `add_to_beatmap` and generating small sections.\n",
174
+ "temperature = 1 # @param {type:\"slider\", min:0, max:1, step:0.01}\n",
175
+ "#@markdown This is the random seed. Change this to sample a different beatmap with the same settings.\n",
176
+ "seed = -1 # @param {type:\"integer\"}\n",
177
+ "#@markdown ---\n",
178
+ "#@markdown If true, uses a slow and accurate timing generator. This will make the generation slower, but the timing will be more accurate.\n",
179
+ "#@markdown This is the leniency of the normal timing generator. It will allow the timing ticks to deviate from the real timing by this many milliseconds. A higher value will result in less timing points.\n",
180
+ "timing_leniency = 20 # @param {type:\"slider\", min:0, max:100, step:1}\n",
181
+ "super_timing = False # @param {type:\"boolean\"}\n",
182
+ "#@markdown This is the number of beams for beam search for the super timing generator. Higher values will result in slightly more accurate timing at the cost of speed. \n",
183
+ "timer_num_beams = 2 # @param {type:\"slider\", min:1, max:16, step:1}\n",
184
+ "#@markdown This is the number of iterations for the super timing generator. Higher values will result in slightly more accurate timing at the cost of speed.\n",
185
+ "timer_iterations = 20 # @param {type:\"slider\", min:1, max:100, step:1}\n",
186
+ "#@markdown This is the certainty threshold requirement for BPM changes in the super timing generator. Higher values will result in less BPM changes.\n",
187
+ "timer_bpm_threshold = 0.1 # @param {type:\"slider\", min:0, max:1, step:0.1}\n",
188
+ "#@markdown ---\n",
189
+ "\n",
190
+ "# Get actual parameters\n",
191
+ "a_config = model.split(' ')[-1].lower()\n",
192
+ "a_gamemode = [\"standard\", \"taiko\", \"catch the beat\", \"mania\"].index(gamemode)\n",
193
+ "a_difficulty = None if difficulty == -1 else difficulty\n",
194
+ "a_mapper_id = None if mapper_id == -1 else mapper_id\n",
195
+ "a_year = None if year == -1 else year\n",
196
+ "a_hp_drain_rate = None if hp_drain_rate == -1 else hp_drain_rate\n",
197
+ "a_circle_size = None if circle_size == -1 else circle_size\n",
198
+ "a_overall_difficulty = None if overall_difficulty == -1 else overall_difficulty\n",
199
+ "a_approach_rate = None if approach_rate == -1 else approach_rate\n",
200
+ "a_slider_multiplier = None if slider_multiplier == -1 else slider_multiplier\n",
201
+ "a_slider_tick_rate = None if slider_tick_rate == -1 else slider_tick_rate\n",
202
+ "a_hold_note_ratio = None if hold_note_ratio == -1 else hold_note_ratio\n",
203
+ "a_scroll_speed_ratio = None if scroll_speed_ratio == -1 else scroll_speed_ratio\n",
204
+ "descriptors = [d for d in [descriptor_1, descriptor_2, descriptor_3] if d != '']\n",
205
+ "negative_descriptors = [d for d in [negative_descriptor_1, negative_descriptor_2, negative_descriptor_3] if d != '']\n",
206
+ "\n",
207
+ "a_start_time = None if start_time == -1 else start_time\n",
208
+ "a_end_time = None if end_time == -1 else end_time\n",
209
+ "a_in_context = [ContextType(c.lower()) for c in in_context[1:-1].split(',')]\n",
210
+ "a_output_type = [ContextType(c.lower()) for c in output_type[1:-1].split(',')]\n",
211
+ "a_seed = None if seed == -1 else seed\n",
212
+ "\n",
213
+ "# Validate stuff\n",
214
+ "if any(c in a_in_context for c in [ContextType.TIMING, ContextType.KIAI, ContextType.MAP, ContextType.SV, ContextType.GD, ContextType.NO_HS]) or add_to_beatmap:\n",
215
+ " assert os.path.exists(input_beatmap), \"Please upload a reference beatmap.\"\n",
216
+ "assert os.path.exists(input_audio), \"Please upload an audio file.\"\n",
217
+ "if a_config == \"v30\":\n",
218
+ " assert a_gamemode == 0, \"V30 only supports standard mode.\"\n",
219
+ " if any(c in a_in_context for c in [ContextType.KIAI, ContextType.MAP, ContextType.SV]):\n",
220
+ " print(\"WARNING: V30 does not support KIAI, MAP, or SV in_context, ignoring.\")\n",
221
+ " if output_type != \"[MAP]\":\n",
222
+ " print(\"WARNING: V30 only supports [MAP] output type, setting output type to [MAP].\")\n",
223
+ " a_output_type = [ContextType.MAP]\n",
224
+ " if len(descriptors) != 0 and len(negative_descriptors) != 0:\n",
225
+ " print(\"WARNING: V30 does not support descriptors or negative descriptors, ignoring.\")\n",
226
+ " if super_timing:\n",
227
+ " print(\"WARNING: V30 does not fully support super timing, generation will be VERY slow.\")\n",
228
+ " \n",
229
+ "# Create config\n",
230
+ "with initialize_config_dir(version_base=\"1.1\", config_dir=\"/content/Mapperatorinator/configs/inference\"):\n",
231
+ " conf = compose(config_name=a_config)\n",
232
+ "\n",
233
+ "# Do inference\n",
234
+ "conf.audio_path = input_audio\n",
235
+ "conf.output_path = output_path\n",
236
+ "conf.beatmap_path = input_beatmap\n",
237
+ "conf.gamemode = a_gamemode\n",
238
+ "conf.difficulty = a_difficulty\n",
239
+ "conf.mapper_id = a_mapper_id\n",
240
+ "conf.year = a_year\n",
241
+ "conf.hitsounded = hitsounded\n",
242
+ "conf.hp_drain_rate = a_hp_drain_rate\n",
243
+ "conf.circle_size = a_circle_size\n",
244
+ "conf.overall_difficulty = a_overall_difficulty\n",
245
+ "conf.approach_rate = a_approach_rate\n",
246
+ "conf.slider_multiplier = a_slider_multiplier\n",
247
+ "conf.slider_tick_rate = a_slider_tick_rate\n",
248
+ "conf.keycount = keycount\n",
249
+ "conf.hold_note_ratio = a_hold_note_ratio\n",
250
+ "conf.scroll_speed_ratio = a_scroll_speed_ratio\n",
251
+ "conf.descriptors = descriptors\n",
252
+ "conf.negative_descriptors = negative_descriptors\n",
253
+ "conf.export_osz = export_osz\n",
254
+ "conf.add_to_beatmap = add_to_beatmap\n",
255
+ "conf.start_time = a_start_time\n",
256
+ "conf.end_time = a_end_time\n",
257
+ "conf.in_context = a_in_context\n",
258
+ "conf.output_type = a_output_type\n",
259
+ "conf.cfg_scale = cfg_scale\n",
260
+ "conf.temperature = temperature\n",
261
+ "conf.seed = a_seed\n",
262
+ "conf.timing_leniency = timing_leniency\n",
263
+ "conf.super_timing = super_timing\n",
264
+ "conf.timer_num_beams = timer_num_beams\n",
265
+ "conf.timer_iterations = timer_iterations\n",
266
+ "conf.timer_bpm_threshold = timer_bpm_threshold\n",
267
+ "\n",
268
+ "_, result_path, osz_path = main(conf)\n",
269
+ "\n",
270
+ "if osz_path is not None:\n",
271
+ " result_path = osz_path\n",
272
+ "\n",
273
+ "if conf.add_to_beatmap:\n",
274
+ " files.download(result_path)\n",
275
+ "else:\n",
276
+ " files.download(result_path)\n"
277
+ ],
278
+ "id": "166eb3e5f9398554",
279
+ "outputs": [],
280
+ "execution_count": null
281
+ }
282
+ ],
283
+ "metadata": {
284
+ "kernelspec": {
285
+ "display_name": "Python 3",
286
+ "language": "python",
287
+ "name": "python3"
288
+ },
289
+ "accelerator": "GPU",
290
+ "language_info": {
291
+ "codemirror_mode": {
292
+ "name": "ipython",
293
+ "version": 2
294
+ },
295
+ "file_extension": ".py",
296
+ "mimetype": "text/x-python",
297
+ "name": "python",
298
+ "nbconvert_exporter": "python",
299
+ "pygments_lexer": "ipython2",
300
+ "version": "2.7.6"
301
+ }
302
+ },
303
+ "nbformat": 4,
304
+ "nbformat_minor": 5
305
+ }
collate_results.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+
4
+
5
def get_color_for_value(value, min_val, max_val, lower_is_better=False):
    """Map *value* onto a red-to-green HSL color within ``[min_val, max_val]``.

    Args:
        value (float): The value to color.
        min_val (float): Smallest value of this metric in the dataset.
        max_val (float): Largest value of this metric in the dataset.
        lower_is_better (bool): When True, smaller values map to greener hues.

    Returns:
        str: A CSS-ready ``hsl(...)`` color string.
    """
    # Degenerate range (all values identical): nothing to grade, show green.
    if min_val == max_val:
        return "hsl(120, 70%, 60%)"

    # Position of the value inside the range, as a 0-1 fraction.
    fraction = (value - min_val) / (max_val - min_val)

    # Hue 0 is red and hue 120 is green; flip the scale when lower is better.
    hue = (1 - fraction) * 120 if lower_is_better else fraction * 120

    return f"hsl({hue:.0f}, 70%, 60%)"
35
+
36
+
37
def _read_latest_metrics(log_file_path, metric_patterns):
    """Scan a log file and return the last reported value for each metric.

    Later occurrences of a metric overwrite earlier ones, so a log that was
    appended to by multiple runs yields the most recent numbers.

    Args:
        log_file_path (str): Path to the ``calc_fid.log`` file.
        metric_patterns (dict[str, re.Pattern]): Column name -> regex with one
            capturing group for the numeric value.

    Returns:
        dict[str, float]: Metric name -> last parsed value.
    """
    latest_metrics = {}
    # Explicit encoding: log files are written as UTF-8; relying on the
    # platform default decoding is not portable.
    with open(log_file_path, 'r', encoding='utf-8') as f:
        for line in f:
            for key, pattern in metric_patterns.items():
                match = pattern.search(line)
                if match:
                    latest_metrics[key] = float(match.group(1))
    return latest_metrics


def _min_max_by_metric(results, headers):
    """Compute the min/max of every numeric column, for cell coloring."""
    min_max_vals = {}
    for header in headers:
        if header == "Model name":
            continue
        values = [res.get(header) for res in results if res.get(header) is not None]
        if values:
            min_max_vals[header] = {'min': min(values), 'max': max(values)}
    return min_max_vals


def _render_html_table(results, headers, min_max_vals):
    """Render the collected metrics as an HTML table with colored cells."""
    html = ["<table>"]
    # Header row
    html.append(" <thead>")
    html.append(" <tr>" + "".join([f"<th>{h}</th>" for h in headers]) + "</tr>")
    html.append(" </thead>")

    # Data rows, sorted by model name for a stable, readable table.
    html.append(" <tbody>")
    for res in sorted(results, key=lambda x: x.get('Model name', '')):
        row_html = " <tr>"
        for header in headers:
            value = res.get(header)

            if header == 'Model name':
                row_html += f"<td>{res.get('Model name', 'N/A')}</td>"
                continue

            if value is None:
                row_html += "<td>N/A</td>"
                continue

            # FID is a distance (lower is better) and is shown coarser than
            # the precision/recall/F1 scores.
            if header == 'FID':
                formatted_value = f"{value:.2f}"
                lower_is_better = True
            else:
                formatted_value = f"{value:.3f}"
                lower_is_better = False

            # Get color and apply style
            color = get_color_for_value(value, min_max_vals[header]['min'], min_max_vals[header]['max'],
                                        lower_is_better)
            # Light text shadow for better readability on bright colors.
            style = f"background-color: {color}; color: black; text-shadow: 0 0 5px white;"
            row_html += f'<td style="{style}">{formatted_value}</td>'

        row_html += "</tr>"
        html.append(row_html)

    html.append(" </tbody>")
    html.append("</table>")

    return "\n".join(html)


def parse_log_files(root_dir):
    """
    Parse ``calc_fid.log`` files found in the ``inference=...`` subfolders of
    *root_dir* and format the extracted metrics into an HTML table with
    colored cells.

    Args:
        root_dir (str): The path to the main folder containing the model
            subfolders.

    Returns:
        str: The formatted HTML table, or an explanatory ``<p>`` element when
        no results were found.
    """
    results = []
    dir_pattern = re.compile(r"inference=(inference_)?([a-zA-Z0-9_-]+)")
    metric_patterns = {
        'FID': re.compile(r"FID: ([\d.]+)"),
        'AR Pr.': re.compile(r"Active Rhythm Precision: ([\d.]+)"),
        'AR Re.': re.compile(r"Active Rhythm Recall: ([\d.]+)"),
        'AR F1': re.compile(r"Active Rhythm F1: ([\d.]+)"),
        'PR Pr.': re.compile(r"Passive Rhythm Precision: ([\d.]+)"),
        'PR Re.': re.compile(r"Passive Rhythm Recall: ([\d.]+)"),
        'PR F1': re.compile(r"Passive Rhythm F1: ([\d.]+)")
    }

    for dirpath, dirnames, filenames in os.walk(root_dir):
        if dirpath == root_dir:
            for dirname in dirnames:
                dir_match = dir_pattern.match(dirname)
                if not dir_match:
                    continue
                model_name = dir_match.group(2)
                log_file_path = os.path.join(dirpath, dirname, 'calc_fid.log')

                if not os.path.exists(log_file_path):
                    print(f"Warning: 'calc_fid.log' not found in {dirname}")
                    continue

                try:
                    latest_metrics = _read_latest_metrics(log_file_path, metric_patterns)
                except Exception as e:
                    print(f"Error reading {log_file_path}: {e}")
                    continue

                if latest_metrics:
                    latest_metrics['Model name'] = model_name
                    results.append(latest_metrics)
            # Prune the walk in place: only the top-level directories matter.
            dirnames[:] = []

    if not results:
        return "<p>No results found. Check if <code>root_dir</code> is correct and log files exist.</p>"

    headers = ["Model name", "FID", "AR Pr.", "AR Re.", "AR F1", "PR Pr.", "PR Re.", "PR F1"]
    min_max_vals = _min_max_by_metric(results, headers)
    return _render_html_table(results, headers, min_max_vals)
148
+
149
+
150
if __name__ == '__main__':
    # NOTE: point this at your main results folder. "." works when this
    # script sits in the same parent folder as the "inference=..." folders.
    logs_directory = './logs_fid/sweeps/test_3'
    # parse_log_files returns the rendered HTML table (or a <p> fallback).
    print(parse_log_files(logs_directory))
158
+
compose.yaml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: mapperatorinator
2
+ services:
3
+ mapperatorinator:
4
+ stdin_open: true
5
+ tty: true
6
+ deploy:
7
+ resources:
8
+ reservations:
9
+ devices:
10
+ - driver: nvidia
11
+ count: all
12
+ capabilities:
13
+ - gpu
14
+ volumes:
15
+ - .:/workspace/Mapperatorinator
16
+ - ../datasets:/workspace/datasets
17
+ network_mode: host
18
+ container_name: mapperatorinator_space
19
+ shm_size: 8gb
20
+ build: .
21
+ # image: my_fixed_image
22
+ command: /bin/bash
23
+ environment:
24
+ - PROJECT_PATH=/workspace/Mapperatorinator
25
+ - WANDB_API_KEY=${WANDB_API_KEY}
config.py ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass, field
2
+ from typing import Any, Optional
3
+
4
+ from hydra.core.config_store import ConfigStore
5
+ from omegaconf import MISSING
6
+
7
+ from osuT5.osuT5.config import TrainConfig
8
+ from osuT5.osuT5.tokenizer import ContextType
9
+ from osu_diffusion.config import DiffusionTrainConfig
10
+
11
+
12
+ # BeatHeritage V1 Config Sections
13
+
14
@dataclass
class AdvancedFeaturesConfig:
    """Boolean toggles for BeatHeritage's advanced generation features.

    NOTE(review): the effect of each flag is not visible in this file —
    confirm exact semantics against the consuming generation pipeline.
    """
    enable_context_aware_generation: bool = True
    enable_style_preservation: bool = True
    enable_difficulty_scaling: bool = True
    enable_pattern_variety: bool = True
20
+
21
@dataclass
class QualityControlConfig:
    """Thresholds and toggles for post-generation quality control."""
    min_distance_threshold: int = 20  # presumably minimum spacing between objects — TODO confirm units (osu! pixels?)
    max_overlap_ratio: float = 0.15  # maximum tolerated object overlap ratio — TODO confirm consumer
    enable_auto_correction: bool = True  # automatically correct detected issues — TODO confirm
    enable_flow_optimization: bool = True  # NOTE(review): semantics not visible here — verify against postprocessor
27
+
28
@dataclass
class PerformanceConfig:
    """Performance-related knobs for inference."""
    use_flash_attention: bool = False  # enable FlashAttention kernels — TODO confirm model support
    batch_size: int = 1  # inference batch size
    max_sequence_length: int = 5120  # maximum token sequence length
    cache_size: int = 4096  # presumably an internal cache size — TODO confirm units (entries vs bytes)
34
+
35
@dataclass
class MetadataConfig:
    """Toggles controlling how beatmap metadata is carried through generation.

    NOTE(review): flag semantics are inferred from their names only —
    confirm against the code that reads this section.
    """
    preserve_timing_points: bool = True
    preserve_bookmarks: bool = True
    auto_detect_kiai: bool = True
    smart_hitsounding: bool = True
41
+
42
@dataclass
class PostprocessorConfig:
    """Selects the postprocessor implementation by dotted import path."""
    use_custom: bool = True  # use the custom BeatHeritage postprocessor instead of the default
    class_name: str = 'beatheritage_postprocessor.BeatHeritagePostprocessor'  # dotted path to the postprocessor class — presumably resolved at runtime, TODO confirm loader
    config_class: str = 'beatheritage_postprocessor.BeatHeritageConfig'  # dotted path to its config dataclass
47
+
48
@dataclass
class IntegrationsConfig:
    """Toggles for optional tool integrations."""
    mai_mod_enhanced: bool = True  # enhanced Mai-Mod integration — TODO confirm effect
    fid_evaluation: bool = True  # FID evaluation integration
    benchmark_mode: bool = False  # benchmark mode, off by default — TODO confirm consumer
53
+
54
+
55
+ # Default config here based on V28
56
+
57
@dataclass
class InferenceConfig:
    """Hydra structured config for beatmap-generation inference.

    Groups model/audio paths, conditional-generation style controls, sampling
    hyperparameters, output metadata, and settings for the position-diffusion
    stage. Registered below in the "inference" config group as "base".
    """
    model_path: str = ''  # Path to trained model
    audio_path: str = ''  # Path to input audio
    output_path: str = ''  # Path to output directory
    beatmap_path: str = ''  # Path to .osu file to autofill metadata and use as reference

    # Conditional generation settings
    gamemode: Optional[int] = None  # Gamemode of the beatmap
    beatmap_id: Optional[int] = None  # Beatmap ID to use as style
    difficulty: Optional[float] = None  # Difficulty star rating to map
    mapper_id: Optional[int] = None  # Mapper ID to use as style
    year: Optional[int] = None  # Year to use as style
    hitsounded: Optional[bool] = None  # Whether the beatmap has hitsounds
    keycount: Optional[int] = None  # Number of keys to use for mania
    hold_note_ratio: Optional[float] = None  # Ratio of how many hold notes to generate in mania
    scroll_speed_ratio: Optional[float] = None  # Ratio of how many scroll speed changes to generate in mania and taiko
    descriptors: Optional[list[str]] = None  # List of descriptors to use for style
    negative_descriptors: Optional[list[str]] = None  # List of descriptors to avoid when using classifier-free guidance

    # Difficulty settings
    hp_drain_rate: Optional[float] = None  # HP drain rate (HP)
    circle_size: Optional[float] = None  # Circle size (CS)
    overall_difficulty: Optional[float] = None  # Overall difficulty (OD)
    approach_rate: Optional[float] = None  # Approach rate (AR)
    slider_multiplier: Optional[float] = None  # Multiplier for slider velocity
    slider_tick_rate: Optional[float] = None  # Rate of slider ticks

    # Inference settings
    seed: Optional[int] = None  # Random seed
    device: str = 'auto'  # Inference device (cpu/cuda/mps/auto)
    precision: str = 'fp32'  # Lower precision for speed (fp32/bf16/amp)
    add_to_beatmap: bool = False  # Add generated content to the reference beatmap
    export_osz: bool = False  # Export beatmap as .osz file
    start_time: Optional[int] = None  # Start time of audio to generate beatmap for
    end_time: Optional[int] = None  # End time of audio to generate beatmap for
    lookback: float = 0.5  # Fraction of audio sequence to fill with tokens from previous inference window
    lookahead: float = 0.4  # Fraction of audio sequence to skip at the end of the audio window
    timing_leniency: int = 20  # Number of milliseconds of error to allow for timing generation
    in_context: list[ContextType] = field(default_factory=lambda: [ContextType.NONE])  # Context types of other beatmap(s)
    output_type: list[ContextType] = field(default_factory=lambda: [ContextType.MAP])  # Output type (map, timing)
    cfg_scale: float = 1.0  # Scale of classifier-free guidance
    temperature: float = 1.0  # Sampling temperature
    timing_temperature: float = 0.1  # Sampling temperature for timing
    mania_column_temperature: float = 0.5  # Sampling temperature for mania columns
    taiko_hit_temperature: float = 0.5  # Sampling temperature for taiko hit types
    timeshift_bias: float = 0.0  # Logit bias for sampling timeshift tokens
    top_p: float = 0.95  # Top-p sampling threshold
    top_k: int = 0  # Top-k sampling threshold
    repetition_penalty: float = 1.0  # Repetition penalty to reduce repetitive patterns
    parallel: bool = False  # Use parallel sampling
    do_sample: bool = True  # Use sampling
    num_beams: int = 1  # Number of beams for beam search
    super_timing: bool = False  # Use super timing generator (slow but accurate timing)
    timer_num_beams: int = 2  # Number of beams for beam search in the timing generator
    timer_bpm_threshold: float = 0.7  # Threshold requirement for BPM change in timer, higher values will result in less BPM changes
    timer_cfg_scale: float = 1.0  # Scale of classifier-free guidance for timer
    timer_iterations: int = 20  # Number of iterations for timer
    use_server: bool = True  # Use server for optimized multiprocess inference
    max_batch_size: int = 16  # Maximum batch size for inference (only used for parallel sampling or super timing)
    resnap_events: bool = True  # Resnap notes to the timing after generation
    position_refinement: bool = False  # Use position refinement

    # Metadata settings
    bpm: int = 120  # Beats per minute of input audio
    offset: int = 0  # Start of beat, in milliseconds, from the beginning of input audio
    title: str = ''  # Song title
    artist: str = ''  # Song artist
    creator: str = ''  # Beatmap creator
    version: str = ''  # Beatmap version
    background: Optional[str] = None  # File name of background image
    preview_time: int = -1  # Time in milliseconds to start previewing the song

    # Diffusion settings
    generate_positions: bool = True  # Use diffusion to generate object positions
    diff_cfg_scale: float = 1.0  # Scale of classifier-free guidance
    compile: bool = False  # PyTorch 2.0 optimization
    pad_sequence: bool = False  # Pad sequence to max_seq_len
    diff_ckpt: str = ''  # Path to checkpoint for diffusion model
    diff_refine_ckpt: str = ''  # Path to checkpoint for refining diffusion model
    beatmap_idx: str = 'osu_diffusion/beatmap_idx.pickle'  # Path to beatmap index
    refine_iters: int = 10  # Number of refinement iterations
    random_init: bool = False  # Whether to initialize with random noise instead of positions generated by the previous model
    timesteps: list[int] = field(default_factory=lambda: [100, 0, 0, 0, 0, 0, 0, 0, 0, 0])  # The number of timesteps we want to take from equally-sized portions of the original process
    max_seq_len: int = 1024  # Maximum sequence length for diffusion
    overlap_buffer: int = 128  # Buffer zone at start and end of sequence to avoid edge effects (should be less than half of max_seq_len)

    # Training settings
    train: TrainConfig = field(default_factory=TrainConfig)  # Training settings for osuT5 model
    diffusion: DiffusionTrainConfig = field(default_factory=DiffusionTrainConfig)  # Training settings for diffusion model

    # BeatHeritage V1 config sections (dataclasses defined above)
    advanced_features: AdvancedFeaturesConfig = field(default_factory=AdvancedFeaturesConfig)
    quality_control: QualityControlConfig = field(default_factory=QualityControlConfig)
    performance: PerformanceConfig = field(default_factory=PerformanceConfig)
    metadata: MetadataConfig = field(default_factory=MetadataConfig)
    postprocessor: PostprocessorConfig = field(default_factory=PostprocessorConfig)
    integrations: IntegrationsConfig = field(default_factory=IntegrationsConfig)
    hydra: Any = MISSING  # Hydra runtime section, filled in by Hydra itself
156
+
157
+
158
@dataclass
class FidConfig:
    """Hydra config for evaluating generated beatmaps (FID and rhythm stats).

    Registered below as "base_fid"; consumed via configs/calc_fid.yaml.
    """
    device: str = 'auto'  # Inference device (cpu/cuda/mps/auto)
    compile: bool = True  # PyTorch 2.0 optimization
    num_processes: int = 3  # presumably number of parallel worker processes — TODO confirm
    seed: int = 0  # Random seed

    skip_generation: bool = False  # Skip the generation step (evaluate existing output)
    fid: bool = True  # Compute the FID metric
    rhythm_stats: bool = True  # Compute rhythm statistics

    dataset_type: str = 'ors'  # Reference dataset format ('ors'; calc_fid.yaml also uses 'mmrs')
    dataset_path: str = '/workspace/datasets/ORS16291'  # Path to the reference dataset
    dataset_start: int = 16200  # First dataset index to include
    dataset_end: int = 16291  # Last dataset index — TODO confirm whether inclusive
    gamemodes: list[int] = field(default_factory=lambda: [0])  # List of gamemodes to include in the dataset

    classifier_ckpt: str = 'OliBomby/osu-classifier'  # Classifier checkpoint (Hugging Face repo id or local path)
    classifier_batch_size: int = 16  # Batch size for classifier inference

    training_set_ids_path: Optional[str] = None  # Path to training set beatmap IDs

    inference: InferenceConfig = field(default_factory=InferenceConfig)  # Inference settings used to generate the evaluated beatmaps
    hydra: Any = MISSING  # Hydra runtime section, filled in by Hydra itself
182
+
183
+
184
@dataclass
class MaiModConfig:
    """Hydra config for the Mai-Mod beatmap-checking entry point.

    Registered below as "base_mai_mod".
    """
    beatmap_path: str = ''  # Path to .osu file
    audio_path: str = ''  # Path to input audio
    raw_output: bool = False  # Emit raw output — TODO confirm exact format against the consumer
    precision: str = 'fp32'  # Lower precision for speed (fp32/bf16/amp)
    inference: InferenceConfig = field(default_factory=InferenceConfig)  # Inference settings for the osuT5 model
    hydra: Any = MISSING  # Hydra runtime section, filled in by Hydra itself
192
+
193
+
194
# Register the structured configs with Hydra so YAML files can extend them
# via their "defaults" lists.
cs = ConfigStore.instance()
for _registration in (
    dict(group="inference", name="base", node=InferenceConfig),
    dict(name="base_fid", node=FidConfig),
    dict(name="base_mai_mod", node=MaiModConfig),
):
    cs.store(**_registration)
configs/calc_fid.yaml ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ defaults:
2
+ - base_fid
3
+ - inference: tiny_dist7
4
+ - _self_
5
+
6
+ compile: false
7
+ num_processes: 32
8
+ seed: 0
9
+
10
+ skip_generation: false
11
+ fid: true
12
+ rhythm_stats: true
13
+
14
+ classifier_ckpt: 'OliBomby/osu-classifier'
15
+ classifier_batch_size: 32
16
+
17
+ training_set_ids_path: null
18
+
19
+ dataset_type: "mmrs"
20
+ dataset_path: C:/Users/Olivier/Documents/Collections/Beatmap ML Datasets/MMRS2025
21
+ dataset_start: 0
22
+ dataset_end: 106 # Contains 324 std beatmaps
23
+ gamemodes: [0] # List of gamemodes to include in the dataset
24
+
25
+ inference:
26
+ super_timing: false
27
+ temperature: 0.9 # Sampling temperature
28
+ top_p: 0.9 # Top-p sampling threshold
29
+ lookback: 0.5 # Fraction of audio sequence to fill with tokens from previous inference window
30
+ lookahead: 0.4 # Fraction of audio sequence to skip at the end of the audio window
31
+ year: 2023
32
+ resnap_events: false
33
+ use_server: false
34
+
35
+ hydra:
36
+ job:
37
+ chdir: True
38
+ run:
39
+ # dir: ./logs_fid/${now:%Y-%m-%d}/${now:%H-%M-%S}
40
+ dir: ./logs_fid/test
41
+ sweep:
42
+ dir: ./logs_fid/sweeps/test_3
43
+ subdir: ${hydra.job.override_dirname}