somepago
/

AestheticSigLIP

from dataclasses import dataclass, field
from typing import List, Optional, Tuple
@dataclass
class SigLIP2VisionConfig:
    """Hyperparameters of the vision backbone.

    Every field has a default, so ``SigLIP2VisionConfig()`` is a valid
    configuration. Field order is part of the positional ``__init__``
    signature and must not be rearranged.
    """

    # NOTE(review): the defaults presumably mirror the pretrained SigLIP2
    # vision checkpoint loaded elsewhere — confirm before changing any of them.
    hidden_size: int = 1152          # width of the per-token embedding
    intermediate_size: int = 4304    # presumably the MLP inner width — confirm
    num_hidden_layers: int = 27
    num_attention_heads: int = 16
    num_channels: int = 3            # input image channels
    patch_size: int = 16
    max_num_patches: int = 256
    layer_norm_eps: float = 1e-6
@dataclass
class TrainConfig:
    """Single container for every training knob.

    All fields have defaults, so ``TrainConfig()`` is a complete
    configuration. Field order is part of the positional ``__init__``
    signature and is kept stable. Derived values (``head_dims``,
    ``num_buckets``, ``bucket_centers``) are read-only properties computed
    from the fields on each access.
    """

    # Architecture
    vision: SigLIP2VisionConfig = field(default_factory=SigLIP2VisionConfig)
    tap_layers: List[int] = field(default_factory=lambda: [8, 17])
    head_hidden: List[int] = field(default_factory=lambda: [768, 256])
    head_dropout: float = 0.3

    # Score buckets: consecutive entries are the edges of one bucket.
    score_buckets: List[float] = field(default_factory=lambda: [
        0, 3, 4, 5, 6, 7, 8, 9, 10
    ])
    loss_type: str = "sord"       # "ce", "sord", or "mse"
    sord_sigma: float = 1.0       # SORD label softness

    # Ranking loss
    ranking_lambda: float = 0.3   # weight for auxiliary ranking loss (0 = disabled)
    ranking_margin: float = 0.5   # margin for MarginRankingLoss
    ranking_threshold: float = 1.0  # min score diff to form a pair

    # Paths
    resume_from: Optional[str] = None  # path to checkpoint to resume from
    weights_path: str = "weights/siglip2_vision.safetensors"
    score_column: str = "heuristic_score"  # which column to use for training scores
    data_dir: Optional[str] = None
    labels_file: str = "data/labels.csv"
    output_dir: str = "checkpoints"

    # Preprocessing
    image_mean: Tuple[float, ...] = (0.5, 0.5, 0.5)
    image_std: Tuple[float, ...] = (0.5, 0.5, 0.5)

    # Training
    epochs: int = 10
    batch_size: int = 96
    lr_head: float = 1e-3
    lr_backbone: float = 1e-5
    llrd_decay: float = 0.7       # layer-wise LR decay (1.0 = no decay)
    weight_decay: float = 0.01
    warmup_ratio: float = 0.1
    freeze_backbone: bool = False
    grad_accum_steps: int = 2
    max_grad_norm: float = 1.0
    seed: int = 42

    # EMA
    ema_decay: float = 0.9998     # 0 = disabled
    ema_start_step: int = 100     # start EMA after this many optimizer steps

    # Eval
    eval_split: float = 0.05
    patience: int = 10

    # Score normalization
    score_min: float = 1.0
    score_max: float = 9.0

    # Data rebalancing
    rebalance_scores: bool = True  # inverse-frequency weighting by score bucket

    @property
    def head_dims(self) -> List[int]:
        """Return the head layer widths: the input width, then ``head_hidden``.

        The input width is ``vision.hidden_size`` multiplied by the number of
        tapped layers plus one.
        """
        # NOTE(review): the "+ 1" presumably accounts for the final backbone
        # output alongside the tapped intermediate layers — confirm in the model.
        input_dim = (len(self.tap_layers) + 1) * self.vision.hidden_size
        return [input_dim, *self.head_hidden]

    @property
    def num_buckets(self) -> int:
        """Return the number of buckets, one fewer than the number of edges."""
        return len(self.score_buckets) - 1

    @property
    def bucket_centers(self) -> List[float]:
        """Return the midpoint of each pair of adjacent bucket edges."""
        edges = self.score_buckets
        # Pair each edge with its successor; fewer than two edges yields [].
        return [(lo + hi) / 2 for lo, hi in zip(edges, edges[1:])]