somepago committed on
Commit
c49a46b
·
verified ·
1 Parent(s): 4fa3795

Upload config.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.py +89 -0
config.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from dataclasses import dataclass, field
from typing import List, Optional, Tuple
3
+
4
+
5
@dataclass
class SigLIP2VisionConfig:
    """Default hyperparameters for the SigLIP2 vision model.

    Only the field names and defaults are defined here; the per-field notes
    below are inferred from the names and should be confirmed against the
    model implementation.
    """

    hidden_size: int = 1152  # used by TrainConfig.head_dims to size the head input
    intermediate_size: int = 4304  # presumably the MLP width -- TODO confirm
    num_hidden_layers: int = 27
    num_attention_heads: int = 16
    num_channels: int = 3  # presumably input image channels -- TODO confirm
    patch_size: int = 16
    max_num_patches: int = 256
    layer_norm_eps: float = 1e-6
15
+
16
+
17
@dataclass
class TrainConfig:
    """Training configuration.

    Groups the architecture, loss, path, preprocessing, optimisation, EMA,
    evaluation and rebalancing settings in one place, and exposes a few
    values derived from them (``head_dims``, ``num_buckets``,
    ``bucket_centers``).

    Field order is part of the generated ``__init__`` signature; append new
    fields rather than inserting them if positional construction is used.
    """

    # Architecture
    vision: SigLIP2VisionConfig = field(default_factory=SigLIP2VisionConfig)
    tap_layers: List[int] = field(default_factory=lambda: [8, 17])
    head_hidden: List[int] = field(default_factory=lambda: [768, 256])
    head_dropout: float = 0.3

    @property
    def head_dims(self) -> List[int]:
        """Return the head layer widths: the input width, then ``head_hidden``.

        The input width is ``(len(tap_layers) + 1) * vision.hidden_size``
        (presumably one feature vector per tapped layer plus the final
        layer -- confirm against the model code).
        """
        input_dim = (len(self.tap_layers) + 1) * self.vision.hidden_size
        # Unpacking accepts any sequence for head_hidden, not only a list.
        return [input_dim, *self.head_hidden]

    # Score buckets
    score_buckets: List[float] = field(default_factory=lambda: [
        0, 3, 4, 5, 6, 7, 8, 9, 10,
    ])
    loss_type: str = "sord"  # "ce", "sord", or "mse"
    sord_sigma: float = 1.0  # SORD label softness

    # Ranking loss
    ranking_lambda: float = 0.3  # weight for auxiliary ranking loss (0 = disabled)
    ranking_margin: float = 0.5  # margin for MarginRankingLoss
    ranking_threshold: float = 1.0  # min score diff to form a pair

    # Paths
    resume_from: Optional[str] = None  # path to checkpoint to resume from
    weights_path: str = "weights/siglip2_vision.safetensors"
    score_column: str = "heuristic_score"  # which column to use for training scores
    data_dir: Optional[str] = None
    labels_file: str = "data/labels.csv"
    output_dir: str = "checkpoints"

    # Preprocessing
    image_mean: Tuple[float, ...] = (0.5, 0.5, 0.5)
    image_std: Tuple[float, ...] = (0.5, 0.5, 0.5)

    # Training
    epochs: int = 10
    batch_size: int = 96
    lr_head: float = 1e-3
    lr_backbone: float = 1e-5
    llrd_decay: float = 0.7  # layer-wise LR decay (1.0 = no decay)
    weight_decay: float = 0.01
    warmup_ratio: float = 0.1
    freeze_backbone: bool = False
    grad_accum_steps: int = 2
    max_grad_norm: float = 1.0
    seed: int = 42

    # EMA
    ema_decay: float = 0.9998  # 0 = disabled
    ema_start_step: int = 100  # start EMA after this many optimizer steps

    # Eval
    eval_split: float = 0.05
    patience: int = 10

    # Score normalization
    score_min: float = 1.0
    score_max: float = 9.0

    # Data rebalancing
    rebalance_scores: bool = True  # inverse-frequency weighting by score bucket

    @property
    def num_buckets(self) -> int:
        """Return the number of buckets delimited by ``score_buckets``.

        N edges delimit N - 1 buckets. The result is clamped at 0 so that an
        empty edge list agrees with ``bucket_centers`` (which is then empty)
        instead of yielding -1.
        """
        return max(len(self.score_buckets) - 1, 0)

    @property
    def bucket_centers(self) -> List[float]:
        """Return the midpoint of each pair of adjacent ``score_buckets`` edges."""
        edges = self.score_buckets
        return [(lo + hi) / 2 for lo, hi in zip(edges, edges[1:])]