# recipeai-ultra-performance/configs/config_reward_v3.py
# Uploader: bhxvxsh
# Commit message: Upload configs/config_reward_v3.py with huggingface_hub
# Commit: 684a1db (verified)
"""
Reward Configuration v3 - Ultra-High Performance (Target: 85%+ all nutrients)
FAT-FOCUSED optimization to address critical bottleneck (10% → 85%+ target).
"""
import numpy as np
# ============================================================================
# NUTRIENT IMPORTANCE WEIGHTS - FAT PRIORITY
# ============================================================================
# Relative weight of each nutrient; each per-nutrient reward/penalty in
# calculate_violation_penalty is scaled by the matching weight.
NUTRIENT_IMPORTANCE = {
    'fat': 0.35,       # INCREASED from 0.25
    'calories': 0.18,  # REDUCED from 0.25
    'protein': 0.18,   # REDUCED from 0.20
    'carbs': 0.19,     # Similar to v2
    'sodium': 0.10,    # Same as v2
}

# Verify weights sum to 1.0. This is an explicit check rather than `assert`
# so it still runs under `python -O` and reports the offending total.
_weight_total = sum(NUTRIENT_IMPORTANCE.values())
if not abs(_weight_total - 1.0) < 1e-6:
    raise ValueError(
        f"NUTRIENT_IMPORTANCE weights must sum to 1.0, got {_weight_total!r}"
    )
# ============================================================================
# FAT-SPECIFIC PARAMETERS
# ============================================================================
# Relative-deviation cutoffs (|value - target| / target) for the three
# in-range fat reward tiers used by calculate_violation_penalty.
FAT_PROGRESSIVE_THRESHOLDS = dict(
    excellent=0.10,   # vs 0.15 for others
    good=0.20,        # vs 0.25
    acceptable=0.30,  # vs 0.35
)

# Multiplies the tiered in-range fat rewards in calculate_violation_penalty.
FAT_BONUS_MULTIPLIER = 2.0

# Flat bonus added by calculate_completion_bonus when fat_satisfied is true.
FAT_SATISFACTION_BONUS = 100

# When strict mode is on, an out-of-range fat value is penalised with
# FAT_EXPONENTIAL_FACTOR as the exponent instead of the shared 1.5.
FAT_STRICT_MODE = True
FAT_EXPONENTIAL_FACTOR = 2.0  # vs 1.5 for others
# ============================================================================
# CURRICULUM PHASES - FAT FOCUSED
# ============================================================================
# Ordered curriculum schedule: each phase has a display name, a fat
# multiplier that steps down from 2.5 to 1.0, and a step budget.
# NOTE(review): how 'fat_multiplier' is applied is not visible in this file.
FAT_CURRICULUM_PHASES = [
    {'name': phase_name, 'fat_multiplier': multiplier, 'steps': step_budget}
    for phase_name, multiplier, step_budget in (
        ('Easy Fat', 2.5, 150000),
        ('Medium Fat', 2.0, 150000),
        ('Normal Fat', 1.5, 150000),
        ('Tight Fat', 1.2, 150000),
        ('Target Fat', 1.0, 200000),
    )
]
# ============================================================================
# BONUSES & PENALTIES
# ============================================================================
# --- Constraint-completion bonuses (read by calculate_completion_bonus) ---
COMPLETION_BONUS = 200  # every constraint satisfied
PARTIAL_BONUS = 60      # exactly one constraint unsatisfied

# --- Ingredient-count / empty-recipe penalties ---
# NOTE(review): not referenced in the visible part of this file; presumably
# consumed by the training environment - confirm against the caller.
EMPTY_RECIPE_PENALTY = -600.0
MIN_INGREDIENT_PENALTY = -120.0
MAX_INGREDIENT_PENALTY = -60.0
REPETITION_PENALTY = -25.0

# --- Variety shaping terms ---
# NOTE(review): also not referenced in the visible part of this file.
CATEGORY_BALANCE_BONUS = 25.0
DIVERSITY_BONUS_WEIGHT = 12.0
NOVELTY_BONUS_WEIGHT = 6.0
# ============================================================================
# VIOLATION PENALTY CALCULATION
# ============================================================================
def calculate_violation_penalty(value, min_val, max_val, target, nutrient_weight, nutrient_name=''):
    """Score a single nutrient value against its allowed range.

    Inside ``[min_val, max_val]`` the result is a positive, tiered reward
    chosen by the relative deviation from ``target``. Outside the range it
    is a negative penalty that grows with the relative size of the
    violation and is floored at ``-200 * nutrient_weight``.

    Args:
        value: Current amount of the nutrient.
        min_val: Inclusive lower bound of the allowed range.
        max_val: Inclusive upper bound of the allowed range.
        target: Ideal amount; deviation is measured relative to it.
        nutrient_weight: Scale factor applied to every reward and penalty.
        nutrient_name: Nutrient label; ``'fat'`` (case-insensitive) selects
            the fat-specific thresholds, bonus multiplier and exponent.

    Returns:
        The weighted reward (in range) or penalty (out of range).
    """
    fat_nutrient = nutrient_name.lower() == 'fat'

    # Out of range - exponential penalty. Written as a negated chained
    # comparison so non-comparable values (e.g. NaN) land here.
    if not (min_val <= value <= max_val):
        if value < min_val:
            overshoot = (min_val - value) / (min_val + 1e-8)
        else:
            overshoot = (value - max_val) / (max_val + 1e-8)
        # Fat gets its own exponent only while strict mode is enabled.
        exponent = FAT_EXPONENTIAL_FACTOR if fat_nutrient and FAT_STRICT_MODE else 1.5
        scaled = (-50 * nutrient_weight) * (1 + overshoot) ** exponent
        # Floor the penalty so one nutrient cannot dominate the total.
        return max(scaled, -200 * nutrient_weight)

    # Within range - progressive reward; the epsilon guards a zero target.
    relative_gap = abs(value - target) / (target + 1e-8)

    if fat_nutrient:
        # Tighter cutoffs, and the three tiered rewards are multiplied.
        for tier_key, points in (('excellent', 100), ('good', 50), ('acceptable', 20)):
            if relative_gap < FAT_PROGRESSIVE_THRESHOLDS[tier_key]:
                return points * nutrient_weight * FAT_BONUS_MULTIPLIER
        # The fallback tier is deliberately left unmultiplied.
        return 5 * nutrient_weight

    for cutoff, points in ((0.15, 100), (0.25, 50), (0.35, 20)):
        if relative_gap < cutoff:
            return points * nutrient_weight
    return 5 * nutrient_weight
def calculate_weighted_constraint_reward(current_nutrients, target_constraints):
    """Score all five tracked nutrients and aggregate the results.

    Args:
        current_nutrients: Mapping of nutrient name to its current value;
            a nutrient missing from the mapping is treated as 0.
        target_constraints: Mapping of nutrient name to a mapping holding
            the ``'min'``, ``'max'`` and ``'target'`` entries.

    Returns:
        A tuple ``(total_reward, satisfied_count, details)``: the sum of the
        per-nutrient scores, the number of nutrients inside their range, and
        a per-nutrient breakdown dictionary.

    Raises:
        KeyError: If ``target_constraints`` lacks one of the tracked
            nutrients or one of its ``'min'``/``'max'``/``'target'`` entries.
    """
    per_nutrient = {}
    reward_sum = 0.0
    in_range_total = 0

    for name in ('calories', 'protein', 'fat', 'carbs', 'sodium'):
        amount = current_nutrients.get(name, 0)
        limits = target_constraints[name]
        lower = limits['min']
        upper = limits['max']
        goal = limits['target']
        importance = NUTRIENT_IMPORTANCE[name]

        score = calculate_violation_penalty(amount, lower, upper, goal, importance, name)
        reward_sum += score

        # Bounds are inclusive on both ends.
        within = lower <= amount <= upper
        if within:
            in_range_total += 1

        per_nutrient[name] = {
            'value': amount,
            'target': goal,
            'min': lower,
            'max': upper,
            'satisfied': within,
            'reward': score,
            'weight': importance,
        }

    return reward_sum, in_range_total, per_nutrient
def calculate_completion_bonus(satisfied_count, total_constraints=5, fat_satisfied=False):
    """Return the bonus earned for the number of satisfied constraints.

    Args:
        satisfied_count: How many constraints are currently satisfied.
        total_constraints: Total number of constraints being checked.
        fat_satisfied: Whether the fat constraint is satisfied; if so,
            ``FAT_SATISFACTION_BONUS`` is added on top of the tier bonus.

    Returns:
        ``COMPLETION_BONUS`` when all constraints are met, ``PARTIAL_BONUS``
        when exactly one is missed, 30 when exactly two are missed and 0
        otherwise, plus the optional fat bonus.
    """
    if satisfied_count == total_constraints:
        tier_bonus = COMPLETION_BONUS
    elif satisfied_count == total_constraints - 1:
        tier_bonus = PARTIAL_BONUS
    elif satisfied_count == total_constraints - 2:
        tier_bonus = 30
    else:
        tier_bonus = 0

    # The fat bonus is independent of the tier reached.
    if not fat_satisfied:
        return tier_bonus
    return tier_bonus + FAT_SATISFACTION_BONUS
def calculate_milestone_reward(ingredient_count, min_ingredients, max_ingredients):
    """Return the shaping reward for the current ingredient count.

    Three independent milestones are summed:

    * 15.0 when the count equals ``min_ingredients``;
    * 8.0 when the count is within 0.5 (exclusive) of the midpoint between
      ``min_ingredients`` and ``max_ingredients``;
    * 7.0 when the count does not exceed ``max_ingredients``.

    Args:
        ingredient_count: Number of ingredients currently in the recipe.
        min_ingredients: Minimum ingredient count.
        max_ingredients: Maximum ingredient count.

    Returns:
        The total milestone reward as a float (0.0 if nothing applies).
    """
    midpoint = (min_ingredients + max_ingredients) / 2
    milestones = (
        (ingredient_count == min_ingredients, 15.0),
        (abs(ingredient_count - midpoint) < 0.5, 8.0),
        (ingredient_count <= max_ingredients, 7.0),
    )
    # Start from 0.0 so the result is a float even when nothing is reached.
    return sum((points for reached, points in milestones if reached), 0.0)
def print_config_summary():
    """Print a human-readable summary of the v3 reward configuration.

    Writes the nutrient weights (highest first), the fat-specific settings
    and the curriculum size to stdout. The fat thresholds and the total
    curriculum step count are derived from ``FAT_PROGRESSIVE_THRESHOLDS``
    and ``FAT_CURRICULUM_PHASES`` rather than hard-coded, so the summary
    stays accurate when those values are tuned.

    Returns:
        None.
    """
    rule = "=" * 70
    print(rule)
    print("REWARD CONFIGURATION v3 - FAT-FOCUSED (85%+ TARGET)")
    print(rule)

    print("\nNutrient Weights:")
    for nutrient, weight in sorted(NUTRIENT_IMPORTANCE.items(), key=lambda x: x[1], reverse=True):
        print(f" {nutrient:8s}: {weight:.2f} ({weight*100:.0f}%)")

    # Render thresholds strictest-first, e.g. "10%/20%/30%".
    thresholds = "/".join(
        f"{FAT_PROGRESSIVE_THRESHOLDS[tier] * 100:.0f}%"
        for tier in ('excellent', 'good', 'acceptable')
    )
    print("\nFat-Specific:")
    print(f" Exponential: {FAT_EXPONENTIAL_FACTOR}")
    print(f" Bonus Multiplier: {FAT_BONUS_MULTIPLIER}x")
    print(f" Satisfaction Bonus: {FAT_SATISFACTION_BONUS} pts")
    print(f" Thresholds: {thresholds}")

    # Sum the per-phase budgets; 800000 steps renders as "800k".
    total_steps = sum(phase['steps'] for phase in FAT_CURRICULUM_PHASES)
    print(f"\nCurriculum: {len(FAT_CURRICULUM_PHASES)} phases, {total_steps / 1000:g}k steps")
    print(rule)