""" Reward Configuration v3 - Ultra-High Performance (Target: 85%+ all nutrients) FAT-FOCUSED optimization to address critical bottleneck (10% → 85%+ target). """ import numpy as np # ============================================================================ # NUTRIENT IMPORTANCE WEIGHTS - FAT PRIORITY # ============================================================================ NUTRIENT_IMPORTANCE = { 'fat': 0.35, # INCREASED from 0.25 'calories': 0.18, # REDUCED from 0.25 'protein': 0.18, # REDUCED from 0.20 'carbs': 0.19, # Similar to v2 'sodium': 0.10, # Same as v2 } # Verify weights sum to 1.0 assert abs(sum(NUTRIENT_IMPORTANCE.values()) - 1.0) < 1e-6 # ============================================================================ # FAT-SPECIFIC PARAMETERS # ============================================================================ FAT_STRICT_MODE = True FAT_EXPONENTIAL_FACTOR = 2.0 # vs 1.5 for others FAT_BONUS_MULTIPLIER = 2.0 FAT_SATISFACTION_BONUS = 100 FAT_PROGRESSIVE_THRESHOLDS = { 'excellent': 0.10, # vs 0.15 for others 'good': 0.20, # vs 0.25 'acceptable': 0.30, # vs 0.35 } # ============================================================================ # CURRICULUM PHASES - FAT FOCUSED # ============================================================================ FAT_CURRICULUM_PHASES = [ {'name': 'Easy Fat', 'fat_multiplier': 2.5, 'steps': 150000}, {'name': 'Medium Fat', 'fat_multiplier': 2.0, 'steps': 150000}, {'name': 'Normal Fat', 'fat_multiplier': 1.5, 'steps': 150000}, {'name': 'Tight Fat', 'fat_multiplier': 1.2, 'steps': 150000}, {'name': 'Target Fat', 'fat_multiplier': 1.0, 'steps': 200000}, ] # ============================================================================ # BONUSES & PENALTIES # ============================================================================ COMPLETION_BONUS = 200 PARTIAL_BONUS = 60 DIVERSITY_BONUS_WEIGHT = 12.0 NOVELTY_BONUS_WEIGHT = 6.0 CATEGORY_BALANCE_BONUS = 25.0 REPETITION_PENALTY = -25.0 MIN_INGREDIENT_PENALTY = -120.0 MAX_INGREDIENT_PENALTY = -60.0 EMPTY_RECIPE_PENALTY = -600.0 # ============================================================================ # VIOLATION PENALTY CALCULATION # ============================================================================ def calculate_violation_penalty(value, min_val, max_val, target, nutrient_weight, nutrient_name=''): """Calculate penalty/reward with fat-specific handling""" is_fat = nutrient_name.lower() == 'fat' # Within range - progressive reward if min_val <= value <= max_val: deviation_pct = abs(value - target) / (target + 1e-8) if is_fat: if deviation_pct < FAT_PROGRESSIVE_THRESHOLDS['excellent']: return 100 * nutrient_weight * FAT_BONUS_MULTIPLIER elif deviation_pct < FAT_PROGRESSIVE_THRESHOLDS['good']: return 50 * nutrient_weight * FAT_BONUS_MULTIPLIER elif deviation_pct < FAT_PROGRESSIVE_THRESHOLDS['acceptable']: return 20 * nutrient_weight * FAT_BONUS_MULTIPLIER else: return 5 * nutrient_weight else: if deviation_pct < 0.15: return 100 * nutrient_weight elif deviation_pct < 0.25: return 50 * nutrient_weight elif deviation_pct < 0.35: return 20 * nutrient_weight else: return 5 * nutrient_weight # Out of range - exponential penalty else: if value < min_val: violation_pct = (min_val - value) / (min_val + 1e-8) else: violation_pct = (value - max_val) / (max_val + 1e-8) base_penalty = -50 * nutrient_weight if is_fat and FAT_STRICT_MODE: exponential_factor = (1 + violation_pct) ** FAT_EXPONENTIAL_FACTOR else: exponential_factor = (1 + violation_pct) ** 1.5 penalty = base_penalty * exponential_factor penalty = max(penalty, -200 * nutrient_weight) return penalty def calculate_weighted_constraint_reward(current_nutrients, target_constraints): """Calculate total reward with fat priority""" total_reward = 0.0 satisfied_count = 0 details = {} for nutrient in ['calories', 'protein', 'fat', 'carbs', 'sodium']: value = current_nutrients.get(nutrient, 0) min_val = target_constraints[nutrient]['min'] max_val = target_constraints[nutrient]['max'] target = target_constraints[nutrient]['target'] weight = NUTRIENT_IMPORTANCE[nutrient] reward = calculate_violation_penalty(value, min_val, max_val, target, weight, nutrient) total_reward += reward if min_val <= value <= max_val: satisfied_count += 1 details[nutrient] = { 'value': value, 'target': target, 'min': min_val, 'max': max_val, 'satisfied': min_val <= value <= max_val, 'reward': reward, 'weight': weight, } return total_reward, satisfied_count, details def calculate_completion_bonus(satisfied_count, total_constraints=5, fat_satisfied=False): """Calculate completion bonus with fat bonus""" bonus = 0 if satisfied_count == total_constraints: bonus += COMPLETION_BONUS elif satisfied_count == total_constraints - 1: bonus += PARTIAL_BONUS elif satisfied_count == total_constraints - 2: bonus += 30 if fat_satisfied: bonus += FAT_SATISFACTION_BONUS return bonus def calculate_milestone_reward(ingredient_count, min_ingredients, max_ingredients): """Calculate milestone rewards""" reward = 0.0 if ingredient_count == min_ingredients: reward += 15.0 halfway = (min_ingredients + max_ingredients) / 2 if abs(ingredient_count - halfway) < 0.5: reward += 8.0 if ingredient_count <= max_ingredients: reward += 7.0 return reward def print_config_summary(): """Print configuration summary""" print("=" * 70) print("REWARD CONFIGURATION v3 - FAT-FOCUSED (85%+ TARGET)") print("=" * 70) print("\nNutrient Weights:") for nutrient, weight in sorted(NUTRIENT_IMPORTANCE.items(), key=lambda x: x[1], reverse=True): print(f" {nutrient:8s}: {weight:.2f} ({weight*100:.0f}%)") print(f"\nFat-Specific:") print(f" Exponential: {FAT_EXPONENTIAL_FACTOR}") print(f" Bonus Multiplier: {FAT_BONUS_MULTIPLIER}x") print(f" Satisfaction Bonus: {FAT_SATISFACTION_BONUS} pts") print(f" Thresholds: 10%/20%/30%") print(f"\nCurriculum: {len(FAT_CURRICULUM_PHASES)} phases, 800k steps") print("=" * 70)