Upload configs/config_reward_v3.py with huggingface_hub

684a1db verified 4 months ago

6.91 kB

	"""
	Reward Configuration v3 - Ultra-High Performance (Target: 85%+ all nutrients)

	FAT-FOCUSED optimization to address critical bottleneck (10% → 85%+ target).
	"""

	import numpy as np

	# ============================================================================
	# NUTRIENT IMPORTANCE WEIGHTS - FAT PRIORITY
	# ============================================================================

	# Share of the weighted constraint reward assigned to each nutrient.
	# Fat carries the largest share; the trailing notes record how each
	# weight moved relative to the previous configuration.
	NUTRIENT_IMPORTANCE = {
	    'fat': 0.35,  # INCREASED from 0.25
	    'calories': 0.18,  # REDUCED from 0.25
	    'protein': 0.18,  # REDUCED from 0.20
	    'carbs': 0.19,  # Similar to v2
	    'sodium': 0.10,  # Same as v2
	}

	# The weights must sum to 1.0. This is checked with an explicit raise
	# rather than `assert`, because assert statements are removed when Python
	# runs with -O and a mistuned table would then load silently.
	_weight_total = sum(NUTRIENT_IMPORTANCE.values())
	if not abs(_weight_total - 1.0) < 1e-6:
	    raise ValueError(
	        f"NUTRIENT_IMPORTANCE weights must sum to 1.0, got {_weight_total!r}"
	    )


	# ============================================================================
	# FAT-SPECIFIC PARAMETERS
	# ============================================================================

	# Knobs that apply only when the nutrient being scored is fat.
	FAT_STRICT_MODE = True  # when on, out-of-range fat uses FAT_EXPONENTIAL_FACTOR
	FAT_EXPONENTIAL_FACTOR = 2.0  # penalty exponent for fat (other nutrients use 1.5)
	FAT_BONUS_MULTIPLIER = 2.0  # scales the top three in-range reward tiers for fat
	FAT_SATISFACTION_BONUS = 100  # added to the completion bonus when fat is satisfied

	# Relative-deviation cut-offs for the fat reward tiers. They are tighter
	# than the 0.15 / 0.25 / 0.35 cut-offs used for the other nutrients.
	FAT_PROGRESSIVE_THRESHOLDS = dict(
	    excellent=0.10,
	    good=0.20,
	    acceptable=0.30,
	)


	# ============================================================================
	# CURRICULUM PHASES - FAT FOCUSED
	# ============================================================================

	# Ordered training phases; the fat multiplier steps down from 2.5 to 1.0.
	# Each row is (phase name, fat multiplier, step budget).
	# NOTE(review): how `fat_multiplier` is applied is not visible in this
	# file - presumably it widens the fat tolerance; confirm against the trainer.
	FAT_CURRICULUM_PHASES = [
	    {'name': label, 'fat_multiplier': multiplier, 'steps': step_budget}
	    for label, multiplier, step_budget in (
	        ('Easy Fat', 2.5, 150000),
	        ('Medium Fat', 2.0, 150000),
	        ('Normal Fat', 1.5, 150000),
	        ('Tight Fat', 1.2, 150000),
	        ('Target Fat', 1.0, 200000),
	    )
	]


	# ============================================================================
	# BONUSES & PENALTIES
	# ============================================================================

	# --- Penalties (all negative) ---
	# NOTE(review): none of the penalties are referenced by the functions in
	# this file; presumably the environment imports them - confirm.
	EMPTY_RECIPE_PENALTY = -600.0
	MIN_INGREDIENT_PENALTY = -120.0
	MAX_INGREDIENT_PENALTY = -60.0
	REPETITION_PENALTY = -25.0

	# --- Constraint-satisfaction bonuses ---
	COMPLETION_BONUS = 200  # every constraint satisfied
	PARTIAL_BONUS = 60  # exactly one constraint missed

	# --- Shaping bonuses ---
	# NOTE(review): not referenced by the functions in this file either.
	CATEGORY_BALANCE_BONUS = 25.0
	DIVERSITY_BONUS_WEIGHT = 12.0
	NOVELTY_BONUS_WEIGHT = 6.0


	# ============================================================================
	# VIOLATION PENALTY CALCULATION
	# ============================================================================

	def _tiered_reward(deviation_pct, nutrient_weight, thresholds, multiplier=1):
	    """Map a relative deviation from target onto the 100/50/20/5 reward ladder.

	    ``thresholds`` is an (excellent, good, acceptable) triple of exclusive
	    upper bounds. ``multiplier`` scales the three named tiers only; the
	    fallback tier is never boosted.
	    """
	    for limit, points in zip(thresholds, (100, 50, 20)):
	        if deviation_pct < limit:
	            return points * nutrient_weight * multiplier
	    # Inside the allowed range but far from the target: small flat reward.
	    return 5 * nutrient_weight


	def calculate_violation_penalty(value, min_val, max_val, target, nutrient_weight, nutrient_name=''):
	    """Score one nutrient value against its constraint, with fat-specific handling.

	    Args:
	        value: Current amount of the nutrient.
	        min_val: Lower bound of the allowed range (inclusive).
	        max_val: Upper bound of the allowed range (inclusive).
	        target: Ideal amount; in-range rewards depend on the relative
	            distance ``abs(value - target) / target``.
	        nutrient_weight: Importance weight that scales every reward/penalty.
	        nutrient_name: Nutrient label; ``'fat'`` (case-insensitive) switches
	            on the fat-specific thresholds, multiplier and exponent.

	    Returns:
	        A positive tiered reward when ``min_val <= value <= max_val``;
	        otherwise a negative penalty that grows with the relative size of
	        the violation and is floored at ``-200 * nutrient_weight``.
	    """
	    is_fat = nutrient_name.lower() == 'fat'

	    # Within range - progressive reward
	    if min_val <= value <= max_val:
	        # The epsilon guards against division by zero for a zero target.
	        deviation_pct = abs(value - target) / (target + 1e-8)
	        if is_fat:
	            fat_thresholds = (
	                FAT_PROGRESSIVE_THRESHOLDS['excellent'],
	                FAT_PROGRESSIVE_THRESHOLDS['good'],
	                FAT_PROGRESSIVE_THRESHOLDS['acceptable'],
	            )
	            return _tiered_reward(
	                deviation_pct, nutrient_weight, fat_thresholds, FAT_BONUS_MULTIPLIER
	            )
	        return _tiered_reward(deviation_pct, nutrient_weight, (0.15, 0.25, 0.35))

	    # Out of range - exponential penalty, measured relative to the bound crossed
	    if value < min_val:
	        violation_pct = (min_val - value) / (min_val + 1e-8)
	    else:
	        violation_pct = (value - max_val) / (max_val + 1e-8)

	    # Fat gets the steeper exponent only while strict mode is enabled.
	    exponent = FAT_EXPONENTIAL_FACTOR if (is_fat and FAT_STRICT_MODE) else 1.5

	    base_penalty = -50 * nutrient_weight
	    penalty = base_penalty * (1 + violation_pct) ** exponent

	    # Clamp so a single extreme violation cannot dominate the total reward.
	    return max(penalty, -200 * nutrient_weight)


	def calculate_weighted_constraint_reward(current_nutrients, target_constraints):
	    """Score every tracked nutrient and aggregate the results.

	    Args:
	        current_nutrients: Mapping of nutrient name to current amount; a
	            missing nutrient is scored as 0.
	        target_constraints: Mapping of nutrient name to a mapping with
	            ``'min'``, ``'max'`` and ``'target'`` entries.

	    Returns:
	        A ``(total_reward, satisfied_count, details)`` tuple: the summed
	        per-nutrient reward, the number of nutrients inside their
	        [min, max] range, and a per-nutrient breakdown dict.
	    """
	    breakdown = {}
	    in_range_total = 0
	    running_reward = 0.0

	    for name in ('calories', 'protein', 'fat', 'carbs', 'sodium'):
	        amount = current_nutrients.get(name, 0)
	        bounds = target_constraints[name]
	        lower = bounds['min']
	        upper = bounds['max']
	        goal = bounds['target']
	        importance = NUTRIENT_IMPORTANCE[name]

	        score = calculate_violation_penalty(amount, lower, upper, goal, importance, name)
	        # Plain += keeps the accumulation order (and float result) explicit.
	        running_reward += score

	        within_bounds = lower <= amount <= upper
	        if within_bounds:
	            in_range_total += 1

	        breakdown[name] = {
	            'value': amount,
	            'target': goal,
	            'min': lower,
	            'max': upper,
	            'satisfied': within_bounds,
	            'reward': score,
	            'weight': importance,
	        }

	    return running_reward, in_range_total, breakdown


	def calculate_completion_bonus(satisfied_count, total_constraints=5, fat_satisfied=False):
	    """Return the bonus for the number of satisfied constraints.

	    The tier is COMPLETION_BONUS when every constraint is met, PARTIAL_BONUS
	    when exactly one is missed, 30 when exactly two are missed, and 0
	    otherwise. FAT_SATISFACTION_BONUS is added on top of the tier whenever
	    ``fat_satisfied`` is true.
	    """
	    if satisfied_count == total_constraints:
	        tier_bonus = COMPLETION_BONUS
	    elif satisfied_count == total_constraints - 1:
	        tier_bonus = PARTIAL_BONUS
	    elif satisfied_count == total_constraints - 2:
	        tier_bonus = 30
	    else:
	        tier_bonus = 0

	    if not fat_satisfied:
	        return tier_bonus
	    return tier_bonus + FAT_SATISFACTION_BONUS


	def calculate_milestone_reward(ingredient_count, min_ingredients, max_ingredients):
	    """Return the sum of milestone bonuses earned at the current ingredient count.

	    Three independent milestones are checked:

	    * +15.0 when the count equals ``min_ingredients``;
	    * +8.0 when the count is at the midpoint of the allowed range;
	    * +7.0 while the count does not exceed ``max_ingredients``.

	    The midpoint is floored. With a true-division midpoint, a range whose
	    bounds sum to an odd number has a ``x.5`` midpoint that no integer count
	    can be strictly within 0.5 of, so the bonus could never be earned.
	    Flooring leaves even-sum ranges unchanged and makes the bonus reachable
	    (once) for odd-sum ranges.
	    """
	    reward = 0.0

	    if ingredient_count == min_ingredients:
	        reward += 15.0

	    halfway = (min_ingredients + max_ingredients) // 2
	    if abs(ingredient_count - halfway) < 0.5:
	        reward += 8.0

	    if ingredient_count <= max_ingredients:
	        reward += 7.0

	    return reward


	def print_config_summary():
	    """Print a human-readable summary of this reward configuration.

	    Nutrient weights are listed from heaviest to lightest. The fat
	    threshold line and the curriculum step total are derived from
	    FAT_PROGRESSIVE_THRESHOLDS and FAT_CURRICULUM_PHASES instead of being
	    hard-coded, so the summary cannot drift from the values actually in use.
	    """
	    rule = "=" * 70
	    print(rule)
	    print("REWARD CONFIGURATION v3 - FAT-FOCUSED (85%+ TARGET)")
	    print(rule)

	    print("\nNutrient Weights:")
	    for nutrient, weight in sorted(NUTRIENT_IMPORTANCE.items(), key=lambda x: x[1], reverse=True):
	        print(f" {nutrient:8s}: {weight:.2f} ({weight*100:.0f}%)")

	    # Renders as "10%/20%/30%" for the current thresholds.
	    threshold_text = "/".join(
	        f"{FAT_PROGRESSIVE_THRESHOLDS[tier]:.0%}"
	        for tier in ('excellent', 'good', 'acceptable')
	    )
	    print("\nFat-Specific:")
	    print(f" Exponential: {FAT_EXPONENTIAL_FACTOR}")
	    print(f" Bonus Multiplier: {FAT_BONUS_MULTIPLIER}x")
	    print(f" Satisfaction Bonus: {FAT_SATISFACTION_BONUS} pts")
	    print(f" Thresholds: {threshold_text}")

	    # Renders as "800k" for the current phases.
	    total_steps = sum(phase['steps'] for phase in FAT_CURRICULUM_PHASES)
	    print(f"\nCurriculum: {len(FAT_CURRICULUM_PHASES)} phases, {total_steps / 1000:g}k steps")
	    print(rule)