| """ |
| Netlify Serverless Function for model data API. |
| This is a simplified version that works with Netlify Functions. |
| """ |
import json
import os
import sys

# Make the sibling modules importable when this file runs as a Netlify
# function (they live one directory above the functions folder).
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))

import numpy as np
import pandas as pd

from data_loader import ModelDataLoader
from embeddings import ModelEmbedder
from dimensionality_reduction import DimensionReducer
|
|
| |
# Module-level caches: Netlify can reuse a warm function container across
# invocations, so state stored here survives between requests and avoids
# re-loading the dataset / re-computing embeddings on every call.
data_loader = ModelDataLoader()
embedder = None              # lazily created in get_models()
reducer = None               # lazily created in get_models()
df = None                    # dataset DataFrame, loaded lazily in handler()
embeddings = None            # cached embeddings for every row of df
reduced_embeddings = None    # cached 2-D projection of `embeddings`
|
|
|
|
def handler(event, context):
    """
    Netlify serverless function entry point.

    Routes requests whose path contains ``/api/models`` or ``/api/stats``,
    answers CORS preflight (OPTIONS) requests, and lazily loads the dataset
    on first use.

    Args:
        event: Netlify/Lambda-style event dict (``httpMethod``, ``path``,
            ``queryStringParameters``).
        context: runtime context (unused).

    Returns:
        dict with ``statusCode``, ``headers`` and a JSON string ``body``.
    """
    global embedder, reducer, df, embeddings, reduced_embeddings

    headers = {
        'Access-Control-Allow-Origin': '*',
        'Access-Control-Allow-Headers': 'Content-Type',
        'Access-Control-Allow-Methods': 'GET, OPTIONS',
        'Content-Type': 'application/json',
    }

    # Answer CORS preflight before doing any (potentially slow) data loading.
    if event.get('httpMethod') == 'OPTIONS':
        return {
            'statusCode': 200,
            'headers': headers,
            'body': ''
        }

    query_params = event.get('queryStringParameters') or {}
    path = event.get('path', '')

    # Lazy one-time load; warm Netlify instances keep the module state alive.
    if df is None:
        try:
            print("Loading data...")
            df = data_loader.load_data(sample_size=10000)
            df = data_loader.preprocess_for_embedding(df)
            print(f"Loaded {len(df)} models")
        except Exception as e:
            return {
                'statusCode': 500,
                'headers': headers,
                'body': json.dumps({'error': f'Failed to load data: {str(e)}'})
            }

    # Substring routing: 'in' already subsumes the previous redundant
    # `path.endswith(...)` checks.
    if '/api/models' in path:
        return get_models(query_params, headers)
    if '/api/stats' in path:
        return get_stats(headers)
    return {
        'statusCode': 404,
        'headers': headers,
        'body': json.dumps({'error': 'Not found'})
    }
|
|
|
|
def _json_safe_tags(tags):
    """Return `tags` unchanged when present, else None.

    pd.notna on a list/array returns an element-wise array, which raises a
    TypeError in boolean context — only apply the NaN check to scalars.
    (Assumes non-scalar tags are already JSON-serializable — TODO confirm
    the dtype produced by the loader.)
    """
    if tags is None:
        return None
    if np.isscalar(tags) and pd.isna(tags):
        return None
    return tags


def _json_safe_score(value):
    """Return float(value) for a present scalar score, else None."""
    if value is None or (np.isscalar(value) and pd.isna(value)):
        return None
    return float(value)


def get_models(query_params, headers):
    """Return the filtered, 2-D-projected model list as a JSON response.

    Query parameters (strings, all optional):
        min_downloads, min_likes -- numeric lower bounds (default 0)
        search_query             -- free-text filter passed to the loader
        max_points               -- cap on returned points (default 5000)

    Returns a Netlify-style response dict; errors are reported as a 500
    with a JSON ``{'error': ...}`` body.
    """
    global df, embedder, reducer, embeddings, reduced_embeddings

    try:
        min_downloads = int(query_params.get('min_downloads', 0))
        min_likes = int(query_params.get('min_likes', 0))
        search_query = query_params.get('search_query')
        max_points = int(query_params.get('max_points', 5000))

        filtered_df = data_loader.filter_data(
            df=df,
            min_downloads=min_downloads,
            min_likes=min_likes,
            search_query=search_query
        )

        if len(filtered_df) == 0:
            return {
                'statusCode': 200,
                'headers': headers,
                'body': json.dumps([])
            }

        # Deterministic down-sample keeps the payload bounded.
        if len(filtered_df) > max_points:
            filtered_df = filtered_df.sample(n=max_points, random_state=42)

        # Lazily build the expensive embedding + projection caches once,
        # over the FULL dataset so cached coordinates stay valid for any
        # later filter.
        if embedder is None:
            embedder = ModelEmbedder()
        if embeddings is None:
            embeddings = embedder.generate_embeddings(df['combined_text'].tolist())
        if reducer is None:
            reducer = DimensionReducer(method="umap", n_components=2)
        if reduced_embeddings is None:
            reduced_embeddings = reducer.fit_transform(embeddings)

        # Map index labels to array positions.  The original indexed the
        # numpy array with raw labels, which silently assumed df has a
        # default RangeIndex.
        positions = df.index.get_indexer(filtered_df.index)
        filtered_reduced = reduced_embeddings[positions]

        models = []
        for pos, (_, row) in enumerate(filtered_df.iterrows()):
            models.append({
                'model_id': row.get('model_id', 'Unknown'),
                'x': float(filtered_reduced[pos, 0]),
                'y': float(filtered_reduced[pos, 1]),
                'library_name': row.get('library_name'),
                'pipeline_tag': row.get('pipeline_tag'),
                'downloads': int(row.get('downloads', 0)),
                'likes': int(row.get('likes', 0)),
                'trending_score': _json_safe_score(row.get('trendingScore')),
                'tags': _json_safe_tags(row.get('tags'))
            })

        return {
            'statusCode': 200,
            'headers': headers,
            'body': json.dumps(models)
        }
    except Exception as e:
        return {
            'statusCode': 500,
            'headers': headers,
            'body': json.dumps({'error': str(e)})
        }
|
|
|
|
def get_stats(headers):
    """Return summary statistics about the loaded dataset as JSON.

    Responds 503 until the dataset has been loaded by handler().

    Args:
        headers: response headers dict (CORS + content type) to echo back.

    Returns:
        Netlify-style response dict with a JSON ``body``.
    """
    global df

    if df is None:
        return {
            'statusCode': 503,
            'headers': headers,
            'body': json.dumps({'error': 'Data not loaded'})
        }

    def _mean(column):
        # Mean of a numeric column, or 0.0 when the column is missing or
        # all-NaN.  A bare NaN would serialize as the literal `NaN`, which
        # is not valid JSON.
        if column not in df.columns:
            return 0.0
        value = df[column].mean()
        return float(value) if pd.notna(value) else 0.0

    stats = {
        'total_models': len(df),
        'unique_libraries': df['library_name'].nunique() if 'library_name' in df.columns else 0,
        'unique_pipelines': df['pipeline_tag'].nunique() if 'pipeline_tag' in df.columns else 0,
        'avg_downloads': _mean('downloads'),
        'avg_likes': _mean('likes')
    }

    return {
        'statusCode': 200,
        'headers': headers,
        'body': json.dumps(stats)
    }
|
|
|
|