Spaces:

nick-leland
/

rd2l_prediction

Sleeping

App Files Files Community

nick-leland committed on Jan 12

Commit

5292003

1 Parent(s): 5307707

Finalized testing

Browse files

Files changed (2) hide show

app.py +66 -45
prediction_data_prepped.csv +15 -0

app.py CHANGED Viewed

@@ -22,15 +22,13 @@ def load_model():
         MODEL = ort.InferenceSession(str(model_path))
-        # Load feature columns from prediction data
-        try:
-            FEATURE_COLUMNS = pd.read_csv("result_prediction_data_prepped.csv").columns.tolist()
-        except:
-            try:
-                FEATURE_COLUMNS = pd.read_csv("prediction_data_prepped.csv").columns.tolist()
-            except:
-                return "Error: Could not find prediction data files to determine feature structure"
         return "Model loaded successfully"
     except Exception as e:
         return f"Error loading model: {str(e)}"
@@ -38,69 +36,92 @@ def load_model():
 def process_player_data(player_id, mmr, comf_1, comf_2, comf_3, comf_4, comf_5):
     """Process player data similar to training pipeline"""
     try:
-        # Define expected columns based on the model's requirements
-        expected_columns = ['mmr', 'p1', 'p2', 'p3', 'p4', 'p5', 'count', 'mean', 'std', 'min']
-        # Add hero-specific columns
-        for hero_id in range(1, 139):  # Based on max hero ID 138 from your data
-            if hero_id in [139, 140, 141, 142, 143, 144]:  # Skip any known gaps
-                continue
-            expected_columns.extend([f'games_{hero_id}', f'winrate_{hero_id}'])
-        print(f"\nExpected columns: {len(expected_columns)}")
         # Clean player ID from URL if needed
         if "/" in player_id:
             player_id = player_id.split("/")[-1]
         # Create initial player series
         player_data = {
             "mmr": float(mmr),
             "p1": int(comf_1),
             "p2": int(comf_2),
             "p3": int(comf_3),
             "p4": int(comf_4),
-            "p5": int(comf_5),
-            "count": 0,
-            "mean": 0,
-            "std": 0,
-            "min": 0
         }
         # Get hero statistics using OpenDota API
         try:
             hero_stats = hero_information(player_id)
-            hero_data = hero_stats.to_dict()
-            # Add hero-specific stats
-            for col in expected_columns:
-                if col.startswith('games_') or col.startswith('winrate_'):
-                    if col not in hero_data:
-                        player_data[col] = 0
-                    else:
-                        player_data[col] = hero_data[col]
         except Exception as e:
             print(f"Warning - Error fetching hero data: {str(e)}")
-            # If hero stats fail, add placeholder values for all hero columns
-            for col in expected_columns:
-                if col.startswith('games_') or col.startswith('winrate_'):
-                    player_data[col] = 0
-        # Convert to DataFrame and ensure column order
         df = pd.DataFrame([player_data])
-        # Verify we have all expected columns
-        missing_cols = set(expected_columns) - set(df.columns)
-        if missing_cols:
-            print("\nMissing columns:", missing_cols)
-            for col in missing_cols:
                 df[col] = 0
-        # Ensure correct column order
         df = df[expected_columns]
         print(f"\nFinal number of columns: {len(df.columns)}")
-        print(f"Column list: {df.columns.tolist()}")
         return df
     except Exception as e:

         MODEL = ort.InferenceSession(str(model_path))
+        # Use the known list of features
+        FEATURE_COLUMNS = ['mmr', 'p1', 'p2', 'p3', 'p4', 'p5', 'count', 'mean', 'std', 'min', 'max',
+                          'sum', 'total_games_played', 'total_winrate'] + \
+                         [f'games_{i}' for i in range(1, 139)] + \
+                         [f'winrate_{i}' for i in range(1, 139)]
+        print(f"Number of features loaded: {len(FEATURE_COLUMNS)}")
         return "Model loaded successfully"
     except Exception as e:
         return f"Error loading model: {str(e)}"
 def process_player_data(player_id, mmr, comf_1, comf_2, comf_3, comf_4, comf_5):
     """Process player data similar to training pipeline"""
     try:
         # Clean player ID from URL if needed
         if "/" in player_id:
             player_id = player_id.split("/")[-1]
         # Create initial player series
         player_data = {
+            "player_id": player_id,
             "mmr": float(mmr),
             "p1": int(comf_1),
             "p2": int(comf_2),
             "p3": int(comf_3),
             "p4": int(comf_4),
+            "p5": int(comf_5)
         }
+        # Read the example row from prediction_data_prepped.csv to get the expected structure
+        try:
+            pred_data = pd.read_csv("prediction_data_prepped.csv")
+            if not pred_data.empty:
+                # Get column structure from the first row
+                for col in pred_data.columns:
+                    if col not in player_data:
+                        player_data[col] = 0
+        except Exception as e:
+            print(f"Warning - Error reading prediction data template: {str(e)}")
         # Get hero statistics using OpenDota API
         try:
             hero_stats = hero_information(player_id)
+            player_data.update(hero_stats.to_dict())
+            # Add season identifier to match training data format
+            player_season = f"{player_id}_S34"  # Assuming current season is 34
+            temp_dict = {}
+            temp_dict[player_season] = 1.0  # Set current season flag to 1.0
+            player_data.update(temp_dict)
         except Exception as e:
             print(f"Warning - Error fetching hero data: {str(e)}")
+            # If hero stats fail, add placeholder values
+            player_data.update({
+                "total_games_played": 0,
+                "total_winrate": 0.0
+            })
+        # Convert to DataFrame for consistency with training
         df = pd.DataFrame([player_data])
+        # Load reference data structure if available
+        try:
+            ref_data = pd.read_csv("result_prediction_data_prepped.csv")
+            if not ref_data.empty:
+                # Get all columns from reference data
+                for col in ref_data.columns:
+                    if col not in df.columns:
+                        df[col] = 0
+                # Reorder columns to match reference data
+                df = df[ref_data.columns]
+        except Exception as e:
+            print(f"Warning - Error matching reference data structure: {str(e)}")
+        # Load the expected columns from your prediction data
+        pred_data = pd.read_csv("prediction_data_prepped.csv")
+        expected_columns = pred_data.columns.tolist()
+        # Debug print
+        print(f"\nNumber of expected columns: {len(expected_columns)}")
+        print(f"Number of current columns: {len(df.columns)}")
+        # Find missing columns
+        missing_columns = [col for col in expected_columns if col not in df.columns]
+        extra_columns = [col for col in df.columns if col not in expected_columns]
+        print(f"\nMissing columns: {missing_columns}")
+        print(f"Extra columns: {extra_columns}")
+        # Ensure all expected columns exist
+        for col in expected_columns:
+            if col not in df.columns:
                 df[col] = 0
+        # Remove any extra columns
         df = df[expected_columns]
         print(f"\nFinal number of columns: {len(df.columns)}")
+        print(f"First few columns: {list(df.columns)[:5]}")
         return df
     except Exception as e:

prediction_data_prepped.csv ADDED Viewed

	@@ -0,0 +1,15 @@

+,188649776_S34,917074310_S34,99929152_S34,143663588_S34,101387753_S34,40436072_S34,87992033_S34,153864932_S34,159164400_S34,67028556_S34,92001890_S34,130527149_S34,120052382_S34,101647591_S34,167829403_S34,240889153_S34,57011991_S34,45626568_S34,64041417_S34,477791331_S34,65495278_S34,170539030_S34,90793653_S34,152985237_S34,118858955_S34,177129466_S34,75864841_S34,108050692_S34,45226038_S34,74883563_S34,16710765_S34,83833103_S34,84060273_S34,57880458_S34,110119494_S34,519770_S34
+cost,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
+player_id,188649776,917074310,99929152,143663588,101387753,40436072,87992033,153864932,159164400,67028556,92001890,130527149,120052382,101647591,167829403,240889153,57011991,45626568,64041417,477791331,65495278,170539030,90793653,152985237,118858955,177129466,75864841,108050692,45226038,74883563,16710765,83833103,84060273,57880458,110119494,519770
+mmr,6812,6492,6358,6050,6000,6000,5886,5714,5701,5698,5685,5640,5538,5431,5420,5307,5200,5164,4997,4876,4650,4630,4627,4627,4580,4560,4500,4251,4181,4124,3777,3754,3408,2889,2621,1144
+p1,5,5,3,3,4,5,4,1,3,4,4,1,5,4,5,5,1,1,1,5,1,5,2,1,1,1,3,2,1,2,3,1,1,4,1,1
+p2,5,2,5,5,3,1,5,3,2,2,5,1,1,2,5,2,1,1,2,2,2,4,2,1,1,1,3,2,1,5,5,1,1,4,1,1
+p3,4,5,4,4,2,1,3,5,5,5,4,1,5,5,5,5,2,2,3,4,4,3,5,1,1,2,3,3,2,3,5,1,4,4,2,2
+p4,2,5,1,2,3,1,3,4,3,4,2,5,5,3,5,5,4,5,4,4,5,3,4,2,5,4,3,5,4,2,2,1,4,5,3,4
+p5,1,5,1,2,2,1,3,4,4,5,2,5,5,2,5,1,5,5,5,2,5,2,4,3,5,5,3,3,4,3,2,5,3,2,3,5
+count,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0
+mean,489.3333333333333,489.3333333333333,489.3333333333333,489.3333333333333,489.3333333333333,489.3333333333333,489.3333333333333,489.3333333333333,489.3333333333333,489.3333333333333,489.3333333333333,489.3333333333333,489.3333333333333,489.3333333333333,489.3333333333333,489.3333333333333,489.3333333333333,489.3333333333333,489.3333333333333,489.3333333333333,489.3333333333333,489.3333333333333,489.3333333333333,489.3333333333333,489.3333333333333,489.3333333333333,489.3333333333333,489.3333333333333,489.3333333333333,489.3333333333333,489.3333333333333,489.3333333333333,489.3333333333333,489.3333333333333,489.3333333333333,489.3333333333333
+std,77.4483698989204,77.4483698989204,77.4483698989204,77.4483698989204,77.4483698989204,77.4483698989204,77.4483698989204,77.4483698989204,77.4483698989204,77.4483698989204,77.4483698989204,77.4483698989204,77.4483698989204,77.4483698989204,77.4483698989204,77.4483698989204,77.4483698989204,77.4483698989204,77.4483698989204,77.4483698989204,77.4483698989204,77.4483698989204,77.4483698989204,77.4483698989204,77.4483698989204,77.4483698989204,77.4483698989204,77.4483698989204,77.4483698989204,77.4483698989204,77.4483698989204,77.4483698989204,77.4483698989204,77.4483698989204,77.4483698989204,77.4483698989204
+min,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0
+max,593.0,593.0,593.0,593.0,593.0,593.0,593.0,593.0,593.0,593.0,593.0,593.0,593.0,593.0,593.0,593.0,593.0,593.0,593.0,593.0,593.0,593.0,593.0,593.0,593.0,593.0,593.0,593.0,593.0,593.0,593.0,593.0,593.0,593.0,593.0,593.0
+sum,4404.0,4404.0,4404.0,4404.0,4404.0,4404.0,4404.0,4404.0,4404.0,4404.0,4404.0,4404.0,4404.0,4404.0,4404.0,4404.0,4404.0,4404.0,4404.0,4404.0,4404.0,4404.0,4404.0,4404.0,4404.0,4404.0,4404.0,4404.0,4404.0,4404.0,4404.0,4404.0,4404.0,4404.0,4404.0,4404.0