Predict MLB Game Scores!
Fill in every input field as shown below.
Input Name
hometeamoffense
hometeamdefense
awayteamoffense
awayteamdefense
homepitcherera
awaypitcherera
ballparkfactor
OpenAI Standard
1 run · @sus-bak 10 days ago
The prompt powering this tool. Want to modify it for yourself? Click the button →
# Install packages if needed
# pip install pandas scikit-learn numpy
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# STEP 1: Load and Preprocess Data
data = pd.read_csv('mlb_games_data.csv')

# Drop missing values (any row with a NaN in any column is removed)
data = data.dropna()

# Define features and targets
features = [
    'home_team_offense',
    'home_team_defense',
    'away_team_offense',
    'away_team_defense',
    'home_pitcher_era',
    'away_pitcher_era',
    'ballpark_factor',
]
X = data[features]
y_home = data['home_team_runs']
y_away = data['away_team_runs']

# STEP 2: Train/Test Split
# One call splits X and both targets together so the rows stay aligned.
X_train, X_test, y_home_train, y_home_test, y_away_train, y_away_test = train_test_split(
    X, y_home, y_away, test_size=0.2, random_state=42
)

# STEP 3: Hyperparameter Tuning using GridSearchCV
param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5],
}

# One independently tuned random forest per target (home runs / away runs).
grid_search_home = GridSearchCV(
    RandomForestRegressor(random_state=42),
    param_grid,
    cv=5,
    scoring='neg_mean_squared_error',
)
grid_search_home.fit(X_train, y_home_train)
home_model = grid_search_home.best_estimator_

grid_search_away = GridSearchCV(
    RandomForestRegressor(random_state=42),
    param_grid,
    cv=5,
    scoring='neg_mean_squared_error',
)
grid_search_away.fit(X_train, y_away_train)
away_model = grid_search_away.best_estimator_

print("Best parameters for Home Model:", grid_search_home.best_params_)
print("Best parameters for Away Model:", grid_search_away.best_params_)

# STEP 4: Using Input Name for User Inputs (for interactive environments)
# NOTE(review): the bare names on the right-hand side are not defined in this
# script; presumably the hosting tool substitutes the user's input values for
# them before the code runs -- confirm against the tool's input fields.
home_team_offense = hometeamoffense  # Example: 110
home_team_defense = hometeamdefense  # Example: 90
away_team_offense = awayteamoffense  # Example: 105
away_team_defense = awayteamdefense  # Example: 95
home_pitcher_era = homepitcherera  # Example: 3.20
away_pitcher_era = awaypitcherera  # Example: 4.10
ballpark_factor = ballparkfactor  # Example: 1.05

# Organize inputs into a DataFrame (single row, same columns as `features`)
input_data = pd.DataFrame({
    'home_team_offense': [home_team_offense],
    'home_team_defense': [home_team_defense],
    'away_team_offense': [away_team_offense],
    'away_team_defense': [away_team_defense],
    'home_pitcher_era': [home_pitcher_era],
    'away_pitcher_era': [away_pitcher_era],
    'ballpark_factor': [ballpark_factor],
})

# STEP 5: Prediction
pred_home_score = home_model.predict(input_data)[0]
pred_away_score = away_model.predict(input_data)[0]

print("\nPredicted Game Score:")
print(f" Home: {round(pred_home_score)} - Away: {round(pred_away_score)}")


# STEP 6: Evaluation Metrics for Home and Away Scores
def evaluate(true, preds, label="Model"):
    """Print MAE, RMSE, R2 and MAPE for a set of predictions.

    Args:
        true: Observed target values (array-like).
        preds: Predicted values, positionally aligned with ``true``.
        label: Heading printed above the metrics.

    Returns:
        None. The metrics are written to standard output.
    """
    mae = mean_absolute_error(true, preds)
    mse = mean_squared_error(true, preds)
    rmse = np.sqrt(mse)
    r2 = r2_score(true, preds)

    # MAPE divides by the observed value, so a target of 0 would turn the
    # whole metric into inf/nan. Only non-zero targets are averaged; if every
    # target is 0 the metric is undefined and reported as nan.
    true_values = np.asarray(true, dtype=float)
    pred_values = np.asarray(preds, dtype=float)
    nonzero = true_values != 0
    if nonzero.any():
        relative_error = np.abs(
            (true_values[nonzero] - pred_values[nonzero]) / true_values[nonzero]
        )
        mape = np.mean(relative_error) * 100
    else:
        mape = float("nan")

    print(f"\nEvaluation for {label}:")
    print(f" MAE: {mae:.3f}")
    print(f" RMSE: {rmse:.3f}")
    print(f" R2 Score: {r2:.3f}")
    print(f" MAPE: {mape:.2f}%")


# Evaluating model on test set
home_preds = home_model.predict(X_test)
away_preds = away_model.predict(X_test)

evaluate(y_home_test, home_preds, label="Home Team Runs Prediction")
evaluate(y_away_test, away_preds, label="Away Team Runs Prediction")