Exploring Statcast data

SDS 355

Prof. Baumer

September 29, 2025

Obtaining Statcast data

Statcast

1. via baseballr

```{r}
#| eval: false
# Query Statcast through baseballr for the single date 2025-09-26 and cache
# the result on disk as an RDS file.
library(baseballr)
sc <- statcast_search_batters("2025-09-26", "2025-09-26")
# write_rds() lives in readr, which is not attached at this point in the
# document (library(tidyverse) is only called in a later chunk), so the call
# is namespace-qualified to make this chunk runnable on its own.
readr::write_rds(sc, here::here("data/sc20250926.rds"))
```
```{r}
#| eval: true
# Attach the tidyverse, then read the cached Statcast data back from disk.
library(tidyverse)
sc <- here::here("data/sc20250926.rds") |>
  read_rds()
```

Other ways

What’s in there?

# Dimensions of `sc`: number of rows, then number of columns.
dim(sc)
[1] 4398  118
# List the column names of `sc`.
sc |> names()
  [1] "pitch_type"                              
  [2] "game_date"                               
  [3] "release_speed"                           
  [4] "release_pos_x"                           
  [5] "release_pos_z"                           
  [6] "player_name"                             
  [7] "batter"                                  
  [8] "pitcher"                                 
  [9] "events"                                  
 [10] "description"                             
 [11] "spin_dir"                                
 [12] "spin_rate_deprecated"                    
 [13] "break_angle_deprecated"                  
 [14] "break_length_deprecated"                 
 [15] "zone"                                    
 [16] "des"                                     
 [17] "game_type"                               
 [18] "stand"                                   
 [19] "p_throws"                                
 [20] "home_team"                               
 [21] "away_team"                               
 [22] "type"                                    
 [23] "hit_location"                            
 [24] "bb_type"                                 
 [25] "balls"                                   
 [26] "strikes"                                 
 [27] "game_year"                               
 [28] "pfx_x"                                   
 [29] "pfx_z"                                   
 [30] "plate_x"                                 
 [31] "plate_z"                                 
 [32] "on_3b"                                   
 [33] "on_2b"                                   
 [34] "on_1b"                                   
 [35] "outs_when_up"                            
 [36] "inning"                                  
 [37] "inning_topbot"                           
 [38] "hc_x"                                    
 [39] "hc_y"                                    
 [40] "tfs_deprecated"                          
 [41] "tfs_zulu_deprecated"                     
 [42] "umpire"                                  
 [43] "sv_id"                                   
 [44] "vx0"                                     
 [45] "vy0"                                     
 [46] "vz0"                                     
 [47] "ax"                                      
 [48] "ay"                                      
 [49] "az"                                      
 [50] "sz_top"                                  
 [51] "sz_bot"                                  
 [52] "hit_distance_sc"                         
 [53] "launch_speed"                            
 [54] "launch_angle"                            
 [55] "effective_speed"                         
 [56] "release_spin_rate"                       
 [57] "release_extension"                       
 [58] "game_pk"                                 
 [59] "fielder_2"                               
 [60] "fielder_3"                               
 [61] "fielder_4"                               
 [62] "fielder_5"                               
 [63] "fielder_6"                               
 [64] "fielder_7"                               
 [65] "fielder_8"                               
 [66] "fielder_9"                               
 [67] "release_pos_y"                           
 [68] "estimated_ba_using_speedangle"           
 [69] "estimated_woba_using_speedangle"         
 [70] "woba_value"                              
 [71] "woba_denom"                              
 [72] "babip_value"                             
 [73] "iso_value"                               
 [74] "launch_speed_angle"                      
 [75] "at_bat_number"                           
 [76] "pitch_number"                            
 [77] "pitch_name"                              
 [78] "home_score"                              
 [79] "away_score"                              
 [80] "bat_score"                               
 [81] "fld_score"                               
 [82] "post_away_score"                         
 [83] "post_home_score"                         
 [84] "post_bat_score"                          
 [85] "post_fld_score"                          
 [86] "if_fielding_alignment"                   
 [87] "of_fielding_alignment"                   
 [88] "spin_axis"                               
 [89] "delta_home_win_exp"                      
 [90] "delta_run_exp"                           
 [91] "bat_speed"                               
 [92] "swing_length"                            
 [93] "estimated_slg_using_speedangle"          
 [94] "delta_pitcher_run_exp"                   
 [95] "hyper_speed"                             
 [96] "home_score_diff"                         
 [97] "bat_score_diff"                          
 [98] "home_win_exp"                            
 [99] "bat_win_exp"                             
[100] "age_pit_legacy"                          
[101] "age_bat_legacy"                          
[102] "age_pit"                                 
[103] "age_bat"                                 
[104] "n_thruorder_pitcher"                     
[105] "n_priorpa_thisgame_player_at_bat"        
[106] "pitcher_days_since_prev_game"            
[107] "batter_days_since_prev_game"             
[108] "pitcher_days_until_next_game"            
[109] "batter_days_until_next_game"             
[110] "api_break_z_with_gravity"                
[111] "api_break_x_arm"                         
[112] "api_break_x_batter_in"                   
[113] "arm_angle"                               
[114] "attack_angle"                            
[115] "attack_direction"                        
[116] "swing_path_tilt"                         
[117] "intercept_ball_minus_batter_pos_x_inches"
[118] "intercept_ball_minus_batter_pos_y_inches"

Strike zone

# Flag each row of `sc` by its recorded pitch result and by whether the pitch
# crossed the plate inside a rectangular strike zone.
# NOTE(review): `type == "S"` presumably matches every strike (swinging and
# foul included), not only called strikes -- confirm against the Statcast
# documentation; `description == "called_strike"` may be what is intended.
sc <- sc |>
  mutate(
    is_called_strike = type == "S",
    # Vertical bounds: between the zone bottom and top recorded on each row.
    within_zbounds = plate_z <= sz_top & plate_z >= sz_bot,
    # Horizontal bounds: abs() must wrap plate_x alone. Wrapping the whole
    # comparison only tests the right-hand edge, so every pitch far to the
    # left was counted as inside the zone.
    # 8.5 / 12 is presumably half of a 17-inch plate expressed in feet --
    # assumes plate_x is measured in feet; TODO confirm.
    within_xbounds = abs(plate_x) <= 8.5 / 12,
    is_within_strike_zone = within_zbounds & within_xbounds
  )
# Scatter plot of plate_x against plate_z, colored by `is_called_strike`, with
# one panel per value of `stand` and a box marking the average strike zone.
#
# The box is drawn once with annotate(). Mapping constant bounds through
# geom_rect(aes(...)) draws one rectangle per row of `sc`, and the stacked
# semi-transparent copies render as fully opaque, defeating `alpha = 0.7`.
# annotate() layers are repeated in every facet panel, so each panel still
# gets the box.
kzone_plot <- ggplot(
  sc,
  aes(x = plate_x, y = plate_z, color = is_called_strike)
) +
  annotate(
    "rect",
    xmin = -8.5 / 12, xmax = 8.5 / 12,
    # Top and bottom are the means over all rows, ignoring missing values.
    ymin = mean(sc$sz_bot, na.rm = TRUE),
    ymax = mean(sc$sz_top, na.rm = TRUE),
    color = "grey", alpha = 0.7
  ) +
  geom_point(alpha = 0.3) +
  facet_wrap(vars(stand)) +
  scale_color_brewer(palette = "Set2")

Strike zone plot

# Evaluating the plot object at top level prints (renders) it.
kzone_plot

Release point

# Scatter of release_pos_x against release_pos_z, one color per pitch_type.
sc |>
  ggplot(
    mapping = aes(x = release_pos_x, y = release_pos_z, color = pitch_type)
  ) +
  geom_point(alpha = 0.5)

Separate by pitcher

# Release-point scatter restricted to rows where home_team is "BOS", split
# into one panel per `pitcher` value.
ggplot(
  data = filter(sc, home_team == "BOS"),
  mapping = aes(x = release_pos_x, y = release_pos_z, color = pitch_type)
) +
  geom_point(alpha = 0.5) +
  facet_wrap(vars(pitcher))

Spin rate and velocity

# Scatter of release_speed against release_spin_rate, one color per
# pitch_type.
sc |>
  ggplot(
    mapping = aes(
      x = release_speed,
      y = release_spin_rate,
      color = pitch_type
    )
  ) +
  geom_point(alpha = 0.5)

Spin rate and break

# Scatter of release_spin_rate against api_break_z_with_gravity, one color
# per pitch_type.
sc |>
  ggplot(
    mapping = aes(
      x = release_spin_rate,
      y = api_break_z_with_gravity,
      color = pitch_type
    )
  ) +
  geom_point(alpha = 0.5)

Launch angle and launch speed

# Scatter of launch_speed (x) against launch_angle (y), colored by
# estimated_ba_using_speedangle.
sc |>
  ggplot(
    mapping = aes(
      x = launch_speed,
      y = launch_angle,
      color = estimated_ba_using_speedangle
    )
  ) +
  geom_point(alpha = 0.5)