Run Expectancy

SDS 355

Prof. Baumer

September 17, 2025

What is the value of a game state?

Last time

Run Expectancy Matrix

George R. Lindsey

Run Expectancy Matrix

Retrosheet play-by-play data

library(tidyverse)
library(abdwr3edata)

# peek at the first rows of the 2016 play-by-play data
head(retro2016)
# A tibble: 6 × 98
  game_id      away_team_id inn_ct bat_home_id outs_ct balls_ct strikes_ct
  <chr>        <chr>         <int>       <int>   <int>    <int>      <int>
1 ANA201604040 CHN               1           0       0        1          0
2 ANA201604040 CHN               1           0       0        0          0
3 ANA201604040 CHN               1           0       1        0          2
4 ANA201604040 CHN               1           0       2        1          1
5 ANA201604040 CHN               1           0       2        2          2
6 ANA201604040 CHN               1           1       0        2          0
# ℹ 91 more variables: pitch_seq_tx <chr>, away_score_ct <int>,
#   home_score_ct <int>, bat_id <chr>, bat_hand_cd <chr>, resp_bat_id <chr>,
#   resp_bat_hand_cd <chr>, pit_id <chr>, pit_hand_cd <chr>, resp_pit_id <chr>,
#   resp_pit_hand_cd <chr>, pos2_fld_id <chr>, pos3_fld_id <chr>,
#   pos4_fld_id <chr>, pos5_fld_id <chr>, pos6_fld_id <chr>, pos7_fld_id <chr>,
#   pos8_fld_id <chr>, pos9_fld_id <chr>, base1_run_id <chr>,
#   base2_run_id <chr>, base3_run_id <chr>, event_tx <chr>, leadoff_fl <lgl>, …

What was the situation?

# Keep only the columns needed to describe each play, then derive the
# combined score, a half-inning key, and the runs scored on the play.
retro2016_small <- retro2016 |>
  select(all_of(c(
    # position in the game: game, inning, batting side
    "game_id", "inn_ct", "bat_home_id",
    # outs at the start of the play and outs recorded on it
    "outs_ct", "event_outs_ct",
    # score of each team
    "away_score_ct", "home_score_ct",
    # runner id on first, second, and third
    "base1_run_id", "base2_run_id", "base3_run_id",
    # destination of the batter and of each runner
    "bat_dest_id", "run1_dest_id", "run2_dest_id", "run3_dest_id",
    # batter and description of the event
    "bat_id", "event_cd", "bat_event_fl", "event_tx"
  ))) |>
  mutate(
    # combined score of both teams
    runs_before = home_score_ct + away_score_ct,
    # unique key for a half inning: game + inning + batting side
    half_inning = paste(game_id, inn_ct, bat_home_id, sep = "-"),
    # every destination code above 3 is counted as one run
    runs_scored =
      (run3_dest_id > 3) + (run2_dest_id > 3) +
      (run1_dest_id > 3) + (bat_dest_id > 3)
  )

Summarize all half innings

# One row per half inning: outs recorded, runs scored, the combined score
# on its first play, and the combined score once the half inning is over.
half_innings <- retro2016_small |>
  group_by(half_inning) |>
  summarize(
    outs_inning = sum(event_outs_ct),
    runs_inning = sum(runs_scored),
    # combined score on the first play of the half inning
    runs_start = runs_before[1],
    max_runs = runs_start + runs_inning
  )

head(half_innings)
# A tibble: 6 × 5
  half_inning      outs_inning runs_inning runs_start max_runs
  <chr>                  <int>       <int>      <int>    <int>
1 ANA201604040-1-0           3           1          0        1
2 ANA201604040-1-1           3           0          1        1
3 ANA201604040-2-0           3           0          1        1
4 ANA201604040-2-1           3           0          1        1
5 ANA201604040-3-0           3           0          1        1
6 ANA201604040-3-1           3           0          1        1

Runs scored in remainder of inning

# Attach the half-inning totals to every play, then compute how many runs
# are still to come: final combined score minus the combined score on
# this play.
retro2016_small <- retro2016_small |>
  inner_join(half_innings, join_by(half_inning)) |>
  mutate(runs_roi = max_runs - runs_before)

# show only the run-related columns for the first few plays
retro2016_small |>
  head() |>
  select(contains("runs"))
# A tibble: 6 × 6
  runs_before runs_scored runs_inning runs_start max_runs runs_roi
        <int>       <int>       <int>      <int>    <int>    <int>
1           0           0           1          0        1        1
2           0           0           1          0        1        1
3           0           0           1          0        1        1
4           0           1           1          0        1        1
5           1           0           1          0        1        0
6           1           0           0          1        1        0

Starting base-out state

# Encode the base-out state at the start of each play, e.g. "010-1".
retro2016_small <- retro2016_small |>
  mutate(
    # one digit per base (first, second, third): 1 when a runner id is
    # present, 0 when the id is the empty string
    bases = paste0(
      as.integer(base1_run_id != ""),
      as.integer(base2_run_id != ""),
      as.integer(base3_run_id != "")
    ),
    state = paste0(bases, "-", outs_ct)
  )

# compare the encoded state with the runner ids it was built from
retro2016_small |>
  head() |>
  select(state, contains("run_id"))
# A tibble: 6 × 4
  state base1_run_id base2_run_id base3_run_id
  <chr> <chr>        <chr>        <chr>       
1 000-0 ""           ""           ""          
2 010-0 ""           "fowld001"   ""          
3 001-1 ""           ""           "fowld001"  
4 001-2 ""           ""           "fowld001"  
5 100-2 "rizza001"   ""           ""          
6 000-0 ""           ""           ""          

Ending base-out state

# Encode the base-out state after each play from the destination codes.
retro2016_small <- retro2016_small |>
  mutate(
    # a base is occupied afterwards if the batter or any runner who could
    # reach it has that base as a destination
    is_runner1 = as.numeric(bat_dest_id == 1 | run1_dest_id == 1),
    is_runner2 = as.numeric(
      bat_dest_id == 2 | run1_dest_id == 2 | run2_dest_id == 2
    ),
    is_runner3 = as.numeric(
      bat_dest_id == 3 | run1_dest_id == 3 |
        run2_dest_id == 3 | run3_dest_id == 3
    ),
    # outs after the play = outs before it + outs recorded on it
    new_outs = event_outs_ct + outs_ct,
    new_bases = paste0(is_runner1, is_runner2, is_runner3),
    new_state = paste0(new_bases, "-", new_outs)
  )

# starting state, ending state, runs, and the event text side by side
retro2016_small |>
  head() |>
  select(state, new_state, runs_scored, event_tx)
# A tibble: 6 × 4
  state new_state runs_scored event_tx
  <chr> <chr>           <int> <chr>   
1 000-0 010-0               0 D9/G+   
2 010-0 001-1               0 63/G.2-3
3 001-1 001-2               0 K       
4 001-2 100-2               1 S8/G.3-H
5 100-2 100-3               0 K23     
6 000-0 000-1               0 13/G-   

Remove irrelevant data

# Plays used to estimate run expectancy. Both conditions must hold:
#   * the half inning recorded exactly three outs (no walk-offs), and
#   * the play changed the base-out state or scored a run (drops a
#     handful of weird plays).
changes2016 <- retro2016_small |>
  filter(
    outs_inning == 3,
    runs_scored > 0 | new_state != state
  )

Summarize the changes

# Expected run matrix: the mean of runs_roi for every starting combination
# of base occupancy and outs.
erm_2016 <- changes2016 |>
  group_by(bases, outs_ct) |>
  # Spell out the regrouping instead of relying on the implicit default:
  # the result is still grouped by `bases`, but summarize() no longer
  # emits its "has grouped output by" message.
  summarize(exp_run_value = mean(runs_roi), .groups = "drop_last")

# Display as a matrix: one row per base configuration, one column per
# number of outs.
erm_2016 |>
  pivot_wider(
    names_from = outs_ct,
    values_from = exp_run_value,
    names_prefix = "Outs="
  )
# A tibble: 8 × 4
# Groups:   bases [8]
  bases `Outs=0` `Outs=1` `Outs=2`
  <chr>    <dbl>    <dbl>    <dbl>
1 000      0.498    0.268    0.106
2 001      1.35     0.937    0.372
3 010      1.13     0.673    0.312
4 011      1.93     1.36     0.548
5 100      0.858    0.512    0.220
6 101      1.72     1.20     0.478
7 110      1.44     0.921    0.414
8 111      2.11     1.54     0.695

Historical ERMs

https://www.tangotiger.net/re24.html

RE24

Run value = change in expected run value + runs scored on the play

# Look up the expected run value of the starting and ending state of each
# play, then compute the run value of the play itself.
retro2016_small <- retro2016_small |>
  # expected run value of the state the play started in
  left_join(
    rename(erm_2016, erv_start = exp_run_value),
    join_by(bases, outs_ct)
  ) |>
  # expected run value of the state the play ended in
  left_join(
    rename(erm_2016, erv_end = exp_run_value),
    join_by(new_bases == bases, new_outs == outs_ct)
  ) |>
  mutate(
    # ending states with no row in the matrix (e.g. three outs) are worth 0
    erv_end = coalesce(erv_end, 0),
    delta_erv = erv_end - erv_start + runs_scored
  )

# states, their expected run values, and the resulting run value
retro2016_small |>
  head() |>
  select(state, new_state, erv_start, erv_end, event_tx, delta_erv)
# A tibble: 6 × 6
  state new_state erv_start erv_end event_tx delta_erv
  <chr> <chr>         <dbl>   <dbl> <chr>        <dbl>
1 000-0 010-0         0.498   1.13  D9/G+        0.635
2 010-0 001-1         1.13    0.937 63/G.2-3    -0.196
3 001-1 001-2         0.937   0.372 K           -0.565
4 001-2 100-2         0.372   0.220 S8/G.3-H     0.848
5 100-2 100-3         0.220   0     K23         -0.220
6 000-0 000-1         0.498   0.268 13/G-       -0.230

Run values for events

# Average run value for each event code, largest first, with the event
# text of the first play in each group as an example.
retro2016_small |>
  group_by(event_cd) |>
  summarize(
    sample_desc = event_tx[1],
    mean_run_value = mean(delta_erv)
  ) |>
  arrange(-mean_run_value)
# A tibble: 21 × 3
   event_cd sample_desc    mean_run_value
      <int> <chr>                   <dbl>
 1       23 HR/9/L.1-H              1.38 
 2       22 T9/F.1-H                1.01 
 3       21 D9/G+                   0.739
 4       18 E5/G                    0.464
 5       20 S8/G.3-H                0.439
 6       16 HP                      0.318
 7       17 C/E2.2-3;1-2            0.311
 8       14 W                       0.293
 9       11 BK.2-3;1-2              0.277
10        9 WP.3-H(NR);1-2          0.260
# ℹ 11 more rows

RE24

# Top 10 batters by RE24: the sum of delta_erv over the rows flagged as
# batting events, plus the per-row average.
retro2016_small |>
  filter(bat_event_fl == TRUE) |>
  group_by(bat_id) |>
  summarize(
    # PA is the number of flagged rows for this batter
    PA = n(),
    RE24 = sum(delta_erv),
    RE24_PA = RE24 / PA
  ) |>
  arrange(desc(RE24)) |>
  # look up player names through the Retrosheet id
  left_join(Lahman::People, by = join_by(bat_id == retroID)) |>
  mutate(Player = paste(nameFirst, nameLast)) |>
  select(Player, PA, contains("RE24")) |>
  slice_head(n = 10)
# A tibble: 10 × 4
   Player             PA  RE24 RE24_PA
   <chr>           <int> <dbl>   <dbl>
 1 Mike Trout        681  66.4  0.0976
 2 David Ortiz       625  60.8  0.0973
 3 Freddie Freeman   693  47.2  0.0681
 4 Joey Votto        677  47.0  0.0694
 5 Josh Donaldson    700  46.2  0.0660
 6 Nolan Arenado     696  46.0  0.0661
 7 Daniel Murphy     582  45.6  0.0784
 8 Kris Bryant       699  44.9  0.0642
 9 Anthony Rizzo     676  42.7  0.0632
10 Mookie Betts      730  36.0  0.0493