Sporcle

Note
Code
library(tidyverse)
library(Lahman)

1 Sporcle Quizzes

1.1 9/10: MLB Teams Last MVP

Code
# find all active teams (teams that played in 2023)
active_teams <- Teams |>
  filter(yearID == 2023) |>
  select(teamID, name)

# find all MVPs
AwardsPlayers |>
  filter(awardID == "Most Valuable Player") |>
  # find out which team(s) they played for in that year
  left_join(
    Appearances, 
    by = join_by("playerID", "yearID", "lgID")
  ) |>
  # sort them by year
  arrange(desc(yearID)) |>
  group_by(teamID) |>
  # take the first one, since they're already sorted
  summarize(
    playerID = first(playerID),
    yearID = first(yearID)
  ) |>
  # now reduce to only the active teams!
  right_join(active_teams, by = join_by(teamID)) |>
  arrange(name) |>
  # get the players' names
  left_join(People, by = join_by(playerID)) |>
  mutate(Player = paste(nameFirst, nameLast)) |>
  # make the headers match
  select(Team = name, Player, Year = yearID) |>
  knitr::kable()
Team Player Year
Arizona Diamondbacks NA NA NA
Atlanta Braves Ronald Acuña 2023
Baltimore Orioles Cal Ripken 1991
Boston Red Sox Mookie Betts 2018
Chicago Cubs Kris Bryant 2016
Chicago White Sox Jose Abreu 2020
Cincinnati Reds Joey Votto 2010
Cleveland Guardians Al Rosen 1953
Colorado Rockies Larry Walker 1997
Detroit Tigers Miguel Cabrera 2013
Houston Astros Jose Altuve 2017
Kansas City Royals George Brett 1980
Los Angeles Angels of Anaheim Shohei Ohtani 2023
Los Angeles Dodgers Cody Bellinger 2019
Miami Marlins Giancarlo Stanton 2017
Milwaukee Brewers Christian Yelich 2018
Minnesota Twins Joe Mauer 2009
New York Mets NA NA NA
New York Yankees Aaron Judge 2024
Oakland Athletics Miguel Tejada 2002
Philadelphia Phillies Bryce Harper 2021
Pittsburgh Pirates Andrew McCutchen 2013
San Diego Padres Ken Caminiti 1996
San Francisco Giants Buster Posey 2012
Seattle Mariners Ichiro Suzuki 2001
St. Louis Cardinals Paul Goldschmidt 2022
Tampa Bay Rays NA NA NA
Texas Rangers Josh Hamilton 2010
Toronto Blue Jays Josh Donaldson 2015
Washington Nationals Bryce Harper 2015
Code
SELECT 
  CONCAT(nameFirst, ' ', nameLast) AS Player, 
  t.name, 
  mvp_year AS Year
FROM AwardsPlayers ap
  LEFT JOIN Appearances a ON a.playerID = ap.playerID AND a.yearID = ap.yearID AND a.lgID = ap.lgID
  RIGHT JOIN (
    SELECT a.teamID, MAX(ap.yearID) AS mvp_year
    FROM AwardsPlayers ap
    LEFT JOIN Appearances a ON a.playerID = ap.playerID AND a.yearID = ap.yearID AND a.lgID = ap.lgID
    WHERE ap.awardID = 'Most Valuable Player' 
    GROUP BY a.teamID
    ORDER BY a.teamID
  ) last_mvps ON last_mvps.teamID = a.teamID AND last_mvps.mvp_year = a.yearID
  LEFT JOIN Teams t ON t.teamID = last_mvps.teamID
  LEFT JOIN People p ON p.playerID = a.playerID
WHERE ap.awardID = 'Most Valuable Player' 
  AND t.yearID = 2023
ORDER BY t.name;

1.2 9/15: MLB Top 5 HR Hitters (1970-2009)

Code
library(tidyverse)
library(Lahman)
Batting |>
  filter(yearID >= 1970 & yearID <= 2009) |>
  group_by(playerID, yearID) |>
  summarize(
    teamIDs = paste(unique(teamID), collapse = "/"),
    tHR = sum(HR)
  ) |>
  left_join(People, by = join_by(playerID)) |>
  mutate(player_name = paste(nameLast, nameFirst)) |>
  select(yearID, teamIDs, player_name, tHR) |>
  ungroup() |>
  group_by(yearID) |>
  group_split() |>
  map(~arrange(.x, desc(tHR))) |>
  map(head, n = 5) |>
  bind_rows() |>
  arrange(desc(yearID), desc(tHR)) |>
  select(-playerID) |>
  knitr::kable()
`summarise()` has grouped output by 'playerID'. You can override using the
`.groups` argument.
Adding missing grouping variables: `playerID`
yearID teamIDs player_name tHR
2009 SLN Pujols Albert 47
2009 MIL Fielder Prince 46
2009 PHI Howard Ryan 45
2009 ARI Reynolds Mark 44
2009 SDN Gonzalez Adrian 40
2008 PHI Howard Ryan 48
2008 ARI/CIN Dunn Adam 40
2008 NYN Delgado Carlos 38
2008 MIL Braun Ryan 37
2008 DET Cabrera Miguel 37
2007 NYA Rodriguez Alex 54
2007 MIL Fielder Prince 50
2007 PHI Howard Ryan 47
2007 TBA Peña Carlos 46
2007 CIN Dunn Adam 40
2006 PHI Howard Ryan 58
2006 BOS Ortiz David 54
2006 SLN Pujols Albert 49
2006 WAS Soriano Alfonso 46
2006 HOU Berkman Lance 45
2005 ATL Jones Andruw 51
2005 NYA Rodriguez Alex 48
2005 BOS Ortiz David 47
2005 CHN Lee Derrek 46
2005 BOS Ramirez Manny 45
2004 LAN Beltre Adrian 48
2004 CIN Dunn Adam 46
2004 SLN Pujols Albert 46
2004 SFN Bonds Barry 45
2004 BOS Ramirez Manny 43
2003 TEX Rodriguez Alex 47
2003 PHI Thome Jim 47
2003 SFN Bonds Barry 45
2003 MIL Sexson Richie 45
2003 ATL Lopez Javy 43
2002 TEX Rodriguez Alex 57
2002 CLE Thome Jim 52
2002 CHN Sosa Sammy 49
2002 SFN Bonds Barry 46
2002 TEX Palmeiro Rafael 43
2001 SFN Bonds Barry 73
2001 CHN Sosa Sammy 64
2001 ARI Gonzalez Luis 57
2001 TEX Rodriguez Alex 52
2001 LAN Green Shawn 49
2000 CHN Sosa Sammy 50
2000 SFN Bonds Barry 49
2000 HOU Bagwell Jeff 47
2000 ANA Glaus Troy 47
2000 MON Guerrero Vladimir 44
1999 SLN McGwire Mark 65
1999 CHN Sosa Sammy 63
1999 SEA Griffey Ken 48
1999 TEX Palmeiro Rafael 47
1999 ATL Jones Chipper 45
1998 SLN McGwire Mark 70
1998 CHN Sosa Sammy 66
1998 SEA Griffey Ken 56
1998 SDN Vaughn Greg 50
1998 CHA Belle Albert 49
1997 OAK/SLN McGwire Mark 58
1997 SEA Griffey Ken 56
1997 COL Walker Larry 49
1997 NYA Martinez Tino 44
1997 HOU Bagwell Jeff 43
1996 OAK McGwire Mark 52
1996 BAL Anderson Brady 50
1996 SEA Griffey Ken 49
1996 CLE Belle Albert 48
1996 COL Galarraga Andres 47
1995 CLE Belle Albert 50
1995 COL Bichette Dante 40
1995 SEA Buhner Jay 40
1995 CHA Thomas Frank 40
1995 OAK McGwire Mark 39
1994 SFN Williams Matt 43
1994 SEA Griffey Ken 40
1994 HOU Bagwell Jeff 39
1994 CHA Thomas Frank 38
1994 SFN Bonds Barry 37
1993 SFN Bonds Barry 46
1993 TEX Gonzalez Juan 46
1993 SEA Griffey Ken 45
1993 CHA Thomas Frank 41
1993 ATL Justice David 40
1992 TEX Gonzalez Juan 43
1992 OAK McGwire Mark 42
1992 DET Fielder Cecil 35
1992 SDN McGriff Fred 35
1992 CLE Belle Albert 34
1991 OAK Canseco Jose 44
1991 DET Fielder Cecil 44
1991 NYN Johnson Howard 38
1991 BAL Ripken Cal 34
1991 SFN Williams Matt 34
1990 DET Fielder Cecil 51
1990 CHN Sandberg Ryne 40
1990 OAK McGwire Mark 39
1990 OAK Canseco Jose 37
1990 NYN Strawberry Darryl 37
1989 SFN Mitchell Kevin 47
1989 NYN Johnson Howard 36
1989 TOR McGriff Fred 36
1989 CLE Carter Joe 35
1989 CIN Davis Eric 34
1988 OAK Canseco Jose 42
1988 NYN Strawberry Darryl 39
1988 TOR McGriff Fred 34
1988 OAK McGwire Mark 32
1988 HOU Davis Glenn 30
1987 CHN Dawson Andre 49
1987 OAK McGwire Mark 49
1987 TOR Bell George 47
1987 ATL Murphy Dale 44
1987 NYN Strawberry Darryl 39
1986 TOR Barfield Jesse 40
1986 PHI Schmidt Mike 37
1986 OAK Kingman Dave 35
1986 MIN Gaetti Gary 34
1986 OAK Canseco Jose 33
1985 DET Evans Darrell 40
1985 CHA Fisk Carlton 37
1985 ATL Murphy Dale 37
1985 KCA Balboni Steve 36
1985 NYA Mattingly Don 35
1984 BOS Armas Tony 43
1984 ATL Murphy Dale 36
1984 PHI Schmidt Mike 36
1984 OAK Kingman Dave 35
1984 OAK Murphy Dwayne 33
1983 PHI Schmidt Mike 40
1983 BOS Rice Jim 39
1983 BOS Armas Tony 36
1983 ATL Murphy Dale 36
1983 CHA Kittle Ron 35
1982 CAL Jackson Reggie 39
1982 ML4 Thomas Gorman 39
1982 NYN Kingman Dave 37
1982 NYA Winfield Dave 37
1982 ATL Murphy Dale 36
1981 PHI Schmidt Mike 31
1981 MON Dawson Andre 24
1981 OAK Armas Tony 22
1981 BOS Evans Dwight 22
1981 CIN Foster George 22
1980 PHI Schmidt Mike 48
1980 NYA Jackson Reggie 41
1980 ML4 Oglivie Ben 41
1980 ML4 Thomas Gorman 38
1980 OAK Armas Tony 35
1979 CHN Kingman Dave 48
1979 PHI Schmidt Mike 45
1979 ML4 Thomas Gorman 45
1979 BOS Lynn Fred 39
1979 BOS Rice Jim 39
1978 BOS Rice Jim 46
1978 CIN Foster George 40
1978 PHI Luzinski Greg 35
1978 CAL Baylor Don 34
1978 ML4 Hisle Larry 34
1977 CIN Foster George 52
1977 ATL Burroughs Jeff 41
1977 PHI Luzinski Greg 39
1977 BOS Rice Jim 39
1977 PHI Schmidt Mike 38
1976 PHI Schmidt Mike 38
1976 NYN Kingman Dave 37
1976 CHN Monday Rick 32
1976 NYA Nettles Graig 32
1976 CIN Foster George 29
1975 PHI Schmidt Mike 38
1975 OAK Jackson Reggie 36
1975 NYN Kingman Dave 36
1975 ML4 Scott George 36
1975 PHI Luzinski Greg 34
1974 PHI Schmidt Mike 36
1974 CIN Bench Johnny 33
1974 CHA Allen Dick 32
1974 LAN Wynn Jim 32
1974 OAK Jackson Reggie 29
1973 PIT Stargell Willie 44
1973 ATL Johnson Davey 43
1973 ATL Evans Darrell 41
1973 ATL Aaron Hank 40
1973 SFN Bonds Bobby 39
1972 CIN Bench Johnny 40
1972 SDN Colbert Nate 38
1972 CHA Allen Dick 37
1972 CHN Williams Billy 37
1972 ATL Aaron Hank 34
1971 PIT Stargell Willie 48
1971 ATL Aaron Hank 47
1971 CIN May Lee 39
1971 PHI Johnson Deron 34
1971 SFN Bonds Bobby 33
1970 CIN Bench Johnny 45
1970 WS2 Howard Frank 44
1970 CHN Williams Billy 42
1970 MIN Killebrew Harmon 41
1970 CIN Perez Tony 40
Code
SELECT 
  CONCAT(ANY_VALUE(p.nameFirst), ' ', ANY_VALUE(p.nameLast)) AS Player, 
  yearID, 
  GROUP_CONCAT(DISTINCT teamID) AS teamIDs, 
  SUM(HR) AS tHR
FROM Batting b
LEFT JOIN People p ON p.playerID = b.playerID
WHERE yearID >= 1970 AND yearID < 2010
  AND yearID = 2009
GROUP BY b.playerID, yearID
ORDER BY tHR DESC
LIMIT 5;

1.3 9/17: MLB 40-HRs by 4-Letter Players

Code
Batting |>
  group_by(playerID, yearID) |>
  summarize(
    tHR = sum(HR)
  ) |>
  filter(tHR >= 40) |>
  group_by(playerID) |>
  summarize(
    n = n(),
    last = max(yearID)
  ) |>
  arrange(desc(n)) |>
  left_join(People, by = join_by(playerID)) |>
  filter(nchar(nameLast) <= 4) |>
  select(n, last, nameLast, nameFirst) |>
  knitr::kable()
`summarise()` has grouped output by 'playerID'. You can override using the
`.groups` argument.
n last nameLast nameFirst
11 1932 Ruth Babe
7 2003 Sosa Sammy
6 2012 Dunn Adam
6 1965 Mays Willie
5 1938 Foxx Jimmie
4 2019 Cruz Nelson
3 1948 Mize Johnny
1 1987 Bell George
1 1961 Cash Norm
1 2006 Dye Jermaine
1 2005 Lee Derrek
1 1929 Ott Mel
1 2007 Peña Carlos
1 1955 Post Wally
1 1978 Rice Jim
1 2024 Soto Juan
Code
SELECT 
  COUNT(DISTINCT yearID) AS N, 
  MAX(yearID) AS most_recent, 
  ANY_VALUE(p.nameLast) AS last_name
FROM
  (SELECT playerID, yearID, SUM(HR) AS tHR
  FROM Batting
  GROUP BY playerID, yearID
  HAVING SUM(HR) >= 40) AS hr40
LEFT JOIN People p ON p.playerID = hr40.playerID
GROUP BY hr40.playerID
HAVING LEN(ANY_VALUE(p.nameLast)) <= 4
ORDER BY N DESC;

1.4 9/22: Multiple Cy Young Winners

Code
AwardsPlayers |>
  filter(str_detect(awardID, "Cy Young")) |>
  group_by(playerID) |>
  summarize(
    n = n(),
    years = paste(yearID, collapse = ", ")
  ) |>
  filter(n > 1) |>
  arrange(n)
# A tibble: 22 × 3
   playerID      n years     
   <chr>     <int> <chr>     
 1 degroja01     2 2018, 2019
 2 gibsobo01     2 1968, 1970
 3 glavito02     2 1991, 1998
 4 hallaro01     2 2003, 2010
 5 klubeco01     2 2014, 2017
 6 linceti01     2 2008, 2009
 7 mclaide01     2 1968, 1969
 8 perryga01     2 1972, 1978
 9 saberbr01     2 1985, 1989
10 santajo01     2 2004, 2006
# ℹ 12 more rows
Code
SELECT playerID, COUNT(*) AS num_awards, GROUP_CONCAT(yearID) AS years
FROM AwardsPlayers
WHERE awardID = 'Cy Young Award'
GROUP BY playerID
HAVING COUNT(*) > 1
ORDER BY COUNT(*);

1.5 9/24: MLB Win Leaders by Team

Code
Pitching |>
  group_by(playerID, teamID) |>
  summarize(wins = sum(W)) |>
  group_by(teamID) |>
  group_split() |>
  map(arrange, desc(wins)) |>
  map(slice_head, n = 1) |>
  bind_rows() |>
  right_join(
    Teams |>
      filter(yearID == 2023) |>
      select(teamID, name),
    by = join_by(teamID)
  ) |>
  arrange(name) |>
  knitr::kable()
`summarise()` has grouped output by 'playerID'. You can override using the
`.groups` argument.
playerID teamID wins name
johnsra05 ARI 118 Arizona Diamondbacks
niekrph01 ATL 266 Atlanta Braves
palmeji01 BAL 268 Baltimore Orioles
clemero02 BOS 192 Boston Red Sox
rootch01 CHN 201 Chicago Cubs
lyonste01 CHA 260 Chicago White Sox
rixeyep01 CIN 179 Cincinnati Reds
fellebo01 CLE 266 Cleveland Guardians
delarjo01 COL 86 Colorado Rockies
daussho01 DET 222 Detroit Tigers
niekrjo01 HOU 144 Houston Astros
splitpa01 KCA 166 Kansas City Royals
weaveje02 LAA 150 Los Angeles Angels of Anaheim
suttodo01 LAN 233 Los Angeles Dodgers
alcansa01 MIA 41 Miami Marlins
gallayo01 MIL 89 Milwaukee Brewers
kaatji01 MIN 189 Minnesota Twins
seaveto01 NYN 198 New York Mets
fordwh01 NYA 236 New York Yankees
hunteca01 OAK 131 Oakland Athletics
carltst01 PHI 241 Philadelphia Phillies
coopewi01 PIT 202 Pittsburgh Pirates
shower01 SDN 100 San Diego Padres
maricju01 SFN 238 San Francisco Giants
hernafe02 SEA 169 Seattle Mariners
gibsobo01 SLN 251 St. Louis Cardinals
shielja02 TBA 87 Tampa Bay Rays
houghch01 TEX 139 Texas Rangers
stiebda01 TOR 175 Toronto Blue Jays
strasst01 WAS 113 Washington Nationals

1.6 9/24: MLB Most Wins by Decade

1.7 9/24: MLB Home Run Leaders (A-Z)

2 Appendix

2.1 How to create a SQL duckdb database from the Lahman package

2.1.1 Educate yourself

  1. Read R for Data Science, Ch. 21
  2. See also Modern Data Science with R, Ch. 15

2.1.2 Instantiate the database

Create a duckdb instance

Code
library(dbplyr)
library(duckdb)
con <- DBI::dbConnect(duckdb::duckdb())

2.1.3 Populate the database

Start populating it with data from the Lahman package.

Code
# Have to do this for every table...
dbWriteTable(con, "Teams", Lahman::Teams)
dbWriteTable(con, "People", Lahman::People)
dbWriteTable(con, "Appearances", Lahman::Appearances)
dbWriteTable(con, "AwardsPlayers", Lahman::AwardsPlayers)
dbWriteTable(con, "Batting", Lahman::Batting)
dbWriteTable(con, "TeamsFranchises", Lahman::TeamsFranchises)
Tip

Challenge: Use a map() statement to do them all!

2.1.4 Step 4: Inpsect the database

Code
dbListTables(con)
[1] "Appearances"     "AwardsPlayers"   "Batting"         "People"         
[5] "Teams"           "TeamsFranchises"

2.1.5 Access

Now, you can access the data in your duckdb SQL database in several ways.

2.1.5.1 Using dbplyr

Code
teams <- tbl(con, "Teams")

active_teams <- teams |>
  filter(yearID == 2023) |>
  select(teamID, name)

active_teams
# Source:   SQL [?? x 2]
# Database: DuckDB 1.4.0 [unknown@Linux 6.11.0-1018-azure:R 4.5.1/:memory:]
   teamID name                         
   <fct>  <chr>                        
 1 LAA    Los Angeles Angels of Anaheim
 2 ARI    Arizona Diamondbacks         
 3 ATL    Atlanta Braves               
 4 BAL    Baltimore Orioles            
 5 BOS    Boston Red Sox               
 6 CHN    Chicago Cubs                 
 7 CHA    Chicago White Sox            
 8 CIN    Cincinnati Reds              
 9 CLE    Cleveland Guardians          
10 COL    Colorado Rockies             
# ℹ more rows

And you can see the translated SQL query!

Code
active_teams |>
  show_query()
<SQL>
SELECT teamID, "name"
FROM Teams
WHERE (yearID = 2023.0)

2.1.5.2 Using DBI

If you want to practice writing the raw SQL query

Code
con |>
  dbGetQuery(
"
SELECT teamID, name
FROM teams
WHERE yearID = 2023;
"
)
   teamID                          name
1     LAA Los Angeles Angels of Anaheim
2     ARI          Arizona Diamondbacks
3     ATL                Atlanta Braves
4     BAL             Baltimore Orioles
5     BOS                Boston Red Sox
6     CHN                  Chicago Cubs
7     CHA             Chicago White Sox
8     CIN               Cincinnati Reds
9     CLE           Cleveland Guardians
10    COL              Colorado Rockies
11    DET                Detroit Tigers
12    MIA                 Miami Marlins
13    HOU                Houston Astros
14    KCA            Kansas City Royals
15    LAN           Los Angeles Dodgers
16    MIL             Milwaukee Brewers
17    MIN               Minnesota Twins
18    NYN                 New York Mets
19    NYA              New York Yankees
20    OAK             Oakland Athletics
21    PHI         Philadelphia Phillies
22    PIT            Pittsburgh Pirates
23    SDN              San Diego Padres
24    SEA              Seattle Mariners
25    SFN          San Francisco Giants
26    SLN           St. Louis Cardinals
27    TBA                Tampa Bay Rays
28    TEX                 Texas Rangers
29    TOR             Toronto Blue Jays
30    WAS          Washington Nationals

2.1.5.3 Using Quarto/Markdown

If you want SQL syntax highlighting, and to use the data back in R, make an SQL chunk

Code
```{sql}
#| connection: con
#| output.var: "teams_sql"
SELECT teamID, name
FROM teams
WHERE yearID = 2023;
```
Code
```{r}
teams_sql
```
   teamID                          name
1     LAA Los Angeles Angels of Anaheim
2     ARI          Arizona Diamondbacks
3     ATL                Atlanta Braves
4     BAL             Baltimore Orioles
5     BOS                Boston Red Sox
6     CHN                  Chicago Cubs
7     CHA             Chicago White Sox
8     CIN               Cincinnati Reds
9     CLE           Cleveland Guardians
10    COL              Colorado Rockies
11    DET                Detroit Tigers
12    MIA                 Miami Marlins
13    HOU                Houston Astros
14    KCA            Kansas City Royals
15    LAN           Los Angeles Dodgers
16    MIL             Milwaukee Brewers
17    MIN               Minnesota Twins
18    NYN                 New York Mets
19    NYA              New York Yankees
20    OAK             Oakland Athletics
21    PHI         Philadelphia Phillies
22    PIT            Pittsburgh Pirates
23    SDN              San Diego Padres
24    SEA              Seattle Mariners
25    SFN          San Francisco Giants
26    SLN           St. Louis Cardinals
27    TBA                Tampa Bay Rays
28    TEX                 Texas Rangers
29    TOR             Toronto Blue Jays
30    WAS          Washington Nationals