Code
library(tidyverse)
library(Lahman)
https://www.sporcle.com/games/subcategory/mlb
You must show your code!!
# find all active teams (teams that played in 2023)
active_teams <- Teams |>
filter(yearID == 2023) |>
select(teamID, name)
# find all MVPs
AwardsPlayers |>
filter(awardID == "Most Valuable Player") |>
# find out which team(s) they played for in that year
left_join(
Appearances,
by = join_by("playerID", "yearID", "lgID")
) |>
# sort them by year
arrange(desc(yearID)) |>
group_by(teamID) |>
# take the first one, since they're already sorted
summarize(
playerID = first(playerID),
yearID = first(yearID)
) |>
# now reduce to only the active teams!
right_join(active_teams, by = join_by(teamID)) |>
arrange(name) |>
# get the players' names
left_join(People, by = join_by(playerID)) |>
mutate(Player = paste(nameFirst, nameLast)) |>
# make the headers match
select(Team = name, Player, Year = yearID) |>
knitr::kable()
Team | Player | Year |
---|---|---|
Arizona Diamondbacks | NA NA | NA |
Atlanta Braves | Ronald Acuña | 2023 |
Baltimore Orioles | Cal Ripken | 1991 |
Boston Red Sox | Mookie Betts | 2018 |
Chicago Cubs | Kris Bryant | 2016 |
Chicago White Sox | Jose Abreu | 2020 |
Cincinnati Reds | Joey Votto | 2010 |
Cleveland Guardians | Al Rosen | 1953 |
Colorado Rockies | Larry Walker | 1997 |
Detroit Tigers | Miguel Cabrera | 2013 |
Houston Astros | Jose Altuve | 2017 |
Kansas City Royals | George Brett | 1980 |
Los Angeles Angels of Anaheim | Shohei Ohtani | 2023 |
Los Angeles Dodgers | Cody Bellinger | 2019 |
Miami Marlins | Giancarlo Stanton | 2017 |
Milwaukee Brewers | Christian Yelich | 2018 |
Minnesota Twins | Joe Mauer | 2009 |
New York Mets | NA NA | NA |
New York Yankees | Aaron Judge | 2024 |
Oakland Athletics | Miguel Tejada | 2002 |
Philadelphia Phillies | Bryce Harper | 2021 |
Pittsburgh Pirates | Andrew McCutchen | 2013 |
San Diego Padres | Ken Caminiti | 1996 |
San Francisco Giants | Buster Posey | 2012 |
Seattle Mariners | Ichiro Suzuki | 2001 |
St. Louis Cardinals | Paul Goldschmidt | 2022 |
Tampa Bay Rays | NA NA | NA |
Texas Rangers | Josh Hamilton | 2010 |
Toronto Blue Jays | Josh Donaldson | 2015 |
Washington Nationals | Bryce Harper | 2015 |
SELECT
CONCAT(nameFirst, ' ', nameLast) AS Player,
t.name,
mvp_year AS Year
FROM AwardsPlayers ap
LEFT JOIN Appearances a ON a.playerID = ap.playerID AND a.yearID = ap.yearID AND a.lgID = ap.lgID
RIGHT JOIN (
SELECT a.teamID, MAX(ap.yearID) AS mvp_year
FROM AwardsPlayers ap
LEFT JOIN Appearances a ON a.playerID = ap.playerID AND a.yearID = ap.yearID AND a.lgID = ap.lgID
WHERE ap.awardID = 'Most Valuable Player'
GROUP BY a.teamID
ORDER BY a.teamID
) last_mvps ON last_mvps.teamID = a.teamID AND last_mvps.mvp_year = a.yearID
LEFT JOIN Teams t ON t.teamID = last_mvps.teamID
LEFT JOIN People p ON p.playerID = a.playerID
WHERE ap.awardID = 'Most Valuable Player'
AND t.yearID = 2023
ORDER BY t.name;
library(tidyverse)
library(Lahman)
Batting |>
filter(yearID >= 1970 & yearID <= 2009) |>
group_by(playerID, yearID) |>
summarize(
teamIDs = paste(unique(teamID), collapse = "/"),
tHR = sum(HR)
) |>
left_join(People, by = join_by(playerID)) |>
mutate(player_name = paste(nameLast, nameFirst)) |>
select(yearID, teamIDs, player_name, tHR) |>
ungroup() |>
group_by(yearID) |>
group_split() |>
map(~arrange(.x, desc(tHR))) |>
map(head, n = 5) |>
bind_rows() |>
arrange(desc(yearID), desc(tHR)) |>
select(-playerID) |>
knitr::kable()
`summarise()` has grouped output by 'playerID'. You can override using the
`.groups` argument.
Adding missing grouping variables: `playerID`
yearID | teamIDs | player_name | tHR |
---|---|---|---|
2009 | SLN | Pujols Albert | 47 |
2009 | MIL | Fielder Prince | 46 |
2009 | PHI | Howard Ryan | 45 |
2009 | ARI | Reynolds Mark | 44 |
2009 | SDN | Gonzalez Adrian | 40 |
2008 | PHI | Howard Ryan | 48 |
2008 | ARI/CIN | Dunn Adam | 40 |
2008 | NYN | Delgado Carlos | 38 |
2008 | MIL | Braun Ryan | 37 |
2008 | DET | Cabrera Miguel | 37 |
2007 | NYA | Rodriguez Alex | 54 |
2007 | MIL | Fielder Prince | 50 |
2007 | PHI | Howard Ryan | 47 |
2007 | TBA | Peña Carlos | 46 |
2007 | CIN | Dunn Adam | 40 |
2006 | PHI | Howard Ryan | 58 |
2006 | BOS | Ortiz David | 54 |
2006 | SLN | Pujols Albert | 49 |
2006 | WAS | Soriano Alfonso | 46 |
2006 | HOU | Berkman Lance | 45 |
2005 | ATL | Jones Andruw | 51 |
2005 | NYA | Rodriguez Alex | 48 |
2005 | BOS | Ortiz David | 47 |
2005 | CHN | Lee Derrek | 46 |
2005 | BOS | Ramirez Manny | 45 |
2004 | LAN | Beltre Adrian | 48 |
2004 | CIN | Dunn Adam | 46 |
2004 | SLN | Pujols Albert | 46 |
2004 | SFN | Bonds Barry | 45 |
2004 | BOS | Ramirez Manny | 43 |
2003 | TEX | Rodriguez Alex | 47 |
2003 | PHI | Thome Jim | 47 |
2003 | SFN | Bonds Barry | 45 |
2003 | MIL | Sexson Richie | 45 |
2003 | ATL | Lopez Javy | 43 |
2002 | TEX | Rodriguez Alex | 57 |
2002 | CLE | Thome Jim | 52 |
2002 | CHN | Sosa Sammy | 49 |
2002 | SFN | Bonds Barry | 46 |
2002 | TEX | Palmeiro Rafael | 43 |
2001 | SFN | Bonds Barry | 73 |
2001 | CHN | Sosa Sammy | 64 |
2001 | ARI | Gonzalez Luis | 57 |
2001 | TEX | Rodriguez Alex | 52 |
2001 | LAN | Green Shawn | 49 |
2000 | CHN | Sosa Sammy | 50 |
2000 | SFN | Bonds Barry | 49 |
2000 | HOU | Bagwell Jeff | 47 |
2000 | ANA | Glaus Troy | 47 |
2000 | MON | Guerrero Vladimir | 44 |
1999 | SLN | McGwire Mark | 65 |
1999 | CHN | Sosa Sammy | 63 |
1999 | SEA | Griffey Ken | 48 |
1999 | TEX | Palmeiro Rafael | 47 |
1999 | ATL | Jones Chipper | 45 |
1998 | SLN | McGwire Mark | 70 |
1998 | CHN | Sosa Sammy | 66 |
1998 | SEA | Griffey Ken | 56 |
1998 | SDN | Vaughn Greg | 50 |
1998 | CHA | Belle Albert | 49 |
1997 | OAK/SLN | McGwire Mark | 58 |
1997 | SEA | Griffey Ken | 56 |
1997 | COL | Walker Larry | 49 |
1997 | NYA | Martinez Tino | 44 |
1997 | HOU | Bagwell Jeff | 43 |
1996 | OAK | McGwire Mark | 52 |
1996 | BAL | Anderson Brady | 50 |
1996 | SEA | Griffey Ken | 49 |
1996 | CLE | Belle Albert | 48 |
1996 | COL | Galarraga Andres | 47 |
1995 | CLE | Belle Albert | 50 |
1995 | COL | Bichette Dante | 40 |
1995 | SEA | Buhner Jay | 40 |
1995 | CHA | Thomas Frank | 40 |
1995 | OAK | McGwire Mark | 39 |
1994 | SFN | Williams Matt | 43 |
1994 | SEA | Griffey Ken | 40 |
1994 | HOU | Bagwell Jeff | 39 |
1994 | CHA | Thomas Frank | 38 |
1994 | SFN | Bonds Barry | 37 |
1993 | SFN | Bonds Barry | 46 |
1993 | TEX | Gonzalez Juan | 46 |
1993 | SEA | Griffey Ken | 45 |
1993 | CHA | Thomas Frank | 41 |
1993 | ATL | Justice David | 40 |
1992 | TEX | Gonzalez Juan | 43 |
1992 | OAK | McGwire Mark | 42 |
1992 | DET | Fielder Cecil | 35 |
1992 | SDN | McGriff Fred | 35 |
1992 | CLE | Belle Albert | 34 |
1991 | OAK | Canseco Jose | 44 |
1991 | DET | Fielder Cecil | 44 |
1991 | NYN | Johnson Howard | 38 |
1991 | BAL | Ripken Cal | 34 |
1991 | SFN | Williams Matt | 34 |
1990 | DET | Fielder Cecil | 51 |
1990 | CHN | Sandberg Ryne | 40 |
1990 | OAK | McGwire Mark | 39 |
1990 | OAK | Canseco Jose | 37 |
1990 | NYN | Strawberry Darryl | 37 |
1989 | SFN | Mitchell Kevin | 47 |
1989 | NYN | Johnson Howard | 36 |
1989 | TOR | McGriff Fred | 36 |
1989 | CLE | Carter Joe | 35 |
1989 | CIN | Davis Eric | 34 |
1988 | OAK | Canseco Jose | 42 |
1988 | NYN | Strawberry Darryl | 39 |
1988 | TOR | McGriff Fred | 34 |
1988 | OAK | McGwire Mark | 32 |
1988 | HOU | Davis Glenn | 30 |
1987 | CHN | Dawson Andre | 49 |
1987 | OAK | McGwire Mark | 49 |
1987 | TOR | Bell George | 47 |
1987 | ATL | Murphy Dale | 44 |
1987 | NYN | Strawberry Darryl | 39 |
1986 | TOR | Barfield Jesse | 40 |
1986 | PHI | Schmidt Mike | 37 |
1986 | OAK | Kingman Dave | 35 |
1986 | MIN | Gaetti Gary | 34 |
1986 | OAK | Canseco Jose | 33 |
1985 | DET | Evans Darrell | 40 |
1985 | CHA | Fisk Carlton | 37 |
1985 | ATL | Murphy Dale | 37 |
1985 | KCA | Balboni Steve | 36 |
1985 | NYA | Mattingly Don | 35 |
1984 | BOS | Armas Tony | 43 |
1984 | ATL | Murphy Dale | 36 |
1984 | PHI | Schmidt Mike | 36 |
1984 | OAK | Kingman Dave | 35 |
1984 | OAK | Murphy Dwayne | 33 |
1983 | PHI | Schmidt Mike | 40 |
1983 | BOS | Rice Jim | 39 |
1983 | BOS | Armas Tony | 36 |
1983 | ATL | Murphy Dale | 36 |
1983 | CHA | Kittle Ron | 35 |
1982 | CAL | Jackson Reggie | 39 |
1982 | ML4 | Thomas Gorman | 39 |
1982 | NYN | Kingman Dave | 37 |
1982 | NYA | Winfield Dave | 37 |
1982 | ATL | Murphy Dale | 36 |
1981 | PHI | Schmidt Mike | 31 |
1981 | MON | Dawson Andre | 24 |
1981 | OAK | Armas Tony | 22 |
1981 | BOS | Evans Dwight | 22 |
1981 | CIN | Foster George | 22 |
1980 | PHI | Schmidt Mike | 48 |
1980 | NYA | Jackson Reggie | 41 |
1980 | ML4 | Oglivie Ben | 41 |
1980 | ML4 | Thomas Gorman | 38 |
1980 | OAK | Armas Tony | 35 |
1979 | CHN | Kingman Dave | 48 |
1979 | PHI | Schmidt Mike | 45 |
1979 | ML4 | Thomas Gorman | 45 |
1979 | BOS | Lynn Fred | 39 |
1979 | BOS | Rice Jim | 39 |
1978 | BOS | Rice Jim | 46 |
1978 | CIN | Foster George | 40 |
1978 | PHI | Luzinski Greg | 35 |
1978 | CAL | Baylor Don | 34 |
1978 | ML4 | Hisle Larry | 34 |
1977 | CIN | Foster George | 52 |
1977 | ATL | Burroughs Jeff | 41 |
1977 | PHI | Luzinski Greg | 39 |
1977 | BOS | Rice Jim | 39 |
1977 | PHI | Schmidt Mike | 38 |
1976 | PHI | Schmidt Mike | 38 |
1976 | NYN | Kingman Dave | 37 |
1976 | CHN | Monday Rick | 32 |
1976 | NYA | Nettles Graig | 32 |
1976 | CIN | Foster George | 29 |
1975 | PHI | Schmidt Mike | 38 |
1975 | OAK | Jackson Reggie | 36 |
1975 | NYN | Kingman Dave | 36 |
1975 | ML4 | Scott George | 36 |
1975 | PHI | Luzinski Greg | 34 |
1974 | PHI | Schmidt Mike | 36 |
1974 | CIN | Bench Johnny | 33 |
1974 | CHA | Allen Dick | 32 |
1974 | LAN | Wynn Jim | 32 |
1974 | OAK | Jackson Reggie | 29 |
1973 | PIT | Stargell Willie | 44 |
1973 | ATL | Johnson Davey | 43 |
1973 | ATL | Evans Darrell | 41 |
1973 | ATL | Aaron Hank | 40 |
1973 | SFN | Bonds Bobby | 39 |
1972 | CIN | Bench Johnny | 40 |
1972 | SDN | Colbert Nate | 38 |
1972 | CHA | Allen Dick | 37 |
1972 | CHN | Williams Billy | 37 |
1972 | ATL | Aaron Hank | 34 |
1971 | PIT | Stargell Willie | 48 |
1971 | ATL | Aaron Hank | 47 |
1971 | CIN | May Lee | 39 |
1971 | PHI | Johnson Deron | 34 |
1971 | SFN | Bonds Bobby | 33 |
1970 | CIN | Bench Johnny | 45 |
1970 | WS2 | Howard Frank | 44 |
1970 | CHN | Williams Billy | 42 |
1970 | MIN | Killebrew Harmon | 41 |
1970 | CIN | Perez Tony | 40 |
SELECT
CONCAT(ANY_VALUE(p.nameFirst), ' ', ANY_VALUE(p.nameLast)) AS Player,
yearID,
GROUP_CONCAT(DISTINCT teamID) AS teamIDs,
SUM(HR) AS tHR
FROM Batting b
LEFT JOIN People p ON p.playerID = b.playerID
WHERE yearID >= 1970 AND yearID < 2010
AND yearID = 2009
GROUP BY b.playerID, yearID
ORDER BY tHR DESC
LIMIT 5;
Batting |>
group_by(playerID, yearID) |>
summarize(
tHR = sum(HR)
) |>
filter(tHR >= 40) |>
group_by(playerID) |>
summarize(
n = n(),
last = max(yearID)
) |>
arrange(desc(n)) |>
left_join(People, by = join_by(playerID)) |>
filter(nchar(nameLast) <= 4) |>
select(n, last, nameLast, nameFirst) |>
knitr::kable()
`summarise()` has grouped output by 'playerID'. You can override using the
`.groups` argument.
n | last | nameLast | nameFirst |
---|---|---|---|
11 | 1932 | Ruth | Babe |
7 | 2003 | Sosa | Sammy |
6 | 2012 | Dunn | Adam |
6 | 1965 | Mays | Willie |
5 | 1938 | Foxx | Jimmie |
4 | 2019 | Cruz | Nelson |
3 | 1948 | Mize | Johnny |
1 | 1987 | Bell | George |
1 | 1961 | Cash | Norm |
1 | 2006 | Dye | Jermaine |
1 | 2005 | Lee | Derrek |
1 | 1929 | Ott | Mel |
1 | 2007 | Peña | Carlos |
1 | 1955 | Post | Wally |
1 | 1978 | Rice | Jim |
1 | 2024 | Soto | Juan |
SELECT
COUNT(DISTINCT yearID) AS N,
MAX(yearID) AS most_recent,
ANY_VALUE(p.nameLast) AS last_name
FROM
(SELECT playerID, yearID, SUM(HR) AS tHR
FROM Batting
GROUP BY playerID, yearID
HAVING SUM(HR) >= 40) AS hr40
LEFT JOIN People p ON p.playerID = hr40.playerID
GROUP BY hr40.playerID
HAVING LEN(ANY_VALUE(p.nameLast)) <= 4
ORDER BY N DESC;
# A tibble: 22 × 3
playerID n years
<chr> <int> <chr>
1 degroja01 2 2018, 2019
2 gibsobo01 2 1968, 1970
3 glavito02 2 1991, 1998
4 hallaro01 2 2003, 2010
5 klubeco01 2 2014, 2017
6 linceti01 2 2008, 2009
7 mclaide01 2 1968, 1969
8 perryga01 2 1972, 1978
9 saberbr01 2 1985, 1989
10 santajo01 2 2004, 2006
# ℹ 12 more rows
Pitching |>
group_by(playerID, teamID) |>
summarize(wins = sum(W)) |>
group_by(teamID) |>
group_split() |>
map(arrange, desc(wins)) |>
map(slice_head, n = 1) |>
bind_rows() |>
right_join(
Teams |>
filter(yearID == 2023) |>
select(teamID, name),
by = join_by(teamID)
) |>
arrange(name) |>
knitr::kable()
`summarise()` has grouped output by 'playerID'. You can override using the
`.groups` argument.
playerID | teamID | wins | name |
---|---|---|---|
johnsra05 | ARI | 118 | Arizona Diamondbacks |
niekrph01 | ATL | 266 | Atlanta Braves |
palmeji01 | BAL | 268 | Baltimore Orioles |
clemero02 | BOS | 192 | Boston Red Sox |
rootch01 | CHN | 201 | Chicago Cubs |
lyonste01 | CHA | 260 | Chicago White Sox |
rixeyep01 | CIN | 179 | Cincinnati Reds |
fellebo01 | CLE | 266 | Cleveland Guardians |
delarjo01 | COL | 86 | Colorado Rockies |
daussho01 | DET | 222 | Detroit Tigers |
niekrjo01 | HOU | 144 | Houston Astros |
splitpa01 | KCA | 166 | Kansas City Royals |
weaveje02 | LAA | 150 | Los Angeles Angels of Anaheim |
suttodo01 | LAN | 233 | Los Angeles Dodgers |
alcansa01 | MIA | 41 | Miami Marlins |
gallayo01 | MIL | 89 | Milwaukee Brewers |
kaatji01 | MIN | 189 | Minnesota Twins |
seaveto01 | NYN | 198 | New York Mets |
fordwh01 | NYA | 236 | New York Yankees |
hunteca01 | OAK | 131 | Oakland Athletics |
carltst01 | PHI | 241 | Philadelphia Phillies |
coopewi01 | PIT | 202 | Pittsburgh Pirates |
shower01 | SDN | 100 | San Diego Padres |
maricju01 | SFN | 238 | San Francisco Giants |
hernafe02 | SEA | 169 | Seattle Mariners |
gibsobo01 | SLN | 251 | St. Louis Cardinals |
shielja02 | TBA | 87 | Tampa Bay Rays |
houghch01 | TEX | 139 | Texas Rangers |
stiebda01 | TOR | 175 | Toronto Blue Jays |
strasst01 | WAS | 113 | Washington Nationals |
Create a duckdb instance
Start populating it with data from the Lahman package.
# Have to do this for every table...
dbWriteTable(con, "Teams", Lahman::Teams)
dbWriteTable(con, "People", Lahman::People)
dbWriteTable(con, "Appearances", Lahman::Appearances)
dbWriteTable(con, "AwardsPlayers", Lahman::AwardsPlayers)
dbWriteTable(con, "Batting", Lahman::Batting)
dbWriteTable(con, "TeamsFranchises", Lahman::TeamsFranchises)
Challenge: Use a map()
statement to do them all!
Now, you can access the data in your duckdb SQL database in several ways.
dbplyr
# Source: SQL [?? x 2]
# Database: DuckDB 1.4.0 [unknown@Linux 6.11.0-1018-azure:R 4.5.1/:memory:]
teamID name
<fct> <chr>
1 LAA Los Angeles Angels of Anaheim
2 ARI Arizona Diamondbacks
3 ATL Atlanta Braves
4 BAL Baltimore Orioles
5 BOS Boston Red Sox
6 CHN Chicago Cubs
7 CHA Chicago White Sox
8 CIN Cincinnati Reds
9 CLE Cleveland Guardians
10 COL Colorado Rockies
# ℹ more rows
And you can see the translated SQL query!
DBI
If you want to practice writing the raw SQL query
teamID name
1 LAA Los Angeles Angels of Anaheim
2 ARI Arizona Diamondbacks
3 ATL Atlanta Braves
4 BAL Baltimore Orioles
5 BOS Boston Red Sox
6 CHN Chicago Cubs
7 CHA Chicago White Sox
8 CIN Cincinnati Reds
9 CLE Cleveland Guardians
10 COL Colorado Rockies
11 DET Detroit Tigers
12 MIA Miami Marlins
13 HOU Houston Astros
14 KCA Kansas City Royals
15 LAN Los Angeles Dodgers
16 MIL Milwaukee Brewers
17 MIN Minnesota Twins
18 NYN New York Mets
19 NYA New York Yankees
20 OAK Oakland Athletics
21 PHI Philadelphia Phillies
22 PIT Pittsburgh Pirates
23 SDN San Diego Padres
24 SEA Seattle Mariners
25 SFN San Francisco Giants
26 SLN St. Louis Cardinals
27 TBA Tampa Bay Rays
28 TEX Texas Rangers
29 TOR Toronto Blue Jays
30 WAS Washington Nationals
If you want SQL syntax highlighting, and to use the data back in R, make an SQL chunk
teamID name
1 LAA Los Angeles Angels of Anaheim
2 ARI Arizona Diamondbacks
3 ATL Atlanta Braves
4 BAL Baltimore Orioles
5 BOS Boston Red Sox
6 CHN Chicago Cubs
7 CHA Chicago White Sox
8 CIN Cincinnati Reds
9 CLE Cleveland Guardians
10 COL Colorado Rockies
11 DET Detroit Tigers
12 MIA Miami Marlins
13 HOU Houston Astros
14 KCA Kansas City Royals
15 LAN Los Angeles Dodgers
16 MIL Milwaukee Brewers
17 MIN Minnesota Twins
18 NYN New York Mets
19 NYA New York Yankees
20 OAK Oakland Athletics
21 PHI Philadelphia Phillies
22 PIT Pittsburgh Pirates
23 SDN San Diego Padres
24 SEA Seattle Mariners
25 SFN San Francisco Giants
26 SLN St. Louis Cardinals
27 TBA Tampa Bay Rays
28 TEX Texas Rangers
29 TOR Toronto Blue Jays
30 WAS Washington Nationals