res <- read.csv("/tmp/3000_hit_club.csv")
library(ggplot2)
ggplot(data = res, aes(x = reorder(playerID, tH), y = tH)) +
geom_bar(stat = "identity") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
Clunky, error-prone, not automated, not reproducible
(https://github.com/rstats-db/DBI)
Connects seamlessly to SQLite, MySQL, PostgreSQL, MonetDB, etc.
library(DBI)
## Loading required package: methods
conn <- dbConnect(RMySQL::MySQL(), dbname = "lahman", user = "r-user", password = "mypass")
Retrieve results using dbGetQuery()
.
sql <- "
SELECT playerID, sum(H) as tH
FROM Batting
GROUP BY playerID
HAVING tH >= 3000
ORDER BY tH desc;"
res <- dbGetQuery(conn, sql)
## Warning in .local(conn, statement, ...): Decimal MySQL column 1 imported as
## numeric
res
## playerID tH
## 1 rosepe01 4256
## 2 cobbty01 4189
## 3 aaronha01 3771
## 4 musiast01 3630
## 5 speaktr01 3514
## 6 jeterde01 3465
## 7 yastrca01 3419
## 8 ansonca01 3418
## 9 wagneho01 3415
## 10 molitpa01 3319
## 11 collied01 3315
## 12 mayswi01 3283
## 13 murraed02 3255
## 14 lajoina01 3242
## 15 ripkeca01 3184
## 16 brettge01 3154
## 17 wanerpa01 3152
## 18 yountro01 3142
## 19 gwynnto01 3141
## 20 winfida01 3110
## 21 biggicr01 3060
## 22 henderi01 3055
## 23 carewro01 3053
## 24 brocklo01 3023
## 25 palmera01 3020
## 26 boggswa01 3010
## 27 kalinal01 3007
## 28 clemero01 3000
About a connection
dbGetInfo(conn)
## $host
## [1] "localhost"
##
## $user
## [1] "r-user"
##
## $dbname
## [1] "lahman"
##
## $conType
## [1] "Localhost via UNIX socket"
##
## $serverVersion
## [1] "5.7.13-0ubuntu0.16.04.2"
##
## $protocolVersion
## [1] 10
##
## $threadId
## [1] 212
##
## $rsId
## list()
dbListFields(conn, name = "Batting")
## [1] "playerID" "yearID" "stint" "teamID" "lgID"
## [6] "G" "G_batting" "AB" "R" "H"
## [11] "2B" "3B" "HR" "RBI" "SB"
## [16] "CS" "BB" "SO" "IBB" "HBP"
## [21] "SH" "SF" "GIDP" "G_old"
About a database/schema
dbListTables(conn)
## [1] "AllstarFull" "Appearances" "AwardsManagers"
## [4] "AwardsPlayers" "AwardsShareManagers" "AwardsSharePlayers"
## [7] "Batting" "BattingPost" "CollegePlaying"
## [10] "Fielding" "FieldingOF" "FieldingPost"
## [13] "HallOfFame" "Managers" "ManagersHalf"
## [16] "Master" "Pitching" "PitchingPost"
## [19] "Salaries" "Schools" "SeriesPost"
## [22] "Teams" "TeamsFranchises" "TeamsHalf"
About a table
dbListFields(conn, name = "Batting")
## [1] "playerID" "yearID" "stint" "teamID" "lgID"
## [6] "G" "G_batting" "AB" "R" "H"
## [11] "2B" "3B" "HR" "RBI" "SB"
## [16] "CS" "BB" "SO" "IBB" "HBP"
## [21] "SH" "SF" "GIDP" "G_old"
Teams <- dbReadTable(conn, "Teams")
str(Teams)
## 'data.frame': 2775 obs. of 48 variables:
## $ yearID : int 1871 1871 1871 1871 1871 1871 1871 1871 1871 1872 ...
## $ lgID : chr "NA" "NA" "NA" "NA" ...
## $ teamID : chr "BS1" "CH1" "CL1" "FW1" ...
## $ franchID : chr "BNA" "CNA" "CFC" "KEK" ...
## $ divID : chr NA NA NA NA ...
## $ Rank : int 3 2 8 7 5 1 9 6 4 2 ...
## $ G : int 31 28 29 19 33 28 25 29 32 58 ...
## $ Ghome : int NA NA NA NA NA NA NA NA NA NA ...
## $ W : int 20 19 10 7 16 21 4 13 15 35 ...
## $ L : int 10 9 19 12 17 7 21 15 15 19 ...
## $ DivWin : chr NA NA NA NA ...
## $ WCWin : chr NA NA NA NA ...
## $ LgWin : chr "N" "N" "N" "N" ...
## $ WSWin : chr NA NA NA NA ...
## $ R : int 401 302 249 137 302 376 231 351 310 617 ...
## $ AB : int 1372 1196 1186 746 1404 1281 1036 1248 1353 2576 ...
## $ H : int 426 323 328 178 403 410 274 384 375 747 ...
## $ X2B : int 70 52 35 19 43 66 44 51 54 94 ...
## $ X3B : int 37 21 40 8 21 27 25 34 26 35 ...
## $ HR : int 3 10 7 2 1 9 3 6 6 14 ...
## $ BB : int 60 60 26 33 33 46 38 49 48 27 ...
## $ SO : int 19 22 25 9 15 23 30 19 13 28 ...
## $ SB : int 73 69 18 16 46 56 53 62 48 35 ...
## $ CS : int NA NA NA NA NA NA NA NA NA 15 ...
## $ HBP : int NA NA NA NA NA NA NA NA NA NA ...
## $ SF : int NA NA NA NA NA NA NA NA NA NA ...
## $ RA : int 303 241 341 243 313 266 287 362 303 434 ...
## $ ER : int 109 77 116 97 121 137 108 153 137 173 ...
## $ ERA : num 3.55 2.76 4.11 5.17 3.72 4.95 4.3 5.51 4.37 3.02 ...
## $ CG : int 22 25 23 19 32 27 23 28 32 48 ...
## $ SHO : int 1 0 0 1 1 0 1 0 0 1 ...
## $ SV : int 3 1 0 0 0 0 0 0 0 1 ...
## $ IPouts : int 828 753 762 507 879 747 678 750 846 1545 ...
## $ HA : int 367 308 346 261 373 329 315 431 371 566 ...
## $ HRA : int 2 6 13 5 7 3 3 4 4 3 ...
## $ BBA : int 42 28 53 21 42 53 34 75 45 63 ...
## $ SOA : int 23 22 34 17 22 16 16 12 13 0 ...
## $ E : int 225 218 223 163 227 194 220 198 217 432 ...
## $ DP : int NA NA NA NA NA NA NA NA NA NA ...
## $ FP : num 0.83 0.82 0.81 0.8 0.83 0.84 0.82 0.84 0.85 0.82 ...
## $ name : chr "Boston Red Stockings" "Chicago White Stockings" "Cleveland Forest Citys" "Fort Wayne Kekiongas" ...
## $ park : chr "South End Grounds I" "Union Base-Ball Grounds" "National Association Grounds" "Hamilton Field" ...
## $ attendance : int NA NA NA NA NA NA NA NA NA NA ...
## $ BPF : int 103 104 96 101 90 102 97 101 94 106 ...
## $ PPF : int 98 102 100 107 88 98 99 100 98 102 ...
## $ teamIDBR : chr "BOS" "CHI" "CLE" "KEK" ...
## $ teamIDlahman45: chr "BS1" "CH1" "CL1" "FW1" ...
## $ teamIDretro : chr "BS1" "CH1" "CL1" "FW1" ...
Many other SQL operations including writing tables, performing transactions, etc. are possible through DBI
.