SDS 236
4/19/23
[1] "RegionID" "SizeRank" "RegionName"
[4] "RegionType" "StateName" "State"
[7] "Metro" "StateCodeFIPS" "MunicipalCodeFIPS"
[10] "2000-01-31" "2000-02-29" "2000-03-31"
[13] "2000-04-30" "2000-05-31" "2000-06-30"
[16] "2000-07-31" "2000-08-31" "2000-09-30"
[19] "2000-10-31" "2000-11-30" "2000-12-31"
[22] "2001-01-31" "2001-02-28" "2001-03-31"
[25] "2001-04-30" "2001-05-31" "2001-06-30"
[28] "2001-07-31" "2001-08-31" "2001-09-30"
[31] "2001-10-31" "2001-11-30" "2001-12-31"
[34] "2002-01-31" "2002-02-28" "2002-03-31"
[37] "2002-04-30" "2002-05-31" "2002-06-30"
[40] "2002-07-31" "2002-08-31" "2002-09-30"
[43] "2002-10-31" "2002-11-30" "2002-12-31"
[46] "2003-01-31" "2003-02-28" "2003-03-31"
[49] "2003-04-30" "2003-05-31" "2003-06-30"
[52] "2003-07-31" "2003-08-31" "2003-09-30"
[55] "2003-10-31" "2003-11-30" "2003-12-31"
[58] "2004-01-31" "2004-02-29" "2004-03-31"
[61] "2004-04-30" "2004-05-31" "2004-06-30"
[64] "2004-07-31" "2004-08-31" "2004-09-30"
[67] "2004-10-31" "2004-11-30" "2004-12-31"
[70] "2005-01-31" "2005-02-28" "2005-03-31"
[73] "2005-04-30" "2005-05-31" "2005-06-30"
[76] "2005-07-31" "2005-08-31" "2005-09-30"
[79] "2005-10-31" "2005-11-30" "2005-12-31"
[82] "2006-01-31" "2006-02-28" "2006-03-31"
[85] "2006-04-30" "2006-05-31" "2006-06-30"
[88] "2006-07-31" "2006-08-31" "2006-09-30"
[91] "2006-10-31" "2006-11-30" "2006-12-31"
[94] "2007-01-31" "2007-02-28" "2007-03-31"
[97] "2007-04-30" "2007-05-31" "2007-06-30"
[100] "2007-07-31" "2007-08-31" "2007-09-30"
[103] "2007-10-31" "2007-11-30" "2007-12-31"
[106] "2008-01-31" "2008-02-29" "2008-03-31"
[109] "2008-04-30" "2008-05-31" "2008-06-30"
[112] "2008-07-31" "2008-08-31" "2008-09-30"
[115] "2008-10-31" "2008-11-30" "2008-12-31"
[118] "2009-01-31" "2009-02-28" "2009-03-31"
[121] "2009-04-30" "2009-05-31" "2009-06-30"
[124] "2009-07-31" "2009-08-31" "2009-09-30"
[127] "2009-10-31" "2009-11-30" "2009-12-31"
[130] "2010-01-31" "2010-02-28" "2010-03-31"
[133] "2010-04-30" "2010-05-31" "2010-06-30"
[136] "2010-07-31" "2010-08-31" "2010-09-30"
[139] "2010-10-31" "2010-11-30" "2010-12-31"
[142] "2011-01-31" "2011-02-28" "2011-03-31"
[145] "2011-04-30" "2011-05-31" "2011-06-30"
[148] "2011-07-31" "2011-08-31" "2011-09-30"
[151] "2011-10-31" "2011-11-30" "2011-12-31"
[154] "2012-01-31" "2012-02-29" "2012-03-31"
[157] "2012-04-30" "2012-05-31" "2012-06-30"
[160] "2012-07-31" "2012-08-31" "2012-09-30"
[163] "2012-10-31" "2012-11-30" "2012-12-31"
[166] "2013-01-31" "2013-02-28" "2013-03-31"
[169] "2013-04-30" "2013-05-31" "2013-06-30"
[172] "2013-07-31" "2013-08-31" "2013-09-30"
[175] "2013-10-31" "2013-11-30" "2013-12-31"
[178] "2014-01-31" "2014-02-28" "2014-03-31"
[181] "2014-04-30" "2014-05-31" "2014-06-30"
[184] "2014-07-31" "2014-08-31" "2014-09-30"
[187] "2014-10-31" "2014-11-30" "2014-12-31"
[190] "2015-01-31" "2015-02-28" "2015-03-31"
[193] "2015-04-30" "2015-05-31" "2015-06-30"
[196] "2015-07-31" "2015-08-31" "2015-09-30"
[199] "2015-10-31" "2015-11-30" "2015-12-31"
[202] "2016-01-31" "2016-02-29" "2016-03-31"
[205] "2016-04-30" "2016-05-31" "2016-06-30"
[208] "2016-07-31" "2016-08-31" "2016-09-30"
[211] "2016-10-31" "2016-11-30" "2016-12-31"
[214] "2017-01-31" "2017-02-28" "2017-03-31"
[217] "2017-04-30" "2017-05-31" "2017-06-30"
[220] "2017-07-31" "2017-08-31" "2017-09-30"
[223] "2017-10-31" "2017-11-30" "2017-12-31"
[226] "2018-01-31" "2018-02-28" "2018-03-31"
[229] "2018-04-30" "2018-05-31" "2018-06-30"
[232] "2018-07-31" "2018-08-31" "2018-09-30"
[235] "2018-10-31" "2018-11-30" "2018-12-31"
[238] "2019-01-31" "2019-02-28" "2019-03-31"
[241] "2019-04-30" "2019-05-31" "2019-06-30"
[244] "2019-07-31" "2019-08-31" "2019-09-30"
[247] "2019-10-31" "2019-11-30" "2019-12-31"
[250] "2020-01-31" "2020-02-29" "2020-03-31"
[253] "2020-04-30" "2020-05-31" "2020-06-30"
[256] "2020-07-31" "2020-08-31" "2020-09-30"
[259] "2020-10-31" "2020-11-30" "2020-12-31"
[262] "2021-01-31" "2021-02-28" "2021-03-31"
[265] "2021-04-30" "2021-05-31" "2021-06-30"
[268] "2021-07-31" "2021-08-31" "2021-09-30"
[271] "2021-10-31" "2021-11-30" "2021-12-31"
[274] "2022-01-31" "2022-02-28" "2022-03-31"
[277] "2022-04-30" "2022-05-31" "2022-06-30"
[280] "2022-07-31" "2022-08-31" "2022-09-30"
[283] "2022-10-31" "2022-11-30" "2022-12-31"
[286] "2023-01-31" "2023-02-28" "2023-03-31"
{xml_nodeset (42)}
[1] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
[2] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
[3] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
[4] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
[5] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
[6] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
[7] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
[8] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
[9] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
[10] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
[11] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
[12] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
[13] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
[14] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
[15] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
[16] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
[17] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
[18] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
[19] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
[20] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
...
price <- properties |>
html_element(".ldp-redesign-price") |>
html_text() |>
str_replace("k", ",000") |>
parse_number()
price
[1] 4250000 7995000 5200000 4750000 4275000 4500000 4800000 5500000
[9] 4595000 5399000 6750000 8250000 7500000 4795000 4300000 7995000
[17] 7250000 7495000 4195000 7950000 8350000 4750000 8950000 5950000
[25] 7250000 4650000 5295000 4895000 4750000 9950000 4850000 4675000
[33] 7598000 5700000 10000000 5350000 6350000 6699500 5900000 4650000
[41] 5950000 5900000
meta <- properties |>
html_element(".prop-meta") |>
html_children() |>
html_text()
props <- tibble(price, meta) |>
mutate(
beds = parse_number(str_remove(str_extract(meta, "[0-9]bed"), "bed")),
baths = parse_number(str_remove(str_extract(meta, "[0-9.+]+bath"), "bath")),
sqft = parse_number(str_remove(str_extract(meta, "[0-9,]+sqft"), "sqft"))
)
props
# A tibble: 42 × 5
price meta beds baths sqft
<dbl> <chr> <dbl> <dbl> <dbl>
1 4250000 3bed4bath0.28acre lot 3 4 NA
2 7995000 4bed4.5bath 4 4.5 NA
3 5200000 4bed2.5+bath3,750sqft 4 2.5 3750
4 4750000 4bed3.5bath3,100sqft 4 3.5 3100
5 4275000 3bed2.5bath 3 2.5 NA
6 4500000 4bed3.5bath2,725sqft 4 3.5 2725
7 4800000 3bed3bath1,899sqft0.36acre lot 3 3 1899
8 5500000 4bed4bath2,740sqft 4 4 2740
9 4595000 3bed3bath5,200sqft lot 3 3 5200
10 5399000 3bed3bath0.23acre lot 3 3 NA
# ℹ 32 more rows
ggplot(props, aes(x = sqft, y = price / 1e6, size = beds)) +
geom_point() +
scale_y_continuous("Price (millions of USD)") +
scale_x_continuous("Home Size (square feet)") +
labs(
title = "List prices for 3+ BD, 2+ BR homes",
subtitle = "Upper West Side of Manhattan",
source = "caption: Realtor.com"
)