Home values

SDS 236

Ben Baumer

4/19/23

Recap

Recall that last time

Obtaining home values

Zillow

  • Publishes housing price trends in a variety of levels of granularity
library(tidyverse)
url <- "https://files.zillowstatic.com/research/public_csvs/zhvi/County_zhvi_uc_sfrcondo_tier_0.33_0.67_sm_sa_month.csv"
zillow <- read_csv(url)
mass <- zillow |>
  filter(StateName == "MA")
names(mass)
  [1] "RegionID"          "SizeRank"          "RegionName"       
  [4] "RegionType"        "StateName"         "State"            
  [7] "Metro"             "StateCodeFIPS"     "MunicipalCodeFIPS"
 [10] "2000-01-31"        "2000-02-29"        "2000-03-31"       
 [13] "2000-04-30"        "2000-05-31"        "2000-06-30"       
 [16] "2000-07-31"        "2000-08-31"        "2000-09-30"       
 [19] "2000-10-31"        "2000-11-30"        "2000-12-31"       
 [22] "2001-01-31"        "2001-02-28"        "2001-03-31"       
 [25] "2001-04-30"        "2001-05-31"        "2001-06-30"       
 [28] "2001-07-31"        "2001-08-31"        "2001-09-30"       
 [31] "2001-10-31"        "2001-11-30"        "2001-12-31"       
 [34] "2002-01-31"        "2002-02-28"        "2002-03-31"       
 [37] "2002-04-30"        "2002-05-31"        "2002-06-30"       
 [40] "2002-07-31"        "2002-08-31"        "2002-09-30"       
 [43] "2002-10-31"        "2002-11-30"        "2002-12-31"       
 [46] "2003-01-31"        "2003-02-28"        "2003-03-31"       
 [49] "2003-04-30"        "2003-05-31"        "2003-06-30"       
 [52] "2003-07-31"        "2003-08-31"        "2003-09-30"       
 [55] "2003-10-31"        "2003-11-30"        "2003-12-31"       
 [58] "2004-01-31"        "2004-02-29"        "2004-03-31"       
 [61] "2004-04-30"        "2004-05-31"        "2004-06-30"       
 [64] "2004-07-31"        "2004-08-31"        "2004-09-30"       
 [67] "2004-10-31"        "2004-11-30"        "2004-12-31"       
 [70] "2005-01-31"        "2005-02-28"        "2005-03-31"       
 [73] "2005-04-30"        "2005-05-31"        "2005-06-30"       
 [76] "2005-07-31"        "2005-08-31"        "2005-09-30"       
 [79] "2005-10-31"        "2005-11-30"        "2005-12-31"       
 [82] "2006-01-31"        "2006-02-28"        "2006-03-31"       
 [85] "2006-04-30"        "2006-05-31"        "2006-06-30"       
 [88] "2006-07-31"        "2006-08-31"        "2006-09-30"       
 [91] "2006-10-31"        "2006-11-30"        "2006-12-31"       
 [94] "2007-01-31"        "2007-02-28"        "2007-03-31"       
 [97] "2007-04-30"        "2007-05-31"        "2007-06-30"       
[100] "2007-07-31"        "2007-08-31"        "2007-09-30"       
[103] "2007-10-31"        "2007-11-30"        "2007-12-31"       
[106] "2008-01-31"        "2008-02-29"        "2008-03-31"       
[109] "2008-04-30"        "2008-05-31"        "2008-06-30"       
[112] "2008-07-31"        "2008-08-31"        "2008-09-30"       
[115] "2008-10-31"        "2008-11-30"        "2008-12-31"       
[118] "2009-01-31"        "2009-02-28"        "2009-03-31"       
[121] "2009-04-30"        "2009-05-31"        "2009-06-30"       
[124] "2009-07-31"        "2009-08-31"        "2009-09-30"       
[127] "2009-10-31"        "2009-11-30"        "2009-12-31"       
[130] "2010-01-31"        "2010-02-28"        "2010-03-31"       
[133] "2010-04-30"        "2010-05-31"        "2010-06-30"       
[136] "2010-07-31"        "2010-08-31"        "2010-09-30"       
[139] "2010-10-31"        "2010-11-30"        "2010-12-31"       
[142] "2011-01-31"        "2011-02-28"        "2011-03-31"       
[145] "2011-04-30"        "2011-05-31"        "2011-06-30"       
[148] "2011-07-31"        "2011-08-31"        "2011-09-30"       
[151] "2011-10-31"        "2011-11-30"        "2011-12-31"       
[154] "2012-01-31"        "2012-02-29"        "2012-03-31"       
[157] "2012-04-30"        "2012-05-31"        "2012-06-30"       
[160] "2012-07-31"        "2012-08-31"        "2012-09-30"       
[163] "2012-10-31"        "2012-11-30"        "2012-12-31"       
[166] "2013-01-31"        "2013-02-28"        "2013-03-31"       
[169] "2013-04-30"        "2013-05-31"        "2013-06-30"       
[172] "2013-07-31"        "2013-08-31"        "2013-09-30"       
[175] "2013-10-31"        "2013-11-30"        "2013-12-31"       
[178] "2014-01-31"        "2014-02-28"        "2014-03-31"       
[181] "2014-04-30"        "2014-05-31"        "2014-06-30"       
[184] "2014-07-31"        "2014-08-31"        "2014-09-30"       
[187] "2014-10-31"        "2014-11-30"        "2014-12-31"       
[190] "2015-01-31"        "2015-02-28"        "2015-03-31"       
[193] "2015-04-30"        "2015-05-31"        "2015-06-30"       
[196] "2015-07-31"        "2015-08-31"        "2015-09-30"       
[199] "2015-10-31"        "2015-11-30"        "2015-12-31"       
[202] "2016-01-31"        "2016-02-29"        "2016-03-31"       
[205] "2016-04-30"        "2016-05-31"        "2016-06-30"       
[208] "2016-07-31"        "2016-08-31"        "2016-09-30"       
[211] "2016-10-31"        "2016-11-30"        "2016-12-31"       
[214] "2017-01-31"        "2017-02-28"        "2017-03-31"       
[217] "2017-04-30"        "2017-05-31"        "2017-06-30"       
[220] "2017-07-31"        "2017-08-31"        "2017-09-30"       
[223] "2017-10-31"        "2017-11-30"        "2017-12-31"       
[226] "2018-01-31"        "2018-02-28"        "2018-03-31"       
[229] "2018-04-30"        "2018-05-31"        "2018-06-30"       
[232] "2018-07-31"        "2018-08-31"        "2018-09-30"       
[235] "2018-10-31"        "2018-11-30"        "2018-12-31"       
[238] "2019-01-31"        "2019-02-28"        "2019-03-31"       
[241] "2019-04-30"        "2019-05-31"        "2019-06-30"       
[244] "2019-07-31"        "2019-08-31"        "2019-09-30"       
[247] "2019-10-31"        "2019-11-30"        "2019-12-31"       
[250] "2020-01-31"        "2020-02-29"        "2020-03-31"       
[253] "2020-04-30"        "2020-05-31"        "2020-06-30"       
[256] "2020-07-31"        "2020-08-31"        "2020-09-30"       
[259] "2020-10-31"        "2020-11-30"        "2020-12-31"       
[262] "2021-01-31"        "2021-02-28"        "2021-03-31"       
[265] "2021-04-30"        "2021-05-31"        "2021-06-30"       
[268] "2021-07-31"        "2021-08-31"        "2021-09-30"       
[271] "2021-10-31"        "2021-11-30"        "2021-12-31"       
[274] "2022-01-31"        "2022-02-28"        "2022-03-31"       
[277] "2022-04-30"        "2022-05-31"        "2022-06-30"       
[280] "2022-07-31"        "2022-08-31"        "2022-09-30"       
[283] "2022-10-31"        "2022-11-30"        "2022-12-31"       
[286] "2023-01-31"        "2023-02-28"        "2023-03-31"       
mass_long <- mass |>
  select(RegionName, 10:284) |>
  pivot_longer(cols = -RegionName, names_to = "month", values_to = "zillow_home_value_index")

Home value index over time

ggplot(
  mass_long, 
  aes(x = as.Date(month), 
      y = zillow_home_value_index /1000, 
      color = RegionName)
) +
  geom_point() +
  geom_line() + 
  scale_y_continuous("Zillow Home Value Index (thousands of USD)") +
  scale_x_date(NULL)

Airbnb

  • Publishes listings in selected cities
boston <- read_csv("http://data.insideairbnb.com/united-states/ma/boston/2023-03-19/visualisations/listings.csv") |>
  sf::st_as_sf(coords = c("longitude", "latitude")) |>
  sf::st_set_crs(4326)

Listings near Fenway Park

library(leaflet)
boston |>
  filter(neighbourhood == "Fenway") |>
  leaflet() |>
  addTiles() |>
  addMarkers()

Realtor.com

  • Publishes weekly and monthly home price trends
realtor <- read_csv("https://econdata.s3-us-west-2.amazonaws.com/Reports/Core/listing_weekly_core_aggregate_by_country.csv")

Scraping

Realtor.com listings

  • Examine this URL!
query <- "https://www.realtor.com/realestateandhomes-search/Upper-West-Side_Manhattan_NY/beds-3/baths-2/price-4000000-10000000"
query
[1] "https://www.realtor.com/realestateandhomes-search/Upper-West-Side_Manhattan_NY/beds-3/baths-2/price-4000000-10000000"
library(rvest)
listings <- read_html(query)

Identifying properties

properties <- listings |>
  html_elements(".detail-wrap")
properties
{xml_nodeset (42)}
 [1] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
 [2] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
 [3] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
 [4] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
 [5] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
 [6] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
 [7] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
 [8] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
 [9] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
[10] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
[11] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
[12] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
[13] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
[14] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
[15] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
[16] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
[17] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
[18] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
[19] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
[20] <div data-testid="property-detail" class="jsx-2683691781 detail-wrap fix ...
...

Extracting prices

price <- properties |> 
  html_element(".ldp-redesign-price") |>
  html_text() |>
  str_replace("k", ",000") |>
  parse_number()
price
 [1]  4250000  7995000  5200000  4750000  4275000  4500000  4800000  5500000
 [9]  4595000  5399000  6750000  8250000  7500000  4795000  4300000  7995000
[17]  7250000  7495000  4195000  7950000  8350000  4750000  8950000  5950000
[25]  7250000  4650000  5295000  4895000  4750000  9950000  4850000  4675000
[33]  7598000  5700000 10000000  5350000  6350000  6699500  5900000  4650000
[41]  5950000  5900000

Extracting other metadata

meta <- properties |> 
  html_element(".prop-meta") |> 
  html_children() |> 
  html_text()

props <- tibble(price, meta) |>
  mutate(
    beds = parse_number(str_remove(str_extract(meta, "[0-9]bed"), "bed")),
    baths = parse_number(str_remove(str_extract(meta, "[0-9.+]+bath"), "bath")),
    sqft = parse_number(str_remove(str_extract(meta, "[0-9,]+sqft"), "sqft"))
  )
props
# A tibble: 42 × 5
     price meta                            beds baths  sqft
     <dbl> <chr>                          <dbl> <dbl> <dbl>
 1 4250000 3bed4bath0.28acre lot              3   4      NA
 2 7995000 4bed4.5bath                        4   4.5    NA
 3 5200000 4bed2.5+bath3,750sqft              4   2.5  3750
 4 4750000 4bed3.5bath3,100sqft               4   3.5  3100
 5 4275000 3bed2.5bath                        3   2.5    NA
 6 4500000 4bed3.5bath2,725sqft               4   3.5  2725
 7 4800000 3bed3bath1,899sqft0.36acre lot     3   3    1899
 8 5500000 4bed4bath2,740sqft                 4   4    2740
 9 4595000 3bed3bath5,200sqft lot             3   3    5200
10 5399000 3bed3bath0.23acre lot              3   3      NA
# ℹ 32 more rows

Visualizing home prices

ggplot(props, aes(x = sqft, y = price / 1e6, size = beds)) +
  geom_point() +
  scale_y_continuous("Price (millions of USD)") +
  scale_x_continuous("Home Size (square feet)") +
  labs(
    title = "List prices for 3+ BD, 2+ BR homes",
    subtitle = "Upper West Side of Manhattan",
    source = "caption: Realtor.com"
  )

Your turn

  • Let’s get to work!