House Sales

By Carl Goodwin in R

December 17, 2017

Various events have impacted house sales in London. There has been a series of increases in stamp duty and the impact of the financial crisis. More recently Brexit and the consequences of Covid-19.

How is London postal area SW10 coping with all this?

library(tidyverse)
library(scales, exclude = "date_format")
library(SPARQL)
library(clock)
library(wesanderson)
library(glue)
library(vctrs)
library(tsibble)
library(patchwork)
library(ggmosaic)
library(kableExtra)
theme_set(theme_bw())

(cols <- wes_palette(name = "Darjeeling1"))

House prices paid data are provided by HM Land Registry Open Data.

endpoint <- "https://landregistry.data.gov.uk/landregistry/query"

query <- 'PREFIX  text: <http://jena.apache.org/text#>
PREFIX  ppd:  <http://landregistry.data.gov.uk/def/ppi/>
PREFIX  lrcommon: <http://landregistry.data.gov.uk/def/common/>
  
SELECT  ?item ?ppd_propertyAddress ?ppd_hasTransaction ?ppd_pricePaid ?ppd_transactionCategory ?ppd_transactionDate ?ppd_transactionId ?ppd_estateType ?ppd_newBuild ?ppd_propertyAddressCounty ?ppd_propertyAddressDistrict ?ppd_propertyAddressLocality ?ppd_propertyAddressPaon ?ppd_propertyAddressPostcode ?ppd_propertyAddressSaon ?ppd_propertyAddressStreet ?ppd_propertyAddressTown ?ppd_propertyType ?ppd_recordStatus

WHERE
{ ?ppd_propertyAddress text:query _:b0 .
  _:b0 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> lrcommon:postcode .
  _:b0 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b1 .
  _:b1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "( SW10 )" .
  _:b1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b2 .
  _:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> 3000000 .
  _:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> .
  ?item ppd:propertyAddress ?ppd_propertyAddress .
  ?item ppd:hasTransaction ?ppd_hasTransaction .
  ?item ppd:pricePaid ?ppd_pricePaid .
  ?item ppd:transactionCategory ?ppd_transactionCategory .
  ?item ppd:transactionDate ?ppd_transactionDate .
  ?item ppd:transactionId ?ppd_transactionId
  
  OPTIONAL { ?item ppd:estateType ?ppd_estateType }
  OPTIONAL { ?item ppd:newBuild ?ppd_newBuild }
  OPTIONAL { ?ppd_propertyAddress lrcommon:county ?ppd_propertyAddressCounty }
  OPTIONAL { ?ppd_propertyAddress lrcommon:district ?ppd_propertyAddressDistrict }
  OPTIONAL { ?ppd_propertyAddress lrcommon:locality ?ppd_propertyAddressLocality }
  OPTIONAL { ?ppd_propertyAddress lrcommon:paon ?ppd_propertyAddressPaon }
  OPTIONAL { ?ppd_propertyAddress lrcommon:postcode ?ppd_propertyAddressPostcode }
  OPTIONAL { ?ppd_propertyAddress lrcommon:saon ?ppd_propertyAddressSaon }
  OPTIONAL { ?ppd_propertyAddress lrcommon:street ?ppd_propertyAddressStreet }
  OPTIONAL { ?ppd_propertyAddress lrcommon:town ?ppd_propertyAddressTown }
  OPTIONAL { ?item ppd:propertyType ?ppd_propertyType }
  OPTIONAL { ?item ppd:recordStatus ?ppd_recordStatus }
}'

data_list <- SPARQL(endpoint, query)

The focus is on the standard price paid.

data_tidy <- data_list$results |>
  as_tibble() |>
  mutate(
    date = new_datetime(ppd_transactionDate) |> as_date(),
    amount = ppd_pricePaid,
    prop_type = str_extract(ppd_propertyType, "(?<=common/)[\\w]+"),
    est_type = str_extract(ppd_estateType, "(?<=common/)[\\w]+"),
    cat = str_remove(ppd_transactionCategory, "<http://landregistry.data.gov.uk/def/ppi/"),
    prop_type = recode(prop_type, otherPropertyType = "Other")
  ) |>
  filter(str_detect(cat, "standard"))

A Telegraph article entitled Timeline: 20 years of stamp duty increases for home buyers pinpoints many of the key event dates.

events <- tribble(
  ~date, ~change,
  "96-07-31", "Stamp Duty £250k (1.5%) £500k (2%)",
  "98-03-31", "£250k (2%) £500k (3%)",
  "99-03-31", "£250k (2.5%) £500k (3.5%)",
  "00-03-31", "£250k (3%) £500k (4%)",
  "11-04-30", "£250k (3%) £500k (4%) £1m (5%)",
  "12-03-31", "£250k (3%) £500k (4%) £1m (5%) £2m (7%)",
  "14-12-31", "£250k (5%) £925k (10%) 1.5m (12%)",
  "07-08-09", "Financial Crisis",
  "16-06-23", "Brexit Vote",
  "20-03-23", "Covid-19 Lockdown"
) |>
  mutate(date = date_parse(date, format = "%y-%m-%d"))

events |> 
  kbl(col.names = c("Date", "Event"))
Date Event
1996-07-31 Stamp Duty £250k (1.5%) £500k (2%)
1998-03-31 £250k (2%) £500k (3%)
1999-03-31 £250k (2.5%) £500k (3.5%)
2000-03-31 £250k (3%) £500k (4%)
2011-04-30 £250k (3%) £500k (4%) £1m (5%)
2012-03-31 £250k (3%) £500k (4%) £1m (5%) £2m (7%)
2014-12-31 £250k (5%) £925k (10%) 1.5m (12%)
2007-08-09 Financial Crisis
2016-06-23 Brexit Vote
2020-03-23 Covid-19 Lockdown

Visually, it appears that the financial crisis had a big impact on sales volume, with the Brexit vote sucking much of the remaining oxygen out of the market. Stamp duty increases in between probably slowed any intermediate recovery.

to_date <- data_tidy |> summarise(max(date)) |> pull() |> date_format(format = "%b %d, %Y")

data_tidy |>
  ggplot(aes(date, amount, colour = est_type)) +
  geom_point(alpha = 0.2, size = 0.7, show.legend = FALSE) +
  geom_smooth(se = FALSE, aes(linetype = est_type), size = 1.2) +
  labs(
    title = "SW10 Standard House Prices",
    subtitle = glue("Prices Paid to {to_date} (Prices > £5m Not Shown)"
    ),
    x = NULL,
    y = NULL,
    colour = "Type", linetype = "Type",
    caption = "Source: HM Land Registry"
  ) +
  geom_vline(xintercept = events$date, size = 0.5, lty = 2, alpha = 0.4) +
  annotate("text", events$date, 5000000,
    angle = 90,
    label = events$change, vjust = 1.4, hjust = 1, size = 3, fontface = 2
  ) +
  coord_cartesian(ylim = c(0, 5000000)) +
  scale_colour_manual(values = cols[c(2, 3)]) +
  scale_x_date(date_breaks = "2 years", date_labels = "%Y") +
  scale_y_continuous(labels = label_dollar(accuracy = 0.1, prefix = "£", 
                                           scale_cut = cut_short_scale()))

An alternative way of looking at this is by median quarterly prices (with upper and lower quartiles), supplemented by sales volumes.

qtr_start <- date_today("Europe/London") |> 
  lubridate::floor_date("quarter")

data_qtile <- 
  data_tidy |> 
  filter(date < qtr_start) |> 
  mutate(yr_qtr = yearquarter(date)) |> 
  group_by(yr_qtr) |> 
  summarise(price = quantile(amount, c(0.25, 0.5, 0.75)), 
            quantile = c("lower", "median", "upper") |> factor(),
            n = n()) |> 
  ungroup() |> 
  pivot_wider(names_from = quantile, values_from = price)

last <- data_qtile |> summarise(max(yr_qtr)) |> pull()
first <- data_qtile |> summarise(min(yr_qtr)) |> pull()

p1 <- data_qtile |> 
  ggplot(aes(yr_qtr, median)) +
  geom_ribbon(aes(ymin = lower, ymax = upper), fill = cols[5]) +
  geom_line(colour = "white") +
  geom_hline(yintercept = 1000000, linetype = "dashed") +
  annotate("text", x = 17700, y = 300000, label = "Covid-19\nLockdown", size = 3) +
  scale_x_yearquarter(date_breaks = "2 years") +
  scale_y_log10(labels = label_dollar(prefix = "£", scale_cut = cut_short_scale())) +
  labs(title = glue("Median Quarterly SW10 Property Prices ({first} to {last})"), 
       subtitle = "With Upper / Lower Price Quartiles & Sales Volume",
       x = NULL, y = "Price (Log10 Scale)") +
  theme(axis.text.x = element_blank())

p2 <- data_qtile |> 
  ggplot(aes(yr_qtr, n)) +
  geom_line() +
  annotate("text", x = 14100, y = 180, label = "Financial\nCrisis", size = 3) +
  annotate("text", x = 17100, y = 130, label = "Brexit\nVote", size = 3) +
  scale_x_yearquarter(date_breaks = "2 years") +
  labs(x = NULL, y = "Transactions",
       caption = "Source: HM Land Registry") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

p1 / p2 + plot_layout(heights = c(2, 1))

The composition of SW10 reveals the postal area to be overwhelmingly dominated by leasehold flats.

trans <- data_tidy |> nrow()

data_tidy |> 
  ggplot() +
  geom_mosaic(aes(product(prop_type, est_type), fill = prop_type), 
              offset = 0.02, divider = mosaic("h")) +
  scale_fill_manual(values = cols[c(2:5)]) + 
  labs(
    title = "SW10 Transactions by Estate & Property Types",
    subtitle = glue("{comma(trans)} Transactions to {to_date}"),
    x = "", y = "", fill = "Property Type", 
    caption = "Source: HM Land Registry"
  ) +
  theme_minimal()

Other blog posts on quantum jitter look at SW10 property from diffferent perspectives: Digging Deep considers the correlation between house sales and planning applications; and Bootstraps & Bandings uses a sample of recent house sales to infer whether property bands are as representative of property values today as they were three decades ago.

R Toolbox

Summarising below the packages and functions used in this post enables me to separately create a toolbox visualisation summarising the usage of packages and functions across all posts.

Package Function
base c[5]; conflicts[1]; cumsum[1]; factor[1]; function[1]; max[2]; min[1]; nrow[1]; search[1]; sum[1]
clock as_date[1]; date_format[1]; date_parse[1]; date_today[1]
dplyr filter[7]; arrange[2]; desc[2]; group_by[2]; if_else[3]; mutate[7]; n[2]; pull[3]; recode[1]; summarise[5]; ungroup[1]
ggmosaic geom_mosaic[1]; mosaic[1]; product[1]
ggplot2 aes[6]; annotate[4]; coord_cartesian[1]; element_blank[1]; element_text[1]; geom_hline[1]; geom_line[2]; geom_point[1]; geom_ribbon[1]; geom_smooth[1]; geom_vline[1]; ggplot[4]; labs[4]; Scale[1]; scale_colour_manual[1]; scale_fill_manual[1]; scale_x_date[1]; scale_y_continuous[1]; scale_y_log10[1]; theme[2]; theme_bw[1]; theme_minimal[1]; theme_set[1]
glue glue[3]
kableExtra kbl[2]
patchwork plot_layout[1]
purrr map[1]; map2_dfr[1]; possibly[1]; set_names[1]
readr read_lines[1]
scales comma[1]; cut_short_scale[2]; label_dollar[2]
SPARQL SPARQL[1]
stats median[1]; quantile[1]
stringr str_c[5]; str_count[1]; str_detect[3]; str_extract[2]; str_remove[3]; str_remove_all[1]; str_starts[1]
tibble as_tibble[2]; tibble[2]; tribble[2]; enframe[1]
tidyr pivot_wider[1]; unnest[1]
tsibble scale_x_yearquarter[2]; yearquarter[1]
vctrs new_datetime[1]
wesanderson wes_palette[1]
Posted:
December 17, 2017
Updated:
April 21, 2022
Length:
6 minute read, 1119 words
Categories:
R
See Also: