Historical Rainfall in Bremen, 1891-2015

Dan Gray

2018/08/01

Summary

I’m fairly convinced that knowing something about the distribution of data is more important that the particular values that are within that distribution.

A mentor encouraged me to always inspect and understand the “wide-scale” patterns of the dataset before zomming in and becoming concerned with my particular question.

Here I use “ridge-plots” (density plots) to get a feeling for the overall dynamics of rainfall in Bremen over the modern observational record.

Initialise Packages

# load libraries
library(tidyverse)
library(forcats)
library(ggjoy)
library(ggridges)
library(viridis)
library(extrafont)

Prepare Monthly Data

# set up directories
setwd("C:/Users/daniel/Desktop/locstore/portfolio")
source("static/data/ROAM.r")
where <-getwd() # get home-dir

# load monthly data
monthly <-read_delim("static/data/gg_ridges/produkt_nieder_monat_18900101_20161231_00691.txt",";", escape_double = FALSE, trim_ws = TRUE)

# extract individual months and years
dateStr <-as.character(monthly$MESS_DATUM_BEGINN)
dateNum <-substr(dateStr, 1, 6) %>% as.double()
dateMonth <-substr(dateNum, 5, 6) %>% as.double()
dateYear <-substr(dateNum, 1, 4) %>% as.double()

# create temporary data.frame for modifications
frame <-mutate(monthly, frameID = dateNum-dateNum[1],
                month = dateMonth, year = dateYear)

# convert factors to numeric and fix NaN
frame$STATIONS_ID <-as.factor(frame$STATIONS_ID)
frame$monthFct <-as.factor(frame$month)
frame$MO_RR[frame$MO_RR==-999] <-NA

Seasonal Distribution

# select, mutate, and plot in one go
frame %>% select (MO_RR,monthFct,year) %>% 
  mutate(labels=fct_recode(monthFct,"January" = "1", "February" = "2", "March" = "3", 
                           "April" = "4","May"= "5", "June" = "6", "July" = "7",
                           "August" = "8", "September" = "9", "October" = "10", 
                           "November" = "11", "December" = "12")) %>% 
  ggplot(aes(x=MO_RR,y=labels %>% fct_rev())) + 
  geom_density_ridges_gradient(aes(fill=labels)) + 
  scale_fill_viridis(discrete=T,option="D", direction=-1,begin=.1,end=.9) + 
  labs(x = "Total Precipitation (mm)", y = "Month of Year",
       title = "Distribution of Historical Monthly Rainfall in Bremen (1890-2015)",
       subtitle = "Data: Der Deutsche Wetter Dienst - Climate Data Center (CDC)") +
  theme_plain(base_size = 11) +
  theme(legend.position = "none")
## Picking joint bandwidth of 9.51

Historical Yearly Rainfall

# historical yearly totals
frame %>% filter(year<=2015 & year>=1891) %>% 
  mutate(grp = cut(year, breaks = c(1890,1915,1940,1965,1990,2015),
                    labels=c("1891-1915","1916-1940","1941-1965","1966-1990","1991-2015"))) %>% 
  select(MO_RR,monthFct,year,grp) %>% 
  group_by(year,grp) %>% 
  summarise(Total=sum(MO_RR)) %>% ungroup() %>% 
  ggplot(aes(x=Total,y=grp %>% fct_rev(),fill=factor(..quantile..))) +
  stat_density_ridges(geom = "density_ridges_gradient", 
                      calc_ecdf = TRUE, quantiles = c(0.025, 0.975)) + 
  scale_fill_manual(name = "Probability", values = c("#FF0000A0", "#A0A0A0A0", "#0000FFA0"),
                    labels = c("(0, 0.025]", "(0.025, 0.975]", "(0.975, 1]")) + 
  labs(x = "Total Precipitation (mm)", y = "Inteval",
       title = "Distribution of Historical Yearly Rainfall in Bremen (1891-2015)",
       subtitle = "Data: Der Deutsche Wetter Dienst - Climate Data Center (CDC)") +
  theme_plain(base_size = 11) 
## Picking joint bandwidth of 42.6

Peak Rainfall Month

# historical maximum peak rainfall month
frame %>% filter(year<=2015 & year>=1891) %>% 
  mutate(grp = cut(year, breaks = c(1890,1915,1940,1965,1990,2015),
                   labels=c("1891-1915","1916-1940","1941-1965","1966-1990","1991-2015"))) %>% 
  select(MO_RR,month,year,grp) %>% 
  group_by(year,grp) %>% filter(MO_RR==max(MO_RR)) %>% 
  ungroup() %>% 
  ggplot(aes(x=month,y=grp %>% fct_rev()))+
  geom_density_ridges(aes(fill=grp),rel_min_height=0.01) + 
  scale_fill_viridis(discrete=T,option="D", direction=-1,begin=.1,end=.9) + 
  labs(x = "Month of Year", y = "Inteval",
       title = "Distribution of Historical Peak Rainfall Month in Bremen (1891-2015)",
       subtitle = "Data: Der Deutsche Wetter Dienst - Climate Data Center (CDC)") +
  theme_plain(base_size = 11) +
  theme(legend.position = "none") 
## Picking joint bandwidth of 0.934

Prepare Daily Data

rm(dateMonth,dateNum,dateStr,dateYear,frame)

# set up directories
setwd("C:/Users/daniel/Desktop/locstore/portfolio")
where <-getwd() # get home-dir

# load daily data
daily <-read_delim("static/data/gg_ridges/produkt_nieder_tag_18900101_20161231_00691.txt",
                      ";", escape_double = FALSE, trim_ws = TRUE)
## Parsed with column specification:
## cols(
##   STATIONS_ID = col_double(),
##   MESS_DATUM = col_double(),
##   QN_6 = col_double(),
##   RS = col_double(),
##   RSF = col_double(),
##   SH_TAG = col_double(),
##   eor = col_character()
## )
# extract individual months and years
dateStr <-as.character(daily$MESS_DATUM)
dateNum <-substr(dateStr, 1, 6) %>% as.double()
dateMonth <-substr(dateNum, 5, 6) %>% as.double()
dateYear <-substr(dateNum, 1, 4) %>% as.double()

# create temporary data.frame for modifications
frame <-mutate(daily, frameID = dateNum-dateNum[1],
                month = dateMonth, year = dateYear)

# convert factors to numeric and fix NaN
frame$STATIONS_ID <-as.factor(frame$STATIONS_ID)
frame$yearFct <-as.factor(frame$year)
frame$monthFct <-as.factor(frame$month)
frame$RS[frame$RS==-999] <-NA

Inspect Daily Data

# historical daily maximum within a month  
frame_01 <-frame %>% filter(year<=2015 & year>=1891) %>% 
  mutate(grp = cut(year, breaks = c(1890,1915,1940,1965,1990,2015),
                   labels=c("1891-1915","1916-1940","1941-1965","1966-1990","1991-2015"))) %>% 
  select(RS,monthFct,yearFct,grp) %>% 
  group_by(monthFct,yearFct,grp) %>% 
  summarise(maxDay_Month=max(RS)) %>% ungroup() 

# historical total within a month 
frame_02 <-frame %>% filter(year<=2015 & year>=1891) %>% 
  mutate(grp = cut(year, breaks = c(1890,1915,1940,1965,1990,2015),
                   labels=c("1891-1915","1916-1940","1941-1965","1966-1990","1991-2015"))) %>% 
  select(RS,monthFct,yearFct,grp) %>% 
  group_by(monthFct,yearFct,grp) %>% 
  summarise(tot_Month=sum(RS)) %>% ungroup() 

# lookup table :: join and summarise
frame_join <-frame_01 %>% 
  mutate(dayContMonth = (maxDay_Month/frame_02$tot_Month)*100) %>% 
  filter(dayContMonth>=50) %>% add_count(dayContMonth) %>% 
  group_by(grp) %>% 
  summarise(tots=sum(n))

Doubling of “Single Day Downpours”

# plot N heavy rainfall "events" per interval
frame_join %>% 
  ggplot() + geom_col(aes(x=grp,y=tots,fill=grp)) + 
  scale_fill_viridis(discrete = T) + 
  labs(x = "Inteval", y ="Number of Events (n)",
  title = "Historical Single Day Extreme Rainfall Events in Bremen (1891-2015)",
  subtitle = "Data: Der Deutsche Wetter Dienst - Climate Data Center (CDC)") +
  theme_plain(base_size = 11) +
  theme(legend.position = "none")