Data set from the paper by Cooley, Davis, Naveau (The Annals of Applied Statistics 2012, Vol. 6, No. 4, 1406–1429). Data available at http://www.stat.colostate.edu/~cooleyd/DataAndCode/PredExtremes/.
Variable: daily maximum hourly NO2 measurement.
Five stations located in Washington DC and nearby Virginia: Alexandria (alx), McMillan (mc), River Terrace (rt), Takoma School (ts) and Arlington (arl).
Period: 5000 daily NO2 measurements recorded between 1995 and 2010, keeping only those records for which all five stations have measurements, resulting in 4497 daily measurements.
Rounding: because the data were truncated to the nearest ppb, a uniform random variable on the interval [−0.5, 0.5] was added to the data so that they behave more like the underlying continuous variable.
## Load the data and display the first and last 6 rows
## (6 is the default for head() and tail(); it is written out explicitly so
## that the comment and the printed output stay in agreement)
load("nitrogenDioxideMsmts.RData")
head(no2Msmts, n = 6L)
tail(no2Msmts, n = 6L)
## date alx arl mc rt ts
## 1 9162 48.94594 40.02866 44.394349 41.51378 41.44504
## 2 9163 36.89465 36.48012 32.839171 35.87481 35.16941
## 3 9164 38.98373 44.07211 39.801299 40.08453 38.58505
## 4 9165 34.41888 31.91026 26.643805 31.29779 29.14976
## 5 9166 17.34388 14.71787 5.918686 12.71006 11.78087
## 6 9167 34.01735 32.12474 33.940509 31.13621 21.81810
## date alx arl mc rt ts
## 4631 14635 26.86159 24.84220 26.10330 28.25237 26.56204
## 4632 14636 38.20120 32.46180 37.88082 38.51873 36.11601
## 4633 14637 31.54337 28.05154 42.41072 34.67496 41.32923
## 4634 14638 23.26186 10.96069 14.56890 21.55876 16.49128
## 4635 14639 25.54794 19.02271 20.27997 21.12096 19.73806
## 4636 14640 51.28440 44.88402 43.02091 49.27270 45.42105
## Load graphical package lattice
library(lattice)
## Time series of NO2 at each of the 5 stations, one plot per station.
## The date column is converted from a day count with origin 1970-01-01.
dates <- as.Date(no2Msmts[, "date"], origin = "1970-01-01")
xyplot(no2Msmts[, "alx"] ~ dates, ylab = "NO2 at Alexandria", pch = 19)
xyplot(no2Msmts[, "arl"] ~ dates, ylab = "NO2 at Arlington", pch = 19)
xyplot(no2Msmts[, "mc"] ~ dates, ylab = "NO2 at McMillan ", pch = 19)
xyplot(no2Msmts[, "rt"] ~ dates, ylab = "NO2 at River Terrace", pch = 19)
xyplot(no2Msmts[, "ts"] ~ dates, ylab = "NO2 at Takoma School", pch = 19)
On Saturday and Sunday there is less NO2 for all series.
## The loaded object is a matrix; convert it to a data frame so that the
## station columns can be accessed by name with `$`
class(no2Msmts)
## [1] "matrix" "array"
NO2 <- as.data.frame(no2Msmts)
names(NO2)
## [1] "date" "alx" "arl" "mc" "rt" "ts"
## In case the locale is not English (kept for any other code that formats
## dates with locale-dependent names)
Sys.setlocale("LC_TIME", "en_US.UTF-8")
## [1] "en_US.UTF-8"
## Dependence on weekdays: NO2 decreases on Sat and Sun.
## The weekday factor is built from the numeric weekday ("%u": 1 = Monday,
## ..., 7 = Sunday) instead of weekdays(), so the labels stay correct even if
## the "en_US.UTF-8" locale is unavailable and Sys.setlocale() fails.
day_labels <- c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
day <- factor(day_labels[as.integer(format(dates, "%u"))],
              levels = day_labels, ordered = TRUE)
## Box plot of NO2 by weekday at the Takoma School (ts) station
bwplot(day ~ NO2$ts, main = "Takoma School station", xlab = "NO2")
Strangely, the median is highest in February, March and April for all five series.
## Month of each observation as an ordered factor (Jan < ... < Dec).
## It is built from the numeric month ("%m") and the built-in English
## abbreviations in month.abb, so it does not depend on the LC_TIME locale.
month <- factor(month.abb[as.integer(format(dates, "%m"))],
                levels = month.abb, ordered = TRUE)
## Box plots of NO2 by month, one plot per station
bwplot(month ~ NO2$alx)
bwplot(month ~ NO2$arl)
bwplot(month ~ NO2$mc)
bwplot(month ~ NO2$rt)
bwplot(month ~ NO2$ts)
Empirical quantiles vs normal quantiles
## QQ-plot of the July measurements at Alexandria (alx) against normal
## quantiles; no distribution is passed, so qqmath() uses its default
qqmath(NO2$alx[month=="Jul"])
Empirical quantiles vs exponential quantiles
## QQ-plot of the July measurements at Alexandria (alx) against exponential
## quantiles; the argument name is written in full rather than relying on
## partial matching of `distri`
qqmath(NO2$alx[month == "Jul"], distribution = qexp)
We use the package ismev, which accompanies the book "An Introduction to Statistical Modeling of Extreme Values" by Stuart Coles.
# the library ismev needs to be downloaded from CRAN to your machine
# install.packages("ismev")
## Load library ismev
library(ismev)
## Loading required package: mgcv
## Loading required package: nlme
## This is mgcv 1.8-38. For overview type 'help("mgcv-package")'.
When all the data are modelled together, without distinguishing days and months, the tail is poorly fitted for every one of the series.
## GEV fit and diagnostic plots for each station, all days and months together
for (station in c("alx", "arl", "mc", "rt", "ts")) {
  gev.diag(gev.fit(NO2[[station]]))
}
Better fits are obtained for individual months
## GEV fit and diagnostic plots for each station, restricted to a specific
## month (July)
is_july <- month == "Jul"
for (station in c("alx", "arl", "mc", "rt", "ts")) {
  gev.diag(gev.fit(NO2[[station]][is_july]))
}
## $conv
## [1] 0
##
## $nllh
## [1] 1648.973
##
## $mle
## [1] 32.8942383 14.1591176 -0.2119891
##
## $se
## [1] 0.79464224 0.57170539 0.03715304
## $conv
## [1] 0
##
## $nllh
## [1] 1647.064
##
## $mle
## [1] 30.7457773 14.3098402 -0.2366442
##
## $se
## [1] 0.79541361 0.56987139 0.03438239
## $conv
## [1] 0
##
## $nllh
## [1] 1616.905
##
## $mle
## [1] 28.52068010 11.86502236 -0.03510485
##
## $se
## [1] 0.66513952 0.48023036 0.03576291
## $conv
## [1] 0
##
## $nllh
## [1] 1608.132
##
## $mle
## [1] 32.7382090 12.6154456 -0.1859158
##
## $se
## [1] 0.70277439 0.50049085 0.03468554
## $conv
## [1] 0
##
## $nllh
## [1] 1593.454
##
## $mle
## [1] 30.090874 12.255358 -0.197299
##
## $se
## [1] 0.67986659 0.47858079 0.03377075
## Mean residual life plot for the July measurements at Alexandria (alx)
alx_july <- NO2$alx[month == "Jul"]
mrl.plot(alx_july)
No distinction of months.
Observe the extremes: do they occur simultaneously at pairs of stations?
## Scatter plot matrix of the five station series, all months pooled
station_cols <- c("alx", "arl", "mc", "rt", "ts")
splom(NO2[, station_cols], pch = 19)
Using a different colour for each month.
## Scatter plot matrix of the five station series, one colour per month.
## The grouping variable is passed under its documented name `groups`
## (the original `group` relied on partial argument matching); `month` is
## already a factor, so no as.factor() conversion is needed.
splom(NO2[, 2:6], groups = month, auto.key = TRUE)