# Here you find software for the Online-Monitoring algorithm introduced in Einbeck & Kauermann (2003). # The copyright for the article and its content, including the implemented algorithms, is hold by Taylor & Francis ltd. # If you intend to use results obtained via this software for publication or commercial purposes, please send a short e-mail to jochen.einbeck"at"durham.ac.uk. # Please download this file jumpnet3.s to an arbitary directory. Change on a shell in that directory, open R and source this file with # source("jumpnet3.s"). For explanations of the program, see below. # The software works with R as well as with S-Plus, but is considerably faster using R. # As an example for application, we provide the data set april.mag describing actual variations of the magnetic field elements recorded by # FUR (Geophysical Observatory) in Fürstenfeldbruck (Germany) in April 2001. We focus on the 5th column of this data set, # containing the angle between magnetic and geographic north in minutes ['] (postive east). Below is a short decription how to # apply the algorithm to this example. Many thanks to Dr. Martin Beblo for providing the data. # Online Monitoring with Local Smoothing Methods and Adaptive Ridging. Software: Version 0.3. (2003) # # "jump(Y, ...)" applies the online monitoring algorithm on the data set "Y", if desired starting at time point "start" and ending at time point "end". # By default the whole data set is used. # The notation is mostly identical as in the paper. # Instead of fixing "alpha" in formula (13), one might provide the corresponding quantile ("barrier"), typically 2 or 3. # "center1" and "center2" denote the centers of the kernels K1 and K2 on a range from 0 to 1, # and "support1" and "support2" the range which the kernels effectively use (0.4 corresponds to 40%). # If "gap"=100, say, the first 100 data points are not plotted. # "c0" corresponds to the constant c in (16) # The standard deviation "stype" of the observed data might be assumed fixed (e.g. stype=3 ), # or may be estimated. There are 6 options: # stype=-1 applies the triple-estimator introduced in Gasser, Sroka & Steinmetz, Biometrika, 1986 # stype=-2 computes the standard dev. as in (9), but discarding autocorrelation # stype=-3 computes the standard dev. as in (9), taking autocorrelation into account, yet without bias reduction # stype=-33 does the same as stype=-3, but with bias reduction # stype=-4 computes the autocovariances, but with update rule (10) (works only for h>h1+1) # stype=-44 same as stype=-4, but with bias reduction (works only for h>h1+1) . # Our recomendation: stype=-1, when no autocorrelation is present, and stype=-33 under presence of autocorrelation # The function "jump" yields a list of 12 items (check with names()): # [1] The original data, but outliers/missing values are replaced by short-term estimates, [2] the long term linear fit # [3] the long term constant fit, [4] the resulting long term fit , [5] the short term fit, [6] the standard deviation of [1], # [7] difference between [4] and [5], [8] the standard deviation of [7], [9] the test statistics T_t, [10] The ridge parameter lambda_t, # [11] the points were alarm is given (only the first point of each period) ,[12] "0" denotes "no alarm", "1" denotes "alarm", and "2" denotes "new alarm" # "jump" plots the results in the file "filename", default "save.ps". # "big" denotes the quantity of pictures printed on one page of the output file (1,2,and 4 is supported). # In case of any questions, please contact einbeck@stat.uni-muenchen.de # The program works on Splus as well as on R, but is considerably faster using R. # HINT: Try for beginning simply "jump(data)" and regard "save.ps", then continue with further tuning. # EXAMPLE: The magnetogram "april.mag" is available for download from this folder. Load it into R (or Splus) with magnet<-read.table("http://www.maths.dur.ac.uk/~dma0je/Software/april.mag",header=F), then # run e.g. savemag<-jump(magnet$V5, 2000,2400,stype=-33,h2=30, c0=150,h=300,filename="mag.test.ps",gap=100,barrier=3) jump<-function(Y, start=1,end=length(Y),h1=150,h2=25,h=200,c0=80,alpha=0.01, barrier=0, center1=0.5,center2=1, support1=0.4,support2=0.4,type=2, stype=-1, gap=0,filename="save.ps",main=filename,big=4){ save <- runvarspeed(Y,start,end,h1,h2,h,c0,alpha, barrier,center1,center2,support1,support2,type,stype) while (is.na(Y[start]) || Y[start]==0 ){start<-start+1} y<-Y[(start+gap):end] ymin<-min(y,na.rm=T) ymax<-max(y,na.rm=T) if (filename!="0") {postscript(filename,horizontal=T)} if (big==1){par(mfrow=c(1,1))} else if (big==2) {par(mfrow=c(2,1))} else {par(mfrow=c(2,2))} plot(y,main=main,sub ="Fig. 1 ",ylim=c(ymin,ymax),xlab="",cex=0.8) lines(save$"LongTerm"[(gap+1):(end-start+1)]) lines(save$"ShortTerm"[(gap+1):(end-start+1)],lty=7) for(i in (start+gap):end){ if (save$"Alarm"[i-start+1]==2 && i>=start+gap+h2){ segments(i-start-gap+3/4,ymin,i-start-gap+3/4,ymax)} } if (barrier==0){barrier<-qnorm(1-alpha/2)} tmin<- min(-barrier,min(save$"T_t")) tmax<- max(barrier,max(save$"T_t")) plot(save$"T_t"[(gap+1):(end-start)],type="l",sub="Fig. 2",ylim=c(tmin,tmax),xlab="",ylab="test statistics",cex=0.8) lines(rep(barrier,end-start-gap)) lines(rep(-barrier,end-start-gap)) lines(rep(0,end-start-gap)) for(i in (start+gap):end){ if (save$"Alarm"[i-start+1]==2 && i>=start+gap+h2){segments(i-start-gap+3/4,tmin,i-start-gap+3/4,tmax)} } plot(save$"Ridgeparameter"[(gap+1):(end-start)],type="l",sub="Fig. 3",ylab="ridge parameter",xlab="",cex=0.8) plot(save$"Stdev"[(gap+1+3):(end-start)],type="l",sub="Fig. 4",ylab="estimated std. dev.",xlab="",cex=0.8) if (filename!="0") {dev.off()} save } # jump calls: runvarspeed #------------------------------------------------------------------------------------------------- # 2. "runvarspeed" is the actual online monitoring program runvarspeed<-function(Y,start = 1, end=length(Y), h1=150, h2=25, h=200,c0=80,alpha = 0.01, barrier= 0, center1 = 0.5,center2 = 1,support1=0.4,support2=0.4, type=2, stype=-1) { Prog<-Prog0<-Prog1 <- Prog2 <- Sigmav<- CPAlarmwerte <- Alarm<- w1lc<- w1ll <- v1 <- w2ll <- v2 <- R<-Teststatistik<-Sigmaofmu<-gamma<-Gamma<-NULL while( is.na(Y[start]) || Y[start]==0) {start<-start+1} alarm<-mu2<-wait<-0; sigmav<-max(1,Y[start]) for(t in start:end) { if ((t %% 10)== 0){print(t)} if (is.na(Y[t])){Y[t]<-mu2} if(Y[t] == 0 || abs(Y[t]-mu2)>10*sigmav && abs(Y[min(t+1,end)]-mu2)<10*sigmav ) {Y[t] <- mu2} fit0 <- lc.weights(Y, start, t, h1, w1lc, center1, support1) fit1 <- ll.weights(Y, start, t, h1, w1ll, v1, center1,support1) fit2 <- ll.weights(Y, start, t, h2, w2ll, v2, center2,support2) mu1NW <- fit0[[1]] mu1 <- fit1[[1]] mu2 <- fit2[[1]] beta1 <- fit1[[2]] w1lc <-fit0[[2]] w1ll <- fit1[[3]] w2ll <- fit2[[3]] w2<- append(rep(0,min(h1-h2,max(t-h2-start,0))),w2ll) v1 <- fit1[[4]] #v2 <- fit2[[4]] Prog0 <- append(Prog0, mu1NW) Prog1 <- append(Prog1, mu1) Prog2 <- append(Prog2, mu2) y<- Y[max((t-h),start):t] prog2<-Prog2[max(1,t-start+1-h):(t-start+1)] if (stype==-1){ #without covariances, triple-estimator sigmav <- sisi(y, h) } else if (stype==-2){ # without covariances, from residuals sigmav<-sigma(Y[max((t-h),start):t], start,t,h, Prog2[max(1,t-start+1-h):(t-start+1)]) } else if (stype==-3){ # with covariances, from residuals Covi<-covariance3(y, start,t,max(min(t-start,h1),1),h2,h,prog2) Gamma<-Covi[[2]] sigmav<-sqrt(Covi[[1]][1]) } else if (stype==-33){ # with covariances, from residuals, with bias correction Covi<-covariance33(y, start,t,max(min(t-start,h1),1),h, prog2, w2ll/sum(w2ll)) Gamma<-Covi[[2]] #matrix of autocovariances sigmav<-sqrt(Covi[[1]][1]) #standard deviation } else if (stype==-4){ #update Covi<-covariance4(y, start,t,max(min(t-start,h1),1),max(min(t-start,h2),1),h, prog2, gamma) gamma<-Covi[[1]]; Gamma<-Covi[[2]] sigmav<-sqrt(Covi[[1]][1]) } else if (stype==-44){ #update, with bias reduction Covi<-covariance44(y, start,t,max(min(t-start,h1),1), h, prog2,w2ll/sum(w2ll), gamma) gamma<-Covi[[1]]; Gamma<-Covi[[2]] sigmav<-sqrt(Covi[[1]][1]) } else if (stype==-5){ # with covariances, from residuals, AR-Model Covi2<-covariance5(y, start, t , max(min(t-start,h1),1), h, prog2) #with AR Gamma<-Covi2[[2]] sigmav<-sqrt(Covi2[[1]][1]) } else { # use fixed value sigmav<-s } Sigmav <- append(Sigmav,sigmav) r<-exp(-c0*beta1^2) if (barrier==0){barrier<-qnorm(1-alpha/2)} if (type==0){ mu1r<-mu1NW if (stype==-3||stype==-33||stype==-4||stype==-44||stype==-5){ sigmaofmu<-corrofestimate(start, t, w1lc/sum(w1lc)-w2/sum(w2),Gamma) } else { sigmaofmu<-sigmaofestimate(start,t,w1lc/sum(w1lc)-w2/sum(w2),Sigmav[max(1, t - h1 - start + 1):(t - start+1)]) } } else if (type==2){ mu1r<-r*mu1NW +(1-r)*mu1 if (stype==-3||stype==-33||stype==-4||stype==-44||stype==-5) { sigmaofmu<-corrofestimate(start, t, r*w1lc/sum(w1lc)+(1-r)*w1ll/sum(w1ll)-w2/sum(w2),Gamma) } else { sigmaofmu<-sigmaofestimate(start,t,r*w1lc/sum(w1lc)+(1-r)*w1ll/sum(w1ll)-w2/sum(w2),Sigmav[max(1, t-h1-start+1):(t-start+1)]) } } else { mu1r<-mu1 if (stype==-3||stype==-33||stype==-4||stype==-44||stype==-5 ){ sigmaofmu<-corrofestimate(start, t, w1ll/sum(w1ll)-w2/sum(w2),Gamma) } else { sigmaofmu<-sigmaofestimate(start,t,w1ll/sum(w1ll)-w2/sum(w2),Sigmav[max(1, t - h1 - start + 1):(t - start+1)]) } } mu<- mu1r - mu2 teststatistik<-mu/sigmaofmu if(abs(mu) > max(barrier * sigmaofmu, 0.01) && t >= start + 1) { if (alarm==0 && wait==0){alarm<-2 CPAlarmwerte <- append(CPAlarmwerte, t) } else{ alarm<-1;wait<-0 } } else{ if(alarm==1){alarm<-0;wait<-1} else{alarm<-0; wait<-0} } Prog <- append(Prog,mu1r) Sigmaofmu<-append(Sigmaofmu, sigmaofmu) R<-append(R,r) Teststatistik<-append(Teststatistik,teststatistik) Alarm<-append(Alarm,alarm) } names(Prog1) <- names(Prog) <- names(Prog0)<-names(Prog2) <-names(R)<-names(Teststatistik)<-names(Sigmav)<-names(Sigmaofmu) <- names(Y[start:end])<- names(Alarm)<-start:end resultlist <- list("CorrectedData" = Y[start:end],"LongTermLinear" = Prog1,"LongTermConstant" = Prog0,"LongTerm"= Prog, "ShortTerm" = Prog2, "Stdev"=Sigmav, "LongTerm-ShortTerm"=Prog-Prog2 , "Stdev(mu)"=Sigmaofmu, "T_t"=Teststatistik , "Ridgeparameter"=R, "Alarmpoints" = CPAlarmwerte,"Alarm"=Alarm) resultlist } #runvarspeed calls: n.mu , var.slope, sisi, sigma, covariance, covariance2, corrofestimate, sigmaofestimate #------------------------------------------------------------------------------------------------- #3.1 n.mu computes and updates weights for local constant long term fit lc.weights<-function(Y, start, t, w, weights, center, support) { twindow <- max(start, (t - w)):t ; y<-Y[twindow] if(t == start || t == start + 1) { predict <- y[1] } else if(t < start + w) { weights <- kern(twindow, start*(1-center)+t*center, support * (t - start+1)/2) predict <- wfit(t, twindow, y, lweights = weights)[[1]] } else if(t == start + w) { weights <- kern(twindow, start*(1-center)+t*center, support * (w+1)/2) predict <- wfit(t, twindow, y, lweights = weights)[[1]] } else { predict <- wfit(t, twindow, y , lweights = weights)[[1]] } list(predict = predict, nadaweights = weights) } #lc.weights calls: kern, wfit #3.2 var.slope computes and updates weights for local linear fit ll.weights <-function(Y, start, t, w, lweights, sweights, center, support) { twindow <- max(start, (t - w)):t; y<-Y[twindow] if(t == start || t == start + 1 || t == start + 2) { predict <- y[1] slope <- 0 } else if(t < start + w) { lweights <- linweights(start * (1 - center) + t * center, t, twindow, support * max(center * (t - start+1), (1 - center) * (t - start+1))) sweights <- sloweights(start * (1 - center) + t * center, t, twindow, support * max(center * (t - start+1),(1 - center) * (t - start+1))) f0 <- wfit(t, twindow, y, lweights, sweights) predict <- f0[[1]] slope <- f0[[2]] } else if(t == start + w) { lweights <- linweights((t - w) * (1 - center) + t * center, t, twindow, support * max(center * (w+1), (1 - center) * (w+1))) sweights <- sloweights((t - w) * (1 - center) + t * center, t, twindow, support * max(center * (w+1), (1 - center) * (w+1))) f <- wfit(t, twindow, y, lweights, sweights) predict <- f[[1]] slope <- f[[2]] } else { f <- wfit(t, twindow, y, lweights, sweights) predict <- f[[1]] slope <- f[[2]] } list(predict = predict, slope = slope, linearweights = lweights, slopeweights = sweights) } #ll.weights calls: linweights, sloweights, wfit #------------------------------------------------------------------------------------------------- #4.1 kern is a kernel function kern<-function(xi, x = 0, w = 1) { 1/w * dnorm((x - xi)/w) } #4.2 wfit computes from given weights the intercept and slope paremeter wfit<-function(x0, twindow, y, lweights, sweights = NULL) { alpha <- sum(lweights * y)/sum(lweights) beta <- sum(sweights * y)/sum(sweights * (twindow - x0)) list(alpha = alpha, beta = beta) } #------------------------------------------------------------------------------------------------- #4.3 linweights computes weights for local linear fit in starting period linweights<-function(x, x0, twindow, h = 1) { kern(twindow, x, h) * (sum(kern(twindow, x, h) * (twindow - x0)^2) - (twindow - x0) * sum(kern(twindow, x, h) * (twindow - x0))) } #linweigths calls: kern #4.4 sloweights computes weights for the slope parameter of the local linear fit in starting period sloweights<-function(x, x0, twindow, h = 1) { kern(twindow, x, h) * (sum(kern(twindow, x, h) * (twindow - x0)^1) - (twindow - x0) * sum(kern(twindow, x, h))) } #sloweigths calls: kern #------------------------------------------------------------------------------------------------- #3.3 sisi computes variance with triple-method (Gasser, Sroka & Steinmetz, Biometrika, 1986) sisi<-function(y, h) { var.d<-1 l<-length(y) if (l > 2){ var.d <- 2/3 * mean((1/2 * (y[1:(l-2)] + y[3:l]) - y[2:(l-1)])^2, na.rm = T) } sqrt(var.d) } #------------------------------------------------------------------------------------------------- #3.4 sigma computes variance by means of residuals sigma<-function(y, start, t, h, pro) { l<-length(y) if(l == 1 || l == 2) { s <- 1 } else if(l <= 6) { yquer <- 1/l * sum(y) s <- 1/(l - 1) * sum((y - yquer)^2) } else if(l <= h) { s <- 1/(l-2) * sum((y - pro)^2) } else { s <- 1/(l - 2) * sum((y - pro)^2) #with bias correction } sqrt(s) } #------------------------------------------------------------------------------------------------- #3.5 covariance computes autocovariances gamma #in virtue of (9) with c_d =1 covariance3<-function(y, start = 1, t, h1, h2, h, prog) { l<-length(y) if(h <= h1 + 1) {h <- h1 + 2} if( l <=h) {gamm <- append((sigma(y, start, t, h, prog))^2, rep(0, h1))} else { gamm<-rep(0,h1+1) dy<- y-prog for (d in 0: h2){gamm[d+1] <- 1/(l-d)*sum(dy[1:(l-d)]*dy[(1+d):l])} } gamm <- as.vector(gamm) names(gamm) <- 0:h1 Gamm <- matrix(0, h1 + 1, h1 + 1) Gamm[, 1] <- gamm for(i in 2:(h1 + 1)) {Gamm[, i] <- append(gamm[i:2], gamm[1:(h1 - i + 2)])} list(gamm, Gamm) } #covariance3 calls: sigma. # with bias-correction, formula (9): covariance33<-function(y, start = 1, t, h1, h, prog, weights) { vecC<-NULL; l<-length(y) h2<-length(weights)-1 if(h <= h1 + 1) {h <- h1 + 2} if(l<=h) {gamm <- append((sigma(y, start, t, h, prog))^2, rep(0, h1))} else { gamm<-rep(0,h1+1) dy <- y - prog vecC <- append( rep( 1/(1-2*weights[h2+1]+ sum(weights^2)), h2+1), rep(0, h1-h2) ) for (d in 0:h2){gamm[d+1] <- 1/(l-d)*vecC[d+1]* sum(dy[1:(l-d)]*dy[(1+d):l])} } gamm <- as.vector(gamm) names(gamm) <- 0:h1 Gamm <- matrix(0, h1 + 1, h1 + 1) Gamm[, 1] <- gamm for(i in 2:(h1 + 1)) {Gamm[, i] <- append(gamm[i:2], gamm[1:(h1 - i + 2)])} list(gamm, Gamm) } #covariance33 calls: sigma #----------------------------------------------------------------------------------------------Update #without bias reduktion covariance4<-function(y, start = 1, t, h1, h2, h,prog, gamm) { if(h <= h1 + 1) {h <- h1 + 2} if(t < start +h) {gamm <- append((sigma(y, start, t, h, prog))^2, rep(0, h1))} else { dy <- y[(h-h1+1):(h+1)] - prog[(h-h1+1):(h+1)] dth1 <- matrix(dy[(h1+1):1], 1, h1+1) matrixC <- diag(append (rep(1, h2+1), rep(0, h1-h2) )) gamm <- 1/(h+1)*(y[h+1]-prog[h+1])*dth1 %*% matrixC +h/(h+1)*gamm } gamm <- as.vector(gamm) names(gamm) <- 0:h1 Gamm <- matrix(0, h1 + 1, h1 + 1) Gamm[, 1] <- gamm for(i in 2:(h1 + 1)) {Gamm[, i] <- append(gamm[i:2], gamm[1:(h1 - i + 2)])} list(gamm, Gamm) } #covariance4 calls: sigma. #with bias reduction: covariance44<-function(y, start = 1, t, h1, h,prog, weights, gamm) { if(h <= h1 + 1) {h <- h1 + 2}; h2<-length(weights) -1 if(t < start + h) {gamm <- append((sigma(y, start, t, h, prog))^2, rep(0, h1))} else { dy <- y[(h-h1+1):(h+1)]- prog[(h-h1+1):(h+1)] dth1 <- matrix(dy[(h1+1):1], 1, h1+1) matrixC <- diag(append( rep( 1/(1-2*weights[h2+1]+ sum(weights^2)), h2+1), rep(0, h1-h2) )) # all higher autokorrelations are set to zero. gamm <- 1/(h+1)*(y[h+1]-prog[h+1])*dth1 %*% matrixC + h/(h+1)*gamm } gamm <- as.vector(gamm) names(gamm) <- 0:h1 Gamm <- matrix(0, h1 + 1, h1 + 1) Gamm[, 1] <- gamm for(i in 2:(h1 + 1)) {Gamm[, i] <- append(gamm[i:2], gamm[1:(h1 - i + 2)])} list(gamm, Gamm) } #covariance44 calls: sigma #------------------------------------------------------------------------------------------------- #3.6 covariance5 estimates autocovariances gamma with AR covariance5<-function(y, start = 1, t, h1, h, prog) { if(t <= start + 3) {alpha <- 0.9} else {alpha <- ar(y, aic = F, order.max = 1, method = "yule-walker")[[2]][1]} if(h <= h1 + 1) {h <- h1 + 2} gamm <- rep(0, h1 + 1) gamm[1] <- sigma(y, start, t, h, prog)^2 if(t >= start + h) { gamm[2:(h1+1)]<- gamm[1] *alpha^(2:(h1+1)-1) } gamm <- as.vector(gamm) names(gamm) <- 0:h1 Gamm <- matrix(0, h1 + 1, h1 + 1) Gamm[, 1] <- gamm for(i in 2:(h1 + 1)) {Gamm[, i] <- append(gamm[i:2], gamm[1:(h1 - i + 2)])} list(gamm, Gamm) } #covariance5 calls: sigma #------------------------------------------------------------------------------------------------- #3.7 corrofestimate computes the local variance, when autocorrelation is present corrofestimate<-function(start, t, weights, Gamm = matrix(0, length(weights))) { if(t == start || t == start + 1 || t == start + 2) { sigmapredict <- 1 } else { W <- matrix(weights, 1, length(weights)) G <- Gamm sigmapredict <- sqrt(abs(W %*% G %*% t(W))) } sigmapredict } #------------------------------------------------------------------------------------------------- #3.8. sigmaofestimate estimates local variance, when no autocorrelation is present sigmaofestimate<-function(start, t, weights, Sigma) { if(t == start || t == start + 1 || t == start + 2) { sigmapredict <- 1 } else { varpredict <- sum(weights^2 * Sigma^2) sigmapredict <- sqrt(varpredict) } sigmapredict }