# R Script accompanying the talk "Local Principal curve" (Jochen Einbeck) # at the Workshop "Principal manifolds for data cartography and dimension reduction" # Leicester, August 24-26, 2006, Leicester, UK. # The use of the examples or of this R code in publications # (however, not the use of function lpc() itself) # requires the permission of the authors (jochen.einbeck at durham.ac.uk). # Requires packages hdrcde, lattice, and scatterplot3d to be loaded from CRAN. # This source file will produce a series of pictures. # Load LPC -code, [1] source("lpc.r") # Speed - Flow data library(hdrcde) data(lane2) # Plot on page 2 # Local principal curve par(mfrow=c(2,1)) speedlpc <- lpc(cbind(lane2$flow/40,lane2$speed), h= 3, t0=3, x0=c(25,12), way="two", pen=F, plotlpc=0) plot(lane2$flow/40, lane2$speed,pch=".",ylab="speed", xlab="flow/40") lines(speedlpc[[1]][,1],speedlpc[[1]][,2], col=2) # Plot on page 30 # Multi-valued regression, [3] # function modalreg is part of R package hdrcde (Hyndman & Einbeck, 2006). lane2cond2.a <- modalreg(lane2$flow, lane2$speed, xfix=(1:55)*40, a=100, b=4, iter= 50, P=2, deg=0) # Density plot, page 9 kde2dbypoint <- function(xdat,ydat, xyfix, h){ # point wise kernel density estimation # xdat, ydat: data # xyfix: evaluation points lx <- dim(xyfix)[1] kde <- rep(0,lx) for (s in 1:lx){ kde[s]<-1/length(xdat)*sum(kern(xdat,xyfix[s,1],h[1])*kern(ydat,xyfix[s,2],h[2])) } kde } library(lattice) trellis.device() surf <- expand.grid(x = 1:55, y=0:80) surf$z <- kde2dbypoint(lane2$flow/40,lane2$speed, surf, h=c(3,3)) # Simple density plot without lpc: # wireframe(z ~ x * y, data=surf, aspect = c(1, 0.5), scales = list(arrows = FALSE)) kern.lpc <- kde2dbypoint(lane2$flow/40,lane2$speed, speedlpc[[1]], h=c(3,3)) # kde with LPC: See R help Tue 22 Nov 2005 (Deepayan) # at http://tolstoy.newcastle.edu.au/R/help/05/11/16135.html pts <- data.frame(x = speedlpc[[1]][,1], y = speedlpc[[1]][,2], z = kern.lpc) wireframe(z ~ x * y, surf, aspect = c(1, .5), scales = list(arrows = FALSE), pts = pts, panel.3d.wireframe = function(x, y, z, xlim, ylim, zlim, xlim.scaled, ylim.scaled, zlim.scaled, pts, ...) { panel.3dwire(x = x, y = y, z = z, xlim = xlim, ylim = ylim, zlim = zlim, xlim.scaled = xlim.scaled, ylim.scaled = ylim.scaled, zlim.scaled = zlim.scaled, ...) xx <- xlim.scaled[1] + diff(xlim.scaled) * (pts$x - xlim[1]) / diff(xlim) yy <- ylim.scaled[1] + diff(ylim.scaled) * (pts$y - ylim[1]) / diff(ylim) zz <- zlim.scaled[1] + diff(zlim.scaled) * (pts$z - zlim[1]) / diff(zlim) panel.3dscatter(x = xx, y = yy, z = zz, xlim = xlim, ylim = ylim, zlim = zlim, xlim.scaled = xlim.scaled, ylim.scaled = ylim.scaled, zlim.scaled = zlim.scaled, ...) }) # Pennsylvania Floodplain Data, page 18, [1] beaver<- read.table("beaver1.txt",header=F) beaverlist<-NULL for (i in 1:106){ for (j in 1:70){ if (beaver[i,j]==1){ beaverlist<-rbind(c(j,(107-i)),beaverlist) } } } # Plot data without LPC: # plot(beaverlist[,1],beaverlist[,2]) lpc.b1<-lpc(cbind(beaverlist[,1],beaverlist[,2]),h=1.4, t0=1.4,iter=40,way="two",pen=T,mult=50) # Europe Coastal Resort Data, [2] seebad<- read.table("seebad.dat",header=F) dim(seebad) seelist<-NULL for (i in 1:101){ for (j in 1:135){ if (seebad[i,j]==1){ seelist<-rbind(c(j,(102-i)),seelist) } } } # Plot coastal resort data without LPC: # par(mfrow=c(1,1)) # plot(seelist[,1],seelist[,2],pch=".") # LPC lpc.see <- lpc(seelist, h= 1.9, t0=1.9, iter=150, way= "two", mult=20) # Phillips curves # data sets available on request: jochen.einbeck@durham.ac.uk phil.rate <- read.table("UNRATE.txt", header=T) # US, 1948-2005 phil.infl <- read.table("CPIAUCNS.txt", header=T) # US, 1921-2005 rate <- phil.rate[565:692,2] # infl <- phil.infl[889:1016,2] # 01/1995-08/2005 # 2-dim plot # plot(rate, infl, col=as.vector(cbind(rep(1,12),rep(2,12),rep(3,12),rep(4,12),rep(5,12),rep(6,12),rep(7,12),rep(8,12),rep(9,12),rep(10,12),rep(11,12)))) # LPC library(scatterplot3d) lpc.rate <- lpc(cbind(rate,1:128,infl), h=c(0.6,3,3),t0=1, iter=150, plotlpc=2) lpc.phil.scat <- scatterplot3d(rate,1:128,infl,color= as.vector(cbind(rep(1,12),rep(2,12),rep(3,12),rep(4,12),rep(5,12),rep(6,12),rep(7,12),rep(8,12),rep(9,12),rep(10,12),rep(11,12)))[1:128],ylab="time") lpc.phil.scat$points3d(lpc.rate[[1]][,1],lpc.rate[[1]][,2],lpc.rate[[1]][,3],type="l") # Letters, [2] source("letters.r") # Scallops, [2] # The Scallops Data are part of the S+ Spatial Stats package # The following code is S+ code (not tested in R) # module(spatial) # summary(scallops) # library(maps) # scallops[,"lgcatch"] <- log(scallops$tcatch+1) # trellis.device() # par(mfrow=c(2,3)) # map("usa", xlim= c(-74,-71), ylim=c(38.2, 41.5)) # points(scallops$long, scallops$lat, cex=0.75) # int.scp <- interp(scallops$long, scallops$lat, scallops$lgcatch) # contour(int.scp, add=T) # lpc.scallops <- lpc(cbind(scallops$long, scallops$lat), h= 0.15, # t0=0.15, iter=50, way= "two", pen=T, penpot=2, mult=1, thresh=0.3) # Bladder cancer microarray data bladder<-read.table("d2fn.txt") sbladder<- t(bladder) dimnames(sbladder)<-list(1:40, 1:3036) dim(sbladder) system.time(pairs(sbladder[,11:20])) system.time(lpc(sbladder[,11:20], h=1, t0=1, plotlpc=2, iter=30)) # Literature: # [1] Einbeck, Tutz & Evers (2005): Local principal # curves. Statistics and Computing 15, 301--313. # [2] Einbeck, Tutz & Evers (2005b): Exploring # multivariate data structures with local principal curves. In: # Weihs, C. and Gaul, W. (Eds.). Classification - The # Ubiquitous Challenge. Springer, Heidelberg. # [3] Einbeck & Tutz (2006): Modelling beyond # regression functions: An application of multimodal regression to # speed-flow data. Journal of the Royal Statistical Society, # Series C (Applied Statistics), 55, 461--475.