### Part I

### Read in data

 data(galaxies, package="MASS")
 data(rock)

### Examine data set
 
 galaxies
 ?galaxies
 mode(galaxies)
 length(galaxies)
 
 rock
 ?rock
 mode(rock)
 dim(rock)
 names(rock)

 is.data.frame(galaxies)
 is.data.frame(rock)


### Extract, say,
  # 5th ovservation from a vector
  galaxies[5]
  # 2nd row from a data frame (or matrix)
  rock[2,]
  # 1st column from a data frame
  rock[,1] 
  rock$area
  # the element in the 2nd row and first colum 
  rock[2,1]
  # subsets of interest, such as all cases for which perimeter > 200
  rock[which(rock$peri>2000),]
  # the data frame orderd wrt to a particular variable, say perimeter
  rock[order(rock$peri),]


### Know your working directory 
  getwd()
  ls()
  # Save stuff in working directory
  save.image("mixture.RData")
  # close R, and open up again
  load("mixture.RData")


### Basic programming
  # if-then, for, while as usual --- see hadndout
  # Important peculiarity of R: apply  
  apply(rock,2,max)
  # gives the maximum over each colum of rock

### Data display 

  hist(galaxies, breaks=20)

  plot(rock$peri, rock$shape)
  # or
  plot(rock[,2:3])

  par(mfrow=c(2,1))
  hist(rock$peri)
  hist(rock$shape)

### Part II

estep<- function(dat, p, mu, sigma){
   n <- length(dat)
   K <- length(mu) 
   W <- matrix(0, n,K)
   for (i in 1:n){
        W[i,]<- p*exp(-1/(2*sigma^2)*(dat[i]-mu)^2)/ sum( p*exp(-1/(2*sigma^2)*(dat[i]-mu)^2))
      }
  return(W)
 }


#or, alternatively:
estep<- function(dat, p, mu, sigma){
   n <- length(dat)
   K <- length(mu) 
   W <- matrix(0, n,K)
   for (i in 1:n){
     for (k in 1:K){
        W[i,k]<- p[k]*exp(-1/(2*sigma^2)*(dat[i]-mu[k])^2)/ sum( p*exp(-1/(2*sigma^2)*(dat[i]-mu)^2))
      }
   }
  return(W)
 }


mstep<- function(dat, W){
   n <- dim(W)[1]
   K <- dim(W)[2]
   p <- apply(W,2,sum)/n
   mu<- apply(W*dat,2,sum)/apply(W,2,sum)

   diff<-matrix(0,n, K)
   for (k in 1:K){
     diff[,k]<- (dat -mu[k])^2
   }
   var <- 1/n* sum(W*diff)
   sigma <- sqrt(var)
      
   return(list("p"=p, "mu"=mu, "sigma"=sigma))
}

# or, alternatively 
mstep<- function(dat, W){
   n <- dim(W)[1]
   K <- dim(W)[2]
   p <- apply(W,2,sum)/n
   mu <- rep(0,K)
   for (k in 1:K){
     mu[k] <- sum(W[,k]*dat)/sum(W[,k])
   }
   var <- 0
   for (i in 1:n){
     for (k in 1:K){
        var<- var+ W[i,k]*(dat[i]-mu[k])^2
      }
   }
   sigma <- sqrt(var/n)
   return(list("p"=p, "mu"=mu, "sigma"=sigma))
}

                                      
em <- function(dat,K){
  
 steps  <- 400  # arbitrary 'sufficiently large' number
 p    <- rep(1/K,K)
 mu    <- quantile(dat, probs= (1:K)/K-1/(2*K))  
 sigma <- sd(dat)

 s<-1
 while (s <=steps){
   W   <- estep(dat, p, mu, sigma)
   fit <- mstep(dat, W)
   p  <- fit$p
   mu  <- fit$mu
   sigma <-fit$sigma
   s<-s+1
 }
 
 fit<- list("p"=p, "mu"=mu, "sigma"=sigma,  "W" =W)
 return(fit)  
}


test1 <- em(galaxies, K=4)

  # Visualization: 
par(mfrow=c(1,1))
hist(galaxies, breaks=20, freq=FALSE)
x<-seq(-12000,36000, length=2001)
for (j in 1:4){
lines(x, test1$p[j]*dnorm(x, test1$mu[j], test1$sigma), col=j)
}


test2<- em(rock$peri, K=2)
round(test2$W, digits=4)


### Part III

# 5a) 
gauss.mix.sim1<-function(K, p, mu, sigma){
   x <-runif(1)
   sim<-0
   cp<-cumsum(p) 
       k <-1
       while (x>cp[k]){
           k<-k+1
       }    
       sim <- rnorm(1,mu[k],sigma)
   return(sim)
 }

gauss.mix.sim1(2, c(0.5,0.5), c(1/4,3/4),1/2)     

# n observations
gauss.mix.sim<-function(n, K, p, mu, sigma){
   x<-runif(n)
   sim<-rep(0,n)
   cp<-cumsum(p)
   for (i in 1:n){
       k <-1
       while (x[i]>cp[k]){
           k<-k+1
       }    
       #k<- which((x[i]-pi)[x[i]-pi>0]== min((x[i]-pi)[x[i]-pi>0])) 
       sim[i] <- rnorm(1,mu[k],sigma)
   }
   return(sim)
 }

save<-gauss.mix.sim(100,2, c(0.5,0.5), c(-1,1),1/2)     
hist(save)

# improved EM:

 em <- function(dat,K,tol=1, steps=400){
   p    <- rep(1/K,K)
   mu    <-  mean(dat)+tol*quantile(dat-mean(dat), probs= (1:K)/K-1/(2*K))  
   sigma <- sd(dat)

   s<-1
   while (s <=steps){
     w   <- estep(dat, p, mu, sigma)
     fit <- mstep(dat, w)
     mu  <- fit$mu
     p  <- fit$p
     sigma <-fit$sigma
     s<-s+1
   }
   dens.all<-matrix(0,length(dat), K)
      for (k in 1:K){
         dens.all[,k]<- p[k]*dnorm(dat, mu[k],sigma)
       }
   loglik<- sum(log(apply(dens.all,1,sum)))
 
   fit<- list("pi"=pi, "mu"=mu, "sigma"=sigma, "wik" =w,"data"=dat, "disp"=-2*loglik )
   class(fit)<-"em"
   return(fit)  
}


  # Bootstrap:
  # Aitkin, Francis, Hinde, and Darnell, page 442.

  # H_0: K components
  # H_1: K+1 components
  
  n <- length(galaxies)

  fit4 <- em(galaxies, K=4,tol=1)
  fit5 <- em(galaxies, K=5,tol=1)
  LRTS <- fit4$disp-fit5$disp

  M <- 99
  sim.LRTS <-rep(0,M)
  for (m in 1:M){
     sim4 <- gauss.mix.sim(n, 4, p=fit4$p, mu=fit4$mu, sigma=fit4$sigma)
     fit.sim4<-  em(sim4,4)
     fit.sim5<-  em(sim4,5)
     sim.LRTS[m]<- fit.sim4$disp-fit.sim5$disp
  }

p <- 1-rank(c(LRTS, sim.LRTS))[1]/100
p