TEXT   28
EMoutput
Guest on 15th March 2023 02:26:25 AM


  1. initialcov<-function(X,meanvec)
  2.   {
  3.    for (i in 1:ncol(X))
  4.     {
  5.      X[is.na(X[,i]),i]=meanvec[i]
  6.     }
  7.    covmat=cov(X)*(nrow(X)-1)/nrow(X)
  8.    return(covmat)
  9.   }
  10.  
  11.  
  12.  
  13. empredict<-function(X, emmean, emcov)
  14.   {
  15.    completeX=X
  16.    rowmiss=sort(unique(row(X)[is.na(X)]))
  17.    nrowmiss=length(rowmiss)  #number of rows with missing
  18.    T2=0
  19.    for (i in 1:nrowmiss)
  20.     {
  21.      wholerow=X[rowmiss[i],]
  22.      x2=wholerow[!is.na(wholerow)]
  23.      colmiss=col(t(as.matrix(wholerow)))[is.na(t(as.matrix(wholerow)))]
  24.      mu1=emmean[colmiss]  #missing
  25.      mu2=emmean[-colmiss] #available
  26.      sigma11=emcov[colmiss,colmiss]
  27.      sigma12=emcov[colmiss,-colmiss]
  28.      sigma21=emcov[-colmiss,colmiss]
  29.      sigma22=emcov[-colmiss,-colmiss]
  30.      x1=mu1+sigma12%*%solve(sigma22)%*%(x2-mu2)
  31.      wholerow[is.na(wholerow)]=x1
  32.      completeX[rowmiss[i],]=wholerow
  33.      matprod=completeX[rowmiss[i],]%*%t(completeX[rowmiss[i],])
  34.      x1x1=sigma11-sigma12%*%solve(sigma22)%*%sigma21+x1%*%t(x1)
  35.      matprod[colmiss,colmiss]=x1x1
  36.      T2=T2+matprod
  37.     }
  38.    T1=apply(completeX,2,sum)
  39.    
  40.    rownomiss=seq(1,nrow(X))[-rowmiss]
  41.    nrownomiss=length(rownomiss)
  42.    for (i in 1:nrownomiss)
  43.     {
  44.      wholerow=X[rownomiss[i],]
  45.      T2=T2+wholerow%*%t(wholerow)
  46.     }
  47.    
  48.    return(list(T1=T1, T2=T2))
  49.   }
  50.  
  51.  
  52. emestimate<-function(T1, T2, n)
  53.   {
  54.    means=T1/n
  55.    covs=T2/n-means%*%t(means)
  56.    return(list(means=means, covs=covs))
  57.   }
  58.  
  59.  
  60.  
  61. EM<-function(X, delta=0.0001) #X is a n*p matrix with missing
  62.  {
  63.   x=as.matrix(X)
  64.   #initial estimate
  65.   emmean=apply(X,2,mean,na.rm=TRUE)
  66.   emcov=initialcov(X, emmean)
  67.  
  68.   if (min(eigen(emcov)$values)<0)
  69.     {
  70.      stop("the initial estimate of covariance matrix is negative definite")
  71.     }
  72.  
  73.  
  74.  
  75.   n=nrow(X)  #number of rows
  76.  
  77.   meandiff=9999999
  78.   covdiff=9999999
  79.  
  80.   while(meandiff>delta & covdiff>delta)
  81.     {
  82.       #prediction
  83.       T1=empredict(X, emmean, emcov)$T1
  84.       T2=empredict(X, emmean, emcov)$T2
  85.  
  86.       #estimate
  87.       tempmean=emestimate(T1, T2, n)$means
  88.       tempcov=emestimate(T1, T2, n)$covs
  89.  
  90.       meandiff=sqrt(sum((tempmean-emmean)^2))
  91.       covdiff=sqrt(sum((tempcov-emcov)^2))
  92.  
  93.       emmean=tempmean
  94.       emcov=tempcov
  95.     }
  96.    
  97.   return(list(emmean=emmean, emcov=emcov))
  98.  
  99.  }
  100.  
  101.  
  102.  
  103.  
  104. > X
  105.      [,1] [,2] [,3]
  106. [1,]   NA    0    3
  107. [2,]    7    2    6
  108. [3,]    5    1    2
  109. [4,]   NA   NA    5
  110. > EM(X,delta=0.00000001)
  111. $emmean
  112. [1] 6.055589 1.115385 4.000000
  113.  
  114. $emcov
  115.           [,1]      [,2]      [,3]
  116. [1,] 0.5974356 0.3743413 1.2152566
  117. [2,] 0.3743413 0.6200690 0.8653846
  118. [3,] 1.2152566 0.8653846 2.5000000

Raw Paste

Login or Register to edit or fork this paste. It's free.