TEXT   28
EMoutput
Guest on 15th March 2023 02:26:25 AM

1. initialcov<-function(X,meanvec)
2.   {
3.    for (i in 1:ncol(X))
4.     {
5.      X[is.na(X[,i]),i]=meanvec[i]
6.     }
7.    covmat=cov(X)*(nrow(X)-1)/nrow(X)
8.    return(covmat)
9.   }
10.
11.
12.
13. empredict<-function(X, emmean, emcov)
14.   {
15.    completeX=X
16.    rowmiss=sort(unique(row(X)[is.na(X)]))
17.    nrowmiss=length(rowmiss)  #number of rows with missing
18.    T2=0
19.    for (i in 1:nrowmiss)
20.     {
21.      wholerow=X[rowmiss[i],]
22.      x2=wholerow[!is.na(wholerow)]
23.      colmiss=col(t(as.matrix(wholerow)))[is.na(t(as.matrix(wholerow)))]
24.      mu1=emmean[colmiss]  #missing
25.      mu2=emmean[-colmiss] #available
26.      sigma11=emcov[colmiss,colmiss]
27.      sigma12=emcov[colmiss,-colmiss]
28.      sigma21=emcov[-colmiss,colmiss]
29.      sigma22=emcov[-colmiss,-colmiss]
30.      x1=mu1+sigma12%*%solve(sigma22)%*%(x2-mu2)
31.      wholerow[is.na(wholerow)]=x1
32.      completeX[rowmiss[i],]=wholerow
33.      matprod=completeX[rowmiss[i],]%*%t(completeX[rowmiss[i],])
34.      x1x1=sigma11-sigma12%*%solve(sigma22)%*%sigma21+x1%*%t(x1)
35.      matprod[colmiss,colmiss]=x1x1
36.      T2=T2+matprod
37.     }
38.    T1=apply(completeX,2,sum)
39.
40.    rownomiss=seq(1,nrow(X))[-rowmiss]
41.    nrownomiss=length(rownomiss)
42.    for (i in 1:nrownomiss)
43.     {
44.      wholerow=X[rownomiss[i],]
45.      T2=T2+wholerow%*%t(wholerow)
46.     }
47.
48.    return(list(T1=T1, T2=T2))
49.   }
50.
51.
52. emestimate<-function(T1, T2, n)
53.   {
54.    means=T1/n
55.    covs=T2/n-means%*%t(means)
56.    return(list(means=means, covs=covs))
57.   }
58.
59.
60.
61. EM<-function(X, delta=0.0001) #X is a n*p matrix with missing
62.  {
63.   x=as.matrix(X)
64.   #initial estimate
65.   emmean=apply(X,2,mean,na.rm=TRUE)
66.   emcov=initialcov(X, emmean)
67.
68.   if (min(eigen(emcov)\$values)<0)
69.     {
70.      stop("the initial estimate of covariance matrix is negative definite")
71.     }
72.
73.
74.
75.   n=nrow(X)  #number of rows
76.
77.   meandiff=9999999
78.   covdiff=9999999
79.
80.   while(meandiff>delta & covdiff>delta)
81.     {
82.       #prediction
83.       T1=empredict(X, emmean, emcov)\$T1
84.       T2=empredict(X, emmean, emcov)\$T2
85.
86.       #estimate
87.       tempmean=emestimate(T1, T2, n)\$means
88.       tempcov=emestimate(T1, T2, n)\$covs
89.
90.       meandiff=sqrt(sum((tempmean-emmean)^2))
91.       covdiff=sqrt(sum((tempcov-emcov)^2))
92.
93.       emmean=tempmean
94.       emcov=tempcov
95.     }
96.
97.   return(list(emmean=emmean, emcov=emcov))
98.
99.  }
100.
101.
102.
103.
104. > X
105.      [,1] [,2] [,3]
106. [1,]   NA    0    3
107. [2,]    7    2    6
108. [3,]    5    1    2
109. [4,]   NA   NA    5
110. > EM(X,delta=0.00000001)
111. \$emmean
112. [1] 6.055589 1.115385 4.000000
113.
114. \$emcov
115.           [,1]      [,2]      [,3]
116. [1,] 0.5974356 0.3743413 1.2152566
117. [2,] 0.3743413 0.6200690 0.8653846
118. [3,] 1.2152566 0.8653846 2.5000000

Raw Paste

or to edit or fork this paste. It's free.