TEXT   29
EMtwosample
Guest on 15th March 2023 02:26:57 AM

1. twosampleinitialcov<-function(X1, X2, meanvec1, meanvec2)
2.   {
3.    for (i in 1:ncol(X1))
4.     {
5.      X1[is.na(X1[,i]),i]=meanvec1[i]
6.     }
7.    for (i in 1:ncol(X2))
8.     {
9.      X2[is.na(X2[,i]),i]=meanvec2[i]
10.     }
11.    X=rbind(X1,X2)
12.    covmat=cov(X)*(nrow(X)-1)/nrow(X)
13.    return(covmat)
14.   }
15.
16.
17. empredict<-function(X, emmean, emcov)
18.   {
19.    completeX=X
20.    rowmiss=sort(unique(row(X)[is.na(X)]))
21.    nrowmiss=length(rowmiss)  #number of rows with missing
22.    T2=0
23.    for (i in 1:nrowmiss)
24.     {
25.      wholerow=X[rowmiss[i],]
26.      x2=wholerow[!is.na(wholerow)]
27.      colmiss=col(t(as.matrix(wholerow)))[is.na(t(as.matrix(wholerow)))]
28.      mu1=emmean[colmiss]  #missing
29.      mu2=emmean[-colmiss] #available
30.      sigma11=emcov[colmiss,colmiss]
31.      sigma12=emcov[colmiss,-colmiss]
32.      sigma21=emcov[-colmiss,colmiss]
33.      sigma22=emcov[-colmiss,-colmiss]
34.      x1=mu1+sigma12%*%solve(sigma22)%*%(x2-mu2)
35.      wholerow[is.na(wholerow)]=x1
36.      completeX[rowmiss[i],]=wholerow
37.      matprod=completeX[rowmiss[i],]%*%t(completeX[rowmiss[i],])
38.      x1x1=sigma11-sigma12%*%solve(sigma22)%*%sigma21+x1%*%t(x1)
39.      matprod[colmiss,colmiss]=x1x1
40.      T2=T2+matprod
41.     }
42.    T1=apply(completeX,2,sum)
43.
44.    rownomiss=seq(1,nrow(X))[-rowmiss]
45.    nrownomiss=length(rownomiss)
46.    for (i in 1:nrownomiss)
47.     {
48.      wholerow=X[rownomiss[i],]
49.      T2=T2+wholerow%*%t(wholerow)
50.     }
51.
52.    return(list(T1=T1, T2=T2))
53.   }
54.
55.
56. twosampleemestimate<-function(T1.1, T1.2, T2.1, T2.2, n1, n2)
57.   {
58.    means1=T1.1/n1
59.    means2=T1.2/n2
60.    covs=(T2.1/n1-means1%*%t(means1))*n1/(n1+n2)+(T2.2/n2-means2%*%t(means2))*n2/(n1+n2)
61.    return(list(means1=means1, means2=means2, covs=covs))
62.   }
63.
64.
65.
66. twosampleEM<-function(X1, X2, delta=0.0001) #X1 and X2 are two matrices with missing assume different mean and same covariance
67.  {
68.   X1=as.matrix(X1)
69.   X2=as.matrix(X2)
70.   #initial estimate
71.   emmean1=apply(X1,2,mean,na.rm=TRUE)
72.   emmean2=apply(X2,2,mean,na.rm=TRUE)
73.   emcov=twosampleinitialcov(X1, X2, emmean1, emmean2)
74.
75.   if (min(eigen(emcov)\$values)<0)
76.     {
77.      stop("the initial estimate of covariance matrix is negative definite")
78.     }
79.
80.   n1=nrow(X1)
81.   n2=nrow(X2)
82.
83.   meandiff1=9999999
84.   meandiff2=9999999
85.   covdiff=9999999
86.
87.   while(meandiff1>delta & meandiff2>delta & covdiff>delta)
88.     {
89.       #prediction
90.       T1.1=empredict(X1, emmean1, emcov)\$T1
91.       T2.1=empredict(X1, emmean1, emcov)\$T2
92.       T1.2=empredict(X2, emmean2, emcov)\$T1
93.       T2.2=empredict(X2, emmean2, emcov)\$T2
94.       #T2=T2.1+T2.2
95.
96.       #estimate
97.       tempmean1=twosampleemestimate(T1.1, T1.2, T2.1, T2.2, n1, n2)\$means1
98.       tempmean2=twosampleemestimate(T1.1, T1.2, T2.1, T2.2, n1, n2)\$means2
99.       tempcov=twosampleemestimate(T1.1, T1.2, T2.1, T2.2, n1, n2)\$covs
100.
101.       meandiff1=sqrt(sum((tempmean1-emmean1)^2))
102.       meandiff2=sqrt(sum((tempmean2-emmean2)^2))
103.       covdiff=sqrt(sum((tempcov-emcov)^2))
104.
105.       emmean1=tempmean1
106.       emmean2=tempmean2
107.       emcov=tempcov
108.     }
109.
110.   return(list(emmean1=emmean1, emmean2=emmean2, emcov=emcov))
111.
112.  }
113.
114.
116. sampA=twosamp[twosamp[,1]=="A",2:5]
117. sampB=twosamp[twosamp[,1]=="B",2:5]
118. emout=twosampleEM(sampA,sampB, delta=0.00000001)
119.
120.
121. > emout
122. \$emmean1
123.       V2       V3       V4       V5
124. 28.51950 49.14400 55.91789 64.22370
125.
126. \$emmean2
127.       V2       V3       V4       V5
128. 19.39571 38.24143 44.24044 46.81729
129.
130. \$emcov
131.            V2       V3       V4       V5
132. [1,] 384.0486 396.9089 321.3101 294.0010
133. [2,] 396.9089 563.7274 488.0587 481.6738
134. [3,] 321.3101 488.0587 520.6385 505.2430
135. [4,] 294.0010 481.6738 505.2430 543.9159

Raw Paste

or to edit or fork this paste. It's free.