- twosampleinitialcov<-function(X1, X2, meanvec1, meanvec2)
- {
- for (i in 1:ncol(X1))
- {
- X1[is.na(X1[,i]),i]=meanvec1[i]
- }
- for (i in 1:ncol(X2))
- {
- X2[is.na(X2[,i]),i]=meanvec2[i]
- }
- X=rbind(X1,X2)
- covmat=cov(X)*(nrow(X)-1)/nrow(X)
- return(covmat)
- }
- empredict<-function(X, emmean, emcov)
- {
- completeX=X
- rowmiss=sort(unique(row(X)[is.na(X)]))
- nrowmiss=length(rowmiss) #number of rows with missing
- T2=0
- for (i in 1:nrowmiss)
- {
- wholerow=X[rowmiss[i],]
- x2=wholerow[!is.na(wholerow)]
- colmiss=col(t(as.matrix(wholerow)))[is.na(t(as.matrix(wholerow)))]
- mu1=emmean[colmiss] #missing
- mu2=emmean[-colmiss] #available
- sigma11=emcov[colmiss,colmiss]
- sigma12=emcov[colmiss,-colmiss]
- sigma21=emcov[-colmiss,colmiss]
- sigma22=emcov[-colmiss,-colmiss]
- x1=mu1+sigma12%*%solve(sigma22)%*%(x2-mu2)
- wholerow[is.na(wholerow)]=x1
- completeX[rowmiss[i],]=wholerow
- matprod=completeX[rowmiss[i],]%*%t(completeX[rowmiss[i],])
- x1x1=sigma11-sigma12%*%solve(sigma22)%*%sigma21+x1%*%t(x1)
- matprod[colmiss,colmiss]=x1x1
- T2=T2+matprod
- }
- T1=apply(completeX,2,sum)
- rownomiss=seq(1,nrow(X))[-rowmiss]
- nrownomiss=length(rownomiss)
- for (i in 1:nrownomiss)
- {
- wholerow=X[rownomiss[i],]
- T2=T2+wholerow%*%t(wholerow)
- }
- return(list(T1=T1, T2=T2))
- }
- twosampleemestimate<-function(T1.1, T1.2, T2.1, T2.2, n1, n2)
- {
- means1=T1.1/n1
- means2=T1.2/n2
- covs=(T2.1/n1-means1%*%t(means1))*n1/(n1+n2)+(T2.2/n2-means2%*%t(means2))*n2/(n1+n2)
- return(list(means1=means1, means2=means2, covs=covs))
- }
- twosampleEM<-function(X1, X2, delta=0.0001) #X1 and X2 are two matrices with missing assume different mean and same covariance
- {
- X1=as.matrix(X1)
- X2=as.matrix(X2)
- #initial estimate
- emmean1=apply(X1,2,mean,na.rm=TRUE)
- emmean2=apply(X2,2,mean,na.rm=TRUE)
- emcov=twosampleinitialcov(X1, X2, emmean1, emmean2)
- if (min(eigen(emcov)$values)<0)
- {
- stop("the initial estimate of covariance matrix is negative definite")
- }
- n1=nrow(X1)
- n2=nrow(X2)
- meandiff1=9999999
- meandiff2=9999999
- covdiff=9999999
- while(meandiff1>delta & meandiff2>delta & covdiff>delta)
- {
- #prediction
- T1.1=empredict(X1, emmean1, emcov)$T1
- T2.1=empredict(X1, emmean1, emcov)$T2
- T1.2=empredict(X2, emmean2, emcov)$T1
- T2.2=empredict(X2, emmean2, emcov)$T2
- #T2=T2.1+T2.2
- #estimate
- tempmean1=twosampleemestimate(T1.1, T1.2, T2.1, T2.2, n1, n2)$means1
- tempmean2=twosampleemestimate(T1.1, T1.2, T2.1, T2.2, n1, n2)$means2
- tempcov=twosampleemestimate(T1.1, T1.2, T2.1, T2.2, n1, n2)$covs
- meandiff1=sqrt(sum((tempmean1-emmean1)^2))
- meandiff2=sqrt(sum((tempmean2-emmean2)^2))
- covdiff=sqrt(sum((tempcov-emcov)^2))
- emmean1=tempmean1
- emmean2=tempmean2
- emcov=tempcov
- }
- return(list(emmean1=emmean1, emmean2=emmean2, emcov=emcov))
- }
- twosamp=read.table("E:/Study/RA/2009/20090210/cochlear09.dt")
- sampA=twosamp[twosamp[,1]=="A",2:5]
- sampB=twosamp[twosamp[,1]=="B",2:5]
- emout=twosampleEM(sampA,sampB, delta=0.00000001)
- > emout
- $emmean1
- V2 V3 V4 V5
- 28.51950 49.14400 55.91789 64.22370
- $emmean2
- V2 V3 V4 V5
- 19.39571 38.24143 44.24044 46.81729
- $emcov
- V2 V3 V4 V5
- [1,] 384.0486 396.9089 321.3101 294.0010
- [2,] 396.9089 563.7274 488.0587 481.6738
- [3,] 321.3101 488.0587 520.6385 505.2430
- [4,] 294.0010 481.6738 505.2430 543.9159
Raw Paste