TEXT   29
EMtwosample
Guest on 15th March 2023 02:26:57 AM


  1. twosampleinitialcov<-function(X1, X2, meanvec1, meanvec2)
  2.   {
  3.    for (i in 1:ncol(X1))
  4.     {
  5.      X1[is.na(X1[,i]),i]=meanvec1[i]
  6.     }
  7.    for (i in 1:ncol(X2))
  8.     {
  9.      X2[is.na(X2[,i]),i]=meanvec2[i]
  10.     }
  11.    X=rbind(X1,X2)
  12.    covmat=cov(X)*(nrow(X)-1)/nrow(X)
  13.    return(covmat)
  14.   }
  15.  
  16.  
  17. empredict<-function(X, emmean, emcov)
  18.   {
  19.    completeX=X
  20.    rowmiss=sort(unique(row(X)[is.na(X)]))
  21.    nrowmiss=length(rowmiss)  #number of rows with missing
  22.    T2=0
  23.    for (i in 1:nrowmiss)
  24.     {
  25.      wholerow=X[rowmiss[i],]
  26.      x2=wholerow[!is.na(wholerow)]
  27.      colmiss=col(t(as.matrix(wholerow)))[is.na(t(as.matrix(wholerow)))]
  28.      mu1=emmean[colmiss]  #missing
  29.      mu2=emmean[-colmiss] #available
  30.      sigma11=emcov[colmiss,colmiss]
  31.      sigma12=emcov[colmiss,-colmiss]
  32.      sigma21=emcov[-colmiss,colmiss]
  33.      sigma22=emcov[-colmiss,-colmiss]
  34.      x1=mu1+sigma12%*%solve(sigma22)%*%(x2-mu2)
  35.      wholerow[is.na(wholerow)]=x1
  36.      completeX[rowmiss[i],]=wholerow
  37.      matprod=completeX[rowmiss[i],]%*%t(completeX[rowmiss[i],])
  38.      x1x1=sigma11-sigma12%*%solve(sigma22)%*%sigma21+x1%*%t(x1)
  39.      matprod[colmiss,colmiss]=x1x1
  40.      T2=T2+matprod
  41.     }
  42.    T1=apply(completeX,2,sum)
  43.  
  44.    rownomiss=seq(1,nrow(X))[-rowmiss]
  45.    nrownomiss=length(rownomiss)
  46.    for (i in 1:nrownomiss)
  47.     {
  48.      wholerow=X[rownomiss[i],]
  49.      T2=T2+wholerow%*%t(wholerow)
  50.     }
  51.  
  52.    return(list(T1=T1, T2=T2))
  53.   }
  54.  
  55.  
  56. twosampleemestimate<-function(T1.1, T1.2, T2.1, T2.2, n1, n2)
  57.   {
  58.    means1=T1.1/n1
  59.    means2=T1.2/n2
  60.    covs=(T2.1/n1-means1%*%t(means1))*n1/(n1+n2)+(T2.2/n2-means2%*%t(means2))*n2/(n1+n2)
  61.    return(list(means1=means1, means2=means2, covs=covs))
  62.   }
  63.  
  64.  
  65.  
  66. twosampleEM<-function(X1, X2, delta=0.0001) #X1 and X2 are two matrices with missing assume different mean and same covariance
  67.  {
  68.   X1=as.matrix(X1)
  69.   X2=as.matrix(X2)
  70.   #initial estimate
  71.   emmean1=apply(X1,2,mean,na.rm=TRUE)
  72.   emmean2=apply(X2,2,mean,na.rm=TRUE)
  73.   emcov=twosampleinitialcov(X1, X2, emmean1, emmean2)
  74.  
  75.   if (min(eigen(emcov)$values)<0)
  76.     {
  77.      stop("the initial estimate of covariance matrix is negative definite")
  78.     }
  79.  
  80.   n1=nrow(X1)
  81.   n2=nrow(X2)
  82.  
  83.   meandiff1=9999999
  84.   meandiff2=9999999
  85.   covdiff=9999999
  86.  
  87.   while(meandiff1>delta & meandiff2>delta & covdiff>delta)
  88.     {
  89.       #prediction
  90.       T1.1=empredict(X1, emmean1, emcov)$T1
  91.       T2.1=empredict(X1, emmean1, emcov)$T2
  92.       T1.2=empredict(X2, emmean2, emcov)$T1
  93.       T2.2=empredict(X2, emmean2, emcov)$T2
  94.       #T2=T2.1+T2.2
  95.  
  96.       #estimate
  97.       tempmean1=twosampleemestimate(T1.1, T1.2, T2.1, T2.2, n1, n2)$means1
  98.       tempmean2=twosampleemestimate(T1.1, T1.2, T2.1, T2.2, n1, n2)$means2
  99.       tempcov=twosampleemestimate(T1.1, T1.2, T2.1, T2.2, n1, n2)$covs
  100.      
  101.       meandiff1=sqrt(sum((tempmean1-emmean1)^2))
  102.       meandiff2=sqrt(sum((tempmean2-emmean2)^2))
  103.       covdiff=sqrt(sum((tempcov-emcov)^2))
  104.  
  105.       emmean1=tempmean1
  106.       emmean2=tempmean2
  107.       emcov=tempcov
  108.     }
  109.  
  110.   return(list(emmean1=emmean1, emmean2=emmean2, emcov=emcov))
  111.  
  112.  }
  113.  
  114.  
  115. twosamp=read.table("E:/Study/RA/2009/20090210/cochlear09.dt")
  116. sampA=twosamp[twosamp[,1]=="A",2:5]
  117. sampB=twosamp[twosamp[,1]=="B",2:5]
  118. emout=twosampleEM(sampA,sampB, delta=0.00000001)
  119.  
  120.  
  121. > emout
  122. $emmean1
  123.       V2       V3       V4       V5
  124. 28.51950 49.14400 55.91789 64.22370
  125.  
  126. $emmean2
  127.       V2       V3       V4       V5
  128. 19.39571 38.24143 44.24044 46.81729
  129.  
  130. $emcov
  131.            V2       V3       V4       V5
  132. [1,] 384.0486 396.9089 321.3101 294.0010
  133. [2,] 396.9089 563.7274 488.0587 481.6738
  134. [3,] 321.3101 488.0587 520.6385 505.2430
  135. [4,] 294.0010 481.6738 505.2430 543.9159

Raw Paste

Login or Register to edit or fork this paste. It's free.