TEXT   78

mixturemodel

Guest on 10th May 2022 02:00:29 AM

  1. # Annotated R session to illustrate fitting a mixture of 2 normals,
  2. # following Chapter 18 of the text
  3.  
  4. > library(MASS)
  5. > data(geyser)
  6. > attach(geyser)
  7.  
  8. ## first we plot the data and a density estimate
  9.  
  10. > truehist(waiting,xlim=c(35,115),ymax=0.04,h=5)
  11. > wait.dns <- density(waiting,n=512,width="SJ")
  12. > lines(wait.dns,lty=2)
  13.  
  14.  
  15. # mix.obj is a function to compute the negative log-likelihood
  16. # the parameter is specified as a vector of length 5
  17.  
  18. > mix.obj <-  function(p,x)
  19. {
  20.  
  # p is a length-5 parameter vector: p[1] = mixing proportion pi,
  # p[2], p[3] = mean and sd of component 1, p[4], p[5] = mean and sd of component 2.
  # e is the mixture density evaluated at every observation in x
  # (dnorm of the standardized value, divided by sd, gives the N(mu, sd) density).
  21. e <- p[1] *dnorm((x-p[2])/p[3])/p[3] +
  22. (1-p[1])*dnorm((x-p[4])/p[5])/p[5]
  # Guard: if any density value is <= 0 (e.g. pi outside [0,1] or sd <= 0),
  # log() would fail, so return Inf to make the optimizer reject this p.
  # Otherwise return the NEGATIVE log-likelihood, since optim minimizes.
  23. if(any(e <= 0)) Inf else -sum(log(e)) }
  24.  
  25. > p0 <- c(0.36, 50, 5, 80, 5)  # these starting values taken from the book
  26.  
  27.  
  28. ## Nelder-Mead optimization (the simplex method)
  29.  
  30. >   optim(p0,mix.obj,x=waiting)
  31. $par
  32. [1]  0.3075467 54.1999336  4.9515962 80.3568069  7.5085281
  33.  
  34. $value
  35. [1] 1157.542
  36.  
  37. $counts
  38. function gradient
  39.      147       NA
  40.  
  41. $convergence
  42. [1] 0
  43.  
  44. $message
  45. NULL
  46.  
  47. ## method BFGS, a quasi-Newton method
  48.  
  49. >   optim(p0,mix.obj,x=waiting,method="BFGS")
  50. $par
  51. [1] 8188.12630   73.29862   12.90303  801.67155 1485.53616
  52.  
  53. $value
  54. [1] -1475.754
  55.  
  56. $counts
  57. function gradient
  58.      122      100
  59.  
  60. $convergence
  61. [1] 1
  62.  
  63. $message
  64. NULL
  65.  
  66. ## it failed, but why?  here is what the book did
  67.  
  68. > optim(p0,mix.obj,x=waiting,method="BFGS", control=list(parscale=c(.11,rep(1,4))))
  69.  
  70. $par
  71. [1]  0.3075900 54.2024434  4.9518185 80.3602804  7.5077183
  72.  
  73. $value
  74. [1] 1157.542
  75.  
  76. $counts
  77. function gradient
  78.       42       19
  79.  
  80. $convergence
  81. [1] 0
  82.  
  83. $message
  84. NULL
  85.  
  86. ## here's what the help file says about parscale
  87.  
  88. parscale
  89. A vector of scaling values for the parameters. Optimization is performed on par/parscale and these should be comparable in the sense that a unit change in any element produces about a unit change in the scaled value.
  90.  
  91. > c(.11,rep(1,4))
  92. [1] 0.11 1.00 1.00 1.00 1.00
  93.  
  94. ## this would have to be determined by trial and error; but these mixture
  95. ## models are tricky to fit and it's usually the mixing proportion pi that causes trouble
  96.  
  97. ## now try a version with derivatives
  98. ## we could compute the derivatives by hand, but I'll use deriv3 as
  99. ## on p.438
  100.  
  # deriv3 symbolically differentiates the per-observation negative log-likelihood
  # (the formula below) with respect to the five named parameters, and returns a
  # function of (x, p, u1, s1, u2, s2) whose value carries "gradient" and
  # "hessian" attributes — one row per observation, one column per parameter.
  101. > lmix2 <- deriv3(
  102. + ~ -log(p*dnorm((x-u1)/s1)/s1 + (1-p)*dnorm((x-u2)/s2)/s2),
  103. + c("p","u1","s1","u2","s2"),
  104. + function(x,p,u1,s1,u2,s2) NULL)
  105.  
  106. ## we seem to need this function, even though we already have mix.obj
  107. ## because mix.obj takes a parameter argument of length 5, whereas this
  108. ## version takes 5 scalar parameters
  109.  
  110. ## this gives a really complicated result; here's how it is used
  111.  
  # Gradient of the total negative log-likelihood, in the (p, x) signature
  # that optim expects for the gr argument.
  112. > mix.gr <- function(p,x){
  # Unpack the length-5 vector into the scalar arguments lmix2 needs.
  # Note p is reassigned to p[1] LAST, after the other elements are extracted.
  113. + u1 <- p[2]; s1 <- p[3]; u2 <- p[4]; s2 <- p[5]; p <- p[1]
  # The "gradient" attribute is (n observations) x (5 parameters); summing the
  # columns gives the gradient of the summed negative log-likelihood.
  114. + colSums(attr(lmix2(x,p,u1,s1,u2,s2),"gradient"))}
  115.  
  116.  
  117. ## after exploring you can verify that the attribute called "gradient"
  118. ## of the value returned by lmix2 is of dimension 299 x 5; the gradient is
  119. ## evaluated at 299 observations for each of the 5 parameters
  120.  
  121. > optim(p0,mix.obj,mix.gr,x=waiting,method="BFGS",hessian = TRUE, control=list(parscale=c(.11,rep(1,4))))
  122. $par
  123. [1]  0.3075900 54.2024436  4.9518189 80.3602802  7.5077181
  124.  
  125. $value
  126. [1] 1157.542
  127.  
  128. $counts
  129. function gradient
  130.       43       19
  131.  
  132. $convergence
  133. [1] 0
  134.  
  135. $message
  136. NULL
  137.  
  138. $hessian
  139.             [,1]       [,2]        [,3]       [,4]       [,5]
  140. [1,] 1302.324447 -7.9155320 -11.6701192 -6.5967516 12.7068775
  141. [2,]   -7.915532  3.0608623  -1.1456662 -0.4822492  0.8365219
  142. [3,]  -11.670119 -1.1456662   5.2997642 -0.6549970  0.9902167
  143. [4,]   -6.596752 -0.4822492  -0.6549970  3.2307670  0.8919459
  144. [5,]   12.706877  0.8365219   0.9902167  0.8919459  5.4255334
  145.  
  146. ## hessian = TRUE produces the last matrix, which is used to
  147. ## estimate the variances and covariances of the mle's
  148.  
  149. ## We can investigate the performance of deriv3 in computing 2nd derivs:
  150.  
  151. ## similarly colSums(attr(lmix2(...),"hessian")) is a 5x5 matrix
  152.  
  153. > phat <- .Last.value$par  
  154.  
  155. > colSums(attr(lmix2(waiting,phat[1],phat[2],phat[3],phat[4],phat[5]),
  156. "hessian"))
  157.  
  158.              p         u1          s1         u2         s2
  159. p  1302.314648 -7.9155228 -11.6701125 -6.5967384 12.7068465
  160. u1   -7.915523  3.0608623  -1.1456661 -0.4822492  0.8365219
  161. s1  -11.670112 -1.1456661   5.2997634 -0.6549970  0.9902167
  162. u2   -6.596738 -0.4822492  -0.6549970  3.2307670  0.8919459
  163. s2   12.706847  0.8365219   0.9902167  0.8919459  5.4255331

Raw Paste


Login or Register to edit or fork this paste. It's free.