######################################################################################## # LPMOde R prototype: Author Deep, 30th Jan, 2017. # https://projecteuclid.org/euclid.ejs/1486090845 # Cite: Mukhopadhyay, Subhadeep. Large-scale mode identification and data-driven sciences. Electron. J. Statist. 11 (2017), no. 1, 215--240. ######################################################################################## ######################################################################################### # # # Required Functions # # ######################################################################################### library(orthopolynom) library(nleqslv) library(Hmisc) LP.scorefun <- function(u,m){ m <- min( length(unique(u))-1, m ) sleg.f <- slegendre.polynomials(m,normalized=TRUE) X <- polynomial.values(sleg.f, u) S <- do.call(cbind,X)[,-1] return(S) } LP.smooth <- function(CR,n,method){ CR.s <- sort(CR^2,decreasing=TRUE,index=TRUE)$x aa <- rep(0,length(CR.s)) if(method=="AIC"){ penalty <- 2} if(method=="BIC"){ penalty <- log(n)} aa[1] <- CR.s[1] - penalty/n if(aa[1]< 0){ return(rep(0,length(CR))) } for(i in 2:length(CR.s)){ aa[i] <- aa[(i-1)] + (CR.s[i] - penalty/n) } CR[CR^20]) theta <-rep(0,length(beta)+1) S.exp <- cbind(1,S[,abs(beta)>0]) S.grid <- matrix(NA,ngrid,ncol(S.exp)) for(k in 1:ncol(S.exp)){ A <- approxfun(u,S.exp[,k],rule=2,method="constant",f=1) S.grid[,k] <- A(u.grid) } #--sub-function of MAIN CD.MAXENT FUNCTION maxent estimation cd.maxent <- function(theta){ cd.val <- wd*exp(S.grid%*%theta) y <- numeric(ncol(S.grid)) for(i in 1:ncol(S.grid)){ y[i] <- (t(S.grid[,i])%*%cd.val) - beta.exp[i] } return(y) } coef <- nleqslv(beta.exp,cd.maxent)$x theta[1] <- coef[1] theta[1+which(abs(beta)>0)] <- coef[-1] return(theta) } ##another slick way to get the maxent coef from L2 CD.MaxEnt.reg<- function(beta,S){ dl2.val <- 1 + S%*%beta ind <- which(dl2.val>0) y=log(dl2.val[ind]) maxent.coef <- as.numeric(lm(y~S[ind, abs(beta)>0])$coef) theta <- rep(0, length(beta)+1) theta[1] <- maxent.coef[1] theta[1+which(abs(beta)>0)] <- maxent.coef[-1] return(theta) } ######################################################################################### # # # Reproducing Acidity Data Result (G=Normal) # # ######################################################################################### library("mixAK") data(Acidity) y<-Acidity # Q1. display the data, which follows unknown F hist(y,20) # Q2. The ref density G is normal (scientists can choose any density based on their domain knowledge). The question is how F is different from the specified G? #--estimates of the parameters of G mu <- mean(y) si <- sd(y) y <- sort(y) u <- pnorm(y,mu,si) # one can use any distribution. hist(u,30) #--elementary look: a flat shaped histogram indicates F=G. # Q3. How to estimate the smoothed comparison density that indicates How F is different from assumed G=Normal model? m<- 6 # default S <- LP.scorefun(u,m) #--construction of Leg_j(G(x_i) polynoamils beta <- apply(S,FUN="mean",2) beta <- LP.smooth(beta,length(u),"AIC") #--parameters of L2 comparison density theta <- CD.MaxEnt(u,beta,S,ngrid=200) #--parameters of exponential comparison density #--Draw the connector density z.L2 <- approxExtrap(u,1 + S%*%beta,xout=c(0,u,1)) cd.L2 <- approxfun(z.L2$x,z.L2$y, rule=2) z.exp <- approxExtrap(u,exp(cbind(1,S)%*%theta),xout=c(0,u,1)) cd.exp <- approxfun(z.exp$x, z.exp$y, rule=2) uu <- seq(0,1,length=100) yy1 <- cd.L2(uu) yy1[yy1<0]<-0 yy2 <- cd.exp(uu) yy2[yy2<0]<-0 pdf("acid-cd.pdf") hist(u,30,col="gray", xlab="",prob=TRUE,cex.axis=1.2,ylab="",main="") lines(uu,yy1,col="forestgreen", lwd=1.8,type="l",ylab="",xlab="") lines(uu,yy2,col="chocolate1", lwd=1.8,type="l",ylab="",xlab="") legend("topright",c("MaxEnt estimate","L2 estimate"),col=c("chocolate1","forestgreen"),lwd=1.56) title(xlab = "Goodness of fit connector density", line = 4,cex.lab=1.24) #--Fig 1(b) in the paper dev.off() # Q4. What is the Skew-G density estimator for F? Use Eq (2.1) of the paper. z<- seq(min(y),max(y),length=100) u.z <-pnorm(z,mu,si) y.l2 <- dnorm(z,mu,si)*cd.L2(u.z) #--Applying Eq (2.1) y.l2[y.l2<0] <- 0 y.exp <- dnorm(z,mu,si)*cd.exp(u.z) #--Applying Eq (2.1) #--plot it pdf("acid-skewG.pdf") hist(y,20, col="gray83",xlab="",prob=TRUE,cex.axis=1.2,ylab="",main="") lines(z,y.exp,col="red",type="l", lwd=1.6) lines(z,y.l2,col="green3",type="l", lwd=1.6) lines(z,dnorm(z,mean(y),sd(y)), col="blue", lwd=1.8,lty=2) legend("topright",c("Fitted Normal","MaxEnt LP Estimate","L2 LP Estimate"),col=c("blue","red","green2"),lty=c(2,1,1),cex=.9,lwd=1.46) dev.off()