########################################################################################
#  LPMOde R prototype: Author Deep, 30th Jan, 2017.
#  https://projecteuclid.org/euclid.ejs/1486090845
# Cite: Mukhopadhyay, Subhadeep. Large-scale mode identification and data-driven sciences. Electron. J. Statist. 11 (2017), no. 1, 215--240. 
########################################################################################


#########################################################################################
#
#
# Required Functions
#
#
#########################################################################################


library(orthopolynom)
library(nleqslv)
library(Hmisc)

LP.scorefun <- function(u,m){
	 m <- min( length(unique(u))-1, m   )
     sleg.f <-  slegendre.polynomials(m,normalized=TRUE)
	 X <- polynomial.values(sleg.f, u)
	 S <- do.call(cbind,X)[,-1]
	 return(S)
}


LP.smooth <- function(CR,n,method){  
  CR.s <- sort(CR^2,decreasing=TRUE,index=TRUE)$x
  aa <- rep(0,length(CR.s))
   if(method=="AIC"){ penalty <- 2}
   if(method=="BIC"){ penalty <- log(n)}
   aa[1] <- CR.s[1] - penalty/n
   if(aa[1]< 0){ return(rep(0,length(CR)))  }
   for(i in 2:length(CR.s)){
    aa[i] <- aa[(i-1)] + (CR.s[i] - penalty/n)
    }
  CR[CR^2<CR.s[which(aa==max(aa))]] <- 0
  return(CR)
}

CD.MaxEnt <- function(u,beta,S,ngrid=150){
	u.grid <- seq(0,1,length=ngrid)
	wd <- diff(u.grid)[1]
	beta.exp <- c(1,beta[abs(beta)>0])
	theta <-rep(0,length(beta)+1)
	S.exp <- cbind(1,S[,abs(beta)>0])
	S.grid <- matrix(NA,ngrid,ncol(S.exp))
	for(k in 1:ncol(S.exp)){
	     A <- approxfun(u,S.exp[,k],rule=2,method="constant",f=1)
	     S.grid[,k] <- A(u.grid)
		}

 #--sub-function of MAIN CD.MAXENT FUNCTION maxent estimation
 
		cd.maxent <- function(theta){
			cd.val <- wd*exp(S.grid%*%theta)
			y <- numeric(ncol(S.grid))
			for(i in 1:ncol(S.grid)){
				y[i]   <-   (t(S.grid[,i])%*%cd.val)   -  beta.exp[i]
			}
			return(y)
			}

	coef <- nleqslv(beta.exp,cd.maxent)$x
	theta[1] <- coef[1]
	theta[1+which(abs(beta)>0)]  <- coef[-1]
	return(theta)
}

##another slick way to get the maxent coef from L2

CD.MaxEnt.reg<- function(beta,S){
	dl2.val <- 1 + S%*%beta
	ind <- which(dl2.val>0)
	y=log(dl2.val[ind])
	maxent.coef <- as.numeric(lm(y~S[ind,   abs(beta)>0])$coef)
	theta <- rep(0, length(beta)+1)
	theta[1] <- maxent.coef[1]
	theta[1+which(abs(beta)>0)]  <- maxent.coef[-1]
	return(theta)
	}
	



#########################################################################################
#
#
# Reproducing Acidity Data Result (G=Normal)
#
#
#########################################################################################

library("mixAK")
data(Acidity)
y<-Acidity


# Q1. display the data, which follows unknown F

hist(y,20)

# Q2. The ref density G is normal (scientists can choose any density based on their domain knowledge). The question is how F is different from the specified G?

#--estimates of the parameters of G
mu <- mean(y)
si <- sd(y)
y <- sort(y)
u <- pnorm(y,mu,si) # one can use any distribution.
hist(u,30) #--elementary look: a flat shaped histogram indicates F=G.




# Q3. How to estimate the smoothed comparison density that indicates How F is different from assumed G=Normal model?

m<- 6 # default 
S <- LP.scorefun(u,m) #--construction of Leg_j(G(x_i) polynoamils
beta <- apply(S,FUN="mean",2)
beta <- LP.smooth(beta,length(u),"AIC")   #--parameters of L2 comparison density
theta <- CD.MaxEnt(u,beta,S,ngrid=200) #--parameters of exponential comparison density


#--Draw the connector density
z.L2 <- approxExtrap(u,1 + S%*%beta,xout=c(0,u,1))
cd.L2 <- approxfun(z.L2$x,z.L2$y, rule=2)
z.exp <- approxExtrap(u,exp(cbind(1,S)%*%theta),xout=c(0,u,1))
cd.exp <- approxfun(z.exp$x, z.exp$y, rule=2)
uu <- seq(0,1,length=100)
yy1 <- cd.L2(uu)
yy1[yy1<0]<-0
yy2 <- cd.exp(uu)
yy2[yy2<0]<-0
 pdf("acid-cd.pdf")
  hist(u,30,col="gray", xlab="",prob=TRUE,cex.axis=1.2,ylab="",main="")
  lines(uu,yy1,col="forestgreen", lwd=1.8,type="l",ylab="",xlab="")
  lines(uu,yy2,col="chocolate1", lwd=1.8,type="l",ylab="",xlab="")
  legend("topright",c("MaxEnt estimate","L2 estimate"),col=c("chocolate1","forestgreen"),lwd=1.56)
  title(xlab = "Goodness of fit connector density", line = 4,cex.lab=1.24)  #--Fig 1(b) in the paper 
 dev.off()
 
# Q4. What is the Skew-G density estimator for F? Use Eq (2.1) of the paper.
 
z<- seq(min(y),max(y),length=100)
u.z <-pnorm(z,mu,si)
y.l2 <- dnorm(z,mu,si)*cd.L2(u.z)  #--Applying  Eq (2.1)
y.l2[y.l2<0] <- 0
y.exp <- dnorm(z,mu,si)*cd.exp(u.z) #--Applying  Eq (2.1)

#--plot it


pdf("acid-skewG.pdf")
hist(y,20, col="gray83",xlab="",prob=TRUE,cex.axis=1.2,ylab="",main="")
lines(z,y.exp,col="red",type="l", lwd=1.6)
lines(z,y.l2,col="green3",type="l", lwd=1.6) 
lines(z,dnorm(z,mean(y),sd(y)), col="blue", lwd=1.8,lty=2)	
legend("topright",c("Fitted Normal","MaxEnt LP Estimate","L2 LP Estimate"),col=c("blue","red","green2"),lty=c(2,1,1),cex=.9,lwd=1.46)
dev.off()