#Functions to calculate map accuracy
# Carl Salk

# version 10 Apr 2016 #Corrected division by zero problem
# version 20 Jan 2017 #Double checked that unbiased estimators are being used
				#for overall, user and producer (plus their variances).
			    #Note that the examples of Card (1982) give slightly different answers 
				#due to propagated rounding errors in their paper.
# version 28 Feb 2017 #Now using Olofsson (2014) equations for user and producer variance.
# version 10 May 2017 #Corrected a bug in weighting for producers acc. variance function.
# version 31 Aug 2017 #Removed NaN values in calculation of producers acc. variance function.
# version 01 Nov 2017 #Added shift and exchange metrics from Pontius and Santacruz (2014).

makemat<-function(classified,validated,counts=TRUE){	#Function to make a confusion matrix
	if(length(classified)!=length(validated)){
		stop("Unequal vector lengths")}
	if(!is.logical(counts)) stop("'counts' must be interpretable as logical")
	cats<-sort(unique(c(classified,validated)))
	outmat<-matrix(NA,nrow=length(cats),ncol=length(cats))
	for(i in 1:length(cats)){
		for(j in 1:length(cats)){
			outmat[i,j]<-sum(classified==cats[i] & validated==cats[j])
		}
	}
	colnames(outmat)<-cats
	rownames(outmat)<-cats
	if(counts) return(outmat)
	if(!counts) return(outmat/sum(outmat))
}

pmatmake<-function(mat,w=apply(mat,1,sum)){	#Joint probability matrix (sums to 1)
	if(diff(dim(mat))!=0){stop("Error matrix must have two equal dimensions")}
	if(length(w)!=dim(mat)[1]){stop("Error weight vector length does not match error matrix dimensions")}
	wvec<-w/sum(w)
	outmat<-wvec*mat/apply(mat,1,sum,na.rm=T)
	outmat[!is.finite(outmat)]<-0
	return(outmat)
}

#Structure of binary matrix:
#		val.true	val.false
#class.true		n1		n2
#class.false	n3		n4

make.bin<-function(mat,var){	#Collapse a confusion matrix to a binary confusion matrix
	if(dim(mat)[1]!=dim(mat)[2]){
		stop("Non-square confusion matrix")}
	if(var>dim(mat)[1]){
		stop("Variable number greater than matrix size")}
	binmat<-matrix(NA,nrow=2,ncol=2)
	binmat[1,1]<-mat[var,var]
	binmat[2,2]<-sum(mat[-var,-var],na.rm=T)
	binmat[1,2]<-sum(mat[var,-var],na.rm=T)
	binmat[2,1]<-sum(mat[-var,var],na.rm=T)
	if(!is.finite(binmat[1,1])){binmat[1,1]<-0}
	return(binmat)
}

mat.comb<-function(mat,comblist){	#Creates a combined category matrix
	newmat<-matrix(NA,nrow=length(comblist),ncol=length(comblist))
	for(i in 1:length(comblist)){
		for(j in 1:length(comblist)){
			newmat[i,j]<-sum(mat[comblist[[i]],comblist[[j]]])
		}
	}
	return(newmat)
}


#BEGIN ACCURACY METRICS

overacc<-function(mat,w=apply(mat,1,sum)){	#Weighted overall accuracy: Card (1982) eqn. 19
	if(diff(dim(mat))!=0){stop("Error matrix must have two equal dimensions")}
	if(length(w)!=dim(mat)[1]){stop("Error weight vector length does not match error matrix dimensions")}
	wvec<-w/sum(w)
	return(sum(wvec*diag(mat)/apply(mat,1,sum),na.rm=T))
}

over.var<-function(mat,w=apply(mat,1,sum),ci=FALSE){	#Variance of user accuracy: Olofsson (2014)
	if(diff(dim(mat))!=0){stop("Error matrix must have two equal dimensions")}
	if(length(w)!=dim(mat)[1]){stop("Error: weight vector length does not match error matrix dimensions")}
	if(!is.logical(ci)){stop("Error: parameter 'ci' must be logical")}
	wvec<-w/sum(w)	#Normalize the weights to proportions
	pmat<-mat*wvec/apply(mat,1,sum)
	outvar<-sum(diag(pmat)*(wvec-diag(pmat))/(apply(mat,1,sum)-1),na.rm=T)
	if(ci==FALSE){return(outvar)}
	if(ci==TRUE){return(1.96*sqrt(outvar))}
}
	
user<-function(mat,w=-9999){		#Users accuracy: Card (1982) eqn. 18
	if(dim(mat)[1]!=dim(mat)[2]){stop("Non-square confusion matrix")}
	if(w[1]!=-9999) {
		print('Warning: Weighting does not affect the calculation of this metric')}
	return(diag(mat)/apply(mat,1,sum))
}

user.var<-function(mat,w=-9999,ci=FALSE){	#Variance of user accuracy: Olofsson (2014)
	if(diff(dim(mat))!=0){stop("Error matrix must have two equal dimensions")}
	if(w[1]!=-9999) {print('Warning: Weighting does not affect the calculation of this metric')}
	if(!is.logical(ci)){stop("Error: parameter 'ci' must be logical")}
	pmat<-mat/apply(mat,1,sum)
	outvar<-diag(pmat)*(1-diag(pmat))/(apply(mat,1,sum)-1)
	if(ci==FALSE){return(outvar)}
	if(ci==TRUE){return(1.96*sqrt(outvar))}
}


producer<-function(mat,w=apply(mat,1,sum)){	#Producer's accuracy: Card (1982) eqn. 17; Mas (2014) eqn. 4
	if(dim(mat)[1]!=dim(mat)[2]){stop("Non-square confusion matrix")}
	wvec<-w/sum(w)	#Proportionalize the weights
	pmat<-mat*wvec/apply(mat,1,sum)
	return(wvec*diag(mat)/(apply(pmat,2,sum,na.rm=T)*apply(mat,1,sum,na.rm=T)))
}

prod.var<-function(mat,w=apply(mat,1,sum),ci=FALSE){	#Producer's acc variance: Olofsson (2014)
	if(dim(mat)[1]!=dim(mat)[2]){stop("Non-square confusion matrix")}
	if(length(w)!=dim(mat)[1]){stop("Error: weight vector length does not match error matrix dimensions")}
	if(!is.logical(ci)){stop("Error: parameter 'ci' must be logical")}
	wvec<-w/sum(w)	#Normalize the weights to proportions
	pmat<-mat*wvec/apply(mat,1,sum)
	outvar<-rep(NA,dim(mat)[1])
	for(i in 1:length(outvar)){
		outvar[i]<-1/sum(wvec*mat[,i]/apply(mat,1,sum))^2 * 
			(wvec[i]^2*(1-producer(mat,w=wvec)[i])^2*user(mat)[i]*(1-user(mat)[i])/(apply(mat,1,sum)[i]-1) +
			producer(mat,w=wvec)[i]^2*sum(wvec[-i]^2*mat[-i,i]/apply(mat,1,sum)[-i]*(1-mat[-i,i]/apply(mat,1,sum)[-i])/
				(apply(mat,1,sum)[-i]-1),na.rm=T))
	}
	if(ci==FALSE){return(outvar)}
	if(ci==TRUE){return(1.96*sqrt(outvar))}
}

port<-function(mat,w=apply(mat,1,sum)){	#Portmanteau accuracy
	if(dim(mat)[1]!=dim(mat)[2]){
		stop("Non-square confusion matrix")}
	outvec<-rep(NA,dim(mat)[1])
	pmat<-pmatmake(mat,w)
	for(i in 1:dim(mat)[1]){
		binmat<-make.bin(pmat,i)
		outvec[i]<-overacc(binmat)
	}
	return(outvec)
}

port.part<-function(mat){	#Partial portmanteau accuracy - note lack of weighting
	if(dim(mat)[1]!=dim(mat)[2]){
		stop("Non-square confusion matrix")}
	outvec<-rep(NA,dim(mat)[1])
	for(i in 1:dim(mat)[1]){
		binmat<-make.bin(mat,i)
		binmat[2,2]<-0
		outvec[i]<-binmat[1,1]/(binmat[1,1]+binmat[1,2]+binmat[2,1])
	}
	return(outvec)
}

kappa<-function(mat){	#Kappa metric #Following Congalton & Green
	if(dim(mat)[1]!=dim(mat)[2]){
		stop("Non-square confusion matrix")}
	chance.agr<-sum((apply(mat,1,sum)/(sum(mat))^2)*apply(mat,2,sum))
	tot.agr<-overacc(mat)
	return((tot.agr-chance.agr)/(1-chance.agr))
}

kappa.cat<-function(mat){	#Category-wise Kappa based on collapsed binary error matrices
	if(dim(mat)[1]!=dim(mat)[2]){
		stop("Non-square confusion matrix")}
	outvec<-rep(NA,dim(mat)[1])
	for(i in 1:length(outvec)){
		binmat<-make.bin(mat,i)	#BINary MATrix
		chance.agr<-sum(apply(binmat,1,sum)*apply(binmat,2,sum)/(sum(binmat))^2)
		tot.agr<-overacc(binmat)
		outvec[i]<-(tot.agr-chance.agr)/(1-chance.agr)
	}
	return(outvec)
}

kappa.var<-function(mat,ci=FALSE){	#Variance of Kappa #Following Congalton & Green (2008)
	if(dim(mat)[1]!=dim(mat)[2]){
		stop("Non-square confusion matrix")}
	if(!is.logical(ci)){stop("Error: parameter 'ci' must be logical")}
	theta1<-sum(diag(mat))/sum(mat)
	theta2<-sum(apply(mat,1,sum)*apply(mat,2,sum)/(sum(mat))^2)
	theta3<-sum(diag(mat)*(apply(mat,1,sum)+apply(mat,2,sum))/(sum(mat))^2)
	theta4<-sum(
		mat*(matrix(apply(mat,1,sum),nrow=dim(mat)[1],ncol=dim(mat)[2],byrow=F)+
			matrix(apply(mat,2,sum),nrow=dim(mat)[1],ncol=dim(mat)[2],byrow=T))^2)/
				(sum(mat))^3
	outvar<-((sum(mat)^-1)*(
		theta1*(1-theta1)/(1-theta2)^2+
		2*(1-theta1)*(2*theta1*theta2-theta3)/(1-theta2)^3+
		(1-theta1)^2*(theta4-4*theta2^2)/(1-theta2)^4))
	if(ci==FALSE){return(outvar)}
	if(ci==TRUE){return(1.96*sqrt(outvar))}
}

kappa.comp<-function(mat1,mat2){	#Statistical significance of kappa difference
						#Close to 0 or 1 is a significant result in a 2-tailed sense
	if(dim(mat1)[1]!=dim(mat1)[2]){
		stop("Non-square confusion matrix")}
	if(dim(mat2)[1]!=dim(mat2)[2]){
		stop("Non-square confusion matrix")}
	zscore<-(kappa(mat1)-kappa(mat2))/
		sqrt(kappa.var(mat1)+kappa.var(mat2))
	return(zscore)
}

cond.kappa<-function(mat){	#Conditional kappa #Following Congalton & Green
	if(dim(mat)[1]!=dim(mat)[2]){
		stop("Non-square confusion matrix")}
	outvec<-rep(NA,dim(mat)[1])
	for(i in 1:length(outvec)){
		outvec[i]<-(sum(mat)*mat[i,i]-sum(mat[i,])*sum(mat[,i]))/
			(sum(mat)*sum(mat[i,])-sum(mat[i,])*sum(mat[,i]))
	}
	return(outvec)
}

cond.kappa.var<-function(mat,ci=FALSE){	#Conditional kappa variance
	if(dim(mat)[1]!=dim(mat)[2]){stop("Non-square confusion matrix")}
	if(!is.logical(ci)){stop("Error: parameter 'ci' must be logical")}
	outvar<-rep(NA,dim(mat)[1])
	for(i in 1:length(outvar)){
		n<-sum(mat)
		nii<-mat[i,i]
		ni.<-sum(mat[i,])
		n.i<-sum(mat[,i])
		outvar[i]<-(n*(ni.-nii)/(ni.*(n-n.i))^3)*
			((ni.-nii)*(ni.*n.i-n*nii)+n*nii*(n-ni.-n.i+nii))
			
	}
	if(ci==FALSE){return(outvar)}
	if(ci==TRUE){return(1.96*sqrt(outvar))}
}

sensitivity<-function(mat,w=apply(mat,1,sum)){	#Sensitivity from Foody 2010
	if(dim(mat)[1]!=dim(mat)[2]){stop("Non-square confusion matrix")}
	if(length(w)!=dim(mat)[1]){stop("Error weight vector length does not match error matrix dimensions")}
	pmat<-pmatmake(mat,w)
	outvec<-rep(NA,dim(mat)[1])
	for(i in 1:dim(mat)[1]){
		binmat<-make.bin(pmat,i)
		outvec[i]<-binmat[1,1]/sum(binmat[,1])
	}
	return(outvec)
}

specificity<-function(mat,w=apply(mat,1,sum)){	#Sensitivity from Foody 2010
	if(dim(mat)[1]!=dim(mat)[2]){stop("Non-square confusion matrix")}
	if(length(w)!=dim(mat)[1]){stop("Error weight vector length does not match error matrix dimensions")}
	pmat<-pmatmake(mat,w)
	outvec<-rep(NA,dim(mat)[1])
	for(i in 1:dim(mat)[1]){
		binmat<-make.bin(pmat,i)
		outvec[i]<-binmat[2,2]/sum(binmat[,2])
	}
	return(outvec)
}

quant.dis.cat<-function(mat,w=apply(mat,1,sum)){	#Categorical Quantity disagreement from Pontius and Milliones
	if(dim(mat)[1]!=dim(mat)[2]){
		stop("Non-square confusion matrix")}
	if(length(w)!=dim(mat)[1]){stop("Error weight vector length does not match error matrix dimensions")}
	pmat<-pmatmake(mat,w)	
	outvec<-rep(NA,dim(mat)[1])
	for(i in 1:dim(pmat)[1]){
		outvec[i]<-abs(sum(pmat[i,],na.rm=T)-sum(pmat[,i],na.rm=T))
	}
	return(outvec)
}

quant.dis<-function(mat,w=apply(mat,1,sum)){	#Overall quantity disagreement, Pontius and Milliones
	if(dim(mat)[1]!=dim(mat)[2]){	stop("Non-square confusion matrix")}
	if(length(w)!=dim(mat)[1]){stop("Error weight vector length does not match error matrix dimensions")}
	quant.cat<-quant.dis.cat(mat,w)
	return(sum(quant.cat)/2)
}
	
alloc.dis.cat<-function(mat,w=apply(mat,1,sum)){	#Categorical allocation disagreement, Pontius and Milliones
	if(dim(mat)[1]!=dim(mat)[2]){stop("Non-square confusion matrix")}
	if(length(w)!=dim(mat)[1]){stop("Error weight vector length does not match error matrix dimensions")}
	pmat<-pmatmake(mat,w)	
	outvec<-rep(NA,dim(mat)[1])
	for(i in 1:dim(pmat)[1]){
		outvec[i]<-2*min(sum(pmat[i,],na.rm=T)-pmat[i,i],sum(pmat[,i],na.rm=T)-pmat[i,i])
	}
	return(outvec)
}

alloc.dis<-function(mat,w=apply(mat,1,sum)){	#Overall allocation disagreement, Pontius and Milliones
	if(dim(mat)[1]!=dim(mat)[2]){stop("Non-square confusion matrix")}
	if(length(w)!=dim(mat)[1]){stop("Error weight vector length does not match error matrix dimensions")}
	alloc.cat<-alloc.dis.cat(mat,w)
	return(sum(alloc.cat)/2)
}

#Note that the following 4 functions are not normalized to change over time as in Pontius and Santacruz (2014), but are
# simply the changes between two matrices (which could be different times, or mapped vs. validated pixels, etc.)

exchange.cat<-function(mat,w=apply(mat,1,sum)){	#Categorical allocation disagreement, Pontius and Santacruz (2014)
	if(dim(mat)[1]!=dim(mat)[2]){stop("Non-square confusion matrix")}
	if(length(w)!=dim(mat)[1]){stop("Error weight vector length does not match error matrix dimensions")}
	pmat<-pmatmake(mat,w)	
	epsmat<-mat*NA
	for(i in 1:dim(pmat)[1]){
		for(j in 1:dim(pmat)[2]){
			epsmat[i,j]<-2*min(pmat[i,j],pmat[j,i])
			if(i<=j){epsmat[i,j]<-0}
		}
	}
	outvec<-rep(NA,dim(mat)[1])
	for(i in 1:dim(pmat)[1]){
		outvec[i]<-sum(epsmat[i,]+epsmat[,i],na.rm=T)
	}
	return(outvec)
}

exchange<-function(mat,w=apply(mat,1,sum)){	#Overall exchange, Pontius and Santacruz (2014)
	if(dim(mat)[1]!=dim(mat)[2]){stop("Non-square confusion matrix")}
	if(length(w)!=dim(mat)[1]){stop("Error weight vector length does not match error matrix dimensions")}
	return(sum(exchange.cat(mat,w))/2)
}

shift.cat<-function(mat,w=apply(mat,1,sum),round.pts=7){	#Categorical shift, Pontius and Santacruz (2014)
	if(dim(mat)[1]!=dim(mat)[2]){stop("Non-square confusion matrix")}
	if(length(w)!=dim(mat)[1]){stop("Error weight vector length does not match error matrix dimensions")}
	pmat<-pmatmake(mat,w)	
	dvec<-rep(NA,dim(mat)[1])
	for(i in 1:dim(pmat)[1]){
		dvec[i]<-sum(pmat[i,]+pmat[,i],na.rm=T)-2*pmat[i,i]
	}
	outvec<-dvec-quant.dis.cat(pmat)-exchange.cat(pmat)
	return(round(outvec,round.pts))	#The round.pts deals with some small floating point issues that crop up in R
}

shift<-function(mat,w=apply(mat,1,sum)){	#Overall shift, Pontius and Santacruz (2014)
	if(dim(mat)[1]!=dim(mat)[2]){stop("Non-square confusion matrix")}
	if(length(w)!=dim(mat)[1]){stop("Error weight vector length does not match error matrix dimensions")}
	return(sum(shift.cat(mat,w))/2)
}

ami<-function(mat,w=apply(mat,1,sum)){	#Average mutual information (in bits!)
	if(dim(mat)[1]!=dim(mat)[2]){
		stop("Non-square confusion matrix")}
	pmat<-pmatmake(mat,w)
	return(sum(pmat*log2(pmat/(apply(pmat,1,sum)%*%t(apply(pmat,2,sum)))),
		na.rm=T))	#na.rm=T in the sum treats this value as a 0, which is the correct
				#value of lim x->0 (x*log(x))  
}

ami.adj<-function(mat,w=apply(mat,1,sum)){ #AMI adjusted so no positive values off-diag are counted.
	if(dim(mat)[1]!=dim(mat)[2]){
		stop("Non-square confusion matrix")}
	pmat<-pmatmake(mat,w)
	imat<-pmat*log2(pmat/(apply(pmat,1,sum)%*%t(apply(pmat,2,sum))))
	imatdiag<-diag(imat)
	imat[imat>0]<-0
	diag(imat)<-imatdiag
	amiplusgood<-sum(imat,na.rm=T)
	return(amiplusgood)
}

ami.var<-function(mat,w=apply(mat,1,sum),ci=FALSE){	#var(AMI) in BITS; Moddemeijer (1989) eqn. 4.6 
	if(dim(mat)[1]!=dim(mat)[2]){stop("Non-square confusion matrix")}
	if(sum(floor(mat))!=sum(mat)){stop("Matrix contains non-integer values")}
	if(length(w)!=dim(mat)[1]){stop("Error: weight vector length does not match error matrix dimensions")}
	pmat<-pmatmake(mat,w)	#Make matrix into proportional matrix
	outvar<-((1/sum(mat))*(sum(pmat*log2(pmat/(apply(pmat,1,sum)%*%t(apply(pmat,2,sum))))^2,na.rm=T)-
		sum(pmat*log2(pmat/(apply(pmat,1,sum)%*%t(apply(pmat,2,sum)))),na.rm=T)^2))
	if(ci==FALSE){return(outvar)}
	if(ci==TRUE){return(1.96*sqrt(outvar))}
}

ami.prop<-function(mat,w=apply(mat,1,sum)){	#Proportional AMI (scaled on [0,1])
	if(dim(mat)[1]!=dim(mat)[2]){
		stop("Non-square confusion matrix")}
	pmat<-pmatmake(mat,w)	#Make matrix into proportional matrix
		return(ami(pmat)/sum(-apply(pmat,2,sum)*log2(apply(pmat,2,sum)),na.rm=T))
}


#LITERATURE CITED

# Card, DH (1982). Using known map category marginal frequencies to improve estimates of thematic map accuracy. Photogrammetric Enginneering and Remote Sensing, 48(3), 431-439.
# Mas, J-F et al. (2014).  A suite of tools for assessing thematic map accuracy.  Geography Journal.
# Moddemeijer, R. (1989). On estimation of entropy and mutual information of continuous distributions. Signal processing, 16(3), 233-248.
# Pontius, RG & M Millones (2011). Death to kappa: birth of quantity disagreement and allocation disagreement for accuracy assessment.  International Journal of Remote Sensing 32>4407-4429.
# Pontius, RG & A. Santacruz (2014). Quantity, exchange, and shift components of difference in a square contingency table. International journal of remote sensing, 35(21), 7543-7554.
# Olofsson, P., Foody, G. M., Herold, M., Stehman, S. V., Woodcock, C. E., & Wulder, M. A. (2014). Good practices for estimating area and assessing accuracy of land change. Remote Sensing of Environment, 148, 42-57.