#Functions to calculate map accuracy # Carl Salk # version 10 Apr 2016 #Corrected division by zero problem # version 20 Jan 2017 #Double checked that unbiased estimators are being used #for overall, user and producer (plus their variances). #Note that the examples of Card (1982) give slightly different answers #due to propagated rounding errors in their paper. # version 28 Feb 2017 #Now using Olofsson (2014) equations for user and producer variance. # version 10 May 2017 #Corrected a bug in weighting for producers acc. variance function. # version 31 Aug 2017 #Removed NaN values in calculation of producers acc. variance function. # version 01 Nov 2017 #Added shift and exchange metrics from Pontius and Santacruz (2014). makemat<-function(classified,validated,counts=TRUE){ #Function to make a confusion matrix if(length(classified)!=length(validated)){ stop("Unequal vector lengths")} if(!is.logical(counts)) stop("'counts' must be interpretable as logical") cats<-sort(unique(c(classified,validated))) outmat<-matrix(NA,nrow=length(cats),ncol=length(cats)) for(i in 1:length(cats)){ for(j in 1:length(cats)){ outmat[i,j]<-sum(classified==cats[i] & validated==cats[j]) } } colnames(outmat)<-cats rownames(outmat)<-cats if(counts) return(outmat) if(!counts) return(outmat/sum(outmat)) } pmatmake<-function(mat,w=apply(mat,1,sum)){ #Joint probability matrix (sums to 1) if(diff(dim(mat))!=0){stop("Error matrix must have two equal dimensions")} if(length(w)!=dim(mat)[1]){stop("Error weight vector length does not match error matrix dimensions")} wvec<-w/sum(w) outmat<-wvec*mat/apply(mat,1,sum,na.rm=T) outmat[!is.finite(outmat)]<-0 return(outmat) } #Structure of binary matrix: # val.true val.false #class.true n1 n2 #class.false n3 n4 make.bin<-function(mat,var){ #Collapse a confusion matrix to a binary confusion matrix if(dim(mat)[1]!=dim(mat)[2]){ stop("Non-square confusion matrix")} if(var>dim(mat)[1]){ stop("Variable number greater than matrix size")} binmat<-matrix(NA,nrow=2,ncol=2) binmat[1,1]<-mat[var,var] binmat[2,2]<-sum(mat[-var,-var],na.rm=T) binmat[1,2]<-sum(mat[var,-var],na.rm=T) binmat[2,1]<-sum(mat[-var,var],na.rm=T) if(!is.finite(binmat[1,1])){binmat[1,1]<-0} return(binmat) } mat.comb<-function(mat,comblist){ #Creates a combined category matrix newmat<-matrix(NA,nrow=length(comblist),ncol=length(comblist)) for(i in 1:length(comblist)){ for(j in 1:length(comblist)){ newmat[i,j]<-sum(mat[comblist[[i]],comblist[[j]]]) } } return(newmat) } #BEGIN ACCURACY METRICS overacc<-function(mat,w=apply(mat,1,sum)){ #Weighted overall accuracy: Card (1982) eqn. 19 if(diff(dim(mat))!=0){stop("Error matrix must have two equal dimensions")} if(length(w)!=dim(mat)[1]){stop("Error weight vector length does not match error matrix dimensions")} wvec<-w/sum(w) return(sum(wvec*diag(mat)/apply(mat,1,sum),na.rm=T)) } over.var<-function(mat,w=apply(mat,1,sum),ci=FALSE){ #Variance of user accuracy: Olofsson (2014) if(diff(dim(mat))!=0){stop("Error matrix must have two equal dimensions")} if(length(w)!=dim(mat)[1]){stop("Error: weight vector length does not match error matrix dimensions")} if(!is.logical(ci)){stop("Error: parameter 'ci' must be logical")} wvec<-w/sum(w) #Normalize the weights to proportions pmat<-mat*wvec/apply(mat,1,sum) outvar<-sum(diag(pmat)*(wvec-diag(pmat))/(apply(mat,1,sum)-1),na.rm=T) if(ci==FALSE){return(outvar)} if(ci==TRUE){return(1.96*sqrt(outvar))} } user<-function(mat,w=-9999){ #Users accuracy: Card (1982) eqn. 18 if(dim(mat)[1]!=dim(mat)[2]){stop("Non-square confusion matrix")} if(w[1]!=-9999) { print('Warning: Weighting does not affect the calculation of this metric')} return(diag(mat)/apply(mat,1,sum)) } user.var<-function(mat,w=-9999,ci=FALSE){ #Variance of user accuracy: Olofsson (2014) if(diff(dim(mat))!=0){stop("Error matrix must have two equal dimensions")} if(w[1]!=-9999) {print('Warning: Weighting does not affect the calculation of this metric')} if(!is.logical(ci)){stop("Error: parameter 'ci' must be logical")} pmat<-mat/apply(mat,1,sum) outvar<-diag(pmat)*(1-diag(pmat))/(apply(mat,1,sum)-1) if(ci==FALSE){return(outvar)} if(ci==TRUE){return(1.96*sqrt(outvar))} } producer<-function(mat,w=apply(mat,1,sum)){ #Producer's accuracy: Card (1982) eqn. 17; Mas (2014) eqn. 4 if(dim(mat)[1]!=dim(mat)[2]){stop("Non-square confusion matrix")} wvec<-w/sum(w) #Proportionalize the weights pmat<-mat*wvec/apply(mat,1,sum) return(wvec*diag(mat)/(apply(pmat,2,sum,na.rm=T)*apply(mat,1,sum,na.rm=T))) } prod.var<-function(mat,w=apply(mat,1,sum),ci=FALSE){ #Producer's acc variance: Olofsson (2014) if(dim(mat)[1]!=dim(mat)[2]){stop("Non-square confusion matrix")} if(length(w)!=dim(mat)[1]){stop("Error: weight vector length does not match error matrix dimensions")} if(!is.logical(ci)){stop("Error: parameter 'ci' must be logical")} wvec<-w/sum(w) #Normalize the weights to proportions pmat<-mat*wvec/apply(mat,1,sum) outvar<-rep(NA,dim(mat)[1]) for(i in 1:length(outvar)){ outvar[i]<-1/sum(wvec*mat[,i]/apply(mat,1,sum))^2 * (wvec[i]^2*(1-producer(mat,w=wvec)[i])^2*user(mat)[i]*(1-user(mat)[i])/(apply(mat,1,sum)[i]-1) + producer(mat,w=wvec)[i]^2*sum(wvec[-i]^2*mat[-i,i]/apply(mat,1,sum)[-i]*(1-mat[-i,i]/apply(mat,1,sum)[-i])/ (apply(mat,1,sum)[-i]-1),na.rm=T)) } if(ci==FALSE){return(outvar)} if(ci==TRUE){return(1.96*sqrt(outvar))} } port<-function(mat,w=apply(mat,1,sum)){ #Portmanteau accuracy if(dim(mat)[1]!=dim(mat)[2]){ stop("Non-square confusion matrix")} outvec<-rep(NA,dim(mat)[1]) pmat<-pmatmake(mat,w) for(i in 1:dim(mat)[1]){ binmat<-make.bin(pmat,i) outvec[i]<-overacc(binmat) } return(outvec) } port.part<-function(mat){ #Partial portmanteau accuracy - note lack of weighting if(dim(mat)[1]!=dim(mat)[2]){ stop("Non-square confusion matrix")} outvec<-rep(NA,dim(mat)[1]) for(i in 1:dim(mat)[1]){ binmat<-make.bin(mat,i) binmat[2,2]<-0 outvec[i]<-binmat[1,1]/(binmat[1,1]+binmat[1,2]+binmat[2,1]) } return(outvec) } kappa<-function(mat){ #Kappa metric #Following Congalton & Green if(dim(mat)[1]!=dim(mat)[2]){ stop("Non-square confusion matrix")} chance.agr<-sum((apply(mat,1,sum)/(sum(mat))^2)*apply(mat,2,sum)) tot.agr<-overacc(mat) return((tot.agr-chance.agr)/(1-chance.agr)) } kappa.cat<-function(mat){ #Category-wise Kappa based on collapsed binary error matrices if(dim(mat)[1]!=dim(mat)[2]){ stop("Non-square confusion matrix")} outvec<-rep(NA,dim(mat)[1]) for(i in 1:length(outvec)){ binmat<-make.bin(mat,i) #BINary MATrix chance.agr<-sum(apply(binmat,1,sum)*apply(binmat,2,sum)/(sum(binmat))^2) tot.agr<-overacc(binmat) outvec[i]<-(tot.agr-chance.agr)/(1-chance.agr) } return(outvec) } kappa.var<-function(mat,ci=FALSE){ #Variance of Kappa #Following Congalton & Green (2008) if(dim(mat)[1]!=dim(mat)[2]){ stop("Non-square confusion matrix")} if(!is.logical(ci)){stop("Error: parameter 'ci' must be logical")} theta1<-sum(diag(mat))/sum(mat) theta2<-sum(apply(mat,1,sum)*apply(mat,2,sum)/(sum(mat))^2) theta3<-sum(diag(mat)*(apply(mat,1,sum)+apply(mat,2,sum))/(sum(mat))^2) theta4<-sum( mat*(matrix(apply(mat,1,sum),nrow=dim(mat)[1],ncol=dim(mat)[2],byrow=F)+ matrix(apply(mat,2,sum),nrow=dim(mat)[1],ncol=dim(mat)[2],byrow=T))^2)/ (sum(mat))^3 outvar<-((sum(mat)^-1)*( theta1*(1-theta1)/(1-theta2)^2+ 2*(1-theta1)*(2*theta1*theta2-theta3)/(1-theta2)^3+ (1-theta1)^2*(theta4-4*theta2^2)/(1-theta2)^4)) if(ci==FALSE){return(outvar)} if(ci==TRUE){return(1.96*sqrt(outvar))} } kappa.comp<-function(mat1,mat2){ #Statistical significance of kappa difference #Close to 0 or 1 is a significant result in a 2-tailed sense if(dim(mat1)[1]!=dim(mat1)[2]){ stop("Non-square confusion matrix")} if(dim(mat2)[1]!=dim(mat2)[2]){ stop("Non-square confusion matrix")} zscore<-(kappa(mat1)-kappa(mat2))/ sqrt(kappa.var(mat1)+kappa.var(mat2)) return(zscore) } cond.kappa<-function(mat){ #Conditional kappa #Following Congalton & Green if(dim(mat)[1]!=dim(mat)[2]){ stop("Non-square confusion matrix")} outvec<-rep(NA,dim(mat)[1]) for(i in 1:length(outvec)){ outvec[i]<-(sum(mat)*mat[i,i]-sum(mat[i,])*sum(mat[,i]))/ (sum(mat)*sum(mat[i,])-sum(mat[i,])*sum(mat[,i])) } return(outvec) } cond.kappa.var<-function(mat,ci=FALSE){ #Conditional kappa variance if(dim(mat)[1]!=dim(mat)[2]){stop("Non-square confusion matrix")} if(!is.logical(ci)){stop("Error: parameter 'ci' must be logical")} outvar<-rep(NA,dim(mat)[1]) for(i in 1:length(outvar)){ n<-sum(mat) nii<-mat[i,i] ni.<-sum(mat[i,]) n.i<-sum(mat[,i]) outvar[i]<-(n*(ni.-nii)/(ni.*(n-n.i))^3)* ((ni.-nii)*(ni.*n.i-n*nii)+n*nii*(n-ni.-n.i+nii)) } if(ci==FALSE){return(outvar)} if(ci==TRUE){return(1.96*sqrt(outvar))} } sensitivity<-function(mat,w=apply(mat,1,sum)){ #Sensitivity from Foody 2010 if(dim(mat)[1]!=dim(mat)[2]){stop("Non-square confusion matrix")} if(length(w)!=dim(mat)[1]){stop("Error weight vector length does not match error matrix dimensions")} pmat<-pmatmake(mat,w) outvec<-rep(NA,dim(mat)[1]) for(i in 1:dim(mat)[1]){ binmat<-make.bin(pmat,i) outvec[i]<-binmat[1,1]/sum(binmat[,1]) } return(outvec) } specificity<-function(mat,w=apply(mat,1,sum)){ #Sensitivity from Foody 2010 if(dim(mat)[1]!=dim(mat)[2]){stop("Non-square confusion matrix")} if(length(w)!=dim(mat)[1]){stop("Error weight vector length does not match error matrix dimensions")} pmat<-pmatmake(mat,w) outvec<-rep(NA,dim(mat)[1]) for(i in 1:dim(mat)[1]){ binmat<-make.bin(pmat,i) outvec[i]<-binmat[2,2]/sum(binmat[,2]) } return(outvec) } quant.dis.cat<-function(mat,w=apply(mat,1,sum)){ #Categorical Quantity disagreement from Pontius and Milliones if(dim(mat)[1]!=dim(mat)[2]){ stop("Non-square confusion matrix")} if(length(w)!=dim(mat)[1]){stop("Error weight vector length does not match error matrix dimensions")} pmat<-pmatmake(mat,w) outvec<-rep(NA,dim(mat)[1]) for(i in 1:dim(pmat)[1]){ outvec[i]<-abs(sum(pmat[i,],na.rm=T)-sum(pmat[,i],na.rm=T)) } return(outvec) } quant.dis<-function(mat,w=apply(mat,1,sum)){ #Overall quantity disagreement, Pontius and Milliones if(dim(mat)[1]!=dim(mat)[2]){ stop("Non-square confusion matrix")} if(length(w)!=dim(mat)[1]){stop("Error weight vector length does not match error matrix dimensions")} quant.cat<-quant.dis.cat(mat,w) return(sum(quant.cat)/2) } alloc.dis.cat<-function(mat,w=apply(mat,1,sum)){ #Categorical allocation disagreement, Pontius and Milliones if(dim(mat)[1]!=dim(mat)[2]){stop("Non-square confusion matrix")} if(length(w)!=dim(mat)[1]){stop("Error weight vector length does not match error matrix dimensions")} pmat<-pmatmake(mat,w) outvec<-rep(NA,dim(mat)[1]) for(i in 1:dim(pmat)[1]){ outvec[i]<-2*min(sum(pmat[i,],na.rm=T)-pmat[i,i],sum(pmat[,i],na.rm=T)-pmat[i,i]) } return(outvec) } alloc.dis<-function(mat,w=apply(mat,1,sum)){ #Overall allocation disagreement, Pontius and Milliones if(dim(mat)[1]!=dim(mat)[2]){stop("Non-square confusion matrix")} if(length(w)!=dim(mat)[1]){stop("Error weight vector length does not match error matrix dimensions")} alloc.cat<-alloc.dis.cat(mat,w) return(sum(alloc.cat)/2) } #Note that the following 4 functions are not normalized to change over time as in Pontius and Santacruz (2014), but are # simply the changes between two matrices (which could be different times, or mapped vs. validated pixels, etc.) exchange.cat<-function(mat,w=apply(mat,1,sum)){ #Categorical allocation disagreement, Pontius and Santacruz (2014) if(dim(mat)[1]!=dim(mat)[2]){stop("Non-square confusion matrix")} if(length(w)!=dim(mat)[1]){stop("Error weight vector length does not match error matrix dimensions")} pmat<-pmatmake(mat,w) epsmat<-mat*NA for(i in 1:dim(pmat)[1]){ for(j in 1:dim(pmat)[2]){ epsmat[i,j]<-2*min(pmat[i,j],pmat[j,i]) if(i<=j){epsmat[i,j]<-0} } } outvec<-rep(NA,dim(mat)[1]) for(i in 1:dim(pmat)[1]){ outvec[i]<-sum(epsmat[i,]+epsmat[,i],na.rm=T) } return(outvec) } exchange<-function(mat,w=apply(mat,1,sum)){ #Overall exchange, Pontius and Santacruz (2014) if(dim(mat)[1]!=dim(mat)[2]){stop("Non-square confusion matrix")} if(length(w)!=dim(mat)[1]){stop("Error weight vector length does not match error matrix dimensions")} return(sum(exchange.cat(mat,w))/2) } shift.cat<-function(mat,w=apply(mat,1,sum),round.pts=7){ #Categorical shift, Pontius and Santacruz (2014) if(dim(mat)[1]!=dim(mat)[2]){stop("Non-square confusion matrix")} if(length(w)!=dim(mat)[1]){stop("Error weight vector length does not match error matrix dimensions")} pmat<-pmatmake(mat,w) dvec<-rep(NA,dim(mat)[1]) for(i in 1:dim(pmat)[1]){ dvec[i]<-sum(pmat[i,]+pmat[,i],na.rm=T)-2*pmat[i,i] } outvec<-dvec-quant.dis.cat(pmat)-exchange.cat(pmat) return(round(outvec,round.pts)) #The round.pts deals with some small floating point issues that crop up in R } shift<-function(mat,w=apply(mat,1,sum)){ #Overall shift, Pontius and Santacruz (2014) if(dim(mat)[1]!=dim(mat)[2]){stop("Non-square confusion matrix")} if(length(w)!=dim(mat)[1]){stop("Error weight vector length does not match error matrix dimensions")} return(sum(shift.cat(mat,w))/2) } ami<-function(mat,w=apply(mat,1,sum)){ #Average mutual information (in bits!) if(dim(mat)[1]!=dim(mat)[2]){ stop("Non-square confusion matrix")} pmat<-pmatmake(mat,w) return(sum(pmat*log2(pmat/(apply(pmat,1,sum)%*%t(apply(pmat,2,sum)))), na.rm=T)) #na.rm=T in the sum treats this value as a 0, which is the correct #value of lim x->0 (x*log(x)) } ami.adj<-function(mat,w=apply(mat,1,sum)){ #AMI adjusted so no positive values off-diag are counted. if(dim(mat)[1]!=dim(mat)[2]){ stop("Non-square confusion matrix")} pmat<-pmatmake(mat,w) imat<-pmat*log2(pmat/(apply(pmat,1,sum)%*%t(apply(pmat,2,sum)))) imatdiag<-diag(imat) imat[imat>0]<-0 diag(imat)<-imatdiag amiplusgood<-sum(imat,na.rm=T) return(amiplusgood) } ami.var<-function(mat,w=apply(mat,1,sum),ci=FALSE){ #var(AMI) in BITS; Moddemeijer (1989) eqn. 4.6 if(dim(mat)[1]!=dim(mat)[2]){stop("Non-square confusion matrix")} if(sum(floor(mat))!=sum(mat)){stop("Matrix contains non-integer values")} if(length(w)!=dim(mat)[1]){stop("Error: weight vector length does not match error matrix dimensions")} pmat<-pmatmake(mat,w) #Make matrix into proportional matrix outvar<-((1/sum(mat))*(sum(pmat*log2(pmat/(apply(pmat,1,sum)%*%t(apply(pmat,2,sum))))^2,na.rm=T)- sum(pmat*log2(pmat/(apply(pmat,1,sum)%*%t(apply(pmat,2,sum)))),na.rm=T)^2)) if(ci==FALSE){return(outvar)} if(ci==TRUE){return(1.96*sqrt(outvar))} } ami.prop<-function(mat,w=apply(mat,1,sum)){ #Proportional AMI (scaled on [0,1]) if(dim(mat)[1]!=dim(mat)[2]){ stop("Non-square confusion matrix")} pmat<-pmatmake(mat,w) #Make matrix into proportional matrix return(ami(pmat)/sum(-apply(pmat,2,sum)*log2(apply(pmat,2,sum)),na.rm=T)) } #LITERATURE CITED # Card, DH (1982). Using known map category marginal frequencies to improve estimates of thematic map accuracy. Photogrammetric Enginneering and Remote Sensing, 48(3), 431-439. # Mas, J-F et al. (2014). A suite of tools for assessing thematic map accuracy. Geography Journal. # Moddemeijer, R. (1989). On estimation of entropy and mutual information of continuous distributions. Signal processing, 16(3), 233-248. # Pontius, RG & M Millones (2011). Death to kappa: birth of quantity disagreement and allocation disagreement for accuracy assessment. International Journal of Remote Sensing 32>4407-4429. # Pontius, RG & A. Santacruz (2014). Quantity, exchange, and shift components of difference in a square contingency table. International journal of remote sensing, 35(21), 7543-7554. # Olofsson, P., Foody, G. M., Herold, M., Stehman, S. V., Woodcock, C. E., & Wulder, M. A. (2014). Good practices for estimating area and assessing accuracy of land change. Remote Sensing of Environment, 148, 42-57.