.packageName <- "adk"
`adk.combined.test` <-
function (...) 
{
# This function adk.combined.test combines several Anderson-Darling K-sample test
# statistics AD.m, m = 1,...,M, into one overall test statistic AD.combined as suggested 
# in Section 8 of Scholz F.W. and Stephens M.A. (1987), K-sample Anderson-Darling Tests,
# Journal of the American Statistical Association, Vol 82, No. 399, pp. 918-924.
# See also the documentation of adk.test for the comparison of a single set of K samples 
# Here each application of the Anderson-Darling K-sample test can be based 
# on a different K > 1. This combined version tests the hypothesis that all the 
# the hypotheses underlying the individual K-sample tests are true simultaneously.
# The individual K-sample test hypothesis is that all samples from the m-th group come  
# from a common population. However, that population may be different from one individual  
# K-sample situation to the next. Such a combined test is useful in 
#
# 1) examining intra-laboratory measurement equivalence, when samples from the same 
#    material or batch are compared for M laboratories and such comparisons are made 
#    for samples from several different materials or batches and one assessment
#    over all materials/batches is desired.
#
# 2) analyzing treatment effects in randomized complete or incomplete block designs.
#
# Input: Either several lists, say L.1,...,L.M, where list L.i contains K.i sample vectors
#        of respective sizes n.i[1], ..., n.i[K.i] (n.i[j] > 4 is recommended)
#
#        or a single list of such lists.
#        
#
# An example:
# x1 <- c(1, 3, 2, 5, 7), x2 <- c(2, 8, 1, 6, 9, 4), and x3 <- c(12,  5,  7,  9, 11)
# y1 <- c(51, 43, 31, 53, 21, 75) and y2 <- c(23, 45, 61, 17, 60)
# then adk.combined.test(list(x1,x2,x3),list(y1,y2)) 
# or equivalently adk.combined.test(list(list(x1,x2,x3),list(y1,y2)))
# produces the outcome below.
##########################################################################################
#Combination of Anderson-Darling K-Sample Tests.
#
# Number of data sets = 2 
#
# Sample sizes within each data set:
# Data set 1 :  5 6 5
# Data set 2 :  6 5
# Total sample size per data set: 16 11 
# Number of unique values per data set: 11 11 
#
# AD.i = Anderson-Darling Criterion for i-th data set
# Means: 2 1 
# Standard deviations: 0.92837 0.64816 
#
# T.i = (AD.i - mean.i)/sigma.i
#
#
# Null Hypothesis:
# All samples within a data set come from a common distribution.
# The common distribution may change between data sets.
#
#                        T.i P-value extrapolation
# not adj. for ties  1.41756 0.08956             0
# adj. for ties      1.62856 0.07084             0
#                                                
# not adj. for ties -0.96786 0.61831             1
# adj. for ties     -1.02925 0.63586             1
#                                                
#
# Combined Anderson-Darling Criterion: AD.combined = AD.1+AD.2 
# Mean = 3    Standard deviation = 1.13225 
#
# T.combined = (AD.combined - mean)/sigma
#
#                  T.combined P-value extrapolation
# not adj. for ties    0.60825 0.22302             0
# adj. for ties        0.74612 0.19293             0
#
####################################################################################
# For out.adk.combined <- adk.combined.test(list(x1,x2,x3),list(y1,y2))
# or out.adk.combined <- adk.combined.test(list(list(x1,x2,x3),list(y1,y2)))
# we get the object out.adk.combined of class adk with the following components
# > names(out)
# [1] "M"         "n.samples" "nt"        "n.ties"    "adk.i"     "mu"       
# [7] "sig"       "adk.c"     "mu.c"      "sig.c"     "warning"  
# where 
# M = number of sets of samples being compared
# n.samples = is a list of the vectors giving the sample sizes for each 
#             set of samples being compared
# nt = vector of total sample sizes involved in each of the M comparisons
# n.ties = vector of lenth M giving the number of ties in each comparison group
# adk.i = (2*M) * 3 matrix containing the ti.obs, P-value, and extrapolation for  
#            the M individual Anderson-Darling tests, not adjusted for ties and
#            adjusted for ties. Here ti.obs is the observed value of T.i and
#            the corresponding P-value = P(T.i > ti.obs). 
#            Further,extrapolation = TRUE when the P-value is linearly extrapolated
#            outside of [.01,.25].
# mu = vector of means of the M AD statistics
# sig = vector of standard deviations of the M AD statistics
# adk.c = 2 * 3 matrix containing tc.obs, P-value, and extrapolation 
#            for the combined test, not adjusted for ties and adjusted for ties.
#            Here tc.obs is the observed value of Tc and P-value = P(Tc > tc.obs).
#            Further,extrapolation = TRUE when the P-value is linearly extrapolated
#            outside of [.01,.25].
# mu.c = mean of the combined AD statistic
# sig.c = standard deviation of the combined AD statistic
# warning = logical indicator, warning = T when at least one of the sample
#           sizes is < 5.
#
# Fritz Scholz, January 2008
#####################################################################################
   if(nargs() == 1 & is.list(list(...)[[1]])) {
        data.sets <- list(...)[[1]]
   }
   else {
        data.sets <- list(...)
   }
   n.sizes <- NULL
   M <- length(data.sets)
   if(M < 2) 
        stop("To combine test results you must have at least two data sets.")
   n.data <- sapply(data.sets, length)
   if(any(n.data <= 1)) 
        stop("One or more of the data sets consists of less than 2 samples.")
   n.samples <- list()
   for(i in 1:M){
      n.sample <- sapply(data.sets[[i]], length)
      n.sizes <- c(n.sizes, n.sample)
      if(any(n.sample==0))
      stop(paste("one or more samples in data set", i," has no observations"))
      n.samples[[i]] <- n.sample
   }
   warning <- min(n.sizes) < 5
   AD <- 0
   sig <- NULL
   n.ties <- NULL
   nt <- NULL
   mu <- NULL
   adk.i <- NULL
   mu.c <- 0
   for(i in 1:M){
      out <- adk.test(data.sets[[i]])
      out.adk=out$adk
      dimnames(out.adk) <- list(c("not adj. for ties","adj. for ties"),
                               c("ti.obs","P-value","extrapolation"))
      adk.i <- rbind(adk.i, out.adk)
      sig.i <- out$sig
      mu <- c(mu, length(data.sets[[i]])-1)
      AD.i <- out$adk[,1]*sig.i + length(data.sets[[i]]) - 1
      sig <- c(sig, sig.i)
      AD <- AD+AD.i
      mu.c <- mu.c + length(data.sets[[i]]) - 1
      n.ties <- c(n.ties, out$n.ties)
      nt <- c(nt, sum(out$ns))
   }
   sig.c <- sqrt(sum(sig^2))
   tc.obs <- (AD - mu.c)/sig.c
   adk.pval1 <- adk.pval(tc.obs[1], mu.c)
   adk.pval2 <- adk.pval(tc.obs[2], mu.c)

adk.c <- matrix(c(signif(tc.obs[1],7),
           round(adk.pval1[[1]],7), adk.pval1[[2]],
           signif(tc.obs[2],7), round(adk.pval2[[1]],7),
           adk.pval2[[2]]), byrow=TRUE, ncol=3)
dimnames(adk.c) <- list(c("not adj. for ties", "adj. for ties"),
                        c("tc.obs", "P-value", "extrapolation"))

object <- list(M=M, n.samples=n.samples, nt=nt, n.ties=n.ties, adk.i=adk.i,
           mu=mu, sig=sig, adk.c = round(adk.c,5), mu.c=mu.c,
           sig.c=round(sig.c,5), warning=warning)
class(object) <-  "adk"
object
}
`adk.pval` <-
function (tx,m) 
{
# This function "adk.pval" evaluates the p-value of the observed value tx of the T_m 
# statistic in "K-Sample Anderson-Darling Tests" by F.W. Scholz and M.A. Stephens (1987),
# Journal of the American Statistical Association", Vol 82, No. 399, pp 918-924.
# Thus this p-value is P(T_m >= tx).
#
# Input: tx = observed value of T_m, tx > 0.
#         m = the index of T_m, m >= 1.
#
# Output: a list with components
#         p0 = p-value of tx, i.e., p0 = P(T_m >= tx)
#         extrap = a logical indicator
#                    extrap = T means that linear extrapolation took place
#                    extrap = F means that quadratic interpolation was used.
#
# Computational Details:
#
# This function first interpolates the upper T_m quantiles as given in Table 1
# of the above reference to the given value of m by fitting a quadratic in 1/sqrt(m)
# to the quantiles as tabulated for the upper quantile levels .25, .10, .05, .025, .01.
#
# Next a quadratic in the interpolated quantiles (for m) is fitted to the  
# log-odds of the upper probability levels defining these quantiles
# and the fitted log-odds value at tx is converted back to the calculated upper 
# probability value, i.e., the p-value. p-values outside the tabulated range
# [.01,.25] are obtained by linear extrapolation of the fitted quadratic.
#  
# Fritz Scholz Jan. 2008
#=========================================================================================
table1.adk <- cbind(c(1, 2, 3, 4, 6, 8, 10, Inf), c(0.326, 
        0.449, 0.498, 0.525, 0.557, 0.576, 0.59, 0.674), c(1.225, 
        1.309, 1.324, 1.329, 1.332, 1.33, 1.329, 1.282), c(1.96, 
        1.945, 1.915, 1.894, 1.859, 1.839, 1.823, 1.645), c(2.719, 
        2.576, 2.493, 2.438, 2.365, 2.318, 2.284, 1.96), c(3.752, 
        3.414, 3.246, 3.139, 3.005, 2.92, 2.862, 2.326))
    extrap <- F
    mt <- table1.adk[, 1]
    sqm1 <- 1/sqrt(mt)
    sqm2 <- sqm1^2
    tm <- NULL
    p <- c(0.25, 0.1, 0.05, 0.025, 0.01)
    lp <- log(p/(1 - p))
    for (i in 1:5) {
        out <- lsfit(cbind(sqm1, sqm2), table1.adk[, i + 1])
        x <- 1/sqrt(m)
        coef <- out$coef
        y <- coef[1] + coef[2] * x + coef[3] * x^2
        tm <- c(tm, y)
    }
    out <- lsfit(cbind(tm, tm^2), lp)
    coef <- out$coef
    if (tx <= max(tm) & tx >= min(tm)) {
        lp0 <- coef[1] + coef[2] * tx + coef[3] * tx^2
    }
    if (tx > max(tm)) {
        extrap <- T
        lp0 <- min(lp) + (tx - max(tm)) * (coef[2] + 2 * coef[3] * 
            max(tm))
    }
    if (tx < min(tm)) {
        extrap <- T
        lp0 <- max(lp) + (tx - min(tm)) * (coef[2] + 2 * coef[3] * 
            min(tm))
    }
    p0 <- exp(lp0)/(1 + exp(lp0))
    names(p0) <- NULL
    list(p0 = p0, extrap = extrap)
}
`adk.test` <-
function (...) 
{
#################################################################################
# This function "adk.test" tests whether k samples (k>1) come from a common
# continuous distribution, using the nonparametric (rank) test described in
# Scholz F.W. and Stephens M.A. (1987), K-sample Anderson-Darling Tests,
# Journal of the American Statistical Association, Vol 82, No. 399, pp. 918-924. 
# This test is consistent against all alternatives. 
# There is an adjustment for ties, and for a moderate number of tied observations
# the P-value calculation should be reasonable.
# 
# The inputs can either be a sequence or a list of k (>1) sample vectors.
#
# An example:
# x1 <- c(1, 3, 2, 5, 7), x2 <- c(2, 8, 1, 6, 9, 4), 
# and x3 <- c(12,  5,  7,  9, 11)
# adk.test(x1,x2,x3) # or adk.test(list(x1,x2,x3)) produces the output below.
#################################################################################
# Anderson-Darling k-sample test.
#
# Number of samples:  3
# Sample sizes: 5 6 5
# Total number of values: 16
# Number of unique values: 11
#
# Mean of Anderson Darling Criterion: 2
# Standard deviation of Anderson Darling Criterion: 0.92837
#
# T = (Anderson Darling Criterion - mean)/sigma
#
# Null Hypothesis: All samples come from a common population.
#
#                         T P-value extrapolation
# not adj. for ties 1.41756 0.08956             0
# adj. for ties     1.62856 0.07084             0
#################################################################################
# In order to get the output list, call out.adk <- adk.test(x1,x2,x3)
# then out.adk is of class adk and has components 
# > names(out.adk)
# [1] "k"       "ns"      "n"       "n.ties"  "sig"    "adk"     "warning"
#
# where
# k = number of samples being compared
# ns = vector of the k sample sizes
# n = total sample size = n_1+...+n_k
# n.ties = number of ties in the combined set of all n observations
# sig = standard deviation of the AD statistic
# adk = 2 x 3 matrix containing t.obs, P-value, extrapolation, adjusting for
#       ties and not adjusting for ties. extrapolation is TRUE when 
#       the P-value was extrapolated. Here t.obs is the observed value of T
#       and P-value = P(T > t.obs).
# warning = logical indicator, warning = TRUE indicates that at least  
# one of the sample sizes is < 5.
#
# Fritz Scholz, January 2008
#
#################################################################################
    if (nargs() == 1 & is.list(list(...)[[1]])) {
        samples <- list(...)[[1]]
    }
    else {
        samples <- list(...)
    }
    k <- length(samples)
    if (k < 2) 
        stop("Must have at least two samples.")
    ns <- sapply(samples, length)
    if (any(ns == 0)) 
        stop("One or more samples have no observations.")
    x <- NULL
    for (i in 1:k) x <- c(x, samples[[i]])
    n <- length(x)
    Z.star <- sort(unique(x))
    L <- length(Z.star)
    AkN2 <- 0
    AakN2 <- 0
    l.vec <- NULL
    for (j in 1:L) {
        fij <- NULL
        for (i in 1:k) {
            fij <- c(fij, sum(samples[[i]] == Z.star[j]))
        }
        l.vec <- c(l.vec, sum(fij))
    }
    for (i in 1:k) {
        Mij <- 0
        Maij <- 0
        inner.sum <- 0
        inner.sum.a <- 0
        for (j in 1:L) {
            fij <- sum(samples[[i]] == Z.star[j])
            Mij <- Mij + fij
            Maij <- Mij - fij/2
            Bj <- sum(l.vec[1:j])
            Baj <- Bj - l.vec[j]/2
            if (j < L) {
                inner.sum <- inner.sum + l.vec[j] * (n * Mij - 
                  ns[i] * Bj)^2/(Bj * (n - Bj))
            }
            inner.sum.a <- inner.sum.a + l.vec[j] * (n * Maij - 
                ns[i] * Baj)^2/(Baj * (n - Baj) - n * l.vec[j]/4)
        }
        AkN2 <- AkN2 + inner.sum/ns[i]
        AakN2 <- AakN2 + inner.sum.a/ns[i]
    }
    AkN2 <- AkN2/n
    AakN2 <- (n - 1) * AakN2/n^2
    coef.d <- 0
    coef.c <- 0
    coef.b <- 0
    coef.a <- 0
    H <- sum(1/ns)
    h <- sum(1/(1:(n - 1)))
    g <- 0
    for (i in 1:(n - 2)) {
        g <- g + (1/(n - i)) * sum(1/((i + 1):(n - 1)))
    }
    coef.a <- (4 * g - 6) * (k - 1) + (10 - 6 * g) * H
    coef.b <- (2 * g - 4) * k^2 + 8 * h * k + (2 * g - 14 * h - 
        4) * H - 8 * h + 4 * g - 6
    coef.c <- (6 * h + 2 * g - 2) * k^2 + (4 * h - 4 * g + 6) * 
        k + (2 * h - 6) * H + 4 * h
    coef.d <- (2 * h + 6) * k^2 - 4 * h * k
    sig2 <- (coef.a * n^3 + coef.b * n^2 + coef.c * n + coef.d)/((n - 
        1) * (n - 2) * (n - 3))
    sig <- sqrt(sig2)
    TkN <- (AkN2 - (k - 1))/sig
    TakN <- (AakN2 - (k - 1))/sig
    pvalTkN <- adk.pval(TkN, k - 1)
    pvalTakN <- adk.pval(TakN, k - 1)
    warning <- min(ns) < 5
    ad.mat <- matrix(c(signif(TkN, 7), round(pvalTkN[[1]][1], 
        7), pvalTkN[[2]][1], signif(TakN, 7), round(pvalTakN[[1]][1], 
        7), pvalTakN[[2]][1]), byrow = T, ncol = 3)
    dimnames(ad.mat) <- list(c("not adj. for ties", "adj. for ties"), 
        c("t.obs", "P-value", "extrapolation"))
    object <- list(k = k, ns = ns, n = n, n.ties = n - L, sig = round(sig, 
        5), adk = round(ad.mat, 5), warning = warning)
    class(object) <- "adk"
    object
}
`print.adk` <-
function (x, ...) 
{
######################################################
#
# This is a print function for objects of class adk,
# as they are produced by adk.test and adk.combined.test.
#
# Fritz Scholz, January 2008
#
#######################################################
    if(names(x)[1]=="k"){# checking whether the object x came from adk.test
    cat("Anderson-Darling k-sample test.\n")
    cat(paste("\nNumber of samples: ", x$k))
    cat("\nSample sizes: ")
    cat(x$ns)
    cat(paste("\nTotal number of values:", x$n))
    cat(paste("\nNumber of unique values:", x$n-x$n.ties))
    cat(paste("\n\nMean of Anderson Darling Criterion:", 
        x$k-1))
    cat(paste("\nStandard deviation of Anderson Darling Criterion:", 
        x$sig))
    cat("\n\nT = (Anderson Darling Criterion - mean)/sigma")
    cat("\n\nNull Hypothesis: All samples come from a common population.\n\n")
    print(x$adk)
    if (x$warning) {
        cat("\n\nWarning: At least one sample size is less than 5.\n")
        cat("   p-values may not be very accurate.\n")
    }
    invisible(x)
    }
  if(names(x)[1]=="M"){# checking whether the object x came from adk.combined.test
    cat("Combination of Anderson-Darling K-Sample Tests.\n")
    cat(paste("\nNumber of data sets =", x$M,"\n"))
    cat("\nSample sizes within each data set:\n")
    ns <- NULL
    k <- length(x$n.samples)
    d.sets <- paste("Data set",1:k)
    for(i in 1:k){
       cat(d.sets[i],": ",x$n.samples[[i]])
       cat("\n")
    }
    if(k>3) AD.name=paste("AD.1","...",paste("AD.",k,sep=""),sep="+")
    if(k==2)AD.name=paste("AD.1+AD.2")
    if(k==3)AD.name=paste("AD.1+AD.2+AD.3")
    cat("Total sample size per data set: ")
    cat(x$nt,"\n")
    cat("Number of unique values per data set: ")
    cat(x$nt-x$n.ties,"\n")
    cat("\nAD.i = Anderson-Darling Criterion for i-th data set\n")
    cat("Means:",x$mu,"\n")
    cat("Standard deviations:", x$sig,"\n")
    cat("\nT.i = (AD.i - mean.i)/sigma.i\n")
    cat("\nNull Hypothesis:\nAll samples within a data set come from a common distribution.\n")
    cat("The common distribution may change between data sets.\n")
    cat("\n")
    adk.mat=NULL
    nx <- nrow(x$adk.i)/2
    for(i in 1:nx){
    adk.mat <- rbind(adk.mat,x$adk.i[c(2*i-1,2*i),],c(NA,NA,NA))
    }
    print(adk.mat,na.print=" ")
    cat("Combined Anderson-Darling Criterion: AD.combined =",AD.name,"\n")
    cat("Mean =",x$mu.c,"   Standard deviation =",round(x$sig.c,5),"\n")
    cat("\nT.combined = (AD.combined - mean)/sigma\n")
    cat("\n")
    print(x$adk.c)
    if (x$warning) {
        cat("\n\nWarning: At least one sample size is less than 5.\n         p-values may not be very accurate.\n")
    }
    invisible(x)
  }
}

