#!/usr/bin/python
#-----------------------------------------------------------------------
# File    : brreg.py
# Contents: best response regression
# Author  : Christian Borgelt
# History : 2020.03.?? file created
#           2020.03.16 bootstrap optimization added
#           2020.03.26 standard deviation added for x-distribution
#           2020.03.30 early stopping with objective function added
#           2020.03.31 FQ added, optimization functions simplified
#-----------------------------------------------------------------------
from sys    import argv, stderr
from time   import time
from math   import sqrt, exp, log, tan, pi
from random import seed, random, sample, choice, uniform, normalvariate
from matrix import chdecom, chdecomx, chinv, gjinv

#-----------------------------------------------------------------------
oo = float('inf')               # shorthand for infinity

#-----------------------------------------------------------------------
# auxiliary functions
#-----------------------------------------------------------------------

def regval (a, x):              # --- compute linear regression function
    return a[0] +sum([v*b for v,b in zip(x,a[1:])])

#-----------------------------------------------------------------------

def prod (l):                   # --- multiply several numbers
    r = 1.0                     # compute product of a list of numbers
    for x in l: r *= x          # (math.prod provides this, but
    return r                    # only from Python 3.8 onward)

#-----------------------------------------------------------------------

def noise (n, t='n', s=1.0):    # --- sample n noise values of type t
    if t in ['c', 'C', 'cauchy',  'Cauchy']:
        return [s *2 *tan(pi *uniform(-0.5,0.5)) for i in range(n)]
    if t in ['l', 'L', 'laplace', 'Laplace']:
        return [s *(4.0 if x < 0 else -4.0) *log(1.0 -abs(x))
                for x in [uniform(-1.0,1.0) for i in range(n)]]
    if t in ['u', 'U', 'uniform', 'Uniform']:
        return [s*uniform(-1.0,1.0) for i in range(n)]
    if t in ['n', 'N', 'normal',  'Normal',
             'g', 'G', 'gauss',   'Gauss']:
        return [s *normalvariate(0.0,1.0) for i in range(n)]
    return [s *prod([normalvariate(0.0,1.0) for j in range(int(t))])
            for i in range(n)]  # product of t normal distributions
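
# A minimal usage sketch (illustration only, not part of the tests):
#   e = noise(5, 'l', 1.0)      # five Laplace-distributed values
#   e = noise(5, '3')           # five products of three standard
#                               # normal samples (heavy-tailed)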

#-----------------------------------------------------------------------

def subdata (n, size=2):        # --- get random subset as weights
    r    = range(n)             # get the data point index range
    wgts = [0.0 for i in r]     # and initialize the weights array
    for i in range(size):       # draw 'size' data point samples
        wgts[choice(r)] += 1.0  # increase weight of chosen point
    return wgts                 # return created weight vector
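
# Note: drawing 'size' indices with replacement and returning their
# multiplicities as case weights is equivalent to passing a bootstrap
# sample of the data points to the weighted regression in 'coeffs'.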

#-----------------------------------------------------------------------
# regression function (ordinary least squares, OLS)
#-----------------------------------------------------------------------

def coeffs (X, y, wgts=None):
    if not wgts: wgts = [1.0      for x in X]
    else:        wgts = [float(w) for w in wgts]
    X   = [(1.0,)+x for x in X] # extend the data matrix
    rcx = range(len(X[0]))      # get range of row/column indices
    mat = [[0.0 for c in rcx] for r in rcx]
    rhs =  [0.0 for r in rcx]   # init. matrix and right hand side
    for x,y,w in zip(X,y,wgts): # traverse the sample cases
        for r in rcx:           # traverse the attributes/rows
            rhs[r] += w*y*x[r]  # compute element-wise vector sum
            for c in rcx[r:]:   # aggregate outer vector products
                mat[r][c] += w*x[r]*x[c]
    dec = chdecomx(mat)         # compute Cholesky decomposition L
    if not dec: return None     # check for successful decomposition
    dia = [1/dec[i][i] if dec[i][i] > 0 else oo for i in rcx]
    cfs = [0 for i in rcx]      # initialize the coefficient array
    for r in rcx:               # forward substitution with L
        t = rhs[r] -sum(cfs[c] *dec[r][c] for c in rcx[:r])
        cfs[r] = t *dia[r]
    for r in reversed(rcx):     # backward substitution with L^T
        t = cfs[r] -sum(cfs[c] *dec[c][r] for c in rcx[r+1:])
        cfs[r] = t *dia[r]
    return cfs                  # return the computed coefficients
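
# 'coeffs' solves the (weighted) normal equations (X'WX) a = X'Wy of
# ordinary least squares, with X extended by a leading column of ones
# for the intercept, via a Cholesky decomposition X'WX = LL' followed
# by forward and backward substitution. A minimal usage sketch
# (values purely illustrative):
#   a = coeffs([(1.0,), (2.0,), (3.0,)], [1.0, 2.0, 2.5])
#   p = regval(a, (4.0,))       # predict at a new point x = 4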

#-----------------------------------------------------------------------
# robust regression functions (MM estimator)
#-----------------------------------------------------------------------

def tukey (e, k):               # Tukey's bisquare weights
    if abs(e) > k: return 0.0
    e /= k; e = 1-e*e; return e*e
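
# Tukey's bisquare weight is w(e) = (1 - (e/k)^2)^2 for |e| <= k and
# w(e) = 0 otherwise, so points with residuals beyond k are ignored
# entirely in the next reweighted regression step.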

#-----------------------------------------------------------------------

def robust (X, y, thresh=1e-6, steps=100):
    n = float(len(X))           # get the number of data points
    a = coeffs(X, y)            # compute OLS coefficients
    for i in range(steps):      # do at most 'steps' update steps
        z = [regval(a,x) for x in X]
        e = [abs(u-v) for u,v in zip(y,z)]
        s = sum(e); q = sum(x*x for x in e)
        d = sqrt((q -s*s/n)/(n-1.0))
        w = [tukey(x,d) for x in e]
        o = a; a = coeffs(X, y, w)
        if not a: return o      # fall back if decomposition fails
        if max([abs(c-o) for c,o in zip(a,o)]) < thresh: break
    return a                    # return the computed coefficients
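
# 'robust' is iteratively reweighted least squares: the residuals of
# the current fit determine Tukey weights, which yield the next
# weighted OLS fit, until the maximum coefficient change falls below
# 'thresh' or the step limit is reached.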

#-----------------------------------------------------------------------
# best response regression functions
#-----------------------------------------------------------------------

def H (a, x, y, z, eps):        # --- compute values of step functions
    u = regval(a,x)             # evaluate the regression function
    v = (1.0-eps) *abs(z-y)     # compute distance from true value
    return (1 if u >  y-v else 0,
            1 if u >= y+v else 0)
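
# The pair (l,h) locates the prediction u relative to the tolerance
# interval (y-v, y+v) around the true value; l-h equals 1 exactly if
# u lies strictly inside, that is, if the model's residual beats the
# reference prediction z by at least the factor 1-eps.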

#-----------------------------------------------------------------------

def FH (a, X, y, z, eps, beta, gamma):
                                # --- compute value of function F_H
    f = [H(a,x,y,z,eps) for x,y,z in zip(X,y,z)]
    return sum(l-h for l,h in f)

#-----------------------------------------------------------------------

def L (a, x, y, z, eps, beta):  # --- compute values of logistic fns.
    u = regval(a,x)             # evaluate the regression function
    v = (1.0-eps) *abs(z-y)     # compute distance from true value
    l = -beta *(u -(y-v))       # compute arguments of logistic fns.
    h = -beta *(u -(y+v))       # and evaluate logistic functions
    return (1.0 /(1.0 +exp(l)) if l < 709 else 0.0,
            1.0 /(1.0 +exp(h)) if h < 709 else 0.0)
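
# 'L' replaces the step functions of 'H' with logistic functions of
# steepness beta, which makes F_L differentiable; the test l < 709
# merely guards against an overflow of exp() in IEEE double precision.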

#-----------------------------------------------------------------------

def FL (a, X, y, z, eps, beta, gamma):
                                # --- compute value of function F_L
    f = [L(a,x,y,z,eps,beta) for x,y,z in zip(X,y,z)]
    return sum(l-h for l,h in f)

#-----------------------------------------------------------------------

def gradFL (a, X, y, z, eps, beta, gamma):
                                # --- compute gradient of function F_L
    f = [L(a,x,y,z,eps,beta) for x,y,z in zip(X,y,z)]
    s = [beta *(l*(1-l) -h*(1-h)) for l,h in f]
    return [sum(s)] +[sum(c*x[i] for c,x in zip(s,X))
                      for i in range(len(X[0]))]
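
# The gradient uses d/dt sigma(t) = beta*sigma(t)*(1-sigma(t)) for
# the logistic function sigma(t) = 1/(1+exp(-beta*t)); the factors
# x[i] stem from the chain rule applied to the regression function.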

#-----------------------------------------------------------------------

def HessianFL (a, X, y, z, eps, beta, gamma):
                                # --- compute Hessian of function F_L
    f = [L(a,x,y,z,eps,beta) for x,y,z in zip(X,y,z)]
    b = beta *beta              # get common factor
    h = [b *(l*(1-l)*(1-2*l) -h*(1-h)*(1-2*h)) for l,h in f]
    i = range(len(X[0])+1)      # get range of row/column indices
    H = [[0 for c in i] for r in i] # create an empty matrix
    X = [(1.0,)+x for x in X]   # extend the data matrix
    for u,x in zip(h,X):        # traverse the data
        for r in i:             # traverse the rows
            for c in i:         # aggregate outer vector products
                H[r][c] += u *x[r] *x[c]
    return H                    # return the computed Hessian matrix

#-----------------------------------------------------------------------

def R (a, x, y, z, eps, beta, gamma):
                                # --- compute values of exponential
    u = regval(a,x)             # evaluate the regression function
    v = (1.0-eps) *abs(z-y)     # compute distance from true value
    l = u -(y-v)                # compute distance to lower
    h = u -(y+v)                # and to upper bound
    return (1.0 -0.5*exp(-gamma*l) if l >= 0 else 0.5*exp(+beta*l),
            1.0 -0.5*exp(+gamma*h) if h <= 0 else 0.5*exp(-beta*h))
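
# 'R' smoothes the step functions with exponentials: beyond a bound
# the value decays like 0.5*exp(-beta*d), inside it approaches 1 like
# 1 - 0.5*exp(-gamma*d), where d is the distance to the respective
# bound; beta and gamma thus control the slopes on the two sides.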

#-----------------------------------------------------------------------

def FR (a, X, y, z, eps, beta, gamma):
                                # --- compute value of function F_R
    f = [R(a,x,y,z,eps,beta,gamma) for x,y,z in zip(X,y,z)]
    return sum(l+h-1 for l,h in f)

#-----------------------------------------------------------------------

def gradFR (a, X, y, z, eps, beta, gamma):
                                # --- compute gradient of function F_R
    f = [R(a,x,y,z,eps,beta,gamma) for x,y,z in zip(X,y,z)]
    s = [((beta*l if l <= 0.5 else gamma*(1-l))
        - (beta*h if h <= 0.5 else gamma*(1-h))) for l,h in f]
    return [sum(s)] +[sum(c*x[i] for c,x in zip(s,X))
                      for i in range(len(X[0]))]

#-----------------------------------------------------------------------

def HessianFR (a, X, y, z, eps, beta, gamma):
                                # --- compute Hessian of function F_R
    f = [R(a,x,y,z,eps,beta,gamma) for x,y,z in zip(X,y,z)]
    b = beta  *beta             # compute terms of function F_R
    g = gamma *gamma            # and get common factors
    h = [((b*l if l <= 0.5 else g*(l-1))
        + (b*h if h <= 0.5 else g*(h-1))) for l,h in f]
    i = range(len(X[0])+1)      # get range of row/column indices
    H = [[0 for c in i] for r in i] # create an empty matrix
    X = [(1.0,)+x for x in X]   # extend the data matrix
    for u,x in zip(h,X):        # traverse the data
        for r in i:             # traverse the rows
            for c in i:         # aggregate outer vector products
                H[r][c] += u *x[r] *x[c]
    return H                    # return the computed Hessian matrix

#-----------------------------------------------------------------------

fnFH = [FH, None,   None]
fnFL = [FL, gradFL, HessianFL]
fnFR = [FR, gradFR, HessianFR]
objfns = {'FH': fnFH, 'FL': fnFL, 'FR': fnFR, '--': None}

#-----------------------------------------------------------------------

def optBSdirect (X, y, z, eps, trials=100):
    n = len(X)                  # get the number of data points
    t = [abs(u-v) for u,v in zip(y,z)]
    a = coeffs(X,y)             # ordinary regression as default
    o = [regval(a,x) for x in X]
    r = [abs(u-v) for u,v in zip(y,o)]
    p = [u for u,v in zip(r,t) if u < v *(1.0-eps)]
    b = (a, len(p), sum(p))
    for wgts in [subdata(n,n) for i in range(trials)]:
        a = coeffs(X,y,wgts)    # compute linear model coefficients
        o = [regval(a,x) for x in X]
        r = [abs(u-v) for u,v in zip(y,o)]
        p = [u for u,v in zip(r,t) if u < v *(1.0-eps)]
        w = len(p)              # compute number of won points
        s = sum(p)              # and their sum of residuals
        if (w > b[1]) or ((w == b[1]) and (s < b[2])):
            b = (a,w,s)         # update best model parameters
    return b[0]                 # return best parameters found
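
# 'optBSdirect' directly maximizes the number of data points on which
# a bootstrap model beats the reference predictions z by the factor
# 1-eps, breaking ties by the smaller sum of the winning residuals.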

#-----------------------------------------------------------------------

def optBootstrap (fns, X, y, z, eps, beta, gamma, trials=100):
    n = len(X)                  # get the number of data points
    a = coeffs(X,y)             # ordinary regression as default
    m = (a, fns[0](a, X, y, z, eps, beta, gamma))
    for wgts in [subdata(n,n) for i in range(trials)]:
        a = coeffs(X,y,wgts)    # compute linear model coefficients and
        q = fns[0](a, X, y, z, eps, beta, gamma)   # objective function
        if q > m[1]: m = (a,q)  # update the best parameters
    return m[0]                 # return best parameters found
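
# A minimal usage sketch (illustrative parameter values, assuming
# data X and y as produced e.g. by getData):
#   a = coeffs(X, y)                  # reference model (OLS)
#   z = [regval(a,x) for x in X]      # reference predictions
#   c = optBootstrap(fnFL, X, y, z, 0.1, 1.0, 8.0, 100)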

#-----------------------------------------------------------------------

def optClimb (fns, X, y, z, eps, beta, gamma,
              trials=100, eta=0.1, steps=1000):
    n = len(X)                  # get the number of data points
    a = coeffs(X,y)             # ordinary regression as default
    m = (a, fns[0](a, X, y, z, eps, beta, gamma))
    for wgts in [subdata(n,n) for i in range(trials)]:
        a = coeffs(X,y,wgts)    # compute linear model coefficients and
        q = fns[0](a, X, y, z, eps, beta, gamma)   # objective function
        if q > m[1]: m = (a,q)  # update the best parameters
        for i in range(steps):
            c = [u +uniform(-eta,eta) for u in a]
            q = fns[0](c, X, y, z, eps, beta, gamma)
            if q > m[1]: a = c; m = (a,q)
    return m[0]                 # return best parameters found

#-----------------------------------------------------------------------

def optGrad (fns, X, y, z, eps, beta, gamma,
             trials=20, eta=0.1, thresh=1e-6,
             steps=1000):       # --- standard gradient descent
    n = len(X)                  # get the number of data points
    a = coeffs(X,y)             # ordinary regression as default
    m = (a, fns[0](a, X, y, z, eps, beta, gamma))
    w = [1.0 for i in range(n)] # start with standard data points
    for wgts in [w] +[subdata(n,n) for i in range(trials)]:
        if min(wgts) <= 0:      # for any actual data subset,
            a = coeffs(X,y,wgts)# compute initial coefficients
        for k in range(steps):  # do a maximum number of update steps
            g = fns[1](a, X, y, z, eps, beta, gamma)
            a = [u +eta *h for u,h in zip(a,g)]
            if sqrt(eta *sum(z*z for z in g)) < thresh: break
            if k >= 8 and k % 4 == 0 \
            and fns[0](a, X, y, z, eps, beta, gamma) < m[1]: break
        q = fns[0](a, X, y, z, eps, beta, gamma)
        if q > m[1]: m = (a,q)  # update the best parameters
    return m[0]                 # return best parameters found
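
# Note that the update adds eta times the gradient, that is, this is
# gradient *ascent*, since the objective functions are maximized;
# every fourth step from the eighth on the objective is evaluated and
# a trial that has fallen below the best known value stops early.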

#-----------------------------------------------------------------------

def optResilient (fns, X, y, z, eps, beta, gamma,
                  trials=20, eta=0.1, thresh=1e-6,
                  facts=(0.5,1.2), change=(1e-6,1.0),
                  steps=100):   # --- resilient gradient descent
    n = len(X)                  # get the number of data points
    a = coeffs(X,y)             # ordinary regression as default
    m = (a, fns[0](a, X, y, z, eps, beta, gamma))
    w = [1.0 for i in range(n)] # start with standard data points
    for wgts in [w] +[subdata(n,n) for i in range(trials)]:
        if min(wgts) <= 0:      # for any actual data subset,
            a = coeffs(X,y,wgts)# compute initial coefficients
        e = [eta for x in a]    # initial step width
        p = [0   for x in a]    # previous gradient
        for k in range(steps):  # do a maximum number of update steps
            g = fns[1](a, X, y, z, eps, beta, gamma)
            for i in range(len(a)):
                if   g[i] > 0: t =  p[i]
                elif g[i] < 0: t = -p[i]
                else:          t =  0
                if   t > 0:     # if gradients have the same sign,
                    e[i] *= facts[1]  # increase the step width
                    if e[i] > change[1]: e[i] = change[1]
                    p[i] = g[i] # note the current gradient
                elif t < 0:     # if gradients have opposite signs,
                    e[i] *= facts[0]  # decrease the step width
                    if e[i] < change[0]: e[i] = change[0]
                    p[i] = 0    # suppress a change in the next step
                else:           # if one gradient is zero,
                    p[i] = g[i] # only note the current gradient
                # SuperSAB
                # a[i] += e[i] *g[i]   # update the parameters
                # Resilient
                if   g[i] > 0: a[i] += e[i]
                elif g[i] < 0: a[i] -= e[i]
            if sqrt(sum(z*z for z in e)) < thresh: break
            if k >= 8 and k % 4 == 0 \
            and fns[0](a, X, y, z, eps, beta, gamma) < m[1]: break
        q = fns[0](a, X, y, z, eps, beta, gamma)
        if q > m[1]: m = (a,q)  # update the best parameters
    return m[0]                 # return best parameters found
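
# This follows the Rprop scheme: only the sign of each gradient
# component enters the update, while the per-parameter step width
# e[i] grows by facts[1] on a sign agreement and shrinks by facts[0]
# on a sign change, clamped to the interval 'change'; the SuperSAB
# variant, which multiplies the step width with the gradient itself,
# is left commented out above.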

#-----------------------------------------------------------------------

def optNewton (fns, X, y, z, eps, beta, gamma,
               trials=20, thresh=1e-6, steps=100):
                                # --- Newton-Raphson on gradient
    n = len(X)                  # get the number of data points
    a = coeffs(X,y)             # ordinary regression as default
    m = (a, fns[0](a, X, y, z, eps, beta, gamma))
    w = [1.0 for i in range(n)] # start with standard data points
    for wgts in [w] +[subdata(n,n) for i in range(trials)]:
        if min(wgts) <= 0:      # for any actual data subset,
            a = coeffs(X,y,wgts)# compute initial coefficients
        for k in range(steps):  # do a maximum number of update steps
            H = gjinv(fns[2](a, X, y, z, eps, beta, gamma))
            if not H: break     # compute inverse of Hessian matrix
            g = fns[1](a, X, y, z, eps, beta, gamma)
            g = [sum(u*v for u,v in zip(r,g)) for r in H]
            a = [u-v for u,v in zip(a,g)]
            if sqrt(sum(z*z for z in g)) < thresh: break
            if k >= 8 and k % 4 == 0 \
            and fns[0](a, X, y, z, eps, beta, gamma) < m[1]: break
        q = fns[0](a, X, y, z, eps, beta, gamma)
        if q > m[1]: m = (a,q)  # update the best parameters
    return m[0]                 # return best parameters found
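
# Newton-Raphson is applied to the gradient (seeking a stationary
# point of the objective): a <- a - H^{-1}g, with the Hessian H
# inverted by Gauss-Jordan elimination ('gjinv'); if the inversion
# fails, the trial keeps the coefficients computed up to that point.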

#-----------------------------------------------------------------------
# main program
#-----------------------------------------------------------------------

def ctest ():                   # --- basic test of coeffs function
    X = [(1.0,), (2.0,), (3.0,), (4.0,),
         (5.0,), (6.0,), (7.0,), (8.0,)]
    y = [ 1.0,    3.0,    2.0,    3.0,
          4.0,    3.0,    5.0,    6.0]
    c = tuple(coeffs(X,y))
    print("% 8.6f % 8.6f" % c)
    for x,y in zip(X,y):
        print("% 8.6f % 8.6f % 8.6f" % (x[0],y,regval(c,x)))

#-----------------------------------------------------------------------

def showres (a, X, y, z, eps, beta, gamma, t):
    print('% 9.6f % 9.6f (%.3fs)' % tuple(a+[t]))
    print('%2d %9.6f %9.6f' % (FH(a, X, y, z, eps, beta, gamma),
                               FL(a, X, y, z, eps, beta, gamma),
                               FR(a, X, y, z, eps, beta, gamma)))

#-----------------------------------------------------------------------

def getData ():
    X = [(0.0,),     (0.1,),     (0.2,),     (0.3,),     (0.4,),
         (0.5,),     (0.6,),     (0.7,),     (0.8,),     (0.9,),
         (5.0,)]
    y = [ 1.54901800, 0.07267671,-0.03379786,-1.77214300,-1.56598000,
         -2.89350300,-4.90599200,-4.82752800,-6.44050900,-7.06924500,
          5.00000000]
    return (X,y)

#-----------------------------------------------------------------------

def grid (r, eps, beta, gamma, fn, steps):
    X,y = getData()
    a = coeffs(X,y)
    z = [regval(a,x) for x in X]
    F = FH if fn <= 0 else FL if fn <= 1 else FR
    s = float(steps)
    with open('data.txt', 'w') as out:
        for i in range(steps+1):
            c0 = (i/s)*(r[1]-r[0]) +r[0]
            for k in range(steps+1):
                c1 = (k/s)*(r[1]-r[0]) +r[0]
                q = F((c0,c1), X, y, z, eps, beta, gamma)
                out.write('% 7.5f % 7.5f %8.5f\n' % (c0,c1,q))
            out.write('\n')
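
# The file 'data.txt' receives one line per grid point with a blank
# line after each grid row, a layout that e.g. gnuplot's 'splot'
# command can render directly (an assumption about the intended use).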

#-----------------------------------------------------------------------

def basic (eps=0.1, beta=1.0, gamma=8.0,
           trials=10, eta=0.01, thresh=1e-6, steps=1000,
           facts=(0.5,1.2), change=(1e-6,1.0), rseed=0):
                                # --- basic test of functionality
    seed(rseed if rseed != 0 else time())
    X,y = getData()

    print('\nordinary regression:')
    t = time()
    a = coeffs(X,y)
    t = time() -t
    z = [regval(a,x) for x in X]
    showres(a, X, y, z, eps, beta, gamma, t)

    print('\nordinary regression without last point:')
    t = time()
    a = coeffs(X[:-1],y[:-1])
    t = time() -t
    showres(a, X, y, z, eps, beta, gamma, t)

    print('\nrobust regression:')
    t = time()
    a = robust(X,y,thresh,steps)
    t = time() -t
    showres(a, X, y, z, eps, beta, gamma, t)

    print('\nbootstrap sampling (direct):')
    t = time()
    a = optBSdirect(X, y, z, eps, 10*trials)
    t = time() -t
    showres(a, X, y, z, eps, beta, gamma, t)

    print('\nbootstrap sampling F_H:')
    t = time()
    a = optBootstrap(fnFH, X, y, z, eps, beta, gamma, 10*trials)
    t = time() -t
    showres(a, X, y, z, eps, beta, gamma, t)

    print('\nbootstrap sampling F_L:')
    t = time()
    a = optBootstrap(fnFL, X, y, z, eps, beta, gamma, 10*trials)
    t = time() -t
    showres(a, X, y, z, eps, beta, gamma, t)

    print('\nbootstrap sampling F_R:')
    t = time()
    a = optBootstrap(fnFR, X, y, z, eps, beta, gamma, 10*trials)
    t = time() -t
    showres(a, X, y, z, eps, beta, gamma, t)

    #print('\nhill climbing on F_L:')
    #t = time()
    #a = optClimb(fnFL, X, y, z, eps, beta, gamma, trials, eta, steps)
    #t = time() -t
    #showres(a, X, y, z, eps, beta, gamma, t)

    #print('\nhill climbing on F_R:')
    #t = time()
    #a = optClimb(fnFR, X, y, z, eps, beta, gamma, trials, eta, steps)
    #t = time() -t
    #showres(a, X, y, z, eps, beta, gamma, t)

    print('\nstandard gradient descent on F_L:')
    t = time()
    a = optGrad     (fnFL, X, y, z, eps, beta, gamma,
                     trials, eta, thresh, steps)
    t = time() -t
    showres(a, X, y, z, eps, beta, gamma, t)

    print('\nresilient gradient descent on F_L:')
    t = time()
    a = optResilient(fnFL, X, y, z, eps, beta, gamma,
                     trials, eta, thresh, facts, change, steps)
    t = time() -t
    showres(a, X, y, z, eps, beta, gamma, t)

    print('\nNewton-Raphson on gradient of F_L:')
    t = time()
    a = optNewton   (fnFL, X, y, z, eps, beta, gamma,
                     trials, thresh, steps)
    t = time() -t
    showres(a, X, y, z, eps, beta, gamma, t)

    print('\nstandard gradient descent on F_R:')
    t = time()
    a = optGrad     (fnFR, X, y, z, eps, beta, gamma,
                     trials, eta, thresh, steps)
    t = time() -t
    showres(a, X, y, z, eps, beta, gamma, t)

    print('\nresilient gradient descent on F_R:')
    t = time()
    a = optResilient(fnFR, X, y, z, eps, beta, gamma,
                     trials, eta, thresh, facts, change, steps)
    t = time() -t
    showres(a, X, y, z, eps, beta, gamma, t)

    print('\nNewton-Raphson on gradient of F_R:')
    t = time()
    a = optNewton   (fnFR, X, y, z, eps, beta, gamma,
                     trials, thresh, steps)
    t = time() -t
    showres(a, X, y, z, eps, beta, gamma, t)

#-----------------------------------------------------------------------

def extra (mode, ab=(0.0,1.0), size=100,
           xdist=('n',2.0), error=('n',1.0),
           objfn='FH', eps=0.1, beta=1.0, gamma=8.0,
           trials=100, eta=0.001, thresh=1e-6, steps=100,
           facts=(0.5,1.2), change=(1e-6,1.0), runs=100, rseed=0):
                                # --- best response regression exps.
    seed(rseed if rseed != 0 else time())
    fns   = objfns[objfn]       # get objective function to optimize
    a,b   = ab                  # get linear function coefficients
    wtrn  = wtst = wsse = 0     # number of training / test runs won
    rsize = range(size)         # get range for data points and
    xsdev = sqrt(3.0) *xdist[1] # bound for uniform distribution
    for k in range(runs):       # execute runs (traverse data sets)
        e  = noise(size, error[0], error[1])
        x  = [normalvariate(0,xdist[1]) for i in rsize] \
             if xdist[0] in ['n', 'N', 'normal', 'Normal'] else \
             [uniform(-xsdev,xsdev)     for i in rsize]
        y  = [a +b *x[i] +e[i]          for i in rsize]
        X  = [(u,) for u in x]  # sample noise and x, compute y
        ca = coeffs(X,y)        # build model and compute residuals
        za = [regval(ca,x) for x in X]
        ra = [abs(u-v) for u,v in zip(y,za)]
        if   mode == 1: cb = robust      (     X, y, thresh, steps)
        elif mode == 2: cb = optBSdirect (     X, y, za, eps, trials)
        elif mode == 3: cb = optBootstrap(fns, X, y, za,
                                          eps, beta, gamma, trials)
        elif mode == 4: cb = optClimb    (fns, X, y, za,
                                          eps, beta, gamma,
                                          trials, eta, steps)
        elif mode == 5: cb = optGrad     (fns, X, y, za,
                                          eps, beta, gamma,
                                          trials, eta, thresh, steps)
        elif mode == 6: cb = optResilient(fns, X, y, za,
                                          eps, beta, gamma,
                                          trials, eta, thresh,
                                          facts, change, steps)
        elif mode == 7: cb = optNewton   (fns, X, y, za,
                                          eps, beta, gamma,
                                          trials, thresh, steps)
        else: raise ValueError('invalid mode: ' +str(mode))
        zb = [regval(cb,x) for x in X]
        rb = [abs(u-v) for u,v in zip(y,zb)]
        wn = len([u for u,v in zip(rb,ra) if u < v])
        if wn+wn > size:        # compute and compare residuals
            wtrn += 1           # and count won training runs
        e  = noise(size, error[0], error[1])
        x  = [normalvariate(0,xdist[1]) for i in rsize] \
             if xdist[0] in ['n', 'N', 'normal', 'Normal'] else \
             [uniform(-xsdev,xsdev)     for i in rsize]
        y  = [a +b *x[i] +e[i]   for i in rsize]
        X  = [(u,) for u in x]  # sample noise and x, compute y
        za = [regval(ca,x) for x in X]
        ra = [abs(u-v) for u,v in zip(y,za)]
        zb = [regval(cb,x) for x in X]
        rb = [abs(u-v) for u,v in zip(y,zb)]
        wn = len([u for u,v in zip(rb,ra) if u < v])
        if wn+wn > size:        # compute and compare residuals
            wtst += 1           # and count won test runs
        ea = sum([e*e for e in ra])
        eb = sum([e*e for e in rb])
        if eb < ea: wsse += 1   # count runs won w.r.t. SSE
        #r = float(k+1)          # compute win rates
        #print("%4d %5.3f %5.3f %5.3f" % (r, wtrn/r, wtst/r, wsse/r))
    r = float(runs)             # compute win rates
    print("%3d %5.3f %5.3f %5.3f" % (runs, wtrn/r, wtst/r, wsse/r))

#-----------------------------------------------------------------------

modemap = {'coeffs':      -2,             'c': -2,
           'grid':        -1,             'i': -1,
           'basic':        0,             'x':  0,
           #--------------------------------------
           'robust':       1, 'rbst':  1, 's':  1,
           'direct':       2, 'dbs':   2, 'd':  2,
           #--------------------------------------
           'bootstrap':    3, 'boot':  3, 'b':  3,
           'hillclimb':    4, 'hill':  4, 'h':  4,
           'gradient':     5, 'grad':  5, 'g':  5,
           'resilient':    6, 'resi':  6, 'r':  6,
           'Newton':       7, 'newt':  7, 'n':  7 }

#-----------------------------------------------------------------------

if __name__ == '__main__':
    desc    = 'best response regression'
    version = 'version 1.3 (2020.03.19)         ' \
            + '(c) 2020   Christian Borgelt'
    opts    = {'a': ['coeffs', '0.0:1.0'    ],
               'n': ['size',   100          ],
               'x': ['xdist',  'normal:2.0' ],
               'e': ['error',  'normal:1.0' ],
               'o': ['objfn',  'FH'         ],
               'i': ['eps',    0.001        ],
               'b': ['beta',   1.0          ],
               'g': ['gamma',  32.0         ],
               't': ['trials', 20           ],
               'l': ['eta',    0.001        ],
               'T': ['thresh', 1e-6         ],
               's': ['steps',  200          ],
               'f': ['facts',  '0.5:1.2'    ],
               'z': ['change', '1e-6:1.0'   ],
               'r': ['runs',   1000         ],
               'S': ['rseed',  0            ] }
    fixed   = []                # list of program options

    if len(argv) <= 1:          # if no arguments are given
        opts = dict([(o,opts[o][1]) for o in opts])
        print('usage: brreg.py [options] mode')
        print(desc); print(version)
        print('-a#      true linear function coefficients      '
                      +'(default: ' +str(opts['a'])+')')
        print('-n#      number of data points                  '
                      +'(default: ' +str(opts['n'])+')')
        print('-x#      distribution of independent variable   '
                      +'(default: ' +str(opts['x'])+')')
        print('-e#      noise to be added and its scale        '
                      +'(default: ' +str(opts['e'])+')')
        print('-o#      objective function (FH,FL,FR)          '
                      +'(default: ' +str(opts['o'])+')')
        print('-i#      minimum improvement percentage epsilon '
                      +'(default: ' +str(opts['i'])+')')
        print('-b#      steepness parameter beta               '
                      +'(default: ' +str(opts['b'])+')')
        print('-g#      steepness parameter gamma              '
                      +'(default: ' +str(opts['g'])+')')
        print('-t#      number of trials / bootstrap samples   '
                      +'(default: ' +str(opts['t'])+')')
        print('-l#      learning rate eta                      '
                      +'(default: ' +str(opts['l'])+')')
        print('-T#      threshold for termination              '
                      +'(default: ' +str(opts['T'])+')')
        print('-s#      number of update steps                 '
                      +'(default: ' +str(opts['s'])+')')
        print('-f#      growth and shrink factors              '
                      +'(default: ' +str(opts['f'])+')')
        print('-z#      minimum and maximum change             '
                      +'(default: ' +str(opts['z'])+')')
        print('-r#      number of runs / repetitions           '
                      +'(default: ' +str(opts['r'])+')')
        print('-S#      seed value for random number generator '
                      +'(default: ' +str(opts['S'])+')')
        print('mode     mode / test type to be carried out     '
                      +'[required]')
        print('         -2: basic test of coeffs function')
        print('         -1: grid of objective function values')
        print('          0: basic test of best response regression')
        print('         1+: extended test of best response regression')
        exit()                  # print a usage message

    stderr.write('brreg.py')    # print a startup message
    stderr.write(' - ' +desc +'\n' +version +'\n')

    for a in argv[1:]:          # traverse the program arguments
        if a[0] != '-':         # check for a fixed argument
            if len(fixed) < 1: fixed.append(a); continue
            else: print('too many fixed arguments'); exit()
        if a[1] not in opts:    # check for an option
            print('unknown option: '+a[:2]); exit()
        o = opts[a[1]]          # get the corresponding option
        v = a[2:]               # and get the option argument
        if   isinstance(o[1], int):   o[1] = int(v)
        elif isinstance(o[1], float): o[1] = float(v)
        else:                         o[1] = v
    opts   = dict([opts[o] for o in opts])
    ab     = [float(x) for x in opts['coeffs'].split(':')]
    size   = opts['size']
    s      = opts['xdist'].split(':')
    xdist  = (s[0],float(s[1]) if len(s) > 1 else 2.0)
    s      = opts['error'].split(':')
    error  = (s[0],float(s[1]) if len(s) > 1 else 1.0)
    objfn  = opts['objfn']
    eps    = opts['eps']
    beta   = opts['beta']
    gamma  = opts['gamma']
    trials = opts['trials']
    eta    = opts['eta']
    thresh = opts['thresh']
    steps  = opts['steps']
    facts  = [float(x) for x in opts['facts'].split(':')]
    change = [float(x) for x in opts['change'].split(':')]
    runs   = opts['runs']
    rseed  = opts['rseed']

    if len(fixed) <= 0:       mode = 0
    elif fixed[0] in modemap: mode = modemap[fixed[0]]
    else:                     mode = int(fixed[0])

    if   mode < -1: ctest()
    elif mode <  0: grid(ab, eps, beta, gamma, trials, steps)
    elif mode == 0: basic(eps, beta, gamma,
                          trials, eta, thresh, steps,
                          facts, change, rseed)
    else:           extra(mode, ab, size, xdist, error,
                          objfn, eps, beta, gamma,
                          trials, eta, thresh, steps,
                          facts, change, runs, rseed)
