/* Copyright 2002 Ben Blum, Christian Shelton
 *
 * This file is part of GameTracer.
 *
 * GameTracer is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * GameTracer is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with GameTracer; if not, write to the Free Software Foundation, 
 * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include "cmatrix.h"
#include "ipa.h"
#include "gnmgame.h"

// IPA(A,g,zh,alpha,fuzz,ans)
// --------------------------
// This runs the IPA algorithm on game A.
// Interpretation of parameters:
// g: perturbation ray.
// zh: initial approximation for z.  Can be set to vector of all 1's.
// alpha: stepsize.  Must be a number between 0 and 1, to be interpreted
//        as the fraction of a complete step to take.
// fuzz: the cutoff accuracy for an equilibrium after which the algorithm
//       stops refining it
// ans: a pre-allocated vector in which the equilibrium will be stored

int IPA(gnmgame &A, cvector &g, cvector &zh, double alpha, double fuzz, cvector &ans) {
  int N = A.getNumPlayers(),
    M = A.getNumActions(), // For easy reference
    i,j,n,bestAction,B, // utility vars
    Im[N], // best actions in perturbed game
    firstIteration = 1; 

  double bestPayoff,l; // utility vars

  cmatrix DG(M,M), 
    O(N,N,0), // matrix of zeroes
    S(N,M,0), // 
    I(M+N,M+N,1,1), // identity
    T(M+N,M+N+2,0), // tableau for Lemke-Howson
    T2(M+N,M+N,0); // submatrix of tableau used if Lemke-Howson is unnecessary

  cvector d(M), // diff
    u(M),
    y(M), // old z
    yh(M), // old zh
    s(M), // current strategy
    so(M), // old strategy
    sh(M), // approximate strategy
    sho(M,0), // old approximate strategy
    z(M), // current point in game-space
    zt(M), // next approximating point
    ym1(M), // utility vars
    ym2(M), 
    ymn1(M+N),
    ymn2(M+N);

  // Find the best action for each player when the game is highly perturbed
  for(n = 0; n < N; n++) {
    bestPayoff = g[A.firstAction(n)];
    bestAction = A.firstAction(n);
    for(j = bestAction+1; j < A.lastAction(n); j++) {
      if(g[j] > bestPayoff) {
	bestPayoff = g[j];
	bestAction = j;
      }
    }
    Im[n] = bestAction;
  }


  A.retract(sh, zh);
  so = sh;

  while(1) {
    A.payoffMatrix(DG,sh,0.0);
    DG /= (double)(N-1); // find the Jacobian of the approximating bimatrix game

    // Initialize the Lemke-Howson tableau
    for(n = 0; n < N; n++) {
      for(i = 0; i < M+N; i++) {
	if(i >= A.firstAction(n) && i < A.lastAction(n)) {
	  T[M+n][i] = 1;
	  T[i][M+n] = -1;
	} else {
	  T[M+n][i] = T[i][M+n] = 0;
	}
      }
    }
    for(n = 0; n < M; n++) {
      T[n][M+N] = g[n];
      T[n][M+N+1] = 0.0;
    }
    for(n = 0; n < N; n++) {
      T[M+n][M+N+1] = 1.0;
      T[M+n][M+N] = 0.0;
    }
    for(i = 0; i < M; i++)
      for(j = 0; j < M; j++)
	T[i][j] = DG[i][j];

    // copy the tableau to T2
    for(i = 0; i < M+N; i++)
      for(j = 0; j < M+N; j++)
	T2[i][j] = T[i][j];

    // zero out columns not in the support
    for(i = 0; i < M; i++) {
      if(so[i] <= 0.0) {
	for(j = 0; j < M+N; j++) {
	  if(i == j)
	    T2[j][i] = 1.0;
	  else
	    T2[j][i] = 0.0;
	}
      }
    }
    
    for(i = 0; i < M; i++) {
      ymn1[i] = 0;
    }
    
    for(i = M; i < M+N; i++) {
      ymn1[i] = 1;
    }
    
    // find equilibrium assuming current support
    T2.solve(ymn1, ymn2);
    
    for(i = 0; i < M; i++)
      s[i] = ymn2[i];

    int flag = 0;
    for(i = 0; i < M; i++) {
      if(s[i] < 0.0) {
	flag = 1; // need to update the support
	break;
      }
    }
    if(flag) { // update support and solve
      A.LemkeHowson(s,T,Im);
    } else {
      // limit to current support
      for(i = 0; i < M; i++) {
	if(so[i] <= 0.0)
	  s[i] = 0.0;
      }
    }

    DG.multiply(s,z);
    z += s;
    
    // see if the support has changed
    B = 1;
    for(i = 0; i < M; i++) {
      if((s[i] > 0.0) != (so[i] > 0.0)) {
	B = 0;
	break;
      }
    } 
    
    // see if angle between z-sh and zh-sh is acute; if so, scale zh.

    u = zh;
    u -= sh;

    ym1 = z;
    ym1 -= sh;
    l = (ym1 * u) / u.norm2(); // dot product
    if(l <= 0.0 || B) {
      zh = u;
      zh *= l;
      zh += sh;
      yh -= sho;
      yh *= l;
      yh += sho;
    }
    
    ym1 = z;
    ym1 -= zh;
    ym2 = s;
    ym2 -= sh;
    // if z and zh or s and sh are close enough, 
    // we've got an approximate equilibrium, so we can quit
    if(N <= 2 || (ym1.norm() < fuzz || ym2.norm() < fuzz)) {
      ans = s;
      A.payoffMatrix(DG,s,0.0);
      return 1;
    }
    ym1 = z;
    // do a first-order approximation on the first iteration,
    // and subsequently do a second-order approximation
    if(!firstIteration) {
      d = z;
      d -= zh;
      d -= y;
      d += yh;
      // Rule of false position
      if(B && d.absmax() > fuzz) {
	for(i = 0; i < M; i++) {
	  ym1[i] = (yh[i] * z[i] - zh[i] * y[i])/d[i];
	}
      } else {
      // Only do a first-order approximation
	ym1 = z;
      }
    } else {
      firstIteration = 0;
      ym1 = z;
    }
    zt = ym1;
    zt *= alpha / (1-alpha);
    zt += zh;
    zt *= (1-alpha);
    // zt = ym1*alpha + zh * (1-alpha)

    // Update values
    so = s;
    y = z;
    sho = sh;
    yh = zh;
    zh = zt;
    A.retract(sh,zh);
  }
}