static const char* szModule        = "predcition.cpp";

//------------------------------------------------------------------------------
//    module prediction.cpp                                                   //
//                                                                            //
//    The class TPrediction predicts the unknown output value given the       //
//    input based on the model learned with the PNC cluster/learn algorithm.  //
//    The class TCluster encapsulates such a model.                           //
//    See below or http://www.newty.de/pnc2/sdocu.html for more information.  //
//                                                                            //
//    copyright (c) 2000-2003 by Lars Haendel                                 //
//    home: www.newty.de                                                      //
//                                                                            //
//    This program is free software and can be used under the terms of the    //
//    GNU licence. See header file for further information and disclaimer.    //
//                                                                            //
//------------------------------------------------------------------------------
//                                                                            //
//    CREATE: Pass learned model (TCluster) and (if you like) alternate       //
//    parameters (TParameter).                                                //
//                                                                            //
//    USE: Predict one single input(!) data tuple (float*) or predict         //
//    several tuples (TData*) by calling Predict(). In the latter case        //
//    you can pass an output file, a loss object and you can specify if       //
//    the data is passed with or without the output values. Of course         //
//    you cannot get meaningful loss results if the real output values        //
//    are not present but you may access the predictions made from the        //
//    loss object via the function Y_p() .                                    //
//                                                                            //
//    File I/O: some output in function Predict()                             //
//------------------------------------------------------------------------------


//----------------------------------------------------------------------------------------------------------------------
#include <stdlib>                // due to   qsort()
#include <stdio>                 //          sprintf()
#include <iomanip>               //          setw()
#include <math>                  //          sqrt()
#include <values>                //          MAXFLOAT

#include "prediction.h"
#include "fileutil.h"            //          file I/O utilities
#include "defines.h"
#include "exception.h"           //          IfTrueThrowTypeA()



//----------------------------------------------------------------------------------------------------------------------
//#define PREDICTION_DIAGNOSTIC // for each prediction: write distance of tuple to nearest cuboid etc. to logfile
#ifdef PREDICTION_DIAGNOSTIC
ofstream prdDiagFile("C:\\prddiag.dat");
#endif



      //---------------------------------------------------------------------------------------
      // utility function: 1/exp(x)  -  note: approximation is used for better performance
      #ifdef DEBUG
      float expn(float x)
      {
         IfTrueThrowTypeB(x<0, "Function called with negative argument!", "expn", szModule);
         return 1.0/(1.0 + x + x*x*0.5 + x*x*x*0.1666667);
      }
      #else
      inline float expn(float x){ return 1.0/(1.0 + x + x*x*0.5 + x*x*x*0.1666667); }
      #endif



//----------------------------------------------------------------------------------------------------------------------
// constructor
TPrediction::TPrediction(TCluster*const& _cls, const TParameter*const& _para/*=NULL*/)
{
   // copy
   cls  = _cls;
   if(_para)
      para = _para;                                   // use parameter object if given
   else
      para = cls->GetParameters();                    // else: take parameters from TCluster object

   beta = 1.0/(para->Sigma*(cls->nVar()-1));          // kernel width for regression


   d_ins = new float[cls->nCuboids()];                // cuboid 'inside' distances/probability
   d_out = new float[cls->nCuboids()];                // cuboid 'outside' distances/probability

   cls->Prepare(para);                    // prepare mapper to use only those cuboids with a mass bigger than 'para.K'
}


//----------------------------------------------------------------------------------------------------------------------
// predict unknown output value for given input
float TPrediction::Predict(const float*const x, float& nIns)
{
   nIns=0;  // initialize

   float  r_value=0;


   if(cls->nCuboidsRed()>0)                                 // if there are any cuboids -> predict using ...
      if(para->f_Regression)
         r_value = StandardPrediction(x, nIns);             // ... standard prediction for regression tasks
      else
         r_value = WeightedCountPrediction(x, nIns);        // ... and "weighted count" for classification tasks ...

   return r_value;
}


//----------------------------------------------------------------------------------------------------------------------
// standard inside-outside nearest cuboid prediction
float TPrediction::StandardPrediction(const float*const& x, float& nIns)
{
   // 1. reset all probabilities
   for(int t=0;t<cls->nCuboidsRed();t++)
      d_ins[t] = d_out[t] = 0;


   // 2. calculate probabilities
  for(int t=cls->nCuboidsRed();t--;)                     // for each cuboid
   {
      // a) calculate distance to t'th cuboid
      float d;
      if(para->Prune)                                    // use either pruned or none-pruned model
         d = cls->PrunedDistance(x, t);
      else
         d = cls->Distance(x, t);


      // b) if tuple is inside cuboid -> ...
      if(d==0)
      {
         // set t'th cuboid 'inside'-probability
         #ifdef HITRATE
         d_ins[t] = cls->Hitrate(t);
         #else
         d_ins[t] = 1.0;
         #endif
         nIns++;                                         // increment 'inside' counter
      }
      else        // ... else if tuple is 'outside'
      {
         #ifdef GAUSS_MSF
         d *= d;                                            // square distance
         #endif


         // set t'th cuboid 'outside' probability
         #ifdef HITRATE
         d_out[t] = expn(beta*d)*cls->Hitrate(t);           // modify probability by hitrate
         #else
         d_out[t] = expn(beta*d);
         #endif
      }
   }

   // 3. evaluate probabilities
   double y =0.0;                   // ini
   if(!EvMean(d_ins, y))            // minimum risk evaluation (COG = center of gravity)
      EvMean(d_out, y);

   return y;
}


//----------------------------------------------------------------------------------------------------------------------
// calculate mean of all probabilities (minimum risk evaluation, COG)
bool TPrediction::EvMean(const float*const& mu, double& y)
{
   double sum=0.0;      // ini

   // calculate mean of all probabilities
   for(int t=0;t<cls->nCuboidsRed();t++)
   {
      y     += mu[t]*cls->Output(t);
      sum   += mu[t];
   }

   if(sum==0)        // no probabilities have been greater than zero -> do not predict and return false
      return false;

   y /= sum;                           // ... else

   return true;
}


//----------------------------------------------------------------------------------------------------------------------
// predict value with maximal probability (maximum likelihood evaluation)
bool TPrediction::EvMax(const float*const& mu, double& y)
{
   int   maxId=0;             // note: ini with invalid value should also be possible as there is at least one cuboid
   float max=-MAXFLOAT;

   // estimate maximum of all suggestions
   for(int t=0;t<cls->nCuboidsRed();t++)        // for all cuboids
      if(mu[t] > max)                           // if probability is greater -> new maximum is found
      {
         max = mu[t];
         maxId=t;
      }

   if(max==0)                       // no probabilities have been greater than zero -> do not predict and return false
      return false;

   y=cls->Output(maxId);            // predict value with maximal probability

   return true;
}


//----------------------------------------------------------------------------------------------------------------------
// weighted count prediction for given tuple
float TPrediction::WeightedCountPrediction(const float*const& x, float& nIns)
{
   // 1. calculate distance of tuple to each cuboid
   for(int t=0;t<cls->nCuboidsRed();t++)
      if(para->Prune)                                                // distance to t-th cuboid
         d_out[t] = cls->PrunedDistance(x, t);
      else
         d_out[t] = cls->Distance(x, t);


   // 2. evaluate distances and predict
   return EvDistances(d_out, nIns);
}


//----------------------------------------------------------------------------------------------------------------------
// evaluate distances from each cuboid to tuple to make a "weighted count" prediction
float TPrediction::EvDistances(const float*const& d, float& nIns)
{
   // a) sort distances
   TSorter dis(cls->nCuboidsRed());
   for(int t=0;t<cls->nCuboidsRed();t++)
   {
      dis.GetA(t)  = d[t];                // 1th criterion distance
      dis.GetB(t)  = 0;                   // 2nd criterion not used
   }
   dis.SortAsc();


   // b) determine neighbourhood
   float d_ref=dis.GetA(0);                                 // set distance of nearest cuboid as 'reference' distance
   #ifndef RELEASE
   float max=d_ref*para->W_Kernel+cls->d_ref()*para->W_Kernel_Min;               // calculate width of 'neighborhood'
   #else
   float max=d_ref*para->W_Kernel;
   #endif
   float w_inv=0;                                                                // inverse of width
   if((max-d_ref)!=0)
      w_inv=1.0/(max-d_ref);

   int k=1;                                                                      // get # cuboids in neighborhood
   for(;k<cls->nCuboidsRed();k++)
      if(dis.GetA(k)>max)
         break;

   if(d_ref==0.0)             // if smallest distance is zero (inside) then set # cuboids inside
      nIns=k;


   // c) for each cuboid in the neighbourhood: sum up class proposals
   int nClasses=cls->GetDataData()->nIntegerMaxMin(0);                           // # classes output variable
   TSorter count(nClasses);
   for(int i=0;i<nClasses;i++)                                                   // reset/ini
      count.GetA(i) = count.GetB(i)= 0;                                          // 1th criterion and 2nd criterion


   // save: reference distance, maximal distance, k and if it's an inside prediction
   #ifdef PREDICTION_DIAGNOSTIC
      prdDiagFile << d_ref << " " << dis.GetA(nClasses-1) << " " <<  k << " " << (d_ref==0.0) << endl;
   #endif

   int minY = cls->GetDataData()->Min()[0];

   for(int t=0;t<k;t++)    // sum up
   {
      const int id = cls->Output(dis.GetId(t)) - minY;                              // output class mapped to id (PoD!)
      #ifdef HITRATE
         // first criterion: distance weighted count multiplied with hitrate
         count.GetA(id) += (1.0-(dis.GetA(t)-d_ref)*w_inv) * cls->Hitrate(dis.GetId(t));
      #else
         count.GetA(id) += (1.0-(dis.GetA(t)-d_ref)*w_inv) * 1;       // first criterion: distance weighted count
      #endif

      #ifdef MASS_TIE_BREAKING
         // 2nd criterion (tie breaking policy): sum up mass
         count.GetB(id) += cls->Mass(dis.GetId(t));
      #endif
   }
   #ifndef MASS_TIE_BREAKING
   // tie breaking policy: predict most frequent class
//   count.GetB(<most frequent class>) = 2;     // ToDo
   #endif


   // d) predict class with highest proposal count
   count.SortDes();                                      // sort descending
   return count.GetId(0)+minY;                           // ... take first (this is the best) and re-map to class value
}


//----------------------------------------------------------------------------------------------------------------------
// predict output value for all tuples in test data object 'data_T', flag f_no_output indicates if the first column in
// data object contains the real output values
void TPrediction::Predict(const TData*const& data_T, TLossFunction*const& loss, ofstream* file/*=NULL*/
                           , const bool& f_IsWithOutput/*=true*/, const bool* f_Stop/*=NULL*/)
{
   // pre-checks
   IfTrueThrowTypeA(data_T->nTup()==0, "Function called with empty data object!", "TPrediction::Test", szModule);
   IfTrueThrowTypeA(!file&&!loss, "Function called without loss or file stream object!", "TPrediction::Test", szModule);


   // a) write header if file exists
   if(file)
   {
      *file << ComChar << " Learn data       = " << cls->GetDataData()->FileName()  << endl;
      *file << ComChar << " Test data        = " << data_T->LoadFileName()    << endl;
      *file << ComChar << " Cuboids          = " << cls->nCuboids() << " / " << cls->nCuboidsRed() << " (reduced)" << endl;
      *file << endl;
      para->Save(*file, true);
   }


   // b) predict all tuples
   progress = 0;
   if(f_IsWithOutput)
      // a) real output values are given (column 0 in data object) and used to calculate loss
      for(int i=0;i<data_T->nTup();i++)
      {
         float nIns;                                           // # cuboids with distance of zero
         float res = Predict(&(data_T->Row(i)[1]), nIns);      // predict output value
         loss->EvPrediction(res, data_T->Row(i)[0], nIns);     // pass predicted and real value to loss function

         if(f_Stop)                                            // check stop flag
            if(*f_Stop)
               break;

         progress++;
      }
      else
         // b) no real ouput values are known
         for(int i=0;i<data_T->nTup();i++)
         {
            float nIns;                                        // # cuboids with distance of zero
            float res = Predict(data_T->Row(i), nIns);         // predict output value
            loss->EvPrediction(res, 0, nIns);                  // pass predicted and dummy real value to loss function

            if(f_Stop)                                         // check stop flag
               if(*f_Stop)
                  break;

            progress++;
         }



   // c) write results
   if(file)
   {
      loss->WriteResults(*file, !para->f_Regression, f_IsWithOutput);   // write results (loss etc.)
      *file << endl << endl;                                            // linefeed

      if(f_Stop)
         if(*f_Stop)                                                    // write error message if stop flag is set
            *file << "User break! Simulation terminated!" << endl;
   }
}