//------------------------------------------------------------------------------
//    module ParaSet.cpp                                                      //
//                                                                            //
//    Base class for TParameter. Encapsulates only those parameters that are  //
//    chosen within the parameter tuning.                                     //
//    See below or http://www.newty.de/pnc2/sdocu.html for more information.  //
//                                                                            //
//    copyright (c) 2001-2003 by Lars Haendel                                 //
//    home: www.newty.de                                                      //
//                                                                            //
//    This program is free software and can be used under the terms of the    //
//    GNU licence. See header file for further information and disclaimer.    //
//                                                                            //
//------------------------------------------------------------------------------
//                                                                            //
//    NOTE: There are helper functions to convert to TParameter               //
//                                                                            //
//    File I/O: Load, Save and Write routines                                 //
//------------------------------------------------------------------------------


#include <math>            // due to:  atof() etc.
#include <stdlib>          //          strtod()
#include <iomanip>         //          setw()


#include "fileutil.h"      //          SizeOfString() and other utilities
#include "defines.h"
#include "ParaSet.h"
#include "exception.h"     //          IfTrueThrowTypeU()


//----------------------------------------------------------------------------------------------------------------------
// upper limit for parameter 'min. cuboid mass'
//    data    : learn data set or null pointer if the # of tuples is not known
//    returns : half the # of tuples of 'data' but never less than 10; 10 if no data is passed
// note: the lower limit of 10 ensures that the values used in the gui's task wizard are legal
const int Max_P_Min(const TData*const& data)
{
   if(data)                                  // # tuples in learn data is known
      return max(10, data->nTup()/2);

   return 10;                                // unknown: fall back to the lower limit
}


//----------------------------------------------------------------------------------------------------------------------
// upper limit for parameter 'w_COD'
//    data    : data set or null pointer if the # of inputs is not known
//    returns : data->nVar()-2, but never less than 0; 0 (the secure value) if no data is passed
const int Max_w_COD(const TData*const& data)
{
   if(data)                                  // # inputs is known
      return max(data->nVar()-2, 0);

   return 0;                                 // unknown: secure value
}


//----------------------------------------------------------------------------------------------------------------------
// reset the parameters to their default values
//    data : optional data set (may be a null pointer)
// - 'w_COD' : with data it is derived from a heuristic (approx. 30% of the 'effective' # of input variables, rounded
//             to one decimal), without data the hard coded default DEF_W_COD is taken; both are capped by Max_w_COD()
// - 'p_min' : hard coded default DEF_P_MIN, capped by Max_P_Min()
// - others  : hard coded DEF_xxx values
// NOTE(review): 'W_Kernel_Min' is not assigned here although Load() falls back to DEF_W_KERNEL_MIN for it in
//               non-release builds - confirm that it is initialized elsewhere
void TParaSet::SetStandardValues(const TData*const& data/*=NULL*/)
{
   // a) parameter 'w_COD'
   if(!data)
      w_COD = min(DEF_W_COD, static_cast<float>(Max_w_COD(data)));   // no data: hard coded default value
   else
   {
      float nEffective = 0;                                           // effective number of input variables

      // (hack!)  note: index 0 is skipped - NOTE(review): presumably it is not an input variable, confirm
      for(int iVar=1; iVar<data->nVar(); iVar++)
      {
         // a variable counts 1 if its weight is >= 0.8 and linearly less below that (0 at weight 0)
         float share = min(1.0, 1-1.25*(0.8-data->Weights()[iVar]));

         if(data->IsSymbolic(iVar))                                   // symbolic variables are down weighted further
            share *= 0.3;    // (hack!)

         nEffective += share;
      }

      // approx. 30% of the effective number of variables, rounded to one decimal and capped
      w_COD = min(floor(0.5+nEffective*3)/10.0, static_cast<double>(Max_w_COD(data)));
   }


   // b) remaining parameters: hard coded defaults
   N_Int        = DEF_N_INT;
   Eta          = DEF_ETA;
   W_Kernel     = DEF_W_KERNEL;
   Sigma        = DEF_SIGMA;
   p_min        = min(DEF_P_MIN, Max_P_Min(data));                    // default must not exceed the legal maximum
   Prune        = DEF_PRUNE;
   Weights      = DEF_WEIGHTS;
   Metric       = DEF_METRIC;

   #ifndef RELEASE  // parameters which are obsolete in release versions
   DifMax       = DEF_DIF_MAX;
   Noise        = DEF_NOISE;
   #endif
}


//----------------------------------------------------------------------------------------------------------------------
// write the (short) parameter names which serve as table header in the batch interface's output files
//    file         : output file the header is appended to (no line break is written)
//    f_Regression : true  -> column 'Sigma' (and 'Dif' in non-release builds)
//                   false -> column 'W_K' (and 'W_K_M' in non-release builds)
// note: the column widths correspond to the field widths WriteParameters() uses when writing values without names
void TParaSet::WriteDescription(ofstream& file, const bool& f_Regression/*=true*/)
{
   // common leading columns followed by the first task specific one
   file << "Int   COD  Eta" << (f_Regression ? "   Sigma" : "  W_K");

   #ifndef RELEASE          // obsolete in release versions
   if(!f_Regression)
      file << "  W_K_M";
   #endif

   // common trailing columns
   file << " p_min Prune Weight Metric";

   #ifndef RELEASE         // obsolete in release versions
   if(f_Regression)
      file << " Dif";
   file << " Noise";
   #endif
}


//----------------------------------------------------------------------------------------------------------------------
// write the parameter values to a stream, either as ' name=value' pairs or as plain fixed width columns
//    file         : output stream; all fields are appended to the current line (no line break is written)
//    f_Regression : true  -> write 'Sigma' (and 'DifMax' in non-release builds)
//                   false -> write the kernel width (and the min. kernel width in non-release builds)
//    f_Names      : true  -> each value is preceded by its name, false -> values only
// note: used in the batch interface's output files and when saving the tuning results in the GUI
void TParaSet::WriteParameters(ostream& file, const bool& f_Regression/*=true*/, const bool& f_Names/*=false*/) const
{
   char szField[STS];                                       // receives one formatted field at a time

   // a) values only: fixed width columns
   if(!f_Names)
   {
      sprintf(szField, " %2d", N_Int);                      // # intervals
      file << szField;
      sprintf(szField, " %5.1f", w_COD);                    // w_COD
      file << szField;
      sprintf(szField, " %4.1f", Eta);                      // eta
      file << szField;

      if(!f_Regression)
      {                                                     // classification specific parameters
         sprintf(szField, " %4.1f", W_Kernel);              // kernel width
         file << szField;
         #ifndef RELEASE                                    // obsolete in release versions
         sprintf(szField, " %6.2f", W_Kernel_Min);          // min. kernel width
         file << szField;
         #endif
      }
      else
      {                                                     // regression specific parameter
         sprintf(szField, " %7.4f", Sigma);                 // sigma
         file << szField;
      }

      sprintf(szField, " %5d", static_cast<int>(Prune) * 0 + p_min);   // min. cuboid mass
      file << szField;
      sprintf(szField, " %5d", static_cast<int>(Prune));    // post prune
      file << szField;
      sprintf(szField, " %6d", static_cast<int>(Weights));  // use weights
      file << szField;
      sprintf(szField, " %6.1f", Metric);                   // metric
      file << szField;

      #ifndef RELEASE                                       // obsolete in release versions
      if(f_Regression)
      {
         sprintf(szField, " %3d", DifMax);                  // difMax
         file << szField;
      }
      sprintf(szField, " %5.1f", Noise);                    // noise
      file << szField;
      #endif

      return;
   }

   // b) ' name=value' pairs
   sprintf(szField, " %s=%d", SZ_N_INT, N_Int);             // # intervals
   file << szField;
   sprintf(szField, " %s=%.1f", SZ_W_COD, w_COD);           // w_COD
   file << szField;
   sprintf(szField, " %s=%.1f", SZ_ETA, Eta);               // eta
   file << szField;

   if(!f_Regression)
   {                                                        // classification specific parameters
      sprintf(szField, " %s=%.1f", SZ_W_KERNEL, W_Kernel);  // kernel width
      file << szField;
      #ifndef RELEASE                                       // obsolete in release versions
      sprintf(szField, " %s=%.2f", SZ_W_KERNEL_MIN, W_Kernel_Min);   // min. kernel width
      file << szField;
      #endif
   }
   else
   {                                                        // regression specific parameter
      sprintf(szField, " %s=%.4f", SZ_SIGMA, Sigma);        // sigma
      file << szField;
   }

   sprintf(szField, " %s=%d", SZ_P_MIN, p_min);             // min. cuboid mass
   file << szField;
   sprintf(szField, " %s=%d", SZ_PRUNE, static_cast<int>(Prune));     // post prune
   file << szField;
   sprintf(szField, " %s=%d", SZ_WEIGHTS, static_cast<int>(Weights)); // use weights
   file << szField;
   sprintf(szField, " %s=%.1f", SZ_METRIC, Metric);         // metric
   file << szField;

   #ifndef RELEASE                                          // obsolete in release versions
   if(f_Regression)
   {
      sprintf(szField, " %s=%d", SZ_DIF_MAX, DifMax);       // difMax
      file << szField;
   }
   sprintf(szField, " %s=%.1f", SZ_NOISE, Noise);           // noise
   file << szField;
   #endif
}


//----------------------------------------------------------------------------------------------------------------------
// write all parameters to a file, one 'name = value' line each
//    file        : output file
//    f_Commented : true  -> each line is prefixed by the comment character 'ComChar' and a blank
//                  false -> the section name "[Parameter]" is written first and the lines carry no prefix
// note: the names are padded to WNAME characters; the stream is switched to left justified output while writing and
//       set back to right justified output afterwards
void TParaSet::Save(ofstream& file, const bool& f_Commented) const
{
   // a) compose the line prefix resp. write the section name
   char szPrefix[16] = "";                                  // empty prefix for the none-commented style
   if(f_Commented)
      sprintf(szPrefix, "%c ", ComChar);                    // comment character followed by a blank
   else
      file << "[Parameter]" << endl;                        // section name

   // b) write parameters, left justified
   file.setf(ios::left);
   file.unsetf(ios::right);

   file << szPrefix << setw(WNAME) << SZ_N_INT    << " = " << N_Int    << endl;
   file << szPrefix << setw(WNAME) << SZ_W_COD    << " = " << w_COD    << endl;
   file << szPrefix << setw(WNAME) << SZ_ETA      << " = " << Eta      << endl;

   file << szPrefix << setw(WNAME) << SZ_W_KERNEL << " = " << W_Kernel << endl;
   file << szPrefix << setw(WNAME) << SZ_SIGMA    << " = " << Sigma    << endl;

   file << szPrefix << setw(WNAME) << SZ_P_MIN    << " = " << p_min    << endl;
   file << szPrefix << setw(WNAME) << SZ_PRUNE    << " = " << FlagToString(Prune)   << endl;
   file << szPrefix << setw(WNAME) << SZ_WEIGHTS  << " = " << FlagToString(Weights) << endl;
   file << szPrefix << setw(WNAME) << SZ_METRIC   << " = " << Metric   << endl;

   #ifndef RELEASE                                          // obsolete in release versions
   file << szPrefix << setw(WNAME) << SZ_W_KERNEL_MIN << " = " << W_Kernel_Min << endl;
   file << szPrefix << setw(WNAME) << SZ_NOISE        << " = " << Noise        << endl;
   file << szPrefix << setw(WNAME) << SZ_DIF_MAX      << " = " << DifMax       << endl;
   #endif

   // c) restore right justified output
   file.unsetf(ios::left);
   file.setf(ios::right);
}


//----------------------------------------------------------------------------------------------------------------------
// load the parameters from section "[Parameter]" of a file
//    file    : input file
//    line    : counter which is increased by the value SearchKey() returns when positioning to the section
//              (presumably the # of lines read - TODO confirm); it is not touched while the keys are read
//    returns : false if the section was not found (nothing loaded), else true
//    throws  : via ThrowTypeU() if a key could not be read or if CheckParameters() rejects a value
// note: keys which are missing in the file are given their DEF_xxx default by the ReadKeyXxx() calls
bool TParaSet::Load(ifstream& file, int& line)
{
   // a) position to section "[Parameter]"  -  a missing section is no error, there is just nothing to load
   try
   {
      line += SearchKey(file, "[Parameter]");
   }
   catch(int)
   {
      return false;
   }


   try
   {
      // b) read parameters
      N_Int        = ReadKeyValue(file, SZ_N_INT       , DEF_N_INT       , SEARCH_LINES);
      w_COD        = ReadKeyValue(file, SZ_W_COD       , DEF_W_COD       , SEARCH_LINES);
      Eta          = ReadKeyValue(file, SZ_ETA         , DEF_ETA         , SEARCH_LINES);

      W_Kernel     = ReadKeyValue(file, SZ_W_KERNEL    , DEF_W_KERNEL    , SEARCH_LINES);  // classification only
      #ifndef RELEASE
      W_Kernel_Min = ReadKeyValue(file, SZ_W_KERNEL_MIN, DEF_W_KERNEL_MIN, SEARCH_LINES);  // classification only
      #endif
      Sigma        = ReadKeyValue(file, SZ_SIGMA       , DEF_SIGMA       , SEARCH_LINES);  // regression only

      p_min        = ReadKeyValue(file, SZ_P_MIN       , DEF_P_MIN       , SEARCH_LINES);
      Prune        = ReadKeyBool (file, SZ_PRUNE       , DEF_PRUNE       , SEARCH_LINES);
      Weights      = ReadKeyBool (file, SZ_WEIGHTS     , DEF_WEIGHTS     , SEARCH_LINES);
      Metric       = ReadKeyValue(file, SZ_METRIC      , DEF_METRIC      , SEARCH_LINES);

      #ifndef RELEASE    // note: parameters not used in release versions
      DifMax       = ReadKeyValue(file, SZ_DIF_MAX     , DEF_DIF_MAX     , SEARCH_LINES);
      Noise        = ReadKeyValue(file, SZ_NOISE       , DEF_NOISE       , SEARCH_LINES);
      #endif


      // c) check parameter values  -  a non-empty text describes the violation
      const char* szProblem = CheckParameters();
      if(*szProblem)
      {
         char szError[STS];
         sprintf(szError, "Error in section [Parameter]: %s", szProblem);  // prepend section information
         ThrowTypeU(szError);
      }
   }
   catch(int errNo)     // error number raised while reading: translate it to a text and throw again
   {
      char szReadError[STS];
      sprintf(szReadError, "Error reading section [Parameter]: %s", GetLastError(errNo));
      ThrowTypeU(szReadError);
   }

   return true;
}


//----------------------------------------------------------------------------------------------------------------------
// check if actual parameter values are within valid ranges; returns non-empty string if somethings wrong
// note: put in a seperate function as it is also used to check when loading the tuning results (GUI)
const char* TParaSet::CheckParameters()
{
   static char szText[STS];   // error text
   szText[0]='\0';            // initialize (reset string)

   try
   {
      if(N_Int        < MIN_N_INT        || N_Int        > MAX_N_INT)            throw 1;
      if(w_COD        < MIN_W_COD)                                               throw 2;
      if(Eta          < MIN_ETA          || Eta          > MAX_ETA)              throw 3;


      if(p_min        < MIN_P_MIN)                                               throw 4;
      if(Metric       < MIN_METRIC       || Metric       > MAX_METRIC)           throw 5;

      if(W_Kernel     < MIN_W_KERNEL     || W_Kernel > MAX_W_KERNEL)             throw 6;
      if(Sigma        < MIN_SIGMA        || Sigma        > MAX_SIGMA)            throw 7;


      // obsolete in release versions
      #ifndef RELEASE
      if(W_Kernel_Min < MIN_W_KERNEL_MIN || W_Kernel_Min > MAX_W_KERNEL_MIN)     throw 8;
      if(Noise        < MIN_NOISE        || Noise        > MAX_NOISE)            throw 9;
      if(DifMax       < 0                || DifMax       > N_Int-2)              throw 10;
      #endif
   }
   catch(int errNo)
   {
      switch(errNo)
      {
         case 1  : sprintf(szText, "Value of '%s' is not e[%d..%d]!", SZ_N_INT, MIN_N_INT, MAX_N_INT); break;
         case 2  : sprintf(szText, "Value of '%s' must be > %s", SZ_W_COD, ValueToText1(MIN_W_COD, 0, PREC_PARA)); break;
         case 3  : sprintf(szText, "Value of '%s' is not e[%s..%s]!", SZ_ETA, ValueToText1(MIN_ETA), ValueToText2(MAX_ETA)); break;
         case 4  : sprintf(szText, "Value of '%s' must be > %d!", SZ_P_MIN, MIN_P_MIN);   break;
         case 5  : sprintf(szText, "Value of '%s' must be e[%s..%s]!", SZ_METRIC, ValueToText1(MIN_METRIC), ValueToText2(MAX_METRIC)); break;
         case 6  : sprintf(szText, "Value of '%s' not e[%s..%s]!", SZ_W_KERNEL, ValueToText1(MIN_W_KERNEL), ValueToText2(MAX_W_KERNEL)); break;
         case 7  : sprintf(szText, "Value of '%s' not e[%s..%s]!", SZ_SIGMA, ValueToText1(MIN_SIGMA, 0, PREC_PARA), ValueToText2(MAX_SIGMA, 0, PREC_PARA)); break;

         #ifndef RELEASE    // obsolete in release versions
         case 8  : sprintf(szText, "Value of '%s' not e[%s..%s]!", SZ_W_KERNEL_MIN, ValueToText1(MIN_W_KERNEL_MIN), ValueToText2(MAX_W_KERNEL_MIN)); break;
         case 9  : sprintf(szText, "Error in section [Parameter]: Value of '%s' is not e[%s..%s]!", SZ_NOISE, ValueToText1(MIN_NOISE), ValueToText2(MAX_NOISE)); break;
         case 10 : sprintf(szText, "Error in section [Parameter]: Value of '%s' not e[0..%d]!", SZ_DIF_MAX, N_Int-2); break;
         #endif
      }
   }

   return szText;
}