//------------------------------------------------------------------------------
//    module task.cpp                                                         //
//                                                                            //
//    A kind of struct with load and save functionality. Encapsulates a task, //
//    i.e. a set of parameter values for the PNC2 algorithm, that are used    //
//    to generate a TParaSetList by building all possible combinations.       //
//                                                                            //
//    copyright (c) 2001-2003 by Lars Haendel                                 //
//    home: www.newty.de                                                      //
//                                                                            //
//    This program is free software and can be used under the terms of the    //
//    GNU licence. See header file for further information and disclaimer.    //
//                                                                            //
//------------------------------------------------------------------------------

#include <iomanip>                  // due to:  setw()

#include "fileutil.h"               //          SizeOfString()
#include "task.h"


#define MAX_PARAMETER_LENGTH (int) 6   // maximum number of characters of one single(!) value inside a parameter string
                                       // note: enforced in ParseParameters(), longer sub-strings are rejected


//----------------------------------------------------------------------------------------------------------------------
// parse parameter string: search for sub-strings terminated by ';', convert them to floats and store them in parameter
// array.  return # parameters found
int ParseParameters(const char*const& szPar, float*& para, const float& defVal, const char*const& szName
                     , const float& min, const bool f_Integer/*=false*/, const float& max/*=MAXFLOAT*/)
{
   const bool boolean = (f_Integer && max==1 && min==0);       // flag: parameter can only take boolean values

   // a) create parameter field
   int nSubStrings=1;                     // allocate one more to store default parameter when string is empty
   int j=0;
   while(szPar[j]!='\0')                  // count # ';'/spaces in string
   {
      if(szPar[j]==';' || szPar[j]==' ')              // ';' or space found
      {
         nSubStrings++;                               // increment counter
         while(szPar[j+1]==';' || szPar[j+1]==' ')    // and ignore all directly following ';'/spaces
            j++;
      }
      j++;                                            // proceed with next character in string
   }
   para = new float[nSubStrings];         // allocate memory for parameter field


   // b) parse string
   int i=0, c, nPara=0;
   char buffer[STS];
   char* endptr ;
   while(szPar[i]!='\0')
   {
      c=0;                                               // reset buffer index

      // copy sub-strings (terminated by ';' or space) to buffer
      while(szPar[i]!=';' && szPar[i]!=' ' && szPar[i]!='\0' && c<255)
         buffer[c++]=szPar[i++];
      buffer[c]='\0';                                    // terminate substring


      // check maximal string length, no need to allow more characters
      IfTrueThrowTypeU(c>MAX_PARAMETER_LENGTH, "String to long for parameter '%s'", szName);


      if(strcmp("true", buffer)==0)                      // compare sub-string with 'true' and 'false' first ...
         para[nPara] = 1;
      else
         if(strcmp("false", buffer)==0)
            para[nPara] = 0;
         else
         {
            endptr = NULL;
            para[nPara] = (float) strtod(buffer, &endptr);           // ... else convert to floating point

            if(f_Integer)                                 // eventually check if specified parameter must be an integer
               IfTrueThrowTypeU(para[nPara]!=floor(para[nPara]),"Parameter '%s' can take only integer values!", szName);


            // check if there has been an error
            if(*endptr!='\0')
            {
               char szText[STS];
               sprintf(szText, "Error parsing '%s': Character '%c' is illegal!", szPar, *endptr);
               ThrowTypeU(szText);
            }
         }
      nPara++;                               // increment parameter counter

      while(szPar[i]==';' || szPar[i]==' ')  // position forward ('remove' all the ';' or space)
         i++;
   }

   // c) check for dupes
   for(int i=0;i<nPara;i++)
      for(int j=i+1;j<nPara;j++)
         if(para[i]==para[j])
         {
            char szText[STS];
            sprintf(szText, "Duplicate value %s found for parameter '%s'!", ValueToText1(para[i], 0, PREC_PARA), szName);
            ThrowTypeU(szText);
         }


   // d) check minima and maxima if specified
//   if(min!=max)             (hack!)
      for(int i=0;i<nPara;i++)
         if( para[i]<min || para[i]>max)        // if parameter value is 'out of range' ...
            if(boolean)
               ThrowTypeU("Parameter '%s' can take only boolean values!", szName);
            else
            {
               char szText[STS];       // ... compose error text ...

               if(max!=MAXFLOAT)

                  // maximum specified
                  sprintf(szText, "Value(s) of parameter '%s' must be e[%s..%s]!", szName, ValueToText1(min, 0, PREC_PARA)
                           , ValueToText2(max, 0, PREC_PARA));
               else

                  // no maximum specified
                  sprintf(szText, "Value(s) of parameter '%s' must be e[%s..oo]!", szName, ValueToText1(min));
               ThrowTypeU(szText);
            }



   // e) parameter specific modifications
   // ensure special ordering for parameter 'Prune': 'true' must come before 'false'
   if(strcmp(szName, SZ_PRUNE)==0)
      if(nPara>1)
      {
         para[0] = true;
         para[1] = false;
      }


   // f) set default if string was empty
   if(nPara==0)
      para[nPara++]=defVal;

   return nPara;
}


//----------------------------------------------------------------------------------------------------------------------
// convert tasks to parameter set list;  also used to test validity of tasks
//
// Every task holds one string per parameter. Each string is parsed into an array of values by ParseParameters() and
// one TParaSet is inserted into the result for every combination of the values.
// Side effect: 'nSetsLearn' and 'nSets' of each task are set.
//
//    tasks          list of tasks to convert
//    data           data set; passed to Max_w_COD()/Max_P_Min() and asked for nIntegerMaxMin(0), which is used as
//                   default and required value of 'N_Int' for classification tasks
//    f_Regression   true: regression task, false: classification task
//
// returns the newly created parameter set list (marker 'cr': presumably the caller has to release it - TODO confirm)
//
// errors: a TExceptionU raised while processing a task is caught, the list is released and the error is reported
//         again via ThrowTypeU() with the prefix "Section '[Task<n>]': "
TParaSetList* /*cr*/ ToParaSetList(TTaskList*const& tasks, const TData*const& data, const bool& f_Regression)
{
   // owner of one parameter array allocated by ParseParameters(): the array is released when the owner goes out of
   // scope, i.e. at the end of each loop iteration and also when an exception leaves the loop body
   struct TParaField
   {
      float* p;
      TParaField() : p(0) {}
      ~TParaField() { delete[] p; }
   private:
      TParaField(const TParaField&);               // not copyable (would release the array twice)
      TParaField& operator=(const TParaField&);
   };


   // a) ini and pre-checks
   TParaSetList* sets = new TParaSetList();           // create parameter set list (return value)
   sets->SetName("TParaSetList (TProject)");          // set name

   int def_N_Int = DEF_N_INT;                         // default for parameter 'N_Int'
   if(!f_Regression)
      def_N_Int = data->nIntegerMaxMin(0);


   // b) for all tasks in task list ... parse them and add parameter sets to list
   int i;                                             // declared outside 'try': task number is needed in the handler
   try {
   for(i=0;i<tasks->Size();i++)
   {
      TTask& task = tasks->Get(i);                    // get i-th task from list


      // parameters and # parameters
      TParaField nInt, w_COD, eta, wKernel, sigma, p_min, prune, weights, metric;
      int nnInt, nW_COD, nEta, nWKernel, nSigma, nP_min, nPrune, nWeights, nMetric;
      #ifndef RELEASE    // obsolete in release versions
      TParaField wKernelMin, noise, difMax;
      int nWKernelMin, nNoise, nDifMax;
      #endif

      // parse parameter strings, i.e. generate parameter fields and determine # parameters
      nnInt       = ParseParameters(task.szN_Int, nInt.p, def_N_Int, SZ_N_INT, MIN_N_INT, INT_N_INT, MAX_N_INT);
      nW_COD      = ParseParameters(task.szW_COD, w_COD.p, DEF_W_COD, SZ_W_COD, MIN_W_COD, INT_W_COD, Max_w_COD(data));
      nEta        = ParseParameters(task.szEta, eta.p, DEF_ETA, SZ_ETA, MIN_ETA, INT_ETA, MAX_ETA);
      nWKernel    = ParseParameters(task.szW_Kernel, wKernel.p, DEF_W_KERNEL, SZ_W_KERNEL, MIN_W_KERNEL, INT_W_KERNEL);
      nSigma      = ParseParameters(task.szSigma, sigma.p, DEF_SIGMA, SZ_SIGMA, MIN_SIGMA, INT_SIGMA, MAX_SIGMA);
      nP_min      = ParseParameters(task.szP_Min, p_min.p, DEF_P_MIN, SZ_P_MIN, MIN_P_MIN, INT_P_MIN, Max_P_Min(data));
      nPrune      = ParseParameters(task.szPrune, prune.p, DEF_PRUNE, SZ_PRUNE, MIN_PRUNE, INT_PRUNE, MAX_PRUNE);
      nWeights    = ParseParameters(task.szWeights, weights.p, DEF_WEIGHTS, SZ_WEIGHTS, MIN_WEIGHTS, INT_WEIGHTS, MAX_WEIGHTS);
      nMetric     = ParseParameters(task.szMetric, metric.p, DEF_METRIC, SZ_METRIC, MIN_METRIC, INT_METRIC, MAX_METRIC);

      #ifndef RELEASE   // obsolete in release versions
      nWKernelMin = ParseParameters(task.szW_Kernel_Min, wKernelMin.p, DEF_W_KERNEL_MIN, SZ_W_KERNEL_MIN,
                                    MIN_W_KERNEL_MIN, INT_W_KERNEL_MIN, MAX_W_KERNEL_MIN);
      nDifMax     = ParseParameters(task.szDifMax, difMax.p, false, SZ_DIF_MAX, MIN_DIF_MAX, INT_DIF_MAX, MAX_DIF_MAX);
      nNoise      = ParseParameters(task.szNoise, noise.p, DEF_NOISE, SZ_NOISE, MIN_NOISE, INT_NOISE, MAX_NOISE);
      #endif

      // additional checks
      IfTrueThrowTypeU(nWKernel>1 && f_Regression, "Parameter '%s' is irrelevant for regression tasks. Thus do not specify more than one value!", SZ_W_KERNEL);
      #ifndef RELEASE    // obsolete in release versions
      IfTrueThrowTypeU(nWKernelMin>1 && f_Regression, "Parameter '%s' is irrelevant for regression tasks. Thus do not specify more than one value!", SZ_W_KERNEL_MIN);
      #endif
      IfTrueThrowTypeU(nSigma>1 && !f_Regression, "Parameter '%s' is irrelevant for classification tasks. Thus do not specify more than one value!", SZ_SIGMA);
      IfTrueThrowTypeU(!f_Regression && (nnInt>1 || nInt.p[0]!=data->nIntegerMaxMin(0)), "Given problem is a classifcication task. Thus '%s' must be equal to the number of classes!", SZ_N_INT);


      // # parameter sets that need learning and # parameters overall
      task.nSetsLearn = nnInt * nEta * nW_COD * nWeights * nMetric;
      #ifndef RELEASE   // obsolete in release versions
      task.nSetsLearn *= nDifMax * nNoise;
      #endif
      task.nSets =  task.nSetsLearn * nPrune * nWKernel * nP_min * nSigma;
      #ifndef RELEASE   // obsolete in release versions
      task.nSets  *= nWKernelMin;
      #endif

      // sort parameters that have a direct effect on model size in an order that the smallest models are learned first
      qsort(w_COD.p, nW_COD, sizeof(w_COD.p[0]), FloatCmpAsc);    // w_COD ascending
      qsort(eta.p, nEta, sizeof(eta.p[0]), FloatCmpDes);          // eta descending
      qsort(p_min.p, nP_min, sizeof(p_min.p[0]), FloatCmpDes);    // min. cuboid mass descending
      qsort(prune.p, nPrune, sizeof(prune.p[0]), FloatCmpDes);    // pruning in descending order


      // add parameter sets to list
      // outer loops: parameters counted in 'nSetsLearn'; inner loops: the remaining parameters. Only the first set
      // of each inner group gets the relearn flag.
      for(int a=0;a<nnInt;a++)
         for(int b=0;b<nEta;b++)
            for(int c=0;c<nW_COD;c++)
               for(int d=0;d<nWeights;d++)
                  for(int y=0;y<nMetric;y++)
                     #ifndef RELEASE
                     for(int v=0;v<nDifMax;v++)
                        for(int g=0;g<nNoise;g++)                // note: noise needs relearning since v1.26b !
                     #endif
                        {
                           bool relearn = true;                                  // set relearn flag
                           for(int z=0;z<nPrune;z++)
                              for(int x=0;x<nWKernel;x++)
                                 for(int q=0;q<nP_min;q++)
                                 #ifndef RELEASE   // obsolete in release versions
                                    for(int f=0;f<nWKernelMin;f++)
                                 #endif
                                       for(int e=0;e<nSigma;e++)
                                       {
                                          TParaSet& para = sets->Ins();          // insert new parameter set in list

                                          para.N_Int     = (int) nInt.p[a];      // copy/set parameters
                                          para.w_COD     = w_COD.p[c];
                                          para.Eta       = eta.p[b];

                                          para.W_Kernel     = wKernel.p[x];
                                          para.Sigma        = sigma.p[e];

                                          para.p_min     = p_min.p[q];
                                          para.Prune     = (bool) prune.p[z];
                                          para.Weights   = (bool) weights.p[d];
                                          para.Metric    = metric.p[y];

                                          #ifndef RELEASE                        // obsolete in release versions
                                          para.W_Kernel_Min = wKernelMin.p[f];
                                          para.Noise     = noise.p[g];
                                          para.DifMax    = difMax.p[v];
                                          #endif

                                          para.Relearn = relearn;
                                          para.Skip    = false;                  // no set is skipped by default
                                          relearn=false;                         // reset relearn-flag
                                       }
                        }

      // note: the parameter fields are released by the TParaField destructors at the end of each iteration
   }}
   catch(TExceptionU& excp)
   {
      delete sets;      // release

      char szText[STS];

      // the precision limits the copied error text so that prefix + text always fit into szText
      sprintf(szText, "Section '[Task%d]': %.*s", i+1, (int) sizeof(szText) - 32, excp.GetErrorText());   // re-compose error text
      ThrowTypeU(szText);                                                     // throw again
   }

   return sets;      // return parameter set list
}


//----------------------------------------------------------------------------------------------------------------------
// write task to file: one line "<name> = <values>" per parameter string that is not empty. The name is written left
// justified with field width WNAME. While writing, the spaces in the task strings are replaced by ';'; they are
// restored before the function returns.
void TTask::Save(ofstream& file)
{
   file << setiosflags(ios::left) << resetiosflags(ios::right);        // switch to left justified output
   Replace(' ', ';');                                                  // file format uses ';' as separator

   // name/value pairs in the order they are written
   struct TEntry { const char* szKey; const char* szValue; };
   const TEntry entries[] =
   {
      { SZ_N_INT   , szN_Int    },
      { SZ_W_COD   , szW_COD    },
      { SZ_ETA     , szEta      },
      { SZ_W_KERNEL, szW_Kernel },
      { SZ_SIGMA   , szSigma    },
      { SZ_P_MIN   , szP_Min    },
      { SZ_PRUNE   , szPrune    },
      { SZ_WEIGHTS , szWeights  },
      { SZ_METRIC  , szMetric   },
      #ifndef RELEASE   // obsolete in release versions
      { SZ_W_KERNEL_MIN, szW_Kernel_Min },
      { SZ_NOISE       , szNoise        },
      { SZ_DIF_MAX     , szDifMax       },
      #endif
   };
   const int nEntries = (int) (sizeof(entries)/sizeof(entries[0]));

   for(int k=0;k<nEntries;k++)
   {
      if(entries[k].szValue[0]=='\0')                                  // empty strings are not written
         continue;
      file << setw(WNAME) << entries[k].szKey << " = " << entries[k].szValue << endl;
   }

   Replace(';', ' ');                                                  // back to spaces inside the task strings
   file << resetiosflags(ios::left) << setiosflags(ios::right);        // back to right justified output
}


//----------------------------------------------------------------------------------------------------------------------
// replace every occurrence of character 'a' by character 'b' in the zero-terminated string 'szText' (in place); used
// to replace ';' by space in task strings or vice versa. A NULL string is ignored.
//
// note: 'a' and 'b' are references and may refer to a character inside 'szText'. They are copied first, otherwise
//       the character searched for could change while the string is being modified.

   void Replace(const char& a, const char& b, char* szText)
   {
      if(szText==0)                 // no string, nothing to do
         return;

      const char from = a;          // private copies: immune against aliasing with szText
      const char to   = b;

      for(int i=0; szText[i]!='\0'; i++)     // walk through whole string
         if(szText[i]==from)
            szText[i]=to;
   }

void TTask::Replace(const char& a, const char& b)
{
   // replace ';' by spaces
   ::Replace(a, b, szN_Int);
   ::Replace(a, b, szW_COD);
   ::Replace(a, b, szEta);
   ::Replace(a, b, szW_Kernel);

   ::Replace(a, b, szSigma);
   ::Replace(a, b, szP_Min);
   ::Replace(a, b, szPrune);
   ::Replace(a, b, szWeights);
   ::Replace(a, b, szMetric);

   #ifndef RELEASE   // obsolete in release versions
   ::Replace(a, b, szW_Kernel_Min);
   ::Replace(a, b, szDifMax);
   ::Replace(a, b, szNoise);
   #endif
}

//----------------------------------------------------------------------------------------------------------------------
// read task from file: ReadKeyString() is called once per parameter name with the matching task string as destination
// (size STS); afterwards the ';' in the task strings are replaced by spaces
void TTask::Load(ifstream& file)
{
   // parameter names and their destination strings, in the order they are read
   struct TEntry { const char* szKey; char* szValue; };
   TEntry entries[] =
   {
      { SZ_N_INT   , szN_Int    },
      { SZ_W_COD   , szW_COD    },
      { SZ_ETA     , szEta      },
      { SZ_W_KERNEL, szW_Kernel },
      { SZ_SIGMA   , szSigma    },
      { SZ_P_MIN   , szP_Min    },
      { SZ_PRUNE   , szPrune    },
      { SZ_WEIGHTS , szWeights  },
      { SZ_METRIC  , szMetric   },
      #ifndef RELEASE   // obsolete in release versions
      { SZ_W_KERNEL_MIN, szW_Kernel_Min },
      { SZ_DIF_MAX     , szDifMax       },
      { SZ_NOISE       , szNoise        },
      #endif
   };
   const int nEntries = (int) (sizeof(entries)/sizeof(entries[0]));

   // load
   for(int k=0;k<nEntries;k++)
      ReadKeyString(file, entries[k].szKey, entries[k].szValue, STS);

   // internally the values are separated by spaces, not by ';'
   Replace(';', ' ');
}