//------------------------------------------------------------------------------
//    module project.cpp                                                      //
//                                                                            //
//    Class TProjectB extends TProject settings for usage of PNC algorithm    //
//                                                                            //
//    copyright (c) 2001-2003 by Lars Haendel                                 //
//    mail: lore17@newty.de                                                   //
//    home: www.newty.de                                                      //
//                                                                            //
//    This program is free software and can be used under the terms of the    //
//    GNU licence. See header file for further information and disclaimer.    //
//                                                                            //
//------------------------------------------------------------------------------


#include "projectB.h"

#include <iomanip>         // due to:  setiosflags()



//----------------------------------------------------------------------------------------------------------------------
// default constructor: constructs the TProject base part and performs no further work of its own
TProjectB::TProjectB()
   : TProject()
{
}


//----------------------------------------------------------------------------------------------------------------------
// load project settings from file: the base class settings first, then the additional keys of section '[Tuning]'
// and the sections '[Batch]' and '[Show]'.
//    file                 project file stream; every section is searched starting at the position the stream has
//                         after the base class returned and that position is restored before returning normally
//    _szProjectFilePath   handed over to TProject::Load() unchanged
// note: an int error code thrown while reading is translated into an error text (naming section and key) and
//       re-thrown using ThrowTypeU(). The stream position is not restored in that case.
void TProjectB::Load(ifstream& file, const char*const& _szProjectFilePath)
{
   // 1) the base class reads its own part first
   TProject::Load(file, _szProjectFilePath);


   // 2) now read what this class adds
   char szSection[STS];                   // section currently processed - also needed to compose the error text
   try
   {
      const streampos startPos = file.tellg();     // remember where to start each section search


      //----------------------------------------------------------------------------------------------------------------
      // 2a) additional keys of section '[Tuning]'
      strcpy(szSection, "[Tuning]");
      SearchKey(file, szSection);                                          // move to the section

      // filename for tuning results
      ReadKeyString(file, "Results",szTuningFile, STS);
      CorrectPathDelimiter(szTuningFile);

      // base names used while tuning: learn/test data, learned models, simulation output
      ReadKeyString(file, "Data",   szTuneDataBaseFile, STS);
      ReadKeyString(file, "Model", szTuneModelBaseFile, STS);
      ReadKeyString(file, SZ_SIMULATION,  szTuneSimulationBaseFile, STS);

      // ranking criterion and tuning objective: read as text, then converted to criterion ids
      ReadKeyString(file, "Ranking", szRanking, STS);
      ReadKeyString(file, "Objective", szObjective, STS);
      ranking    = StringToCriterionId(szRanking);
      objective  = StringToCriterionId(szObjective);



      //----------------------------------------------------------------------------------------------------------------
      // 2b) section '[Batch]'
      file.seekg(startPos);                                                // back to the start position
      strcpy(szSection, "[Batch]");
      SearchKey(file, szSection);                                          // move to the section

      // flags: make study / save memory (don't store predictions)
      f_Study = ReadKeyBool(file, "Study");
      f_SaveMemory = ReadKeyBool (file, "SaveMemory", DEF_SAVE_MEMORY);

      // result filename
      ReadKeyString(file, "Results", szResultFile, STS);
      CorrectPathDelimiter(szResultFile);

      // (base) filenames: generated learn/test data, simulation output file(s), learned model(s)
      ReadKeyString(file, "Data", szDataBaseFileName, STS);
      ReadKeyString(file, SZ_SIMULATION, szSimulationBaseFileName, STS);
      ReadKeyString(file, "Model", szModelBaseFileName, STS);

      // output directory - if it is empty or missing, the project file directory is used instead
      ReadKeyString(file,SZ_DIRECTORY , szOutputDir, STS);
      CorrectPathDelimiter(szOutputDir);

      // study type (read as text, then converted) and the counts belonging to it; missing counts default to 0
      ReadKeyString(file, "Type", szStudyType, STS);
      studyType = StringToTestType(szStudyType);
      N_R = ReadKeyValue(file, "N_R", 0);                                  // # repetitions
      N_L = ReadKeyValue(file, "N_L", 0);                                  // # learn tuples
      N_T = ReadKeyValue(file, "N_T", 0);                                  // # test tuples



      //----------------------------------------------------------------------------------------------------------------
      // 2c) section '[Show]'
      file.seekg(startPos);                                                // back to the start position
      strcpy(szSection, "[Show]");
      SearchKey(file, szSection);                                          // move to the section

      // one flag per criterion: shall it be logged?
      for(int id=0; id<nCriterion; ++id)
      {
         // (hack!): criterion 'pVal' always defaults to 'false'  -  Save() has to use the very same default
         const bool f_Default = DEF_SHOW_CRITERION && (id!=StringToCriterionId("pVal"));
         f_Criterion[id] = ReadKeyBool(file, CriterionIdToString(id), f_Default);
      }

      // flag: write results' deviations
      f_ShowDev = ReadKeyBool(file, "Deviation", DEF_SHOW_DEVIATION);


      // leave the stream where we found it
      file.seekg(startPos);
   }
   catch(int errNo)     // error code thrown while searching/reading
   {
      char szMessage[STS];

      // if the key accessed last is the section name itself, the section search has failed
      const bool f_SectionMissing = (strcmp(szSection, GetLastKey())==0);
      if(f_SectionMissing)
         sprintf(szMessage, "Section '%s' not found!", szSection);
      else
         sprintf(szMessage, "Section '%s' reading key '%s': %s", szSection, GetLastKey(), GetLastError(errNo));

      ThrowTypeU(szMessage);     // pass the error on as text
   }
}


//----------------------------------------------------------------------------------------------------------------------
// save project settings to file: the base class settings first, then the additional keys of section '[Tuning]'
// and the sections '[Batch]' and '[Show]'.
//    file                 output stream; left justified output is used while writing and right justified output is
//                         set again before returning
//    f_WriteTuningAnyway  write the additional tuning keys even if tuning (f_Tune) is disabled; also handed over
//                         to TProject::Save()
// note: the additional tuning keys are simply appended to what the base class has written, i.e. they rely on
//       section '[Tuning]' being the last section written by TProject::Save().
void TProjectB::Save(ofstream& file, const bool& f_WriteTuningAnyway/*=false*/)
{
   // a) call base class version
   TProject::Save(file, f_WriteTuningAnyway);


   // b) save additional parameters/sections
   file << setiosflags(ios::left) << resetiosflags(ios::right);            // set left justified output


   //-------------------------------------------------------------------------------------------------------------------
   // b1) write additional parameters at the end of current section (will be section '[Tuning]')
   if(f_Tune || f_WriteTuningAnyway)
   {
      file << setw(WNAME) << "Results" << " = " << szTuningFile << endl;   // tuning result filename

      // base filenames are optional: write them only if they are not empty
      if(szTuneDataBaseFile[0]!='\0') file << setw(WNAME) << "Data" << " = " << szTuneDataBaseFile << endl;
      if(szTuneModelBaseFile[0]!='\0') file << setw(WNAME) << "Model" << " = " << szTuneModelBaseFile << endl;
      if(szTuneSimulationBaseFile[0]!='\0')file << setw(WNAME) << SZ_SIMULATION <<" = "<< szTuneSimulationBaseFile << endl;

      file << setw(WNAME) << "Ranking"    << " = " << CriterionIdToString(ranking) << endl;
      file << setw(WNAME) << "Objective"  << " = " << CriterionIdToString(objective) << endl;
   }


   //-------------------------------------------------------------------------------------------------------------------
   // b2) write section [Batch]
   file << endl << endl << "[Batch]" << endl;
   file << setw(WNAME) << "Study" << " = " << FlagToString(f_Study) << endl;
   file << setw(WNAME) << "SaveMemory" << " = " << FlagToString(f_SaveMemory) << endl;
   file << setw(WNAME) << "Results" << " = " << szResultFile << endl;
   if(szOutputDir[0]!='\0')                                                      // if not empty
      file << setw(WNAME) << SZ_DIRECTORY << " = " << szOutputDir << endl;       // write output directory


   if(f_Study)  // suppress study settings if disabled
   {
      if(szDataBaseFileName[0]!='\0') file << setw(WNAME) << "Data"   << " = " << szDataBaseFileName << endl;
      if(szModelBaseFileName[0]!='\0') file << setw(WNAME) << "Model" << " = " << szModelBaseFileName << endl;
      if(szSimulationBaseFileName[0]!='\0')file<<setw(WNAME)<<SZ_SIMULATION<< " = " << szSimulationBaseFileName << endl;

      file << setw(WNAME) << "Type" << " = " << TestTypeToString(studyType) << endl;
      if(studyType==Rep || studyType==Cv)
         file << setw(WNAME) << "N_R" << " = " << N_R << endl;    // note: only needed for type repetition or CV


      if(studyType==Rep)                                          // note: only needed for type repetition
      {
         file << setw(WNAME) << "N_L" << " = " << N_L << endl;
         file << setw(WNAME) << "N_T" << " = " << N_T << endl;
      }
   }



   //-------------------------------------------------------------------------------------------------------------------
   // b3) write section [Show]
   file << endl << endl << "[Show]" << endl;
   const int pValId = StringToCriterionId("pVal");                // loop invariant: id of criterion 'pVal'
   for(int i=0;i<nCriterion;i++)
   {
      // (hack!): ensure that default for criterion 'pVal' is 'false'  -  must be consistent with Load()
      const bool f_Default = DEF_SHOW_CRITERION && (i!=pValId);

      // write only if the criterion differs from its default. Write the actual flag value rather than a fixed
      // 'true': a criterion whose default is 'true' differs from it by being 'false', and must be saved as such.
      if(f_Criterion[i]!=f_Default)
         file << setw(WNAME) << CriterionIdToString(i) << " = " << FlagToString(f_Criterion[i]) << endl;
   }

   file << setw(WNAME) << "Deviation" << " = " << FlagToString(f_ShowDev) << endl;


   file << setiosflags(ios::right) << resetiosflags(ios::left);            // restore right justified output
}


//----------------------------------------------------------------------------------------------------------------------
// check project parameters: runs the base class check (TProject::Synchronize) and then validates the settings added
// by this class (sections '[Basic]', '[Batch]', '[Show]', '[Tuning]' and '[Task...]') against each other and against
// the data file.  WARNING: caller has to release returned TParaSetList !!
//    _data1               handed over to TProject::Synchronize(); the checks below use member 'data1'
//                         (presumably set by the base class from this argument - confirm)
//    f_CheckTuningAnyway  handed over to TProject::Synchronize()
//    returns              the parameter set list returned by TProject::Synchronize()
// note: initializes N_L, N_T and N_R using IniStudyCounts(); IniCriterionFlags() is called only if tuning is enabled
// note: each failed check throws an int code which is caught below, the parameter set list is released and the
//       matching error text is thrown using ThrowTypeU()
TParaSetList* /*cr*/ TProjectB::Synchronize(TData*const& _data1, const bool& f_CheckTuningAnyway/*=false*/)
{
   // a) call base class version
   TParaSetList* sets = TProject::Synchronize(_data1, f_CheckTuningAnyway);


   // b) check
   try
   {
      //----------------------------------------------------------------------------------------------------------------
      // a) section '[Basic]'
      if(!f_Study && szData2[0]=='\0')
         throw 106;                             // 2nd filename needed (test data file) if study is disabled



      //----------------------------------------------------------------------------------------------------------------
      // b) section '[Batch]'
      if(f_Study < 0)                              throw 200;  // study flag not found in section 'Batch'
      if(szResultFile[0]=='\0')                    throw 201;  // result file needed in batch mode
      if(f_Study)
      {
         if(szData2[0]!='\0')                      throw 202;  // study mode conflicts with 2nd data file (test data)
         if(szStudyType[0]=='\0')                  throw 203;  // no study type given

         if((studyType==Cv
               || studyType==Rep) && N_R==0)       throw 205;  // CV and repetition need N_R ...
         if((studyType==Loocv
            || studyType==Special) && N_R!=0)      throw 206;  // ... but Loocv and Special don't
         if(studyType==Rep && (N_L==0 || N_T==0))  throw 207;  // repetition needs # learn and test tuples ...
         if(studyType!=Rep && (N_L!=0 || N_T!=0))  throw 208;  // ... all other types don't
         #ifndef VALIDATION_1
         if(studyType==Rep&&N_T+N_L>data1->nTup()) throw 209;  // check for overlapping of learn and test data
         #endif
         if(studyType==Cv && N_R>data1->nTup())    throw 210;  // # cross-val. cannot be greater than # data tuples
      }

      // now we've checked and can initialize (if necessary) N_L, N_T and N_R with respect to study flag and type
      // necessary here as later on N_L is needed to check tuning settings.
      IniStudyCounts();


      // check base filenames: they must not contain backslash, slash, colon or dot
      // note: the codes thrown here must match the texts of cases 211..213 below
      if(!CheckName(szDataBaseFileName))        throw 211;     // invalid data base filename
      if(!CheckName(szModelBaseFileName))       throw 212;     // invalid model base filename
      if(!CheckName(szSimulationBaseFileName))  throw 213;     // invalid simulation base filename


      //----------------------------------------------------------------------------------------------------------------
      // c) section '[Show]'
      if(f_SaveMemory&&f_Criterion[StringToCriterionId("pVal")])
         throw 400;                                               // p-values cannot be calculated in memory save mode
      #ifdef RELEASE
      if(f_Criterion[StringToCriterionId("pVal")])                // p-values cannot be used in release versions
         throw 401;                                               // due to licence problems
      #endif



      //----------------------------------------------------------------------------------------------------------------
      // d) section '[Tuning]'
      // check settings if tuning is enabled
      if(f_Tune)
      {
         if(szRanking[0]=='\0')                          throw 501;     // ranking criterion not found
         if(szObjective[0]=='\0')                        throw 502;     // tuning objective missing
         if(szTuningFile[0]=='\0')                       throw 512;     // tuning result file needed

         if(ranking<0)                                   throw 515;     // ranking criterion known ?
         if(ranking==StringToCriterionId("pVal")
            ||ranking==StringToCriterionId("Rank"))      throw 516;     // must not be 'Rank' or 'pVal'
         if(ranking==StringToCriterionId("SizeFac"))     throw 517;     // must not be 'SizeFac'
         if(objective<0)                                 throw 518;     // objective criterion  known ?
         if(objective==StringToCriterionId("SizeFac"))   throw 519;     // objective must not be 'SizeFac'


         // check base filenames: they must not contain backslash, slash, colon or dot
         // note: the codes thrown here must match the texts of cases 523..525 below
         if(!CheckName(szTuneDataBaseFile))              throw 523;     // invalid data base filename
         if(!CheckName(szTuneModelBaseFile))             throw 524;     // invalid model base filename
         if(!CheckName(szTuneSimulationBaseFile))        throw 525;     // invalid simulation base filename


         // p-values are not calculated in memory save mode and thus cannot be the tuning objective
         // (ranking by 'pVal' has already been rejected above, see code 516)
         if(f_SaveMemory && objective==StringToCriterionId("pVal"))
            throw 520;


         // initialize flag arrays with the criterions that need to be calculated according to section '[Show]' , objective etc.
         IniCriterionFlags();
      }
      else                          // tuning disabled/not tested
         if(studyType==Special)
            throw 526;              // tuning cannot be disabled if study type is 'Special'



      //----------------------------------------------------------------------------------------------------------------
      // e) section '[Task...]'
      if(!f_Tune && sets->Size()>1) throw 600;  // there must not be more than one parameter set if tuning is disabled
      if(tasks.Size()==0)           throw 601;  // at least one task must be given
   }
   catch(int errNo)     // exception handling
   {
      delete sets;      // release parameter set list

      char szText[STS]; // compose error text
      switch(errNo)
      {
         // section '[Basic]'
         case 106 : sprintf(szText, "Section '[Basic]': Test data file needed if study is disabled! Please specify key 'Data2' or set key 'Study' in section 'Batch'!"); break;


         // section '[Batch]'
         case 200 : sprintf(szText, "Section '[Batch]': Key 'Study' not found! Please specify key 'Study'!"); break;
         case 201 : sprintf(szText, "Section '[Batch]': Result filename missing! Please specify key 'Results'!"); break;
         case 202 : sprintf(szText, "Section '[Batch]': Study mode must not be set if test data file is given! Please unset key 'Study' or remove key 'Data2'!"); break;
         case 203 : sprintf(szText, "Section '[Batch]': No study type given! Please specify key 'Type'!"); break;
         case 205 : sprintf(szText, "Section '[Batch]': Number of repetitions/cross-validations not given or out of range! Please specify/correct key 'N_R'!"); break;
         case 206 : sprintf(szText, "Section '[Batch]': Number of repetitions/cross-validations must not be given for types 'Loocv' or 'Special'! Please remove key 'N_R'!"); break;
         case 207 : sprintf(szText, "Section '[Batch]': Number of learn and test tuples not given or out of range! Please specify/correct keys 'N_L' and 'N_T'!"); break;
         case 208 : sprintf(szText, "Section '[Batch]': Number of learn and test tuples must not be given for types 'Cross-Validation', 'Loocv' or 'Special'! Please remove keys 'N_L' and 'N_T'!"); break;
         case 209 : sprintf(szText, "Section '[Batch]': Overlapping of learn and test data! 'N_L+N_T<=%d' must yield!", data1->nTup()); break;
         case 210 : sprintf(szText, "Section '[Batch]': Too many cross-validations! 'N_R<=%d' must yield!", data1->nTup()); break;
         case 211 : sprintf(szText, "Section '[Batch]': Data base filename must not contain any slashes, colons or dots! Please correct or remove key 'Data'!"); break;
         case 212 : sprintf(szText, "Section '[Batch]': Model base filename must not contain any slashes, colons or dots! Please correct or remove key 'Model'!"); break;
         case 213 : sprintf(szText, "Section '[Batch]': Simulation base filename must not contain any slashes, colons or dots! Please correct or remove key '%s'!", SZ_SIMULATION); break;


         // section '[Show]'
         case 400 : sprintf (szText, "Section '[Show]': Criterion 'pVal' conflicts with save memory mode!"); break;
         case 401 : sprintf (szText, "Section '[Show]': Criterion 'pVal' cannot be used in GNU GPL version ;-("); break;


         // section '[Tuning]'
         case 501 : sprintf(szText, "Section '[Tuning]': No criterion for ranking of parameter sets given! Please specify key 'Ranking'!"); break;
         case 502 : sprintf(szText, "Section '[Tuning]': No tuning objective is given! Please specify key 'Objective'!"); break;
         case 512 : sprintf(szText, "Section '[Tuning]': Result filename missing! Please specify key 'Results'!"); break;
         case 515 : sprintf(szText, "Section '[Tuning]': Ranking criterion unknown! Please correct key 'Ranking'!"); break;
         case 516 : sprintf(szText, "Section '[Tuning]': Ranking criterion must not be 'pVal' or 'Rank'! Please correct key 'Ranking'"); break;
         case 517 : sprintf(szText, "Section '[Tuning]': Ranking criterion must not be 'SizeFac'! Please correct key 'Ranking'!"); break;
         case 518 : sprintf(szText, "Section '[Tuning]': Tuning objective unknown! Please correct key 'Objective'!"); break;
         case 519 : sprintf(szText, "Section '[Tuning]': Tuning objective must not be 'SizeFac'! Please correct key 'Objective'!"); break;
         case 520 : sprintf(szText, "Section '[Tuning]': Tuning objective must not be 'pVal' if memory save mode is enabled! Please correct key 'Objective' or unset key 'SaveMemory'!"); break;

         case 523 : sprintf(szText, "Section '[Tuning]': Data base filename must not contain any slashes, colons or dots! Please correct or remove key 'Data'!"); break;
         case 524 : sprintf(szText, "Section '[Tuning]': Model base filename must not contain any slashes, colons or dots! Please correct or remove key 'Model'!"); break;
         case 525 : sprintf(szText, "Section '[Tuning]': Simulation base filename must not contain any slashes, colons or dots! Please correct or remove key '%s'!", SZ_SIMULATION); break;
         case 526 : sprintf(szText, "Section '[Tuning]': Tuning cannot be disabled if study type is 'Special'! Please correct key 'Tuning'!"); break;


         // section '[Task...]'
         case 600 : sprintf(szText, "Section '[Task%d]': There must not be more than one parameter set if tuning is disabled! Please correct specified tasks or set key 'Tuning' in section 'Tuning'!", tasks.Size()); break;
         // note: no task exists here, thus there is no task number to print - the text must not contain a conversion
         case 601 : sprintf(szText, "Section '[Task1]': At least one task must be given! Please add section [Task1]!"); break;


         // any other code: make sure the text buffer never gets thrown uninitialized
         default  : sprintf(szText, "Unexpected error code %d while checking project settings!", errNo); break;
      }
      ThrowTypeU(szText);     // 'propagate' exception
   }


   // return the parameter set list obtained from the base class check - it was generated while checking, so it is
   // handed on to the caller instead of being generated once more
   return sets;
}


//----------------------------------------------------------------------------------------------------------------------
// determine which criterions have to be calculated (f_CritToCalc): all criterions enabled in f_Criterion, plus
// ranking criterion and tuning objective if tuning is enabled, plus 'SizeFac', 'VarPerCub', 'K' and 'K'' which are
// always switched on. Afterwards f_LossOnLearnData is derived from the resulting flags.
void TProjectB::IniCriterionFlags()
{
   // start with a copy of the criterion flags
   for(int i=0; i<nCriterion; ++i)
      f_CritToCalc[i] = f_Criterion[i];

   // tuning needs its ranking criterion and its objective
   if(f_Tune)
   {
      f_CritToCalc[ranking]   = true;
      f_CritToCalc[objective] = true;
   }

   // criterions that are calculated in any case
   f_CritToCalc[StringToCriterionId("SizeFac")] = true;
   f_CritToCalc[StringToCriterionId("VarPerCub")] = true;
   f_CritToCalc[StringToCriterionId("K")] = true;                    // needed while tuning parameters
   f_CritToCalc[StringToCriterionId("K'")] = true;

   // is the loss on learn data required by any of the selected criterions?
   f_LossOnLearnData = ::LossOnLearnData(f_CritToCalc);
}


//----------------------------------------------------------------------------------------------------------------------
// initialize learn and test data tuple counts (N_L and N_T) and repetition/cross-validation count (N_R) with respect
// to study flag (f_Study) and study type.   note: nothing is changed for any other study type (e.g. 'Repetition'),
// the values loaded from the project file are kept then.
// note: N_R is used as divisor for type 'Cv' and is not checked here
void TProjectB::IniStudyCounts()
{
   // study disabled: all data tuples are learn tuples, one single run
   if(!f_Study)
   {
      N_L = data1->nTup();
      N_T = 0;
      N_R = 1;
      return;
   }

   // study enabled: derive the counts from the study type
   if(studyType==Cv)
   {
      // N-fold cross-validation: one fold is used for testing, the remaining tuples for learning
      N_T = data1->nTup()/N_R;
      N_L = data1->nTup()-N_T;
   }
   else if(studyType==Loocv)
   {
      // leave-one-out cross-validation: one test tuple per run, one run per tuple
      N_L = data1->nTup()-1;
      N_T = 1;
      N_R = data1->nTup();
   }
   else if(studyType==Special)
   {
      // special mode: same counts as if study was disabled
      N_L = data1->nTup();
      N_T = 0;
      N_R = 1;
   }
}


//----------------------------------------------------------------------------------------------------------------------
// peek learn data tuple count, e.g. peek value of N_L as it would be after calling IniStudyCounts()
int TProjectB::Peek_N_L()
{
   int N_L = TProjectB::N_L;     // ini with value that was loaded from project file

   if(!f_Study || studyType==Special) N_L = data1->nTup();                       // special mode or study disabled
   if(f_Study && studyType==Cv)       N_L = data1->nTup()-data1->nTup()/N_R;     // N-fold cross-validation
   if(f_Study && studyType==Loocv)    N_L = data1->nTup()-1;                     // leave-one-out cross-validation

   return N_L;
}


//----------------------------------------------------------------------------------------------------------------------
// get output directory, i.e. directory used for result files if these are specified with relative path
// note: the returned pointer refers to a function-local static buffer which is overwritten by every call
const char* TProjectB::GetOutputDir() const
{
   static char szDir[STS];

   if(szOutputDir[0]=='\0')
   {
      // no output directory given: use the project file directory
      strcpy(szDir, szProjectFilePath);
   }
   else
   {
      // output directory given: take it and prepend project file directory if necessary
      strcpy(szDir, szOutputDir);
      PrefixPath(szDir, szProjectFilePath);
   }

   // in both cases: ensure finishing path delimiter
   EnsurePathDelimiter(szDir);

   return szDir;
}