#ifdef RELEASE
static const char* szPrgName   = "bap 1.26b Release Version";
#else
static const char* szPrgName   = "bap 1.26b Development Version";
#endif
static const char* szAuthor    = "Lars Haendel";
static const char* szCopyRight = "(c) 2001-2003 by";
static const char* szLogFile   = "project.log";
static const char* szModule    = "bap.cpp";

//------------------------------------------------------------------------------
//    module bap.cpp                                                          //
//                                                                            //
//    batch interface for the PNC cluster/learn algorithm                     //
//                                                                            //
//    copyright (c) 2001-2003 by Lars Haendel                                 //
//    mail: lore@newty.de                                                     //
//    home: www.newty.de                                                      //
//                                                                            //
//    This program is free software and can be used under the terms of the    //
//    GNU licence. See header file for further information and disclaimer.    //
//                                                                            //
//------------------------------------------------------------------------------                     


//----------------------------------------------------------------------------------------------------------------------
//#define ANSI                // ANSI C++ mode, i.e. neither reduce the thread priority nor write
                              // protect output files while they are open (default: OFF)
#define MAX_TEST 10           // max. # characters used for progress indication
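                              // note: the progress display prints one character every
                              //       (N_R_Tune/MAX_TEST)+1 tuning repetitions (see ProgressIndication()
                              //       in TuneParameters()), e.g. every 3rd repetition for N_R_Tune=25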


#define SZ_ERROR_LOG_FILE    "error.log"  // filename for error messages if program started with option "/ns"


#include <iostream>           // due to   iostreams
#include <iomanip>            //          setw()
#include <dir.h>              //          mkdir()
#include <errno.h>            //          errno

#ifndef ANSI
#include <io.h>               //          chmod()
#include <sys\stat.h>         //          S_IREAD
#include <windows.h>          //          SetThreadPriority()
#endif

using namespace std;          // note: the standard headers above declare cout, ofstream etc. in namespace std


#include "data.h"             //          TData
#include "fileutil.h"         //          FlagToString()
#include "pnc.h"              //          TPnc
#include "defines.h"          //          definitions 
#include "ParaSet.h"          //          TParaSet
#include "demo.h"             //          demo of how to use
#include "exception.h"        //          IfTrueThrowTypeA()

#include "bap.h"




//----------------------------------------------------------------------------------------------------------------------
// constructor
TBatchInterface::TBatchInterface(const bool& _f_WriteProtectOutput)
{
   f_WriteProtectOutput = _f_WriteProtectOutput;   // copy
   f_Loaded = false;                               // initialize flag

   data1 = new TData();                            // base data file in study mode, else learn data
   data2 = new TData();                            // test data file if study mode is disabled
   sets  = NULL;
}


//----------------------------------------------------------------------------------------------------------------------
// destructor
TBatchInterface::~TBatchInterface()
{
   if(data1)                                 // base data
      data1->Release();
   if(data2)                                 // test data
      data2->Release();

   delete sets;                              // parameter set list
}
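
// note: TData objects are shared (see GetObject()/Release() in data.h) and are therefore released
//       via Release() instead of delete; the parameter set list is exclusively owned and simply
//       deleted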


//----------------------------------------------------------------------------------------------------------------------
// write settings, i.e. data filename etc., and averaged weights if an array of data pointers is passed
// note: if 'data_L' is passed the function is called for the overall result file, else for the tuning
//       result file
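// illustrative output, assuming the comment character ComChar is '#':
//    # General Settings:
//    # Data1       = data.dat
//    #  #tuples    = 150
//    #  #columns   = 5
//    # Regression  = ...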
void TBatchInterface::WriteSettings(ofstream& file, const TData**const& data_L/*=NULL*/)
{
   const bool f_TuningFile = !data_L;

   file << ComChar << " General Settings:" << endl;

   // a) write data filename etc.
   if(prj.GetData2FileName()[0]!='\0') // if 2nd data file is given, i.e. learn and test data are explicitly specified
   {
      file << ComChar << " Data1       = " << prj.GetData1FileName() << endl;             // learn data file name
      if(f_TuningFile)
         file << ComChar << "  #tuples    = " << data1->nTup() << endl;                   // # tuples
      else
         file << ComChar << " Data2       = " << prj.GetData2FileName() << endl;          // test data file name

   }
   else
   {
      file << ComChar << " Data1       = " << prj.GetData1FileName() << endl;             // base data file name
      file << ComChar << "  #tuples    = ";

      if(f_TuningFile)                                               // # tuples in data file for parameter tuning
         file << prj.Get_N_L() << "/";
      file << data1->nTup() << endl;                                 // # tuples
   }

   file << ComChar << "  #colums    = " << data1->nVar() << endl;                         // # columns
   file << ComChar << " Regression  = " << FlagToString(prj.Regression()) << endl;        // regression flag


   // b1) parameter tuning
   if(f_TuningFile)
   {
      file << ComChar << " N_R         = " << prj.Get_N_R_Tune() << endl;                 // # repetitions
      file << ComChar << " N_L         = " << prj.Get_N_L_Tune() << endl;                 // # learn tuples
      file << ComChar << " N_T         = " << prj.Get_N_T_Tune() << endl;                 // # test tuples
      file << ComChar << " Ranking     = " << CriterionIdToString(prj.Ranking()) << endl; // ranking criterion
      file << ComChar << " Objective   = " << CriterionIdToString(prj.Objective())<<endl; // tuning objective
   }
   else

      // or b2) # learn and test tuples for study and 'learn-test-data-explicitly-specified' mode
      if(prj.GetData2FileName()[0]!='\0')                                                 // explicitly specified
      {
         file << ComChar << " N_L         = " << data1->nTup() << endl;                   // # learn tuples
         file << ComChar << " N_T         = " << data2->nTup() << endl;                   // # test tuples
      }
      else
      {                                                                                   // study mode
         file << ComChar << " N_R         = " << prj.Get_N_R() << endl;                   // # repetitions
         file << ComChar << " N_L         = " << prj.Get_N_L() << endl;                   // # learn tuples
         file << ComChar << " N_T         = " << prj.Get_N_T() << endl;                   // # test tuples
      }

   file << ComChar << " Randomize   = " << FlagToString(prj.Randomize()) << endl;         // randomization flag


   // c) # parameter sets if there is more than one
   if(prj.DoTuning())
   {
      file << ComChar << " " << prj.nParaSets() << " different parameter sets from which ";
      file << prj.nParaSetsLearn() << " require learning " << endl;
   }
   file << endl;



   // d) write (averaged) weights if (learn) data is given
   if(!f_TuningFile)
   {
      // d1) calculate averaged weights
      float* weights=new float[data1->nVar()];                                // allocate memory for averaged weights
      for(int j=0;j<data1->nVar();j++)                                        // ini
         weights[j]=0;
      for(int t=0;t<prj.Get_N_R();t++)
         for(int j=0;j<data1->nVar();j++)                                     // average weights
            weights[j]+=data_L[t]->Weights()[j]/prj.Get_N_R();
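      // i.e. weights[j] = (1/N_R) * sum over t of data_L[t]->Weights()[j]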


      // d2) write averaged weights
      file << endl << "Averaged Weights: " << endl;
      char szText[STS];
      for(int j=0;j<data1->nVar();j++)                                        // for all variables
      {
         sprintf(szText, " %.2f", weights[j]);
         file << szText;                                                      // write weight
      }
      file << endl << endl;                                                   // linefeed
      delete[] weights;                                                       // release
   }
}


//----------------------------------------------------------------------------------------------------------------------
// write (study) result file, 'para' contains the parameter sets and 'res' the corresponding loss results
void TBatchInterface::WriteStudyResults(const TParaSet*const& para, TParaSetResult& res, const clock_t& T_Tune)
{
   // a) get criteria which should be shown and unset 'pVal' and 'Rank' because there is no reason to display these
   bool f_criterion[nCriterion];
   memcpy(f_criterion, prj.GetCriterionsToShow(), sizeof(bool)*nCriterion);
   f_criterion[StringToCriterionId("Rank")] = f_criterion[StringToCriterionId("pVal")] = false;    // unset


   // b) display filename
   cout << endl << endl << "Writing results to " << prj.GetResultFileName() << endl;


   // c) write parameter and result description
   int nChar = max(int (log(prj.Get_N_R())/log(10)+1), 4);                    // # characters used to display test id
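   // note: e.g. 5 characters for N_R=10000; the minimum of 4 matches the width of the header "Test"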
   for(int i=0;i<nChar-4;i++)                                                 // add spaces
      resfile << " ";
   resfile << "Test";
   TParaSet::WriteDescription(resfile, prj.Regression());                     // write parameter description
   TParaSetResult::WriteResultDescriptions(resfile, f_criterion);             // write result description



   // d) write parameters and loss function results for each repetition
   char szText[STS];
   for(int t=0;t<prj.Get_N_R();t++)                                           // over all repetitions (test)
   {
      sprintf(szText, "%*d", nChar, t+1);                                     // write test id
      resfile << szText;
      para[t].WriteParameters(resfile, prj.Regression());                     // write parameter set
      res.Write(resfile, f_criterion, prj.ShowDev(), t);                      // write results
   }


   // e) write overall performance averaged over all tests/repetitions
   res.Calculate();
   resfile << endl << "Overall Performance:" << endl << "  ";
   TParaSetResult::WriteResultDescriptions(resfile, f_criterion);             // write result description

   resfile << "  ";
   res.Write(resfile, f_criterion, false);                                    // write average results

   resfile << "+-";
   for(int i=0;i<nCriterion;i++)                                              // write deviation below in the next line
      if(f_criterion[i])
         res.WriteDeviation(i, resfile);

   if(prj.DoTuning())
      resfile << endl << endl << "T_Tune = " << WriteTime(T_Tune/prj.Get_N_R(), false);  // write time needed for tuning
}


   //-------------------------------------------------------------------------------------------------------------------
   // progress indication helper function for TBatchInterface::TuneParameters()
   void ProgressIndication(const int& i, const int& steps_until_update, const bool& blank=false)
   {
      char c=' ';
      if(!blank)                       // note: braces avoid the dangling-else ambiguity
      {
         if(steps_until_update==1)     c='.';
         else                          c='|';
      }

      if(i%steps_until_update==0)
         cout << c;
   }


//----------------------------------------------------------------------------------------------------------------------
// find optimal parameter set for given (learn) data
TParaSet TBatchInterface::TuneParameters(const unsigned int& seed, TData*const& data, const int& optId
                  , ofstream*const& file)
{
   //-------------------------------------------------------------------------------------------------------------------
   // 1. initialize
   char szText[STS];
   const int nParaSets = sets->Size();                            // abbreviation
   clock_t time = clock();                                        // get start time
   TCluster** cls = NULL;                                         // learned models

   for(int r=0;r<nParaSets;r++)                                   // reset skip flags for each parameter set
      sets->Get(r).Skip = false;

   int nChar = log(prj.Get_N_R())/log(10)+1;                      // # characters needed to enumerate repetitions
                                                                  // (used for filename generation)
   const int nCharOpt  = log(prj.Get_N_R_Tune())/log(10)+1;       // # characters needed to enumerate tuning runs ...
   const int nCharPara = log(nParaSets)/log(10)+1;                // ... repetitions and parameter sets
   int tests_until_update = prj.Get_N_R_Tune()/MAX_TEST+1;        // calculate # tests until display is updated

   TParaSetResults results(sets->Size(), prj.Get_N_R_Tune()       // create result object
      , prj.Regression(), prj.GetCriterionsToCalculate());


   //-------------------------------------------------------------------------------------------------------------------
   // 2. prepare learn and test data
   TData **data_L, **data_T;

   data->GenerateLearnAndTestData(seed, data_L, data_T, prj.Get_N_R_Tune(), prj.GetTuneType()!=Rep,
   prj.Get_N_L_Tune(), prj.Get_N_T_Tune(), prj.GetTuneDataBaseFile(), prj.GetOutputDir(), prj.Get_N_Bins()
         , prj.Regression(), prj.EqualWidthBinning(), optId, nChar);
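
   // note: GenerateLearnAndTestData() allocates the 'data_L'/'data_T' arrays and the TData objects
   //       they point to; both are released again in step 5 below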


   //-------------------------------------------------------------------------------------------------------------------
   // 3. for each parameter set: learn and test on the different learn and test data
   int lastLearnId, resultLength;
   for(int r=0;r<nParaSets;r++)
   {
      // a) get parameter set
      TParaSet& para   = sets->Get(r);                                  // get from list
      TParameter _para = ToTParameter(para, &prj);                            // convert to use it with kernel routines


      // b) display progress
      sprintf(szText, "%*d L", (int) (log(sets->Size())/log(10)+1), (r+1));   // display parameter set number
      cout << endl << szText;
      if(para.Prune)
         cout << "P";                                                         // display if pruning will be done
      else
         cout << " ";


      //----------------------------------------------------------------------------------------------------------------
      // c) learn models for each repetition  -  note: learn only if re-learn flag is set and skip flag is unset
      //    note: either I), II) or III) will be executed
      if(!para.Skip)

         //-------------------------------------------------------------------------------------------------------------
         // I) learn models for each repetition
         if(para.Relearn)
         {
            lastLearnId = r;                             // store id

            if(cls)                                      // release previous models
               for(int t=0;t<prj.Get_N_R_Tune();t++)
                  if(cls[t])
                     delete cls[t];
            delete[] cls;
            cls = new TCluster*[prj.Get_N_R_Tune()];     // new field with models
            for(int t=0;t<prj.Get_N_R_Tune();t++)
               cls[t] = NULL;                            // initialize to NULL


            // over all repetitions
            for(int t=0;t<prj.Get_N_R_Tune();t++)
            {
               TPnc pnc(data_L[t], _para);               // instantiate PNC object

               results.Get(r)->StartClock(false, t);     // get time
               while(pnc.Iterate());                     // learn model ...
               cls[t] = pnc.ToTCluster(para.Prune);      // ... and convert TCluster, prune if necessary
               results.Get(r)->StopClock(false, t);      // set learn time


               // abort if the currently learned model is too big
               // note: do not skip the first model as it'll be the smallest of all
               if(prj.Skipping() && r!=0)
                  if(ModelToBig(cls[t], data_L[t], _para, &prj))
                  {
                     SkipBigger(sets);                              // set skip flag for this and covered parameter sets
                     for(int k=t;k<prj.Get_N_R_Tune();k++)
                        ProgressIndication(k, tests_until_update);  // complete progress indication with dummies
                     break;                                         // abort learning
                  }


               ProgressIndication(t, tests_until_update);           // display progress
            }
         }

         //-------------------------------------------------------------------------------------------------------------
         // II) skip learning as previously learned models can be used but copy learn times and check model size
         else
         {
            // for each previously learned model ...
            for(int t=0;t<prj.Get_N_R_Tune();t++)
            {
               ProgressIndication(t, tests_until_update, true);     // dummy progress indication

               // copy learn time from last learned model
               results.Get(r)->GetClock(false, t) = results.Get(lastLearnId)->GetClock(false, t);
            }


            // check all(!) previously learned models with the current parameters to see if one is too big
            if(prj.Skipping())
               for(int t=0;t<prj.Get_N_R_Tune();t++)
                  if(ModelToBig(cls[t], data_L[t], _para, &prj))
                  {
                     SkipBigger(sets);                              // set skip flag for this and covered parameter sets
                     break;                                         // abort learning
                  }

         }

      //----------------------------------------------------------------------------------------------------------------
      // III) parameter set is skipped ...
      else
         for(int t=0;t<prj.Get_N_R_Tune();t++)                      // ... dummy progress indication
            ProgressIndication(t, tests_until_update, true);



      //----------------------------------------------------------------------------------------------------------------
      // d) test all learned models if skip flag is not set
      if(!para.Skip)
      {
         cout << " T";

         // for each model
         for(int t=0;t<prj.Get_N_R_Tune();t++)
         {
            //----------------------------------------------------------------------------------------------------------
            // d1) open simulation output if basename specified
            ofstream* file=NULL;
            if(prj.GetTuneSimulationBaseFile()[0]!='\0')
            {
               // compose filename
               sprintf(szText, "%s%s_CV%0*d_Run%0*d_PSet%0*d.sim", prj.GetOutputDir(),
                  prj.GetTuneSimulationBaseFile(), nChar, optId+1, nCharOpt, (t+1), nCharPara, (r+1));
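
               // illustrative filename: "out\sim_CV1_Run03_PSet007.sim" for optId=0, t=2, r=6
               // (assuming output dir "out\", base name "sim", N_R=9, N_R_Tune=10 and 100 parameter sets)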

               file = new ofstream(szText);                                      // open file and ...
               if(!*file)                                                        // ... check success
                  cout << endl << "Write Error: Unable to open simulation file '" << szText << "'!" << endl;
            }


            //----------------------------------------------------------------------------------------------------------
            // d2) loss on test(!) data
            int nTup = data_T[t]->nTup();    // # tuples to enable loss object to store predictions
            if(prj.SaveMemory())
               nTup=-1;
            results.Get(r)->SetLossObjectTest(t, new TLossFunction(data_L[t]->Mean()[0], nTup));   // new loss object

            results.Get(r)->StartClock(true, t);
            TPrediction prd(cls[t], &_para);
            prd.Predict(data_T[t], results.Get(r)->GetLossObjectTest(t), file);                    // predict all
            results.Get(r)->StopClock(true, t);                                                    // store test time


            //----------------------------------------------------------------------------------------------------------
            // d3) loss on learn(!) data
            if(prj.NeedLossOnLearnData())       // note: estimate only if necessary
            {
               nTup = data_L[t]->nTup();        // # tuples to enable loss object to store predictions
               if(prj.SaveMemory())
                  nTup=-1;
               results.Get(r)->SetLossObjectLearn(t, new TLossFunction(data_L[t]->Mean()[0], nTup));  // new loss object
               prd.Predict(data_L[t], results.Get(r)->GetLossObjectLearn(t), file);                   // predict all
            }


            //----------------------------------------------------------------------------------------------------------
            // d4) set other 'results' like mass and hitrate
            results.Get(r)->nCuboids(t)  = cls[t]->nCuboids();              // # cuboids (without minimal mass)
            results.Get(r)->nCuboidsK(t) = cls[t]->nCuboidsRed();           // # cuboids that exceed min. mass
            results.Get(r)->nBounds(t)   = cls[t]->AvrVarPerCub(para.Prune);// average active variable bounds per cuboid
            results.Get(r)->HitRate(t)   = cls[t]->AvrHitRate();            // average hitrate
            results.Get(r)->SizeFac(t) = 1;                                 // weighting factor regarding model's size
            if(ModelSize(cls[t], data, para.Prune) > prj.MaxModelSize())
               results.Get(r)->SizeFac(t)*=2;


            //----------------------------------------------------------------------------------------------------------
            // d5) save model if basename specified
            if(prj.GetTuneModelBaseFile()[0]!='\0')
            {
               // compose filename
               sprintf(szText, "%s%s_CV%0*d_Run%0*d_PSet%0*d.%s", prj.GetOutputDir(),
               prj.GetTuneModelBaseFile(), nChar, optId+1, nCharOpt, (t+1), nCharPara, (r+1), TPnc::Extension());

               ofstream file(szText);                                         // open file and ...
               if(!file)                                                      // ... check success
                  cout << endl << "Write Error: Unable to open model file " << szText << " !" << endl;
               cls[t]->Save(file, true, &_para);                              // save model
            }

            if(file)                                                          // close file if it was opened
               file->close();
            delete file;

            ProgressIndication(t, tests_until_update);                        // display progress
         }


         //-------------------------------------------------------------------------------------------------------------
         // d6) calculate mean and deviation of loss and other results
         results.Get(r)->Calculate();


         //-------------------------------------------------------------------------------------------------------------
         // d7) display some results (ranking criterion and # cuboids)
         sprintf(szText, " %s=%s", CriterionIdToString(prj.Ranking()), results.Get(r)->ResultToText(prj.Ranking()
                                 ,/*width*/8));
         cout << szText;
         resultLength = SizeOfString(szText);                                 // store string length
         int w = (log(data->nTup())/log(10)+3);
         sprintf(szText, "  K=%*.1f/%*.1f", w, results.Get(r)->Result(StringToCriterionId("K")), w
                  , results.Get(r)->Result(StringToCriterionId("K'")));       // # cuboids
         cout << szText;
         resultLength += SizeOfString(szText);                                // add to stored string length
      }

      //----------------------------------------------------------------------------------------------------------------
      // skip flag was set: do not test models, just display dummy progress and a '-'
      else
      {
         for(int t=0;t<prj.Get_N_R_Tune();t++)                    // dummy progress indication
            ProgressIndication(t, tests_until_update, true);
         cout << "   -" << setw(resultLength-2) << " ";
      }


      //----------------------------------------------------------------------------------------------------------------
      // e) display elapsed time and percentage of parameter sets processed
      cout << "  " << WriteTime2(clock()-time) << " " << WritePercentage((r+1)/((float)nParaSets), 0, 0) << "% done";
   }


   //-------------------------------------------------------------------------------------------------------------------
   // 4. calculate 'pVal' and 'Rank', select best parameter set (regarding specified objective considering size
   //    weighting factor) and write tuning results to file
   results.CalculateRankAndPVal(prj.Ranking());
   int id_best = results.FindBest(prj.Objective());      // select best parameter set


   // write tuning results sorted by tuning objective (weighted by model size factor)
   cout << endl;
   *file << " Test " << (optId+1) << endl;
   results.Write(*file, prj.GetCriterionsToShow(), prj.Objective(), sets, prj.ShowDev());
   *file << endl;



   //-------------------------------------------------------------------------------------------------------------------
   // 5. release
   for(int t=0;t<prj.Get_N_R_Tune();t++)
   {
      if(cls[t])                 // models
         delete cls[t];
      data_L[t]->Release();      // data
      data_T[t]->Release();
   }
   delete[] data_T;
   delete[] data_L;
   delete[] cls;


   //-------------------------------------------------------------------------------------------------------------------
   // 6. return optimal run parameters
   return sets->Get(id_best);
}



//----------------------------------------------------------------------------------------------------------------------
// do it: generate learn and test data, either using specified experiment design (Repetition, CV, Loocv) or specified
// learn and test data files. Learn on each learn data set (including tuning of parameters if enabled) and test
// on each test data set.
void TBatchInterface::DoIt()
{
   //-------------------------------------------------------------------------------------------------------------------
   // 1. display study parameters if study mode is enabled
   if(prj.MakeStudy())
   {
      cout << endl << "Study ------------------------------------------------" << endl;

      // a) experiment design
      if(prj.StudyType()==Special)  cout << "Special mode"                      << endl;
      if(prj.StudyType()==Loocv)    cout << "Leave-one-out cross-validation"    << endl;
      if(prj.StudyType()==Cv)       cout << prj.Get_N_R() << " fold cross-validation" << endl;
      if(prj.StudyType()==Rep)      cout << prj.Get_N_R() << " fold repetition"       << endl;

      // b) # learn and test data tuples
      cout << "Generating " << prj.Get_N_R() << " data files with " << prj.Get_N_L() << " learn and " << prj.Get_N_T();
      cout << " test tuples" << endl;

      // c) output range for regression tasks and # classes for classification tasks
      if(prj.Regression())
         cout << "Regression task with y e[" << data1->Min()[0] << ".." << data1->Max()[0] << "]" << endl;
      else
         cout << "Classification task with " << data1->nIntegerMaxMin(0) << " classes" << endl;
   }


   //-------------------------------------------------------------------------------------------------------------------
   // 2. display and write tuning settings if tuning is enabled
   if(prj.DoTuning())
   {
      cout << endl << "Tuning -----------------------------------------------" << endl;
      cout << prj.nParaSets() << " parameter sets from which " << prj.nParaSetsLearn() << " require learning " << endl;
      if(prj.GetTuneType()==Loocv) cout << "Leave-one-out-cross-validation"             << endl;   // experiment design
      if(prj.GetTuneType()==Cv)   cout << prj.Get_N_R_Tune() << " fold cross-validation" << endl;
      if(prj.GetTuneType()==Rep)  cout << prj.Get_N_R_Tune() << " fold repetition"       << endl;

      // # learn and test data tuples
      cout << prj.Get_N_L_Tune() << " learn and " << prj.Get_N_T_Tune() << " test tuples" << endl;

      WriteSettings(optfile);        // write settings to tuning result file
   }


   //-------------------------------------------------------------------------------------------------------------------
   // 3. get random seeds for learn and test data generation, note: one seed is needed for learn and test data
   //    generation in study mode and then one seed is needed for the parameter optimization for each repetition
   unsigned int* seed = new unsigned int[prj.Get_N_R()+1];
   if(prj.Randomize())                                            // initialize random number generator if specified
      ::randomize();
   for(int i=0;i<prj.Get_N_R()+1;i++)                             // create seed for data generation
      seed[i]=abs(random(INT_MAX));
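
   // note: seed[0..N_R-1] are consumed by the parameter tuning, one per repetition (see step d1);
   //       seed[N_R] drives the learn/test data generation in study mode (step II a)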



   //-------------------------------------------------------------------------------------------------------------------
   // 4. do it
   if(prj.StudyType()==Special)


   //-------------------------------------------------------------------------------------------------------------------
   // I) special study mode: run just one parameter tuning on complete data set
   {
      for(int i=0;i<prj.Get_N_R_Tune();i++)              // emulate calls to get same 'random' data file splitting
         seed[0] = abs(random(INT_MAX));

      clock_t time = clock();                            // get time
      TuneParameters(seed[0], data1, 0, &optfile);
      time = clock()-time;                               // determine time needed for tuning ...
      optfile << "T_Tune = " << WriteTime(time, false);  // ... and write it
   }

   //-------------------------------------------------------------------------------------------------------------------
   // II) normal study mode or disabled study mode, i.e. explicitly specified learn and test data sets
   else
   {
      //----------------------------------------------------------------------------------------------------------------
      // a) generate learn and test data
      #ifdef DEBUG_LOG_ON
      DebugLogFile << "Generating Data" << endl;
      #endif

      TData **data_L, **data_T;
      if(prj.MakeStudy())

         // create N_R learn and test data sets
         data1->GenerateLearnAndTestData(seed[prj.Get_N_R()], data_L, data_T, prj.Get_N_R(), prj.StudyType()!=Rep
                     , prj.Get_N_L(), prj.Get_N_T(), prj.GetDataBaseFileName(), prj.GetOutputDir()
                     , prj.Get_N_Bins(), prj.Regression(), prj.EqualWidthBinning());
      else
      {
         // emulate learn and test data generation: make arrays with one single data object each
         data_L = new TData*[1];                                           // allocate arrays (for one object)
         data_T = new TData*[1];
         data_L[0] = data1->GetObject();                                   // set specified learn and test data
         data_T[0] = data2->GetObject();

         data_L[0]->Sort(0);                                               // sort learn data (regarding the output)
         data_L[0]->CalculateWeights(prj.Get_N_Bins(), prj.Regression(), prj.EqualWidthBinning()); // calculate weights
         data_L[0]->Lock();
      }


      //----------------------------------------------------------------------------------------------------------------
      // b) write settings to result file
      WriteSettings(resfile, (const TData**) data_L);


      //----------------------------------------------------------------------------------------------------------------
      // c) allocate/initialize
      float maxSize = 0.5*prj.Get_N_L()*prj.GetMaxSize()/100.0;      // maximal acceptable model size
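      // e.g. maxSize = 0.5*1000*10/100 = 50 for N_L=1000 and MaxSize=10(%)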
      TParaSet* para = new TParaSet[prj.Get_N_R()];                  // optimal parameters for each repetition
      TParaSetResult  res(prj.Get_N_R(), prj.GetCriterionsToShow()); // loss results for each repetition
      const int nChar = log(prj.Get_N_R())/log(10)+1;                // used for output filename generation
      clock_t T_Tune = 0;                                            // time used to tune parameters


      //----------------------------------------------------------------------------------------------------------------
      // d) learn and test on each learn and test data set
      #ifdef DEBUG_LOG_ON
      DebugLogFile << "Processing ... " << endl;
      #endif

      for(int t=0;t<prj.Get_N_R();t++)
      {
         cout << endl;
         #ifdef DEBUG_LOG_ON
         DebugLogFile << endl << "Run " << (t+1) << endl;
         #endif


         //-------------------------------------------------------------------------------------------------------------
         // d1) tune parameters if enabled
         if(prj.DoTuning())
         {
            cout << endl << "Test " << (t+1) << "/" << prj.Get_N_R();                  // display progress
            clock_t time = clock();                                                    // get time
            para[t] = TuneParameters(seed[t], data_L[t], t, &optfile);
            time = clock()-time;                                                       // stop time
            T_Tune += time;                                                            // sum up
            cout << "Optimal parameters: ";                                            // ... and display them
            para[t].WriteParameters(cout, prj.Regression(), true);
            cout << endl;
         }
         else
            para[t] = sets->Get(0);    // note: if tuning is disabled there is just one single parameter
                                       //       set in list. Just take it!


         //-------------------------------------------------------------------------------------------------------------
         // d2) learn model with specified/tuned parameters
         TPnc pnc(data_L[t], ToTParameter(para[t], &prj));                             // new model

         cout << (t+1) << "/" << prj.Get_N_R() << " Learning " << " ... ";
         res.StartClock(false, t);                                                     // start timer

         while(pnc.Iterate());                                                         // learn model

         if(para[t].Prune)
            cout << "Pruning ... ";
         TCluster* cls = pnc.ToTCluster(para[t].Prune);                                // convert to TCluster

         res.StopClock(false, t);                                                      // stop timer


         //-------------------------------------------------------------------------------------------------------------
         // d3) open simulation output file if (base) name is specified
         ofstream* file=NULL;
         if(prj.GetSimulationBaseFileName()[0]!='\0')
         {
            char szFilename[STS];                                                                  // compose filename
            sprintf(szFilename, "%s%s_CV%0*d.sim", prj.GetOutputDir()
               , prj.GetSimulationBaseFileName(), nChar, (t+1));
            file = new ofstream(szFilename);                                                       // open file
            if(!*file)                                                                             // check
               cout << "Write Error: Unable to open simulation file " << szFilename << " !" << endl;
         }


         //-------------------------------------------------------------------------------------------------------------
         // d4) test model on test data
         cout << "Testing ... ";
         int nTup = data_T[t]->nTup();                   // #tuples to predict
         if(prj.SaveMemory())
            nTup=-1;                                     // reset it if predictions should not be stored to save memory
         res.SetLossObjectTest(t, new TLossFunction(data_L[t]->Mean()[0], nTup));      // new loss object
         res.StartClock(true, t);

         TPrediction prd(cls);                                                         // instantiate prediction object
         prd.Predict(data_T[t], res.GetLossObjectTest(t), file);                       // test on test data
         res.StopClock(true, t);


         //-------------------------------------------------------------------------------------------------------------
         // d5) test on learn(!) data if necessary
         if(prj.NeedLossOnLearnData())
         {
            cout << " ... ";
            nTup = data_L[t]->nTup();     // #tuples to predict
            if(prj.SaveMemory())          // reset it if predictions should not be stored to save memory
               nTup=-1;
            res.SetLossObjectLearn(t, new TLossFunction(data_L[t]->Mean()[0], nTup));  // new loss object
            prd.Predict(data_L[t], res.GetLossObjectLearn(t), file);                   // test
         }


         //-------------------------------------------------------------------------------------------------------------
         // d6) store other 'results' like model size ...
         res.nCuboids(t)  = cls->nCuboids();
         res.nCuboidsK(t) = cls->nCuboidsRed();
         res.nBounds(t)   = cls->AvrVarPerCub(para[t].Prune);     // average # variables per cuboid
         res.HitRate(t)   = cls->AvrHitRate();

         res.SizeFac(t) = 1;
         if(ModelSize(cls, data_L[t], para[t].Prune) > maxSize)
            res.SizeFac(t) *=2;                                   // weighting factor regarding model's size


         //-------------------------------------------------------------------------------------------------------------
         // d7) save model if (base) name is specified
         if(prj.GetModelBaseFileName()[0]!='\0')
         {
            // compose filename
            char szFilename[STS];
            sprintf(szFilename, "%s%s_CV%0*d.%s", prj.GetOutputDir(), prj.GetModelBaseFileName(), nChar, (t+1)
                     , pnc.Extension());

            ofstream file(szFilename);                                                                // open file
            if(!file)                                                                                 // check
               cout << "Write Error: Unable to open model file " << szFilename << " !" << endl;
            cls->Save(file, true);                                                                    // save model
            file.close();                                                                             // close file

            // (hack!) save tuple IDs
            sprintf(szFilename, "%s%s_CV%0*d.%s", prj.GetOutputDir(), prj.GetModelBaseFileName(), nChar, (t+1), "tid");
            file.open(szFilename);                                                                    // open file
            if(!file)                                                                                 // check
               cout << "Write Error: Unable to open tuple id file " << szFilename << " !" << endl;
            pnc.SaveClusterTupleIds(file);                                                            // save
         }


         //-------------------------------------------------------------------------------------------------------------
         // d8) display some results (ranking criterion on test data and # cuboids)
         char szText[STS];

         // display ranking criterion
         sprintf(szText, " %s=%s  ", CriterionIdToString(prj.Ranking()), res.ResultToText(prj.Ranking(), 0, false, t));
         cout << szText;

         // display # cuboids
         int w = log(data1->nTup())/log(10)+1;
         sprintf(szText, " K=%*d/%*d", w, (int) res.Result(StringToCriterionId("K"), t), w
                  , (int) res.Result(StringToCriterionId("K'"), t));
         cout << szText;


         // release
         delete cls;
         if(file)                                                                      // close/delete file if it was opened
            file->close();
         delete file;
      }

      //----------------------------------------------------------------------------------------------------------------
      // e) write overall performance (results)
      WriteStudyResults(para, res, T_Tune);


      //----------------------------------------------------------------------------------------------------------------
      // f) clean up: release learn and test data and parameters
      for(int t=0;t<prj.Get_N_R();t++) data_L[t]->Release();      delete[] data_L;
      for(int t=0;t<prj.Get_N_R();t++) data_T[t]->Release();      delete[] data_T;
      delete[] para;
    }


   //-------------------------------------------------------------------------------------------------------------------
   // 5. clean up
   delete[] seed;

   // close files
   #ifndef ANSI
   if(!f_WriteProtectOutput)                                      // if not specified otherwise by cmd line option ...
   {
      chmod(prj.GetResultFileName(), S_IREAD | S_IWRITE);         // ... remove write protection
      chmod(prj.GetTuningFileName(), S_IREAD | S_IWRITE);
   }
   #endif

   optfile.close();                                               // close files which were opened in Load()
   resfile.close();
}



//----------------------------------------------------------------------------------------------------------------------
// load project file
void TBatchInterface::Load(char* _szFilename)
{
   #ifdef DEBUG_LOG_ON
   DebugLogFile << "Loading project file: " << _szFilename << endl;
   #endif


   //-------------------------------------------------------------------------------------------------------------------
   // 0. pre-check
   IfTrueThrowTypeA(f_Loaded, "Function can only be called once in the lifetime of an instance!"
                  , "TBatchInterface::Load", szModule);


   //-------------------------------------------------------------------------------------------------------------------
   // 1. initialize/manipulate directories and file names, check path delimiter
   char szPath[STS], szText[STS];
   getcwd(szCwd, STS);                 // get working directory
   EnsurePathDelimiter(szCwd);         // ensure path delimiter

   strcpy(szFilename, _szFilename);    // copy filename to own memory   note: to be able to manipulate (prepend) path
   CorrectPathDelimiter(szFilename);   // correct path delimiter (now the user can use either slash or backslash)
   ExtractPath(szPath, szFilename);    // extract project file "working" directory from filename which may be ...
   PrefixPath(szPath, szCwd);          // ... relative to current working directory thus prepend current working
                                       // directory if necessary
   PrefixPath(szFilename, szCwd);      // prepend current path to project filename if necessary

   f_Loaded = true;
   cout << endl << "Loading '" << szFilename << "'" << endl;            // display project filename


   //-------------------------------------------------------------------------------------------------------------------
   // 2. load project file
   ifstream file(szFilename, ios::in);                                  // open file
   IfTrueThrowTypeU(!file, "Load Error: Unable to open project file!"); // check success   note: name is displayed above
   prj.Load(file, szPath);                                              // load via TProjectB::Load()
   file.close();                                                        // close file


   //-------------------------------------------------------------------------------------------------------------------
   // 3. re-write loaded project file to standard logfile
   ofstream logfile(szLogFile, ios::out);                                                 // open
   if(!logfile)                                                                           // check success
      cout << endl << "Warning: Unable to open log file '" << szLogFile << "'!" << endl;
   prj.Save(logfile);                                                                     // save
   logfile.close();                                                                       // close file


   //-------------------------------------------------------------------------------------------------------------------
   // 4. load data file(s) and synchronize them with the project
   data1->Load(prj.GetData1FileName());                                                   // load data file
   sets = prj.Synchronize(data1);                                                         // synchronize

   if(prj.GetData2FileName()[0]!='\0')
   {
      data2->Load(prj.GetData2FileName());                                                // load data file
      data2->MakeCompatible(data1);                // make compatible
   }


   //-------------------------------------------------------------------------------------------------------------------
   // 5. create output directory if necessary
   strcpy(szText, prj.GetOutputDir());      // get output directory
   if(szText[0]!='\0')
      if(mkdir(szText)!=0)
         IfTrueThrowTypeU(errno==ENOENT
            , "Section '[Basic]': Please correct key 'Dir'! Unable to create output directory '%s'!", szText);



   //-------------------------------------------------------------------------------------------------------------------
   // 6. re-write project file using path and name of (study) result file with the extension 'log'
   strcpy(szText, prj.GetResultFileName());                                         // copy name from study result file
   ExchangeExt(szText, "log");                                                      // exchange extension
   logfile.open(szText, ios::out);                                                  // try to open
   IfTrueThrowTypeU(!logfile, "Write Error: Unable to open logfile '%s'!", szText); // check success

   #ifndef ANSI
   if(f_WriteProtectOutput)                                                         // if specified ...
      chmod(szText, S_IREAD);                                                       // ... write protect the logfile
   #endif

   logfile << "This file is a re-write copy of " << szFilename << endl;             // write file header
   logfile << "To use this file again move it back to its original directory!" << endl << endl;
   prj.Save(logfile);                                                               // save project file



   //-------------------------------------------------------------------------------------------------------------------
   // 7. open result file and write header
   resfile.open(prj.GetResultFileName(), ios::out);                                 // try to open
   IfTrueThrowTypeU(!resfile, "Write Error: Unable to open result file '%s'!"
         , prj.GetResultFileName());                                                // check success
   resfile << ComChar << " Results written by " << szPrgName << endl << endl;       // program's name

   #ifndef ANSI
   chmod(prj.GetResultFileName(), S_IREAD);                                         // write protect the result file
   #endif

   // write pre-processor flags and settings
   resfile << endl << ComChar << " Fixed Settings:" << endl;
   WriteDefines(resfile, prj.Regression());                                         // write
   resfile << endl << endl;                                                         // linefeed

   // write note if special mode is on
   strcpy(szText, prj.GetTuningFileName());
   PrefixPath(szText, szCwd);                                                       // prepend path if necessary
   if(prj.StudyType()==Special)
      resfile << "Special mode! See tuning result file '" << szText << "' for results!" << endl;



   //-------------------------------------------------------------------------------------------------------------------
   // 8. open tuning result file
   if(prj.DoTuning())
   {
      optfile.open(prj.GetTuningFileName(), ios::out);                        // try to open

      IfTrueThrowTypeU(!optfile                                                     // check success
         , "Write Error: Unable to open tuning result file '%s'!", prj.GetTuningFileName());

      #ifndef ANSI
      chmod(prj.GetTuningFileName(), S_IREAD);                                // write protect the tuning result file
      #endif

      optfile << ComChar << " Results written by " << szPrgName << endl << endl;    // program's name
   }
}


//----------------------------------------------------------------------------------------------------------------------
// display and log error message
void HandleException(const char*const& szError, const char*const szFilename, const bool& f_HaltOnError
                     , const bool& f_NoSound)
{
   // display error on screen
   cout << endl << "Fatal Error!" << endl << szError;


   // write error to logfile
   ofstream logfile(SZ_ERROR_LOG_FILE, ios::app);
   logfile << endl << TimeStamp() << endl;                        // time stamp
   logfile << "Processing file '" << szFilename << "'" << endl;   // project filename
   logfile << szError << endl;                                    // error message

   #ifndef ANSI                        // make some sound when ready
   if(!f_NoSound)
      for(int i=0;i<3;i++)
      {
         Beep(0x125, 350);
         Beep(0x225, 350);
      }
   #endif

   if(f_HaltOnError)
   {
      cout << endl << endl << "Press enter to terminate!";
      cin.get();                                                  // wait for user input and return
   }
   else     // wait some seconds if program won't stop
   {
      cout << endl << endl << "Terminating within a few seconds ...";
      Sleep(5520);
   }
}


//----------------------------------------------------------------------------------------------------------------------
// check passed character string for command-line options
void CheckOptions(const char*const& arg, bool& f_HaltOnError, bool& f_WriteProtectOutput, bool& f_NoSound)
{
   if(strcmp(arg,"/nh")==0)
      f_HaltOnError = false;                // do not halt on errors
   if(strcmp(arg,"/wp")==0)
      f_WriteProtectOutput = true;          // write protect result files
   if(strcmp(arg,"/ns")==0)
      f_NoSound = true;                     // turn sound off
}
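
// illustrative call: "bap project.prj /nh /ns"   ->   process project.prj, do not halt on errors,
// sound turned off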


//----------------------------------------------------------------------------------------------------------------------
// startup function
int main(int argc, char *argv[], char *[])
{
   #ifndef ANSI
   SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_BELOW_NORMAL);    // reduce thread priority
   #endif

   cout << szPrgName << " " << szCopyRight << " " << szAuthor << endl;     // display program's name


   // display warning if debug mode is on
   #ifdef DEBUG
   for(int i=0;i<20;i++)
      cout << "WARNING: DEBUG MODE IS ON!!!!    ";  // now i won't forget to disable it again ;-)
   cout << endl << endl;
   #endif


   TBatchInterface* batch=NULL;
   bool f_HaltOnError=true, f_WriteProtectOutput=false, f_NoSound=false,  f_Error=false;
   try
   {
      // check # input arguments
      if(argc<2)                                                           // no project file specified ...
      {
         cout << "No batch file specified! Nothing done!" << endl;         // display message
         cout << endl << "Press enter to terminate!";
         cin.get();                                                        // wait for user input and return
      }
      else
      {
         // check for command line options
         if(argc>2) CheckOptions(argv[2], f_HaltOnError, f_WriteProtectOutput, f_NoSound);
         if(argc>3) CheckOptions(argv[3], f_HaltOnError, f_WriteProtectOutput, f_NoSound);
         if(argc>4) CheckOptions(argv[4], f_HaltOnError, f_WriteProtectOutput, f_NoSound);


         // load project file and go ...
         batch = new TBatchInterface(f_WriteProtectOutput);
         batch->Load(argv[1]);                                             // load project file
         batch->DoIt();                                                    // process batch file
      }
   }
   // note: be sure not to throw any exceptions before <batch> is instantiated :-)
   catch(TExceptionU excp)                                                 // exception intended for the user
   {
      // display and log error message
      HandleException(excp.GetErrorText(), batch->ProjectFile(), f_HaltOnError, f_NoSound);
      f_Error = true;
   }
   catch(TExceptionAB excp)                                                // exceptions intended for the programmer
   {
      // display and log error message
      HandleException(excp.GetErrorText(), batch->ProjectFile(), f_HaltOnError, f_NoSound);
      f_Error = true;
   }
   delete batch;                       // release


   #ifndef ANSI                        // make some sound when ready
   if(!f_Error && !f_NoSound)
      for(int i=0;i<2;i++)
      {
         Beep(0x525, 100+i*140);
         Sleep(120);
      }
   #endif

   return 0;                           // exit
}