// Program identification strings; the program name wording depends on whether RELEASE is defined at compile time.
#ifdef RELEASE
static const char* szPrgName = "bap 1.26b Release Version";
#else
static const char* szPrgName = "bap 1.26b Development Version";
#endif
static const char* szAuthor = "Lars Haendel";            // author name
static const char* szCopyRight = "(c) 2001-2003 by";     // copyright text
static const char* szLogFile = "project.log";            // file the loaded project is re-written to in Load()
static const char* szModule = "bap.cpp";                 // module name handed over when an error is thrown
//------------------------------------------------------------------------------
// module bap.cpp //
// //
// batch interface for the PNC cluster/learn algorithm //
// //
// copyright (c) 2001-2003 by Lars Haendel //
// mail: lore@newty.de //
// home: www.newty.de //
// //
// This program is free software and can be used under the terms of the //
// GNU licence. See header file for further information and disclaimer. //
// //
//------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------------------------------------
//#define ANSI // ANSI C++, i.e. do not reduce thread priority and write protect files while they're
// opened (default: OFF)
#define MAX_TEST 10 // max. # characters used for progress indication
#define SZ_ERROR_LOG_FILE "error.log" // filename for error messages if program started with option "/ns"
#include <iostream> // due to iostreams
#include <iomanip> // setw()
#include <dir> // mkdir()
#include <errno> // errno
#ifndef ANSI
#include <io> // chmod()
#include <sys\stat> // S_IREAD
#include <windows> // SetThreadPriority()
#endif
#include "data.h" // TData
#include "fileutil.h" // FlagToString()
#include "pnc.h" // TPnc
#include "defines.h" // definitions
#include "ParaSet.h" // TParaSet
#include "demo.h" // demo of how to use
#include "exception.h" // IfTrueThrowTypeA()
#include "bap.h"
//----------------------------------------------------------------------------------------------------------------------
// Constructor: remembers the write-protection option, clears the state flags and creates both data objects.
//    _f_WriteProtectOutput   value copied into the member 'f_WriteProtectOutput'
TBatchInterface::TBatchInterface(const bool& _f_WriteProtectOutput)
{
   // plain state first ...
   sets                 = NULL;                     // no parameter set list yet
   f_Loaded             = false;                    // Load() has not been called so far
   f_WriteProtectOutput = _f_WriteProtectOutput;    // keep a copy of the option

   // ... then the two data objects (same creation order as before)
   data1 = new TData();                             // base data file in study mode, else learn data
   data2 = new TData();                             // test data file if study mode is disabled
}
//----------------------------------------------------------------------------------------------------------------------
// Destructor: calls Release() on both data objects (if set) and deletes the parameter set list.
TBatchInterface::~TBatchInterface()
{
   if(data1 != NULL)          // base data
   {
      data1->Release();
   }
   if(data2 != NULL)          // test data
   {
      data2->Release();
   }
   delete sets;               // parameter set list (deleting NULL is harmless)
}
//----------------------------------------------------------------------------------------------------------------------
// Writes the "General Settings" block (data file names, tuple/column counts, experiment sizes, flags) to 'file'.
//    file     stream the settings are written to
//    data_L   field with the learn data objects; if it is NULL the function writes the variant for the tuning
//             result file, otherwise the variant for the overall result file, which additionally contains the
//             weights averaged over all repetitions
void TBatchInterface::WriteSettings(ofstream& file, const TData**const& data_L/*=NULL*/)
{
   const bool f_TuningFile   = (data_L == NULL);
   const bool f_ExplicitTest = (prj.GetData2FileName()[0] != '\0');   // learn and test data explicitly specified?

   file << ComChar << " General Settings:" << endl;

   // a) data file name(s) and # tuples; the first data file name is written in every case
   file << ComChar << " Data1 = " << prj.GetData1FileName() << endl;
   if(f_ExplicitTest)
   {
      if(f_TuningFile)
      {
         file << ComChar << " #tuples = " << data1->nTup() << endl;            // # tuples
      }
      else
      {
         file << ComChar << " Data2 = " << prj.GetData2FileName() << endl;     // test data file name
      }
   }
   else
   {
      file << ComChar << " #tuples = ";
      if(f_TuningFile)                                                         // tuning file: prepend N_L
      {
         file << prj.Get_N_L() << "/";
      }
      file << data1->nTup() << endl;
   }
   file << ComChar << " #colums = " << data1->nVar() << endl;                  // # columns
   file << ComChar << " Regression = " << FlagToString(prj.Regression()) << endl;

   // b) experiment sizes: tuning settings, explicit learn/test data or study mode
   if(f_TuningFile)
   {
      file << ComChar << " N_R = " << prj.Get_N_R_Tune() << endl;              // # repetitions
      file << ComChar << " N_L = " << prj.Get_N_L_Tune() << endl;              // # learn tuples
      file << ComChar << " N_T = " << prj.Get_N_T_Tune() << endl;              // # test tuples
      file << ComChar << " Ranking = " << CriterionIdToString(prj.Ranking()) << endl;
      file << ComChar << " Objective = " << CriterionIdToString(prj.Objective())<<endl;
   }
   else if(f_ExplicitTest)
   {
      file << ComChar << " N_L = " << data1->nTup() << endl;                   // # learn tuples
      file << ComChar << " N_T = " << data2->nTup() << endl;                   // # test tuples
   }
   else
   {
      file << ComChar << " N_R = " << prj.Get_N_R() << endl;                   // # repetitions
      file << ComChar << " N_L = " << prj.Get_N_L() << endl;                   // # learn tuples
      file << ComChar << " N_T = " << prj.Get_N_T() << endl;                   // # test tuples
   }
   file << ComChar << " Randomize = " << FlagToString(prj.Randomize()) << endl;

   // c) # parameter sets, only written if tuning is enabled
   if(prj.DoTuning())
   {
      file << ComChar << " " << prj.nParaSets() << " different parameter sets from which ";
      file << prj.nParaSetsLearn() << " require learning " << endl;
   }
   file << endl;

   // d) averaged weights are only written when learn data was passed
   if(f_TuningFile)
   {
      return;
   }

   // d1) average the weights of every variable over all repetitions (summed in repetition order)
   float* avgWeights = new float[data1->nVar()];
   for(int v=0; v<data1->nVar(); v++)
   {
      avgWeights[v] = 0;
      for(int rep=0; rep<prj.Get_N_R(); rep++)
      {
         avgWeights[v] += data_L[rep]->Weights()[v]/prj.Get_N_R();
      }
   }

   // d2) write them with two decimals each
   file << endl << "Averaged Weights: " << endl;
   char szWeight[STS];
   for(int v=0; v<data1->nVar(); v++)
   {
      sprintf(szWeight, " %.2f", avgWeights[v]);
      file << szWeight;
   }
   file << endl << endl;
   delete[] avgWeights;
}
//----------------------------------------------------------------------------------------------------------------------
// write (study) result file, 'para' contains the parameter sets and 'res' the corresponding loss results
void TBatchInterface::WriteStudyResults(const TParaSet*const& para, TParaSetResult& res, const clock_t& T_Tune)
{
// a) get crit. which should be shown and unset 'pVal' and 'Rank' because there is no reason to display this
bool f_criterion[nCriterion];
memcpy(f_criterion, prj.GetCriterionsToShow(), sizeof(bool)*nCriterion);
f_criterion[StringToCriterionId("Rank")] = f_criterion[StringToCriterionId("pVal")] = false; // unset
// b) display filename
cout << endl << endl << "Writing results to " << prj.GetResultFileName() << endl;
// c) write parameter and result description
int nChar = max(int (log(prj.Get_N_R())/log(10)+1), 4); // # characters used to display test id
for(int i=0;i<nChar-4;i++) // add spaces
resfile << " ";
resfile << "Test";
TParaSet::WriteDescription(resfile, prj.Regression()); // write parameter description
TParaSetResult::WriteResultDescriptions(resfile, f_criterion); // write result description
// d) write parameters and loss function results for each repetitions
char szText[STS];
for(int t=0;t<prj.Get_N_R();t++) // over all repetitions (test)
{
sprintf(szText, "%*d", nChar, t+1); // write test id
resfile << szText;
para[t].WriteParameters(resfile, prj.Regression()); // write parameter set
res.Write(resfile, f_criterion, prj.ShowDev(), t); // write results
}
// e) write overall performance averaged over all tests/repetitions
res.Calculate();
resfile << endl << "Overall Performance:" << endl << " ";
TParaSetResult::WriteResultDescriptions(resfile, f_criterion); // write result description
resfile << " ";
res.Write(resfile, f_criterion, false); // write average results
resfile << "+-";
for(int i=0;i<nCriterion;i++) // write deviation below in the next line
if(f_criterion[i])
res.WriteDeviation(i, resfile);
if(prj.DoTuning())
resfile << endl << endl << "T_Tune = " << WriteTime(T_Tune/prj.Get_N_R(), false); // write time needed for tuning
}
//-------------------------------------------------------------------------------------------------------------------
// Progress indication helper for TBatchInterface::TuneParameters().
// Writes a single character to cout whenever 'i' is a multiple of 'steps_until_update', otherwise nothing:
//    ' '   if 'blank' is set (placeholder output),
//    '.'   if 'steps_until_update' equals 1,
//    '|'   in all other cases.
void ProgressIndication(const int& i, const int& steps_until_update, const bool& blank=false)
{
   const char symbol = blank ? ' ' : (steps_until_update==1 ? '.' : '|');

   if(i%steps_until_update==0)
   {
      cout << symbol;
   }
}
//----------------------------------------------------------------------------------------------------------------------
// Finds the optimal parameter set for the given (learn) data.
//
// For every parameter set in 'sets' the function learns one model per tuning repetition (or re-uses the models
// of the last set that was learned), tests the models, collects the results and finally writes the tuning
// results to '*file'. The progress is displayed on cout.
//
//    seed     seed handed to TData::GenerateLearnAndTestData() for splitting 'data'
//    data     data the tuning learn and test data sets are generated from
//    optId    zero-based id of the calling test/repetition; used for file names and the "Test" headline
//    file     tuning result stream; must not be NULL as it is dereferenced unconditionally
//    returns  a copy of the parameter set selected by TParaSetResults::FindBest()
//
// Changes compared to the previous version:
//    - 'lastLearnId' and 'resultLength' are initialized; they were read uninitialized if a parameter set was
//      skipped or re-used models before any set had been learned/tested
//    - models are learned if none exist yet, even if the re-learn flag is unset (previously a NULL field was
//      dereferenced in that case)
//    - the release code no longer dereferences the model field if it was never allocated
//    - the local simulation/model streams no longer shadow the parameter 'file'
TParaSet TBatchInterface::TuneParameters(const unsigned int& seed, TData*const& data, const int& optId
                                       , ofstream*const& file)
{
   //-------------------------------------------------------------------------------------------------------------------
   // 1. initialize
   char szText[STS];
   const int nParaSets = sets->Size();                      // abbreviation
   clock_t time = clock();                                  // get start time
   TCluster** cls = NULL;                                   // learned models, one per tuning repetition
   for(int r=0;r<nParaSets;r++)                             // reset skip flags for each parameter set
      sets->Get(r).Skip = false;
   int nChar = log(prj.Get_N_R())/log(10)+1;                // # characters needed to enumerate repetitions
                                                            // (used for filename generation)
   const int nCharOpt = log(prj.Get_N_R_Tune())/log(10)+1;  // # characters needed to enumerate tuning runs ...
   const int nCharPara = log(nParaSets)/log(10)+1;          // ... repetitions and parameter sets
   int tests_until_update = prj.Get_N_R_Tune()/MAX_TEST+1;  // calculate # tests until display is updated
   TParaSetResults results(sets->Size(), prj.Get_N_R_Tune() // create result object
                         , prj.Regression(), prj.GetCriterionsToCalculate());

   //-------------------------------------------------------------------------------------------------------------------
   // 2. prepare learn and test data
   TData **data_L, **data_T;
   data->GenerateLearnAndTestData(seed, data_L, data_T, prj.Get_N_R_Tune(), prj.GetTuneType()!=Rep,
      prj.Get_N_L_Tune(), prj.Get_N_T_Tune(), prj.GetTuneDataBaseFile(), prj.GetOutputDir(), prj.Get_N_Bins()
      , prj.Regression(), prj.EqualWidthBinning(), optId, nChar);

   //-------------------------------------------------------------------------------------------------------------------
   // 3. for each parameter set: learn and test on the different learn and test data
   int lastLearnId = -1;                                    // id of the parameter set the current models belong to
   int resultLength = 0;                                    // length of the last displayed result text
   for(int r=0;r<nParaSets;r++)
   {
      // a) get parameter set
      TParaSet& para = sets->Get(r);                        // get from list
      TParameter _para = ToTParameter(para, &prj);          // convert to use it with kernel routines

      // b) display progress
      sprintf(szText, "%*d L", (int) (log(sets->Size())/log(10)+1), (r+1));   // display parameter set number
      cout << endl << szText;
      if(para.Prune)
         cout << "P";                                       // display if pruning will be done
      else
         cout << " ";

      //----------------------------------------------------------------------------------------------------------------
      // c) learn models for each repetition - note: learn only if re-learn flag is set and skip flag is unset
      //    note: either I), II) or III) will be executed
      if(!para.Skip)
      {
         //-------------------------------------------------------------------------------------------------------------
         // I) learn models for each repetition; also taken if there are no models yet that could be re-used
         if(para.Relearn || cls==NULL)
         {
            lastLearnId = r;                                // store id
            if(cls)                                         // release previous models
               for(int t=0;t<prj.Get_N_R_Tune();t++)
                  delete cls[t];                            // deleting NULL is a no-op
            delete[] cls;
            cls = new TCluster*[prj.Get_N_R_Tune()];        // new field with models
            for(int t=0;t<prj.Get_N_R_Tune();t++)
               cls[t] = NULL;                               // initialize to NULL

            // over all repetitions
            for(int t=0;t<prj.Get_N_R_Tune();t++)
            {
               TPnc pnc(data_L[t], _para);                  // instantiate PNC object
               results.Get(r)->StartClock(false, t);        // get time
               while(pnc.Iterate()) {}                      // learn model ...
               cls[t] = pnc.ToTCluster(para.Prune);         // ... and convert to TCluster, prune if necessary
               results.Get(r)->StopClock(false, t);         // set learn time

               // abort if currently learned model is too big
               // note: do not skip first model as it'll be the smallest of all
               if(prj.Skipping() && r!=0)
                  if(ModelToBig(cls[t], data_L[t], _para, &prj))
                  {
                     SkipBigger(sets);                      // set skip flag for this and covered parameter sets
                     for(int k=t;k<prj.Get_N_R_Tune();k++)
                        ProgressIndication(k, tests_until_update);   // complete progress indication with dummies
                     break;                                 // abort learning
                  }
               ProgressIndication(t, tests_until_update);   // display progress
            }
         }
         //-------------------------------------------------------------------------------------------------------------
         // II) skip learning as previously learned models can be used but copy learn times and check model size
         else
         {
            // for each previously learned model ...
            for(int t=0;t<prj.Get_N_R_Tune();t++)
            {
               ProgressIndication(t, tests_until_update, true);   // dummy progress indication
               // copy learn time from last learned model
               results.Get(r)->GetClock(false, t) = results.Get(lastLearnId)->GetClock(false, t);
            }
            // check all(!) previously learned models with current parameters if one is too big
            if(prj.Skipping())
               for(int t=0;t<prj.Get_N_R_Tune();t++)
                  if(ModelToBig(cls[t], data_L[t], _para, &prj))
                  {
                     SkipBigger(sets);                      // set skip flag for this and covered parameter sets
                     break;                                 // abort checking
                  }
         }
      }
      //----------------------------------------------------------------------------------------------------------------
      // III) parameter set is skipped ...
      else
      {
         for(int t=0;t<prj.Get_N_R_Tune();t++)              // ... dummy progress indication
            ProgressIndication(t, tests_until_update, true);
      }

      //----------------------------------------------------------------------------------------------------------------
      // d) test all learned models if skip flag is not set
      //    note: 'para' is a reference into the list, thus a flag set by SkipBigger() above is seen here
      if(!para.Skip)
      {
         cout << " T";
         // for each model
         for(int t=0;t<prj.Get_N_R_Tune();t++)
         {
            //----------------------------------------------------------------------------------------------------------
            // d1) open simulation output if basename specified
            ofstream* simfile=NULL;
            if(prj.GetTuneSimulationBaseFile()[0]!='\0')
            {
               // compose filename
               sprintf(szText, "%s%s_CV%0*d_Run%0*d_PSet%0*d.sim", prj.GetOutputDir(),
                  prj.GetTuneSimulationBaseFile(), nChar, optId+1, nCharOpt, (t+1), nCharPara, (r+1));
               simfile = new ofstream(szText);              // open file and ...
               if(!*simfile)                                // ... check success
                  cout << endl << "Write Error: Unable to open simulation file '" << szText << "'!" << endl;
            }

            //----------------------------------------------------------------------------------------------------------
            // d2) loss on test(!) data
            int nTup = data_T[t]->nTup();                   // # tuples to enable loss object to store predictions
            if(prj.SaveMemory())
               nTup=-1;
            results.Get(r)->SetLossObjectTest(t, new TLossFunction(data_L[t]->Mean()[0], nTup));   // new loss object
            results.Get(r)->StartClock(true, t);
            TPrediction prd(cls[t], &_para);
            prd.Predict(data_T[t], results.Get(r)->GetLossObjectTest(t), simfile);   // predict all
            results.Get(r)->StopClock(true, t);             // store test time

            //----------------------------------------------------------------------------------------------------------
            // d3) loss on learn(!) data
            if(prj.NeedLossOnLearnData())                   // note: estimate only if necessary
            {
               nTup = data_L[t]->nTup();                    // # tuples to enable loss object to store predictions
               if(prj.SaveMemory())
                  nTup=-1;
               results.Get(r)->SetLossObjectLearn(t, new TLossFunction(data_L[t]->Mean()[0], nTup));   // new loss object
               prd.Predict(data_L[t], results.Get(r)->GetLossObjectLearn(t), simfile);   // predict all
            }

            //----------------------------------------------------------------------------------------------------------
            // d4) set other 'results' like mass and hitrate
            results.Get(r)->nCuboids(t) = cls[t]->nCuboids();               // # cuboids (without minimal mass)
            results.Get(r)->nCuboidsK(t) = cls[t]->nCuboidsRed();           // # cuboids that exceed min. mass
            results.Get(r)->nBounds(t) = cls[t]->AvrVarPerCub(para.Prune);  // average active variable bounds per cuboid
            results.Get(r)->HitRate(t) = cls[t]->AvrHitRate();              // average hitrate
            results.Get(r)->SizeFac(t) = 1;                                 // weighting factor regarding model's size
            if(ModelSize(cls[t], data, para.Prune) > prj.MaxModelSize())
               results.Get(r)->SizeFac(t)*=2;

            //----------------------------------------------------------------------------------------------------------
            // d5) save model if basename specified
            if(prj.GetTuneModelBaseFile()[0]!='\0')
            {
               // compose filename
               sprintf(szText, "%s%s_CV%0*d_Run%0*d_PSet%0*d.%s", prj.GetOutputDir(),
                  prj.GetTuneModelBaseFile(), nChar, optId+1, nCharOpt, (t+1), nCharPara, (r+1), TPnc::Extension());
               ofstream modelfile(szText);                  // open file and ...
               if(!modelfile)                               // ... check success
                  cout << endl << "Write Error: Unable to open model file " << szText << " !" << endl;
               cls[t]->Save(modelfile, true, &_para);       // save model
            }

            if(simfile)                                     // eventually close simulation file
               simfile->close();
            delete simfile;
            ProgressIndication(t, tests_until_update);      // display progress
         }

         //-------------------------------------------------------------------------------------------------------------
         // d6) calculate mean and deviation of loss and other results
         results.Get(r)->Calculate();

         //-------------------------------------------------------------------------------------------------------------
         // d7) display some results (ranking criterion and # cuboids)
         sprintf(szText, " %s=%s", CriterionIdToString(prj.Ranking()), results.Get(r)->ResultToText(prj.Ranking()
            ,/*width*/8));
         cout << szText;
         resultLength = SizeOfString(szText);               // store string length
         int w = (log(data->nTup())/log(10)+3);
         sprintf(szText, " K=%*.1f/%*.1f", w, results.Get(r)->Result(StringToCriterionId("K")), w
            , results.Get(r)->Result(StringToCriterionId("K'")));   // # cuboids
         cout << szText;
         resultLength += SizeOfString(szText);              // add to stored string length
      }
      //----------------------------------------------------------------------------------------------------------------
      // skip flag was set: do not test models, just display dummy progress and a '-'
      else
      {
         for(int t=0;t<prj.Get_N_R_Tune();t++)              // dummy progress indication
            ProgressIndication(t, tests_until_update, true);
         // pad to the width of the last displayed result; no padding if nothing was displayed so far
         cout << " -" << setw(resultLength>2 ? resultLength-2 : 0) << " ";
      }

      //----------------------------------------------------------------------------------------------------------------
      // e) display time elapsed since start and percentage of processed parameter sets
      cout << " " << WriteTime2(clock()-time) << " " << WritePercentage((r+1)/((float)nParaSets), 0, 0) << "% done";
   }

   //-------------------------------------------------------------------------------------------------------------------
   // 4. calculate 'pVal' and 'Rank', select best parameter set (regarding specified objective considering size
   //    weighting factor) and write tuning results to file
   results.CalculateRankAndPVal(prj.Ranking());
   int id_best = results.FindBest(prj.Objective());         // select best parameter set

   // write tuning results sorted by tuning objective (weighted by model size factor)
   cout << endl;
   *file << " Test " << (optId+1) << endl;
   results.Write(*file, prj.GetCriterionsToShow(), prj.Objective(), sets, prj.ShowDev());
   *file << endl;

   //-------------------------------------------------------------------------------------------------------------------
   // 5. release
   for(int t=0;t<prj.Get_N_R_Tune();t++)
   {
      if(cls)                                               // models (field may never have been allocated)
         delete cls[t];
      data_L[t]->Release();                                 // data
      data_T[t]->Release();
   }
   delete[] data_T;
   delete[] data_L;
   delete[] cls;

   //-------------------------------------------------------------------------------------------------------------------
   // 6. return optimal run parameters
   return sets->Get(id_best);
}
//----------------------------------------------------------------------------------------------------------------------
// Runs the complete batch job.
//
// Generates learn and test data, either using the specified experiment design (Repetition, CV, Loocv) or the
// explicitly specified learn and test data files. Learns on each learn data set (including tuning of parameters
// if enabled) and tests on each test data set. In special study mode only one single parameter tuning is run on
// 'data1'. Progress is displayed on cout, results are written to the member streams 'resfile' and 'optfile',
// which are closed at the end of this function.
void TBatchInterface::DoIt()
{
//-------------------------------------------------------------------------------------------------------------------
// 1. display study parameters if study mode is enabled
if(prj.MakeStudy())
{
cout << endl << "Study ------------------------------------------------" << endl;
// a) experiment design
if(prj.StudyType()==Special) cout << "Special mode" << endl;
if(prj.StudyType()==Loocv) cout << "Leave-one-out cross-validation" << endl;
if(prj.StudyType()==Cv) cout << prj.Get_N_R() << " fold cross-validation" << endl;
if(prj.StudyType()==Rep) cout << prj.Get_N_R() << " fold repetition" << endl;
// b) # learn and test data tuples
cout << "Generating " << prj.Get_N_R() << " data files with " << prj.Get_N_L() << " learn and " << prj.Get_N_T();
cout << " test tuples" << endl;
// c) output range for regression tasks and # classes for classification tasks
if(prj.Regression())
cout << "Regression task with y e[" << data1->Min()[0] << ".." << data1->Max()[0] << "]" << endl;
else
cout << "Classification task with " << data1->nIntegerMaxMin(0) << " classes" << endl;
}
//-------------------------------------------------------------------------------------------------------------------
// 2. display and write tuning settings if tuning is enabled
if(prj.DoTuning())
{
cout << endl << "Tuning -----------------------------------------------" << endl;
cout << prj.nParaSets() << " parameter sets from which " << prj.nParaSetsLearn() << " require learning " << endl;
if(prj.GetTuneType()==Loocv) cout << "Leave-one-out-cross-validation" << endl; // experiment design
if(prj.GetTuneType()==Cv) cout << prj.Get_N_R_Tune() << " fold cross-validation" << endl;
if(prj.GetTuneType()==Rep) cout << prj.Get_N_R_Tune() << " fold repetition" << endl;
// # learn and test data tuples
cout << prj.Get_N_L_Tune() << " learn and " << prj.Get_N_T_Tune() << " test tuples" << endl;
WriteSettings(optfile); // write settings to tuning result file (no data field passed -> tuning variant)
}
//-------------------------------------------------------------------------------------------------------------------
// 3. get random seeds for learn and test data generation, note: one seed is needed for learn and test data
// generation in study mode and then one seed is needed for the parameter optimization of each repetition
// layout: seed[0..N_R-1] are passed to TuneParameters(), seed[N_R] is used for the study data generation
unsigned int* seed = new unsigned int[prj.Get_N_R()+1];
if(prj.Randomize()) // initialize random number generator if specified
::randomize();
for(int i=0;i<prj.Get_N_R()+1;i++) // create seed for data generation
seed[i]=abs(random(INT_MAX));
//-------------------------------------------------------------------------------------------------------------------
// 4. do it
if(prj.StudyType()==Special)
//-------------------------------------------------------------------------------------------------------------------
// I) special study mode: run just one parameter tuning on complete data set
{
// note: seed[0] is overwritten N_R_Tune times, only the last drawn value is used below
for(int i=0;i<prj.Get_N_R_Tune();i++) // emulate calls to get same 'random' data file splitting
seed[0] = abs(random(INT_MAX));
clock_t time = clock(); // get time
TuneParameters(seed[0], data1, 0, &optfile); // note: the returned parameter set is not used here
time = clock()-time; // determine time needed for tuning ...
optfile << "T_Tune = " << WriteTime(time, false); // ... and write it
}
//-------------------------------------------------------------------------------------------------------------------
// II) normal study mode or disabled study mode, i.e. explicitly specified learn and test data sets
else
{
//----------------------------------------------------------------------------------------------------------------
// a) generate learn and test data
#ifdef DEBUG_LOG_ON
DebugLogFile << "Generating Data" << endl;
#endif
TData **data_L, **data_T;
if(prj.MakeStudy())
// create N_R learn and test data sets
data1->GenerateLearnAndTestData(seed[prj.Get_N_R()], data_L, data_T, prj.Get_N_R(), prj.StudyType()!=Rep
, prj.Get_N_L(), prj.Get_N_T(), prj.GetDataBaseFileName(), prj.GetOutputDir()
, prj.Get_N_Bins(), prj.Regression(), prj.EqualWidthBinning());
else
{
// emulate learn and test data generation: make arrays with one single data object each
// NOTE(review): all loops below run over prj.Get_N_R() entries, so this presumably relies on N_R being 1
// in this mode - confirm against the project loading code
data_L = new TData*[1]; // allocate arrays (for one object)
data_T = new TData*[1];
data_L[0] = data1->GetObject(); // set specified learn and test data
data_T[0] = data2->GetObject();
data_L[0]->Sort(0); // sort learn data (regarding the output)
data_L[0]->CalculateWeights(prj.Get_N_Bins(), prj.Regression(), prj.EqualWidthBinning()); // calculate weights
data_L[0]->Lock();
}
//----------------------------------------------------------------------------------------------------------------
// b) write settings to result file (data field passed -> overall result variant incl. averaged weights)
WriteSettings(resfile, (const TData**) data_L);
//----------------------------------------------------------------------------------------------------------------
// c) allocate/initialize
float maxSize = 0.5*prj.Get_N_L()*prj.GetMaxSize()/100.0; // maximal acceptable model size
TParaSet* para = new TParaSet[prj.Get_N_R()]; // optimal parameters for each repetition
TParaSetResult res(prj.Get_N_R(), prj.GetCriterionsToShow()); // loss results for each repetition
const int nChar = log(prj.Get_N_R())/log(10)+1; // used for output filename generation
clock_t T_Tune = 0; // time used to tune parameters
//----------------------------------------------------------------------------------------------------------------
// d) learn and test on each learn and test data set
#ifdef DEBUG_LOG_ON
DebugLogFile << "Processing ... " << endl;
#endif
for(int t=0;t<prj.Get_N_R();t++)
{
cout << endl;
#ifdef DEBUG_LOG_ON
DebugLogFile << endl << "Run " << (t+1) << endl;
#endif
//-------------------------------------------------------------------------------------------------------------
// d1) tune parameters if enabled
if(prj.DoTuning())
{
cout << endl << "Test " << (t+1) << "/" << prj.Get_N_R(); // display progress
clock_t time = clock(); // get time
para[t] = TuneParameters(seed[t], data_L[t], t, &optfile);
time = clock()-time; // stop time
T_Tune += time; // sum up
cout << "Optimal parameters: "; // ... and display them
para[t].WriteParameters(cout, prj.Regression(), true);
cout << endl;
}
else
para[t] = sets->Get(0); // note: if tuning is disabled there is just one single parameter
// set in list. Just take it!
//-------------------------------------------------------------------------------------------------------------
// d2) learn model with specified/tuned parameters
TPnc pnc(data_L[t], ToTParameter(para[t], &prj)); // new model
cout << (t+1) << "/" << prj.Get_N_R() << " Learning " << " ... ";
res.StartClock(false, t); // start timer
while(pnc.Iterate()); // learn model (empty loop body: iterate until Iterate() returns false)
if(para[t].Prune)
cout << "Pruning ... ";
TCluster* cls = pnc.ToTCluster(para[t].Prune); // convert to TCluster
res.StopClock(false, t); // stop timer
//-------------------------------------------------------------------------------------------------------------
// d3) open simulation output file if (base) name is specified
ofstream* file=NULL;
if(prj.GetSimulationBaseFileName()[0]!='\0')
{
char szFilename[STS]; // compose filename
sprintf(szFilename, "%s%s_CV%0*d.sim", prj.GetOutputDir()
, prj.GetSimulationBaseFileName(), nChar, (t+1));
file = new ofstream(szFilename); // open file
if(!*file) // check
cout << "Write Error: Unable to open simulation file " << szFilename << " !" << endl;
}
//-------------------------------------------------------------------------------------------------------------
// d5) test model on test data
cout << "Testing ... ";
int nTup = data_T[t]->nTup(); // #tuples to predict
if(prj.SaveMemory())
nTup=-1; // reset it if predictions should not be stored to save memory
res.SetLossObjectTest(t, new TLossFunction(data_L[t]->Mean()[0], nTup)); // new loss object
res.StartClock(true, t);
TPrediction prd(cls); // instantiate prediction object
prd.Predict(data_T[t], res.GetLossObjectTest(t), file); // test on test data
res.StopClock(true, t);
//-------------------------------------------------------------------------------------------------------------
// d6) test on learn(!) data if necessary
if(prj.NeedLossOnLearnData())
{
cout << " ... ";
nTup = data_L[t]->nTup(); // #tuples to predict
if(prj.SaveMemory()) // reset it if predictions should not be stored to save memory
nTup=-1;
res.SetLossObjectLearn(t, new TLossFunction(data_L[t]->Mean()[0], nTup)); // new loss object
prd.Predict(data_L[t], res.GetLossObjectLearn(t), file); // test
}
//-------------------------------------------------------------------------------------------------------------
// d7) store other 'results' like model size ...
res.nCuboids(t) = cls->nCuboids();
res.nCuboidsK(t) = cls->nCuboidsRed();
res.nBounds(t) = cls->AvrVarPerCub(para[t].Prune); // average # variables per cuboid
res.HitRate(t) = cls->AvrHitRate();
res.SizeFac(t) = 1;
if(ModelSize(cls, data_L[t], para[t].Prune) > maxSize)
res.SizeFac(t) *=2; // weighting factor regarding model's size
//-------------------------------------------------------------------------------------------------------------
// d8) save model if (base) name is specified
if(prj.GetModelBaseFileName()[0]!='\0')
{
// compose filename
char szFilename[STS];
sprintf(szFilename, "%s%s_CV%0*d.%s", prj.GetOutputDir(), prj.GetModelBaseFileName(), nChar, (t+1)
, pnc.Extension());
// note: this local stream object hides the simulation file pointer 'file' within this block
ofstream file(szFilename); // open file
if(!file) // check
cout << "Write Error: Unable to open model file " << szFilename << " !" << endl;
cls->Save(file, true); // save model (also attempted if opening failed)
file.close(); // close file
// (hack!) save tuple ID's, same file name but with extension "tid"
sprintf(szFilename, "%s%s_CV%0*d.%s", prj.GetOutputDir(), prj.GetModelBaseFileName(), nChar, (t+1), "tid");
file.open(szFilename); // open file
if(!file) // check
cout << "Write Error: Unable to open tuple id file " << szFilename << " !" << endl;
pnc.SaveClusterTupleIds(file); // save
}
//-------------------------------------------------------------------------------------------------------------
// d9) display some results (ranking criterion on test data and # cuboids)
char szText[STS];
// display ranking criterion
sprintf(szText, " %s=%s ", CriterionIdToString(prj.Ranking()), res.ResultToText(prj.Ranking(), 0, false, t));
cout << szText;
// display # cuboids
int w = log(data1->nTup())/log(10)+1;
sprintf(szText, " K=%*d/%*d", w, (int) res.Result(StringToCriterionId("K"), t), w
, (int) res.Result(StringToCriterionId("K'"), t));
cout << szText;
// release
delete cls;
if(file) // eventually close/delete file (this is the simulation file pointer again)
file->close();
delete file;
}
//----------------------------------------------------------------------------------------------------------------
// e) write overall performance (results)
WriteStudyResults(para, res, T_Tune);
//----------------------------------------------------------------------------------------------------------------
// f) clean up: release learn and test data and parameters
// note: only the Release() call belongs to each loop, the delete[] is executed once after the loop
for(int t=0;t<prj.Get_N_R();t++) data_L[t]->Release(); delete[] data_L;
for(int t=0;t<prj.Get_N_R();t++) data_T[t]->Release(); delete[] data_T;
delete[] para;
}
//-------------------------------------------------------------------------------------------------------------------
// 5. clean up
delete[] seed;
// close files
#ifndef ANSI
if(!f_WriteProtectOutput) // if not specified otherwise by cmd line option ...
{
chmod(prj.GetResultFileName(), S_IREAD | S_IWRITE); // ... remove write protection
chmod(prj.GetTuningFileName(), S_IREAD | S_IWRITE);
}
#endif
optfile.close(); // close files which were opened in Load()
resfile.close();
}
//----------------------------------------------------------------------------------------------------------------------
// load project file
//
// Loads the project file <_szFilename> and the data file(s) named in it, re-writes the project to the standard log file
// and to a '.log' copy next to the result file, and opens the result file (and, if tuning is requested, the tuning
// result file) and writes their headers. The result/tuning streams stay open after this function returns.
//
// _szFilename   name of the project file, absolute or relative to the current working directory; slash or backslash
//               may be used as path delimiter
//
// Errors are reported through IfTrueThrowTypeA()/IfTrueThrowTypeU(). The function may be called only once per
// instance: <f_Loaded> is set before the project is actually read, so a failed call cannot be repeated.
void TBatchInterface::Load(char* _szFilename)
{
#ifdef DEBUG_LOG_ON
   DebugLogFile << "Loading project file: " << _szFilename << endl;
#endif

   //-------------------------------------------------------------------------------------------------------------------
   // 0. pre-check
   IfTrueThrowTypeA(f_Loaded, "Function can only be called once in the lifetime of an instance!"
                  , "TBatchInterface::Load", szModule);

   //-------------------------------------------------------------------------------------------------------------------
   // 1. initialize/manipulate directories and file names, check path delimiter
   char szPath[STS], szText[STS];
   getcwd(szCwd, STS);                    // get working directory
   EnsurePathDelimiter(szCwd);            // ensure path delimiter
   strcpy(szFilename, _szFilename);       // copy filename to own memory to be able to manipulate (prepend) the path
                                          // NOTE(review): unbounded copy - assumes the name fits into <szFilename>
   CorrectPathDelimiter(szFilename);      // correct path delimiter (user may use either slash or backslash)
   ExtractPath(szPath, szFilename);       // extract project file "working" directory from filename which may be ...
   PrefixPath(szPath, szCwd);             // ... relative to the current working directory, thus prepend the current
                                          // working directory if necessary
   PrefixPath(szFilename, szCwd);         // prepend current path to project filename if necessary
   f_Loaded = true;
   cout << endl << "Loading '" << szFilename << "'" << endl;               // display project filename

   //-------------------------------------------------------------------------------------------------------------------
   // 2. load project file
   ifstream file(szFilename, ios::in);                                     // open file
   IfTrueThrowTypeU(!file, "Load Error: Unable to open project file!");    // check success note: name is displayed above
   prj.Load(file, szPath);                                                 // load via TProjectB::Load()
   file.close();                                                           // close file

   //-------------------------------------------------------------------------------------------------------------------
   // 3. re-write loaded project file to standard logfile
   ofstream logfile(szLogFile, ios::out);                                  // open
   if(!logfile)                                                            // check success (failure is not fatal)
      cout << endl << "Warning: Unable to open log file '" << szLogFile << "'!" << endl;
   prj.Save(logfile);                                                      // save
   logfile.close();                                                        // close file
   // The stream object is reused in step 6. A failed open/close above leaves its error state set and older library
   // implementations do not reset it in open(), which would make the check in step 6 fail although the second file
   // was opened successfully. Thus reset the state explicitly.
   logfile.clear();

   //-------------------------------------------------------------------------------------------------------------------
   // 4. load data file(s) and synchronize it with project
   data1->Load(prj.GetData1FileName());                                    // load data file
   sets = prj.Synchronize(data1);                                          // synchronize
   if(prj.GetData2FileName()[0]!='\0')                                     // second data file is optional
   {
      data2->Load(prj.GetData2FileName());                                 // load data file
      data2->MakeCompatible(data1);                                        // make compatible
   }

   //-------------------------------------------------------------------------------------------------------------------
   // 5. create output directory if necessary
   //    note: only ENOENT is reported as an error, any other mkdir() failure (e.g. the directory already exists)
   //          is ignored here
   strcpy(szText, prj.GetOutputDir());                                     // get output directory
   if(szText[0]!='\0')
      if(mkdir(szText)!=0)
         IfTrueThrowTypeU(errno==ENOENT
               , "Section '[Basic]': Please correct key 'Dir'! Unable to create output directory '%s'!", szText);

   //-------------------------------------------------------------------------------------------------------------------
   // 6. re-write project file using path and name of (study) result file with the extension 'log'
   strcpy(szText, prj.GetResultFileName());                                // copy name from study result file
   ExchangeExt(szText, "log");                                             // exchange extension
   logfile.open(szText, ios::out);                                         // try to open
   IfTrueThrowTypeU(!logfile, "Write Error: Unable to open logfile '%s'!", szText);    // check success
#ifndef ANSI
   if(f_WriteProtectOutput)                                                // if specified ...
      chmod(szText, S_IREAD);                                              // ... write protect the log file
#endif
   logfile << "This file is a re-write copy of " << szFilename << endl;   // write file header
   logfile << "To use this file again move it back to its original directory!" << endl << endl;
   prj.Save(logfile);                                                      // save project file
                                                                           // note: closed by destructor at function end

   //-------------------------------------------------------------------------------------------------------------------
   // 7. open result file and write header
   resfile.open(prj.GetResultFileName(), ios::out);                        // try to open
   IfTrueThrowTypeU(!resfile, "Write Error: Unable to open result file '%s'!"
                  , prj.GetResultFileName());                              // check success
   resfile << ComChar << " Results written by " << szPrgName << endl << endl;          // program's name
#ifndef ANSI
   chmod(prj.GetResultFileName(), S_IREAD);     // write protect result file (done regardless of <f_WriteProtectOutput>)
#endif

   // write pre-processor flags and settings
   resfile << endl << ComChar << " Fixed Settings:" << endl;
   WriteDefines(resfile, prj.Regression());                                // write
   resfile << endl << endl;                                                // linefeed

   // write note if special mode is on
   strcpy(szText, prj.GetTuningFileName());
   PrefixPath(szText, szCwd);                                              // prepend path if necessary
   if(prj.StudyType()==Special)
      resfile << "Special mode! See tuning result file '" << szText << "' for results!" << endl;

   //-------------------------------------------------------------------------------------------------------------------
   // 8. open tuning result file
   if(prj.DoTuning())
   {
      optfile.open(prj.GetTuningFileName(), ios::out);                     // try to open
      IfTrueThrowTypeU(!optfile                                            // check success
                     , "Write Error: Unable to open tuning result file '%s'!", prj.GetTuningFileName());
#ifndef ANSI
      chmod(prj.GetTuningFileName(), S_IREAD);                             // write protect tuning result file
#endif
      optfile << ComChar << " Results written by " << szPrgName << endl << endl;       // program's name
   }
}
//----------------------------------------------------------------------------------------------------------------------
// display and log error message
//
// Prints the error text to the screen, appends a time-stamped entry to the error log file (SZ_ERROR_LOG_FILE),
// optionally beeps (non-ANSI builds only) and then either waits for the user or pauses shortly before returning.
//
// szError        error message to display and log
// szFilename     name of the project file being processed (may be NULL, then an empty name is logged)
// f_HaltOnError  true: wait until the user presses enter, false: return after a short delay
// f_NoSound      true: suppress the acoustic signal
void HandleException(const char*const& szError, const char*const szFilename, const bool& f_HaltOnError
                   , const bool& f_NoSound)
{
   // display error on screen
   cout << endl << "Fatal Error!" << endl << szError;

   // write error to logfile note: a failed open is not reported, the writes below are then simply dropped
   ofstream logfile(SZ_ERROR_LOG_FILE, ios::app);
   logfile << endl << TimeStamp() << endl;                                          // time stamp
   logfile << "Processing file '" << (szFilename ? szFilename : "") << "'" << endl; // project filename
   logfile << szError << endl;                                                      // error message
   logfile.close();                             // release the log before waiting for the user/timeout

#ifndef ANSI                                    // make some sound to signal the error
   if(!f_NoSound)
      for(int i=0;i<3;i++)
      {
         Beep(0x125, 350);
         Beep(0x225, 350);
      }
#endif

   if(f_HaltOnError)
   {
      cout << endl << endl << "Press enter to terminate!";
      cin.get();                                // wait for user input and return
   }
   else                                         // wait some seconds if program won't stop
   {
      cout << endl << endl << "Terminating within a few seconds ...";
      cout.flush();                             // ensure the message is visible before pausing
#ifndef ANSI                                    // Sleep() is only available if <windows> is included
      Sleep(5520);
#endif
   }
}
//----------------------------------------------------------------------------------------------------------------------
// check passed character string for command-line options
//
// Compares <arg> (case-sensitive, exact match) against the known switches and updates the matching flag:
//    "/nh"   f_HaltOnError        := false   (do not halt on errors)
//    "/wp"   f_WriteProtectOutput := true    (write protect result files)
//    "/ns"   f_NoSound            := true    (turn sound off)
// Any other string leaves all three flags untouched.
void CheckOptions(const char*const& arg, bool& f_HaltOnError, bool& f_WriteProtectOutput, bool& f_NoSound)
{
   if(!strcmp(arg, "/nh"))
   {
      f_HaltOnError = false;
      return;
   }

   if(!strcmp(arg, "/wp"))
   {
      f_WriteProtectOutput = true;
      return;
   }

   if(!strcmp(arg, "/ns"))
      f_NoSound = true;
}
//----------------------------------------------------------------------------------------------------------------------
// startup function
//
// usage: <program> <project file> [/nh] [/wp] [/ns]
//
// argv[1] is the project (batch) file, all following arguments are checked for command-line options (see
// CheckOptions()). Errors raised while loading/processing are displayed and logged via HandleException().
// Always returns 0.
int main(int argc, char *argv[], char *[])
{
#ifndef ANSI
   SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_BELOW_NORMAL);    // reduce thread priority
#endif

   cout << szPrgName << " " << szCopyRight << " " << szAuthor << endl;    // display program's name

   // display warning if debug mode is on
#ifdef DEBUG
   for(int i=0;i<20;i++)
      cout << "WARNING: DEBUG MODE IS ON!!!! ";     // now i won't forget to disable it again ;-)
   cout << endl << endl;
#endif

   TBatchInterface* batch=NULL;
   bool f_HaltOnError=true, f_WriteProtectOutput=false, f_NoSound=false, f_Error=false;

   // project name used for error reporting as long as <batch> does not exist (e.g. its constructor threw)
   const char* szProject = (argc>1) ? argv[1] : "";

   try
   {
      // check # input arguments
      if(argc<2)                                   // no project file specified ...
      {
         cout << "No batch file specified! Nothing done!" << endl;        // display message
         cout << endl << "Press enter to terminate!";
         cin.get();                                // wait for user input and return
      }
      else
      {
         // check all arguments following the project file for command line options
         for(int i=2;i<argc;i++)
            CheckOptions(argv[i], f_HaltOnError, f_WriteProtectOutput, f_NoSound);

         // load project file and go ...
         batch = new TBatchInterface(f_WriteProtectOutput);
         batch->Load(argv[1]);                     // load project file
         batch->DoIt();                            // process batch file
      }
   }
   // note: <batch> may still be NULL here if the exception was thrown before it was instantiated
   catch(TExceptionU& excp)                        // exception intended for the user
   {
      // display and log error message
      if(batch)
         szProject = batch->ProjectFile();
      HandleException(excp.GetErrorText(), szProject, f_HaltOnError, f_NoSound);
      f_Error = true;
   }
   catch(TExceptionAB& excp)                       // exceptions intended for the programmer
   {
      // display and log error message
      if(batch)
         szProject = batch->ProjectFile();
      HandleException(excp.GetErrorText(), szProject, f_HaltOnError, f_NoSound);
      f_Error = true;
   }

   delete batch;                                   // release

#ifndef ANSI                                       // make some sound when ready
   if(!f_Error && !f_NoSound)
      for(int i=0;i<2;i++)
      {
         Beep(0x525, 100+i*140);
         Sleep(120);
      }
#endif

   return 0;                                       // exit
}