// module name; handed together with the function name to the IfTrueThrowTypeA() checks in this file
static const char* szModule = "Tune.cpp";
//----------------------------------------------------------------------------//
// module: Tune.cpp //
// //
// Tune parameters, i.e. learn and test model on several different data //
// sets that are built randomly from a given TData object. Load and save //
// functionality. //
// //
// copyright (c) 2003 by Lars Haendel //
// home: www.newty.de //
// //
// This program is free software and can be used under the terms of the //
// GNU licence. See header file for further information and disclaimer. //
// //
//----------------------------------------------------------------------------//
#define SEED 0 // fixed seed for random number generator if randomization is disabled
#include <iomanip> // due to: setw()
#include "tune.h"
#include "cluster.h" // TCluster
#include "para.h" // ToTParameter
//----------------------------------------------------------------------------------------------------------------------
// Constructor for an instance whose results are computed by DoIt().
//   _sets : list of parameter sets to tune; stored as-is (no copy) and deleted by the destructor
//   _prj  : project object, only the pointer is stored
//   _data : data object, only the pointer is stored
// One result record per parameter set is allocated up front. Load() is blocked for such an instance.
TTune::TTune(TParaSetList* /*ca*/ _sets, const TProjectG*const& _prj, const TData*const& _data)
{
   // take over the pointers handed in by the caller
   data = _data;
   prj  = _prj;
   sets = _sets;

   // nothing learned and nothing finished yet
   pnc        = NULL;
   t          = 0;
   nResultsOk = 0;

   // this instance computes its results, so Load() must be rejected
   f_ForLoading = false;

   // own resources: result array (one entry per parameter set) and critical section
   results = new TuneResult[_sets->Size()];
   sec     = new TCriticalSection();
}
//----------------------------------------------------------------------------------------------------------------------
// Constructor for an instance whose results are read from file by Load().
//   _prj  : project object, only the pointer is stored
//   _data : data object, only the pointer is stored
// An empty parameter set list is created here and filled by Load(); the result array is allocated by Load().
// GetProgress(), DoIt() and GetStatusText() are blocked for such an instance.
TTune::TTune(const TProjectG*const& _prj, const TData*const& _data)
{
   prj          = _prj;                         // copy
   data         = _data;

   results      = NULL;                         // allocated in Load()
   pnc          = NULL;                         // no learning object exists for a loading instance
   nResultsOk   = t = 0;                        // Save() is not guarded by 'f_ForLoading' and reads 'nResultsOk', so
                                                // it must hold a defined value even if Load() was never called

   sets         = new TParaSetList();           // new parameter set list ...
   sets->SetName("TParaSetList (TTune)");       // ... and set its name
   sec          = new TCriticalSection();       // instantiate critical section

   f_Loaded     = false;                        // reset flag to allow one call of Load()
   f_ForLoading = true;                         // block functions which are only used together with first constructor
}
//----------------------------------------------------------------------------------------------------------------------
// Destructor: releases everything the instance holds.
// Note that the parameter set list is deleted in either case, i.e. also when it was handed in by the caller
// through the first constructor.
TTune::~TTune()
{
   delete   sets;                               // list of parameter sets
   delete[] results;                            // result array (may still be NULL if Load() was never called)
   delete   sec;                                // critical section object
}
//----------------------------------------------------------------------------------------------------------------------
// Get current progress of DoIt() in percent.
//   returns : integer in [0, 100]; completed parameter sets count fully, the set currently processed counts with the
//             fraction of its repetitions that have been started ('t' out of Get_N_R_Tune())
//   throws  : via IfTrueThrowTypeA() if the instance was constructed for use with Load()
int TTune::GetProgress()
{
   // check
   IfTrueThrowTypeA(f_ForLoading, "Function cannot be called for instance intended for use with Load()!"
                                , "TTune::GetProgress()", szModule);

   // an empty list leaves nothing to do; report it as complete instead of dividing by zero
   const double nSets = sets->Size();
   if(nSets<=0)
      return 100;

   // fraction of the current parameter set, guarded against a repetition count of zero
   const double nRuns    = prj->Get_N_R_Tune();
   const double fraction = nRuns>0 ? t/nRuns : 0.0;

   // note: the whole sum is converted at once. A plain '(int) 100.0*...' would bind the cast to the literal only and
   // thus truncate the share of completed sets before the fraction of the current set is added.
   int progress = (int) (100.0*(nResultsOk + fraction)/nSets);

   // clamp: once the last set is finished 'nResultsOk' equals the list size while 't' still holds the repetition count
   if(progress>100)
      progress = 100;
   if(progress<0)
      progress = 0;
   return progress;
}
//----------------------------------------------------------------------------------------------------------------------
// Do it: simulate all parameter sets for all repetitions.
//
// Builds the tuning learn and test data objects from 'data' and then processes the parameter sets in list order. For
// each set a model is learned per repetition (only if the set's 'Relearn' flag is set, otherwise the models of the
// previous set are re-used), tested on the corresponding test data and the results are summed up. If all repetitions
// of a set were completed, the sums are divided by the number of repetitions and 'nResultsOk' is incremented.
//
//   f_Stop : pointer to a stop flag that is polled during learning, prediction and between the steps; if it becomes
//            true the function releases its resources and returns, the result of the current set is discarded
//   throws : via IfTrueThrowTypeA() if the instance was constructed for use with Load()
void TTune::DoIt(const bool*const& f_Stop)
{
   // check
   IfTrueThrowTypeA(f_ForLoading, "Function cannot be called for instance intended for use with Load()!"
                                , "TTune::DoIt()", szModule);

   //-------------------------------------------------------------------------------------------------------------------
   // 1. initialize
   TCluster** cls = NULL;                       // learned models, stays NULL until the first re-learn


   //-------------------------------------------------------------------------------------------------------------------
   // 2. prepare learn and test data
   TData **data_L, **data_T;
   unsigned int seed = SEED;
   if(prj->Randomize())                         // initialize random number generator if specified
   {
      ::randomize();
      seed = abs(random(INT_MAX));
   }

   // generate arrays with tuning learn and test data objects from learn data
   data->GenerateLearnAndTestData(seed, data_L, data_T, prj->Get_N_R_Tune(), prj->GetTuneType()!=Rep,
                     prj->Get_N_L_Tune(), prj->Get_N_T_Tune(), /*dummy*/ "", /*dummy*/"", prj->Get_N_Bins()
                     , prj->Regression(), prj->EqualWidthBinning(), (int) 0 /* dummy */, 1);


   //-------------------------------------------------------------------------------------------------------------------
   // 3. for each parameter set: learn and test on the different tuning learn and test data
   while(!*f_Stop && nResultsOk<sets->Size())
   {
      para = sets->Get(nResultsOk);                // get from list
      TParameter _para = ToTParameter(para, prj);  // convert to use it with kernel routines ...

      for(int i=0;i<N_TUNE_RES;i++)                // reset results
         results[nResultsOk][i] = 0;


      //----------------------------------------------------------------------------------------------------------------
      // learn and test models for each repetition - note: models are only learned if the re-learn flag is set

      // a) prepare: release previous models if they will/must be re-learned
      if(para.Relearn)
      {
         if(cls)                                   // release previous models (deleting a NULL entry is a no-op)
            for(int r=0;r<prj->Get_N_R_Tune();r++)
               delete cls[r];
         delete[] cls;

         cls = new TCluster*[prj->Get_N_R_Tune()]; // new field with models
         for(int r=0;r<prj->Get_N_R_Tune();r++)
            cls[r] = NULL;                         // initialize to NULL
      }


      // b) over each repetition: learn (if necessary) and test
      //    note: 't' is the member that is also read by GetProgress() and GetStatusText()
      for(t=0;t<prj->Get_N_R_Tune();t++)
      {
         //-------------------------------------------------------------------------------------------------------------
         // b1) re-learn if necessary
         if(para.Relearn)
         {
            sec->Enter();
            pnc = new TPnc(data_L[t], _para);      // instantiate PNC object
            sec->Leave();

            while(!*f_Stop && pnc->Iterate());     // learn model ...

            if(pnc->IsFinished())
               cls[t] = pnc->ToTCluster(para.Prune, f_Stop);   // ... and convert TCluster, prune if necessary

            sec->Enter();
            delete pnc;                            // delete learning object (TPnc)
            pnc = NULL;
            sec->Leave();

            if(*f_Stop)
               break;                              // stop flag is set -> leave loop

            // abort if currently learned model is too big
            // note: do not skip first parameter set as it'll be the smallest of all
            if(prj->Skipping() && nResultsOk!=0)
               if(ModelToBig(cls[t], data_L[t], _para, prj))
               {
                  sec->Enter();
                  sets->Pos(nResultsOk);           // set correct list position   note: may have been changed in GetParameter()
                  SkipBigger(sets, true);          // set skip flag for this and covered parameter sets
                  sec->Leave();
                  break;                           // leave loop for this parameter set
               }
         }
         //-------------------------------------------------------------------------------------------------------------
         // b2) or use the previously learned models and just check once if one out of all is too big
         // NOTE(review): this branch and b3) dereference 'cls', i.e. they presumably rely on an earlier parameter set
         //               having been re-learned for all repetitions - confirm against the parameter list generation
         else if(t==0)
         {
            // check all(!) previously learned models with current parameters if one is too big
            if(prj->Skipping())
            {
               bool f_Break = false;
               for(int r=0;r<prj->Get_N_R_Tune();r++)
                  if(ModelToBig(cls[r], data_L[r], _para, prj))
                  {
                     sec->Enter();
                     sets->Pos(nResultsOk);        // correct list position   note: may have been changed in GetParameter()
                     SkipBigger(sets, true);       // set skip flag for this and covered parameter sets
                     sec->Leave();
                     f_Break = true;
                     break;                        // leave local loop
                  }

               if(f_Break)                         // leave global loop
                  break;
            }
         }


         //-------------------------------------------------------------------------------------------------------------
         // b3) loss on test(!) data
         TLossFunction* loss = new TLossFunction(data_L[t]->Mean()[0], -1 /* do not store */); // new loss object
         TPrediction prd(cls[t], &_para);
         prd.Predict(data_T[t], loss, NULL, /*f_IsWithOutput*/true, f_Stop);                    // predict all

         if(*f_Stop)                               // leave loop if stop flag is set ...
         {
            delete loss;                           // ... but release the loss object first, it would leak otherwise
            break;
         }

         if(_para.f_Regression)                    // copy estimated loss
            results[nResultsOk][Err_T] += loss->Mae();         // MAE for regression tasks
         else
            results[nResultsOk][Err_T] += 100.0*loss->Mce();   // MCE else

         delete loss;                              // release loss object


         //-------------------------------------------------------------------------------------------------------------
         // b4) set other 'results' like mass and hitrate
         results[nResultsOk][IdCub]     += cls[t]->nCuboids();               // # cuboids (without minimal mass)
         results[nResultsOk][IdCubRed]  += cls[t]->nCuboidsRed();            // # cuboids that exceed min. mass
         results[nResultsOk][IdAvrVar]  += cls[t]->AvrVarPerCub(para.Prune); // avr. active variable bounds per cuboid
         results[nResultsOk][IdHitrate] += 100.0*cls[t]->AvrHitRate();       // average hitrate
      }


      // c) calculate average - only if the loop above was not left early (stop flag or skipped parameter set)
      if(t==prj->Get_N_R_Tune())
      {
         for(int i=0;i<N_TUNE_RES;i++)
            results[nResultsOk][i] /= prj->Get_N_R_Tune();

         nResultsOk++;                             // all ok, increment counter for result array
      }
   }


   //-------------------------------------------------------------------------------------------------------------------
   // 4. release
   for(int r=0;r<prj->Get_N_R_Tune();r++)
   {
      if(cls)                                      // models - 'cls' is still NULL if no parameter set was re-learned,
         delete cls[r];                            // e.g. stop flag already set on entry or empty parameter list

      data_L[r]->Release();                        // data
      data_T[r]->Release();
   }
   delete[] data_T;
   delete[] data_L;
   delete[] cls;
}
//----------------------------------------------------------------------------------------------------------------------
// Get a one-line status text describing what DoIt() is currently working on.
//   returns : pointer to a function-local static buffer; its content is overwritten by the next call
//   throws  : via IfTrueThrowTypeA() if the instance was constructed for use with Load()
// The text is composed inside the critical section, because 'pnc' is created and deleted inside the same section
// by DoIt().
const char* TTune::GetStatusText()
{
   // check
   IfTrueThrowTypeA(f_ForLoading, "Function cannot be called for instance intended for use with Load()!"
                                , "TTune::GetStatusText()", szModule);

   static char szStatus[256];                   // buffer that outlives the call

   sec->Enter();
   if(pnc==NULL)
   {
      // no learning object at the moment
      sprintf(szStatus, "Tuning Set %d/%d Run %d/%d Testing", nResultsOk+1, sets->Size(), t+1, prj->Get_N_R_Tune());
   }
   else
   {
      // a model is being learned: additionally show the learning progress in percent
      sprintf(szStatus, "Tuning Set %d/%d Run %d/%d Learned %.1f %%", nResultsOk+1, sets->Size()
                      , t+1, prj->Get_N_R_Tune(), 100*pnc->GetProgress(para.Prune));
   }
   sec->Leave();

   return szStatus;
}
//----------------------------------------------------------------------------------------------------------------------
// Save tuning results to file.
//   file : output stream the sections '[Basic]' and '[Results]' are written to
// Section '[Basic]' holds the number of results. Section '[Results]' holds a description line (started with the
// comment character) followed by one line per finished parameter set: its parameters and then its five result values.
void TTune::Save(ofstream& file)
{
   // section '[Basic]': number of result lines that follow
   file << "[Basic]" << endl;
   file << "nResults = " << nResultsOk << endl << endl << endl;


   // column widths: number of decimal digits plus 4
   int widthCub = ceil(log(prj->Get_N_L_Tune()+1)/log(10))+4;   // for the two cuboid count columns
   int widthVar = ceil(log(data->nVar()+1)/log(10))+4;          // for the variables-per-cuboid column


   // section '[Results]': description line first ...
   file << "[Results]" << endl << ComChar;
   TParaSet::WriteDescription(file, prj->Regression());         // parameter descriptions
   file << setw(widthCub) << "K" << setw(widthCub) << "K'";     // result descriptions and linefeed
   file << setw(widthVar) << "Vars" << " Rate Q_T" << endl;


   // ... then one line for each (accepted) parameter set
   for(int i=0;i<nResultsOk;i++)
   {
      // parameters - the list is accessed inside the critical section
      sec->Enter();
      file << " ";                                              // makes up for the comment character in the description line
      sets->Get(i).WriteParameters(file, prj->Regression());
      sec->Leave();

      // first four result values
      char szValues[256];
      sprintf(szValues, "%*.1f%*.1f%*.1f%7.1f", widthCub, results[i][0], widthCub, results[i][1]
                      , widthVar, results[i][2], results[i][3]);

      // error value: format depends on the kind of task
      char szError[256];
      if(prj->Regression())
         sprintf(szError, "%f", results[i][4]);                 // MAE for regression task
      else
         sprintf(szError, "%7.1f", results[i][4]);              // MCE for classification task

      file << szValues << szError << endl;
   }
}
//----------------------------------------------------------------------------------------------------------------------
// Load tuning results from file.
//   file : input stream positioned before section '[Basic]'
//   line : line counter, advanced by the number of lines consumed; used in error messages
//   throws : via IfTrueThrowTypeA() if the instance was not constructed for loading or if Load() was called before;
//            via ThrowTypeU() if the file content is invalid
// Reads the number of results from section '[Basic]', allocates the result array and then reads one parameter set
// plus N_TUNE_RES result values per line from section '[Results]'. Each parameter set is appended to 'sets'.
void TTune::Load(ifstream& file, int& line)
{
   // check - two separate checks so that the message names the actual reason
   // note: 'f_Loaded' is only set by the loading constructor, thus 'f_ForLoading' must be tested first
   IfTrueThrowTypeA(!f_ForLoading, "Function can only be called for instance intended for use with Load()!"
                                 , "TTune::Load()", szModule);
   IfTrueThrowTypeA(f_Loaded, "Function cannot be called twice in the lifetime of an instance!"
                            , "TTune::Load()", szModule);
   f_Loaded = true;                                // set flag to prevent another call of Load()


   try                                             // try to read
   {
      // a) read section '[Basic]'
      line += SearchKey(file, "[Basic]");          // position to section
      nResultsOk = ReadKeyValue(file, "nResults", -1, SEARCH_LINES);   // # results
      if(nResultsOk<1)                             // and check
         throw 1;                                  // value missing or smaller than one


      // b) read section '[Results]'
      results = new TuneResult[nResultsOk];        // allocate result array
      line += SearchKey(file, "[Results]");        // position to section
      for(int i=0;i<nResultsOk;i++)                // read all parameter sets and results
      {
         // b1) insert new parameter set in list ...
         TParaSet& para = sets->Ins();             // note: local reference, hides the member 'para'

         // ... and read parameters
         para.N_Int  = ReadExpNoEndl(file, (float) 0);         // read # intervals
         para.w_COD  = ReadExpNoEndl(file, (float) 0.0);       // read 'w_COD'
         para.Eta    = ReadExpNoEndl(file, (float) 0.0);       // read 'Eta'
         if(prj->Regression())
            para.Sigma    = ReadExpNoEndl(file, (float) 0.0);  // read sigma
         else
            para.W_Kernel = ReadExpNoEndl(file, (float) 0.0);  // read kernel width
         para.p_min   = ReadExpNoEndl(file, (float) 0);        // read cardinality
         para.Prune   = ReadExpNoEndl(file, (float) 0);        // read 'Prune'
         para.Weights = ReadExpNoEndl(file, (float) 0);        // read 'Weights'
         para.Metric  = ReadExpNoEndl(file, (float) 0.0);      // read metric parameter


         // b2) check parameter values - an empty text means that all values are ok
         const char* szText = para.CheckParameters();
         if(szText[0]!='\0')
         {
            char szError[STS];

            // prepend line and section information
            sprintf(szError, "Error in line %d in section [Results]: %s", line, szText);
            ThrowTypeU(szError);                   // throw exception
         }


         // b3) read results
         for(int j=0;j<N_TUNE_RES;j++)
            results[i][j] = ReadExpNoEndl(file, (float) 0.0);  // read result

         // b4) remove linefeed
         line += skipwsExEndl(file);
      }
   }
   catch(int errNo)                                // exception handling for error numbers
   {
      char szText[STS];
      switch(errNo)                                // compose error text
      {
         // the key in the file is 'nResults' and a value of zero is rejected as well
         case 1  : strcpy (szText, "Error in section [Basic]: Value of 'nResults' missing or below 1!"); break;
         default : sprintf(szText, "Error in line %d: %s!", line, ::GetLastError(errNo));
      }
      ThrowTypeU(szText);                          // throw error
   }
}