//------------------------------------------------------------------------------
// module project.cpp //
// //
// Class TProjectB extends TProject by batch, study and tuning settings for usage of PNC algorithm //
// //
// copyright (c) 2001-2003 by Lars Haendel //
// mail: lore17@newty.de //
// home: www.newty.de //
// //
// This program is free software and can be used under the terms of the //
// GNU licence. See header file for further information and disclaimer. //
// //
//------------------------------------------------------------------------------
#include "projectB.h"
#include <iomanip> // due to: setiosflags()
//----------------------------------------------------------------------------------------------------------------------
// default constructor: only delegates to the default constructor of base class TProject
TProjectB::TProjectB()
   : TProject()
{
}
//----------------------------------------------------------------------------------------------------------------------
// load project settings from file: reads the base class settings first and then the additional keys of the
// sections '[Tuning]', '[Batch]' and '[Show]'
//
// file                 input stream of the project file
// _szProjectFilePath   passed on unchanged to TProject::Load()
//
// note: each section is searched starting from the stream position found after TProject::Load() returned; this
//       position is restored once all three sections were read without error
// note: any int exception raised while reading is converted into an error text and re-thrown via ThrowTypeU()
void TProjectB::Load(ifstream& file, const char*const& _szProjectFilePath)
{
   // a) let the base class read its part
   TProject::Load(file, _szProjectFilePath);


   // b) read additional batch parameters and/or sections
   char szSection[STS];                               // section currently read (also used for the error text)

   try
   {
      const streampos startPos = file.tellg();        // remember position: every section search starts here


      // b1) additional keys of section '[Tuning]'
      strcpy(szSection, "[Tuning]");
      SearchKey(file, szSection);                     // position to section

      ReadKeyString(file, "Results", szTuningFile, STS);                      // tuning result filename
      CorrectPathDelimiter(szTuningFile);
      ReadKeyString(file, "Data", szTuneDataBaseFile, STS);                   // basename: generated learn/test data
      ReadKeyString(file, "Model", szTuneModelBaseFile, STS);                 // basename: learned models
      ReadKeyString(file, SZ_SIMULATION, szTuneSimulationBaseFile, STS);      // basename: simulation output files
      ReadKeyString(file, "Ranking", szRanking, STS);                         // criterion to rank parameter sets
      ReadKeyString(file, "Objective", szObjective, STS);                     // tuning objective

      // convert the two criterion names to their ids
      ranking   = StringToCriterionId(szRanking);
      objective = StringToCriterionId(szObjective);


      // b2) section '[Batch]'
      file.seekg(startPos);
      strcpy(szSection, "[Batch]");
      SearchKey(file, szSection);                     // position to section

      f_Study      = ReadKeyBool(file, "Study");                              // flag: make study
      f_SaveMemory = ReadKeyBool(file, "SaveMemory", DEF_SAVE_MEMORY);        // flag: don't store predictions
      ReadKeyString(file, "Results", szResultFile, STS);                      // result filename
      CorrectPathDelimiter(szResultFile);
      ReadKeyString(file, "Data", szDataBaseFileName, STS);                   // (base) filename: learn/test data
      ReadKeyString(file, SZ_SIMULATION, szSimulationBaseFileName, STS);      // (base) filename: simulation output
      ReadKeyString(file, "Model", szModelBaseFileName, STS);                 // (base) filename: learned model(s)

      // output directory  note: if empty/missing, GetOutputDir() falls back to the project file directory
      ReadKeyString(file, SZ_DIRECTORY, szOutputDir, STS);
      CorrectPathDelimiter(szOutputDir);

      ReadKeyString(file, "Type", szStudyType, STS);                          // study type (as string) ...
      studyType = StringToTestType(szStudyType);                              // ... and converted

      N_R = ReadKeyValue(file, "N_R", 0);             // # repetitions   (0 if key is missing)
      N_L = ReadKeyValue(file, "N_L", 0);             // # learn tuples  (0 if key is missing)
      N_T = ReadKeyValue(file, "N_T", 0);             // # test tuples   (0 if key is missing)


      // b3) section '[Show]'
      file.seekg(startPos);
      strcpy(szSection, "[Show]");
      SearchKey(file, szSection);                     // position to section

      // one flag per criterion: shall it be logged?
      for(int id=0; id<nCriterion; id++)
      {
         // (hack!): the default for criterion 'pVal' is always 'false' - must be consistent with Save()
         const bool f_Default = DEF_SHOW_CRITERION && (id!=StringToCriterionId("pVal"));

         f_Criterion[id] = ReadKeyBool(file, CriterionIdToString(id), f_Default);
      }

      f_ShowDev = ReadKeyBool(file, "Deviation", DEF_SHOW_DEVIATION);         // flag: write results' deviations


      // done: put the stream back to where we started
      file.seekg(startPos);
   }
   catch(int errNo)
   {
      char szMessage[STS];

      // if the key accessed last is not the section name itself, reading a key inside the section failed ...
      if(strcmp(szSection, GetLastKey())!=0)
      {
         sprintf(szMessage, "Section '%s' reading key '%s': %s", szSection, GetLastKey(), GetLastError(errNo));
      }
      else  // ... otherwise the search for the section failed
      {
         sprintf(szMessage, "Section '%s' not found!", szSection);
      }

      ThrowTypeU(szMessage);                          // 'propagate' exception
   }
}
//----------------------------------------------------------------------------------------------------------------------
// save project settings to file: writes the base class settings first, then appends the additional tuning keys and
// the sections '[Batch]' and '[Show]'
//
// file                 output stream the project file is written to
// f_WriteTuningAnyway  if set, the additional tuning keys are written even if tuning (f_Tune) is disabled; the flag
//                      is also passed on to TProject::Save()
//
// note: the additional tuning keys are written without a section header, i.e. this relies on TProject::Save()
//       finishing with section '[Tuning]'
// note: output is switched to left justified while writing and is set to right justified on exit
void TProjectB::Save(ofstream& file, const bool& f_WriteTuningAnyway/*=false*/)
{
   // a) call base class version
   TProject::Save(file, f_WriteTuningAnyway);


   // b) save additional parameters/sections
   file << setiosflags(ios::left) << resetiosflags(ios::right);               // set left justified output


   //-------------------------------------------------------------------------------------------------------------------
   // b1) write additional parameters at the end of current section (will be section '[Tuning]')
   if(f_Tune || f_WriteTuningAnyway)
   {
      file << setw(WNAME) << "Results" << " = " << szTuningFile << endl;      // tuning result filename

      // base filenames are optional: write them only if not empty
      if(szTuneDataBaseFile[0]!='\0')
         file << setw(WNAME) << "Data" << " = " << szTuneDataBaseFile << endl;
      if(szTuneModelBaseFile[0]!='\0')
         file << setw(WNAME) << "Model" << " = " << szTuneModelBaseFile << endl;
      if(szTuneSimulationBaseFile[0]!='\0')
         file << setw(WNAME) << SZ_SIMULATION << " = " << szTuneSimulationBaseFile << endl;

      file << setw(WNAME) << "Ranking" << " = " << CriterionIdToString(ranking) << endl;
      file << setw(WNAME) << "Objective" << " = " << CriterionIdToString(objective) << endl;
   }


   //-------------------------------------------------------------------------------------------------------------------
   // b2) write section [Batch]
   file << endl << endl << "[Batch]" << endl;
   file << setw(WNAME) << "Study" << " = " << FlagToString(f_Study) << endl;
   file << setw(WNAME) << "SaveMemory" << " = " << FlagToString(f_SaveMemory) << endl;
   file << setw(WNAME) << "Results" << " = " << szResultFile << endl;

   if(szOutputDir[0]!='\0')                                                   // if not empty
      file << setw(WNAME) << SZ_DIRECTORY << " = " << szOutputDir << endl;    // write output directory

   if(f_Study)                                                                // suppress study settings if disabled
   {
      if(szDataBaseFileName[0]!='\0')
         file << setw(WNAME) << "Data" << " = " << szDataBaseFileName << endl;
      if(szModelBaseFileName[0]!='\0')
         file << setw(WNAME) << "Model" << " = " << szModelBaseFileName << endl;
      if(szSimulationBaseFileName[0]!='\0')
         file << setw(WNAME) << SZ_SIMULATION << " = " << szSimulationBaseFileName << endl;

      file << setw(WNAME) << "Type" << " = " << TestTypeToString(studyType) << endl;

      if(studyType==Rep || studyType==Cv)                                     // note: only needed for type
         file << setw(WNAME) << "N_R" << " = " << N_R << endl;                //       repetition or CV

      if(studyType==Rep)                                                      // note: only needed for type repetition
      {
         file << setw(WNAME) << "N_L" << " = " << N_L << endl;
         file << setw(WNAME) << "N_T" << " = " << N_T << endl;
      }
   }


   //-------------------------------------------------------------------------------------------------------------------
   // b3) write section [Show]
   file << endl << endl << "[Show]" << endl;

   for(int i=0;i<nCriterion;i++)
   {
      // (hack!): ensure that default for criterion 'pVal' is 'false' - must be consistent with Load()
      const bool f_Default = DEF_SHOW_CRITERION && (i!=StringToCriterionId("pVal"));

      // write criterion only if it is not(!) the default. The actual flag value is written (instead of a fixed
      // 'true'): a criterion whose default is 'true' but which is switched off must be stored as disabled,
      // otherwise it would be switched on again by the next Load()
      if(f_Criterion[i]!=f_Default)
         file << setw(WNAME) << CriterionIdToString(i) << " = " << FlagToString(f_Criterion[i]) << endl;
   }

   file << setw(WNAME) << "Deviation" << " = " << FlagToString(f_ShowDev) << endl;


   file << setiosflags(ios::right) << resetiosflags(ios::left);               // restore right justified output
}
//----------------------------------------------------------------------------------------------------------------------
// check project parameters against constraints and given data file, set output column and variable types and set
// dependent variables - WARNING: caller has to release returned TParaSetList !!
//
// _data1               passed on to TProject::Synchronize()
// f_CheckTuningAnyway  passed on to TProject::Synchronize()
// returns              the parameter set list obtained from TProject::Synchronize()
// throws               via ThrowTypeU() with a composed error text if one of the checks below fails; the parameter
//                      set list is deleted before the exception is propagated
//
// note: calls IniStudyCounts() once the '[Batch]' settings passed their checks and IniCriterionFlags() if tuning is
//       enabled
// NOTE(review): IniCriterionFlags() is not called here if tuning is disabled - confirm that this is intended
// NOTE(review): the checks use member 'data1', not parameter '_data1' - presumably it is set by the base class
//               version, please verify
TParaSetList* /*cr*/ TProjectB::Synchronize(TData*const& _data1, const bool& f_CheckTuningAnyway/*=false*/)
{
   // a) call base class version
   TParaSetList* sets = TProject::Synchronize(_data1, f_CheckTuningAnyway);


   // b) check additional settings. Each violated constraint throws an error number which is translated into an
   //    error text by the handler below
   try
   {
      //----------------------------------------------------------------------------------------------------------------
      // b1) section '[Basic]'
      if(!f_Study && szData2[0]=='\0')
         throw 106;                                      // 2nd filename needed (test data file) if study is disabled


      //----------------------------------------------------------------------------------------------------------------
      // b2) section '[Batch]'
      if(f_Study < 0)                           throw 200;  // study flag not found in section 'Batch'
      if(szResultFile[0]=='\0')                 throw 201;  // result file needed in batch mode

      if(f_Study)
      {
         if(szData2[0]!='\0')                   throw 202;  // study mode conflicts with 2nd data file (test data)
         if(szStudyType[0]=='\0')               throw 203;  // no study type given

         if((studyType==Cv
          || studyType==Rep) && N_R==0)         throw 205;  // CV and repetition need N_R ...
         if((studyType==Loocv
          || studyType==Special) && N_R!=0)     throw 206;  // ... but Loocv and Special don't

         if(studyType==Rep && (N_L==0 || N_T==0)) throw 207;  // repetition needs # learn and test tuples ...
         if(studyType!=Rep && (N_L!=0 || N_T!=0)) throw 208;  // ... all other types don't

#ifndef VALIDATION_1
         if(studyType==Rep&&N_T+N_L>data1->nTup()) throw 209;  // check for overlapping of learn and test data
#endif
         if(studyType==Cv && N_R>data1->nTup())    throw 210;  // # cross-val. cannot be greater than # data tuples
      }

      // now we've checked and can initialize (if necessary) N_L, N_T and N_R with respect to study flag and type
      // necessary here as later on N_L is needed to check tuning settings.
      IniStudyCounts();

      // check base filenames: they must not contain backslash, slash, colon or dot
      // note: error numbers must match the texts in the handler below (211: data, 212: model, 213: simulation)
      if(!CheckName(szDataBaseFileName))        throw 211;  // invalid data base filename
      if(!CheckName(szModelBaseFileName))       throw 212;  // invalid model base filename
      if(!CheckName(szSimulationBaseFileName))  throw 213;  // invalid simulation base filename


      //----------------------------------------------------------------------------------------------------------------
      // b3) section '[Show]'
      if(f_SaveMemory&&f_Criterion[StringToCriterionId("pVal")])
         throw 400;                                      // p-values cannot be calculated in memory save mode

#ifdef RELEASE
      if(f_Criterion[StringToCriterionId("pVal")])       // p-values cannot be used in release versions
         throw 401;                                      // due to licence problems
#endif


      //----------------------------------------------------------------------------------------------------------------
      // b4) section '[Tuning]'
      if(f_Tune)                                         // check settings if tuning is enabled
      {
         if(szRanking[0]=='\0')                 throw 501;  // ranking criterion not found
         if(szObjective[0]=='\0')               throw 502;  // tuning objective missing
         if(szTuningFile[0]=='\0')              throw 512;  // tuning result file needed

         if(ranking<0)                          throw 515;  // ranking criterion known ?
         if(ranking==StringToCriterionId("pVal")
          ||ranking==StringToCriterionId("Rank")) throw 516;  // must not be 'Rank' or 'pVal'
         if(ranking==StringToCriterionId("SizeFac")) throw 517;  // must not be 'SizeFac'

         if(objective<0)                        throw 518;  // objective criterion known ?
         if(objective==StringToCriterionId("SizeFac")) throw 519;  // objective must not be 'SizeFac'

         // check base filenames: they must not contain backslash, slash, colon or dot
         // note: error numbers must match the texts in the handler below (523: data, 524: model, 525: simulation)
         if(!CheckName(szTuneDataBaseFile))       throw 523;  // invalid data base filename
         if(!CheckName(szTuneModelBaseFile))      throw 524;  // invalid model base filename
         if(!CheckName(szTuneSimulationBaseFile)) throw 525;  // invalid simulation base filename

         // p-values are not calculated in memory save mode and thus cannot be objective
         // (ranking by 'pVal' is already rejected above)
         if(f_SaveMemory && objective==StringToCriterionId("pVal"))
            throw 520;

         // initialize flag arrays with the criterions that need to be calculated according to section '[Show]',
         // objective etc.
         IniCriterionFlags();
      }
      else                                               // tuning disabled/not tested
         if(studyType==Special)
            throw 526;                                   // tuning cannot be disabled if study type is 'Special'


      //----------------------------------------------------------------------------------------------------------------
      // b5) section '[Task...]'
      if(!f_Tune && sets->Size()>1)             throw 600;  // not more than one parameter set if tuning is disabled
      if(tasks.Size()==0)                       throw 601;  // at least one task must be given
   }
   catch(int errNo)                                      // exception handling
   {
      delete sets;                                       // release parameter set list

      char szText[STS];                                  // compose error text
      switch(errNo)
      {
         // section '[Basic]'
         case 106 : sprintf(szText, "Section '[Basic]': Test data file needed if study is disbled! Please specify key 'Data2' or set key 'Study' in section 'Batch'!"); break;

         // section '[Batch]'
         case 200 : sprintf(szText, "Section '[Batch]': Key 'Study' not found! Please specify key 'Study'!"); break;
         case 201 : sprintf(szText, "Section '[Batch]': Result filename missing! Please specify key 'Results'!"); break;
         case 202 : sprintf(szText, "Section '[Batch]': Study mode must not be set if test data file is given! Please unset key 'Study' or remove key 'Data2'!"); break;
         case 203 : sprintf(szText, "Section '[Batch]': No study type given! Please specify key 'Type'!"); break;
         case 205 : sprintf(szText, "Section '[Batch]': Number of repetitions/cross-validations not given or out of range! Please specify/correct key 'N_R'!"); break;
         case 206 : sprintf(szText, "Section '[Batch]': Number of repetitions/cross-validations must not be given for types 'Loocv' or 'Special'! Please remove key 'N_R'!"); break;
         case 207 : sprintf(szText, "Section '[Batch]': Number of learn and test tuples not given or out of range! Please specify/correct keys 'N_L' and 'N_T'!"); break;
         case 208 : sprintf(szText, "Section '[Batch]': Number of learn and test tuples must not be given for types 'Cross-Validation', 'Loocv' or 'Special'! Please remove keys 'N_L' and 'N_T'!"); break;
         case 209 : sprintf(szText, "Section '[Batch]': Overlapping of learn and test data! 'N_L+N_T<=%d' must yield!", data1->nTup()); break;
         case 210 : sprintf(szText, "Section '[Batch]': Too many cross-validations! 'N_R<=%d' must yield!", data1->nTup()); break;
         case 211 : sprintf(szText, "Section '[Batch]': Data base filename must ot contain any slashes, colons or dots! Please correct or remove key 'Data'!"); break;
         case 212 : sprintf(szText, "Section '[Batch]': Model base filename must ot contain any slashes, colons or dots! Please correct or remove key 'Model'!"); break;
         case 213 : sprintf(szText, "Section '[Batch]': Simulation base filename must ot contain any slashes, colons or dots! Please correct or remove key '%s'!", SZ_SIMULATION); break;

         // section '[Show]'
         case 400 : sprintf (szText, "Section '[Show]': Criterion 'pVal' conflicts with save memory mode!"); break;
         case 401 : sprintf (szText, "Section '[Show]': Criterion 'pVal' cannot be used in GNU GPL version ;-("); break;

         // section '[Tuning]'
         case 501 : sprintf(szText, "Section '[Tuning]': No criterion for ranking of parameter sets given! Please specify key 'Ranking'!"); break;
         case 502 : sprintf(szText, "Section '[Tuning]': No tuning objective is given! Please specify key 'Objective'!"); break;
         case 512 : sprintf(szText, "Section '[Tuning]': Result filename missing! Please specify key 'Results'!"); break;
         case 515 : sprintf(szText, "Section '[Tuning]': Ranking criterion unknown! Please correct key 'Ranking'!"); break;
         case 516 : sprintf(szText, "Section '[Tuning]': Ranking criterion must not be 'pVal' or 'Rank'! Please correct key 'Ranking'"); break;
         case 517 : sprintf(szText, "Section '[Tuning]': Ranking criterion must not be 'SizeFac'! Please correct key 'Ranking'!"); break;
         case 518 : sprintf(szText, "Section '[Tuning]': Tuning objective unknown! Please correct key 'Objective'!"); break;
         case 519 : sprintf(szText, "Section '[Tuning]': Tuning objective must not be 'SizeFac'! Please correct key 'Objective'!"); break;
         case 520 : sprintf(szText, "Section '[Tuning]': Tuning objective must not be 'pVal' if memory save mode is enabled! Please correct key 'Objective' or unset key 'SaveMemory'!"); break;
         case 523 : sprintf(szText, "Section '[Tuning]': Data base filename must ot contain any slashes, colons or dots! Please correct or remove key 'Data'!"); break;
         case 524 : sprintf(szText, "Section '[Tuning]': Model base filename must ot contain any slashes, colons or dots! Please correct or remove key 'Model'!"); break;
         case 525 : sprintf(szText, "Section '[Tuning]': Simulation base filename must ot contain any slashes, colons or dots! Please correct or remove key '%s'!", SZ_SIMULATION); break;
         case 526 : sprintf(szText, "Section '[Tuning]': Tuning cannot be disabled if study type is 'Special'! Please correct key 'Tuning'!"); break;

         // section '[Task...]'
         case 600 : sprintf(szText, "Section '[Task%d]': There must not be more than one parameter set if tuning is disabled! Please correct specified tasks or set key 'Tuning' in section 'Tuning'!", tasks.Size()); break;
         // note: no conversion specifier here - there is no task whose number could be inserted
         case 601 : sprintf(szText, "Section '[Task1]': At least one task must be given! Please add section [Task1]!"); break;

         // any other error number (e.g. thrown by one of the called helpers): never pass on an uninitialized text
         default  : sprintf(szText, "Unexpected error %d while checking project settings!", errNo); break;
      }
      ThrowTypeU(szText);                                // 'propagate' exception
   }


   // return parameter set list generated from settings in section [Parameter] note: not really good style but this
   // list was generated while checking, so why do not use it ?
   return sets;
}
//----------------------------------------------------------------------------------------------------------------------
// initialize the flag array of criterions that need to be calculated (f_CritToCalc): starts with the criterions
// selected for logging (f_Criterion), adds ranking criterion and tuning objective if tuning is enabled and always
// adds 'SizeFac', 'VarPerCub', 'K' and 'K''. Finally updates f_LossOnLearnData from the resulting flags.
void TProjectB::IniCriterionFlags()
{
   // start with the criterions that are to be shown
   for(int id=0; id<nCriterion; id++)
      f_CritToCalc[id] = f_Criterion[id];

   // 'add' ranking criterion and tuning objective
   if(f_Tune)
   {
      f_CritToCalc[ranking]   = true;
      f_CritToCalc[objective] = true;
   }

   // criterions that are calculated in any case
   f_CritToCalc[StringToCriterionId("SizeFac")]   = true;
   f_CritToCalc[StringToCriterionId("VarPerCub")] = true;
   f_CritToCalc[StringToCriterionId("K")]         = true;    // needed while tuning parameters
   f_CritToCalc[StringToCriterionId("K'")]        = true;

   // check, if loss on learn data is required
   f_LossOnLearnData = ::LossOnLearnData(f_CritToCalc);
}
//----------------------------------------------------------------------------------------------------------------------
// initialize learn and test data tuple counts (N_L and N_T) and repetition/cross-validation count (N_R) with respect
// to study flag (f_Study) and study type. note: nothing is changed if study is enabled and the study type is none
// of Cv, Loocv or Special (e.g. type 'Repetition')
void TProjectB::IniStudyCounts()
{
   // study disabled: all tuples of data1 are learn data, one single run
   if(!f_Study)
   {
      N_L = data1->nTup();
      N_T = 0;
      N_R = 1;
      return;
   }

   // study enabled: set dependent variables for CV/Loocv/special mode
   if(studyType==Cv)
   {
      // N-fold cross-validation  note: N_R is the divisor here, i.e. it must not be zero
      N_T = data1->nTup()/N_R;
      N_L = data1->nTup()-N_T;
   }
   else if(studyType==Loocv)
   {
      // leave-one-out cross-validation: one test tuple per run, one run per tuple
      N_L = data1->nTup()-1;
      N_T = 1;
      N_R = data1->nTup();
   }
   else if(studyType==Special)
   {
      // special mode: same counts as with study disabled
      N_L = data1->nTup();
      N_T = 0;
      N_R = 1;
   }
}
//----------------------------------------------------------------------------------------------------------------------
// peek learn data tuple count, e.g. peek value of N_L as it would be after calling IniStudyCounts()
int TProjectB::Peek_N_L()
{
int N_L = TProjectB::N_L; // ini with value that was loaded from project file
if(!f_Study || studyType==Special) N_L = data1->nTup(); // special mode or study disabled
if(f_Study && studyType==Cv) N_L = data1->nTup()-data1->nTup()/N_R; // N-fold cross-validation
if(f_Study && studyType==Loocv) N_L = data1->nTup()-1; // leave-one-out cross-validation
return N_L;
}
//----------------------------------------------------------------------------------------------------------------------
// get output directory, i.e. directory used for result files if these are specified with relative path
//
// returns  pointer to a function-local static buffer; its content is overwritten by every call
const char* TProjectB::GetOutputDir() const
{
   static char szDir[STS];

   if(szOutputDir[0]=='\0')
   {
      // no output directory specified: just use the project file directory
      strcpy(szDir, szProjectFilePath);
   }
   else
   {
      // take the specified output directory and prepend the project file directory if necessary
      strcpy(szDir, szOutputDir);
      PrefixPath(szDir, szProjectFilePath);
   }

   // ensure finishing path delimiter
   EnsurePathDelimiter(szDir);

   return szDir;
}