//------------------------------------------------------------------------------
// module ParaSet.cpp //
// //
// Base class for TParameter. Encapsulates only those parameters that are //
// chosen within the parameter tuning. //
// See below or http://www.newty.de/pnc2/sdocu.html for more information. //
// //
// copyright (c) 2001-2003 by Lars Haendel //
// home: www.newty.de //
// //
// This program is free software and can be used under the terms of the //
// GNU licence. See header file for further information and disclaimer. //
// //
//------------------------------------------------------------------------------
// //
// NOTE: There are helper functions to convert to TParameter //
// //
// File I/O: Load, Save and Write routines //
//------------------------------------------------------------------------------
#include <math> // due to: atof() etc.
#include <stdlib> // strtod()
#include <iomanip> // setw()
#include "fileutil.h" // SizeOfString() and other utilities
#include "defines.h"
#include "ParaSet.h"
#include "exception.h" // IfTrueThrowTypeU()
//----------------------------------------------------------------------------------------------------------------------
// Upper limit for the parameter 'min. cuboid mass'.
//   data : learn data or NULL. Without data the number of tuples is unknown and the limit is 10; otherwise the
//          limit is data->nTup()/2, but never less than 10.
// Note: the lower bound of 10 ensures that the values used in the GUI's task wizard are legal.
const int Max_P_Min(const TData*const& data)
{
   return data ? max(10, data->nTup()/2)     // half the number of tuples, at least 10
               : 10;                         // # tuples in learn data is unknown
}
//----------------------------------------------------------------------------------------------------------------------
// Upper limit for the parameter 'w_COD'.
//   data : data object or NULL. With data the limit is data->nVar()-2, clamped to 0 from below; without data
//          the number of inputs is unknown and the secure value 0 is returned.
const int Max_w_COD(const TData*const& data)
{
   if(data)
      return max(data->nVar()-2, 0);         // never negative

   return 0;                                 // secure value if # inputs is unknown
}
//----------------------------------------------------------------------------------------------------------------------
// initialize parameters with hard coded default values from para.h
// note: if data object is passed, then the parameter 'w_COD' is initialized using a heuristic
void TParaSet::SetStandardValues(const TData*const& data/*=NULL*/)
{
// a) if data given: calculate standard value for parameter 'w_COD'
if(data)
{
float nVar_eff = 0; // effective number of input variables
// (hack!)
for(int j=1;j<data->nVar();j++) // over all input variables ...
{
float tmp = min(1.0, 1-1.25*(0.8-data->Weights()[j])); // count variables not considering those with low weight
if(data->IsSymbolic(j)) // additionally down weight symbolic variables
tmp *= 0.3; // (hack!)
nVar_eff += tmp; // and sum up
}
// approx. 30% of effective number of variables note: will have a precision of 1
w_COD = min(floor(0.5+nVar_eff*3)/10.0, (double) Max_w_COD(data));
}
else
w_COD = min(DEF_W_COD, (float) Max_w_COD(data)); // else set hard coded default value
// b) use default values of class TParameter for the other parameters
N_Int = DEF_N_INT;
Eta = DEF_ETA;
W_Kernel = DEF_W_KERNEL;
Sigma = DEF_SIGMA;
p_min = min(DEF_P_MIN, Max_P_Min(data));
Prune = DEF_PRUNE;
Weights = DEF_WEIGHTS;
Metric = DEF_METRIC;
#ifndef RELEASE // obsolete in release versions
DifMax = DEF_DIF_MAX;
Noise = DEF_NOISE;
#endif
}
//----------------------------------------------------------------------------------------------------------------------
// Write the (short) parameter names that serve as table header in the batch interface's output files.
//   file         : output stream the header is appended to (no line break is written)
//   f_Regression : true  -> the task specific column is 'Sigma' (plus 'Dif' in non-release builds)
//                  false -> the task specific column is 'W_K' (plus 'W_K_M' in non-release builds)
void TParaSet::WriteDescription(ofstream& file, const bool& f_Regression/*=true*/)
{
   // leading columns, common to both tasks
   file << "Int COD Eta";

   // task specific column(s)
   if(!f_Regression)
   {
      file << " W_K";
#ifndef RELEASE                        // obsolete in release versions
      file << " W_K_M";
#endif
   }
   else
      file << " Sigma";

   // trailing columns, common to both tasks
   file << " p_min Prune Weight Metric";

#ifndef RELEASE                        // obsolete in release versions
   if(f_Regression)
      file << " Dif";
   file << " Noise";
#endif
}
//----------------------------------------------------------------------------------------------------------------------
// Write the parameter values in one line, either as fixed width columns or as ' name=value' pairs.
// Note: used in the batch interface's output files and when saving the tuning results in the GUI.
//   file         : output stream the values are appended to (no line break is written)
//   f_Regression : true  -> write 'Sigma' (and 'DifMax' in non-release builds)
//                  false -> write the kernel width (and the min. kernel width in non-release builds)
//   f_Names      : true  -> each value is preceded by its name (SZ_* constant) and '='
//                  false -> plain values, formatted with fixed minimum field widths
void TParaSet::WriteParameters(ostream& file, const bool& f_Regression/*=true*/, const bool& f_Names/*=false*/) const
{
   char szBuf[STS];                                            // scratch buffer, holds one formatted item at a time

   if(!f_Names)
   {
      // ----- plain values without description -----
      sprintf(szBuf, " %2d", N_Int);                           // # intervals
      file << szBuf;
      sprintf(szBuf, " %5.1f", w_COD);                         // w_COD
      file << szBuf;
      sprintf(szBuf, " %4.1f", Eta);                           // eta
      file << szBuf;

      if(f_Regression)
      {
         sprintf(szBuf, " %7.4f", Sigma);                      // sigma
         file << szBuf;
      }
      else
      {
         // classification specific parameters
         sprintf(szBuf, " %4.1f", W_Kernel);                   // kernel width
         file << szBuf;
#ifndef RELEASE                                                // obsolete in release versions
         sprintf(szBuf, " %6.2f", W_Kernel_Min);               // min. kernel width
         file << szBuf;
#endif
      }

      sprintf(szBuf, " %5d", p_min);                           // min. cuboid mass
      file << szBuf;
      sprintf(szBuf, " %5d", (int) Prune);                     // post prune
      file << szBuf;
      sprintf(szBuf, " %6d", (int) Weights);                   // use weights
      file << szBuf;
      sprintf(szBuf, " %6.1f", Metric);                        // metric
      file << szBuf;

#ifndef RELEASE
      if(f_Regression)
      {
         sprintf(szBuf, " %3d", DifMax);                       // difMax
         file << szBuf;
      }
      sprintf(szBuf, " %5.1f", Noise);                         // noise
      file << szBuf;
#endif
   }
   else
   {
      // ----- values with description: ' name=value' -----
      sprintf(szBuf, " %s=%d", SZ_N_INT, N_Int);               // # intervals
      file << szBuf;
      sprintf(szBuf, " %s=%.1f", SZ_W_COD, w_COD);             // w_COD
      file << szBuf;
      sprintf(szBuf, " %s=%.1f", SZ_ETA, Eta);                 // eta
      file << szBuf;

      if(f_Regression)
      {
         sprintf(szBuf, " %s=%.4f", SZ_SIGMA, Sigma);          // sigma
         file << szBuf;
      }
      else
      {
         // classification specific parameters
         sprintf(szBuf, " %s=%.1f", SZ_W_KERNEL, W_Kernel);    // kernel width
         file << szBuf;
#ifndef RELEASE                                                // obsolete in release versions
         sprintf(szBuf, " %s=%.2f", SZ_W_KERNEL_MIN, W_Kernel_Min);  // min. kernel width
         file << szBuf;
#endif
      }

      sprintf(szBuf, " %s=%d", SZ_P_MIN, p_min);               // min. cuboid mass
      file << szBuf;
      sprintf(szBuf, " %s=%d", SZ_PRUNE, (int) Prune);         // post prune
      file << szBuf;
      sprintf(szBuf, " %s=%d", SZ_WEIGHTS, (int) Weights);     // use weights
      file << szBuf;
      sprintf(szBuf, " %s=%.1f", SZ_METRIC, Metric);           // metric
      file << szBuf;

#ifndef RELEASE
      if(f_Regression)
      {
         sprintf(szBuf, " %s=%d", SZ_DIF_MAX, DifMax);         // difMax
         file << szBuf;
      }
      sprintf(szBuf, " %s=%.1f", SZ_NOISE, Noise);             // noise
      file << szBuf;
#endif
   }
}
//----------------------------------------------------------------------------------------------------------------------
// Write all parameters to a file (ofstream), one 'name = value' line per parameter.
//   file        : output file stream
//   f_Commented : true  -> every line is prefixed with the comment character (ComChar) and a blank; no section
//                          name is written
//                 false -> the section name "[Parameter]" is written first and the lines carry no prefix
// Note: the stream is switched to left justified output while writing and set to right justified output
//       afterwards.
void TParaSet::Save(ofstream& file, const bool& f_Commented) const
{
   // a) compose line prefix; write section name if none-commented style is requested
   char szPrefix[16];
   if(f_Commented)
      sprintf(szPrefix, "%c ", ComChar);                       // comment character followed by a blank
   else
   {
      file << "[Parameter]" << endl;                           // section name
      szPrefix[0]='\0';                                        // empty prefix
   }

   // b) write parameters with left justified names
   file << setiosflags(ios::left);
   file << resetiosflags(ios::right);

   file << szPrefix << setw(WNAME) << SZ_N_INT    << " = " << N_Int                 << endl;
   file << szPrefix << setw(WNAME) << SZ_W_COD    << " = " << w_COD                 << endl;
   file << szPrefix << setw(WNAME) << SZ_ETA      << " = " << Eta                   << endl;
   file << szPrefix << setw(WNAME) << SZ_W_KERNEL << " = " << W_Kernel              << endl;
   file << szPrefix << setw(WNAME) << SZ_SIGMA    << " = " << Sigma                 << endl;
   file << szPrefix << setw(WNAME) << SZ_P_MIN    << " = " << p_min                 << endl;
   file << szPrefix << setw(WNAME) << SZ_PRUNE    << " = " << FlagToString(Prune)   << endl;
   file << szPrefix << setw(WNAME) << SZ_WEIGHTS  << " = " << FlagToString(Weights) << endl;
   file << szPrefix << setw(WNAME) << SZ_METRIC   << " = " << Metric                << endl;

#ifndef RELEASE                                                // obsolete in release versions
   file << szPrefix << setw(WNAME) << SZ_W_KERNEL_MIN << " = " << W_Kernel_Min << endl;
   file << szPrefix << setw(WNAME) << SZ_NOISE        << " = " << Noise        << endl;
   file << szPrefix << setw(WNAME) << SZ_DIF_MAX      << " = " << DifMax       << endl;
#endif

   // c) switch back to right justified output
   file << resetiosflags(ios::left);
   file << setiosflags(ios::right);
}
//----------------------------------------------------------------------------------------------------------------------
// Load the parameters from the section "[Parameter]" of a file (ifstream) and validate them.
//   file : input file stream
//   line : line counter; increased by the value SearchKey() returns when the section is found
// Returns false if the section is not found (nothing loaded), true if all parameters were read and are valid.
// Errors while reading (signalled as int exceptions) and invalid values (non-empty result of
// CheckParameters()) are reported via ThrowTypeU() with a text naming the section.
// NOTE(review): Save() writes W_Kernel_Min, Noise and DifMax in a different order than they are read here -
//               presumably ReadKeyValue() finds keys independent of their order; confirm in fileutil.
bool TParaSet::Load(ifstream& file, int& line)
{
   try
   {
      // a) position to section "[Parameter]"
      try
      {
         line += SearchKey(file, "[Parameter]");
      }
      catch(int)
      {
         return false;                                         // section not found: nothing loaded
      }

      // b) read parameters - the DEF_* constant is passed along with each key
      N_Int        = ReadKeyValue(file, SZ_N_INT, DEF_N_INT, SEARCH_LINES);
      w_COD        = ReadKeyValue(file, SZ_W_COD, DEF_W_COD, SEARCH_LINES);
      Eta          = ReadKeyValue(file, SZ_ETA, DEF_ETA, SEARCH_LINES);
      W_Kernel     = ReadKeyValue(file, SZ_W_KERNEL, DEF_W_KERNEL, SEARCH_LINES);             // classification only
#ifndef RELEASE
      W_Kernel_Min = ReadKeyValue(file, SZ_W_KERNEL_MIN, DEF_W_KERNEL_MIN, SEARCH_LINES);     // classification only
#endif
      Sigma        = ReadKeyValue(file, SZ_SIGMA, DEF_SIGMA, SEARCH_LINES);                   // regression only
      p_min        = ReadKeyValue(file, SZ_P_MIN, DEF_P_MIN, SEARCH_LINES);
      Prune        = ReadKeyBool (file, SZ_PRUNE, DEF_PRUNE, SEARCH_LINES);
      Weights      = ReadKeyBool (file, SZ_WEIGHTS, DEF_WEIGHTS, SEARCH_LINES);
      Metric       = ReadKeyValue(file, SZ_METRIC, DEF_METRIC, SEARCH_LINES);
#ifndef RELEASE                                                // note: parameters not used in release versions
      DifMax       = ReadKeyValue(file, SZ_DIF_MAX, DEF_DIF_MAX, SEARCH_LINES);
      Noise        = ReadKeyValue(file, SZ_NOISE, DEF_NOISE, SEARCH_LINES);
#endif

      // c) validate: a non-empty text describes the first violated range
      const char* szProblem = CheckParameters();
      if(*szProblem)
      {
         char szMessage[STS];
         sprintf(szMessage, "Error in section [Parameter]: %s", szProblem);   // prepend section information
         ThrowTypeU(szMessage);
      }
   }
   catch(int errNo)                                            // read error: compose error text and throw again
   {
      char szMessage[STS];
      sprintf(szMessage, "Error reading section [Parameter]: %s", GetLastError(errNo));
      ThrowTypeU(szMessage);
   }

   return true;
}
//----------------------------------------------------------------------------------------------------------------------
// check if actual parameter values are within valid ranges; returns non-empty string if somethings wrong
// note: put in a seperate function as it is also used to check when loading the tuning results (GUI)
const char* TParaSet::CheckParameters()
{
static char szText[STS]; // error text
szText[0]='\0'; // initialize (reset string)
try
{
if(N_Int < MIN_N_INT || N_Int > MAX_N_INT) throw 1;
if(w_COD < MIN_W_COD) throw 2;
if(Eta < MIN_ETA || Eta > MAX_ETA) throw 3;
if(p_min < MIN_P_MIN) throw 4;
if(Metric < MIN_METRIC || Metric > MAX_METRIC) throw 5;
if(W_Kernel < MIN_W_KERNEL || W_Kernel > MAX_W_KERNEL) throw 6;
if(Sigma < MIN_SIGMA || Sigma > MAX_SIGMA) throw 7;
// obsolete in release versions
#ifndef RELEASE
if(W_Kernel_Min < MIN_W_KERNEL_MIN || W_Kernel_Min > MAX_W_KERNEL_MIN) throw 8;
if(Noise < MIN_NOISE || Noise > MAX_NOISE) throw 9;
if(DifMax < 0 || DifMax > N_Int-2) throw 10;
#endif
}
catch(int errNo)
{
switch(errNo)
{
case 1 : sprintf(szText, "Value of '%s' is not e[%d..%d]!", SZ_N_INT, MIN_N_INT, MAX_N_INT); break;
case 2 : sprintf(szText, "Value of '%s' must be > %s", SZ_W_COD, ValueToText1(MIN_W_COD, 0, PREC_PARA)); break;
case 3 : sprintf(szText, "Value of '%s' is not e[%s..%s]!", SZ_ETA, ValueToText1(MIN_ETA), ValueToText2(MAX_ETA)); break;
case 4 : sprintf(szText, "Value of '%s' must be > %d!", SZ_P_MIN, MIN_P_MIN); break;
case 5 : sprintf(szText, "Value of '%s' must be e[%s..%s]!", SZ_METRIC, ValueToText1(MIN_METRIC), ValueToText2(MAX_METRIC)); break;
case 6 : sprintf(szText, "Value of '%s' not e[%s..%s]!", SZ_W_KERNEL, ValueToText1(MIN_W_KERNEL), ValueToText2(MAX_W_KERNEL)); break;
case 7 : sprintf(szText, "Value of '%s' not e[%s..%s]!", SZ_SIGMA, ValueToText1(MIN_SIGMA, 0, PREC_PARA), ValueToText2(MAX_SIGMA, 0, PREC_PARA)); break;
#ifndef RELEASE // obsolete in release versions
case 8 : sprintf(szText, "Value of '%s' not e[%s..%s]!", SZ_W_KERNEL_MIN, ValueToText1(MIN_W_KERNEL_MIN), ValueToText2(MAX_W_KERNEL_MIN)); break;
case 9 : sprintf(szText, "Error in section [Parameter]: Value of '%s' is not e[%s..%s]!", SZ_NOISE, ValueToText1(MIN_NOISE), ValueToText2(MAX_NOISE)); break;
case 10 : sprintf(szText, "Error in section [Parameter]: Value of '%s' not e[0..%d]!", SZ_DIF_MAX, N_Int-2); break;
#endif
}
}
return szText;
}