//------------------------------------------------------------------------------
// module task.cpp //
// //
// A kind of struct with load and save functionality. Encapsulates a task, //
// i.e. a set of parameter values for the PNC2 algorithm, that are used //
// to generate a TParaSetList by building all possible combinations. //
// //
// copyright (c) 2001-2003 by Lars Haendel //
// home: www.newty.de //
// //
// This program is free software and can be used under the terms of the //
// GNU licence. See header file for further information and disclaimer. //
// //
//------------------------------------------------------------------------------
#include <cstdio> // due to: snprintf()
#include <iomanip> // due to: setw()
#include "fileutil.h" // SizeOfString()
#include "task.h"
#define MAX_PARAMETER_LENGTH (int) 6 // maximal length of one(!) parameter in parameter string
// note: is checked in ParseParameters()
//----------------------------------------------------------------------------------------------------------------------
// parse parameter string: search for sub-strings terminated by ';', convert them to floats and store them in parameter
// array. return # parameters found
int ParseParameters(const char*const& szPar, float*& para, const float& defVal, const char*const& szName
, const float& min, const bool f_Integer/*=false*/, const float& max/*=MAXFLOAT*/)
{
const bool boolean = (f_Integer && max==1 && min==0); // flag: parameter can only take boolean values
// a) create parameter field
int nSubStrings=1; // allocate one more to store default parameter when string is empty
int j=0;
while(szPar[j]!='\0') // count # ';'/spaces in string
{
if(szPar[j]==';' || szPar[j]==' ') // ';' or space found
{
nSubStrings++; // increment counter
while(szPar[j+1]==';' || szPar[j+1]==' ') // and ignore all directly following ';'/spaces
j++;
}
j++; // proceed with next character in string
}
para = new float[nSubStrings]; // allocate memory for parameter field
// b) parse string
int i=0, c, nPara=0;
char buffer[STS];
char* endptr ;
while(szPar[i]!='\0')
{
c=0; // reset buffer index
// copy sub-strings (terminated by ';' or space) to buffer
while(szPar[i]!=';' && szPar[i]!=' ' && szPar[i]!='\0' && c<255)
buffer[c++]=szPar[i++];
buffer[c]='\0'; // terminate substring
// check maximal string length, no need to allow more characters
IfTrueThrowTypeU(c>MAX_PARAMETER_LENGTH, "String to long for parameter '%s'", szName);
if(strcmp("true", buffer)==0) // compare sub-string with 'true' and 'false' first ...
para[nPara] = 1;
else
if(strcmp("false", buffer)==0)
para[nPara] = 0;
else
{
endptr = NULL;
para[nPara] = (float) strtod(buffer, &endptr); // ... else convert to floating point
if(f_Integer) // eventually check if specified parameter must be an integer
IfTrueThrowTypeU(para[nPara]!=floor(para[nPara]),"Parameter '%s' can take only integer values!", szName);
// check if there has been an error
if(*endptr!='\0')
{
char szText[STS];
sprintf(szText, "Error parsing '%s': Character '%c' is illegal!", szPar, *endptr);
ThrowTypeU(szText);
}
}
nPara++; // increment parameter counter
while(szPar[i]==';' || szPar[i]==' ') // position forward ('remove' all the ';' or space)
i++;
}
// c) check for dupes
for(int i=0;i<nPara;i++)
for(int j=i+1;j<nPara;j++)
if(para[i]==para[j])
{
char szText[STS];
sprintf(szText, "Duplicate value %s found for parameter '%s'!", ValueToText1(para[i], 0, PREC_PARA), szName);
ThrowTypeU(szText);
}
// d) check minima and maxima if specified
// if(min!=max) (hack!)
for(int i=0;i<nPara;i++)
if( para[i]<min || para[i]>max) // if parameter value is 'out of range' ...
if(boolean)
ThrowTypeU("Parameter '%s' can take only boolean values!", szName);
else
{
char szText[STS]; // ... compose error text ...
if(max!=MAXFLOAT)
// maximum specified
sprintf(szText, "Value(s) of parameter '%s' must be e[%s..%s]!", szName, ValueToText1(min, 0, PREC_PARA)
, ValueToText2(max, 0, PREC_PARA));
else
// no maximum specified
sprintf(szText, "Value(s) of parameter '%s' must be e[%s..oo]!", szName, ValueToText1(min));
ThrowTypeU(szText);
}
// e) parameter specific modifications
// ensure special ordering for parameter 'Prune': 'true' must come before 'false'
if(strcmp(szName, SZ_PRUNE)==0)
if(nPara>1)
{
para[0] = true;
para[1] = false;
}
// f) set default if string was empty
if(nPara==0)
para[nPara++]=defVal;
return nPara;
}
//----------------------------------------------------------------------------------------------------------------------
// Convert tasks to a parameter set list; also used to test validity of tasks.
//
// For every task in 'tasks' all parameter strings are parsed with ParseParameters() and one parameter set is
// inserted into the result for every combination of the parsed values. Each task's nSetsLearn and nSets
// members are updated with the number of combinations.
//
//    tasks         list of tasks to convert
//    data          data object; queried for the number of classes and for the upper bounds of some parameters
//    f_Regression  true for regression tasks, false for classification tasks
//
// Returns a newly created TParaSetList the caller has to release.
//
// If a task is invalid, the list built so far is deleted and the error is thrown again with the text prefixed by
// "Section '[Task<n>]': " (n is the 1-based task index). All parameter fields are released on every path.
TParaSetList* /*cr*/ ToParaSetList(TTaskList*const& tasks, const TData*const& data, const bool& f_Regression)
{
   // Owner of one parameter field allocated by ParseParameters(). The destructor releases the field, so the
   // fields are freed both at the end of each loop pass and when an exception leaves the loop.
   struct TParaField
   {
      float* p;
      TParaField() : p(NULL) {}
      ~TParaField() { delete[] p; }
   };

   // a) ini and pre-checks
   TParaSetList* sets = new TParaSetList();                   // create parameter set list (return value)
   sets->SetName("TParaSetList (TProject)");                  // set name

   int def_N_Int = DEF_N_INT;                                 // default for parameter 'N_Int'
   if(!f_Regression)
      def_N_Int = data->nIntegerMaxMin(0);

   // b) for all tasks in task list ... parse them and add parameter sets to list
   int i = 0;
   try
   {
      for(i=0;i<tasks->Size();i++)
      {
         TTask& task = tasks->Get(i);                         // get i-th task from list

         // parameters and # parameters
         TParaField nInt, w_COD, eta, wKernel, sigma, p_min, prune, weights, metric;
         int nnInt, nW_COD, nEta, nWKernel, nSigma, nP_min, nPrune, nWeights, nMetric;
#ifndef RELEASE // obsolete in release versions
         TParaField wKernelMin, noise, difMax;
         int nWKernelMin, nNoise, nDifMax;
#endif

         // parse parameter strings, i.e. generate parameter fields and determine # parameters
         // note: 'N_Int' is parsed under its own name so that error messages refer to the right parameter
         nnInt    = ParseParameters(task.szN_Int,    nInt.p,    def_N_Int,    SZ_N_INT,    MIN_N_INT,    INT_N_INT,    MAX_N_INT);
         nW_COD   = ParseParameters(task.szW_COD,    w_COD.p,   DEF_W_COD,    SZ_W_COD,    MIN_W_COD,    INT_W_COD,    Max_w_COD(data));
         nEta     = ParseParameters(task.szEta,      eta.p,     DEF_ETA,      SZ_ETA,      MIN_ETA,      INT_ETA,      MAX_ETA);
         nWKernel = ParseParameters(task.szW_Kernel, wKernel.p, DEF_W_KERNEL, SZ_W_KERNEL, MIN_W_KERNEL, INT_W_KERNEL);
         nSigma   = ParseParameters(task.szSigma,    sigma.p,   DEF_SIGMA,    SZ_SIGMA,    MIN_SIGMA,    INT_SIGMA,    MAX_SIGMA);
         nP_min   = ParseParameters(task.szP_Min,    p_min.p,   DEF_P_MIN,    SZ_P_MIN,    MIN_P_MIN,    INT_P_MIN,    Max_P_Min(data));
         nPrune   = ParseParameters(task.szPrune,    prune.p,   DEF_PRUNE,    SZ_PRUNE,    MIN_PRUNE,    INT_PRUNE,    MAX_PRUNE);
         nWeights = ParseParameters(task.szWeights,  weights.p, DEF_WEIGHTS,  SZ_WEIGHTS,  MIN_WEIGHTS,  INT_WEIGHTS,  MAX_WEIGHTS);
         nMetric  = ParseParameters(task.szMetric,   metric.p,  DEF_METRIC,   SZ_METRIC,   MIN_METRIC,   INT_METRIC,   MAX_METRIC);
#ifndef RELEASE // obsolete in release versions
         nWKernelMin = ParseParameters(task.szW_Kernel_Min, wKernelMin.p, DEF_W_KERNEL_MIN, SZ_W_KERNEL_MIN,
                                       MIN_W_KERNEL_MIN, INT_W_KERNEL_MIN, MAX_W_KERNEL_MIN);
         nDifMax     = ParseParameters(task.szDifMax, difMax.p, false, SZ_DIF_MAX, MIN_DIF_MAX, INT_DIF_MAX, MAX_DIF_MAX);
         nNoise      = ParseParameters(task.szNoise, noise.p, DEF_NOISE, SZ_NOISE, MIN_NOISE, INT_NOISE, MAX_NOISE);
#endif

         // additional checks
         IfTrueThrowTypeU(nWKernel>1 && f_Regression, "Parameter '%s' is irrelevant for regression tasks. Thus do not specify more than one value!", SZ_W_KERNEL);
#ifndef RELEASE // obsolete in release versions
         IfTrueThrowTypeU(nWKernelMin>1 && f_Regression, "Parameter '%s' is irrelevant for regression tasks. Thus do not specify more than one value!", SZ_W_KERNEL_MIN);
#endif
         IfTrueThrowTypeU(nSigma>1 && !f_Regression, "Parameter '%s' is irrelevant for classification tasks. Thus do not specify more than one value!", SZ_SIGMA);
         IfTrueThrowTypeU(!f_Regression && (nnInt>1 || nInt.p[0]!=data->nIntegerMaxMin(0)), "Given problem is a classifcication task. Thus '%s' must be equal to the number of classes!", SZ_N_INT);

         // # parameter sets that need learning and # parameters overall
         task.nSetsLearn = nnInt * nEta * nW_COD * nWeights * nMetric;
#ifndef RELEASE // obsolete in release versions
         task.nSetsLearn *= nDifMax * nNoise;
#endif
         task.nSets = task.nSetsLearn * nPrune * nWKernel * nP_min * nSigma;
#ifndef RELEASE // obsolete in release versions
         task.nSets *= nWKernelMin;
#endif

         // sort parameters that have a direct effect on model size in an order that the smallest models are learned first
         qsort(w_COD.p, nW_COD, sizeof(w_COD.p[0]), FloatCmpAsc);   // w_COD ascending
         qsort(eta.p,   nEta,   sizeof(eta.p[0]),   FloatCmpDes);   // eta descending
         qsort(p_min.p, nP_min, sizeof(p_min.p[0]), FloatCmpDes);   // min. cuboid mass descending
         qsort(prune.p, nPrune, sizeof(prune.p[0]), FloatCmpDes);   // pruning in descending order

         // add parameter sets to list: the outer loops run over the parameters counted in nSetsLearn, the inner
         // loops over the remaining ones. Only the first set of each inner block gets the relearn flag.
         for(int a=0;a<nnInt;a++)
          for(int b=0;b<nEta;b++)
           for(int c=0;c<nW_COD;c++)
            for(int d=0;d<nWeights;d++)
             for(int y=0;y<nMetric;y++)
#ifndef RELEASE
              for(int v=0;v<nDifMax;v++)
               for(int g=0;g<nNoise;g++)                      // note: noise needs relearning since v1.26b !
#endif
               {
                  bool relearn = true;                        // set relearn flag

                  for(int z=0;z<nPrune;z++)
                   for(int x=0;x<nWKernel;x++)
                    for(int q=0;q<nP_min;q++)
#ifndef RELEASE // obsolete in release versions
                     for(int f=0;f<nWKernelMin;f++)
#endif
                      for(int e=0;e<nSigma;e++)
                      {
                         TParaSet& para = sets->Ins();        // insert new parameter set in list

                         para.N_Int    = static_cast<int>(nInt.p[a]);   // copy/set parameters
                         para.w_COD    = w_COD.p[c];
                         para.Eta      = eta.p[b];
                         para.W_Kernel = wKernel.p[x];
                         para.Sigma    = sigma.p[e];
                         para.p_min    = p_min.p[q];
                         para.Prune    = (prune.p[z]!=0);
                         para.Weights  = (weights.p[d]!=0);
                         para.Metric   = metric.p[y];
#ifndef RELEASE // obsolete in release versions
                         para.W_Kernel_Min = wKernelMin.p[f];
                         para.Noise        = noise.p[g];
                         para.DifMax       = difMax.p[v];
#endif
                         para.Relearn = relearn;
                         para.Skip    = false;                // no set is skipped by default

                         relearn = false;                     // reset relearn-flag
                      }
               }
         // the parameter fields are released by the TParaField destructors at the end of each pass
      }
   }
   catch(TExceptionU& excp)                                   // by reference: no copy, no slicing
   {
      delete sets;                                            // release
      char szText[STS];
      snprintf(szText, sizeof(szText), "Section '[Task%d]': %s", i+1, excp.GetErrorText());   // re-compose error text
      ThrowTypeU(szText);                                     // throw again
   }
   catch(...)
   {
      delete sets;                                            // do not leak the list on any other exception
      throw;
   }
   return sets;                                               // return parameter set list
}
//----------------------------------------------------------------------------------------------------------------------
// Write the task to a file: one line "<name> = <values>" per parameter string that is not empty.
// While writing, spaces inside the task strings are replaced by ';'; they are restored afterwards. The stream is
// switched to left justified output for the duration of the call and back to right justified output at the end.
void TTask::Save(ofstream& file)
{
   // parameter name / parameter string pairs in the order they are written
   struct TEntry
   {
      const char* szKey;
      const char* szValue;
   };
   const TEntry entries[] =
   {
      { SZ_N_INT,    szN_Int    },
      { SZ_W_COD,    szW_COD    },
      { SZ_ETA,      szEta      },
      { SZ_W_KERNEL, szW_Kernel },
      { SZ_SIGMA,    szSigma    },
      { SZ_P_MIN,    szP_Min    },
      { SZ_PRUNE,    szPrune    },
      { SZ_WEIGHTS,  szWeights  },
      { SZ_METRIC,   szMetric   }
#ifndef RELEASE // obsolete in release versions
    , { SZ_W_KERNEL_MIN, szW_Kernel_Min }
    , { SZ_NOISE,        szNoise        }
    , { SZ_DIF_MAX,      szDifMax       }
#endif
   };
   const int nEntries = sizeof(entries)/sizeof(entries[0]);

   file << setiosflags(ios::left) << resetiosflags(ios::right);   // set left justified output
   Replace(' ', ';');                                             // replace spaces in task strings by ';'

   for(int k=0;k<nEntries;k++)
   {
      if(entries[k].szValue[0]=='\0')                             // empty strings are not written
         continue;
      file << setw(WNAME) << entries[k].szKey << " = " << entries[k].szValue << endl;
   }

   Replace(';', ' ');                                             // restore spaces in task strings
   file << resetiosflags(ios::left) << setiosflags(ios::right);   // restore right justified output
}
//----------------------------------------------------------------------------------------------------------------------
// Replace every occurrence of character 'a' in the zero-terminated string 'szText' by character 'b' (in place).
// Used to convert between ';' and space separated task strings.
void Replace(const char& a, const char& b, char* szText)
{
   for(char* p=szText; *p!='\0'; ++p)   // walk the string up to its terminator
      if(*p==a)
         *p = b;
}
void TTask::Replace(const char& a, const char& b)
{
// replace ';' by spaces
::Replace(a, b, szN_Int);
::Replace(a, b, szW_COD);
::Replace(a, b, szEta);
::Replace(a, b, szW_Kernel);
::Replace(a, b, szSigma);
::Replace(a, b, szP_Min);
::Replace(a, b, szPrune);
::Replace(a, b, szWeights);
::Replace(a, b, szMetric);
#ifndef RELEASE // obsolete in release versions
::Replace(a, b, szW_Kernel_Min);
::Replace(a, b, szDifMax);
::Replace(a, b, szNoise);
#endif
}
//----------------------------------------------------------------------------------------------------------------------
// read task from file
// Calls ReadKeyString() once per parameter name (SZ_...) with the matching member string and the size STS, then
// replaces every ';' in the task strings by a space.
// NOTE(review): presumably ReadKeyString() looks up the key in the file and copies its value (at most STS
// characters) to the given string - confirm against fileutil.h. How a missing key is handled cannot be seen here.
void TTask::Load(ifstream& file)
{
// load (one call per parameter string)
ReadKeyString(file, SZ_N_INT , szN_Int , STS);
ReadKeyString(file, SZ_W_COD , szW_COD , STS);
ReadKeyString(file, SZ_ETA , szEta , STS);
ReadKeyString(file, SZ_W_KERNEL , szW_Kernel , STS);
ReadKeyString(file, SZ_SIGMA , szSigma , STS);
ReadKeyString(file, SZ_P_MIN , szP_Min , STS);
ReadKeyString(file, SZ_PRUNE , szPrune , STS);
ReadKeyString(file, SZ_WEIGHTS , szWeights , STS);
ReadKeyString(file, SZ_METRIC , szMetric , STS);
#ifndef RELEASE // obsolete in release versions
ReadKeyString(file, SZ_W_KERNEL_MIN, szW_Kernel_Min, STS);
ReadKeyString(file, SZ_DIF_MAX , szDifMax , STS);
ReadKeyString(file, SZ_NOISE , szNoise , STS);
#endif
// replace ';' in task strings by space (TTask::Save() writes the strings with ';' instead of spaces)
Replace(';', ' ');
}