//------------------------------------------------------------------------------
// module cluster.h //
// //
// Model learned with the PNC2 cluster algorithm. Used in combination //
// with TPrediction. //
// See source or http://www.newty.de/pnc2/sdocu.html for more information. //
// //
// copyright (c) 2000-2003 by Lars Haendel //
// home: www.newty.de //
// //
// This program is free software; you can redistribute it and/or modify //
// it under the terms of the GNU General Public License as published by //
// the Free Software Foundation as version 2 of the License. //
// //
// This program is distributed in the hope that it will be useful, //
// but WITHOUT ANY WARRANTY; without even the implied warranty of //
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
// GNU General Public License for more details. //
// //
// You should have received a copy of the GNU General Public License //
// along with this program; if not, write to the Free Software //
// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. //
// //
//------------------------------------------------------------------------------
#ifndef CLUSTER_H
#define CLUSTER_H
//----------------------------------------------------------------------------------------------------------------------
// Number of bars in the histogram that is calculated for the cuboid masses; it is the length of TCluster::hist.
// The expansion carries an explicit (int) cast, so the macro cannot be evaluated in #if expressions.
#define N_BARS (int) 41 // # bars in histogram calculated for cuboid masses
#include "lossfunc.h" // due to TLossFunction
#include "ddata.h" // TDataData (will also include data.h)
#include "para.h" // TParameter
//----------------------------------------------------------------------------------------------------------------------
// Bundles the two function pointers that are used to improve the performance of distance calculations in the input
// space, together with the value 'p' and its reciprocal.
class TDFunc
{
public:
    // Default construction calls Initialize() with 1 (city block metric).
    TDFunc() { Initialize(1.0f); }

    // Set the function pointers for the given '_p' (defined in the implementation file).
    void Initialize(const float& _p);

    void (*DFunc1)(float&, const float&);       // function pointer 1
    void (*DFunc2)(float&, const float&);       // function pointer 2

    float p;                                    // presumably the metric parameter handed to Initialize() - confirm
    float inv_p;                                // 1/p
};
//----------------------------------------------------------------------------------------------------------------------
// Model learned by the PNC algorithm; encapsulates everything that is needed to make predictions.
//
// Per-cuboid data lives in parallel arrays (l, r, q, m, neg, nActiveL, ...). Every accessor that takes a cuboid id
// translates it through 'mapper' before indexing those arrays; only MaskDirect() indexes them directly.
//
// NOTE(review): the class declares a destructor and holds raw pointers but declares neither a copy constructor nor an
// assignment operator - presumably instances are not meant to be copied; confirm against the implementation file.
class TCluster
{
public:
    // constructor/destructor
    TCluster(const int& __nTup, const TDataData*const& _ddata, const TParameter& _para);   // normal constructor
    TCluster();                                                                             // constructor for use with Load()
    ~TCluster();

    // simple read-only accessors
    inline const int&   nVar()                  const { return ddata->nVar(); }             // # variables
    inline const int&   nCuboidsRed()           const { return _nCubRed; }                  // # cuboids that exceed min. mass
    inline const int&   nCuboids()              const { return _nCub; }                     // # all cuboids
    inline const bool&  HasPruningInformation() const
                        { return f_HasPruningInformation; }                                 // flag: pruning information available
    inline const bool&  HasOriginalCuboids()    const
                        { return f_HasOriginalCuboids; }                                    // flag: original cuboid bounds/masks available
    inline const float& GetSizeRatio()          const { return sizeRatio; }
#ifndef RELEASE                                 // obsolete in release version
    const float& d_ref() const { return _d_ref; }
#endif
    const float AvrVarPerCub(const bool& f_Pruned) const;
    inline const float& AvrHitRate()            const { return avrHitrate; }
    inline const float& AvrMass()               const { return avrMass; }

    // prepare object using alternate parameters if given (e.g. make mapper to use only those clusters with a mass
    // bigger than 'k')
    void Prepare(const TParameter* _para=NULL);

    void CalculateHistogram();
    // value of the barId'th histogram bar; 'barId' is used as index into hist[N_BARS] without a range check
    const int& Histogram(const int& barId) const { return hist[barId]; }

    // calculate distance (pruned or not) of input tuple 'x' to the cubId'th cuboid
    float Distance      (const float*const& /* input data */ x, const int& _cubId);
    float PrunedDistance(const float*const& /* input data */ x, const int& _cubId);

    void Save(ofstream& file, const bool& f_Compact=false, const TParameter* _para=NULL);  // save model
    void Load(ifstream& file, int& line);                                                   // load model

    inline const TDataData*const& GetDataData() const { return ddata; }
    // address of the parameter set stored in this object (member 'para'); the pointer refers into the object itself
    inline const TParameter* GetParameters()    const { return &para; }

    // per-cuboid values, read-only; 'cubId' is translated through 'mapper'
    inline const float& Hitrate(const int& cubId) const { return q[mapper[cubId]]; }
    inline const float& Output (const int& cubId) const { return l[mapper[cubId]][0]; }    // element 0 of the cuboid's row in 'l'
    inline const float& Mass   (const int& cubId) const { return m[mapper[cubId]]; }
    inline const float& Neg    (const int& cubId) const { return neg[mapper[cubId]]; }

    // per-cuboid values, writable
    inline float& Neg    (const int& cubId) { return neg[mapper[cubId]]; }
    inline float& Hitrate(const int& cubId) { return q[mapper[cubId]]; }
    inline float& Output (const int& cubId) { return l[mapper[cubId]][0]; }                 // element 0 of the cuboid's row in 'l'
    inline float& Mass   (const int& cubId) { return m[mapper[cubId]]; }

private:
    friend class TPnc;

    void CalculateHitRate();                    // calculate hitrate for each cluster
    void Allocate();                            // allocate memory, used in constructor/load routine

    // access functions position and radian or mask (bits)
    inline float& Lower     (const int& cubId, const int& j) { return l[mapper[cubId]][j]; }
    inline float& Upper     (const int& cubId, const int& j) { return r[mapper[cubId]][j]; }
    // Mask()/MaskDirect() reinterpret the float slot in 'l' as an int; MaskDirect() does not go through 'mapper'
    inline int&   Mask      (const int& cubId, const int& j) { return *((int*) &(l[mapper[cubId]][j])); }
    inline int&   MaskDirect(const int& cubId, const int& j) { return *((int*) &(l[cubId][j])); }
    inline float& NegMass   (const int& cubId)               { return neg[mapper[cubId]]; }  // same element as Neg()

    inline int& ActiveLowerBound  (const int& cubId, const int& g) { return activeIdL[mapper[cubId]][g]; }
    inline int& ActiveUpperBound  (const int& cubId, const int& g) { return activeIdR[mapper[cubId]][g]; }
    inline int& nActiveLowerBounds(const int& cubId)               { return nActiveL[mapper[cubId]]; }
    inline int& nActiveUpperBounds(const int& cubId)               { return nActiveR[mapper[cubId]]; }

    // read-only counterparts of the access functions above
    inline const int&   ActiveLowerBound  (const int& cubId, const int& g) const { return activeIdL[mapper[cubId]][g]; }
    inline const int&   ActiveUpperBound  (const int& cubId, const int& g) const { return activeIdR[mapper[cubId]][g]; }
    inline const int&   nActiveLowerBounds(const int& cubId)               const { return nActiveL[mapper[cubId]]; }
    inline const int&   nActiveUpperBounds(const int& cubId)               const { return nActiveR[mapper[cubId]]; }
    inline const float& Lower             (const int& cubId, const int& j) const { return l[mapper[cubId]][j]; }
    inline const float& Upper             (const int& cubId, const int& j) const { return r[mapper[cubId]][j]; }
    inline const int&   Mask              (const int& cubId, const int& j) const { return *((int*) &(l[mapper[cubId]][j])); }

    // presumably the distance of component 'x_j' to the lower/upper bound of variable 'j' of cuboid 'cubId' - confirm
    float d_Lower(const int& j, const float x_j, const int& cubId);
    float d_Upper(const int& j, const float x_j, const int& cubId);

#ifndef RELEASE                                 // obsolete in release version
    void CalcAvrDiameter();                     // calculate average distance lower left to upper right bound
                                                // note: used by TPnc::ToTCluster()
#endif

    float** l;                                  // left border
    float** r;                                  // right border
    float*  q;                                  // cluster's hitrate
    float*  m;                                  // mass
    float*  neg;                                // # covered negative examples
    int*    mapper;                             // maps cuboid id to index in l, r etc.

#ifndef RELEASE                                 // obsolete in release version
    float _d_ref;                               // reference distance used for parameter min. kernel width
#endif

    TDFunc dfunc;                               // metric function
    const TDataData* ddata;                     // data data like minima, # variables, weights etc.
    TParameter para;                            // parameter used to learn(!) model (with class TPnc) note: parameters to use
                                                // (k, p, use weights) are determined in Prepare() and can be these or alternate
                                                // ones
    bool f_UseWeights;                          // flag indicates to use weights
    int p_min;                                  // min. cuboid mass used in last call to Prepare()
    int metric;                                 // metric parameter used in last call to Prepare()
    int _nCub;                                  // # all cuboids
    int _nCubRed;                               // # cuboids that exceed min. mass
    float sizeRatio;
    float avrVarPerCub;                         // average # variables in cuboid's premise
    float avrHitrate;                           // average hitrate of clusters
    float avrMass;                              // average mass per cuboid
    int hist [N_BARS];                          // histogram values calculated for cuboid masses

    int*  nActiveL;                             // # active, i.e. non-pruned left(!) bounds for each cuboid
    int*  nActiveR;                             // # active, i.e. non-pruned right(!) bounds for each cuboid
    int** activeIdL;                            // ids of active left(!) bounds of each cuboid
    int** activeIdR;                            // ids of active right(!) bounds of each cuboid

    bool f_Loaded;                              // flag indicates that Load() has been called or that normal constructor has been
                                                // used
    bool f_HasPruningInformation;               // flag: pruning information available
    bool f_HasOriginalCuboids;                  // flag: original cuboid bounds/masks available
};
#endif