static const char* szModule        = "cluster.cpp";

//------------------------------------------------------------------------------
//    module cluster.cpp                                                      //
//                                                                            //
//    Model learned with the PNC2 cluster algorithm. Used in combination      //
//    with TPrediction.                                                       //
//    See below or http://www.newty.de/pnc2/sdocu.html for more information.  //
//                                                                            //
//    copyright (c) 2000-2003 by Lars Haendel                                 //
//    home: www.newty.de                                                      //
//                                                                            //
//    This program is free software and can be used under the terms of the    //
//    GNU licence. See header file for further information and disclaimer.    //
//                                                                            //
//------------------------------------------------------------------------------
//                                                                            //
//    CREATE:  a) Create an empty instance and call Load()                    //
//             b) Call TPnc::ToTCluster() to convert from TPnc                //
//                                                                            //
//    NOTE: The parameters (min. cuboid mass, usage of feature weights        //
//    and used metric) are initialized by a call to function Prepare().       //
//    Either the parameters originally used for learning or alternate         //
//    ones that you pass to Prepare() are used .                              //
//                                                                            //
//    NOTE: There are two flags 'f_HasPruningInformation' (f_PI) and          //
//    'f_HasOriginalCuboids' (f_OC). Here's their meaning:                    //
//                                                                            //
//    f_PI    f_OC      |   Meaning                                           //
//    --------------------------------------------------------------------    //
//    true    true      -> original cuboid bounds/masks are stored            //
//                         and additional pruning information is avalaible    //
//    true    false     -> illegal                                            //
//    false   true      -> original cuboid bounds/masks are stored and no     //
//                         pruning information is available                   //
//    false   false     -> pruned cuboids are stored, i.e. original bounds/   //
//                         masks are replaced by minima/maxima/unity mask     //
//                                                                            //
//    File I/O: Load and save routines                                        //
//------------------------------------------------------------------------------



//----------------------------------------------------------------------------------------------------------------------
#include <stdlib>                // due to   qsort()
#include <stdio>                 //          sprintf()
#include <iomanip>               //          setw()
#include <math>                  //          sqrt()
#include <values>                //          MAXFLOAT

#include "cluster.h"
#include "fileutil.h"            //          file io utilities
#include "defines.h"
#include "exception.h"           //          IfTrueThrowTypeA()

#ifndef RELEASE
#include "statistics.h"          //          CalcUpper() for confidence intervals
#endif


//----------------------------------------------------------------------------------------------------------------------
// Memory in-efficiency warning: Each cuboid is stored by his bounds/masks for each(!) input and by two arrays
// indicating the active, i.e. none-pruned, inputs. All this storage is always used, no matter if pruning information
// or original cuboid bounds/masks are available. If there is no pruning information, then the arrays with the active
// input Ids just contain the Id of each input. And if the original cuboid bounds/masks are not avalaible, then the
// the cuboid is represented for the pruned inputs just by +-MAXFLOAT (continuous input) or by a mask with all bits set
// (symbolic input). Maybe one should change this in future ... ?



//----------------------------------------------------------------------------------------------------------------------
// Encapsulated two function pointers that are used to improve performance of distance calculations in the input space.
// The metric used is d = sum (w_j * |d_j|^p )^1/p with 'w_j' being the j-th variable's weight and 'd_j' being the j-th
// variable component wise distance. For performance reasons there are two function pointers 'DFunc1' and 'DFunc2'
// which point to the functions which a) raise 'd_j' to the power of 'p' and b) which raise 'sum(w_j * |d_j|^p)' to the
// power of '1/p'.

// note: the absolute values (function fabs()) are used to cope with rounding effects (small negative values)
void Dummy  (float&    , const float&  ) {};                               // do nothing
void Square (float& d_j, const float&  ) { d_j *= d_j;               };    // square value
void Root   (float& d_j, const float&  ) { d_j = sqrt(fabs(d_j));    };    // square root of value
void Power  (float& d_j, const float& p) { d_j = pow(fabs(d_j), p);  };    // raise value to the power of 'p'

// initialize function pointers
void TDFunc::Initialize(const float& _p)
{
   p     = _p;          // copy
   inv_p = 1.0/p;

   // set function pointers
   if(p==1)    { DFunc1 = Dummy;     DFunc2 = Dummy;  }  else        // city block distance
   if(p==2)    { DFunc1 = Square;    DFunc2 = Root;   }  else        // euclidian distance
   if(p==0.5)  { DFunc1 = Root;      DFunc2 = Square; }  else
               { DFunc1 = Power;     DFunc2 = Power;  };
};



//----------------------------------------------------------------------------------------------------------------------
// dummy constructor used if Load() will be called to initialize/fill object
TCluster::TCluster()
{
   l = r                         = NULL;     // set all pointer to NULL
   m = neg = q                   = NULL;
   activeIdL = activeIdR         = NULL;
   mapper = nActiveL = nActiveR  = NULL;
   ddata                         = NULL;


   // initialize
   _nCub = _nCubRed = avrVarPerCub  = avrHitrate = sizeRatio = 0;
   p_min = -1;                // to ensure, that Prepare() gets executed
   f_Loaded = false;          // reset flag

   #ifndef RELEASE                     // obsolete in release version
   _d_ref = 0;
   #endif
}


//----------------------------------------------------------------------------------------------------------------------
// normal constructor used if instance is filled by friend TPnc::ToTCluster()
TCluster::TCluster(const int& __nTup, const TDataData*const& _ddata, const TParameter& _para)
{
   _nCub  = __nTup;                                   // # clusters ignoring cardinality
   ddata  = _ddata;                                   // pointer to data data (extrema, weights, etc.)
   para   = _para;                                    // parameter used to learn
   dfunc.Initialize(para.Metric);                     // set metric
   f_Loaded = true;

   _nCubRed = _nCub;                                  // assume 'k=0', i.e. all cuboids are used
   avrVarPerCub = avrHitrate = sizeRatio = 0;         // initialize
   p_min = -1;                                        // to ensure, that Prepare() gets executed

   #ifndef RELEASE                                    // obsolete in release version
   _d_ref = 0;
   #endif

   Allocate();          // allocate memory for specified # clusters in ddata->nVar() dimensional space (output+input)
}


//----------------------------------------------------------------------------------------------------------------------
// destructor
TCluster::~TCluster()
{
   // a) # positive and negative examples, hitrate, mapper ...
   delete[] m;
   delete[] neg;
   delete[] q;
   delete[] mapper;
   delete[] nActiveL;
   delete[] nActiveR;


   // b) left and right bounds, active Id's ...
   DeleteMatrix(l, _nCub);
   DeleteMatrix(r, _nCub);
   DeleteMatrix(activeIdL, _nCub);
   DeleteMatrix(activeIdR, _nCub);

   // c) data data (extrema, weights etc.) object
   if(ddata)
      ddata->Release();
}


//----------------------------------------------------------------------------------------------------------------------
// allocate memory, used in constructor and load routine
void TCluster::Allocate()
{
   // allocate memory
   l = MakeMatrix((float) 0.0, _nCub, ddata->nVar());    // left and
   r = MakeMatrix((float) 0.0, _nCub, ddata->nVar());    // right bounds

   m   = new float[_nCub];                               // mass
   neg = new float[_nCub];                               // # covered negative examples
   q   = new float[_nCub];                               // hitrate


   mapper    = new int[_nCub];                           // new and initialize mapper
   nActiveL  = new int[_nCub];                           // new # active (i.e. none pruned) left bounds
   nActiveR  = new int[_nCub];                           // new # active (i.e. none pruned) right bounds

   for(int i=0;i<_nCub;i++)                              // ini
   {
      mapper[i] = i;
      nActiveLowerBounds(i)=nActiveUpperBounds(i)=0;
   }
   activeIdL = MakeMatrix((int) 1, _nCub, ddata->nVar());  // id's of active left bounds of each cuboid
   activeIdR = MakeMatrix((int) 1, _nCub, ddata->nVar());  // id's of active right bounds of each cuboid
}


//----------------------------------------------------------------------------------------------------------------------
// save model data to file (ofstream), i.e. save data data, cuboid data and pruning information
// note: If a parameter object is passed this will overrule the class' own parameter object
// note: pruning information is not written if it is not avalaible or if compact flag is set
// note: pruned variable bounds/masks are replaced by '-' if compact and prune flag are set or if no original cuboid
// bounds/masks are avalaible.
void TCluster::Save(ofstream& file, const bool& f_Compact/*=false*/, const TParameter* _para/*=NULL*/)
{
   if(!_para)              // use own parameters if nothing else is given
      _para = &para;

   if(!f_Compact)          // if everything should be saved ...
   {
      TParameter tmp = *_para;   // copy parameters
      tmp.p_min = 0;             // set cardinality to zero and ...
      Prepare(&tmp);             // ... call Prepare() to ensure that all cuboids are saved irrespective of their mass
   }
   else
      Prepare(_para);


   // a) write data data (extrema, weights etc.) and parameters
   ddata->Save(file);
   _para->Save(file);


   // b) write # cuboids
   const bool f_WritePruningInfo = f_HasPruningInformation && !f_Compact;
   const bool f_WritePrunedCubs  = (f_Compact && _para->Prune) || !f_HasOriginalCuboids;


   file << endl << endl << "[Basic]" << endl;                                       // section name
   file << "nCuboid             = " << _nCubRed << endl;                            // # cuboids
   file << "PruningInformation  = " << FlagToString(f_WritePruningInfo) << endl;
   file << "OriginalCuboids     = " << FlagToString(!f_WritePrunedCubs) << endl;
   #ifndef RELEASE                                                                  // obsolete in release version
   file << "d_ref               = " << _d_ref << endl;
   #endif
   file << endl << endl;

   // c1) write header cuboid data
   file << "[Cuboid]" << endl;                                 // section name
   file << ComChar << " Each row contains the data of one cuboid.";
   if(!f_Compact)
      file << " Hint: Use the export feature to get a more compact representation!";
   file << endl << ComChar << " Format: ";
   #ifdef HITRATE
   file << "Hitrate | ";
   #endif
   file << "# positive examples  |  # negative examples  |  output  |  left and right bound or mask of covered";
   file << " symbols for each variable" << endl << endl;


   // c2) write cuboid data
   char szText[STS];
   for(int t=0;t<nCuboidsRed();t++)                                        // over all clusters (mapped)
   {
      #ifdef HITRATE
      file << setw(6) << WritePercentage(Hitrate(t), 6, 1) << " ";         // hitrate
      #endif
      file << setw(6) << Mass(t) << " " << setw(6) << Neg(t)  << " ";      // # positive and negative examples
      file << setw(10) << Output(t) << " ";                                // output


      // compact mode
      if(f_WritePrunedCubs)
      {
         int cL=0, cU=0;   // counter to index active lower and upper bound arrays (pruning information)

         for(int j=1;j<ddata->nVar();j++)                                  // over all input variables
            if(NOMINAL_VARIABLES && ddata->IsSymbolic(j))                  // nominal:
               if(ActiveLowerBound(t, cL)==j)                              // if variable is active, i.e. not pruned ...
               {
                  if(cL<nActiveLowerBounds(t)-1)
                     cL++;                                                 // increment counter
                  WriteBits(file, Mask(t,j), ddata->nIntegerMaxMin(j));    // write bits
                  file << "  ";                                            // write space
               }
               else
                  file << setw(ddata->nIntegerMaxMin(j)) << '-' << "  ";   // write '-' indicating pruned variable
            else                                                           // continuous or ordinal:
            {
               // left bound
               if(ActiveLowerBound(t, cL)==j)                              // if left bound is active, i.e. not pruned
               {
                  if(cL<nActiveLowerBounds(t)-1)
                     cL++;
                  sprintf(szText, "%9g ->", Lower(t,j));                   // write left bound
                  file << szText;
               }
               else
                  file << "        - ->";                                  // else write '-'


               // right bound
               if(ActiveUpperBound(t, cU)==j)                              // if right bound is active, i.e. not pruned
               {
                  if(cU<nActiveUpperBounds(t)-1)
                     cU++;
                  sprintf(szText, " %-9g ", Upper(t,j));                   // write right bound
                  file << szText;
               }
               else
                  file << " -         ";                                   // else write '-'
            }
      }
      else          // non-compact mode
         for(int j=1;j<ddata->nVar();j++)                                  // over all input variables
            if(NOMINAL_VARIABLES && ddata->IsSymbolic(j))
            {
               WriteBits(file, Mask(t,j), ddata->nIntegerMaxMin(j));       // symbolic (nominal): write bits
               file << "  ";
            }
            else
            {
               sprintf(szText, "%9g -> %-9g ", Lower(t,j), Upper(t,j));    // continuous or ordinal: write left and right
               file << szText;                                             // bounds
            }

      file << endl;
   }
   file << endl << endl;



   // d1) write header pruning information
   if(!f_WritePruningInfo)    // do not proceed if there is no (useful) pruning information or if 'compact' flag is set
      return;

   file << "[Prune]" << endl;                                                          // section name
   file << ComChar << " Each row contains the none pruned variable Id's of each cuboid." << endl;
   file << ComChar << " Format: none pruned variables regarding lower bounds / none pruned variables regarding";
   file << " upper bounds" << endl << endl;


   // d2) write pruning information
   for(int t=0;t<nCuboidsRed();t++)                      // for each cuboid (mapped)
   {
      for(int g=0;g<nActiveLowerBounds(t);g++)           // write all active lower bounds
         file << (ActiveLowerBound(t,g)+1) << " ";

      file << " / ";                                     // write seperator
      for(int g=0;g<nActiveUpperBounds(t);g++)
         file << (ActiveUpperBound(t,g)+1) << " ";

      file << endl;                                      // linefeed
   }
}


//----------------------------------------------------------------------------------------------------------------------
// load model, i.e. data data, cuboid data and pruning information
void TCluster::Load(ifstream& file, int& line)
{
   // a) check
   IfTrueThrowTypeA(f_Loaded, "Function cannot be called twice in the lifetime of an instance!", "TCluster::Load()"
                     , szModule);
   f_Loaded = true;              // set flag to prevent another call of Load()

   // b) load data data and parameters
   TDataData* _ddata = new TDataData();      // note: allocate and load with local and none const pointer ...
   _ddata->Load(file, line);                 // ... to get rid of a compiler warning
   ddata = _ddata;                           // write back
   para.Load(file, line);


   // c) read cuboid data and pruning information
   char c[3];
   int t=-1, j, varId;
   bool f_ReadingSectionPrune=false;
   try                                    // try to read
   {
      // c1) read section [Basic]
      line += SearchKey(file, "[Basic]");                                        // position to section
      _nCub    = ReadKeyValue(file, "nCuboid", -1, SEARCH_LINES);                // read # cuboids
      int tmp1 = ReadKeyBool (file, "PruningInformation", -1, SEARCH_LINES);     // flags
      int tmp2 = ReadKeyBool (file, "OriginalCuboids", -1, SEARCH_LINES);
      #ifndef RELEASE                                                            // obsolete in release version
      _d_ref   = ReadKeyValue(file, "d_ref", -1.0, SEARCH_LINES);                // read average cuboid diameter
      #endif


      if(_nCub<=0)            throw 1;                                           // ... and check
      if(tmp1<0)              throw 11;
      if(tmp2<0)              throw 12;
      if(tmp1==1 && tmp2==0)  throw 13;

      #ifndef RELEASE                                                            // obsolete in release version
      if(_d_ref<0)            throw 14;
      #endif

      f_HasPruningInformation = tmp1;     // set
      f_HasOriginalCuboids    = tmp2;


      Allocate();                                                 // allocate memory

      // c2) read section [Cuboid]
      line += SearchKey(file, "[Cuboid]");                        // position to section

      for(t=0;t<_nCub;t++)    // for each cuboid
      {
         Mass(t)   = ReadExpNoEndl(file, (float) 0);                 // read # positive examples
         Neg(t)    = ReadExpNoEndl(file, (float) 0);                 // read # negative examples
         Output(t) = ReadExpNoEndl(file, (float) 0);                 // read output value

         if(f_HasOriginalCuboids)                                       // cuboid data is unpruned, i.e. without '-'
            for(j=1;j<ddata->nVar();j++)                                // read cuboid's bounds
            {
               if(NOMINAL_VARIABLES && ddata->IsSymbolic(j))            // symbolic (nominal) variable:
               {
                  skipwsNoEndl(file);                                   // remove whitespaces
                  ReadBits(file, Mask(t,j), ddata->nIntegerMaxMin(j));  // read mask
               }
               else                                                     // continuous or ordinal variable:
               {
                  Lower(t,j)=ReadExpNoEndl(file, (float) 0);            // read lower bound

                  skipwsNoEndl(file);                                   // remove '->'
                  file.getline(c, 3);                                   // read two characters
                  if(strcmp(c, "->")!=0)                                // and compare to '->'
                     throw 2;

                  Upper(t,j)=ReadExpNoEndl(file, (float) 0);            // read upper bound
               }
            }
         else                                                           // cuboid data is pruned, i.e. contains '-'
                                                                        // indicating pruned bounds/masks
            for(j=1;j<ddata->nVar();j++)                                // read cuboid's bounds
            {
               if(NOMINAL_VARIABLES && ddata->IsSymbolic(j))            // symbolic (nominal) variable:
               {
                  skipwsNoEndl(file);                                   // remove whitespaces
                  if(file.peek()!='-')                                  // if no '-' is encountered ...
                  {
                     ReadBits(file, Mask(t,j), ddata->nIntegerMaxMin(j));  // ... read mask
                     ActiveLowerBound(t, nActiveLowerBounds(t)) = j;       // add variable to pruning information
                     nActiveLowerBounds(t)++;                              // increment
                  }
                  else
                  {
                     file.get();                // remove the '-'
                     SetAllBits(Mask(t,j));     // set mask to cover all
                  }
               }
               else                                                        // continuous or ordinal variable:
               {
                  skipwsNoEndl(file);                                      // remove whitespaces
                  if(file.peek()!='-')                                     // if no '-' is encountered ...
                  {
                     Lower(t,j)=ReadExpNoEndl(file, (float) 0);            // read lower bound
                     ActiveLowerBound(t, nActiveLowerBounds(t)) = j;       // add variable to pruning information
                     nActiveLowerBounds(t)++;                              // increment
                  }
                  else
                  {
                     file.get();                      // remove the '-'
                     Lower(t,j) = -MAXFLOAT;          // set left bound minus infinity
                  }

                  skipwsNoEndl(file);                                      // remove '->'
                  file.getline(c, 3);                                      // read two characters
                  if(strcmp(c, "->")!=0)                                   // and compare to '->'
                     throw 2;

                  skipwsNoEndl(file);                                      // remove whitespaces
                  if(file.peek()!='-')                                     // if no '-' is encountered ...
                  {
                     Upper(t,j)=ReadExpNoEndl(file, (float) 0);            // read upper bound
                     ActiveUpperBound(t, nActiveUpperBounds(t)) = j;       // add variable to pruning information
                     nActiveUpperBounds(t)++;                              // increment
                  }
                  else
                  {
                     file.get();                      // remove the '-'
                     Upper(t,j) = MAXFLOAT;    // set left bound to infinity
                  }
               }
            }

         line += skipwsExEndl(file);                  // remove linefeed
      }


      // c3) read section [Prune] (pruning information)
      if(f_HasPruningInformation)
      {
         f_ReadingSectionPrune=true;
         line += SearchKey(file, "[Prune]");                         // position to section
         for(t=0;t<_nCub;t++)                                        // for each cuboid
         {
            // c3.1) read active Id's of left bounds
            while(file.peek()!='/')                                  // peek character and compare to separator '/'
            {
               if(nActiveLowerBounds(t)>=ddata->nVar())              // check if too many id's are given
                  throw 4;
               varId = ReadExpNoEndl(file, 0)-1;                     // read variable Id
               if(varId<1 || varId >=ddata->nVar())                  // check it - note: output column must not be
                  throw 6;                                           // specified
                  ActiveLowerBound(t,nActiveLowerBounds(t)++)=varId; // all checks passed? Then set Id
               skipwsExNoEndl(file);                                 // remove whitespaces
            }
            file.get();                                              // remove the seperator '/'


            // c3.2) read active Id's of right bounds
            int lplus=skipws(file);
            while(lplus==0)                                          // continue until linebreak
                                                                     // (PoD: Maybe a problem in Unix/Linux??)
            {
               if(nActiveUpperBounds(t)>=ddata->nVar())              // check if too many id's are given
                  throw 5;
               varId = ReadExpNoEndl(file, 0)-1;                     // read variable Id
               if(varId<1 || varId >= ddata->nVar())                 // check it - note: output column must not be
                                                                     //  specified
                  throw 7;
               if(NOMINAL_VARIABLES && ddata->IsSymbolic(varId))     // check: do not specify upper bound for symbolic (nominal) variables
                  throw 8;
               ActiveUpperBound(t,nActiveUpperBounds(t)++) = varId;  // all checks passed? Then set Id

               lplus=skipws(file);
            }
            line += lplus;                                           // track line number
         }
      }

   }
   catch(int errNo)     // exception handling
   {
      char szText[STS];
      switch(errNo)       // compose error text
      {
         case 1           : strcpy (szText, "Error in section [Basic]: Value of 'nCuboid' missing, negative or zero!"); break;
         case 11          : strcpy (szText, "Error in section [Basic]: Flag 'PruningInformation' not found!"); break;
         case 12          : strcpy (szText, "Error in section [Basic]: Flag 'OriginalCuboids' not found!"); break;
         case 13          : strcpy (szText, "Error in section [Basic]: Flag 'PruningInformation' cannot be set if flag 'OriginalCuboids' is unset!"); break;
         case 14          : strcpy (szText, "Error in section [Basic]: Reference distance 'd_ref' not found or value negative! Please coorect."); break;
         case 2           : sprintf(szText, "Error in line %d in section [Cuboid]: Wanted '->' but found %s while reading cuboid %d, bound %d!", line, c, t+1, j); break;
         case 4           : sprintf(szText, "Error in line %d in section [Prune]: Found to many variable Id's for lower bounds of cuboid %d!", line, t+1); break;
         case 5           : sprintf(szText, "Error in line %d in section [Prune]: Found to many variable Id's for upper bounds of cuboid %d!", line, t+1); break;
         case 6           : sprintf(szText, "Error in line %d in section [Prune]: Variable Id '%d' for lower bounds of cuboid %d is not e[2..%d]!", line, varId+1, t+1, ddata->nVar()); break;
         case 7           : sprintf(szText, "Error in line %d in section [Prune]: Variable Id '%d' for upper bounds of cuboid %d is not e[2..%d]!", line, varId+1, t+1, ddata->nVar()); break;
         case 8           : sprintf(szText, "Error in line %d in section [Prune]: Variable %d is symbolic (nominal). Thus it cannot be given as variable Id for the upper bounds of cuboid %d!", line, varId+1, t+1);  break;
         case KeyNotFound : sprintf(szText,"%s",::GetLastError(errNo)); break;
         default          : if(t<0)
                              sprintf(szText,"Error in section [Basic]: %s", ::GetLastError(errNo));
                            else
                              if(f_ReadingSectionPrune)
                                 sprintf(szText,"Error in line %d in section [Prune] while reading cuboid %d: %s!", line, t+1, ::GetLastError(errNo));
                              else
                                 sprintf(szText,"Error on line %d in section [Cuboid] while reading cuboid %d, bound %d: %s!", line, t+1, min(j,ddata->nVar()-1), ::GetLastError(errNo));
      }
      ThrowTypeU(szText);           // throw error
   }


   // misc.
   CalculateHitRate();                 // caclulate cuboid hitrates
   dfunc.Initialize(para.Metric);      // initialize metric
}


//----------------------------------------------------------------------------------------------------------------------
// calculate hitrate of each cuboid/cluster
void TCluster::CalculateHitRate()
{
   #ifdef LAPCOR_HITRATE
   int nClasses = para.N_Int;                // ini for regression tasks
   if(!para.f_Regression)
      // adjust for classification tasks, e.g. if a two class problem is encoded with '7' and '15' as output values)
      nClasses = ddata->nSymbolsFound(0);
   #endif


   #ifdef CONF_HITRATE
   float u_Gamma = CalcUGamma(ALPHA); // used for confidence intervals
   #endif

   // calculate hitrate of each cuboid/cluster
   for(int i=0;i<_nCub;i++)

      #ifdef NORMAL_HITRATE
      q[i]=m[i]/(m[i]+neg[i]);
      #endif

      #ifdef LAPCOR_HITRATE
      q[i]=(m[i]+1)/(m[i]+neg[i]+nClasses);
      #endif

      #ifdef CONF_HITRATE
      q[i]=1-CalcUpper(m[i]+neg[i], neg[i], u_Gamma);
      #endif
}


//----------------------------------------------------------------------------------------------------------------------
// prepare to be used to make predictions: initialize mapper to all cuboids with a mass bigger than TParameter::k and
// calculate average hitrate, cuboid size etc.
// note: If a parameter object is passed this will overrule the class' own parameter object
void TCluster::Prepare(const TParameter* _para/*=NULL*/)
{
   if(!_para)                                                        // use own parameters if nothing else is given
      _para = &para;

   if(p_min==_para->p_min && f_UseWeights==_para->Weights && metric==_para->Metric)    // return if nothing has changed
      return;

   // else:


   // store parameters
   dfunc.Initialize(_para->Metric);    // set metric fucntion
   metric       = _para->Metric;
   f_UseWeights = _para->Weights;
   p_min        = _para->p_min;


   // ini/reset
   avrVarPerCub = 0;    // counts # non-prunde variables
   avrHitrate   = 0;    // sums up hitrate
   avrMass      = 0;    // sums up masses
   _nCubRed     = 0;    // counts # clusters that exceed min. cuboid mass


   // over all clusters  -  note: variable types and cluster data are with(!) output
   for(int i=0;i<_nCub;i++)
      if(m[i]>_para->p_min)      // use only cuboids that exceed the min. cuboid mass
      {
         mapper[_nCubRed] = i;   // a) set mapper and increment counter

         avrHitrate += q[i];     // b) sum up hitrate and mass
         avrMass    += m[i];

         // c) count active bounds
         for(int g=0;g<nActiveL[i];g++)            // count active lower bounds
            if(NOMINAL_VARIABLES && ddata->IsSymbolic(activeIdL[i][g]))
               avrVarPerCub += 2;                  // symbolic bounds count for 2
            else
               avrVarPerCub++;                     // increment
         avrVarPerCub += nActiveR[i];              // add # upper bounds (note: can only be non-symbolic)

         _nCubRed++;    // increment counter
      }

   // turn sums in mean values
   if(_nCubRed==0)
      avrVarPerCub = avrHitrate = avrMass = sizeRatio = 0;
   else
   {
      // calculate average values per cuboid
      avrVarPerCub /= 2*_nCubRed;                     // # active bounds
      avrHitrate   /= _nCubRed;                       // hitrate
      avrMass      /= _nCubRed;                       // mass

      sizeRatio = _nCubRed/((float) ddata->nTup());   // size relative to original # tuples in learn data
   }
}

//----------------------------------------------------------------------------------------------------------------------
// calculate average diameter, i.e. distance from lower left to upper right bound

#ifndef RELEASE                     // obsolete in release version
void TCluster::CalcAvrDiameter()
{
   _d_ref = 0;    // ini

   // over all cuboids
   float* d = new float[ddata->nVar()];      // componentwise range of cuboid
   for(int i=0;i<_nCub;i++)
   {
      for(int j=1;j<ddata->nVar();j++)          // over all input(!) variables
         if(NOMINAL_VARIABLES && ddata->IsSymbolic(j))
         {
            // --------------------
            // symbolic (nominal):
            int cSet=0;
            for(int g=0;g<ddata->nIntegerMaxMin(j);g++)
               if(IsBitSet(MaskDirect(i,j), g))                // count how many symbols are covered
                  cSet++;

            cSet = min(cSet, ddata->nSymbolsFound(j));         // bound, because SetAllBits() could have been called

            d[j] = ((float) cSet)/ddata->nSymbolsFound(j);     // note: divide by # of really occuring symbols
            d[j] *= ddata->NormFac()[j];                       // multiply by normalization factor

            // raise component wise distance to the power of metric parameter 'p'
            dfunc.DFunc1(d[j], dfunc.p);

            if(f_UseWeights)
               d[j] *= ddata->Weights()[j];                    // scale by variable's weight
         }
         else
         {
            // --------------------
            // continuous or ordinal:
            d[j] = ddata->NormFac()[j] * (r[i][j]-l[i][j]);

            dfunc.DFunc1(d[j], dfunc.p);              // raise distance to the power of metric parameter 'p'

            if(f_UseWeights)
               d[j] *= ddata->Weights()[j];           // scale by variable's weight if it's specified
         }


      // 2nd step
      float d_sum=0;
      for(int j=1;j<ddata->nVar();j++)       // over all input(!) variables
         d_sum += d[j];                      // sum up component wise distances
      dfunc.DFunc2(d_sum, dfunc.inv_p);      // raise sum to the power of '1/p' ...
      _d_ref += d_sum;
   }
   delete[] d; // release

   _d_ref /= _nCub;;             // average
}
#endif


//----------------------------------------------------------------------------------------------------------------------
// return # average active bounds
const float TCluster::AvrVarPerCub(const bool& f_Pruned)  const{
   if(f_Pruned) return avrVarPerCub;
   else         return ddata->nVar()-1;};


//----------------------------------------------------------------------------------------------------------------------
// calculate distance of j-th variable to left bound
// note: 'j' is given with(!) output and 'cubId' is already mapped
float TCluster::d_Lower(const int& j, const float x_j, const int& cubId)
{
   float d_j=0;           // ini

   // note: symbolic flags, weights, 'l' (also Mask()) and 'r' are with(!) output
   if(NOMINAL_VARIABLES && ddata->IsSymbolic(j))
   {
      // ----------------
      // symbolic (nominal):
      const int bit = x_j-ddata->Min()[j];

      // note on low and highcheck: test data may have a different minimum, i.e. unseen symbol ...
      if(bit<0 || bit>MaxBits || !IsBitSet(MaskDirect(cubId, j), bit))     // (PoD)
      {
         d_j = ddata->NormFac()[j] * 1.0;             // not covered

         // note: code placed here(!) because it should not be executed if distance is zero
         dfunc.DFunc1(d_j, dfunc.p);       // raise component wise distance ('d_j') to the power of metric parameter 'p'
         if(f_UseWeights)
            d_j *= ddata->Weights()[j];    // scale by variable's weight if it's specified
      }
   }
   else

      // --------------------
      // continuous or ordinal:
      if(x_j<l[cubId][j])                             // if value id left of left bound ....
      {
         d_j = ddata->NormFac()[j]*(l[cubId][j]-x_j); // use distance value to left bound normalized by variable's range

         // note: code placed here(!) because it should not be executed if distance is zero
         dfunc.DFunc1(d_j, dfunc.p);      // raise component wise distance ('d_j') to the power of metric parameter 'p'
         if(f_UseWeights)
            d_j *= ddata->Weights()[j];   // scale by variable's weight if it's specified
      }


   return d_j;
}


//----------------------------------------------------------------------------------------------------------------------
// calculate distance of j-th variable to right bound
// note: 'j' is given with(!) output and 'cubId' is already mapped
float TCluster::d_Upper(const int& j, const float x_j, const int& cubId)
{
   float d_j=0;           // ini

   // --------------------
   // continuous or ordinal:
   if(x_j>r[cubId][j])
   {                                               // if value is right of right bound ....
      d_j = ddata->NormFac()[j]*(x_j-r[cubId][j]); // use distance value to right bound normalized by variable's range

      dfunc.DFunc1(d_j, dfunc.p);        // raise component wise distance ('d_j') to the power of metric parameter 'p'
      if(f_UseWeights)
         d_j *= ddata->Weights()[j];     // scale by variable's weight if it's specified
   }

   return d_j;
}


//----------------------------------------------------------------------------------------------------------------------
// distance cuboid to tuple using pruned cuboids
float TCluster::PrunedDistance(const float*const& /* input data */ x, const int& _cubId)
{
   float d=0;                       // ini
   int cubId = mapper[_cubId];      // get real cuboid id


   // over all active left bounds
   for(int h=0;h<nActiveL[cubId];h++)
   {
      int j = activeIdL[cubId][h];        // variable id with(!) output
      d += d_Lower(j, x[j-1], cubId);     // sum up
   }



   // over all active right bounds
   for(int h=0;h<nActiveR[cubId];h++)
   {
      int j = activeIdR[cubId][h];        // variable id with(!) output
      d += d_Upper(j, x[j-1], cubId);     // sum up
   }

   dfunc.DFunc2(d, dfunc.inv_p);          // raise sum of component wise distances ('d') to the power of '1/p'
   return d;                              // return distance
}


//----------------------------------------------------------------------------------------------------------------------
// distance cuboid to tuple
float TCluster::Distance(const float*const& /* input data */ x, const int& _cubId)
{
   float d=0;                       // ini
   int cubId = mapper[_cubId];      // get real cuboid id

   // over all left bounds
   for(int j=1;j<ddata->nVar();j++)                         // variable id with(!) output
      d += d_Lower(j, x[j-1], cubId);                       // sum up


   // over all right bounds
   for(int j=1;j<ddata->nVar();j++)                         // variable id with(!) output
      if(!(NOMINAL_VARIABLES && ddata->IsSymbolic(j)))      // note: symbolic flags are with(!) output
         d += d_Upper(j, x[j-1], cubId);                    // sum up


   dfunc.DFunc2(d, dfunc.inv_p);          // raise sum of component wise distances ('d') to the power of '1/p'

   return d;                              // return distance
}


//----------------------------------------------------------------------------------------------------------------------
int BarId(const int& mass)
{
   return min(N_BARS-1, max(0, mass-1));
}

void TCluster::CalculateHistogram()
{
   for(int h=0;h<N_BARS;h++)  // reset histogram values
      hist[h]=0;


   for(int i=0;i<_nCub;i++)
      hist[BarId(m[i])]++;
}