static const char* szModule = "pnc.cpp";

//------------------------------------------------------------------------------
//    module pnc.cpp                                                          //
//                                                                            //
//    Contains the class TPnc and all other necessary sub-classes for         //
//    the PNC2 cluster algorithm.                                             //
//    See below or http://www.newty.de/pnc2/sdocu.html for more information.  //
//                                                                            //
//    copyright (c) 1999-2003 by Lars Haendel                                 //
//    home: www.newty.de                                                      //
//                                                                            //
//    This program is free software and can be used under the terms of the    //
//    GNU licence. See header file for further information and disclaimer.    //
//                                                                            //
//------------------------------------------------------------------------------
//                                                                            //
//    CREATE: Pass learn data (TData) and parameters (TParameter)             //
//                                                                            //
//    USE: Learn by calling the function Iterate() as long as it returns     //
//    'false'; learning proceeds in two steps. You can query the             //
//    the current progress and state via GetProgress() and                   //
//    GetStatusText(). After learning has finished, you have to convert      //
//    the result to TCluster using ToTCluster() in order to use the          //
//    learned model with a prediction object (TPrediction). If               //
//    requested in the parameter object (TParameter::f_Prune) or via         //
//    the calling argument 'f_PruneAnyway', ToTCluster() calls               //
//    PostPrune() and performs a context-sensitive feature selection         //
//    for each cuboid, storing the result as additional(!) information.      //
//    Via the calling argument 'K' you may also specify that cuboids         //
//    whose mass is less than 'K' are not converted. Note: the               //
//    parameter TParameter::p_min is ignored here so that you can            //
//    change it and try different settings later.                            //
//                                                                            //
//    File I/O: main class and sub-classes have debug save routines           //
//------------------------------------------------------------------------------
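
//----------------------------------------------------------------------------------------------------------------------
//  Usage sketch (illustrative only - the exact signatures, e.g. of ToTCluster(), are those declared in pnc.h):
//
//     TPnc pnc(data, para);                     // 'data' must be sorted, see TPnc::TPnc()
//     while(!pnc.Iterate())                     // learn: iterate as long as Iterate() returns 'false'
//        cout << pnc.GetStatusText() << " " << pnc.GetProgress() << endl;
//     TCluster* cluster = pnc.ToTCluster();     // convert to use the model with a TPrediction object
//                                               // (optionally pass 'f_PruneAnyway' and the mass threshold 'K')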


//----------------------------------------------------------------------------------------------------------------------
//  Additional notes:
//
//  1. TInterval::Del(opI) means merge the 'opI'-th interval with the 'opI-1'-th one
//  2. TCuboidsLists (list with TCuboid lists) must be kept consistent with TInterval
//  3. TPnc::nInt         = # intervals used to discretize the output (regular intervals)
//     TIntervals::nInt() = # non-empty regular intervals
//     TIntervals::Size() = # output intervals if the # tuples per interval is bounded by 'N_G_Max'
//                          (illustrative example below)
//
//  4. very important variables:
//     int opI -  first pass              : currently processed output interval, gets incremented
//                2nd pass (cascade-mode) : currently processed output interval, determined by TIntervals::NextInterval()
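//
//  example for note 3 (illustrative): with N_Int = 5 regular output intervals of which only 3 contain tuples,
//  TIntervals::nInt() is 3 while TPnc::nInt is 5; if one of the non-empty intervals holds more than 'N_G_Max'
//  tuples it is split into several groups, so TIntervals::Size() can exceed TIntervals::nInt()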



//----------------------------------------------------------------------------------------------------------------------
#include <limits>          // due to   MAXFLOAT
#include <iomanip>         //          setw()
#include <stdio.h>         //          sprintf()
#include <stdlib.h>        //          min() ...
#include <math.h>          //          sqrt()
#include <values.h>        //          MAXFLOAT


#include "pnc.h"
#include "defines.h"       //          definitions like DEBUG ...
#include "fileutil.h"      //          RemoveExt()
#include "exception.h"     //          IfTrueThrowTypeA()
#ifndef RELEASE
#include "statistics.h"    //          CalcUGamma (confidence intervals)
#endif

//#define PROFILE             // insert profiling code. results are written to 'profile.txt'
#ifdef PROFILE
#include <time.h>
clock_t  t_FindMin, t_FindMinEx, t_NegTest, t_PostPrune, t_Convert2TCluster, t_Reset, t_Learn;
clock_t  t_TCuboids, t_Distance, t_TIntervals;
long     nFindMin, nFindMinEx, nNegTest, nHitTest, nTCuboids, nDistance, nConvert2TCluster;
ofstream logfile("profile.txt", ios::app);
#endif



   //-------------------------------------------------------------------------------------------------------------------
   //
   //    class TCuboid
   //


//----------------------------------------------------------------------------------------------------------------------
// allocate memory
void TCuboid::Initialize(const int& nInp)
{
   delete[] l;                         // delete old memory
   delete[] r;
   delete tuples;

   l = new float[nInp];                // ... and allocate new one
   r = new float[nInp];

   tuples = new TTupleIdList;          // tuple id list
   tuples->SetName("TTupleIdList");
}



   //-------------------------------------------------------------------------------------------------------------------
   //
   //    class TCuboids - TCuboid list
   //

//----------------------------------------------------------------------------------------------------------------------
// constructor - create empty cuboid list, used when two cuboid lists are merged (2nd pass cascade mode)
TCuboids::TCuboids(const TDFunc& _dfunc, const TDataData*const& _ddata, const bool& _f_UseWeights)
{
   dfunc        = _dfunc;
   ddata        = _ddata;
   f_UseWeights = _f_UseWeights;    // flag: use feature weights in distance function

   list  = new TCuboidList;
   list->SetName("TCuboidList(M)");
}


//----------------------------------------------------------------------------------------------------------------------
// constructor - create trivial cuboids from (part of the) (learn) data
TCuboids::TCuboids(const TIntervals*const& intervals, const TData*const& _data, const int& opI, const TDFunc& _dfunc
                     , const TDataData*const& _ddata, const bool& _f_UseWeights)
{
#ifdef PROFILE
clock_t time=clock();
nTCuboids++;
#endif

   // 1.
   dfunc        = _dfunc;
   ddata        = _ddata;
   f_UseWeights = _f_UseWeights;    // flag: use feature weights in distance function

   list  = new TCuboidList;
   list->SetName("TCuboidList");


   // 2. abbreviations
   const float    y_quant  = intervals->Y_Quant(opI);       // quantized output value
   const float    y_snap   = intervals->Y_Snap(opI);        // snapped, i.e. quantized and de-quantized, output value
   const int      nTup     = intervals->Size(opI);          // # tuples to create trivial cuboids from
   const float**  data     = intervals->Get(opI);           // pointer to first tuple (of interval)
   const int      id       = intervals->Id(opI);            // id of first tuple (of interval)


   // 3. create trivial cuboids
   for(int i=0;i<nTup;i++)
   {
      TCuboid& cuboid = list->Ins();            // a) insert new list entry (cuboid)
      cuboid.Initialize(ddata->nInp());         // b) initialize it


      // c) set cuboid's bounds and/or masks
      //    note: symbolic flags, minima/maxima and the data tuples themselves are with(!) output
      for(int j=0;j<ddata->nInp();j++)

         if(NOMINAL_VARIABLES && _data->IsSymbolic(j+1))                            // symbolic (nominal):
         {
            ClearAllBits(cuboid.Mask(j));                                           // reset mask
            SetBit(cuboid.Mask(j), (int) (data[i][j+1]-_data->Min()[j+1]));         // set current symbol's flag (PoD)
         }
         else                                                                       // continuous or ordinal:
            cuboid.Min(j) = cuboid.Max(j) = data[i][j+1];                           // set left and right bounds


      // d)
      cuboid.y_snap  = y_snap;      // snapped, i.e. quantized and de-quantized, output value
      cuboid.y_quant = y_quant;     // quantized output value
      cuboid.m       = 1;           // mass
      cuboid.neg     = 0;           // # covered negative examples
      int tupleId    = id+i;        // tuple id
      cuboid.tuples->Ins(tupleId);  // initialize tuple id list
   }

#ifdef PROFILE                      // profiling code
t_TCuboids +=clock()-time;
#endif
}


//----------------------------------------------------------------------------------------------------------------------
// debug function - save cuboid list
void TCuboids::Save(ofstream& file) const
{
   // 1. write header
   file << ComChar << " TCuboids" << endl;
   file << ComChar << " # Inputs    = " << ddata->nInp() << endl;
   file << ComChar << " # Cuboids   = " << list->Size()  << endl;
   file << ComChar << " output (snapped and quantized) |  left and right bounds for all input variables  |  mass  |";
   file << "  # negative examples" << endl;

   // 2. write cuboid data
   list->Reset();                                  // position to top of list
   for(int i=0;i<list->Size();i++)                 // over all cuboids
   {
      // a) get reference to i-th cuboid
      list->Next();
      const TCuboid& cuboid = list->Get();

      // b) write output value (snapped and quantized)
      file << cuboid.y_snap << " " << cuboid.y_quant << " ";


      // c) write cuboid's bounds and/or masks
      for(int j=0;j<ddata->nInp();j++)
         if(NOMINAL_VARIABLES && ddata->IsSymbolic(j+1))
            WriteBits(file, cuboid.Mask(j));                       // symbolic (nominal): write bits
         else
            file << " " << cuboid.Min(j) << " "  << cuboid.Max(j); // continuous or ordinal: write left and right bound


      // d) mass, covered negative examples
      file << " " << cuboid.m << " " << cuboid.neg << endl;
   }
   file << endl;
}


//----------------------------------------------------------------------------------------------------------------------
// distance of i-th cuboid in list to reference cuboid <ref>
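// note: effectively this is a weighted Minkowski-type distance over the per-variable gaps,
//          d = ( sum_j  w_j * gap_j^p )^(1/p)
//       where gap_j is the normalization factor times the gap between the two bounding intervals (0 if they
//       overlap) for continuous/ordinal variables, or the normalization factor itself for nominal variables
//       whose bit masks share no symbol; the weights w_j are only applied if 'f_UseWeights' is set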
float TCuboids::Distance(const int& i, const TCuboid*const& ref) const
{
   #ifdef PROFILE
   clock_t time=clock();
   nDistance++;
   #endif

   // ini
   float d = 0;

   // get i-th cuboid from list
   list->Pos(i);
   const TCuboid& cuboid = list->Get();


   // calculate distance of cuboids to each other
   // note: weight, variable types and IvdmDistance() are with(!) output
   for(int j=0;j<ddata->nInp();j++)                                                       // over all inputs
   {
      float d_j=0;   // distance of j-th variable


      //--------------------------------------------
      // overlap-metric
      if(NOMINAL_VARIABLES && ddata->IsSymbolic(j+1))
      {
         // symbolic (nominal):
         if((ref->Mask(j) & cuboid.Mask(j)) == 0)        // no bits overlapping ?
            d_j = ddata->NormFac()[j+1] * 1.0;
      }
      else
      {
         // continuous or ordinal:
         const float l1 = ref->Min(j), r1 = ref->Max(j), l2 = cuboid.Min(j), r2 = cuboid.Max(j);      // abbreviations

         // determine situation
         d_j = 0;    // ini
         if(r1<l2)
            d_j = ddata->NormFac()[j+1]*(l2-r1);                           // right bound 1 lies left of left bound 2
         else
            if(l1>r2)
               d_j = ddata->NormFac()[j+1]*(l1-r2);                        // left bound 1 lies right of right bound 2

      }

      dfunc.DFunc1(d_j, dfunc.p);         // raise component wise distance ('d_j') to the power of metric parameter 'p'
      if(f_UseWeights)
         d_j *= ddata->Weights()[j+1];    // scale by variable's weight if it's specified

      d += d_j;
   }


   #ifdef PROFILE                         // profiling code
   t_Distance+=clock()-time;
   #endif


   dfunc.DFunc2(d, dfunc.inv_p);          // raise sum of component wise distances ('d') to the power of '1/p'

   return d;
}


//----------------------------------------------------------------------------------------------------------------------
// return generalization of two cuboids
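// e.g. (illustrative): continuous bounds [1,3] and [2,5] generalize to [1,5]; nominal masks 0010 and 0100
//                      generalize to 0110 (bitwise OR); masses are added and the tuple id lists concatenated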
TCuboid* /* cr */ Generalize(const TCuboid& a, const TCuboid& b, const TData*const& data)
{
   // a) allocate memory
   TCuboid* cuboid = new TCuboid;
   cuboid->Initialize(data->nVar()-1);


   // b) generalize
   for(int j=0;j<data->nVar()-1;j++)
      if(NOMINAL_VARIABLES && data->IsSymbolic(j+1))     // '+1' because variable types are with(!) output

         // symbolic (nominal):
         cuboid->Mask(j) = a.Mask(j) | b.Mask(j);        // generalize by OR operation
      else
      {
         // continuous or ordinal:
         cuboid->Min(j) = min(b.Min(j),   a.Min(j));     // minimum of left bounds
         cuboid->Max(j) = max(b.Max(j),   a.Max(j));     // maximum of right bounds
      }

   // c) sum up masses
   cuboid->m = a.m + b.m;

   // d) copy
   cuboid->y_snap  = a.y_snap;
   cuboid->y_quant = a.y_quant;


   // e) store id's of merged tuples
   for(int i=0;i<a.m;i++) cuboid->tuples->Ins(a.tuples->Get(i));
   for(int i=0;i<b.m;i++) cuboid->tuples->Ins(b.tuples->Get(i));


   return cuboid;
}


//----------------------------------------------------------------------------------------------------------------------
// delete the min(i,j)-th cuboid and overwrite the max(i,j)-th cuboid with <cuboid>
void TCuboids::Merge(const int& i, const int& j, TCuboid& _cuboid)
{
   // a) delete cuboid with smaller index
   Del(min(i, j));


   // b) get reference to cuboid with bigger index
   TCuboid& cuboid = Get(max(i, j)-1);             // note: '-1' cause a preceding cuboid in the list was deleted in a)


   // c) delete 'old' memory - necessary cause cuboid will be overwritten
   cuboid.Release();


   // d) set (copy/overwrite) new cuboid
   cuboid = _cuboid;
}



   //-------------------------------------------------------------------------------------------------------------------
   //
   //    class TCuboidsEx
   //

//----------------------------------------------------------------------------------------------------------------------
// constructor
TCuboidsEx::TCuboidsEx(TCuboids*const& _a, TCuboids*const& _b, const TDFunc& dfunc, const TDataData*const& ddata
                        , const bool& f_UseWeights)
{
   is_extended=true;

   // copy
   a = _a;
   b = _b;

   // new cuboid list for merged cuboids
   m = new TCuboids(dfunc, ddata, f_UseWeights);
}


//----------------------------------------------------------------------------------------------------------------------
// destructor
TCuboidsEx::~TCuboidsEx()
{
   if(is_extended)
      delete a;

   delete b;
   delete m;
}


//----------------------------------------------------------------------------------------------------------------------
// merge two cuboids
void TCuboidsEx::Merge(TCuboids*const& cub1, int id1, TCuboids*const& cub2, const int& id2, TCuboid& cuboid)
{
   cub2->Del(id2);                        // delete 2nd cuboid from corresponding list


   if(cub1 == m)                          // if first cuboid is from list 'm' ...
   {
      if(cub2 == m)                          // if 2nd cuboid is also from list 'm' ...
         id1--;                                 // decrement its id cause one preceding cuboid was deleted

      TCuboid& entry = cub1->Get(id1);       // get first cuboid
      entry.Release();                       // delete memory
      entry = cuboid;                        // set (copy/overwrite) new cuboid
   }
   else                                   // else if first cuboid isn't from cuboid list 'm' ...
   {
      cub1->Del(id1);                        // ... delete it from corresponding list
      m->App(cuboid);                        // append new cuboid to cuboid list 'm'
   }
}



   //-------------------------------------------------------------------------------------------------------------------
   //
   //    class TCuboidsLists  -  encapsulates list over cuboid lists
   //

//----------------------------------------------------------------------------------------------------------------------
// constructor
TCuboidsLists::TCuboidsLists()
{
   list = new TCuboidsList;               // create list
   list->SetName("TCuboidsList");         // set list name

   mapper = NULL;
}


//----------------------------------------------------------------------------------------------------------------------
// destructor
TCuboidsLists::~TCuboidsLists()
{
   DeleteEntries(list);    // delete list entries ...
   delete list;            // ... and list

   delete[] mapper;
}


//----------------------------------------------------------------------------------------------------------------------
// return mapper, call MakeMapper() first if flag <consistent> is unset
const TCuboid**const& TCuboidsLists::GetMapper(bool& consistent)
{
   if(!consistent)         // call MakeMapper() first if flag is unset
      MakeMapper();
   consistent=true;

   return (const TCuboid**const) mapper;
}


//----------------------------------------------------------------------------------------------------------------------
// create mapper to all cuboids
void TCuboidsLists::MakeMapper()
{
   delete[] mapper;                 // release old mapper

   // a) count exemplars
   _nCuboids=0;
   for(int i=0;i<Size();i++)        // over all cuboid lists
      _nCuboids +=Size(i);          // sum up


   // b) create mapper
   mapper = new TCuboid*[_nCuboids];

   int c=0;                               // counter
   for(int i=0;i<Size();i++)              // over all cuboid lists
   {
      const TCuboids* cuboids = Get(i);   // get i-th cuboid list

      for(int t=0;t<Size(i);t++)          // over all cuboids in cuboid list
         mapper[c++] = &cuboids->Get(t);
   }


   IfTrueThrowTypeB(c!=_nCuboids, MSG_IPI, "TCuboidsLists::MakeMapper", szModule);
}



   //-------------------------------------------------------------------------------------------------------------------
   //
   //    class TIntervals and utility functions  -  provides output interval wise access to (learn-) data tuples
   //


   // step through the data tuples up from position <tup+1> and count how many tuples have the same quantized output
   // value. If this number exceeds the maximum group size 'N_G_Max', the group size actually used is reduced accordingly.
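   // e.g. (illustrative): 10 tuples share the quantized output value and N_G_Max is 4
   //                      -> ceil(10/4) = 3 groups, so the group size returned is 10/3 = 3.33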
   float DetermineGroupSize(const int& y_quant, const int& tup, const int& nTup, const float**const& dat
                           , const float& max, const float& min, const TParameter*const& para)
   {
      // determine # tuples with same quantized output value
      int i;
      for(i=tup+1;i<nTup;i++)
         if(y_quant!=Quant(dat[i][0], max, min, para->N_Int, !para->f_Regression))
            break;

      float nTupSameOutput = i-tup;                                       // # tuples with same quantized output value
      int   nGroups        = (int) ceil(nTupSameOutput/para->N_G_Max);    // # groups necessary

      return nTupSameOutput/nGroups;
   }


//----------------------------------------------------------------------------------------------------------------------
// constructor
TIntervals::TIntervals(const TData*const& _data, const TParameter*const& para)
{
#ifdef PROFILE
clock_t time=clock();
#endif

   data  = _data;                                  // copy
   float max = data->Max()[0];
   float min = data->Min()[0];
   const float** dat = data->GetDataPointer();     // direct pointer to data matrix


   // <order> is used to access variables in order of decreasing weight, which speeds up HitTest()
   order = new TSorter(data->nVar()-1);                        // create
   for(int j=0;j<data->nVar()-1;j++)
      order->GetA(j) = order->GetB(j) = data->Weights()[j+1];  // note: weights are with(!) output  - note: use weights
                                                               // irrespective of flag TParameter::f_UseWeights
   order->SortDes();                                           // sort descending



   // create interval list
   list  = new TIntervalList;                      // new list
   list->SetName("TIntervalList");


   // ini
   // quantize first output value (is the lowest one cause tuples are sorted)
   float y_quant = Quant(dat[0][0], max, min, para->N_Int, !para->f_Regression);
   const float** pt2opI = &dat[0];                             // pointer to first tuple of interval
   int           id     = 0;                                   // id of       "      "    "     "
   _nInt=1;                                                    // reset (ini with '1' and not with '0'!)

   int k = 1;        // # tuples in interval/group
   float loss = 1.5; // ini: '1.0' to increase first interval by one tuple and '0.5' to cope with rounding effects
   float N_G_Max = DetermineGroupSize(y_quant, 0, data->nTup(), dat, max, min, para);  // max. interval/group size for
                                                                                       // first quantized output value
   // iterate tuples and close old interval if #tuples exceeds <N_G_Max> or (quantized) output value changes
   for(int i=1;i<data->nTup();i++)
   {
      int _y_quant = Quant(dat[i][0], max, min, para->N_Int, !para->f_Regression);     // quantize i-th output value


      // check conditions to close old interval: other quantized output value or too many tuples in interval
      if(_y_quant != y_quant || k >= floor(N_G_Max) + floor(fabs(loss)))
      {
         #ifdef DEBUG_LOG_ON
         DebugLogFile << "k=" << k << " ";
         #endif


         // sum up loss: intervals can only have integer # tuples in them but calculated size is a float value
         if(loss>=1)
            loss -= 1;
         loss += N_G_Max-floor(N_G_Max);  // sum up loss


         // if output value has changed ...
         if(_y_quant != y_quant)
         {
            N_G_Max = DetermineGroupSize(_y_quant, i, data->nTup(), dat, max, min, para);  // determine new group size
            loss = 1.5; // ini: see note above
            _nInt++;                                        // increment number of non-empty intervals
         }

         // close old interval
         TInterval& interval = list->Ins();                 // insert new interval in list
         interval.opI  = pt2opI;
         interval.id   = id;
         interval.nTup = k;
         interval.y_quant = y_quant;
         interval.y_snap  = DeQuant(y_quant, max, min, para->N_Int, !para->f_Regression);


         // store values for new interval
         y_quant  = _y_quant;                      // store new (quantized) output value
         pt2opI   = &dat[i];                       // -> first tuple of new interval
         id       = i;
         k = 1;
      }
      else
         k++;  // increment interval tuple counter
   }

   // close last interval
   TInterval& interval = list->Ins();        // insert new interval in list
   interval.opI      = pt2opI;
   interval.id       = id;
   interval.nTup     = k;
   interval.y_quant  = y_quant;
   interval.y_snap   = DeQuant(y_quant, max, min, para->N_Int, !para->f_Regression);


   // create and fill check-vectors, i.e. arrays with the id's of tuples with a different class which may be neg. examples
   checkVec       = new int*[para->N_Int];
   len_checkVec   = new int [para->N_Int];

   for(int i=0;i<para->N_Int;i++)         // create
      CreateCheckVec(i, para);

   nCheckVec = para->N_Int;               // store for destructor
   id4Bg2 = 0;                            // ini for function NextInterval()


   #ifdef DEBUG_LOG_ON
   DebugLogFile << "k=" << k << " ";
   DebugLogFile << "Intervalls=" << list->Size() << " ";
   #endif



#ifdef PROFILE
t_TIntervals+=clock()-time;
#endif
}


//----------------------------------------------------------------------------------------------------------------------
// destructor
TIntervals::~TIntervals()
{
   delete list;

   // delete check-vectors
   for(int i=0;i<nCheckVec;i++)
      delete[] checkVec[i];

   delete[] checkVec;
   delete[] len_checkVec;
   delete order;
}


//----------------------------------------------------------------------------------------------------------------------
// debug function
void TIntervals::Save(ofstream& file) const
{
   file << ComChar << " TIntervals" << endl;
   file << ComChar << " # tuples   = " << data->nTup() << endl;
   file << ComChar << " # intervals= " << list->Size() << endl;
   file << ComChar << " interval id |  quantized interval value  |  # tuples in it  |  snapped interval value" << endl;

   // over all intervals
   for(int i=0;i<list->Size();i++)
      file << i << " " << Y_Quant(i)  << " " << Size(i) << " " << Y_Snap(i) << endl;
   file << endl;
}


//----------------------------------------------------------------------------------------------------------------------
// return size (# tuples) of regular output interval with (quantized) output value <value>
const int TIntervals::SizeReg(const int& value) const
{
   int len = 0;

   // iterate list and sum up
   for(int i=0;i<list->Size();i++)
      if(Y_Quant(i)==value)            // compare
         len += Size(i);

   return len;
}


//----------------------------------------------------------------------------------------------------------------------
// returns true if tuple is inside cuboid - note: tuple is given with(!) output
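// A tuple still counts as a hit if it lies outside the cuboid in some variables, as long as the summed weights
// of those variables do not exceed <w_COD>. E.g. (illustrative): with weights 0.5/0.3/0.2 and w_COD = 0.4,
// lying outside only in the 0.3-variable is tolerated, lying outside in the 0.5-variable is not.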
bool HitTest(const TCuboid& cub, const float*const& tuple, const TData*const& data, const float& w_COD
               , const float*const& weights, const TSorter*const& order)
{
#ifdef PROFILE
nHitTest++;
#endif

   float out=0;       // sums up the weights of variables lying outside
   float w = w_COD;   // threshold for the cumulated 'outside' weight

   // count # variables outside   note: weights, variable types and (data) minima are with(!) output
   for(int v=0;v<order->Size();v++)
   {
      const int& j=order->GetId(v);             // performance boost: process variables in order of decreasing weight

      if(NOMINAL_VARIABLES && data->IsSymbolic(j+1))
      {
         // symbolic (nominal): outside if flag is unset for symbol (PoD)
         if(!IsBitSet(cub.Mask(j), (int) (tuple[j+1]-data->Min()[j+1]) ))
         {
            out += weights[j+1];       // sum up outside weights
            if(out > w)                // and check if threshold is exceeded
               return false;           // not inside
         }
      }
      else
         // continuous or ordinal
         if( tuple[j+1] < cub.Min(j) || tuple[j+1] > cub.Max(j) )
         {
            out += weights[j+1];       // sum up outside weights
            if(out > w)                // and check if threshold is exceeded
               return false;           // not inside
         }
   }

   // else ... tuple is inside
   return true;
}


//----------------------------------------------------------------------------------------------------------------------
// delete interval from list - note: this means merge(!) the opI-th interval with the 'opI-1'-th one
void TIntervals::Del(const int& opI)
{
   list->Pos(opI);                  // position
   int len = list->Get().nTup;      //
   list->Del();                     // delete


   list->Pos(opI-1);                                  // position to preceding interval
   list->Get().nTup = len+list->Get().nTup;           // add length
}


//----------------------------------------------------------------------------------------------------------------------
// count # covered negative examples and decide upon merge
bool TIntervals::NegTest(TCuboid& cuboid, const int& y_quant, const int& maxNeg, const float& w_COD
                  , const float*const& weight) const
{
#ifdef PROFILE
clock_t time=clock();
nNegTest++;
#endif

   float neg = 0;    // ini


   // check all tuples of <checkVec>
   for(int i=0;i<len_checkVec[y_quant];i++)
      if(HitTest(cuboid, data->Row(checkVec[y_quant][i]), data, w_COD, weight, order))
         if(++neg > maxNeg)
            return false;     // deny merge

   cuboid.neg = neg;          // store # covered negative examples


#ifdef PROFILE
t_NegTest+=clock()-time;
#endif

   return true;               // allow merge
}


//----------------------------------------------------------------------------------------------------------------------
// create check-vectors and randomize their order:
//    store indices of all (learn-)tuples whose (quantized) output value differs more than <DifMax> from <y>
void TIntervals::CreateCheckVec(const int& y_quant, const TParameter*const& para)
{
   int  nId = 0;                          // ini
   int* vec = new int[data->nTup()];


   // over all intervals in list
   int c =0;                              // counter
   for(int i=0;i<list->Size();i++)

      // if (quantized) output value differs more than <DifMax> ...
      #ifndef RELEASE
      if(abs(Y_Quant(i)-y_quant)>para->DifMax)
      #else
      if(abs(Y_Quant(i)-y_quant)>0)
      #endif
         for(int t=0;t<Size(i);t++)                      // ... store all indices of interval
            vec[nId++] = c++;
      else
         c += Size(i);                                   // sum up skipped


   // set randomized tuple id's of <vec> as check-vector
   checkVec[y_quant]       = Randomize(vec, nId);
   len_checkVec[y_quant]   = nId;
}


//----------------------------------------------------------------------------------------------------------------------
// get pointer to first tuple of i-th interval
const float**const& TIntervals::Get(const int& i) const
{
   // note: '-1' is invalid but does not cause TStdList::Pos() to throw an exception. Thus do it here
   IfTrueThrowTypeB(i==-1, "TIntervals::Get(const int i) called with 'i=-1' which is illegal!");

   list->Pos(i);                    // position
   return list->Get().opI;          // return
}


//----------------------------------------------------------------------------------------------------------------------
// return index to an interval which is followed by another one with the same quantized output value. Function used
// in cascade mode (2nd pass) to find the intervals which need to be merged.  Beware of danger! Do not change unless
// you know what you do!! ;-)
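// e.g. (illustrative): for the quantized interval values 0,0,1,2,2 the first call returns 0, i.e. the index
// of the first of the two neighbouring intervals with value 0; a later call finds the pair with value 2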
int TIntervals::NextInterval()
{
   // ini: get (quantized) output value of current interval
   int y = Y_Quant(id4Bg2++);


   // a) search through list
   int i;
   for(i=0;i<list->Size();i++)
   {
      if(id4Bg2 >= list->Size())          // wrap around at the end of the list
         id4Bg2 = 0;

      const float _y = Y_Quant(id4Bg2++);

      if(y==_y)                           // compare (quantized) output values
         break;
      else
         y =_y;                           // store new (quantized) output value
   }


   // b) if none found ... return '-1'
   if(i==list->Size())
      return -1;


   // c) calculate return value
   int opI = id4Bg2 - 2;


   // d) position forward to the next (quantized) output value as start position for the next call to this function
   //    note: maximally check as many times as there are list items
   for(i=0;i<list->Size();i++)
   {
      if(id4Bg2 >= list->Size())          // wrap around at the end of the list
         id4Bg2 = 0;

      if(y!=Y_Quant(id4Bg2))              // stop if new (quantized) output value is found
         break;

      id4Bg2++;
   }
   if(id4Bg2!= 0)
      id4Bg2--;      // position back, as (later) a preceding element of the interval list will be deleted


   return opI;
}


   //-------------------------------------------------------------------------------------------------------------------
   //
   //    class TAdjMat - adjacency matrix (lower triangular matrix) which stores the distances from all cuboids of a
   //                    cuboid list to each other
   //
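   //    storage layout for n cuboids (illustrative) - row i holds the distances d(i,0) ... d(i,i-1):
   //       row 1  : d(1,0)
   //       row 2  : d(2,0) d(2,1)
   //       ...
   //       row n-1: d(n-1,0) ... d(n-1,n-2)
   //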

//----------------------------------------------------------------------------------------------------------------------
// constructor - note: the data structure is a list of lists emulating a lower triangular matrix
TAdjMat::TAdjMat(const TCuboids*const& _cuboids)
{
   cuboids                    = _cuboids;

   // new matrix (row pointer list)
   mat = new TFloatsList;
   mat->SetName("TAdjMatRowList");

   // over all rows - 'i=1' cause matrix is lower triangular without diagonal
   for(int i=1;i<cuboids->Size();i++)
   {
      TFloatList* row = new TFloatList;               // new row (distances list) ...
      row->SetName("TAdjMatDistancesList");
      mat->Ins(row);                                  // ... and insert it


      // set i-th cuboid as reference cuboid ...
      TCuboid* ref = &cuboids->Get(i);

      // ... and calculate the distances to all other cuboids - note: mind that the matrix is lower triangular
      for(int j=0;j<i;j++)
         row->Ins()=cuboids->Distance(j, ref);
   }
}


//----------------------------------------------------------------------------------------------------------------------
// destructor
TAdjMat::~TAdjMat()
{
   // delete matrix
   DeleteEntries(mat);
   delete mat;
}


//----------------------------------------------------------------------------------------------------------------------
// append row
void TAdjMat::App(const TCuboid*const& ref)
{
   if(cuboids->Size() < 2)                         // do not proceed if there are fewer than two cuboids in cuboid list
      return;

   mat->Pos(Size()-1);                             // position to the end of the row pointer list

   TFloatList* row =  new TFloatList;              // new row ...
   row->SetName("TAdjMatDistancesList(appended)");
   mat->Ins(row);                                  // ... and insert it


   // calculate distances to all other cuboids - note: mind that the matrix is triangular
   for(int j=0;j<Size();j++)
      row->Ins()=cuboids->Distance(j, ref);
}


//----------------------------------------------------------------------------------------------------------------------
// debug function
void TAdjMat::Save(ofstream& file) const
{
   file << ComChar << " TAdjMat" << endl;
   file << ComChar << " Size       = " << mat->Size()    << endl;
   file << ComChar << " # Cuboids  = " << cuboids->Size() << endl;

   mat->Reset();                       // position to first row

   for(int i=0;i<mat->Size();i++)      // over all rows
   {
      mat->Next();

      TFloatList* row = mat->Get();    // get row (list with distances)

      row->Reset();

      for(int j=0;j<row->Size();j++)    // over complete row
      {
         row->Next();
         file << row->Get() << " ";
      }

      file << endl;
   }
   file << endl;
}


//----------------------------------------------------------------------------------------------------------------------
// delete row/column in adjacency matrix
void TAdjMat::Del(const int& rowId)
{
   // valid values are e[0..Size()]   (i.e. there are  Size()+1 valid values!)
   IfTrueThrowTypeB( rowId<0 || rowId>mat->Size(), "Function called with illegal value for 'rowId'!", "TAdjMat::Del"
                     , szModule);

   // second case: delete last row which means delete no(!) column
   IfTrueThrowTypeB( rowId==mat->Size(), MSG_FNI, "TAdjMat::Del", szModule);



   // first case: rowId==0 , i.e. delete first column and no(!) row
   if(rowId==0)
   {
      // reset to first row
      mat->Reset();
      mat->Next();

      // delete first column
      delete mat->Get();        // delete row
      mat->Del();               // delete from row pointer list


      if(mat->Size()==0)         // return if it's the last row
         return;


      // else
      TFloatList* row = mat->Get();          // get row

      row->Reset();                          // reset list with distances
      row->Next();                           //
      row->Del();                            // delete first entry (first column)


      for(int i=1;i<mat->Size();i++)         // over all rows - 'i=1' cause first row already worked over
      {
         mat->Next();

         TFloatList* row = mat->Get();       // get row

         row->Reset();                       // reset list with distances
         row->Next();                        //
         row->Del();                         // delete first entry (first column)
      }

      return;
   }


   // else: delete row and column
   mat->Pos(rowId-1);                        // '-1' cause rows specified by <rowId> are enumerated starting with 1
   delete mat->Get();                        // delete row
   mat->Del();                               // delete row from row pointer list

   TFloatList* row = mat->Get();             // get row

   row->Pos(rowId);                          // position
   row->Del();                               // ... and delete


   for(int i=rowId;i<mat->Size();i++)        // over all rows - 'i=rowId' cause columns need to be deleted just up from there
   {
      mat->Next();                           //

      TFloatList* row = mat->Get();          // get row

      row->Pos(rowId);                       // position
      row->Del();                            // ... and delete
   }
}


//----------------------------------------------------------------------------------------------------------------------
// re-calculate distances - caution: reference cuboid must be correctly initialized!
void TAdjMat::Recalc(const int& rowId, const TCuboid*const& ref)
{
   // valid values are e[0..Size] -  (i.e. there are Size()+1 valid values)
   IfTrueThrowTypeB(rowId<0 || rowId>mat->Size(), "Function called with illegal value for 'rowId'!", "TAdjMat::Recalc"
                           , szModule);



   // 1. re-calculate row -> ...
   if(rowId != 0)
   {
      mat->Pos(rowId-1);                              // '-1'

      TFloatList* row = mat->Get();                   // get row

      row->Reset();

      // re-calculate row
      for(int i=0;i<row->Size();i++)                  // over all distances
      {
         row->Next();
         row->Get() = cuboids->Distance(i, ref);      // re-calculate
      }
   }
   else
      mat->Reset();


   // 2. re-calculate column
   for(int i=rowId;i<mat->Size();i++)
   {
      mat->Next();

      TFloatList* row = mat->Get();                   // get row

      row->Pos(rowId);                                // position to column
      row->Get() = cuboids->Distance(i+1, ref);       // re-calculate
   }
}


//----------------------------------------------------------------------------------------------------------------------
// disconnect nodes 'i' and 'j'
void TAdjMat::Disconnect(const int& i, const int& j)
{
   // note: valid values are: i e[1..Size()], j e[0..Size()-1] with i>j
   IfTrueThrowTypeB(i<=0||i>mat->Size(),"Function called with illegal value for 'i'!", "TAdjMat::Disconnect", szModule);
   IfTrueThrowTypeB(j<0||j>mat->Size()-1,"Function called with illegal value for 'j'", "TAdjMat::Disconnect", szModule);
   IfTrueThrowTypeB(i<=j, "Function called with 'i<=j' which is illegal!", "TAdjMat::Disconnect", szModule);


   mat->Pos(i-1);
   TFloatList* row = mat->Get();    // get row

   row->Pos(j);                     // position in row and ...
   row->Get() = MAXFLOAT;           // ... disconnect by setting the maximal possible value
}


//----------------------------------------------------------------------------------------------------------------------
// search smallest distance in adjacency matrix
//
// notes: rows are indicated by 'k' and enumerated from '1' to 'Size()-1'
//        columns are indicated by 'l' and enumerated from '0' to 'Size()-2'
// This allows using 'k' and 'l' to index inside TCuboids objects
// note: k>l
float TAdjMat::FindMin(int& k, int& l) const
{
#ifdef PROFILE
clock_t time=clock();
nFindMin++;
#endif

   float min = MAXFLOAT;      // ini

   if(mat->Size() == 0)       // return if matrix is empty
      return min;


   // 1. reset row pointer list
   mat->Reset();


   // 2. iterate all rows - 'i=1' cause matrix is triangular without diagonal
   for(int i=1;i<cuboids->Size();i++)        // over all rows
   {
      mat->Next();                           //

      TFloatList*& row = mat->Get();         // get row

      row->Reset();                          // ... reset it


      // iterate over all distances in row  - note: mind that matrix is triangular without diagonal
      for(int j=0;j<i;j++)
      {
         row->Next();                  //

         // if new minimum is found -> ...
         if(row->Get() < min)
         {
            min = row->Get();                   // store new minimum

            k = i;                              // store row and column
            l = j;

            if(min==0)                          // performance boost: stop if distance of zero is found
            {
               #ifdef PROFILE
               t_FindMin+=clock()-time;         // profiling code
               #endif

               return min;
            }
         }
      }
   }

#ifdef PROFILE
t_FindMin+=clock()-time;
#endif

   return min;
}



   //-------------------------------------------------------------------------------------------------------------------
   //
   //    class TAdjArr - adjacency array to represent all combinations between two cuboid lists (rectangular matrix)
   //

//----------------------------------------------------------------------------------------------------------------------
// constructor
TAdjMatEx::TAdjArr::TAdjArr(TCuboids*const& _a, TCuboids*const& _b)
{
   mat = new TFloatsList;              // new row pointer list
   mat->SetName("TAdjArrRowList");

   a = _a;
   b = _b;
   m = NULL;


   for(int i=0;i<b->Size();i++)  // new b->Size() rows and calculate distances
   {
      TFloatList*& row = mat->Ins();                  // new row pointer list entry ...
      row = new TFloatList;                           // and new distances list
      row->SetName("TAdjArrDistancesList");

      // set reference cuboid
      TCuboid* ref = &b->Get(i);

      for(int j=0;j<a->Size();j++)                    // over all columns
         row->Ins()=a->Distance(j, ref);              // calculate distance to reference cuboid
   }

   _nCols = a->Size();     // store # columns
}


//----------------------------------------------------------------------------------------------------------------------
// constructor
TAdjMatEx::TAdjArr::TAdjArr(TCuboids*const& _a, TCuboids*const& _b, TCuboids*const& _m)
{
   mat = new TFloatsList;              // new row list
   mat->SetName("TAdjArrRowList");     // and set its name


   // copy
   a  = _a;
   b  = _b;
   m  = _m;

   _nCols = a->Size()+b->Size();       // store # columns
}


//----------------------------------------------------------------------------------------------------------------------
// destructor
TAdjMatEx::TAdjArr::~TAdjArr()
{
   DeleteEntries(mat);
   delete mat;
}



//----------------------------------------------------------------------------------------------------------------------
// recalculate distances if cuboid was changed/merged
void TAdjMatEx::TAdjArr::Recalc(const int& _row, const TCuboid*const& ref)
{
   // check
   IfTrueThrowTypeB(!m, "Fucntion called for instance constructed with only 2 adjacency objects!", "TAdjArr::Recalc", szModule);
   IfTrueThrowTypeB(_row<0 || _row>=mat->Size(), "Function called with illegal values!", "TAdjArr::Recalc", szModule);


   mat->Pos(_row);                        // position to row
   TFloatList* row = mat->Get();          // get row

   if(_nCols == 0)                        // return if there are no columns anymore
      return;


   row->Reset();

   for(int j=0;j<a->Size();j++)           // all columns (<a>) - note: matrix has size m->Size() x (a->Size()+b->Size())
   {
      row->Next();
      row->Get() = a->Distance(j, ref);   // calculate distance to reference cuboid
   }

   for(int j=0;j<b->Size();j++)           // all columns (<b>)
   {
      row->Next();
      row->Get() = b->Distance(j, ref);   // calculate distance to reference cuboid
   }
}


//----------------------------------------------------------------------------------------------------------------------
// delete row
void TAdjMatEx::TAdjArr::DelRow(const int& rowId)
{
   IfTrueThrowTypeB(rowId<0||rowId>=nRows(),"Function called with illegal value of 'rowId'!","TAdjArr::DelRow",szModule);


   mat->Pos(rowId);        // position
   delete mat->Get();      // delete distances list
   mat->Del();             // delete from row pointer list
}


//----------------------------------------------------------------------------------------------------------------------
// delete column
void TAdjMatEx::TAdjArr::DelCol(const int& colId)
{
   IfTrueThrowTypeB(colId<0||colId>=_nCols,"Function called with illegal value of 'colId'!","TAdjArr::DelCol",szModule);


   for(int i=0;i<nRows();i++)          // delete colId-th distance in all distances lists, i.e. in all rows
   {
      mat->Pos(i);                     // position to i-th row
      TFloatList* row = mat->Get();
      row->Pos(colId);                 // position to colId-th distance
      row->Del();                      // delete list entry
   }

   _nCols--;                           // decrement column counter
}


//----------------------------------------------------------------------------------------------------------------------
// append row
void TAdjMatEx::TAdjArr::App()
{
   IfTrueThrowTypeB(!m,"Function called for instance constructed with only 2 adjacency objects!","TAdjArr::App",szModule);


   mat->Pos(nRows()-1);                      // position to the end of row pointer list
   TFloatList*& row = mat->Ins();            // insert new row
   row = new TFloatList;                     // ... and new distances list
   row->SetName("TAdjArrDistancesList");


   TCuboid* ref = &m->Get(nRows()-1);        // set reference cuboid


   for(int j=0;j<a->Size();j++)              // all columns due to <a>
      row->Ins()=a->Distance(j, ref);        // calculate distance

   for(int j=0;j<b->Size();j++)              // all columns due to <b>
      row->Ins()=b->Distance(j, ref);        // calculate distance
}


//----------------------------------------------------------------------------------------------------------------------
// disconnect node
void TAdjMatEx::TAdjArr::Disconnect(const int& rowId, const int& colId)
{
   IfTrueThrowTypeB(rowId<0 || rowId>=mat->Size() || colId<0 || colId>=_nCols, "Function called with illegal values!",
                      "TAdjArr::Disconnect", szModule);



   mat->Pos(rowId);                 // position to rowId-th row
   TFloatList* row = mat->Get();
   row->Pos(colId);                 // position to colId-th column
   row->Get() = MAXFLOAT;           // disconnect
}


//----------------------------------------------------------------------------------------------------------------------
// debug function
void TAdjMatEx::TAdjArr::Save(ofstream& file) const
{
   // write header
   file << ComChar << " TAdjArr" << endl;
   file << ComChar << " Size       = " << mat->Size() << " x " << _nCols << endl;


   // write matrix
   if(mat->Size() > 0 && _nCols >0)       // if matrix (list) non-empty
   {
      mat->Reset();

      // over all rows
      for(int i=0;i<nRows();i++)
      {
         mat->Next();                        //
         TFloatList*& row = mat->Get();      // get row
         row->Reset();
         for(int j=0;j<_nCols;j++)           // iterate all distances in row
         {
            row->Next();
            file << row->Get() << " ";
         }
      }
   }
   file << endl;
}


//----------------------------------------------------------------------------------------------------------------------
// find minimum distance
float TAdjMatEx::TAdjArr::FindMin(int& id1, TCuboids*& cub2, int& id2)
{
#ifdef PROFILE
clock_t time=clock();
nFindMinEx++;
#endif

   float min = MAXFLOAT;                     // ini with biggest possible value

   if(mat->Size() > 0 && _nCols >0)          // if matrix (list) non-empty
   {
      mat->Reset();

      // search for smallest distance
      for(int i=0;i<nRows();i++)
      {
         mat->Next();                        //
         TFloatList*& row = mat->Get();      // get row
         row->Reset();
         for(int j=0;j<_nCols;j++)           // iterate all distances in row
         {
            row->Next();
            if(row->Get() < min)             // if new minimum found ...
            {
               min = row->Get();             // ... store it
               id1 = i;
               id2 = j;
            }
         }
      }

      // set cuboid lists: change 'cub2' and 'id2' to 'b', if 'id2' identifies a column in the part of 'b'
      cub2 = a;                     // ini
      if(m && id2 >= a->Size())
      {
         cub2 = b;
         id2  = id2-a->Size();
      }
   }

#ifdef PROFILE
t_FindMinEx+=clock()-time;
#endif

   return min;
}



   //-------------------------------------------------------------------------------------------------------------------
   //
   //    class TAdjMatEx
   //
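   //    overview of the three adjacency objects (illustrative, sizes follow from the constructors):
   //       matA_B  : b->Size() rows x a->Size() columns            - distances between lists 'a' and 'b'
   //       matM_AB : one row per cuboid in 'm' (appended on merge) x (a->Size()+b->Size()) columns
   //       matM_M  : lower triangular matrix over list 'm'         - distances within the merged list
   //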

//----------------------------------------------------------------------------------------------------------------------
// constructor
TAdjMatEx::TAdjMatEx(TCuboidsEx*const& cuboidsEx)
{
   a = cuboidsEx->a;
   b = cuboidsEx->b;
   m = cuboidsEx->m;

   matA_B   = new TAdjArr(a, b);
   matM_AB  = new TAdjArr(a, b, m);
   matM_M   = new TAdjMat(m);
}


// destructor
TAdjMatEx::~TAdjMatEx()
{
   delete matA_B;
   delete matM_AB;
   delete matM_M;
}



//----------------------------------------------------------------------------------------------------------------------
// find smallest distance, search all three adjacency objects
float TAdjMatEx::FindMin(TCuboids*& cub1, int& id1, TCuboids*& cub2, int& id2)
{
   // temporary variables to store results of each call of FindMin()
   TCuboids *cubInMatA_B, *cubInMatM_AB;
   int         id1_A_B, id2_A_B, id1_M_AB, id2_M_AB, id1_M_M, id2_M_M;

   // call FindMin()
   float min_A_B  =  matA_B   ->FindMin(id1_A_B,  cubInMatA_B,  id2_A_B);
   float min_M_AB =  matM_AB  ->FindMin(id1_M_AB, cubInMatM_AB, id2_M_AB);
   float min_M_M  =  matM_M   ->FindMin(id1_M_M , id2_M_M);


   // find smallest value
   if(min_M_M < min_A_B && min_M_M < min_M_AB)
   {
      // matrix 'M' has smallest distance
      cub1 = m;
      cub2 = m;
      id1  = id1_M_M;
      id2  = id2_M_M;

      return min_M_M;
   }

   // else
   if(min_A_B < min_M_AB)
   {
      // matrix 'A_B' has smallest distance
      cub1 = b;
      cub2 = a;
      id1  = id1_A_B;
      id2  = id2_A_B;

      return min_A_B;
   }

   // else

   // matrix 'M_AB' has smallest distance
   cub1 = m;
   cub2 = cubInMatM_AB;
   id1  = id1_M_AB;
   id2  = id2_M_AB;

   return min_M_AB;
}


//----------------------------------------------------------------------------------------------------------------------
// disconnect node
void TAdjMatEx::Disconnect(const TCuboids*const& cubs1, const int& id1, const TCuboids*const& cubs2, const int& id2)
{
   if(cubs1!=m) matA_B->Disconnect(id1, id2);                                       // delete in matrix 'A_B'
   else         if(cubs2 == a)matM_AB->Disconnect(id1,id2);                         // delete in matrix 'M_AB' in part A
                else          if(cubs2 == b) matM_AB->Disconnect(id1,id2+a->Size());// delete in matrix 'M_AB' in part B
                              else           if(cubs2 == m) matM_M->Disconnect(id1, id2); // delete in matrix 'M_M'
}


//----------------------------------------------------------------------------------------------------------------------
// reflect merge of two cuboids in adjacency matrices
void TAdjMatEx::Merge(const TCuboids*const& cubs1, const int& id1, const TCuboids*const& cubs2, const int& id2
                     , const TCuboid*const& ref)
{
   // case a) :  merge in matrix 'A_B'
   if(cubs1 != m)
   {
      matA_B ->DelRow(id1);               // delete row
      matA_B ->DelCol(id2);               // ...and columns
      matM_AB->DelCol(id2);
      matM_AB->DelCol(a->Size()+id1);     // '+1-1=+-0' cause a preceding column was deleted and(!) cuboid list 'A'
                                          // has one cuboid fewer
      matM_AB->App();                     // append new row - note: will be calculated automatically
      matM_M ->App(ref);
      return;
   }

   // case c): merge in matrix 'M_M'
   if(cubs2 == m)                      // delete in matrix 'M_AB' and(!) 'M_M'
   {
      matM_AB->DelRow(id2);               // delete two rows (first delete)
      matM_AB->Recalc(id1-1, ref);        // '-1' cause a preceding row(?) was deleted
      matM_M ->Del(id2);                  // 2nd delete (see above)

      if(matM_M->Size() >0)
         matM_M->Recalc(id1-1, ref);      // ... recalculate
      return;
   }

   // case b): merge in matrix 'M_AB'
   if(cubs2 == a)
   {
      matA_B ->DelCol(id2);
      matM_AB->DelCol(id2);
   }
   else
   {
      matA_B ->DelRow(id2);
      matM_AB->DelCol(id2+a->Size());
   }

   // do recalculations
   matM_AB->Recalc(id1, ref);

   if(matM_M->Size()>0)             // only if matrix isn't empty
      matM_M->Recalc(id1, ref);
}



   //-------------------------------------------------------------------------------------------------------------------
   //
   //    class TPnc
   //

//----------------------------------------------------------------------------------------------------------------------
// constructor
TPnc::TPnc(TData*const& _data, const TParameter& _para)
{
   #ifdef DEBUG_LOG_ON
   DebugLogFile << "TPnc object intialized ... ";
   #endif

   // check if data is sorted
   IfTrueThrowTypeA(!_data->IsSorted(), "Data is not sorted! Call Sort(0) before!", "TPnc::TPnc", szModule);


   data = _data;                 // copy learn data and parameter
   para = _para;


   f_Finished = false;                                               // set flags
   f_Pruned   = false;
   f_Next     = true;
   f_Cascade  = false;
   state      = s_Pass1;                                            // set state

   n_ges    = data->nTup();                                         // initialize total number of (trivial) cuboids
   opI      = 0;                                                    // reset
   progress = 0;


   data->Lock();                                                    // lock data
   dfunc.Initialize(para.Metric);                                   // set metric

   TDataData::TNormType normType = TDataData::ByRange;              // normalization type for continuous features
   if(!para.f_NormalizeByRange)
      normType = TDataData::ByFourSigma;

   ddata        = new TDataData(data, normType, para.OverlapFac);
   intervals    = new TIntervals(data, &para);                      // interval wise access to sorted data
   cuboidsLists = new TCuboidsLists;                                // list with cuboid lists

   cuboids      = NULL;                                             // initialize with NULL pointer
   adjMat       = NULL;
   cuboidsEx    = NULL;
   adjMatEx     = NULL;


   #ifndef RELEASE
   u_Gamma = CalcUGamma(ALPHA);                                     // used for conf. intervals for confident merge test
   #else
   u_Gamma = 1.0;
   #endif


   // profiling code
   #ifdef PROFILE
   t_FindMin = t_FindMinEx = t_NegTest = t_PostPrune = t_Convert2TCluster = t_Distance = t_TCuboids = t_TIntervals= 0;
   nNegTest = nFindMin = nFindMinEx = nHitTest = nDistance = nTCuboids = nConvert2TCluster =0;
   t_Reset = clock();
   #endif
}


//----------------------------------------------------------------------------------------------------------------------
// destructor
TPnc::~TPnc()
{
   #ifdef DEBUG_LOG_ON
   DebugLogFile << "... deleted" << endl;
   #endif

   #ifdef PROFILE                                           // profiling code
   logfile << endl << endl << "T_Learn     =";                                            // learning
   WriteTime(t_Learn, logfile);

   logfile << endl << "T_PostPrune =";                                                    // post pruning
   WriteTime(t_PostPrune, logfile);

   logfile << endl << "FindMin           " << setw(8) << nFindMin    << " calls   ";
   WriteTime(t_FindMin, logfile);
   logfile << endl << "FindMinEx         " << setw(8) << nFindMinEx  << " calls   ";
   WriteTime(t_FindMinEx, logfile);
   logfile << endl << "NegTest           " << setw(8) << nNegTest    << " calls   ";
   WriteTime(t_NegTest, logfile);
   logfile << endl << "HitTest           " << setw(8) << nHitTest    << " calls   ";

   logfile << endl << "Distance          " << setw(8) << nDistance   << " calls   ";
   WriteTime(t_Distance, logfile);

   logfile << endl << "Convert2TCluster  " << setw(8) << nConvert2TCluster << " calls   ";
   WriteTime(t_Convert2TCluster, logfile);
   #endif

   delete cuboids;
   delete cuboidsEx;
   delete adjMat;
   delete adjMatEx;
   delete intervals;
   delete cuboidsLists;

   data->Unlock();
   ddata->Release();
}


//----------------------------------------------------------------------------------------------------------------------
// debug function: write model to file  (not tested!)
void TPnc::Save(ofstream& file) const
{
   // a) write header
   file << ComChar << " Cuboid data (debug function)" << endl;


   // b) get mapper to all cuboids
   const TCuboid**const& mapper=cuboidsLists->GetMapper(consistent_mapper);

   // c) write control parameters
   file << ComChar << " OpI        = " << cuboidsLists->Size()    << endl;              // # cuboid lists
   file << ComChar << " Cascade    = " << FlagToString(f_Cascade) << endl;              // cascade flag

   for(int i=0;i<cuboidsLists->Size();i++)                                              // # cuboids in each cuboid list
      file << ComChar << " N_Cuboids" << (i+1) << " = " << cuboidsLists->Size(i) << endl;
   file << ComChar << " N_C        = " << cuboidsLists->nCuboids() << endl << endl;     // total # cuboids all together


   // d) write comment lines with variable id and range
   char szText[STS];                                                 // variable numbers (comment line one)
   sprintf(szText, "%c VarId%3d ", ComChar, 1);
   file << szText;
   for(int j=1;j<data->nVar();j++)
   {
      if(NOMINAL_VARIABLES && data->IsSymbolic(j))
         sprintf(szText, "%*d  ", data->nIntegerMaxMin(j), j+1);     // note: use different width for symbolic columns
      else
         sprintf(szText, "%10d          ", j+1);
      file << szText;
   }

   file << endl << ComChar << " Range    ";                          // input minima/maxima (comment line two)
   for(int j=1;j<data->nVar();j++)
   {
      if(NOMINAL_VARIABLES && data->IsSymbolic(j))
         sprintf(szText, "%*d ", data->nIntegerMaxMin(j), data->nIntegerMaxMin(j)); // symbolic: write # symbols
      else
         sprintf(szText, "%9g-%-9g", data->Min()[j], data->Max()[j]);               // continuous: write minma/maxima
     file << szText << " ";
   }
   file << endl;



   // e) write cuboid data
   for(int i=0;i<cuboidsLists->nCuboids();i++)
   {
      sprintf(szText, "%10g ", mapper[i]->y_snap);                               // snapped output value
      file << szText;

      // note: variable 'j' is without(!) output
      for(int j=0;j<data->nVar()-1;j++)                                          // for each input variable
      {
         if(NOMINAL_VARIABLES && data->IsSymbolic(j+1))
         {
            WriteBits(file, mapper[i]->Mask(j), data->nIntegerMaxMin(j+1));      // symbolic (nominal): write bits
            file << " ";
         }
         else
         {
            sprintf(szText, "%9g-%-9g", mapper[i]->Min(j), mapper[i]->Max(j));   // cont. or ordinal: write left and
                                                                                 // right bound
            file << szText;
         }
         file << " ";
      }

      // mass and # covered negative examples
      file << setw(12) << mapper[i]->m << " " << setw(12) << mapper[i]->neg << endl;
   }
}
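
//----------------------------------------------------------------------------------------------------------------------
// Illustrative data line of the debug format written above (hypothetical values, spacing only approximate; one
// continuous input variable): snapped output value, one 'min-max' bound pair per input and finally mass and # covered
// negative examples, e.g.
//
//       2.5       0.1-3.4                     12            1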


//----------------------------------------------------------------------------------------------------------------------
// write mass, #negative examples and tuple id's (id's of all tuples merged in the cluster) for each cluster
void TPnc::SaveClusterTupleIds(ofstream& file) const
{
   // a) get mapper to all cuboids
   const TCuboid**const& mapper=cuboidsLists->GetMapper(consistent_mapper);

   // b) write header
   file << ComChar << " Each cuboid's tuple Id's" << endl;
   file << ComChar << " Format: # positive examples  |  # negative examples  |  Id's of tuples merged in cuboid" <<endl;

   // c) write tuple id's for each cluster
   for(int i=0;i<cuboidsLists->nCuboids();i++)
   {
      file << setw(10) << mapper[i]->m << " " << setw(10) << mapper[i]->neg << " ";    // mass and # negative examples

      for(int k=0;k<mapper[i]->m;k++)
         file << " " << (mapper[i]->tuples->Get(k)+1);

      file << endl;
   }
}



#ifndef RELEASE
//----------------------------------------------------------------------------------------------------------------------
// return maximal # negative examples of the generalized cuboid which does not lead to a hitrate worse than the mean
// hitrate (reduced by confidence intervals) of the two single cuboids.
int MaxNegs(const TCuboid& a, const TCuboid& b, const float& u_Gamma, const float& r_min)
{
   float p=a.m+b.m;                                // mass of generalized cuboid, also the number of positive examples

   float r_a = 1-CalcUpper(a.m+a.neg, a.neg, u_Gamma); // hitrate of each single cuboid decreased by confidence interval
   float r_b = 1-CalcUpper(b.m+b.neg, b.neg, u_Gamma);
   float r   = max(r_min, (a.m*r_a+b.m*r_b)/p);        // average hitrate bounded to 'r_min'

   return (p-r*p)/r;                                   // r=p/(p+n) <=> n=(p-rp)/r
}
#endif
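
//----------------------------------------------------------------------------------------------------------------------
// Sketch of the inversion used in MaxNegs() above (added for illustration only): with p positive and n negative
// examples the hitrate is r = p/(p+n); solving for n gives n = (p - r*p)/r = p*(1-r)/r, which is the expression
// returned above. E.g. p = 20 and a bounded hitrate r = 0.8 allow at most n = 20*0.2/0.8 = 5 negative examples.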


//----------------------------------------------------------------------------------------------------------------------
// return maximal # negative examples of the generalized cuboid which does not lead to a hitrate worse than the mean
// hitrate (determined by laplace correction) of the two single cuboids.
int MaxNegsLapCor(const TCuboid& a, const TCuboid& b, const int& nClasses)
{
   float p=a.m+b.m;                                // mass of generalized cuboid, also the number of positive examples

   float r_a = (a.m+1.0) / (a.m+a.neg+nClasses);      // laplace corrected hitrate of each single cuboid
   float r_b = (b.m+1.0) / (b.m+b.neg+nClasses);
//   float r   = max(r_min, (a.m*r_a+b.m*r_b)/p);     // average hitrate bounded to 'r_min'
   float r   = (a.m*r_a+b.m*r_b)/p;                   // average hitrate

   return (p*(1-r) + 1 - r*nClasses) / r;
}
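
//----------------------------------------------------------------------------------------------------------------------
// Sketch of the inversion used in MaxNegsLapCor() above (for illustration only): the laplace corrected hitrate of the
// merged cuboid with p positive and n negative examples is r = (p+1)/(p+n+nClasses); solving for n gives
// n = (p*(1-r) + 1 - r*nClasses)/r, the expression returned above. E.g. p = 10, r = 0.8 and nClasses = 2 yield
// n = (10*0.2 + 1 - 1.6)/0.8 = 1.75, which the integer return value truncates to 1.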


//----------------------------------------------------------------------------------------------------------------------
// do one iteration step - return false if finished
bool TPnc::Iterate()
{
   // if 2nd pass (cascade mode) ...
   if(f_Cascade)
   {
      consistent_mapper = false;    // reset flag


      // 1. if flag is set: stepover to next output interval (outer loop)
      if(f_Next)
      {
         // a) get id of next output interval
         opI = intervals->NextInterval();


         // b) if there are no more intervals to process -> end process
         if(opI == -1)
         {
            #ifdef PROFILE                // profiling code
            t_Learn = clock()-t_Reset;
            #endif

            #ifdef DEBUG_LOG_ON
            DebugLogFile << " Finished! ";   // learning is finished
            #endif


            f_Finished = true;
            return false;
         }

         #ifdef DEBUG_LOG_ON
         DebugLogFile << "|";   // start merging of two groups/intervalls
         #endif


         // progress indication
         f_Next   = false;                                                                   // reset flag
         nChecked = 0;                                                                       // reset # checked nodes
         nToCheck     = 1+cuboidsLists->Get(opI)->Size()*cuboidsLists->Get(opI+1)->Size();
         baseProgress = 1.0-(intervals->Size()-para.N_Int)/((float) nCascadeInt);            // progress


         // c) create 'extended' cuboid list using the 'opI'-th and the 'opI+1'-th cuboid lists
         cuboidsEx = new TCuboidsEx(cuboidsLists->Get(opI), cuboidsLists->Get(opI+1), dfunc, ddata, para.Weights);

         // delete the two cuboid lists just taken to form TCuboidsEx object
         cuboidsLists->Del(opI);
         cuboidsLists->Del(opI);


         // d) create 'extended' adjacency matrix
         adjMatEx = new TAdjMatEx(cuboidsEx);
      }


      // update progress
      progress = baseProgress + incProgress*min((float) 1.0, nChecked/((float)nToCheck));


      // inner loop: proceed as long as there are un-tested combinations of cuboids
      // 2. find the two closest cuboids and try to merge them
      TCuboids    *cubs1, *cubs2;
      int id1, id2;
      if(adjMatEx->FindMin(cubs1, id1, cubs2, id2)!= MAXFLOAT )      // a) find the two closest cuboids ...
      {
         const TCuboid& a = cubs1->Get(id1);                         // b) ... get references to them ...
         const TCuboid& b = cubs2->Get(id2);                         //

         TCuboid* conj = Generalize(a, b, data);                     // ... generalize them


         // determine maximal # negative examples
         #ifdef SINGLE_MERGE_TEST
         int maxNegs = (a.m+b.m)*para.Eta;                                          // single merge test
         #endif

         #ifdef DOUBLE_MERGE_TEST
         int maxNegs = min(a.m*para.Eta+b.neg, b.m*para.Eta+a.neg);                 // double merge test
         #endif

         #ifdef CONFIDENT_MERGE_TEST
         int maxNegs = MaxNegs(a, b, u_Gamma, 1.0/(1.0+para.Eta));                  // confident merge test
         #endif

         #ifdef LAPCOR_MERGE_TEST
         int maxNegs = MaxNegsLapCor(a, b, intervals->nInt());                      // laplace corrected merge test
         #endif


         // c) make test and merge if generalized cuboid passes test
         if(intervals->NegTest(*conj, intervals->Y_Quant(opI), maxNegs, para.w_COD, ddata->Weights()))
         {
            // test passed

            // remove one of the two cuboids and overwrite the other one with the generalized cuboid <conj>
            cuboidsEx->Merge(cubs1, id1, cubs2, id2, *conj);

            // reflect merge in adjacency matrix
            adjMatEx->Merge(cubs1, id1, cubs2, id2, conj);

            conj->Assimilate();
            n_ges--;                // decrement
         }
         else
            adjMatEx->Disconnect(cubs1, id1, cubs2, id2);         // test failed: disconnect node(k, l)


         delete conj; // d) delete generalized cuboid  -  note: if accepted and kept cuboid was copied and 'assimilated'

         nChecked++;                // increment # checked nodes
      }
      else
      {
         // 3. end actual step of outer loop: replace the two just processed cuboid lists with the resulting cuboid list
         #ifdef DEBUG_LOG_ON
         DebugLogFile << "ok ";
         #endif


         // set flag
         f_Next = true;


         // delete interval from interval list
         intervals->Del(opI+1);

         // insert resulting cuboid list into the list of all cuboid lists
         // note: 'opI-1' because insertion takes place after(!) the specified position
         cuboidsLists->Ins(opI-1, cuboidsEx->ToCuboids());

         // release 'extended' cuboid list and 'extended' adjacency matrix
         delete adjMatEx;     adjMatEx  = NULL;
         delete cuboidsEx;    cuboidsEx = NULL;
      }
      return true;
   }



// else: not cascade mode

   // 1. if flag is set: stepover to next output interval (outer loop)
   if(f_Next)
   {
      #ifdef DEBUG_LOG_ON
      DebugLogFile << ".";
      #endif


      // a) create cuboids from part of the (learn) data tuples
      cuboids  = new TCuboids(intervals, data, opI, dfunc, ddata, para.Weights);

      // b) create adjacency matrix
      adjMat   = new TAdjMat(cuboids);


      f_Next   = false;                   // reset flag
      nChecked = 0;                       // reset # checked nodes
      incProgress = 1.0/intervals->Size();// progress in percent if one output interval/group is processed in first pass
      nToCheck = 1+cuboids->Size()*(cuboids->Size()-1)*0.5;
      baseProgress = opI*incProgress;     // progress
   }


   // update progress
   int nStillToCheck = 1+cuboids->Size()*(cuboids->Size()-1)*0.5;
   progress = baseProgress+incProgress*(1-sqrt(nStillToCheck/((float) nToCheck)));


   // inner loop: proceed as long as there are un-tested combinations of cuboids
   // 2. find the two closest cuboids and try to merge them
   int k, l;
   if(adjMat->FindMin(k, l)!=MAXFLOAT)                                        // a) find the two closest cuboids ...
   {
      const TCuboid& a = cuboids->Get(k);                                     // b) ... get references to them
      const TCuboid& b = cuboids->Get(l);


      TCuboid*    conj     = Generalize(a, b, data);                          // ... generalize them ...


      // determine maximal # negative examples
      #ifdef SINGLE_MERGE_TEST
      int maxNegs = (a.m+b.m)*para.Eta;                                          // single merge test
      #endif

      #ifdef DOUBLE_MERGE_TEST
      int maxNegs = min(a.m*para.Eta+b.neg, b.m*para.Eta+a.neg);                 // double merge test
      #endif

      #ifdef CONFIDENT_MERGE_TEST
      int maxNegs = MaxNegs(a, b, u_Gamma, 1.0/(1.0+para.Eta));                  // confident merge test
      #endif


      #ifdef LAPCOR_MERGE_TEST
      int maxNegs = MaxNegsLapCor(a, b, intervals->nInt());                      // laplace corrected merge test
      #endif



      // c) make test and merge if generalized cuboid passes test
      if(intervals->NegTest(*conj, intervals->Y_Quant(opI), maxNegs, para.w_COD, ddata->Weights()))
      {
         // test passed

         // delete min(k,l)-th cuboid and overwrite max(k,l)-th cuboid in cuboid list with the generalized cuboid <conj>
         cuboids->Merge(k, l, *conj);


         // reflect merge in adjacency matrix  -  note: only done if there is more than one cuboid left
         adjMat->Del(min(k, l));

         if(adjMat->Size() > 0)
            adjMat->Recalc(max(k, l)-1, conj);     // '-1' because the preceding row/column was deleted


         n_ges--;                // decrement
         conj->Assimilate();
      }
      else
         adjMat->Disconnect(k, l);        // disconnect node(k, l)



      delete conj;                        // see note above

      nChecked++;                         // increment # checked nodes

   }
   else
   {
      // 3. end actual step of outer loop: insert actually processed cuboid list in 'list of all cuboid lists'
      #ifdef DEBUG_LOG_ON
      DebugLogFile << "ok ";
      #endif



      // set flags
      consistent_mapper = false;
      f_Next = true;

      // append actual cuboid list
      cuboidsLists->App(cuboids);

      // release adjacency matrix and transfer responsibility for the cuboid list by setting the pointer to NULL
      delete adjMat;    adjMat   = NULL;
      cuboids= NULL;                      // note: ownership was passed to <cuboidsList>

      opI++;

      // if last interval was processed -> proceed in cascade-mode
      if(opI == intervals->Size())
      {
         f_Cascade   = true;
         nCascadeInt = intervals->Size()-intervals->nInt(); // # intervals which need to be merged in 2nd pass

         // progress in percent if one output interval/group is processed in 2nd pass (cascade mode)
         if(nCascadeInt>0)
            incProgress = 1.0/nCascadeInt;
         state = s_Pass2;
      }
   }

   return true;
}
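
//----------------------------------------------------------------------------------------------------------------------
// Illustrative driver loop (a sketch in comments only, not part of the implementation): 'data' and 'para' stand for an
// already prepared, sorted TData object and a TParameter set; 'ShowProgress' is a hypothetical reporting helper.
//
//    TPnc pnc(data, para);                                         // construct (data must be sorted, see constructor)
//    while(pnc.Iterate())                                          // one merge attempt per call, false when finished
//       ShowProgress(pnc.GetProgress(), pnc.GetStatusText());      // optional progress feedback
//    TCluster* cls = pnc.ToTCluster(false);                        // convert learned cuboids for prediction
//    delete cls;                                                   // release when done (caller owns the result)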


// id's of tuples merged in i-th cuboid
TTupleIdList* TPnc::Tuples(const int& i) const { return cuboidsLists->GetMapper(consistent_mapper)[i]->tuples; }


//----------------------------------------------------------------------------------------------------------------------
// write status text to static variable (char field) and return pointer to it
const char* TPnc::GetStatusText()
{
   static char szStatus[STS];

   switch(state)
   {
      case s_Converting : sprintf(szStatus, "Converting ...");    break;
      case s_Pass1      : sprintf(szStatus, "First pass ...");    break;
      case s_Pass2      : sprintf(szStatus, "Second pass ...");   break;
      case s_Pruning    : sprintf(szStatus, "Pruning ...");       break;
   }

   return szStatus;
}


//----------------------------------------------------------------------------------------------------------------------
// get progress  -  note: very simple heuristics used! ToDo: improve in future
const float TPnc::GetProgress(const bool& f_Prune/*=true*/) const
{
   float base=0.0;
   float step;

   if(f_Prune)                    // if pruning needs to be done
   {
      step = 0.5;                 // 0.5=50% for first pass
      if(state==s_Pass2)
      {
         base = 0.5;
         step = 0.2;              // 0.2=20% for 2nd pass
      }
      else
         if(state==s_Pruning)
         {
            base = 0.7;
            step = 0.3;           // 0.3=30% for pruning
         }
   }
   else                           // else: only learning needs to be done
   {
      step = 0.7;                 // 0.7=70% for first pass
      if(state==s_Pass2)
      {
         base = 0.7;
         step = 0.3;              // 0.3=30% for 2nd pass
      }
   }

   // note: the variable 'progress' indicates progress of the currently processed step (first pass, 2nd pass, pruning)
   return base + progress*step;  // calculate progress;
}
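
//----------------------------------------------------------------------------------------------------------------------
// Worked example for the weighting above (illustration only): if pruning is requested and the 2nd pass is running
// (state==s_Pass2), the finished first pass contributes the fixed base of 0.5 and the 2nd pass is weighted with 0.2;
// with the 2nd pass itself half done (progress==0.5) the overall progress is 0.5 + 0.5*0.2 = 0.6, i.e. 60%.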


//----------------------------------------------------------------------------------------------------------------------
// re-calculate cuboid's bounds by taking a weighted average of the two most extreme values
// notes: - use ORIGINAL bounds if bound was pruned or is symbolic
//        - do not call for symbolic (nominal) variables
//        - variable Id 'j' is counted with(!) output
void CalcPosAndRadians(const int& j, float& lower, float& upper, const TCuboid*const& cub, const TData*const& data
                     , const float& fac)
{
   const int nTup = cub->tuples->Size();        // # tuples merged in cluster


   float l=-1,r=-1;              // initialize with sentinel value

   // keep the cuboid's stored bound (do not re-calculate) if it was pruned
   if(cub->Min(j-1)== -MAXFLOAT)   l=cub->Min(j-1);
   if(cub->Max(j-1)== MAXFLOAT)    r=cub->Max(j-1);


   // if left and/or right bound were not pruned
   if(r==-1 || l==-1)
   {
      float* x=new float[nTup];                       // allocate

      for(int t=0;t<nTup;t++)                         // copy j-th variable of all tuples
         x[t]=data->Row(cub->tuples->Get(t))[j];         // note: data is with output

      qsort(x, nTup, sizeof(x[0]), FloatCmpDes);      // sort descending

      // re-calculate if bound was not reduced
      if(r==-1)
         r=(x[0]+x[1]*fac)/(1+fac);                   // mean of first and 2nd maximum

      if(l==-1)
         l=(x[nTup-1]+x[nTup-2]*fac)/(1+fac);         // mean of first and 2nd minimum

      delete[] x;    // release
   }

   lower = l;                                         // copy
   upper = r;
}
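
//----------------------------------------------------------------------------------------------------------------------
// Worked example for the bound re-calculation above (hypothetical numbers, for illustration only): with fac = 0.5 and
// the j-th variable of the merged tuples sorted descending to x = {9, 7, ..., 2, 1}, the bounds become
// upper = (9 + 0.5*7)/1.5 = 8.33 and lower = (1 + 0.5*2)/1.5 = 1.33, i.e. each bound is pulled from the most extreme
// value towards the 2nd most extreme one.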


//----------------------------------------------------------------------------------------------------------------------
// calculate real output value of cuboid
float CalcCuboidOutput(const TCuboid*const& cub, const TData*const& data)
{
   const int nTup = cub->tuples->Size();        // # tuples merged in cluster

   float y=0;  // ini

   // calculate mean output value of all tuples merged in cuboid
   for(int t=0;t<nTup;t++)
      y += data->Row(cub->tuples->Get(t))[0];   // sum up
   y /= nTup;                                   // make mean

   return y;
}


//----------------------------------------------------------------------------------------------------------------------
// convert cuboids to TCluster to use it with TPrediction or derived classes  -  note: cuboids with a mass of less
// than k+1 (calling argument 'k') are skipped
TCluster* /*cr*/ TPnc::ToTCluster(const bool& f_PruneAnyWay, const bool*const& f_Stop/*=NULL*/, const int& k /*=0*/)
{
   #ifdef DEBUG_LOG_ON
   DebugLogFile << "Converting ... ";
   #endif


   if(!cuboidsLists)          // security check
      return NULL;

#ifdef PROFILE
clock_t time=clock();
nConvert2TCluster++;
#endif

   state = s_Converting;

   // 1. get mapper to all cuboids
   const TCuboid**const& mapper = cuboidsLists->GetMapper(consistent_mapper);


   // 2. new clusters
   int nCls = nCluster(k);          // # cuboids with mass > k
   if(nCls<=0)                      // catch trivial case ...
      nCls=1;
   TCluster* cls = new TCluster(nCls, ddata->GetObject(), para);
   cls->f_HasOriginalCuboids = true;                               // model is initialized with un-pruned bounds here


   // 3. copy cuboid data:
   int count=0;
   for(int i=0;i<cuboidsLists->nCuboids();i++)
   {
      // a) skip if # tuples per cuboid is too small
      if(mapper[i]->m<=k && (count!=0 || i!= cuboidsLists->nCuboids()-1))    // note: ensure at least one cluster
         continue;

      // b) output
      #ifdef RE_CALC_Y_C
      if(para.f_Regression)
         // re-calculate cuboid's output value as mean over the output of all merged tuples
         cls->Output(count) = CalcCuboidOutput(mapper[i], data);
      else
      #endif
         cls->Output(count) = mapper[i]->y_snap;


      // c) input positions and radians
      for(int j=1;j<data->nVar();j++)                       // note: '-1' because there is no output in TCuboid::l ...
         if(NOMINAL_VARIABLES && data->IsSymbolic(j))
            cls->Mask(count, j) = mapper[i]->Mask(j-1);              // symbolic (nominal): copy
         else
            // continuous or ordinal:
            #ifndef RELEASE   // obsolete in release versions
            if(para.Noise>0 && mapper[i]->tuples->Size()>1)          // recalculate left and right bound
               CalcPosAndRadians(j, cls->Lower(count,j), cls->Upper(count,j), mapper[i], data, para.Noise);
            else
            #endif
            {
               cls->Lower(count,j) = mapper[i]->Min(j-1);            // copy left and ..
               cls->Upper(count,j) = mapper[i]->Max(j-1);            // ... right bound
            }


      // d) mass and # covered negative examples
      cls->Mass(count)    = mapper[i]->m;
      cls->NegMass(count) = mapper[i]->neg;

      count++;
   }

   #ifndef RELEASE                     // obsolete in release version
   cls->CalcAvrDiameter();             // calculate average diameter
   #endif


   // 4. post prune if necessary/specified
   // set flag which indicates that 'useful' pruning information is available
   cls->f_HasPruningInformation = (para.Prune || f_PruneAnyWay);
   if(para.Prune || f_PruneAnyWay)
   {
      #ifdef DEBUG_LOG_ON
      DebugLogFile << "Pruning ... ";
      #endif

      PostPrune(f_Stop);
   }

   // 5. set active id's, i.e. id's of non-pruned variables  -  note: id's are indexed with(!) output
   state = s_Converting;
   count=0;
   for(int i=0;i<cuboidsLists->nCuboids();i++)              // for all cuboids
   {
      // a) skip if # tuples per cuboid is too small
      if(mapper[i]->m<=k && (count!=0 || i!= cuboidsLists->nCuboids()-1))    // note: ensure at least one cluster
         continue;


      // b) count # active, i.e. non-pruned, variables and store their id's for performance reasons
      int nActiveL=0, nActiveR=0;   // ini
      for(int j=1;j<data->nVar();j++)
         if(NOMINAL_VARIABLES && data->IsSymbolic(j))
         {
            if((mapper[i]->Mask(j-1) ^~0)!=0)                        // not all bits are set (not pruned) ...
               cls->ActiveLowerBound(count, nActiveL++) = j;         // store id
         }
         else
         {
            if(mapper[i]->Min(j-1)>=data->Min()[j])                  // if lower bound is not(!) pruned ...
               cls->ActiveLowerBound(count, nActiveL++) = j;         // store id

            if(mapper[i]->Max(j-1)<=data->Max()[j])                  // if upper bound is not(!) pruned ...
               cls->ActiveUpperBound(count, nActiveR++) = j;         // store id
         }
      cls->nActiveLowerBounds(count) = nActiveL;                     // copy
      cls->nActiveUpperBounds(count) = nActiveR;

      count++;
   }

#ifdef PROFILE
t_Convert2TCluster +=clock()-time;
#endif

   cls->CalculateHitRate();                   // calculate hitrate for each cuboid


   #ifdef DEBUG_LOG_ON
   DebugLogFile << "Done! ";
   #endif


   return cls;
}


#ifdef FORWARD_PRUNING
//----------------------------------------------------------------------------------------------------------------------
// post pruning of cuboids; alternate forward selection pruning
struct TTId
{
   int   id;      // tuple id of a (still covered) negative example
   float mass;    // remaining weighted mass of this example
};
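
//----------------------------------------------------------------------------------------------------------------------
// Outline of the greedy loop below (descriptive sketch only): each cuboid starts from completely generalized bounds
// l0/r0; in every round the single left or right bound whose restoration to the cuboid's value discriminates the
// largest weighted count of still covered negative examples is accepted, and the fully discriminated examples are
// removed from the negative list; the loop stops when no negative examples remain or no bound helps (best==0).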

void TPnc::PostPrune(const bool*const& f_Stop)
{
   // 1. do some checks and get mapper to all cuboids
   if(!cuboidsLists) return;                                // check: nothing can be pruned if there are no cuboids
   if(f_Pruned)      return;                                // check: return if instance is already pruned
   f_Pruned=true;
   const TCuboid**const& mapper = cuboidsLists->GetMapper(consistent_mapper);       // get mapper to all cuboids
   state = s_Pruning;                                                               // set state
   progress = 0;

   // 2. for each cuboid: try to generalize left and/or right bounds
   const int nCub = cuboidsLists->nCuboids();      // abbreviation
   for(int i=0;i<nCub;i++)
   {
      if(f_Stop)
         if(*f_Stop)     // check stop flag and abort if it is set
            return;

      // a) progress and abbreviation
      progress = i/((float)nCub);                              // current progress
      TCuboid* cub = const_cast<TCuboid*>(mapper[i]);
      const int nInp = data->nVar()-1;                         // # input variables

      // b) make array with id's of tuples not(!) inside the cuboid (negative tuples)
      bool* ins = new bool[data->nTup()];                      // allocate
      for(int t=0;t<data->nTup();t++)                          // initialize/set
         ins[t]=true;
      for(int t=0;t<cub->tuples->Size();t++)                   // reset if tuple is inside cuboid
         ins[cub->tuples->Get(t)] = false;
      int nNegId = data->nTup()-cub->tuples->Size();           // # tuples outside (# negative examples)
      TTId* negId = new TTId[nNegId];                          // allocate
      int c=0;                                                 // copy id's of negative tuples
      for(int t=0;t<data->nTup();t++)
         if(ins[t])
         {
            negId[c].id = t;
            negId[c++].mass = para.w_COD;
         }
      delete[] ins;  // release


      // c) allocate new cuboid (left and right) bounds and initialize them as completely generalized
      float* l0 = new float[nInp];              // allocate
      float* r0 = new float[nInp];
      for(int j=0;j<nInp;j++)                   // ini (completely generalize)
      {
         l0[j] = data->Min()[j+1]-data->Range()[j+1];       // set left bound beyond minimum
         r0[j] = data->Max()[j+1]+data->Range()[j+1];       // set right bound beyond maximum
      }


      TTId* negId0  = new TTId[nNegId];                        // allocate
      TTId* negId00 = new TTId[nNegId];                        // allocate
      int nNegId0;
      while(nNegId>0)
      {
         // d) find most discriminative bound
         bool  left  = true;           // ini (only read if 'best' > 0)
         int   varId = -1;
         float best  = 0;              // weighted # negative examples discriminated by best bound so far

         // two runs, one for left and one for right bound
         // first run for left bound
         for(int j=0;j<nInp;j++)         // left
         {
            if(l0[j]>=data->Min()[j+1])   // continue if left bound is already accepted
               continue;

            float weight = ddata->Weights()[j+1];  // variable weight

            float count=0;
            int  nNegId00=0;
            for(int t=0;t<nNegId;t++)                          // for all negative examples
            {
               negId00[nNegId00]=negId[t]; // copy
               if(data->Row(negId[t].id)[j+1]<cub->Min(j))        // if example is discriminated by lower (left) bound
               {
                  count+=weight;                                  // ... increment counter
                  if(negId[t].mass>weight)            // remaining mass exceeds variable weight ...
                     nNegId00++;                      // ... keep example
                  else
                     negId00[nNegId00].mass -= weight;   // ... else: example used up, it is dropped (entry overwritten)
               }
               else
                  nNegId00++;       // keep
            }


            // check if new maximum is found
            if(count>best)
            {
               left  = true;                          // set new best values
               varId = j;
               best  = count;
               TTId* tmp=negId0;    // swap pointer
               negId0=negId00;
               negId00=tmp;
               nNegId0 = nNegId00;
            }
          }


         // 2nd run for right bound
         for(int j=0;j<nInp;j++)
         {
            if(r0[j]<=data->Max()[j+1])   // continue if right bound is already accepted
               continue;

            float weight = ddata->Weights()[j+1];  // variable weight

            float count=0;
            int nNegId00=0;
            for(int t=0;t<nNegId;t++)                          // for all negative examples
            {
               negId00[nNegId00]=negId[t]; // copy
               if(data->Row(negId[t].id)[j+1]>cub->Max(j))        // if example is discriminated by upper (right) bound
               {
                  count += weight;                                // ... increment counter
                  if(negId[t].mass>weight)            // remaining mass exceeds variable weight ...
                     nNegId00++;                      // ... keep example
                  else
                     negId00[nNegId00].mass -= weight;   // ... else: example used up, it is dropped (entry overwritten)
               }
               else
                  nNegId00++;       // keep
            }




            // check if new maximum is found
            if(count>best)
            {
               left  = false;                         // set new best values
               varId = j;
               best  = count;
               TTId* tmp=negId0;    // swap pointer
               negId0=negId00;
               negId00=tmp;
               nNegId0 = nNegId00;
            }
         }


         if(best==0)
            break;

         // accept the most discriminative bound, i.e. restore it from the cuboid
         if(left)
            l0[varId] = cub->Min(varId);  // copy from cuboid
         else
            r0[varId] = cub->Max(varId);  // copy from cuboid

         // "copy" new id's of negative examples
         TTId* tmp=negId;     // swap pointer
         negId=negId0;
         negId0=tmp;
         nNegId = nNegId0;             // # negative tuples
      }

      cub->Replace(l0, r0);

      delete[] negId00;
      delete[] negId0;
      delete[] negId;      // release
   }
}

#else

//----------------------------------------------------------------------------------------------------------------------
// post pruning of cuboids: component-wise try to generalize left and/or right bounds or mask if this does not affect
// the cuboid's separation power, i.e. if no new negative examples are covered
void TPnc::PostPrune(const bool*const& f_Stop)
{
#ifdef PROFILE
clock_t time=clock();
#endif


   // 1. do some checks and get mapper to all cuboids
   if(!cuboidsLists) return;                               // check: nothing can be pruned if there are no cuboids
   if(f_Pruned)      return;                               // check: return if instance is already pruned
   f_Pruned=true;
   const TCuboid**const& mapper = cuboidsLists->GetMapper(consistent_mapper);    // get mapper to all cuboids
   state = s_Pruning;                                                            // set state
   progress = 0;

   // 2. for each cuboid: try to generalize left and/or right bounds
   const int nCub = cuboidsLists->nCuboids();               // abbreviation
   for(int i=0;i<nCub;i++)
   {
      if(f_Stop)
         if(*f_Stop)     // check stop flag and abort if it is set
            return;

      // a) progress and abbreviation
      progress = i/((float)nCub);                              // current progress
      TCuboid* cub = const_cast<TCuboid*>(mapper[i]);

      // b) get variable id's sorted by cuboid extent (primary) and variable weight (secondary criterion)
      //    note: symbolic flags, weights, range and symbol count are with(!) output
      TIdVec* id = new TIdVec[data->nVar()-1];
      for(int j=0;j<data->nVar()-1;j++)                           // over all input variables
      {
         if(NOMINAL_VARIABLES && data->IsSymbolic(j+1))
         {                                                        // symbolic (nominal): percentage of symbols covered
            int cSet=0;
            for(int g=0;g<data->nIntegerMaxMin(j+1);g++)
               if(IsBitSet(cub->Mask(j), g))                      // count how many symbols are covered
                  cSet++;

            id[j].a = ((float) cSet)/data->nSymbolsFound(j+1);
         }
         else
            // continuous or ordinal: normalized radian
            id[j].a = (cub->Max(j)-cub->Min(j))*ddata->Weights()[j+1]*ddata->NormFac()[j+1];


         id[j].b  = ddata->Weights()[j+1];                           // 2nd criterion: weight
         id[j].id = j;                                               // set corresponding index
      }
      qsort(id, data->nVar()-1, sizeof(id[0]), IdVecCmpDes);         // sort descending



      // c) try to generalize cuboid's variables in the order determined above in b)
      for(int j=0;j<data->nVar()-1;j++)
      {
         const int varId = id[j].id;                                 // variable to generalize


         if(NOMINAL_VARIABLES && data->IsSymbolic(varId+1))          // variable types are with(!) output

         // symbolic (nominal):
         {
            int mask = cub->Mask(varId);                             // preserve original mask
            SetAllBits(cub->Mask(varId));                            // manipulate cuboid to cover all symbols


            // test if any new(!) negative example is covered due to generalization
            for(int i=0;i<intervals->SizeOfCheckVec(cub->y_quant);i++)

               // if negative example is inside generalized cuboid ...
               if(HitTest(*cub,  data->Row( intervals->CheckVec(cub->y_quant)[i]), data, para.w_COD, ddata->Weights()
                           , intervals->Order()))
               {
                  // restore original mask
                  cub->Mask(varId)=mask;

                  // ... but also inside the original cuboid
                  if(HitTest(*cub, data->Row( intervals->CheckVec(cub->y_quant)[i]), data, para.w_COD, ddata->Weights()
                        , intervals->Order()))
                     // ... then keep generalization
                     SetAllBits(cub->Mask(varId));                               // cover all symbols
                  else
                     break;      // else the original mask is still valid
               }
         }
         else

         // continuous or ordinal:
         {
            // 1) left bound
            float l = cub->Min(varId);                                           // preserve original left bound
            cub->Min(varId) = -MAXFLOAT;                                         // manipulate left bound

            // test if any new(!) negative example is covered due to (left) generalization
            for(int i=0;i<intervals->SizeOfCheckVec(cub->y_quant);i++)

               // if negative example is inside generalized cuboid ...
               if(HitTest(*cub,  data->Row( intervals->CheckVec(cub->y_quant)[i]), data, para.w_COD, ddata->Weights()
                           , intervals->Order()))
               {
                  // restore original left bound
                  cub->Min(varId)=l;

                  // ... but also inside the original cuboid  (note: not the real(!) original cuboid, but it behaves the same here)
                  if(HitTest(*cub, data->Row( intervals->CheckVec(cub->y_quant)[i]), data, para.w_COD, ddata->Weights()
                        , intervals->Order()))
                     cub->Min(varId) = -MAXFLOAT;     // ... then keep generalization by resetting left bound again

                  else
                     break;      // else the original bound is still valid, i.e. left bound was not generalized
               }


            // 2) right bound
            float r = cub->Max(varId);                                           // preserve original right bound
            cub->Max(varId) = MAXFLOAT;                                          // manipulate right bound

            // test if any new(!) negative example is covered due to (right) generalization
            for(int i=0;i<intervals->SizeOfCheckVec(cub->y_quant);i++)

               // if negative example is inside generalized cuboid ...
               if(HitTest(*cub,  data->Row( intervals->CheckVec(cub->y_quant)[i]), data, para.w_COD, ddata->Weights()
                     , intervals->Order()))
               {
                  // restore original right bound
                  cub->Max(varId)=r;

                  // ... but also inside the original cuboid ...  (note: not the original cuboid, but it behaves the same here)
                  if(HitTest(*cub, data->Row( intervals->CheckVec(cub->y_quant)[i]), data, para.w_COD, ddata->Weights()
                     , intervals->Order()))
                     cub->Max(varId) = MAXFLOAT;   // ... then keep generalization by resetting right bound again
                  else
                     break;      // else the original right bound is still valid, i.e. right bound was not generalized
               }

         }
      }
      delete[] id;      // release
   }

#ifdef PROFILE
t_PostPrune=clock()-time;
#endif
}
#endif   // endif FORWARD_PRUNING


//----------------------------------------------------------------------------------------------------------------------
// # cuboids in all cuboid lists  -  if <k> is specified count # cuboids with more than <k> tuples
int TPnc::nCluster(const int& k) const
{
   const TCuboid** mapper = cuboidsLists->GetMapper(consistent_mapper); // get mapper

   if(k==0)
      return cuboidsLists->nCuboids();
   else                                                  // if minimum # tuples per cuboid is given -> iterate and count
   {
      int n=0;
      for(int i=0;i<cuboidsLists->nCuboids();i++)        // for all cuboids
         if(mapper[i]->m>k)
            n++;
      return n;
   }
}