// name of this source module; handed to IfTrueThrowTypeA() in TDataData::Load() together with the function name
static const char* szModule = "ddata.cpp";

//------------------------------------------------------------------------------
//    module ddata.cpp                                                        //
//                                                                            //
//    Class TDataData encapsulates some necessary information from the learn  //
//    data file like # variables, minima, maxima, feature, weights ...        //
//    Counted object: use Release() to delete!                                //
//    See below or http://www.newty.de/pnc2/sdocu.html for more information.  //
//                                                                            //
//    copyright (c) 1998-2003 by Lars Haendel                                 //
//    home: www.newty.de                                                      //
//                                                                            //
//    This program is free software and can be used under the terms of the    //
//    GNU licence. See header file for further information and disclaimer.    //
//                                                                            //
//------------------------------------------------------------------------------
//                                                                            //
//    CREATE: Pass a TData object which is used to initialize/copy            //
//                                                                            //
//    USE: You get maxima, minima, weights etc. just like from a TData        //
//    object.                                                                 //
//                                                                            //
//    File I/O: Load and save routines                                        //
//------------------------------------------------------------------------------


#include <iomanip>      // due to:     setw()
#include "fileUtil.h"   //             readExpNoEndl() ...

#include "ddata.h"      //             TDataData (will also include data.h)


//----------------------------------------------------------------------------------------------------------------------
// dummy constructor used if instance is initialized with Load()
// note: ALL array pointers are set to NULL here. The destructor calls delete[] on each of them, which is only safe
//       for pointers that are either NULL or were allocated by Allocate(). Leaving one of them (formerly normFac)
//       uninitialized results in undefined behaviour if Load() is never called or fails before Allocate().
TDataData::TDataData()
{
   // counted object, initialize reference counter
   ref=1;
   f_Loaded = false;          // nothing loaded yet, Load() may be called once

   // set pointer to NULL
   min = weight = normFac = NULL;
   _nIntegerMaxMin = NULL;
   #ifndef RELEASE            // obsolete in release versions
   _nSymbolsFound = NULL;
   #endif

   type = NULL;

   // set counters and statistics to defined values
   _nVar = _nInp = _nTup = 0;
   outcol = 0;
   meanY  = 0;
   strcpy(szFilename, "unknown");
}


//----------------------------------------------------------------------------------------------------------------------
// normal constructor, initialize from given TData object
//
//    data        learn data object; # variables, # tuples, output column, file name, output mean, minima, variable
//                types, weights and symbol counts are copied from it
//    normType    selects how the normalization factors are calculated: ByFourSigma, ByRange or ByAverageDistance
//    overlapFac  used as normalization factor of nominal variables if normType is ByFourSigma or ByRange
//
// note: a normalization factor keeps the default value 1 whenever it cannot be calculated, i.e. for a zero
//       deviation/range/average distance, for less than two tuples and for a normType which is not handled here.
TDataData::TDataData(const TData*const& data, const TNormType& normType, const float& overlapFac)
{
   // a) initialize/copy
   ref = 1;                                           // counted object, initialize reference counter
   f_Loaded = true;                                   // instance is initialized, Load() will refuse to run

   Allocate(data->nVar());                            // allocate memory (also stores # variables in _nVar)
   _nInp = data->nVar()-1;
   _nTup = data->nTup();
   outcol= data->outcol;
   strcpy(szFilename, data->LoadFileName());
   meanY = data->Mean()[0];                           // output mean


   // b) copy minima, variable types, weights etc. from given data object
   memcpy(min,             data->Min(),            sizeof(float)*_nVar);
   memcpy(type,            data->varType,          sizeof(TData::TVarType)*_nVar);
   memcpy(_nIntegerMaxMin, data->_nIntegerMaxMin,  sizeof(int)*_nVar);
   memcpy(weight,          data->Weights(),        sizeof(float)*_nVar);
   #ifndef RELEASE                                                            // obsolete in release versions
   memcpy(_nSymbolsFound,  data->_nSymbolsFound,   sizeof(int)*_nVar);
   #endif


   // c) calculate normalization factors

   // c0) start with the default value 1 so that no factor is left uninitialized, whatever normType is passed
   for(int k=0;k<_nVar;k++)
      normFac[k] = 1;

   // c1) normalize continuous variables by the fourth of their standard deviation (four sigma)
   if(normType==ByFourSigma)
   {
      for(int j=0;j<_nVar;j++)
      {
         if(NOMINAL_VARIABLES && IsSymbolic(j))          // nominal: use overlap factor/weight
            normFac[j] = overlapFac;
         else if(data->Dev()[j]!=0)                      // continuous: divide by four sigma
            normFac[j] = 1/(4*data->Dev()[j]);           // note: zero deviation keeps default value 1
      }
   }

   // c2) normalize continuous variables by their range
   else if(normType==ByRange)
   {
      for(int j=0;j<_nVar;j++)
      {
         if(NOMINAL_VARIABLES && IsSymbolic(j))          // nominal: use overlap factor/weight
            normFac[j] = overlapFac;
         else if(data->Range()[j]!=0)                    // continuous: divide by range
            normFac[j] = 1/data->Range()[j];             // note: zero range keeps default value 1
      }
   }

   // c3) normalize variables (regardless of type) by the average distance of the learn tuples to each other
   else if(normType==ByAverageDistance)
   {
      const int nTup = data->nTup();                     // abbreviation

      // # of distinct tuple pairs. Calculated in double: the int product nTup*(nTup-1) overflows for nTup>46341.
      const double nPairs = 0.5*nTup*(nTup-1.0);

      for(int j=0;j<_nVar;j++)
      {
         const bool f_Nominal = NOMINAL_VARIABLES && IsSymbolic(j);   // does not change within the tuple loops

         // sum up the distance over all pairs. A double is used as a float accumulator stops counting at 2^24.
         double d=0;
         for(int i=0;i<nTup;i++)
            for(int u=i+1;u<nTup;u++)                    // a tuple has distance zero to itself: start at i+1
               if(f_Nominal)                             // nominal:
               {
                  if(data->Row(i)[j]!=data->Row(u)[j])
                     d++;                                // distance is one if values do not match
               }
               else                                      // continuous:
                  d += fabs(data->Row(i)[j]-data->Row(u)[j]);

         if(nPairs>0)                                    // less than two tuples: no pair, no average distance
            d /= nPairs;                                 // calculate average distance
         else
            d = 0;

         if(d!=0)
            normFac[j] = static_cast<float>(1/d);        // use inverse to normalize; zero keeps default value 1
      }
   }
}


//----------------------------------------------------------------------------------------------------------------------
// destructor; it is private, so an instance cannot be deleted directly - call Release() instead
TDataData::~TDataData()
{
   // free the arrays created in Allocate(), last allocated first
   #ifndef RELEASE            // obsolete in release versions
   delete[] _nSymbolsFound;
   #endif
   delete[] type;
   delete[] _nIntegerMaxMin;
   delete[] weight;
   delete[] normFac;
   delete[] min;
}


//----------------------------------------------------------------------------------------------------------------------
// used to 'delete' instance: gives up one reference and destroys the object once the counter has reached zero
void TDataData::Release() const
{
   ref -= 1;               // one reference less
   if(ref!=0)
      return;              // counter not yet zero: keep instance

   delete this;            // counter is zero: delete instance
}


//----------------------------------------------------------------------------------------------------------------------
// allocate memory, used in constructor and in Load()
// stores the passed # variables in _nVar and creates all arrays with one element per variable
void TDataData::Allocate(const int& __nVar)
{
   const int nElements = __nVar;                      // one array element per variable
   _nVar = nElements;                                 // remember # variables

   // floating point values
   min     = new float[nElements];
   normFac = new float[nElements];
   weight  = new float[nElements];

   // integer counts and variable types
   _nIntegerMaxMin = new int[nElements];
   type            = new TData::TVarType[nElements];
   #ifndef RELEASE                                    // obsolete in release versions
   _nSymbolsFound  = new int[nElements];
   #endif
}


//----------------------------------------------------------------------------------------------------------------------
// save to file (ofstream)
// writes section [Misc] with the general information, followed by section [DataData] with one line per variable
void TDataData::Save(ofstream& file) const
{
   // a) section [Misc]
   file << "[Misc]" << endl;
   file << "nVar     = " << _nVar << endl;                     // # variables
   file << "nTup     = " << _nTup << endl;                     // # tuples
   file << "Output   = " << (outcol+1) << endl;                // written one-based, Load() subtracts one again
   file << "Filename = " << szFilename << endl << endl << endl;// filename of original data


   // b) section [DataData]: name and two comment lines describing the content
   // NOTE(review): the format line names six columns; in non-release builds seven values are written per variable
   file << "[DataData]" << endl;
   file << ComChar << " Statistics and variable types obtained from the learn data file." << endl;
   file << ComChar << " Format: Column | Minimum | Weight | Normalization Factor | # Symbols | "
                      "type (n=nominal, o=ordinal, c=continuous)" << endl << endl;


   // c) one line for each variable
   for(int var=0; var<_nVar; ++var)
   {
      // switch to left justified output
      file.setf(ios::left);
      file.unsetf(ios::right);

      file << setw(6) << OriginalColumnInFile(var) << " ";     // column
      file << setw(W_FLOAT) << min[var] << " ";                // minimum
      file << setw(W_FLOAT) << weight[var] << " ";             // weight
      file << setw(W_FLOAT) << normFac[var] << " ";            // normalization factor
      file << setw(W_SYMBOL) << _nIntegerMaxMin[var] << " ";   // # integer values max to min
      #ifndef RELEASE                                          // obsolete in release versions
      file << setw(W_SYMBOL) << _nSymbolsFound[var] << " ";    // # symbols
      #endif
      file << setw(W_SYMBOL) << TData::VarTypeToChar(type[var]);  // variable type
      file << endl;

      // back to right justified output
      file.unsetf(ios::left);
      file.setf(ios::right);
   }

   file << endl << endl;                                       // two empty lines close the section
}


//----------------------------------------------------------------------------------------------------------------------
// load from file (ifstream)
//
//    file  input stream; sections [Misc] and [DataData] are read in the layout written by Save()
//    line  line counter of the caller; increased by the values returned from SearchKey() and skipwsExEndl() and
//          used in the error messages
//
// Can be called only once and only for an instance created with the dummy constructor: f_Loaded is checked at
// entry (the normal constructor already sets it). Errors found while reading are caught as int error numbers,
// converted to a message text and passed to ThrowTypeU().
// note: meanY is neither written by Save() nor read here, it keeps the value set by the constructor.
// NOTE(review): the message text is composed with sprintf() in a buffer of STS characters - confirm that the texts
//               returned by ::GetLastError() always fit.
void TDataData::Load(ifstream& file, int& line)
{
   // a) checks
   IfTrueThrowTypeA(f_Loaded, "Function cannot be called twice in the lifetime of an instance!", "TDataData::Load"
                     , szModule);
   f_Loaded = true;

   // b) read # variables and data data
   char c = '\0';   // variable type character, initialized so that it is never read indeterminate
   int j=-1;        // variable counter; stays negative as long as section [Misc] is read
   try
   {
      // b1) read section [Misc]
      line += SearchKey(file, "[Misc]");                             // position to section
      _nVar  = ReadKeyValue(file, "nVar", -1, SEARCH_LINES);         // read # variables
      _nTup  = ReadKeyValue(file, "nTup", -1, SEARCH_LINES);         // read # tuples
      outcol = ReadKeyValue(file, "Output", -1, SEARCH_LINES)-1;     // read output column (ref. to original data file)
      ReadKeyString(file, "Filename", szFilename, STS, SEARCH_LINES);// filename of original data
      if(szFilename[0]=='\0')                                        // if no filename was found
         strcpy(szFilename, "unknown");                              // set it to be unknown

      if(_nVar<=0)   throw 1;                                        // ... and check them
      if(_nTup<=0)   throw 10;                                       // ... and check them
      if(outcol<0 || outcol >=_nVar) throw 2;

      Allocate(_nVar);                                               // allocate memory for now known # variables
      _nInp = _nVar-1;                                               // same relation as set by the normal constructor


      // b2) read section [DataData]
      line += SearchKey(file, "[DataData]");                         // position to section
      for(j=0;j<_nVar;j++)                                           // read statistics for each variable
      {
         ReadExpNoEndl(file, (float) 0);                             // remove column number
         min[j]             = ReadExpNoEndl(file, (float) 0);        // read minimum
         weight[j]          = ReadExpNoEndl(file, (float) 0);        // read weight
         normFac[j]         = ReadExpNoEndl(file, (float) 0);        // read normalization factor
         _nIntegerMaxMin[j] = ReadExpNoEndl(file, (float) 0);        // read # integer values from min. up to max.
         #ifndef RELEASE                                             // obsolete in release versions
         _nSymbolsFound[j]  = ReadExpNoEndl(file, (float) 0);        // read # symbols
         #endif

         skipwsNoEndl(file);                                         // position forward
         c = (char) file.get();                                      // read, check and set variable type
         type[j]=TData::CharToVarType(c);
         if(type[j]==TData::none)
            throw 3;                                                 // error: unknown variable type

         line += skipwsExEndl(file);                                 // remove linefeed
      }
   }
   catch(int errNo)     // exception handling: compose error text and throw again
   {
      // note: the general information is stored in section [Misc] (see Save() and SearchKey() above), thus the
      //       messages name this section
      char szText[STS];
      switch(errNo)
      {
         case 1           : strcpy(szText, "Error in section [Misc]: Key 'nVar' missing or value negative!"); break;
         case 10          : strcpy(szText, "Error in section [Misc]: Key 'nTup' missing or value negative!"); break;
         case 2           : sprintf(szText, "Error in section [Misc]: Key 'Output' missing or value not e[1..%d]!", _nVar); break;
         case 3           : sprintf(szText, "Error in line %d in section [DataData] while reading variable %d: Unknown variable type '%c'!", line, j+1, c); break;
         case KeyNotFound : sprintf(szText,"%s",::GetLastError(errNo)); break;
         default          : if(j<0)    // error occurred before the variable loop was entered
                              sprintf(szText,"Error in section [Misc]: %s", ::GetLastError(errNo));
                            else
                              sprintf(szText,"Error in line %d in section [DataData] while reading variable %d: %s", line, j+1, ::GetLastError(errNo));
      }
      ThrowTypeU(szText);     // throw exception
   }
}