// file: $isip_ifc/class/stat/Histogram/Histogram.h // version: $Id: Histogram.h 10427 2006-02-13 19:33:54Z srinivas $ // // make sure definitions are only made once // #ifndef ISIP_HISTOGRAM #define ISIP_HISTOGRAM // isip include files // #ifndef ISIP_VECTOR_BYTE #include #endif #ifndef ISIP_VECTOR_DOUBLE #include #endif #ifndef ISIP_VECTOR_FLOAT #include #endif #ifndef ISIP_VECTOR_LLONG #include #endif #ifndef ISIP_VECTOR_LONG #include #endif #ifndef ISIP_VECTOR_SHORT #include #endif #ifndef ISIP_VECTOR_ULLONG #include #endif #ifndef ISIP_VECTOR_ULONG #include #endif #ifndef ISIP_VECTOR_USHORT #include #endif #ifndef ISIP_MATRIX_BYTE #include #endif #ifndef ISIP_MATRIX_DOUBLE #include #endif #ifndef ISIP_MATRIX_FLOAT #include #endif #ifndef ISIP_MATRIX_LLONG #include #endif #ifndef ISIP_MATRIX_LONG #include #endif #ifndef ISIP_MATRIX_SHORT #include #endif #ifndef ISIP_MATRIX_ULLONG #include #endif #ifndef ISIP_MATRIX_ULONG #include #endif #ifndef ISIP_MATRIX_USHORT #include #endif #ifndef ISIP_VECTOR #include #endif #ifndef ISIP_NAME_MAP #include #endif #ifndef ISIP_MEMORY_MANAGER #include #endif // Histogram: a class for storing, computing, and updating histograms of data. // the Histogram class works under three binning scales // LINEAR: bins within each dimension are equal // LOG: bins within each dimension are logarithmically spaced // USER_DEFINED: bins within each dimension are defined by the user // // In LINEAR and LOG scale binnings, user can specify the min, max values and // the number of bins for each dimension independently. The user can also // specify how the bins are stored: using CENTERs or EDGES. // This is more useful for the USER_DEFINED mode as the bins are manually // set by the user here: // // CENTERS: bins are represented by their bin centers. // EDGES: bins are represented by their bin edges. values will be assigned // to bin 'k' if edge(k) <= value < edge(k+1). the last bin counts // only those values that exactly match the last bin. 
//         values outside of the edge boundaries are not counted. the edges
//         must increase monotonically with their index.
//
// Presently, even in USER_DEFINED scale, there is no way of setting
// multidimensional bins directly. e.g., in 2-D histograms, we can only
// specify bins for X and Y separately; thus all bins at a particular X value
// have the same width in the X direction irrespective of Y value, and
// vice versa. this will be sufficient for most histogram applications.
//
// The base of the log in LOG scale binning is immaterial as all bases lead
// to the same bins. If min and max are a and b, num bins is N, edge i in the
// log mode is given by:
//
// c(i) = a * (b/a)^(i/N);
//
// which is independent of the base. Here log base-10 is chosen.
//
// The dimension of the histogram is defined using the setDim method.
//
// There are several ways of setting up the bins by using the various setBins
// methods. In USER_DEFINED scaling, the user can specify the bins for each
// dimension independently using:
// setBins(Vector& bins);
// or the same set of bins in each dimension using:
// setBins(TVector& bins);
// For LINEAR and LOG scale, the bins are set using different bin parameters
// for each dimension using
// setBins(VectorLong& num_bins, TVector& min, TVector& max);
// or the same kind of bins in all dimensions using:
// setBins(int32 num_bins, TIntegral min, TIntegral max);
//
// The getCounts, pdf and cdf methods return a single array, the
// multidimensional index of which can be obtained using the method:
// getBinIndexCoordinates(Vector& bin_indices)
//
// There are 2D specific methods for calling pdf and cdf and a 1D specific
// method for compute.
//
// The cdf method will now work for any dimension. It calculates cdf
// using the inclusion-exclusion principle for probability of a union
// of sets.
// class Histogram { //--------------------------------------------------------------------------- // // public constants // //--------------------------------------------------------------------------- public: // define the class name // static const String CLASS_NAME; //---------------------------------------- // // i/o related constants // //---------------------------------------- static const String DEF_PARAM; static const String PARAM_SCALE; static const String PARAM_MODE; static const String PARAM_DIM; static const String PARAM_BINS; static const String PARAM_MIN; static const String PARAM_MAX; static const String PARAM_NUM_BINS; static const String PARAM_COUNTS; //---------------------------------------- // // other important constants // //---------------------------------------- // a static name map // static const NameMap SCALE_MAP; static const NameMap MODE_MAP; //---------------------------------------- // // default values and arguments // //---------------------------------------- static const int32 DEF_DIM = 1; static const int32 DEF_MIN = 0; static const int32 DEF_MAX = 1; static const int32 DEF_NUM_BINS = 10; enum SCALE {LINEAR = 0, LOG, USER_DEFINED, DEF_SCALE = LINEAR}; enum BIN_MODE {EDGES = 0, CENTERS, DEF_MODE = EDGES}; //---------------------------------------- // // error codes // //---------------------------------------- static const int32 ERR = 60500; static const int32 ERR_BINS = 60501; //--------------------------------------------------------------------------- // // protected data // //--------------------------------------------------------------------------- protected: // scale // SCALE scale_d; // bin mode // BIN_MODE mode_d; // dimension // Long dim_d; // hold the bins (using centers or edges) in each dimension seperately // in USER_DEFINED bin scale // Vector bins_d; // hold the counts for each multi-dimensional bin // VectorLong counts_d; // the following three variables are used to specify the bins in LINEAR // and LOG scale. 
the structure in the scales can be used for fast search // using these variables instead of directly dealing with bin edges or // centers // // hold the max and min values for each dimension // VectorDouble min_d, max_d; // hold the number of bins in each dimension // VectorLong num_bins_d; // hold the boundary of blocks. // using this we can go back and forth between a global (scalar) bin index // and a (vector) index to specify the coordinate of a multidimensional bin // // the indexing procedure can be thought of as a generalized number system // VectorLong block_boundary_index_d; // variable to hold the bin size in LINEAR and LOG scale for // fast 1D bin index search // VectorDouble delta_d; // a static debug level // static Integral::DEBUG debug_level_d; // a static memory manager // static MemoryManager mgr_d; //--------------------------------------------------------------------------- // // required public methods // //--------------------------------------------------------------------------- public: // method: name // static const String& name() { return CLASS_NAME; } // other static methods // static bool8 diagnose(Integral::DEBUG debug_level); // method: setDebug // static bool8 setDebug(Integral::DEBUG debug_level) { debug_level_d = debug_level; return true; } // other debug methods // bool8 debug(const unichar* msg) const; // method: destructor // ~Histogram() {} // method: default constructor // Histogram(SCALE scale = DEF_SCALE, BIN_MODE mode = DEF_MODE, int32 dim = DEF_DIM) { scale_d = scale; mode_d = mode; setDim(dim); } // method: copy constructor // Histogram(const Histogram& arg) { assign(arg); } // method: assign // bool8 assign(const Histogram& arg) { scale_d = arg.scale_d; mode_d = arg.mode_d; dim_d = arg.dim_d; bins_d = arg.bins_d; counts_d = arg.counts_d; min_d = arg.min_d; max_d = arg.max_d; num_bins_d = arg.num_bins_d; return true; } // method: operator= // Histogram& operator= (const Histogram& arg) { assign(arg); return *this; } // 
method: sofSize // int32 sofSize() const; // other i/o methods // bool8 read(Sof& sof, int32 tag, const String& name = CLASS_NAME); bool8 write(Sof& sof, int32 tag, const String& name = CLASS_NAME) const; bool8 readData(Sof& sof, const String& pname = DEF_PARAM, int32 size = SofParser::FULL_OBJECT, bool8 param = true, bool8 nested = false); bool8 writeData(Sof& sof, const String& name = DEF_PARAM) const; // method: eq // bool8 eq(const Histogram& arg) const { return ((scale_d == arg.scale_d) && (mode_d == arg.mode_d) && (dim_d == arg.dim_d) && bins_d.eq(arg.bins_d) && counts_d.eq(arg.counts_d) && min_d.eq(arg.min_d) && max_d.eq(arg.max_d) && num_bins_d.eq(arg.num_bins_d)); } // memory management methods // // method: new // static void* operator new(size_t size) { return mgr_d.get(); } // method: new[] // static void* operator new[](size_t size) { return mgr_d.getBlock(size); } // method: delete // static void operator delete(void* ptr) { mgr_d.release(ptr); } // method: delete[] // static void operator delete[](void* ptr) { mgr_d.releaseBlock(ptr); } // method: setGrowSize // static bool8 setGrowSize(int32 grow_size) { return mgr_d.setGrow(grow_size); } // method: clear // bool8 clear(Integral::CMODE cmode = Integral::DEF_CMODE); //--------------------------------------------------------------------------- // // class-specific public methods: // set methods // //--------------------------------------------------------------------------- // method: setScale // if the scale is changed then the counts and bins are reset // bool8 setScale(SCALE scale) { scale_d = scale; return true; } // method: setMode // if the mode is changed then the counts and bins are reset // bool8 setMode(BIN_MODE mode) { mode_d = mode; return true; } // method: setDim // if the dim is changed clear affected class variables // bool8 setDim(int32 dim) { dim_d = dim; bins_d.setLength(dim_d, false); counts_d.clear(Integral::RESET); min_d.setLength(dim_d, false); max_d.setLength(dim_d, false); 
delta_d.setLength(dim_d, false); num_bins_d.setLength(dim_d, false); return true; } // methods to manually initialize the bins // in all cases, the counts vector is reset to zeros. // // block_boundary_index_d is updated to be usable for bin index conversions // between a global scalar index and a multidimensional vector index // // Any data type can be used to set the bins (float32, float64, int32, etc.) // // USER_DEFINED scale for bins // here, bins are specified by the class variable bins_d // template bool8 setBins(const Vector& bins); // USER_DEFINED scale for bins with same binning for all dimensions // here, bins are specified by the class variable bins_d // template bool8 setBins(const TVector& bins); // LOG or LINEAR scale binning, when range and num of bins set by user // with same binning in all dimensions // here, bins are specified using min_d, max_d and num_bins_d // template bool8 setBins(int32 num_bins = DEF_NUM_BINS, TIntegral min = DEF_MIN, TIntegral max = DEF_MAX); // LOG or LINEAR binning, when range and num of bins set by user // for each dimension independently // here, bins are specified using min_d, max_d and num_bins_d // template bool8 setBins(const VectorLong& num_bins, const TVector& min, const TVector& max); // methods to manually initialize the counts // the length of the input vectors must be the same as the // total number of multidimensional bins // bool8 setCounts(const VectorLong& counts); // 2D histogram-specific setCounts // bool8 setCounts(const MatrixLong& counts); //--------------------------------------------------------------------------- // // class-specific public methods: // get methods // //--------------------------------------------------------------------------- // method: getDim // int32 getDim() { return dim_d; } // method: getScale // SCALE getScale() { return scale_d; } // method: getMode // BIN_MODE getMode() { return mode_d; } // method: getBins // to be used in USER_DEFINED scale // bool8 getBins(Vector& 
bins) { return bins.assign(bins_d); } // method: getBins // to be used in LINEAR or LOG scale // bool8 getBins(VectorLong& num_bins, VectorDouble& min, VectorDouble& max) { return (min.assign(min_d) && max.assign(max_d) && num_bins.assign(num_bins_d)); } // method: getCounts // bool8 getCounts(VectorLong& counts) { return counts.assign(counts_d); } // 2D histogram-specific getCounts // bool8 getCounts(MatrixLong& counts); // method: getBinIndexCoordinates // bool8 getBinIndexCoordinates(Vector& multidim_bin_index); // method: getBlockBoundaryIndex // bool8 getBlockBoundaryIndex(VectorLong& block_boundary_index) { return block_boundary_index.assign(block_boundary_index_d); } //--------------------------------------------------------------------------- // // class-specific public methods: // histogram computation methods. all methods operate on any numerical // input type // //--------------------------------------------------------------------------- // methods to compute a histogram // compute(Vector values): bins the elements of 'values' into // the bins that are already set. 
If no bins are set, then it // bins the 'values' into DEF_NUM_BINS equally spaced (LINEAR scale) // containers on the range of [min(values), max(values)] in all // dimensions // template bool8 compute(const Vector& values); // 1D histogram-specific compute // template bool8 compute(const TVector& values); // methods to update the histogram counts // this method adds to the binned counts rather than overwriting them // template bool8 update(const Vector& values); // 1D histogram-specific update // template bool8 update(const TVector& values); //--------------------------------------------------------------------------- // // class-specific public methods: // histogram normalization methods // //--------------------------------------------------------------------------- // probability distribution methods // // method to obtain pdf values in each multi-dimensional bin // the actual multidimensional (vector) index coordinate of each bin is // obtained from the method getBinIndexCooordinates // bool8 pdf(VectorDouble& pdf) const; // 2D histogram-specific pdf // bool8 pdf(MatrixDouble& pdf); // method to obtain cdf values in each multi-dimensional bin // the actual multidimensional (vector) index coordinate of each bin is // obtained from the method getBinIndexCooordinates // bool8 cdf(VectorDouble& cdf); // 2D histogram-specific cdf // bool8 cdf(MatrixDouble& cdf); //--------------------------------------------------------------------------- // // private methods // //--------------------------------------------------------------------------- private: // method to compute intermediate indices used for indexing // bool8 computeBlockBoundaryIndex(); // this method finds the bin (1-D) to which a scalar point belongs to. // presently this method uses linear search. this can be replaced with // binary tree search. 
// int32 findBinIndex(float64 value, int32 dim_num); // convert global index to local index vector // bool8 convertGlobalToLocalIndex(VectorLong& loc_ind, int32 global_ind); // convert local index vector to global index // int32 convertLocalToGlobalIndex(const VectorLong& loc_ind); }; // end of include file // #endif