// isip include files // #include #include #include "FuzzyClassifier.h" // method: normalize // // arguments: // none // // return: a boolean value indicating status // boolean FuzzyClassifier::normalize() { // clear // vec_max_d.setLength(dimension_d); vec_max_d.clear(Integral::RETAIN); // find the maximum value of each dimension // for (int i = 0; i < num_class_d; i++) { for (int j = 0; j < num_vec_d(i); j++) { for ( int k = 0; k < dimension_d; k++) { Float value = (*vec_d[i][j])(k); if (vec_max_d(k) < value.abs()) { vec_max_d(k) = value.abs(); } } } } // normaliztion // for (int i = 0; i < num_class_d; i++) { for (int j = 0; j < num_vec_d(i); j++) { (*vec_d[i][j]).div(vec_max_d); // for ( int k = 0; k < dimension_d; k++) { // (*vec_d[i][j])(k) /= vec_max_d(k); // (*vec_d[i][j]).debug(L"vec_d"); // } } } return true; } boolean FuzzyClassifier::train() { // local variables // int l = 0; // class l = 0 ~ s int i = 0; // cluster i = 0 ~ c int k = 0; // recorded cluster k int c; VectorLong group_mark, tmp_mark; VectorFloat mean_cluster(dimension_d); Float radius_cluster; Long num_cluster; boolean is_any_record = false; boolean is_record = false; Float d_diff; float cost, cost1; // assign space // record_center_d = new VectorFloat*[num_vec_d.sum()]; record_cluster_d = new VectorLong*[num_class_d]; for (l = 0; l < num_class_d; l++) { record_cluster_d[l] = new VectorLong(num_vec_d(l)); } // loop each class // for (l = 0; l < num_class_d; l++) { printf("=========== cluster the %d-th class=======\n", l); // set initial cluster c = 1 // c = 1; // initial subset belonging to l-th class // group_mark.setLength(num_vec_d(l)); group_mark.assign(UNCLUSTERED); tmp_mark.setLength(num_vec_d(l)); (*record_cluster_d[l]).setLength(num_vec_d(l)); do { // initial some variable // is_any_record = false; // use k-mean algorithm to get clusters // printf("\n--------------do k-mean on k = %d---------\n", c); tmp_mark.assign(group_mark); kmean(vec_d[l], group_mark, c, FIRST_MEAN, cost); kmean(vec_d[l], tmp_mark, c, SMOOTH_MEAN, cost1); if (cost > cost1) { group_mark.assign(tmp_mark); } // group_mark.debug(L"after kmean group_mark"); // loop through all the clusters // for (i = 0; i < c; i++) { // initialization for each cluster // mean_cluster.clear(Integral::RETAIN); num_cluster = 0; // loop each vector clustered recently // only count those vectors belonging to i-th cluster // for (int j = 0; j < num_vec_d(l); j++) { if (group_mark(j) == i) { // compute number of ith cluster // num_cluster++; // compute sum of ith cluster // mean_cluster.add(*vec_d[l][j]); } } // if this cluster doesn't contain data, go to next // if (!num_cluster) { continue; } // compute mean of i-th cluster // // mean_cluster.debug(L"mean_cluster before div"); mean_cluster.div(num_cluster); printf("cluster: %d, num_cluster = %d\n", i, (int)num_cluster); // mean_cluster.debug(L"mean_cluster"); // compute maximum radius of i-th cluster // radius_cluster = 0; for (int j = 0; j < num_vec_d(l); j++) { if (group_mark(j) == i) { float distance = mean_cluster.euclidean(*vec_d[l][j]); if (radius_cluster < distance) { radius_cluster = distance; } } } // distance from ith cluster center of l-th class to the nearest // different class datum // d_diff = 100000; for (int n = 0; n < num_class_d; n++) { // ignore l-th class itself // if (n == l) { continue; } for (int j = 0; j < num_vec_d(n); j++) { Float value = mean_cluster.euclidean(*vec_d[n][j]); // printf("value = %f\n", (float)value); if (d_diff > value) { d_diff = value; } } } is_record = false; if (num_cluster == 1) { radius_cluster = 0.1 * d_diff; // record G is_record = true; } if ((radius_cluster)< d_diff) { // record G // is_record = true; } if (is_record) { // record G(li) // record_center_d[k] = new VectorFloat(dimension_d); (*record_center_d[k]).assign(mean_cluster); record_radius_d.setLength(k + 1); record_radius_d(k) = (radius_cluster + d_diff) * 0.5; //record_radius_d(k) = radius_cluster; record_class_d.setLength(k + 1); record_class_d(k) = l; record_num_cluster_d.setLength(k + 1); record_num_cluster_d(k) = num_cluster; record_cluster_weight_d.setLength(k + 1); record_cluster_weight_d(k) = (float)num_cluster / num_vec_d(l); // update data set // for (int j = 0; j < num_vec_d(l); j++) { if (group_mark(j) == i) { group_mark(j) = CLUSTERED; (*record_cluster_d[l])(j) = k; } } // printf("record the %d-th cluster from cluster %d, num_cluster = %d, radius = %f\n", k, i, (int)num_cluster, (float)radius_cluster); // mean_cluster.debug(L"record mean"); // (*record_cluster_d[l]).debug(L"record_cluster"); // group_mark.debug(L"group_mark"); // increase k // k++; } is_any_record |= is_record; // cluear is_record for the next cluster // is_record = false; } // loop through all the clusters if (is_any_record) { // if there are any successful partitions, // we assume c = 1 c = 1; } else { c++; } is_any_record = group_mark.eq(CLUSTERED); } while (!is_any_record); } k_d = k; printf("k = %d\n", k); return true; } Long FuzzyClassifier::decode(VectorFloat vec_a) { float diff; Long hypo(0); static int i; int overlap = 0; i++; printf("--- i = %d ---------------------------------\n", i); for (int k = 0; k < k_d; k++) { diff = vec_a.euclidean(*record_center_d[k]); if (diff <= record_radius_d(k)) { if (hypo != record_class_d(k) + 1) { overlap++; hypo = record_class_d(k) + 1; } // printf("k = %d, hypo = %d, %f < %f\n", // k, (int)hypo, diff, (float)record_radius_d(k)); } } if ((!hypo)) { hypo = decode1(vec_a); } return hypo; } Long FuzzyClassifier::decode1(VectorFloat vec_a) { float diff, score = 100000; Long hypo; for (int k = 0; k < k_d; k++) { diff = vec_a.euclidean(*record_center_d[k]); diff = diff - record_radius_d(k); // diff = fabs(diff); if (score > diff) { hypo = record_class_d(k) + 1; printf("k = %d, hypo = %d, %f < %f\n", k, (int)hypo, diff, score); score = diff; } } return hypo; } Long FuzzyClassifier::decode2(VectorFloat vec_a) { float diff = 0, score = 100000, sum; Long hypo; long clas, num; for (int k = 0; k < k_d; k++) { // loop through all data belonging to this cluster // num = 0; clas = record_class_d(k); diff = 0; for (int j = 0; j < num_vec_d(clas); j++) { if ((*record_cluster_d[clas])(j) != k) { continue; } sum = vec_a.euclidean(*vec_d[clas][j]); diff += sum * sum; num++; } if (num != record_num_cluster_d(k)) { printf("*****************ERROR on num %d != record %d, exit!\n", (int)num, (int)record_num_cluster_d(k)); (*record_cluster_d[clas]).debug(L"record_cluster"); exit(1); } diff /= (float)record_num_cluster_d(k); if (score > diff) { hypo = record_class_d(k) + 1; printf("k = %d, hypo = %d, %f < %f\n", k, (int)hypo, diff, score); score = diff; } } return hypo; } Long FuzzyClassifier::decode3(VectorFloat vec_a) { // the class corresponds to the maximum g // float g = 0, a, b; Long hypo; for (int k = 0; k < k_d; k++) { // compute A(x) = exp(-||x -record_center|| / 2*record_radius) // a = vec_a.euclidean(*record_center_d[k]); a = a * a; b = record_radius_d(k); a = -a / (b * b * 2); a = exp(a); // find the maximum value // if (g < a) { g = a; hypo = record_class_d(k) + 1; float radius = record_radius_d(k); printf("k = %d, class = %d, a = %f, hypo = %d, radius = %f\n", k, (int)(record_class_d(k)), (float)a, (int)hypo, radius); } } return hypo; } Long FuzzyClassifier::decode4(VectorFloat vec_a) { // the class corresponds to the maximum g // float g = 0, a, b; Long hypo; for (int k = 0; k < k_d; k++) { // compute A(x) = exp(-||x -record_center|| / 2*record_radius) // a = vec_a.euclidean(*record_center_d[k]); a = a * a; b = record_radius_d(k); a = -a / (b * b * 2); a = exp(a); // find the maximum value // if (g < a) { g = a; hypo = record_class_d(k) + 1; float radius = record_radius_d(k); printf("k = %d, class = %d, a = %f, hypo = %d, radius = %f\n", k, (int)(record_class_d(k)), (float)a, (int)hypo, radius); } } if (g < (float)0.60530659713) { hypo = decode1(vec_a); } return hypo; } /* Long FuzzyClassifier::decode4(VectorFloat vec_a) { // the class corresponds to the maximum g // float a, b; Long N = 9, hypothesis; VectorFloat g(N), store_class(N); VectorLong hypo(num_class_d); long l, pos; g.assign((float)0); hypo.assign((long)0); for (int k = 0; k < k_d; k++) { // compute A(x) = exp(-||x -record_center|| / 2*record_radius) // a = vec_a.euclidean(*record_center_d[k]); a = a * a; b = record_radius_d(k); a = -a / (b * b * 2); a = exp(a); // find the maximum value // g.min(pos); if (g(pos) < a) { g(pos) = a; store_class(pos) = record_class_d(k); } } for (int i = 0; i < N; i++) { hypo(store_class(i))++; } hypo.debug(L"hypo"); hypo.max(pos); hypothesis = pos + 1; hypothesis.debug(L"hypothesis"); return hypothesis; } */ boolean FuzzyClassifier::kmean(VectorFloat **vec_a, VectorLong &mask_a, Long c_a, long mode, float& j_cost) { int num = mask_a.length(); VectorFloat mean[(int)c_a]; VectorLong u(mask_a); VectorLong num_cluster(c_a); float radius; // assign the first c vector as the initial c points // not randomly here // if (mode == FIRST_MEAN) { int i = 0, j = 0; while (i < c_a) { if (u(j).eq(CLUSTERED)) { j++; continue; } mean[i].assign(*vec_a[j]); u(j) = i; i++; j++; } } else { // choose first centers smoothly // // fisrt find the maximum and minimum values // VectorFloat max(dimension_d), min(dimension_d), scale(dimension_d); min.assign((float)100000); max.assign((float)-100000); for (int j = 0; j < num; j++) { // recompute mean of cluster i // if (u(j).eq(CLUSTERED)) { continue; } for (int i = 0; i < dimension_d; i++) { if (min(i) > (*vec_a[j])(i)) { min(i) = (*vec_a[j])(i); } if (max(i) < (*vec_a[j])(i)) { max(i) = (*vec_a[j])(i); } } } scale.sub(max, min); scale.div((float)(2 * c_a)); mean[0].add(min, scale); scale.mult((float)2); for (int i = 1; i < c_a; i++) { mean[i].add(mean[i - 1], scale); } /* min.debug(L"min"); max.debug(L"max"); scale.debug(L"scale"); mean[0].debug(L"mean"); mean[c_a-1].debug(L"mean1"); */ } for (;;) { // u.debug(L"U cluster"); // cluster data according to the new mean // for (int j = 0; j < num; j++) { // loop all the unclustered data // if (u(j).eq(CLUSTERED)) { continue; } // cluster data, update u(j) // radius = 100000; for (int i = 0; i < c_a; i++) { if (mean[i].eq(*vec_a[j])) { u(j) = i; break; } float euclidean = mean[i].euclidean(*vec_a[j]); if (radius > euclidean) { radius = euclidean; u(j) = i; } } } // if u == mask_a, cluster finish // if (u.ne(mask_a)) { // update mask_a, mean // mask_a.assign(u); for (int i = 0; i < c_a; i++) { mean[i] = 0; num_cluster.assign((float)0); } for (int j = 0; j < num; j++) { // recompute mean of cluster i // if (u(j).eq(CLUSTERED)) { continue; } mean[u(j)].add(*vec_a[j]); num_cluster(u(j))++; } for (int i = 0; i < c_a; i++) { mean[i].div(num_cluster(i)); } continue; } break; } float cost; j_cost = 0; for (int j = 0; j < num; j++) { // recompute mean of cluster i // if (mask_a(j).eq(CLUSTERED)) { continue; } cost = mean[mask_a(j)].euclidean(*vec_a[j]); j_cost += cost * cost; } return true; } boolean FuzzyClassifier::debug() { for (int k = 0; k < k_d; k++ ) { printf("k = %d, class = %d, num_cluster = %d, radius = %f\n, weight = %f\n", k, (int)record_class_d(k), (int)record_num_cluster_d(k), (float)record_radius_d(k), (float)record_cluster_weight_d(k)); (*record_center_d[k]).debug(L"mean = "); } record_class_d.debug(L"record_class_d"); record_num_cluster_d.debug(L"record_num_cluster_d"); return true; } boolean FuzzyClassifier::remove_junk() { int k, j, l; boolean is = false; for (k = 0; k < k_d; k++) { if (record_num_cluster_d(k) > 1) { continue; } l = record_class_d(k); for (j = 0; j < num_vec_d(l); j++) { if ((*record_cluster_d[l])(j) == k) { break; } } for (j = k; j < num_vec_d(l) - 1; j++) { vec_d[l][j] = vec_d[l][j+1]; } num_vec_d(l) -= 1; printf("remove class %d one data from %d cluster\n", l, k); is = true; } return is; } Long FuzzyClassifier::decode5(VectorFloat vec_a) { float diff, score = 100000, r; Long hypo, best_k; VectorLong hypo_count(num_class_d); for (int k = 0; k < k_d; k++) { diff = vec_a.euclidean(*record_center_d[k]); diff = diff - record_radius_d(k); diff = fabs(diff); if (score > diff) { hypo = record_class_d(k) + 1; score = diff; best_k = k; } } r = vec_a.euclidean(*record_center_d[best_k]); for (int l = 0; l < num_class_d; l++) { for (int j = 0; j < num_vec_d(l); j++) { diff = vec_a.euclidean(*vec_d[l][j]); if (diff <= r) { hypo_count(l)++; } } } hypo_count.debug(L"hypo_count"); long pos; hypo_count.max(pos); hypo = pos; return hypo; } Long FuzzyClassifier::knn(VectorFloat vec_a) { // the class corresponds to the maximum g // float a; Long N = 1, hypothesis; VectorFloat g(N), store_class(N); VectorLong hypo(num_class_d); long l, pos, max; static long number = 0; g.assign((float)0); hypo.assign((long)0); number++; printf("--- i = %d ---------------------------------\n", (int)number); for (l = 0; l < num_class_d; l++) { for (int j = 0; j < num_vec_d(l); j++) { // compute A(x) = exp(-||x -record_center|| / 2*record_radius) // a = vec_a.euclidean(*vec_d[l][j]); a = exp(-a); // find the maximum value // g.min(pos); if (g(pos) < a) { g(pos) = a; store_class(pos) = l; } } } for (int i = 0; i < N; i++) { hypo(store_class(i))++; } g.debug(L"cost"); store_class.debug(L"class"); hypo.debug(L"hypo"); max = hypo.max(pos); // if any ties // int n; n = 0; for (int i = 0; i < num_class_d; i++) { if (hypo(i) == max) { n++; } else { hypo(i) = 0; } } if (n == 1) { hypothesis = pos + 1; } else { hypothesis.rand(0, n); hypothesis.debug(L"random"); for (int i = 0; i < num_class_d; i++) { if (hypo(i) == max) { if (hypothesis == 0) { hypothesis = i + 1; break; } hypothesis--; } } } /* else { VectorFloat cost(num_class_d); cost.assign((float)0); for (int i = 0; i < N; i++) { cost(store_class(i)) += g(i); } cost.mult(hypo); cost.debug(L"cost"); cost.max(pos); hypothesis = pos + 1; } */ hypothesis.debug(L"hypothesis"); return hypothesis; }