00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 package de.picana.classifier;
00014
00015 import de.picana.control.*;
00016 import de.picana.clusterer.*;
00017 import de.picana.math.*;
00018
00019 import weka.core.*;
00020 import java.io.*;
00021 import java.util.*;
00022
00023
00045 public class Centroid extends Classifier {
00046
00048 private List centroids;
00049
00050
00052 public Centroid() {
00053 }
00054
00055 protected void loadModel(String filename) throws TaskException {
00056
00057 try {
00058 centroids = new ArrayList();
00059
00060 File model = new File(filename);
00061 FileReader frd = new FileReader(model);
00062 BufferedReader in = new BufferedReader(frd);
00063 String line = in.readLine();
00064 StringTokenizer tk = new StringTokenizer(line,",");
00065 int dimension = tk.countTokens();
00066
00067 logger.debug(LOGSRC, "dim = " + dimension);
00068
00069 int i=0;
00070 while((line = in.readLine()) != null) {
00071 StringTokenizer tok = new StringTokenizer(line, ",");
00072 MLVector vec = new MLVector(dimension);
00073 int j=0;
00074 while (tok.hasMoreTokens()) {
00075 double d = 0.0;
00076 String token = tok.nextToken();
00077 try {
00078 d = Double.parseDouble(token);
00079 } catch (NumberFormatException nfe) {}
00080 vec.value[j] = d;
00081 j++;
00082 }
00083 logger.debug(LOGSRC, "centroid[" + i + "] = " + vec.toString());
00084 centroids.add(vec);
00085 i++;
00086 }
00087
00088 in.close();
00089
00090 num_clusters = centroids.size();
00091 logger.debug(LOGSRC, "num_clusters = " + num_clusters);
00092
00093 } catch (Exception e) {
00094 throw new TaskException(e.toString());
00095 }
00096 }
00097
00098 protected int classify(Instance i) {
00099
00100 double min_dist = Double.MAX_VALUE;
00101 double act_dist = 0.0;
00102 MLVector vec_a = new MLVector(i.numValues());
00103 for (int j=0; j < i.numValues(); j++) {
00104 vec_a.value[j] = i.value(j);
00105 }
00106 int cl = 0;
00107 for (int j=0; j < centroids.size(); j++) {
00108 MLVector vec_b = (MLVector)centroids.get(j);
00109 act_dist = Distance.euklidian(vec_a.value, vec_b.value);
00110 if (act_dist < min_dist) {
00111 cl = j;
00112 min_dist = act_dist;
00113 }
00114 }
00115 return cl;
00116 }
00117 }