00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 package de.picana.converter;
00014
00015 import de.picana.control.*;
00016 import de.picana.clusterer.*;
00017 import de.picana.logging.*;
00018 import de.picana.math.*;
00019
00020 import java.io.*;
00021 import java.util.*;
00022
00023 import weka.core.*;
00024
00025
00032 public class FreqConverter extends Task {
00033
00036 protected String srcfile;
00039 protected String destfile;
00041 protected String transformation;
00042
00044 protected Instances training_set;
00045
00046 protected List freq_table;
00047 protected Map freq_htable;
00048
00049
00051 public FreqConverter() {
00052 }
00053
00054
00055 public void init(ParameterSet params, Logger logger) {
00056
00057 super.init(params, logger);
00058
00059 srcfile = (String)params.getParameter("src");
00060 destfile = (String)params.getParameter("dest");
00061 transformation = (String)params.getParameter("transformation");
00062
00063 }
00064
00065
00066 public void start() throws TaskException {
00067
00068 try {
00069 File infile = new File(srcfile);
00070 File outfile = new File(destfile);
00071
00072 if (!outfile.exists()
00073 || (outfile.exists() && (outfile.lastModified() <= infile.lastModified()))) {
00074
00075 logger.info(LOGSRC, "Started.");
00076
00077
00078
00079 logger.info(LOGSRC, "Read srcfile '" + srcfile + "' ...");
00080 FileInputStream fis = new FileInputStream(infile);
00081 InputStreamReader reader = new InputStreamReader(fis);
00082 training_set = new Instances(reader);
00083 logger.info(LOGSRC, "reading '" + srcfile + "' done.");
00084
00085 freq_htable = new HashMap();
00086
00087 MLVector vec;
00088 Instance inst;
00089 Integer freq;
00090
00091
00092
00093 logger.info(LOGSRC, "Build frequency table from " + training_set.numInstances() + " instances ...");
00094 for (int i=0; i < training_set.numInstances(); i++) {
00095 vec = new MLVector(training_set.instance(i));
00096 freq = (Integer)freq_htable.get(vec);
00097
00098 freq_htable.put(vec, (freq==null) ? ONE :
00099 new Integer(freq.intValue() + 1));
00100 }
00101 logger.info(LOGSRC, "frequency table containing " + freq_htable.size() + " instances built.");
00102
00103
00104
00105
00106
00107 freq_table = new ArrayList();
00108
00109 Iterator iter = freq_htable.entrySet().iterator();
00110 while (iter.hasNext()) {
00111 Map.Entry entry = (Map.Entry)iter.next();
00112 vec = (MLVector)entry.getKey();
00113 freq = (Integer)entry.getValue();
00114
00115
00116
00117 if (transformation.equals("picana")) {
00118 vec.freq = (int)Math.ceil(freq.intValue() *
00119 ((arccot((1.0/40.0)*((double)freq.intValue()-80.0)) / 12.5663706) + 0.75));
00120 }
00121 if (transformation.equals("unique")) {
00122 vec.freq = 1;
00123 }
00124 freq_table.add(vec);
00125 }
00126
00127
00128
00129
00130
00131 FileOutputStream output = new FileOutputStream(outfile);
00132 PrintWriter pw = new PrintWriter(output);
00133
00134 pw.println("% ARFF file for picture data from PNG files");
00135 pw.println("%");
00136 pw.println("@relation colour");
00137 pw.println();
00138 pw.println("@attribute red numeric");
00139 pw.println("@attribute green numeric");
00140 pw.println("@attribute blue numeric");
00141 pw.println();
00142 pw.println("@data");
00143 pw.println("%");
00144 pw.println("% x instances");
00145 pw.println("%");
00146
00147 int rand_index;
00148
00149 while(freq_table.size() > 0) {
00150
00151 rand_index = rand.nextInt(freq_table.size());
00152 vec = (MLVector)freq_table.get(rand_index);
00153 vec.freq--;
00154
00155 if (vec.freq == 0)
00156 freq_table.remove(rand_index);
00157
00158 for (int i=0; i < vec.dim; i++) {
00159 pw.print(vec.value[i]);
00160 if (i != (vec.dim-1))
00161 pw.print(", ");
00162 }
00163 pw.println();
00164 }
00165
00166 pw.close();
00167
00168 logger.info(LOGSRC, "Stopped.");
00169 }
00170
00171 } catch (Exception e) {
00172 throw new TaskException(e.toString());
00173 }
00174
00175 }
00176
00177 public void stop() {
00178
00179 }
00180
00181 public void pause() {
00182
00183 }
00184
00185 public void resume() {
00186
00187 }
00188
00189 protected double arccot(double x) {
00190 if (x >= 0.0)
00191 return Math.acos (1.0/Math.sqrt(1.0+x*x)) + 1.570796327;
00192 return -Math.acos (1.0/Math.sqrt(1.0+x*x)) + 1.570796327;
00193 }
00194 }