diff options
Diffstat (limited to 'fsa/src/main/java')
5 files changed, 37 insertions, 108 deletions
diff --git a/fsa/src/main/java/com/yahoo/fsa/FSA.java b/fsa/src/main/java/com/yahoo/fsa/FSA.java index f0fc27a15c5..5a4a0e4475f 100644 --- a/fsa/src/main/java/com/yahoo/fsa/FSA.java +++ b/fsa/src/main/java/com/yahoo/fsa/FSA.java @@ -83,7 +83,7 @@ public class FSA implements Closeable { public void delta(String string){ ByteBuffer buf = fsa.encode(string); Maps m = fsa.map(); - while(state >0 && buf.position()<buf.limit()){ + while (state >0 && buf.position()<buf.limit()){ delta(m, buf.get()); } } @@ -106,7 +106,7 @@ public class FSA implements Closeable { /** Jumps ahead by a word - if this is not the first word, it must be preceeded by space. */ public void deltaWord(String string){ - if (state!=fsa.start()) { + if (state != fsa.start()) { delta((byte)' '); } delta(string); diff --git a/fsa/src/main/java/com/yahoo/fsa/segmenter/Segment.java b/fsa/src/main/java/com/yahoo/fsa/segmenter/Segment.java index 97453d97c6d..ab2ec75d7f3 100644 --- a/fsa/src/main/java/com/yahoo/fsa/segmenter/Segment.java +++ b/fsa/src/main/java/com/yahoo/fsa/segmenter/Segment.java @@ -4,39 +4,38 @@ package com.yahoo.fsa.segmenter; /** * Class encapsulation of a segment. * - * @author <a href="mailto:boros@yahoo-inc.com">Peter Boros</a> + * @author Peter Boros */ public class Segment { - int _beg; - int _end; - int _conn; + final int begin; + final int end; + final int conn; - public Segment(int b, int e, int c) - { - _beg = b; - _end = e; - _conn = c; + public Segment(int b, int e, int c) { + begin = b; + end = e; + conn = c; } public int beg() { - return _beg; + return begin; } public int end() { - return _end; + return end; } public int len() { - return _end-_beg; + return end - begin; } public int conn() { - return _conn; + return conn; } } diff --git a/fsa/src/main/java/com/yahoo/fsa/segmenter/Segmenter.java b/fsa/src/main/java/com/yahoo/fsa/segmenter/Segmenter.java index 64c20e38b55..505085c46cf 100644 --- a/fsa/src/main/java/com/yahoo/fsa/segmenter/Segmenter.java +++ b/fsa/src/main/java/com/yahoo/fsa/segmenter/Segmenter.java @@ -9,58 +9,56 @@ import com.yahoo.fsa.FSA; /** * API for accessing the Segmenter automata. * - * @author <a href="mailto:boros@yahoo-inc.com">Peter Boros</a> + * @author Peter Boros */ public class Segmenter { - private FSA _fsa; + private final FSA fsa; public Segmenter(FSA fsa) { - _fsa = fsa; + this.fsa = fsa; } public Segmenter(String filename) { - _fsa = new FSA(filename,"utf-8"); + fsa = new FSA(filename, "utf-8"); } public Segmenter(String filename, String charsetname) { - _fsa = new FSA(filename,charsetname); + fsa = new FSA(filename, charsetname); } - public boolean isOk() - { - return _fsa.isOk(); + public boolean isOk() { + return fsa.isOk(); } - public Segments segment(String input) - { + public Segments segment(String input) { String[] tokens = input.split("\\s"); return segment(tokens); } private class Detector { - FSA.State _state; - int _index; - public Detector(FSA.State s, int i) - { - _state = s; - _index = i; + final FSA.State state; + final int index; + + public Detector(FSA.State s, int i) { + state = s; + index = i; } public FSA.State state() { - return _state; + return state; } public int index() { - return _index; + return index; } + } - public Segments segment(String[] tokens) - { + public Segments segment(String[] tokens) { Segments segments = new Segments(tokens); LinkedList detectors = new LinkedList(); @@ -68,7 +66,7 @@ public class Segmenter { while(i<tokens.length){ - detectors.add(new Detector(_fsa.getState(),i)); + detectors.add(new Detector(fsa.getState(), i)); ListIterator det_it = detectors.listIterator(); while(det_it.hasNext()){ @@ -88,50 +86,5 @@ public class Segmenter { return segments; } - //// test //// - public static void main(String[] args) { - String fsafile = "/home/gv/fsa/automata/segments.fsa"; - - Segmenter segmenter = new Segmenter(fsafile); - - System.out.println("Loading segmenter FSA file "+fsafile+": "+segmenter.isOk()); - - for(int a=0;a<1||a<args.length;a++){ - - String query; - if(a==args.length){ - query = "times square head"; - } - else { - query = args[a]; - } - System.out.println("processing query \""+query+"\""); - - Segments segments = segmenter.segment(query); - System.out.println("all segments:"); - for(int i=0; i<segments.size();i++){ - System.out.println(" "+i+": \""+segments.sgm(i)+"\","+segments.conn(i)); - } - - Segments best; - - best = segments.segmentation(Segments.SEGMENTATION_WEIGHTED); - System.out.print("best segments (weighted): "); - for(int i=0; i<best.size();i++){ - System.out.print("("+best.sgm(i)+")"); - } - System.out.println(); - - best = segments.segmentation(Segments.SEGMENTATION_RIGHTMOST_LONGEST); - System.out.print("best segments (rightmost_longest):"); - for(int i=0; i<best.size();i++){ - System.out.print("("+best.sgm(i)+")"); - } - System.out.println(); - - } - - } - } diff --git a/fsa/src/main/java/com/yahoo/fsa/segmenter/Segments.java b/fsa/src/main/java/com/yahoo/fsa/segmenter/Segments.java index 6d7ee7be1a6..e01afa91cd5 100644 --- a/fsa/src/main/java/com/yahoo/fsa/segmenter/Segments.java +++ b/fsa/src/main/java/com/yahoo/fsa/segmenter/Segments.java @@ -6,7 +6,7 @@ import java.util.LinkedList; /** * Contains the segmentation() method. * - * @author <a href="mailto:boros@yahoo-inc.com">Peter Boros</a> + * @author Peter Boros */ public class Segments extends LinkedList { diff --git a/fsa/src/main/java/com/yahoo/fsa/topicpredictor/TopicPredictor.java b/fsa/src/main/java/com/yahoo/fsa/topicpredictor/TopicPredictor.java index f29dc38f2ce..cc3ff2e79ac 100644 --- a/fsa/src/main/java/com/yahoo/fsa/topicpredictor/TopicPredictor.java +++ b/fsa/src/main/java/com/yahoo/fsa/topicpredictor/TopicPredictor.java @@ -25,13 +25,13 @@ import com.yahoo.fsa.MetaData; * predicted topics for a term. Each topic has an attached weight and * a term vector (topicSegments). * - * @author <a href="mailto:boros@yahoo-inc.com">Peter Boros</a> - **/ + * @author Peter Boros + */ public class TopicPredictor extends MetaData { private static final String packageName = "com.yahoo.fsa.topicpredictor"; - private FSA fsa = null; + private final FSA fsa; public TopicPredictor(String fsafile, String datfile){ this(fsafile, datfile, "utf-8"); @@ -154,27 +154,4 @@ public class TopicPredictor extends MetaData { return getStringArrayEntry(user(0) + topicId, 2); } - - //// test //// - public static void main(String[] args) { - String segment = "new york"; - if (args.length >= 1) { - segment = args[0]; - } - - String fsafile = "/home/gv/fsa/automata/dmozPred_2.fsa"; - String datfile = "/home/gv/fsa/automata/dmozPred_2.dat"; - - TopicPredictor predictor = new TopicPredictor(fsafile, datfile); - - List predictedTopics = predictor.getPredictedTopics(segment, 25); - Iterator i = predictedTopics.iterator(); - while (i.hasNext()) { - PredictedTopic topic = (PredictedTopic) i.next(); - System.out.println("\n topic=" + topic.getTopic()); - System.out.println(" weight=" + topic.getWeight()); - System.out.println(" vector=" + topic.getVector()); - } - } - } |