summary | refs | log | tree | commit | diff | stats
path: root/fsa
diff options
context:
space:
mode:
author Jon Bratseth <bratseth@verizonmedia.com> 2020-01-06 21:06:26 +0100
committer Jon Bratseth <bratseth@verizonmedia.com> 2020-01-06 21:06:26 +0100
commit 1f6753d9d0f35a4a6612987fe8c6ea42ff166495 (patch)
tree 0cfd3557a7400b7178b5dd6aa884d3407237d552 /fsa
parent cc711b5a8fbc1a7a5897f8ee1761103fcb89e644 (diff)
Non-functional changes
Diffstat (limited to 'fsa')
-rw-r--r-- fsa/src/main/java/com/yahoo/fsa/FSA.java 4
-rw-r--r-- fsa/src/main/java/com/yahoo/fsa/segmenter/Segment.java 25
-rw-r--r-- fsa/src/main/java/com/yahoo/fsa/segmenter/Segmenter.java 85
-rw-r--r-- fsa/src/main/java/com/yahoo/fsa/segmenter/Segments.java 2
-rw-r--r-- fsa/src/main/java/com/yahoo/fsa/topicpredictor/TopicPredictor.java 29
5 files changed, 37 insertions, 108 deletions
diff --git a/fsa/src/main/java/com/yahoo/fsa/FSA.java b/fsa/src/main/java/com/yahoo/fsa/FSA.java
index f0fc27a15c5..5a4a0e4475f 100644
--- a/fsa/src/main/java/com/yahoo/fsa/FSA.java
+++ b/fsa/src/main/java/com/yahoo/fsa/FSA.java
@@ -83,7 +83,7 @@ public class FSA implements Closeable {
public void delta(String string){
ByteBuffer buf = fsa.encode(string);
Maps m = fsa.map();
- while(state >0 && buf.position()<buf.limit()){
+ while (state >0 && buf.position()<buf.limit()){
delta(m, buf.get());
}
}
@@ -106,7 +106,7 @@ public class FSA implements Closeable {
/** Jumps ahead by a word - if this is not the first word, it must be preceeded by space. */
public void deltaWord(String string){
- if (state!=fsa.start()) {
+ if (state != fsa.start()) {
delta((byte)' ');
}
delta(string);
diff --git a/fsa/src/main/java/com/yahoo/fsa/segmenter/Segment.java b/fsa/src/main/java/com/yahoo/fsa/segmenter/Segment.java
index 97453d97c6d..ab2ec75d7f3 100644
--- a/fsa/src/main/java/com/yahoo/fsa/segmenter/Segment.java
+++ b/fsa/src/main/java/com/yahoo/fsa/segmenter/Segment.java
@@ -4,39 +4,38 @@ package com.yahoo.fsa.segmenter;
/**
* Class encapsulation of a segment.
*
- * @author <a href="mailto:boros@yahoo-inc.com">Peter Boros</a>
+ * @author Peter Boros
*/
public class Segment {
- int _beg;
- int _end;
- int _conn;
+ final int begin;
+ final int end;
+ final int conn;
- public Segment(int b, int e, int c)
- {
- _beg = b;
- _end = e;
- _conn = c;
+ public Segment(int b, int e, int c) {
+ begin = b;
+ end = e;
+ conn = c;
}
public int beg()
{
- return _beg;
+ return begin;
}
public int end()
{
- return _end;
+ return end;
}
public int len()
{
- return _end-_beg;
+ return end - begin;
}
public int conn()
{
- return _conn;
+ return conn;
}
}
diff --git a/fsa/src/main/java/com/yahoo/fsa/segmenter/Segmenter.java b/fsa/src/main/java/com/yahoo/fsa/segmenter/Segmenter.java
index 64c20e38b55..505085c46cf 100644
--- a/fsa/src/main/java/com/yahoo/fsa/segmenter/Segmenter.java
+++ b/fsa/src/main/java/com/yahoo/fsa/segmenter/Segmenter.java
@@ -9,58 +9,56 @@ import com.yahoo.fsa.FSA;
/**
* API for accessing the Segmenter automata.
*
- * @author <a href="mailto:boros@yahoo-inc.com">Peter Boros</a>
+ * @author Peter Boros
*/
public class Segmenter {
- private FSA _fsa;
+ private final FSA fsa;
public Segmenter(FSA fsa) {
- _fsa = fsa;
+ this.fsa = fsa;
}
public Segmenter(String filename) {
- _fsa = new FSA(filename,"utf-8");
+ fsa = new FSA(filename, "utf-8");
}
public Segmenter(String filename, String charsetname) {
- _fsa = new FSA(filename,charsetname);
+ fsa = new FSA(filename, charsetname);
}
- public boolean isOk()
- {
- return _fsa.isOk();
+ public boolean isOk() {
+ return fsa.isOk();
}
- public Segments segment(String input)
- {
+ public Segments segment(String input) {
String[] tokens = input.split("\\s");
return segment(tokens);
}
private class Detector {
- FSA.State _state;
- int _index;
- public Detector(FSA.State s, int i)
- {
- _state = s;
- _index = i;
+ final FSA.State state;
+ final int index;
+
+ public Detector(FSA.State s, int i) {
+ state = s;
+ index = i;
}
public FSA.State state()
{
- return _state;
+ return state;
}
public int index()
{
- return _index;
+ return index;
}
+
}
- public Segments segment(String[] tokens)
- {
+ public Segments segment(String[] tokens) {
Segments segments = new Segments(tokens);
LinkedList detectors = new LinkedList();
@@ -68,7 +66,7 @@ public class Segmenter {
while(i<tokens.length){
- detectors.add(new Detector(_fsa.getState(),i));
+ detectors.add(new Detector(fsa.getState(), i));
ListIterator det_it = detectors.listIterator();
while(det_it.hasNext()){
@@ -88,50 +86,5 @@ public class Segmenter {
return segments;
}
- //// test ////
- public static void main(String[] args) {
- String fsafile = "/home/gv/fsa/automata/segments.fsa";
-
- Segmenter segmenter = new Segmenter(fsafile);
-
- System.out.println("Loading segmenter FSA file "+fsafile+": "+segmenter.isOk());
-
- for(int a=0;a<1||a<args.length;a++){
-
- String query;
- if(a==args.length){
- query = "times square head";
- }
- else {
- query = args[a];
- }
- System.out.println("processing query \""+query+"\"");
-
- Segments segments = segmenter.segment(query);
- System.out.println("all segments:");
- for(int i=0; i<segments.size();i++){
- System.out.println(" "+i+": \""+segments.sgm(i)+"\","+segments.conn(i));
- }
-
- Segments best;
-
- best = segments.segmentation(Segments.SEGMENTATION_WEIGHTED);
- System.out.print("best segments (weighted): ");
- for(int i=0; i<best.size();i++){
- System.out.print("("+best.sgm(i)+")");
- }
- System.out.println();
-
- best = segments.segmentation(Segments.SEGMENTATION_RIGHTMOST_LONGEST);
- System.out.print("best segments (rightmost_longest):");
- for(int i=0; i<best.size();i++){
- System.out.print("("+best.sgm(i)+")");
- }
- System.out.println();
-
- }
-
- }
-
}
diff --git a/fsa/src/main/java/com/yahoo/fsa/segmenter/Segments.java b/fsa/src/main/java/com/yahoo/fsa/segmenter/Segments.java
index 6d7ee7be1a6..e01afa91cd5 100644
--- a/fsa/src/main/java/com/yahoo/fsa/segmenter/Segments.java
+++ b/fsa/src/main/java/com/yahoo/fsa/segmenter/Segments.java
@@ -6,7 +6,7 @@ import java.util.LinkedList;
/**
* Contains the segmentation() method.
*
- * @author <a href="mailto:boros@yahoo-inc.com">Peter Boros</a>
+ * @author Peter Boros
*/
public class Segments extends LinkedList {
diff --git a/fsa/src/main/java/com/yahoo/fsa/topicpredictor/TopicPredictor.java b/fsa/src/main/java/com/yahoo/fsa/topicpredictor/TopicPredictor.java
index f29dc38f2ce..cc3ff2e79ac 100644
--- a/fsa/src/main/java/com/yahoo/fsa/topicpredictor/TopicPredictor.java
+++ b/fsa/src/main/java/com/yahoo/fsa/topicpredictor/TopicPredictor.java
@@ -25,13 +25,13 @@ import com.yahoo.fsa.MetaData;
* predicted topics for a term. Each topic has an attached weight and
* a term vector (topicSegments).
*
- * @author <a href="mailto:boros@yahoo-inc.com">Peter Boros</a>
- **/
+ * @author Peter Boros
+ */
public class TopicPredictor extends MetaData {
private static final String packageName = "com.yahoo.fsa.topicpredictor";
- private FSA fsa = null;
+ private final FSA fsa;
public TopicPredictor(String fsafile, String datfile){
this(fsafile, datfile, "utf-8");
@@ -154,27 +154,4 @@ public class TopicPredictor extends MetaData {
return getStringArrayEntry(user(0) + topicId, 2);
}
-
- //// test ////
- public static void main(String[] args) {
- String segment = "new york";
- if (args.length >= 1) {
- segment = args[0];
- }
-
- String fsafile = "/home/gv/fsa/automata/dmozPred_2.fsa";
- String datfile = "/home/gv/fsa/automata/dmozPred_2.dat";
-
- TopicPredictor predictor = new TopicPredictor(fsafile, datfile);
-
- List predictedTopics = predictor.getPredictedTopics(segment, 25);
- Iterator i = predictedTopics.iterator();
- while (i.hasNext()) {
- PredictedTopic topic = (PredictedTopic) i.next();
- System.out.println("\n topic=" + topic.getTopic());
- System.out.println(" weight=" + topic.getWeight());
- System.out.println(" vector=" + topic.getVector());
- }
- }
-
}