aboutsummaryrefslogtreecommitdiffstats
path: root/vespaclient-java/src/main/java/com/yahoo
diff options
context:
space:
mode:
author: Tor Brede Vekterli <vekterli@yahooinc.com> 2023-02-23 14:45:34 +0100
committer: Tor Brede Vekterli <vekterli@yahooinc.com> 2023-02-23 15:18:20 +0100
commit: 7cb3323d0cf0c1c98f0c0e47cb88741f56c73eb9 (patch)
tree: bb2662cfc5777d812cceae7198962b39c14a59ff /vespaclient-java/src/main/java/com/yahoo
parent: f66f816102ce0a7c3aaba72d1db61a83157259ed (diff)
Add JSONL output support to `vespa-visit` CLI tool
JSONL output is enabled via the new `--jsonl` argument, which is mutually exclusive with `--jsonoutput` and the (deprecated) `--xmloutput`.
Diffstat (limited to 'vespaclient-java/src/main/java/com/yahoo')
-rw-r--r-- vespaclient-java/src/main/java/com/yahoo/vespavisit/StdOutVisitorHandler.java 90
-rw-r--r-- vespaclient-java/src/main/java/com/yahoo/vespavisit/VdsVisit.java 70
2 files changed, 100 insertions, 60 deletions
diff --git a/vespaclient-java/src/main/java/com/yahoo/vespavisit/StdOutVisitorHandler.java b/vespaclient-java/src/main/java/com/yahoo/vespavisit/StdOutVisitorHandler.java
index fc74cb6d899..288df7e470c 100644
--- a/vespaclient-java/src/main/java/com/yahoo/vespavisit/StdOutVisitorHandler.java
+++ b/vespaclient-java/src/main/java/com/yahoo/vespavisit/StdOutVisitorHandler.java
@@ -35,42 +35,45 @@ import java.util.logging.Logger;
@SuppressWarnings("deprecation")
public class StdOutVisitorHandler extends VdsVisitHandler {
- private static final Logger log = Logger.getLogger(
- StdOutVisitorHandler.class.getName());
- private final boolean printIds;
- private final boolean indentXml;
- private final int processTimeMilliSecs;
- private final PrintStream out;
- private final boolean jsonOutput;
- private final boolean tensorShortForm;
- private final boolean tensorDirectValues;
+ private static final Logger log = Logger.getLogger(StdOutVisitorHandler.class.getName());
- private final VisitorDataHandler dataHandler;
+ public enum OutputFormat {
+ JSONL,
+ JSON,
+ XML // Deprecated
+ }
- public StdOutVisitorHandler(boolean printIds, boolean indentXml,
- boolean showProgress, boolean showStatistics, boolean doStatistics,
- boolean abortOnClusterDown, int processtime, boolean jsonOutput,
- boolean tensorShortForm,
- boolean tensorDirectValues)
- {
- this(printIds, indentXml, showProgress, showStatistics, doStatistics, abortOnClusterDown, processtime,
- jsonOutput, tensorShortForm, tensorDirectValues, createStdOutPrintStream());
+ // Explicitly _not_ a record since we want the fields to be mutable when building.
+ public static class Params {
+ boolean printIds = false;
+ boolean indentXml = false;
+ boolean showProgress = false;
+ boolean showStatistics = false;
+ boolean doStatistics = false;
+ boolean abortOnClusterDown = false;
+ int processTimeMilliSecs = 0;
+ OutputFormat outputFormat = OutputFormat.JSON;
+ boolean tensorShortForm = false; // TODO Vespa 9: change default to true
+ boolean tensorDirectValues = false; // TODO Vespa 9: change default to true
+
+ boolean usesJson() {
+ return outputFormat == OutputFormat.JSON || outputFormat == OutputFormat.JSONL;
+ }
}
- StdOutVisitorHandler(boolean printIds, boolean indentXml,
- boolean showProgress, boolean showStatistics, boolean doStatistics,
- boolean abortOnClusterDown, int processtime, boolean jsonOutput,
- boolean tensorShortForm, boolean tensorDirectValues, PrintStream out)
- {
- super(showProgress, showStatistics, abortOnClusterDown);
- this.printIds = printIds;
- this.indentXml = indentXml;
- this.processTimeMilliSecs = processtime;
- this.jsonOutput = jsonOutput;
- this.tensorShortForm = tensorShortForm;
- this.tensorDirectValues = tensorDirectValues;
+ private final Params params;
+ private final PrintStream out;
+ private final VisitorDataHandler dataHandler;
+
+ public StdOutVisitorHandler(Params params, PrintStream out) {
+ super(params.showProgress, params.showStatistics, params.abortOnClusterDown);
+ this.params = params;
this.out = out;
- this.dataHandler = new DataHandler(doStatistics);
+ this.dataHandler = new DataHandler(params.doStatistics);
+ }
+
+ public StdOutVisitorHandler(Params params) {
+ this(params, createStdOutPrintStream());
}
private static PrintStream createStdOutPrintStream() {
@@ -128,9 +131,9 @@ public class StdOutVisitorHandler extends VdsVisitHandler {
@Override
public void onMessage(Message m, AckToken token) {
- if (processTimeMilliSecs > 0) {
+ if (params.processTimeMilliSecs > 0) {
try {
- Thread.sleep(processTimeMilliSecs);
+ Thread.sleep(params.processTimeMilliSecs);
} catch (InterruptedException e) {}
}
@@ -158,16 +161,15 @@ public class StdOutVisitorHandler extends VdsVisitHandler {
System.err.print('\r');
}
- if (printIds) {
+ if (params.printIds) {
out.print(doc.getId());
out.print(" (Last modified at ");
out.println(timestamp + ")");
} else {
- if (jsonOutput) {
+ if (params.usesJson()) {
writeJsonDocument(doc);
} else {
- out.print(doc.toXML(
- indentXml ? " " : ""));
+ out.print(doc.toXML(params.indentXml ? " " : ""));
}
}
} catch (Exception e) {
@@ -179,7 +181,7 @@ public class StdOutVisitorHandler extends VdsVisitHandler {
private void writeJsonDocument(Document doc) throws IOException {
writeFeedStartOrRecordSeparator();
- out.write(JsonWriter.toByteArray(doc, tensorShortForm, tensorDirectValues));
+ out.write(JsonWriter.toByteArray(doc, params.tensorShortForm, params.tensorDirectValues));
}
@Override
@@ -189,10 +191,10 @@ public class StdOutVisitorHandler extends VdsVisitHandler {
System.err.print('\r');
}
- if (printIds) {
+ if (params.printIds) {
out.println(docId + " (Removed)");
} else {
- if (jsonOutput) {
+ if (params.usesJson()) {
writeJsonDocumentRemove(docId);
} else {
XmlStream stream = new XmlStream();
@@ -218,10 +220,12 @@ public class StdOutVisitorHandler extends VdsVisitHandler {
private void writeFeedStartOrRecordSeparator() {
if (first) {
- out.println("[");
+ if (params.outputFormat == OutputFormat.JSON) {
+ out.println("[");
+ }
first = false;
} else {
- out.println(",");
+ out.println((params.outputFormat == OutputFormat.JSON) ? "," : "");
}
}
@@ -259,7 +263,7 @@ public class StdOutVisitorHandler extends VdsVisitHandler {
@Override
public synchronized void onDone() {
- if (jsonOutput && !printIds) {
+ if ((params.outputFormat == OutputFormat.JSON) && !params.printIds) {
if (first) {
out.print('[');
}
diff --git a/vespaclient-java/src/main/java/com/yahoo/vespavisit/VdsVisit.java b/vespaclient-java/src/main/java/com/yahoo/vespavisit/VdsVisit.java
index a6e34055fbd..ceea7d320e9 100644
--- a/vespaclient-java/src/main/java/com/yahoo/vespavisit/VdsVisit.java
+++ b/vespaclient-java/src/main/java/com/yahoo/vespavisit/VdsVisit.java
@@ -24,7 +24,6 @@ import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import java.io.*;
-import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.stream.Collectors;
@@ -334,9 +333,15 @@ public class VdsVisit {
.hasArg(false)
.build());
+ options.addOption(Option.builder()
+ .longOpt("jsonl")
+ .desc("Output documents as JSONL (JSON Lines format)")
+ .hasArg(false)
+ .build());
+
options.addOption(Option.builder("x")
.longOpt("xmloutput")
- .desc("Output documents as XML")
+ .desc("Output documents as XML (deprecated)")
.hasArg(false)
.build());
@@ -370,6 +375,7 @@ public class VdsVisit {
private int processTime = 0;
private int fullTimeout = 7 * 24 * 60 * 60 * 1000;
private boolean jsonOutput = true;
+ private boolean jsonLinesOutput = false;
private boolean tensorShortForm = false; // TODO Vespa 9: change default to true
private boolean tensorDirectValues = false; // TODO Vespa 9: change default to true
@@ -437,10 +443,32 @@ public class VdsVisit {
this.processTime = processTime;
}
+ public boolean jsonOutput() {
+ return jsonOutput;
+ }
+
public void setJsonOutput(boolean jsonOutput) {
this.jsonOutput = jsonOutput;
}
+ public boolean jsonLinesOutput() {
+ return jsonLinesOutput;
+ }
+
+ public void setJsonLinesOutput(boolean jsonLinesOutput) {
+ this.jsonLinesOutput = jsonLinesOutput;
+ }
+
+ public StdOutVisitorHandler.OutputFormat stdOutHandlerOutputFormat() {
+ if (jsonLinesOutput) {
+ return StdOutVisitorHandler.OutputFormat.JSONL;
+ } else if (jsonOutput) {
+ return StdOutVisitorHandler.OutputFormat.JSON;
+ } else {
+ return StdOutVisitorHandler.OutputFormat.XML;
+ }
+ }
+
public boolean tensorShortForm() {
return tensorShortForm;
}
@@ -587,11 +615,18 @@ public class VdsVisit {
}
boolean jsonOutput = line.hasOption("jsonoutput");
- boolean xmlOutput = line.hasOption("xmloutput");
- if (jsonOutput && xmlOutput) {
- throw new IllegalArgumentException("Cannot combine both xml and json output");
+ boolean jsonl = line.hasOption("jsonl");
+ boolean xmlOutput = line.hasOption("xmloutput");
+ if ((jsonOutput || jsonl) && xmlOutput) {
+ throw new IllegalArgumentException("Cannot combine both XML and JSON output");
+ } else if (jsonOutput && jsonl) {
+ throw new IllegalArgumentException("Cannot combine both JSON and JSONL output");
+ }
+ if (jsonl) {
+ allParams.setJsonLinesOutput(true);
+ } else {
+ allParams.setJsonOutput(!xmlOutput);
}
- allParams.setJsonOutput(!xmlOutput);
allParams.setVisitorParameters(params);
return allParams;
@@ -747,17 +782,18 @@ public class VdsVisit {
VdsVisitHandler handler;
- handler = new StdOutVisitorHandler(
- params.isPrintIdsOnly(),
- params.isVerbose(),
- params.isVerbose(),
- params.isVerbose(),
- params.getStatisticsParts() != null,
- params.getAbortOnClusterDown(),
- params.getProcessTime(),
- params.jsonOutput,
- params.tensorShortForm,
- params.tensorDirectValues);
+ var handlerParams = new StdOutVisitorHandler.Params();
+ handlerParams.printIds = params.isPrintIdsOnly();
+ handlerParams.indentXml = params.isVerbose();
+ handlerParams.showProgress = params.isVerbose();
+ handlerParams.showStatistics = params.isVerbose();
+ handlerParams.doStatistics = params.getStatisticsParts() != null;
+ handlerParams.abortOnClusterDown = params.getAbortOnClusterDown();
+ handlerParams.processTimeMilliSecs = params.getProcessTime();
+ handlerParams.outputFormat = params.stdOutHandlerOutputFormat();
+ handlerParams.tensorShortForm = params.tensorShortForm();
+ handlerParams.tensorDirectValues = params.tensorDirectValues();
+ handler = new StdOutVisitorHandler(handlerParams);
if (visitorParameters.getResumeFileName() != null) {
handler.setProgressFileName(visitorParameters.getResumeFileName());