summary | refs | log | tree | commit | diff | stats
path: root/vespaclient-java
diff options
context:
space:
mode:
author Tor Brede Vekterli <vekterli@yahooinc.com> 2023-02-23 14:45:34 +0100
committer Tor Brede Vekterli <vekterli@yahooinc.com> 2023-02-23 15:18:20 +0100
commit 7cb3323d0cf0c1c98f0c0e47cb88741f56c73eb9 (patch)
tree bb2662cfc5777d812cceae7198962b39c14a59ff /vespaclient-java
parent f66f816102ce0a7c3aaba72d1db61a83157259ed (diff)
Add JSONL output support to `vespa-visit` CLI tool
JSONL output is enabled via the new `--jsonl` argument, which is mutually exclusive with `--jsonoutput` and the (deprecated) `--xmloutput`.
Diffstat (limited to 'vespaclient-java')
-rw-r--r-- vespaclient-java/src/main/java/com/yahoo/vespavisit/StdOutVisitorHandler.java 90
-rw-r--r-- vespaclient-java/src/main/java/com/yahoo/vespavisit/VdsVisit.java 70
-rw-r--r-- vespaclient-java/src/test/java/com/yahoo/vespavisit/StdOutVisitorHandlerTest.java 80
3 files changed, 168 insertions, 72 deletions
diff --git a/vespaclient-java/src/main/java/com/yahoo/vespavisit/StdOutVisitorHandler.java b/vespaclient-java/src/main/java/com/yahoo/vespavisit/StdOutVisitorHandler.java
index fc74cb6d899..288df7e470c 100644
--- a/vespaclient-java/src/main/java/com/yahoo/vespavisit/StdOutVisitorHandler.java
+++ b/vespaclient-java/src/main/java/com/yahoo/vespavisit/StdOutVisitorHandler.java
@@ -35,42 +35,45 @@ import java.util.logging.Logger;
@SuppressWarnings("deprecation")
public class StdOutVisitorHandler extends VdsVisitHandler {
- private static final Logger log = Logger.getLogger(
- StdOutVisitorHandler.class.getName());
- private final boolean printIds;
- private final boolean indentXml;
- private final int processTimeMilliSecs;
- private final PrintStream out;
- private final boolean jsonOutput;
- private final boolean tensorShortForm;
- private final boolean tensorDirectValues;
+ private static final Logger log = Logger.getLogger(StdOutVisitorHandler.class.getName());
- private final VisitorDataHandler dataHandler;
+ public enum OutputFormat {
+ JSONL,
+ JSON,
+ XML // Deprecated
+ }
- public StdOutVisitorHandler(boolean printIds, boolean indentXml,
- boolean showProgress, boolean showStatistics, boolean doStatistics,
- boolean abortOnClusterDown, int processtime, boolean jsonOutput,
- boolean tensorShortForm,
- boolean tensorDirectValues)
- {
- this(printIds, indentXml, showProgress, showStatistics, doStatistics, abortOnClusterDown, processtime,
- jsonOutput, tensorShortForm, tensorDirectValues, createStdOutPrintStream());
+ // Explicitly _not_ a record since we want the fields to be mutable when building.
+ public static class Params {
+ boolean printIds = false;
+ boolean indentXml = false;
+ boolean showProgress = false;
+ boolean showStatistics = false;
+ boolean doStatistics = false;
+ boolean abortOnClusterDown = false;
+ int processTimeMilliSecs = 0;
+ OutputFormat outputFormat = OutputFormat.JSON;
+ boolean tensorShortForm = false; // TODO Vespa 9: change default to true
+ boolean tensorDirectValues = false; // TODO Vespa 9: change default to true
+
+ boolean usesJson() {
+ return outputFormat == OutputFormat.JSON || outputFormat == OutputFormat.JSONL;
+ }
}
- StdOutVisitorHandler(boolean printIds, boolean indentXml,
- boolean showProgress, boolean showStatistics, boolean doStatistics,
- boolean abortOnClusterDown, int processtime, boolean jsonOutput,
- boolean tensorShortForm, boolean tensorDirectValues, PrintStream out)
- {
- super(showProgress, showStatistics, abortOnClusterDown);
- this.printIds = printIds;
- this.indentXml = indentXml;
- this.processTimeMilliSecs = processtime;
- this.jsonOutput = jsonOutput;
- this.tensorShortForm = tensorShortForm;
- this.tensorDirectValues = tensorDirectValues;
+ private final Params params;
+ private final PrintStream out;
+ private final VisitorDataHandler dataHandler;
+
+ public StdOutVisitorHandler(Params params, PrintStream out) {
+ super(params.showProgress, params.showStatistics, params.abortOnClusterDown);
+ this.params = params;
this.out = out;
- this.dataHandler = new DataHandler(doStatistics);
+ this.dataHandler = new DataHandler(params.doStatistics);
+ }
+
+ public StdOutVisitorHandler(Params params) {
+ this(params, createStdOutPrintStream());
}
private static PrintStream createStdOutPrintStream() {
@@ -128,9 +131,9 @@ public class StdOutVisitorHandler extends VdsVisitHandler {
@Override
public void onMessage(Message m, AckToken token) {
- if (processTimeMilliSecs > 0) {
+ if (params.processTimeMilliSecs > 0) {
try {
- Thread.sleep(processTimeMilliSecs);
+ Thread.sleep(params.processTimeMilliSecs);
} catch (InterruptedException e) {}
}
@@ -158,16 +161,15 @@ public class StdOutVisitorHandler extends VdsVisitHandler {
System.err.print('\r');
}
- if (printIds) {
+ if (params.printIds) {
out.print(doc.getId());
out.print(" (Last modified at ");
out.println(timestamp + ")");
} else {
- if (jsonOutput) {
+ if (params.usesJson()) {
writeJsonDocument(doc);
} else {
- out.print(doc.toXML(
- indentXml ? " " : ""));
+ out.print(doc.toXML(params.indentXml ? " " : ""));
}
}
} catch (Exception e) {
@@ -179,7 +181,7 @@ public class StdOutVisitorHandler extends VdsVisitHandler {
private void writeJsonDocument(Document doc) throws IOException {
writeFeedStartOrRecordSeparator();
- out.write(JsonWriter.toByteArray(doc, tensorShortForm, tensorDirectValues));
+ out.write(JsonWriter.toByteArray(doc, params.tensorShortForm, params.tensorDirectValues));
}
@Override
@@ -189,10 +191,10 @@ public class StdOutVisitorHandler extends VdsVisitHandler {
System.err.print('\r');
}
- if (printIds) {
+ if (params.printIds) {
out.println(docId + " (Removed)");
} else {
- if (jsonOutput) {
+ if (params.usesJson()) {
writeJsonDocumentRemove(docId);
} else {
XmlStream stream = new XmlStream();
@@ -218,10 +220,12 @@ public class StdOutVisitorHandler extends VdsVisitHandler {
private void writeFeedStartOrRecordSeparator() {
if (first) {
- out.println("[");
+ if (params.outputFormat == OutputFormat.JSON) {
+ out.println("[");
+ }
first = false;
} else {
- out.println(",");
+ out.println((params.outputFormat == OutputFormat.JSON) ? "," : "");
}
}
@@ -259,7 +263,7 @@ public class StdOutVisitorHandler extends VdsVisitHandler {
@Override
public synchronized void onDone() {
- if (jsonOutput && !printIds) {
+ if ((params.outputFormat == OutputFormat.JSON) && !params.printIds) {
if (first) {
out.print('[');
}
diff --git a/vespaclient-java/src/main/java/com/yahoo/vespavisit/VdsVisit.java b/vespaclient-java/src/main/java/com/yahoo/vespavisit/VdsVisit.java
index a6e34055fbd..ceea7d320e9 100644
--- a/vespaclient-java/src/main/java/com/yahoo/vespavisit/VdsVisit.java
+++ b/vespaclient-java/src/main/java/com/yahoo/vespavisit/VdsVisit.java
@@ -24,7 +24,6 @@ import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import java.io.*;
-import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.stream.Collectors;
@@ -334,9 +333,15 @@ public class VdsVisit {
.hasArg(false)
.build());
+ options.addOption(Option.builder()
+ .longOpt("jsonl")
+ .desc("Output documents as JSONL (JSON Lines format)")
+ .hasArg(false)
+ .build());
+
options.addOption(Option.builder("x")
.longOpt("xmloutput")
- .desc("Output documents as XML")
+ .desc("Output documents as XML (deprecated)")
.hasArg(false)
.build());
@@ -370,6 +375,7 @@ public class VdsVisit {
private int processTime = 0;
private int fullTimeout = 7 * 24 * 60 * 60 * 1000;
private boolean jsonOutput = true;
+ private boolean jsonLinesOutput = false;
private boolean tensorShortForm = false; // TODO Vespa 9: change default to true
private boolean tensorDirectValues = false; // TODO Vespa 9: change default to true
@@ -437,10 +443,32 @@ public class VdsVisit {
this.processTime = processTime;
}
+ public boolean jsonOutput() {
+ return jsonOutput;
+ }
+
public void setJsonOutput(boolean jsonOutput) {
this.jsonOutput = jsonOutput;
}
+ public boolean jsonLinesOutput() {
+ return jsonLinesOutput;
+ }
+
+ public void setJsonLinesOutput(boolean jsonLinesOutput) {
+ this.jsonLinesOutput = jsonLinesOutput;
+ }
+
+ public StdOutVisitorHandler.OutputFormat stdOutHandlerOutputFormat() {
+ if (jsonLinesOutput) {
+ return StdOutVisitorHandler.OutputFormat.JSONL;
+ } else if (jsonOutput) {
+ return StdOutVisitorHandler.OutputFormat.JSON;
+ } else {
+ return StdOutVisitorHandler.OutputFormat.XML;
+ }
+ }
+
public boolean tensorShortForm() {
return tensorShortForm;
}
@@ -587,11 +615,18 @@ public class VdsVisit {
}
boolean jsonOutput = line.hasOption("jsonoutput");
- boolean xmlOutput = line.hasOption("xmloutput");
- if (jsonOutput && xmlOutput) {
- throw new IllegalArgumentException("Cannot combine both xml and json output");
+ boolean jsonl = line.hasOption("jsonl");
+ boolean xmlOutput = line.hasOption("xmloutput");
+ if ((jsonOutput || jsonl) && xmlOutput) {
+ throw new IllegalArgumentException("Cannot combine both XML and JSON output");
+ } else if (jsonOutput && jsonl) {
+ throw new IllegalArgumentException("Cannot combine both JSON and JSONL output");
+ }
+ if (jsonl) {
+ allParams.setJsonLinesOutput(true);
+ } else {
+ allParams.setJsonOutput(!xmlOutput);
}
- allParams.setJsonOutput(!xmlOutput);
allParams.setVisitorParameters(params);
return allParams;
@@ -747,17 +782,18 @@ public class VdsVisit {
VdsVisitHandler handler;
- handler = new StdOutVisitorHandler(
- params.isPrintIdsOnly(),
- params.isVerbose(),
- params.isVerbose(),
- params.isVerbose(),
- params.getStatisticsParts() != null,
- params.getAbortOnClusterDown(),
- params.getProcessTime(),
- params.jsonOutput,
- params.tensorShortForm,
- params.tensorDirectValues);
+ var handlerParams = new StdOutVisitorHandler.Params();
+ handlerParams.printIds = params.isPrintIdsOnly();
+ handlerParams.indentXml = params.isVerbose();
+ handlerParams.showProgress = params.isVerbose();
+ handlerParams.showStatistics = params.isVerbose();
+ handlerParams.doStatistics = params.getStatisticsParts() != null;
+ handlerParams.abortOnClusterDown = params.getAbortOnClusterDown();
+ handlerParams.processTimeMilliSecs = params.getProcessTime();
+ handlerParams.outputFormat = params.stdOutHandlerOutputFormat();
+ handlerParams.tensorShortForm = params.tensorShortForm();
+ handlerParams.tensorDirectValues = params.tensorDirectValues();
+ handler = new StdOutVisitorHandler(handlerParams);
if (visitorParameters.getResumeFileName() != null) {
handler.setProgressFileName(visitorParameters.getResumeFileName());
diff --git a/vespaclient-java/src/test/java/com/yahoo/vespavisit/StdOutVisitorHandlerTest.java b/vespaclient-java/src/test/java/com/yahoo/vespavisit/StdOutVisitorHandlerTest.java
index c1bbe8711a5..aa708b1fde9 100644
--- a/vespaclient-java/src/test/java/com/yahoo/vespavisit/StdOutVisitorHandlerTest.java
+++ b/vespaclient-java/src/test/java/com/yahoo/vespavisit/StdOutVisitorHandlerTest.java
@@ -1,15 +1,19 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespavisit;
+import com.yahoo.document.DataType;
import com.yahoo.document.Document;
+import com.yahoo.document.DocumentId;
import com.yahoo.document.DocumentPut;
import com.yahoo.document.DocumentType;
import com.yahoo.document.TensorDataType;
+import com.yahoo.document.datatypes.StringFieldValue;
import com.yahoo.document.datatypes.TensorFieldValue;
import com.yahoo.documentapi.AckToken;
import com.yahoo.documentapi.VisitorControlSession;
import com.yahoo.documentapi.VisitorDataHandler;
import com.yahoo.documentapi.messagebus.protocol.PutDocumentMessage;
+import com.yahoo.documentapi.messagebus.protocol.RemoveDocumentMessage;
import com.yahoo.tensor.Tensor;
import com.yahoo.tensor.TensorType;
import org.junit.jupiter.api.Test;
@@ -26,23 +30,27 @@ import static org.mockito.Mockito.mock;
* @author bjorncs
*/
public class StdOutVisitorHandlerTest {
- private boolean jsonOutput;
-
- public void initStdOutVisitorHandlerTest(boolean jsonOutput) {
- this.jsonOutput = jsonOutput;
- }
public static Object[] data() {
return new Object[]{true, false};
}
+ private static StdOutVisitorHandler.Params createHandlerParams(boolean jsonOutput, boolean tensorShortForm, boolean tensorDirectValues) {
+ var params = new StdOutVisitorHandler.Params();
+ params.outputFormat = jsonOutput ? StdOutVisitorHandler.OutputFormat.JSON
+ : StdOutVisitorHandler.OutputFormat.XML;
+ params.tensorShortForm = tensorShortForm;
+ params.tensorDirectValues = tensorDirectValues;
+ return params;
+ }
+
@MethodSource("data")
@ParameterizedTest(name = "jsonOutput={0}")
void printing_ids_for_zero_documents_produces_empty_output(boolean jsonOutput) {
- initStdOutVisitorHandlerTest(jsonOutput);
ByteArrayOutputStream out = new ByteArrayOutputStream();
- StdOutVisitorHandler visitorHandler =
- new StdOutVisitorHandler(/*printIds*/true, false, false, false, false, false, 0, jsonOutput, false, false, new PrintStream(out, true));
+ var params = createHandlerParams(jsonOutput, false, false);
+ params.printIds = true;
+ StdOutVisitorHandler visitorHandler = new StdOutVisitorHandler(params, new PrintStream(out, true));
VisitorDataHandler dataHandler = visitorHandler.getDataHandler();
dataHandler.onDone();
String output = out.toString();
@@ -52,10 +60,9 @@ public class StdOutVisitorHandlerTest {
@MethodSource("data")
@ParameterizedTest(name = "jsonOutput={0}")
void printing_zero_documents_produces_empty_output(boolean jsonOutput) {
- initStdOutVisitorHandlerTest(jsonOutput);
ByteArrayOutputStream out = new ByteArrayOutputStream();
StdOutVisitorHandler visitorHandler =
- new StdOutVisitorHandler(/*printIds*/false, false, false, false, false, false, 0, jsonOutput, false, false, new PrintStream(out, true));
+ new StdOutVisitorHandler(createHandlerParams(jsonOutput, false, false), new PrintStream(out, true));
VisitorDataHandler dataHandler = visitorHandler.getDataHandler();
dataHandler.onDone();
String expectedOutput = jsonOutput ? "[]" : "";
@@ -71,8 +78,7 @@ public class StdOutVisitorHandlerTest {
var putMsg = new PutDocumentMessage(new DocumentPut(doc));
var out = new ByteArrayOutputStream();
- var visitorHandler = new StdOutVisitorHandler(/*printIds*/false, false, false, false, false, false,
- 0, true, tensorShortForm, tensorDirectValues, new PrintStream(out, true));
+ var visitorHandler = new StdOutVisitorHandler(createHandlerParams(true, tensorShortForm, tensorDirectValues), new PrintStream(out, true));
var dataHandler = visitorHandler.getDataHandler();
var controlSession = mock(VisitorControlSession.class);
var ackToken = mock(AckToken.class);
@@ -100,4 +106,54 @@ public class StdOutVisitorHandlerTest {
do_test_json_tensor_fields_rendering(true, false, expectedOutput);
}
+ private static PutDocumentMessage createPutWithDocAndValue(DocumentType docType, String docId, String fieldValue) {
+ var doc = new Document(docType, docId);
+ doc.setFieldValue("bar", new StringFieldValue(fieldValue));
+ return new PutDocumentMessage(new DocumentPut(doc));
+ }
+
+ private static RemoveDocumentMessage createRemoveForDoc(String docId) {
+ return new RemoveDocumentMessage(new DocumentId(docId));
+ }
+
+ @MethodSource("data")
+ @ParameterizedTest(name = "jsonLinesFormat={0}")
+ void json_can_be_output_in_json_lines_format(boolean jsonLinesFormat) {
+ var docType = new DocumentType("foo");
+ docType.addField("bar", DataType.STRING);
+
+ var params = createHandlerParams(true, true, true);
+ params.outputFormat = jsonLinesFormat ? StdOutVisitorHandler.OutputFormat.JSONL
+ : StdOutVisitorHandler.OutputFormat.JSON;
+
+ var out = new ByteArrayOutputStream();
+ var visitorHandler = new StdOutVisitorHandler(params, new PrintStream(out, true));
+ var dataHandler = visitorHandler.getDataHandler();
+ var controlSession = mock(VisitorControlSession.class);
+ dataHandler.setSession(controlSession);
+
+ dataHandler.onMessage(createPutWithDocAndValue(docType, "id:baz:foo::1", "fluffy\nbunnies"), mock(AckToken.class));
+ dataHandler.onMessage(createRemoveForDoc("id:baz:foo::2"), mock(AckToken.class));
+ dataHandler.onMessage(createPutWithDocAndValue(docType, "id:baz:foo::3", "\r\ncool fox\r\n"), mock(AckToken.class));
+ dataHandler.onDone();
+
+ String output = out.toString().trim();
+ if (jsonLinesFormat) {
+ // JSONL; no implicit start/end array chars or trailing commas after objects
+ var expected = """
+ {"id":"id:baz:foo::1","fields":{"bar":"fluffy\\nbunnies"}}
+ {"remove":"id:baz:foo::2"}
+ {"id":"id:baz:foo::3","fields":{"bar":"\\r\\ncool fox\\r\\n"}}""";
+ assertEquals(expected, output);
+ } else {
+ // non-JSONL; usual array of comma-separated objects form
+ var expected = """
+ [
+ {"id":"id:baz:foo::1","fields":{"bar":"fluffy\\nbunnies"}},
+ {"remove":"id:baz:foo::2"},
+ {"id":"id:baz:foo::3","fields":{"bar":"\\r\\ncool fox\\r\\n"}}]""";
+ assertEquals(expected, output);
+ }
+ }
+
}