From 7cb3323d0cf0c1c98f0c0e47cb88741f56c73eb9 Mon Sep 17 00:00:00 2001 From: Tor Brede Vekterli Date: Thu, 23 Feb 2023 14:45:34 +0100 Subject: Add JSONL output support to `vespa-visit` CLI tool JSONL output is enabled via new `--jsonl` argument. Mutually exclusive with `--jsonoutput` and (deprecated) `--xmloutput`. --- .../com/yahoo/vespavisit/StdOutVisitorHandler.java | 90 +++++++++++----------- .../main/java/com/yahoo/vespavisit/VdsVisit.java | 70 +++++++++++++---- 2 files changed, 100 insertions(+), 60 deletions(-) (limited to 'vespaclient-java/src/main/java/com/yahoo') diff --git a/vespaclient-java/src/main/java/com/yahoo/vespavisit/StdOutVisitorHandler.java b/vespaclient-java/src/main/java/com/yahoo/vespavisit/StdOutVisitorHandler.java index fc74cb6d899..288df7e470c 100644 --- a/vespaclient-java/src/main/java/com/yahoo/vespavisit/StdOutVisitorHandler.java +++ b/vespaclient-java/src/main/java/com/yahoo/vespavisit/StdOutVisitorHandler.java @@ -35,42 +35,45 @@ import java.util.logging.Logger; @SuppressWarnings("deprecation") public class StdOutVisitorHandler extends VdsVisitHandler { - private static final Logger log = Logger.getLogger( - StdOutVisitorHandler.class.getName()); - private final boolean printIds; - private final boolean indentXml; - private final int processTimeMilliSecs; - private final PrintStream out; - private final boolean jsonOutput; - private final boolean tensorShortForm; - private final boolean tensorDirectValues; + private static final Logger log = Logger.getLogger(StdOutVisitorHandler.class.getName()); - private final VisitorDataHandler dataHandler; + public enum OutputFormat { + JSONL, + JSON, + XML // Deprecated + } - public StdOutVisitorHandler(boolean printIds, boolean indentXml, - boolean showProgress, boolean showStatistics, boolean doStatistics, - boolean abortOnClusterDown, int processtime, boolean jsonOutput, - boolean tensorShortForm, - boolean tensorDirectValues) - { - this(printIds, indentXml, showProgress, showStatistics, doStatistics, abortOnClusterDown, processtime, - jsonOutput, tensorShortForm, tensorDirectValues, createStdOutPrintStream()); + // Explicitly _not_ a record since we want the fields to be mutable when building. + public static class Params { + boolean printIds = false; + boolean indentXml = false; + boolean showProgress = false; + boolean showStatistics = false; + boolean doStatistics = false; + boolean abortOnClusterDown = false; + int processTimeMilliSecs = 0; + OutputFormat outputFormat = OutputFormat.JSON; + boolean tensorShortForm = false; // TODO Vespa 9: change default to true + boolean tensorDirectValues = false; // TODO Vespa 9: change default to true + + boolean usesJson() { + return outputFormat == OutputFormat.JSON || outputFormat == OutputFormat.JSONL; + } } - StdOutVisitorHandler(boolean printIds, boolean indentXml, - boolean showProgress, boolean showStatistics, boolean doStatistics, - boolean abortOnClusterDown, int processtime, boolean jsonOutput, - boolean tensorShortForm, boolean tensorDirectValues, PrintStream out) - { - super(showProgress, showStatistics, abortOnClusterDown); - this.printIds = printIds; - this.indentXml = indentXml; - this.processTimeMilliSecs = processtime; - this.jsonOutput = jsonOutput; - this.tensorShortForm = tensorShortForm; - this.tensorDirectValues = tensorDirectValues; + private final Params params; + private final PrintStream out; + private final VisitorDataHandler dataHandler; + + public StdOutVisitorHandler(Params params, PrintStream out) { + super(params.showProgress, params.showStatistics, params.abortOnClusterDown); + this.params = params; this.out = out; - this.dataHandler = new DataHandler(doStatistics); + this.dataHandler = new DataHandler(params.doStatistics); + } + + public StdOutVisitorHandler(Params params) { + this(params, createStdOutPrintStream()); } private static PrintStream createStdOutPrintStream() { @@ -128,9 +131,9 @@ public class StdOutVisitorHandler extends VdsVisitHandler { @Override public void onMessage(Message m, AckToken token) { - if (processTimeMilliSecs > 0) { + if (params.processTimeMilliSecs > 0) { try { - Thread.sleep(processTimeMilliSecs); + Thread.sleep(params.processTimeMilliSecs); } catch (InterruptedException e) {} } @@ -158,16 +161,15 @@ public class StdOutVisitorHandler extends VdsVisitHandler { System.err.print('\r'); } - if (printIds) { + if (params.printIds) { out.print(doc.getId()); out.print(" (Last modified at "); out.println(timestamp + ")"); } else { - if (jsonOutput) { + if (params.usesJson()) { writeJsonDocument(doc); } else { - out.print(doc.toXML( - indentXml ? " " : "")); + out.print(doc.toXML(params.indentXml ? " " : "")); } } } catch (Exception e) { @@ -179,7 +181,7 @@ public class StdOutVisitorHandler extends VdsVisitHandler { private void writeJsonDocument(Document doc) throws IOException { writeFeedStartOrRecordSeparator(); - out.write(JsonWriter.toByteArray(doc, tensorShortForm, tensorDirectValues)); + out.write(JsonWriter.toByteArray(doc, params.tensorShortForm, params.tensorDirectValues)); } @Override @@ -189,10 +191,10 @@ public class StdOutVisitorHandler extends VdsVisitHandler { System.err.print('\r'); } - if (printIds) { + if (params.printIds) { out.println(docId + " (Removed)"); } else { - if (jsonOutput) { + if (params.usesJson()) { writeJsonDocumentRemove(docId); } else { XmlStream stream = new XmlStream(); @@ -218,10 +220,12 @@ public class StdOutVisitorHandler extends VdsVisitHandler { private void writeFeedStartOrRecordSeparator() { if (first) { - out.println("["); + if (params.outputFormat == OutputFormat.JSON) { + out.println("["); + } first = false; } else { - out.println(","); + out.println((params.outputFormat == OutputFormat.JSON) ? "," : ""); } } @@ -259,7 +263,7 @@ public class StdOutVisitorHandler extends VdsVisitHandler { @Override public synchronized void onDone() { - if (jsonOutput && !printIds) { + if ((params.outputFormat == OutputFormat.JSON) && !params.printIds) { if (first) { out.print('['); } diff --git a/vespaclient-java/src/main/java/com/yahoo/vespavisit/VdsVisit.java b/vespaclient-java/src/main/java/com/yahoo/vespavisit/VdsVisit.java index a6e34055fbd..ceea7d320e9 100644 --- a/vespaclient-java/src/main/java/com/yahoo/vespavisit/VdsVisit.java +++ b/vespaclient-java/src/main/java/com/yahoo/vespavisit/VdsVisit.java @@ -24,7 +24,6 @@ import org.apache.commons.cli.Option; import org.apache.commons.cli.Options; import java.io.*; -import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.util.Map; import java.util.stream.Collectors; @@ -334,9 +333,15 @@ public class VdsVisit { .hasArg(false) .build()); + options.addOption(Option.builder() + .longOpt("jsonl") + .desc("Output documents as JSONL (JSON Lines format)") + .hasArg(false) + .build()); + options.addOption(Option.builder("x") .longOpt("xmloutput") - .desc("Output documents as XML") + .desc("Output documents as XML (deprecated)") .hasArg(false) .build()); @@ -370,6 +375,7 @@ public class VdsVisit { private int processTime = 0; private int fullTimeout = 7 * 24 * 60 * 60 * 1000; private boolean jsonOutput = true; + private boolean jsonLinesOutput = false; private boolean tensorShortForm = false; // TODO Vespa 9: change default to true private boolean tensorDirectValues = false; // TODO Vespa 9: change default to true @@ -437,10 +443,32 @@ public class VdsVisit { this.processTime = processTime; } + public boolean jsonOutput() { + return jsonOutput; + } + public void setJsonOutput(boolean jsonOutput) { this.jsonOutput = jsonOutput; } + public boolean jsonLinesOutput() { + return jsonLinesOutput; + } + + public void setJsonLinesOutput(boolean jsonLinesOutput) { + this.jsonLinesOutput = jsonLinesOutput; + } + + public StdOutVisitorHandler.OutputFormat stdOutHandlerOutputFormat() { + if (jsonLinesOutput) { + return StdOutVisitorHandler.OutputFormat.JSONL; + } else if (jsonOutput) { + return StdOutVisitorHandler.OutputFormat.JSON; + } else { + return StdOutVisitorHandler.OutputFormat.XML; + } + } + public boolean tensorShortForm() { return tensorShortForm; } @@ -587,11 +615,18 @@ public class VdsVisit { } boolean jsonOutput = line.hasOption("jsonoutput"); - boolean xmlOutput = line.hasOption("xmloutput"); - if (jsonOutput && xmlOutput) { - throw new IllegalArgumentException("Cannot combine both xml and json output"); + boolean jsonl = line.hasOption("jsonl"); + boolean xmlOutput = line.hasOption("xmloutput"); + if ((jsonOutput || jsonl) && xmlOutput) { + throw new IllegalArgumentException("Cannot combine both XML and JSON output"); + } else if (jsonOutput && jsonl) { + throw new IllegalArgumentException("Cannot combine both JSON and JSONL output"); + } + if (jsonl) { + allParams.setJsonLinesOutput(true); + } else { + allParams.setJsonOutput(!xmlOutput); } - allParams.setJsonOutput(!xmlOutput); allParams.setVisitorParameters(params); return allParams; @@ -747,17 +782,18 @@ public class VdsVisit { VdsVisitHandler handler; - handler = new StdOutVisitorHandler( - params.isPrintIdsOnly(), - params.isVerbose(), - params.isVerbose(), - params.isVerbose(), - params.getStatisticsParts() != null, - params.getAbortOnClusterDown(), - params.getProcessTime(), - params.jsonOutput, - params.tensorShortForm, - params.tensorDirectValues); + var handlerParams = new StdOutVisitorHandler.Params(); + handlerParams.printIds = params.isPrintIdsOnly(); + handlerParams.indentXml = params.isVerbose(); + handlerParams.showProgress = params.isVerbose(); + handlerParams.showStatistics = params.isVerbose(); + handlerParams.doStatistics = params.getStatisticsParts() != null; + handlerParams.abortOnClusterDown = params.getAbortOnClusterDown(); + handlerParams.processTimeMilliSecs = params.getProcessTime(); + handlerParams.outputFormat = params.stdOutHandlerOutputFormat(); + handlerParams.tensorShortForm = params.tensorShortForm(); + handlerParams.tensorDirectValues = params.tensorDirectValues(); + handler = new StdOutVisitorHandler(handlerParams); if (visitorParameters.getResumeFileName() != null) { handler.setProgressFileName(visitorParameters.getResumeFileName()); -- cgit v1.2.3