summaryrefslogtreecommitdiffstats
path: root/vespaclient-java
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@yahooinc.com>2023-02-24 11:12:40 +0100
committerTor Brede Vekterli <vekterli@yahooinc.com>2023-02-24 11:21:34 +0100
commitcfd28a7300cd35914f08a1c792eb6a8ba37ef130 (patch)
tree912b88cec02873022616657881101c387eebf49d /vespaclient-java
parent36c0d79e750e1c7b32dcbc6e294a462a8d691bae (diff)
Batch `vespa-visit` progress file updates
Avoids writing and syncing to disk for every bucket updated. Instead, write every 10 seconds and at process shutdown.
Diffstat (limited to 'vespaclient-java')
-rw-r--r--vespaclient-java/src/main/java/com/yahoo/vespavisit/VdsVisit.java22
-rw-r--r--vespaclient-java/src/main/java/com/yahoo/vespavisit/VdsVisitHandler.java66
2 files changed, 57 insertions, 31 deletions
diff --git a/vespaclient-java/src/main/java/com/yahoo/vespavisit/VdsVisit.java b/vespaclient-java/src/main/java/com/yahoo/vespavisit/VdsVisit.java
index ceea7d320e9..822c64ea5fa 100644
--- a/vespaclient-java/src/main/java/com/yahoo/vespavisit/VdsVisit.java
+++ b/vespaclient-java/src/main/java/com/yahoo/vespavisit/VdsVisit.java
@@ -23,8 +23,12 @@ import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
-import java.io.*;
+import java.io.IOException;
+import java.io.PrintStream;
import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.NoSuchFileException;
+import java.nio.file.Path;
import java.util.Map;
import java.util.stream.Collectors;
@@ -751,24 +755,14 @@ public class VdsVisit {
!"".equals(visitorParameters.getResumeFileName()))
{
try {
- File file = new File(visitorParameters.getResumeFileName());
- FileInputStream fos = new FileInputStream(file);
-
- StringBuilder builder = new StringBuilder();
- byte[] b = new byte[100000];
- int length;
-
- while ((length = fos.read(b)) > 0) {
- builder.append(new String(b, 0, length));
- }
- fos.close();
- visitorParameters.setResumeToken(new ProgressToken(builder.toString()));
+ var progressFileContents = Files.readString(Path.of(visitorParameters.getResumeFileName()));
+ visitorParameters.setResumeToken(new ProgressToken(progressFileContents));
if (params.isVerbose()) {
System.err.format("Resuming visitor already %.1f %% finished.\n",
visitorParameters.getResumeToken().percentFinished());
}
- } catch (FileNotFoundException e) {
+ } catch (NoSuchFileException e) {
// Ignore; file has not been created yet but will be shortly.
} catch (IOException e) {
System.err.println("Could not open progress file: " + visitorParameters.getResumeFileName());
diff --git a/vespaclient-java/src/main/java/com/yahoo/vespavisit/VdsVisitHandler.java b/vespaclient-java/src/main/java/com/yahoo/vespavisit/VdsVisitHandler.java
index ea861399e76..ccb0888a654 100644
--- a/vespaclient-java/src/main/java/com/yahoo/vespavisit/VdsVisitHandler.java
+++ b/vespaclient-java/src/main/java/com/yahoo/vespavisit/VdsVisitHandler.java
@@ -6,16 +6,20 @@ import com.yahoo.documentapi.VisitorControlHandler;
import com.yahoo.documentapi.VisitorDataHandler;
import com.yahoo.vdslib.VisitorStatistics;
-import java.io.File;
-import java.io.FileOutputStream;
import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.time.Duration;
import java.util.Date;
import java.util.TimeZone;
import java.text.DateFormat;
import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
+import static java.nio.file.StandardCopyOption.ATOMIC_MOVE;
+import static java.nio.file.StandardCopyOption.REPLACE_EXISTING;
+
/**
* An abstract class that can be subclassed by different visitor handlers.
*
@@ -30,14 +34,26 @@ public abstract class VdsVisitHandler {
String lastPercentage;
final Object printLock = new Object();
- protected String progressFileName = "";
+ private static class ProgressMeta {
+ String fileName = "";
+ String lastProgressContents;
+ int unwrittenUpdates = 0;
+ long lastWriteAtNanos = 0;
+ Duration writeInterval = Duration.ofSeconds(10);
+ boolean shouldWriteProgress() {
+ return !fileName.isEmpty();
+ }
+ }
+
+ ProgressMeta progressMeta = new ProgressMeta();
final VisitorControlHandler controlHandler = new ControlHandler();
public VdsVisitHandler(boolean showProgress, boolean showStatistics, boolean abortOnClusterDown) {
this.showProgress = showProgress;
this.showStatistics = showStatistics;
this.abortOnClusterDown = abortOnClusterDown;
+ this.progressMeta.lastWriteAtNanos = System.nanoTime(); // Avoid always writing a file on the first progress update
}
public boolean getShowProgress() {
@@ -75,11 +91,11 @@ public abstract class VdsVisitHandler {
public void onDone() { }
public String getProgressFileName() {
- return progressFileName;
+ return progressMeta.fileName;
}
public void setProgressFileName(String progressFileName) {
- this.progressFileName = progressFileName;
+ this.progressMeta.fileName = progressFileName;
}
public VisitorControlHandler getControlHandler() { return controlHandler; }
@@ -88,20 +104,29 @@ public abstract class VdsVisitHandler {
class ControlHandler extends VisitorControlHandler {
VisitorStatistics statistics;
+ private void rewriteProgressFile() {
+ try {
+ var tmpPath = Path.of(progressMeta.fileName + ".tmp");
+ Files.writeString(tmpPath, progressMeta.lastProgressContents);
+ Files.move(tmpPath, Path.of(progressMeta.fileName), REPLACE_EXISTING, ATOMIC_MOVE);
+ } catch (IOException e) {
+ e.printStackTrace();
+ abort(); // Don't continue visiting if we're unable to save progress state
+ }
+ }
+
public void onProgress(ProgressToken token) {
- if (progressFileName.length() > 0) {
- try {
- synchronized (token) {
- File file = new File(progressFileName + ".tmp");
- FileOutputStream fos = new FileOutputStream(file);
- fos.write(token.toString().getBytes());
- fos.close();
- file.renameTo(new File(progressFileName));
+ if (progressMeta.shouldWriteProgress()) {
+ synchronized (token) {
+ progressMeta.unwrittenUpdates++;
+ progressMeta.lastProgressContents = token.toString();
+ long nowNanos = System.nanoTime();
+ if ((nowNanos - progressMeta.lastWriteAtNanos) > progressMeta.writeInterval.toNanos()) {
+ rewriteProgressFile();
+ progressMeta.unwrittenUpdates = 0;
+ progressMeta.lastWriteAtNanos = nowNanos;
}
}
- catch (IOException e) {
- e.printStackTrace();
- }
}
if (showProgress) {
synchronized (printLock) {
@@ -111,7 +136,9 @@ public abstract class VdsVisitHandler {
if (lastLineIsProgress) {
System.err.print('\r');
}
- System.err.print(percentage + " % finished.");
+ // Pad with a few extra spaces to handle case where current line written is shorter
+ // than the previous line written. Would otherwise leave stale characters behind.
+ System.err.print(percentage + " % finished. ");
lastLineIsProgress = true;
lastPercentage = percentage;
}
@@ -147,6 +174,11 @@ public abstract class VdsVisitHandler {
}
}
public void onDone(CompletionCode code, String message) {
+ // Flush any remaining unwritten progress updates.
+ // It is expected that this happens-after any and all calls to onProgress().
+ if (progressMeta.unwrittenUpdates > 0) {
+ rewriteProgressFile();
+ }
if (lastLineIsProgress) {
System.err.print('\n');
lastLineIsProgress = false;