diff options
author | Tor Brede Vekterli <vekterli@yahooinc.com> | 2023-02-24 11:12:40 +0100 |
---|---|---|
committer | Tor Brede Vekterli <vekterli@yahooinc.com> | 2023-02-24 11:21:34 +0100 |
commit | cfd28a7300cd35914f08a1c792eb6a8ba37ef130 (patch) | |
tree | 912b88cec02873022616657881101c387eebf49d /vespaclient-java | |
parent | 36c0d79e750e1c7b32dcbc6e294a462a8d691bae (diff) |
Batch `vespa-visit` progress file updates
Avoids writing and syncing to disk for every bucket updated.
Instead, write every 10 seconds and at process shutdown.
Diffstat (limited to 'vespaclient-java')
-rw-r--r-- | vespaclient-java/src/main/java/com/yahoo/vespavisit/VdsVisit.java | 22 | ||||
-rw-r--r-- | vespaclient-java/src/main/java/com/yahoo/vespavisit/VdsVisitHandler.java | 66 |
2 files changed, 57 insertions, 31 deletions
diff --git a/vespaclient-java/src/main/java/com/yahoo/vespavisit/VdsVisit.java b/vespaclient-java/src/main/java/com/yahoo/vespavisit/VdsVisit.java index ceea7d320e9..822c64ea5fa 100644 --- a/vespaclient-java/src/main/java/com/yahoo/vespavisit/VdsVisit.java +++ b/vespaclient-java/src/main/java/com/yahoo/vespavisit/VdsVisit.java @@ -23,8 +23,12 @@ import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Option; import org.apache.commons.cli.Options; -import java.io.*; +import java.io.IOException; +import java.io.PrintStream; import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.NoSuchFileException; +import java.nio.file.Path; import java.util.Map; import java.util.stream.Collectors; @@ -751,24 +755,14 @@ public class VdsVisit { !"".equals(visitorParameters.getResumeFileName())) { try { - File file = new File(visitorParameters.getResumeFileName()); - FileInputStream fos = new FileInputStream(file); - - StringBuilder builder = new StringBuilder(); - byte[] b = new byte[100000]; - int length; - - while ((length = fos.read(b)) > 0) { - builder.append(new String(b, 0, length)); - } - fos.close(); - visitorParameters.setResumeToken(new ProgressToken(builder.toString())); + var progressFileContents = Files.readString(Path.of(visitorParameters.getResumeFileName())); + visitorParameters.setResumeToken(new ProgressToken(progressFileContents)); if (params.isVerbose()) { System.err.format("Resuming visitor already %.1f %% finished.\n", visitorParameters.getResumeToken().percentFinished()); } - } catch (FileNotFoundException e) { + } catch (NoSuchFileException e) { // Ignore; file has not been created yet but will be shortly. } catch (IOException e) { System.err.println("Could not open progress file: " + visitorParameters.getResumeFileName()); diff --git a/vespaclient-java/src/main/java/com/yahoo/vespavisit/VdsVisitHandler.java b/vespaclient-java/src/main/java/com/yahoo/vespavisit/VdsVisitHandler.java index ea861399e76..ccb0888a654 100644 --- a/vespaclient-java/src/main/java/com/yahoo/vespavisit/VdsVisitHandler.java +++ b/vespaclient-java/src/main/java/com/yahoo/vespavisit/VdsVisitHandler.java @@ -6,16 +6,20 @@ import com.yahoo.documentapi.VisitorControlHandler; import com.yahoo.documentapi.VisitorDataHandler; import com.yahoo.vdslib.VisitorStatistics; -import java.io.File; -import java.io.FileOutputStream; import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.time.Duration; import java.util.Date; import java.util.TimeZone; import java.text.DateFormat; import java.text.DecimalFormat; import java.text.SimpleDateFormat; +import static java.nio.file.StandardCopyOption.ATOMIC_MOVE; +import static java.nio.file.StandardCopyOption.REPLACE_EXISTING; + /** * An abstract class that can be subclassed by different visitor handlers. * @@ -30,14 +34,26 @@ public abstract class VdsVisitHandler { String lastPercentage; final Object printLock = new Object(); - protected String progressFileName = ""; + private static class ProgressMeta { + String fileName = ""; + String lastProgressContents; + int unwrittenUpdates = 0; + long lastWriteAtNanos = 0; + Duration writeInterval = Duration.ofSeconds(10); + boolean shouldWriteProgress() { + return !fileName.isEmpty(); + } + } + + ProgressMeta progressMeta = new ProgressMeta(); final VisitorControlHandler controlHandler = new ControlHandler(); public VdsVisitHandler(boolean showProgress, boolean showStatistics, boolean abortOnClusterDown) { this.showProgress = showProgress; this.showStatistics = showStatistics; this.abortOnClusterDown = abortOnClusterDown; + this.progressMeta.lastWriteAtNanos = System.nanoTime(); // Avoid always writing a file on the first progress update } public boolean getShowProgress() { @@ -75,11 +91,11 @@ public abstract class VdsVisitHandler { public void onDone() { } public String getProgressFileName() { - return progressFileName; + return progressMeta.fileName; } public void setProgressFileName(String progressFileName) { - this.progressFileName = progressFileName; + this.progressMeta.fileName = progressFileName; } public VisitorControlHandler getControlHandler() { return controlHandler; } @@ -88,20 +104,29 @@ public abstract class VdsVisitHandler { class ControlHandler extends VisitorControlHandler { VisitorStatistics statistics; + private void rewriteProgressFile() { + try { + var tmpPath = Path.of(progressMeta.fileName + ".tmp"); + Files.writeString(tmpPath, progressMeta.lastProgressContents); + Files.move(tmpPath, Path.of(progressMeta.fileName), REPLACE_EXISTING, ATOMIC_MOVE); + } catch (IOException e) { + e.printStackTrace(); + abort(); // Don't continue visiting if we're unable to save progress state + } + } + public void onProgress(ProgressToken token) { - if (progressFileName.length() > 0) { - try { - synchronized (token) { - File file = new File(progressFileName + ".tmp"); - FileOutputStream fos = new FileOutputStream(file); - fos.write(token.toString().getBytes()); - fos.close(); - file.renameTo(new File(progressFileName)); + if (progressMeta.shouldWriteProgress()) { + synchronized (token) { + progressMeta.unwrittenUpdates++; + progressMeta.lastProgressContents = token.toString(); + long nowNanos = System.nanoTime(); + if ((nowNanos - progressMeta.lastWriteAtNanos) > progressMeta.writeInterval.toNanos()) { + rewriteProgressFile(); + progressMeta.unwrittenUpdates = 0; + progressMeta.lastWriteAtNanos = nowNanos; } } - catch (IOException e) { - e.printStackTrace(); - } } if (showProgress) { synchronized (printLock) { @@ -111,7 +136,9 @@ public abstract class VdsVisitHandler { if (lastLineIsProgress) { System.err.print('\r'); } - System.err.print(percentage + " % finished."); + // Pad with a few extra spaces to handle case where current line written is shorter + // than the previous line written. Would otherwise leave stale characters behind. + System.err.print(percentage + " % finished. "); lastLineIsProgress = true; lastPercentage = percentage; } @@ -147,6 +174,11 @@ public abstract class VdsVisitHandler { } } public void onDone(CompletionCode code, String message) { + // Flush any remaining unwritten progress updates. + // It is expected that this happens-after any and all calls to onProgress(). + if (progressMeta.unwrittenUpdates > 0) { + rewriteProgressFile(); + } if (lastLineIsProgress) { System.err.print('\n'); lastLineIsProgress = false; |