diff options
author | Jon Marius Venstad <venstad@gmail.com> | 2020-06-12 15:46:21 +0200 |
---|---|---|
committer | Jon Marius Venstad <venstad@gmail.com> | 2020-06-12 15:46:21 +0200 |
commit | 4a36842785c919e626fcb3665e8be9d4a9df31a6 (patch) | |
tree | fb2676afa6917c0b985f559991e12bbeb90ebce4 /container-core | |
parent | 153813c0969f821c869689ebaebef589b6af118c (diff) |
Extract time stamp from path instead of modified time
Diffstat (limited to 'container-core')
-rw-r--r-- | container-core/src/main/java/com/yahoo/container/handler/LogReader.java | 131 | ||||
-rw-r--r-- | container-core/src/test/java/com/yahoo/container/handler/LogReaderTest.java | 51 |
2 files changed, 116 insertions, 66 deletions
diff --git a/container-core/src/main/java/com/yahoo/container/handler/LogReader.java b/container-core/src/main/java/com/yahoo/container/handler/LogReader.java index ff1a7313540..6e70befa011 100644 --- a/container-core/src/main/java/com/yahoo/container/handler/LogReader.java +++ b/container-core/src/main/java/com/yahoo/container/handler/LogReader.java @@ -20,17 +20,19 @@ import java.nio.file.Paths; import java.nio.file.SimpleFileVisitor; import java.nio.file.attribute.BasicFileAttributes; import java.time.Instant; +import java.time.ZoneId; +import java.time.ZonedDateTime; import java.time.temporal.ChronoUnit; import java.util.ArrayList; -import java.util.HashMap; import java.util.List; -import java.util.Map; +import java.util.TreeMap; +import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.stream.Collectors; import java.util.zip.GZIPInputStream; import java.util.zip.GZIPOutputStream; import static java.nio.charset.StandardCharsets.UTF_8; -import static java.util.Comparator.comparing; /** * @author olaaun @@ -39,6 +41,9 @@ import static java.util.Comparator.comparing; */ class LogReader { + static final Pattern logArchivePathPattern = Pattern.compile("(\\d{4})/(\\d{2})/(\\d{2})/(\\d{2})-\\d(.gz)?"); + static final Pattern vespaLogPathPattern = Pattern.compile("vespa\\.log(?:-(\\d{4})-(\\d{2})-(\\d{2})\\.(\\d{2})-(\\d{2})-(\\d{2})(?:.gz)?)?"); + private final Path logDirectory; private final Pattern logFilePattern; @@ -53,43 +58,45 @@ class LogReader { void writeLogs(OutputStream outputStream, Instant from, Instant to) { try { - List<Path> logs = getMatchingFiles(from, to); + List<List<Path>> logs = getMatchingFiles(from, to); for (int i = 0; i < logs.size(); i++) { - Path log = logs.get(i); - boolean zipped = log.toString().endsWith(".gz"); - try (InputStream in = Files.newInputStream(log)) { - InputStream inProxy; - - // If the log needs filtering, possibly unzip (and rezip) it, and filter its lines on timestamp. - if (i == 0 || i == logs.size() - 1) { - ByteArrayOutputStream buffer = new ByteArrayOutputStream(); - try (BufferedReader reader = new BufferedReader(new InputStreamReader(zipped ? new GZIPInputStream(in) : in, UTF_8)); - BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(zipped ? new GZIPOutputStream(buffer) : buffer, UTF_8))) { - for (String line; (line = reader.readLine()) != null; ) { - String[] parts = line.split("\t"); - if (parts.length != 7) - continue; - - Instant at = Instant.EPOCH.plus((long) (Double.parseDouble(parts[0]) * 1_000_000), ChronoUnit.MICROS); - if (at.isAfter(from) && ! at.isAfter(to)) { - writer.write(line); - writer.newLine(); + for (Path log : logs.get(i)) { + boolean zipped = log.toString().endsWith(".gz"); + try (InputStream in = Files.newInputStream(log)) { + InputStream inProxy; + + // If the log needs filtering, possibly unzip (and rezip) it, and filter its lines on timestamp. + // When multiple log files exist for the same instant, their entries should ideally be sorted. This is not done here. + if (i == 0 || i == logs.size() - 1) { + ByteArrayOutputStream buffer = new ByteArrayOutputStream(); + try (BufferedReader reader = new BufferedReader(new InputStreamReader(zipped ? new GZIPInputStream(in) : in, UTF_8)); + BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(zipped ? new GZIPOutputStream(buffer) : buffer, UTF_8))) { + for (String line; (line = reader.readLine()) != null; ) { + String[] parts = line.split("\t"); + if (parts.length != 7) + continue; + + Instant at = Instant.EPOCH.plus((long) (Double.parseDouble(parts[0]) * 1_000_000), ChronoUnit.MICROS); + if (at.isAfter(from) && !at.isAfter(to)) { + writer.write(line); + writer.newLine(); + } } } + inProxy = new ByteArrayInputStream(buffer.toByteArray()); } - inProxy = new ByteArrayInputStream(buffer.toByteArray()); - } - else - inProxy = in; + else + inProxy = in; - if ( ! zipped) { - ByteArrayOutputStream buffer = new ByteArrayOutputStream(); - try (OutputStream outProxy = new GZIPOutputStream(buffer)) { - inProxy.transferTo(outProxy); + if ( ! zipped) { + ByteArrayOutputStream buffer = new ByteArrayOutputStream(); + try (OutputStream outProxy = new GZIPOutputStream(buffer)) { + inProxy.transferTo(outProxy); + } + inProxy = new ByteArrayInputStream(buffer.toByteArray()); } - inProxy = new ByteArrayInputStream(buffer.toByteArray()); + inProxy.transferTo(outputStream); } - inProxy.transferTo(outputStream); } } } @@ -98,9 +105,9 @@ class LogReader { } } - /** Returns log files which may have relevant entries, sorted by modification time — the first and last must be filtered. */ - private List<Path> getMatchingFiles(Instant from, Instant to) { - Map<Path, Instant> paths = new HashMap<>(); + /** Returns log files which may have relevant entries, grouped and sorted by {@link #extractTimestamp(Path)} — the first and last group must be filtered. */ + private List<List<Path>> getMatchingFiles(Instant from, Instant to) { + List<Path> paths = new ArrayList<>(); try { Files.walkFileTree(logDirectory, new SimpleFileVisitor<>() { @@ -112,7 +119,7 @@ class LogReader { @Override public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) { if (logFilePattern.matcher(file.getFileName().toString()).matches()) - paths.put(file, attrs.lastModifiedTime().toInstant()); + paths.add(file); return FileVisitResult.CONTINUE; } @@ -127,15 +134,53 @@ class LogReader { throw new UncheckedIOException(e); } - List<Path> sorted = new ArrayList<>(); - for (var entries = paths.entrySet().stream().sorted(comparing(Map.Entry::getValue)).iterator(); entries.hasNext(); ) { - var entry = entries.next(); - if (entry.getValue().isAfter(from)) - sorted.add(entry.getKey()); - if (entry.getValue().isAfter(to)) + var logsByTimestamp = paths.stream() + .collect(Collectors.groupingBy(this::extractTimestamp, + TreeMap::new, + Collectors.toList())); + System.err.println(logsByTimestamp); + + List<List<Path>> sorted = new ArrayList<>(); + for (var entry : logsByTimestamp.entrySet()) { + if (entry.getKey().isAfter(from)) + sorted.add(entry.getValue()); + if (entry.getKey().isAfter(to)) break; } return sorted; } + /** Extracts a timestamp after all entries in the log file with the given path. */ + Instant extractTimestamp(Path path) { + String relativePath = logDirectory.relativize(path).toString(); + Matcher matcher = logArchivePathPattern.matcher(relativePath); + if (matcher.matches()) { + return ZonedDateTime.of(Integer.parseInt(matcher.group(1)), + Integer.parseInt(matcher.group(2)), + Integer.parseInt(matcher.group(3)), + Integer.parseInt(matcher.group(4)) + 1, // timestamp is start of hour range of the log file + 0, + 0, + 0, + ZoneId.of("UTC")) + .toInstant(); + } + matcher = vespaLogPathPattern.matcher(relativePath); + if (matcher.matches()) { + if (matcher.group(1) == null) + return Instant.MAX; + + return ZonedDateTime.of(Integer.parseInt(matcher.group(1)), + Integer.parseInt(matcher.group(2)), + Integer.parseInt(matcher.group(3)), + Integer.parseInt(matcher.group(4)), + Integer.parseInt(matcher.group(5)), + Integer.parseInt(matcher.group(6)) + 1, // timestamp is that of the last entry, with seconds truncated + 0, + ZoneId.of("UTC")) + .toInstant(); + } + throw new IllegalArgumentException("Unrecognized file pattern for file at '" + path + "'"); + } + } diff --git a/container-core/src/test/java/com/yahoo/container/handler/LogReaderTest.java b/container-core/src/test/java/com/yahoo/container/handler/LogReaderTest.java index c5ddfe898b9..6b7be1d8cd3 100644 --- a/container-core/src/test/java/com/yahoo/container/handler/LogReaderTest.java +++ b/container-core/src/test/java/com/yahoo/container/handler/LogReaderTest.java @@ -12,7 +12,7 @@ import java.io.OutputStream; import java.nio.file.FileSystem; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.attribute.FileTime; +import java.time.Duration; import java.time.Instant; import java.util.regex.Pattern; import java.util.zip.GZIPInputStream; @@ -26,51 +26,56 @@ public class LogReaderTest { private final FileSystem fileSystem = TestFileSystem.create(); private final Path logDirectory = fileSystem.getPath("/opt/vespa/logs"); - private static final String log1 = "0.1\t17491290-v6-1.ostk.bm2.prod.ne1.yahoo.com\t5480\tcontainer\tstdout\tinfo\tERROR: Bundle canary-application [71] Unable to get module class path. (java.lang.NullPointerException)\n"; - private static final String log2 = "0.15\t17491290-v6-1.ostk.bm2.prod.ne1.yahoo.com\t5480\tcontainer\tstderr\twarning\twarning\n"; - private static final String log3 = "0.2\t17491290-v6-1.ostk.bm2.prod.ne1.yahoo.com\t5480\tcontainer\tstderr\twarning\tjava.lang.NullPointerException\\n\\tat org.apache.felix.framework.BundleRevisionImpl.calculateContentPath(BundleRevisionImpl.java:438)\\n\\tat org.apache.felix.framework.BundleRevisionImpl.initializeContentPath(BundleRevisionImpl.java:371)\n"; + private static final String logv11 = "3600.2\t17491290-v6-1.ostk.bm2.prod.ne1.yahoo.com\t5480\tcontainer\tstdout\tinfo\tfourth\n"; + private static final String logv = "90000.1\t17491290-v6-1.ostk.bm2.prod.ne1.yahoo.com\t5480\tcontainer\tstdout\tinfo\tlast\n"; + private static final String log100 = "0.2\t17491290-v6-1.ostk.bm2.prod.ne1.yahoo.com\t5480\tcontainer\tstdout\tinfo\tsecond\n"; + private static final String log101 = "0.1\t17491290-v6-1.ostk.bm2.prod.ne1.yahoo.com\t5480\tcontainer\tstdout\tinfo\tERROR: Bundle canary-application [71] Unable to get module class path. (java.lang.NullPointerException)\n"; + private static final String log110 = "3600.1\t17491290-v6-1.ostk.bm2.prod.ne1.yahoo.com\t5480\tcontainer\tstderr\twarning\tthird\n"; + private static final String log200 = "86400.1\t17491290-v6-1.ostk.bm2.prod.ne1.yahoo.com\t5480\tcontainer\tstderr\twarning\tjava.lang.NullPointerException\\n\\tat org.apache.felix.framework.BundleRevisionImpl.calculateContentPath(BundleRevisionImpl.java:438)\\n\\tat org.apache.felix.framework.BundleRevisionImpl.initializeContentPath(BundleRevisionImpl.java:371)\n"; @Before public void setup() throws IOException { - Files.createDirectories(logDirectory.resolve("subfolder")); - - Files.setLastModifiedTime( - Files.write(logDirectory.resolve("log1.log.gz"), compress(log1)), - FileTime.from(Instant.ofEpochMilli(123))); - Files.setLastModifiedTime( - Files.write(logDirectory.resolve("subfolder/log2.log.gz"), compress(log2)), - FileTime.from(Instant.ofEpochMilli(180))); - Files.setLastModifiedTime( - Files.write(logDirectory.resolve("subfolder/log3.log"), log3.getBytes(UTF_8)), - FileTime.from(Instant.ofEpochMilli(234))); - + // Log archive paths and file names indicate what hour they contain logs for, with the start of that hour. + // Multiple entries may exist for each hour. + Files.createDirectories(logDirectory.resolve("1970/01/01")); + Files.write(logDirectory.resolve("1970/01/01/00-0.gz"), compress(log100)); + Files.write(logDirectory.resolve("1970/01/01/00-1"), log101.getBytes(UTF_8)); + Files.write(logDirectory.resolve("1970/01/01/01-0.gz"), compress(log110)); + + Files.createDirectories(logDirectory.resolve("1970/01/02")); + Files.write(logDirectory.resolve("1970/01/02/00-0"), log200.getBytes(UTF_8)); + + // Vespa log file names are the second-truncated timestamp of the last entry. + // The current log file has no timestamp suffix. + Files.write(logDirectory.resolve("vespa.log-1970-01-01.01-00-00"), logv11.getBytes(UTF_8)); + Files.write(logDirectory.resolve("vespa.log"), logv.getBytes(UTF_8)); } @Test public void testThatLogsOutsideRangeAreExcluded() throws Exception { ByteArrayOutputStream baos = new ByteArrayOutputStream(); LogReader logReader = new LogReader(logDirectory, Pattern.compile(".*")); - logReader.writeLogs(baos, Instant.ofEpochMilli(150), Instant.ofEpochMilli(160)); + logReader.writeLogs(baos, Instant.ofEpochMilli(150), Instant.ofEpochMilli(3601050)); - assertEquals("", decompress(baos.toByteArray())); + assertEquals(log100 + logv11 + log110, decompress(baos.toByteArray())); } @Test public void testThatLogsNotMatchingRegexAreExcluded() throws Exception { ByteArrayOutputStream baos = new ByteArrayOutputStream(); - LogReader logReader = new LogReader(logDirectory, Pattern.compile(".*3\\.log")); - logReader.writeLogs(baos, Instant.ofEpochMilli(0), Instant.ofEpochMilli(300)); + LogReader logReader = new LogReader(logDirectory, Pattern.compile(".*-1.*")); + logReader.writeLogs(baos, Instant.EPOCH, Instant.EPOCH.plus(Duration.ofDays(2))); - assertEquals(log3, decompress(baos.toByteArray())); + assertEquals(log101 + logv11, decompress(baos.toByteArray())); } @Test public void testZippedStreaming() throws IOException { ByteArrayOutputStream zippedBaos = new ByteArrayOutputStream(); LogReader logReader = new LogReader(logDirectory, Pattern.compile(".*")); - logReader.writeLogs(zippedBaos, Instant.ofEpochMilli(0), Instant.ofEpochMilli(300)); + logReader.writeLogs(zippedBaos, Instant.EPOCH, Instant.EPOCH.plus(Duration.ofDays(2))); - assertEquals(log1 + log2 + log3, decompress(zippedBaos.toByteArray())); + assertEquals(log100 + log101 + logv11 + log110 + log200 + logv, decompress(zippedBaos.toByteArray())); } private byte[] compress(String input) throws IOException { |