diff options
author | Arne H Juul <arnej27959@users.noreply.github.com> | 2022-12-21 15:24:28 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-12-21 15:24:28 +0100 |
commit | 18d376f2b7992f07c31e6d6c79fff5d457f3bbc8 (patch) | |
tree | baeea0b53f7309b90f0eb608460a7a06934e287d | |
parent | 958cc2a655735beca465a080e202fdf35b1e0772 (diff) | |
parent | 88cf54128aec9b1c3bebd36735b0a12b606ebf23 (diff) |
Merge pull request #25315 from vespa-engine/arnej/better-information-when-messagebus-fails-to-start
add more relevant information when messagebus fails to start
-rw-r--r-- | jrt/pom.xml | 4 | ||||
-rw-r--r-- | jrt/src/com/yahoo/jrt/slobrok/api/Mirror.java | 51 | ||||
-rw-r--r-- | jrt/tests/com/yahoo/jrt/SlobrokTest.java | 1 | ||||
-rw-r--r-- | messagebus/src/main/java/com/yahoo/messagebus/MessageBus.java | 10 |
4 files changed, 64 insertions, 2 deletions
diff --git a/jrt/pom.xml b/jrt/pom.xml index b43c2b3899c..353e0df777d 100644 --- a/jrt/pom.xml +++ b/jrt/pom.xml @@ -67,6 +67,10 @@ <artifactId>maven-surefire-plugin</artifactId> <configuration> <forkCount>4</forkCount> + <redirectTestOutputToFile>false</redirectTestOutputToFile> + <systemPropertyVariables> + <java.util.logging.SimpleFormatter.format>%1$Ts.%1$TL [%4$s] %5$s {%3$s}%n</java.util.logging.SimpleFormatter.format> + </systemPropertyVariables> </configuration> </plugin> </plugins> diff --git a/jrt/src/com/yahoo/jrt/slobrok/api/Mirror.java b/jrt/src/com/yahoo/jrt/slobrok/api/Mirror.java index b4c06355e1a..7e3a1b570c5 100644 --- a/jrt/src/com/yahoo/jrt/slobrok/api/Mirror.java +++ b/jrt/src/com/yahoo/jrt/slobrok/api/Mirror.java @@ -32,6 +32,7 @@ public class Mirror implements IMirror { private static final Logger log = Logger.getLogger(Mirror.class.getName()); + private final EventLog eventLog = new EventLog(); private final Supervisor orb; private final SlobrokList slobroks; private String currSlobrok; @@ -59,6 +60,7 @@ public class Mirror implements IMirror { this.orb = orb; this.slobroks = slobroks; this.backOff = bop; + eventLog.addEvent("mirror created; with list of servers: "+slobroks); transportThread = orb.transport().selectThread(); updateTask = transportThread.createTask(this::checkForUpdate); reqWait = new RequestWaiter() { @@ -87,6 +89,7 @@ public class Mirror implements IMirror { * stop the regular mirror updates, and discard all entries. */ public void shutdown() { + eventLog.addEvent("mirror shutdown"); updateTask.kill(); transportThread.perform(this::handleShutdown); } @@ -182,6 +185,7 @@ public class Mirror implements IMirror { log.log(Level.INFO, "location broker "+currSlobrok+" removed, will disconnect and use one of: "+slobroks); target.close(); target = null; + eventLog.addEvent("new list of servers: "+slobroks); } if (target == null) { logOnSuccess = true; @@ -197,6 +201,7 @@ public class Mirror implements IMirror { updateTask.schedule(delay); return; } + eventLog.addEvent("selected new server: "+currSlobrok); log.fine(() -> "Try connecting to "+currSlobrok); target = orb.connect(new Spec(currSlobrok)); specsGeneration = 0; @@ -221,8 +226,10 @@ public class Mirror implements IMirror { } target.close(); target = null; // try next slobrok + eventLog.addEvent("failed: "+currSlobrok+" ["+req.errorMessage()+"]"); return; } + eventLog.addEvent("good answer from: "+currSlobrok); Values answer = req.returnValues(); int diffFromGeneration = answer.get(0).asInt32(); @@ -339,13 +346,57 @@ public class Mirror implements IMirror { log.log(Level.INFO, "location broker mirror state: " + " iterations: " + iterations + ", connected to: " + target + + ", number of service specs: " + specs.get().length + ", seen " + updates + " updates" + ", current server: "+ currSlobrok + ", list of servers: " + slobroks); + eventLog.dump(); } public long getIterations() { return iterations; } + static class EventLog { + private static class Event { + final long timestamp; + final String message; + Event(String msg) { + this.timestamp = System.nanoTime(); + this.message = msg; + } + } + int idx = 0; + List<Event> firstEvents = new ArrayList<>(); + List<Event> lastEvents = new ArrayList<>(); + void addEvent(String message) { + var event = new Event(message); + if (firstEvents.size() < 10) { + firstEvents.add(event); + } else if (lastEvents.size() < 10) { + lastEvents.add(event); + } else { + lastEvents.set(idx, event); + idx = (idx + 1) % lastEvents.size(); + } + } + void dump(Event e, long nanos, double now) { + long tt = (long)(now - (nanos - e.timestamp) / 1.0e9); + log.info("event at [" + tt + "]: " + e.message); + } + void dump() { + long nanos = System.nanoTime(); + double now = System.currentTimeMillis() / 1000.0; + log.info("initial events for location broker mirror"); + for (Event e : firstEvents) { + dump(e, nanos, now); + } + if (lastEvents.size() > 0) { + log.info("last events for location broker mirror"); + for (Event e : lastEvents) { + dump(e, nanos, now); + } + } + } + } } diff --git a/jrt/tests/com/yahoo/jrt/SlobrokTest.java b/jrt/tests/com/yahoo/jrt/SlobrokTest.java index e1f43935a8d..a7ba768b1e9 100644 --- a/jrt/tests/com/yahoo/jrt/SlobrokTest.java +++ b/jrt/tests/com/yahoo/jrt/SlobrokTest.java @@ -111,6 +111,7 @@ public class SlobrokTest { assertTrue(mirror.updates() > 0); List<Entry> oneArr = mirror.lookup("*/*/*"); + mirror.dumpState(); assertTrue(oneArr.size() == 1); Mirror.Entry one = oneArr.get(0); assertTrue(one.equals(new Mirror.Entry(wantName, mySpec))); diff --git a/messagebus/src/main/java/com/yahoo/messagebus/MessageBus.java b/messagebus/src/main/java/com/yahoo/messagebus/MessageBus.java index a7647992cdd..0c3f3168568 100644 --- a/messagebus/src/main/java/com/yahoo/messagebus/MessageBus.java +++ b/messagebus/src/main/java/com/yahoo/messagebus/MessageBus.java @@ -151,18 +151,24 @@ public class MessageBus implements ConfigHandler, NetworkOwner, MessageHandler, this.net = net; net.attach(this); if ( ! net.net().waitUntilReady(180)) { + var failure = new IllegalStateException("Network failed to become ready in time."); try { var tmp = net.net().getMirror(); var mirror = (com.yahoo.jrt.slobrok.api.Mirror) tmp; - if (mirror.getIterations() < 2) { + mirror.dumpState(); + if (mirror.ready()) { + log.warning("location broker mirror is ready, but network is not"); + } else if (mirror.getIterations() < 2) { Process.dumpThreads(); String fn = "var/crash/java_pid." + ProcessHandle.current().pid() + ".hprof"; Process.dumpHeap(Defaults.getDefaults().underVespaHome(fn), true); + } else { + failure = new IllegalStateException("No answer from any service location broker, failing startup"); } } catch (Exception e) { // ignore } - throw new IllegalStateException("Network failed to become ready in time."); + throw failure; } // Start messenger. |