diff options
author | Arne Juul <arnej@verizonmedia.com> | 2021-04-19 06:43:43 +0000 |
---|---|---|
committer | Arne Juul <arnej@verizonmedia.com> | 2021-04-19 06:46:44 +0000 |
commit | 2d5417f39cac7b5c093ab6e26a760f1428611389 (patch) | |
tree | f5bed16853ba8a6236e766d7aa1e9df321f28466 | |
parent | fd9dba8fe5f5db6624f5ce771322f90a504286da (diff) |
add more logging
* make it possible to track which location broker
various components are registered with or
mirroring information from.
* most of these log messages should now look the same
in the Java and C++ implementations
-rw-r--r-- | jrt/src/com/yahoo/jrt/slobrok/api/BackOff.java | 15 | ||||
-rw-r--r-- | jrt/src/com/yahoo/jrt/slobrok/api/BackOffPolicy.java | 8 | ||||
-rw-r--r-- | jrt/src/com/yahoo/jrt/slobrok/api/Mirror.java | 30 | ||||
-rw-r--r-- | jrt/src/com/yahoo/jrt/slobrok/api/Register.java | 34 | ||||
-rw-r--r-- | jrt/src/com/yahoo/jrt/slobrok/api/SlobrokList.java | 2 | ||||
-rw-r--r-- | slobrok/src/vespa/slobrok/CMakeLists.txt | 1 | ||||
-rw-r--r-- | slobrok/src/vespa/slobrok/backoff.cpp | 39 | ||||
-rw-r--r-- | slobrok/src/vespa/slobrok/backoff.h | 39 | ||||
-rw-r--r-- | slobrok/src/vespa/slobrok/sblist.cpp | 8 | ||||
-rw-r--r-- | slobrok/src/vespa/slobrok/sbmirror.cpp | 15 | ||||
-rw-r--r-- | slobrok/src/vespa/slobrok/sbmirror.h | 1 | ||||
-rw-r--r-- | slobrok/src/vespa/slobrok/sbregister.cpp | 21 | ||||
-rw-r--r-- | slobrok/src/vespa/slobrok/sbregister.h | 1 |
13 files changed, 143 insertions, 71 deletions
diff --git a/jrt/src/com/yahoo/jrt/slobrok/api/BackOff.java b/jrt/src/com/yahoo/jrt/slobrok/api/BackOff.java index d8a6718b407..f883ac32b72 100644 --- a/jrt/src/com/yahoo/jrt/slobrok/api/BackOff.java +++ b/jrt/src/com/yahoo/jrt/slobrok/api/BackOff.java @@ -1,15 +1,18 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.jrt.slobrok.api; - class BackOff implements BackOffPolicy { - private double time = 0.50; + private static final double initialTime = 0.50; + + private double time = initialTime; + @Override public void reset() { - time = 0.50; + time = initialTime; } + @Override public double get() { double ret = time; if (time < 5.0) { @@ -24,7 +27,13 @@ class BackOff implements BackOffPolicy return ret; } + @Override public boolean shouldWarn(double t) { return ((t > 8.1 && t < 9.9) || (t > 29.9)); } + + @Override + public boolean shouldInform(double t) { + return (t == initialTime); + } } diff --git a/jrt/src/com/yahoo/jrt/slobrok/api/BackOffPolicy.java b/jrt/src/com/yahoo/jrt/slobrok/api/BackOffPolicy.java index f98e3456880..4cc402b1706 100644 --- a/jrt/src/com/yahoo/jrt/slobrok/api/BackOffPolicy.java +++ b/jrt/src/com/yahoo/jrt/slobrok/api/BackOffPolicy.java @@ -35,4 +35,12 @@ public interface BackOffPolicy * @param t current delay value **/ public boolean shouldWarn(double t); + + /** + * Check if a certain delay should result in an information message logged. + * + * @return true if we should log + * @param t current delay value + **/ + public boolean shouldInform(double t); } diff --git a/jrt/src/com/yahoo/jrt/slobrok/api/Mirror.java b/jrt/src/com/yahoo/jrt/slobrok/api/Mirror.java index 1a00cd25a2c..6e39e2a3dbd 100644 --- a/jrt/src/com/yahoo/jrt/slobrok/api/Mirror.java +++ b/jrt/src/com/yahoo/jrt/slobrok/api/Mirror.java @@ -39,6 +39,7 @@ public class Mirror implements IMirror { private final BackOffPolicy backOff; private volatile int updates = 0; private boolean requestDone = false; + private boolean logOnSuccess = true; private AtomicReference<Entry[]> specs = new AtomicReference<>(new Entry[0]); private int specsGeneration = 0; private final TransportThread transportThread; @@ -176,18 +177,22 @@ public class Mirror implements IMirror { } if (target != null && ! slobroks.contains(currSlobrok)) { + log.log(Level.INFO, "location broker "+currSlobrok+" removed, will disconnect and use one of: "+slobroks); target.close(); target = null; } if (target == null) { + logOnSuccess = true; currSlobrok = slobroks.nextSlobrokSpec(); if (currSlobrok == null) { double delay = backOff.get(); + Level level = Level.FINE; + if (backOff.shouldInform(delay)) level = Level.INFO; + // note: since this happens quite often during normal operation, + // the level is lower than the more-natural Level.WARNING: + if (backOff.shouldWarn(delay)) level = Level.INFO; + log.log(level, "no location brokers available, retrying: "+slobroks+" (in " + delay + " seconds)"); updateTask.schedule(delay); - if (backOff.shouldWarn(delay)) { - log.log(Level.INFO, "no location brokers available " - + "(retry in " + delay + " seconds) for: " + slobroks); - } return; } target = orb.connect(new Spec(currSlobrok)); @@ -216,6 +221,11 @@ public class Mirror implements IMirror { for (int idx = 0; idx < numNames; idx++) { newSpecs[idx] = new Entry(n[idx], s[idx]); } + if (logOnSuccess) { + log.log(Level.INFO, "successfully connected to location broker "+currSlobrok+ + "(mirror initialized with "+numNames+" service names)"); + logOnSuccess = false; + } specs.set(newSpecs); specsGeneration = answer.get(2).asInt32(); @@ -233,15 +243,16 @@ public class Mirror implements IMirror { || (req.returnValues().get(2).count() != req.returnValues().get(3).count())) { - log.log(Level.INFO, "Error when handling update from slobrok. Error: " + req.errorMessage() + - " (error code " + req.errorCode() + ")" + ", target: " + target); + if (! logOnSuccess) { + log.log(Level.INFO, "Error with location broker "+currSlobrok+" update: " + req.errorMessage() + + " (error code " + req.errorCode() + ")"); + } target.close(); target = null; updateTask.scheduleNow(); // try next slobrok return; } - Values answer = req.returnValues(); int diffFromGeneration = answer.get(0).asInt32(); @@ -279,7 +290,10 @@ public class Mirror implements IMirror { newSpecs[idx++] = e; } } - + if (logOnSuccess) { + log.log(Level.INFO, "successfully connected to location broker "+currSlobrok+" (mirror initialized with "+newSpecs.length+" service names)"); + logOnSuccess = false; + } specs.set(newSpecs); specsGeneration = diffToGeneration; diff --git a/jrt/src/com/yahoo/jrt/slobrok/api/Register.java b/jrt/src/com/yahoo/jrt/slobrok/api/Register.java index 713cecc62d1..0da390f8bf6 100644 --- a/jrt/src/com/yahoo/jrt/slobrok/api/Register.java +++ b/jrt/src/com/yahoo/jrt/slobrok/api/Register.java @@ -181,6 +181,7 @@ public class Register { reqDone = false; boolean logOnSuccess = false; + boolean logOnFailure = true; synchronized (this) { if (req.methodName().equals(UNREGISTER_METHOD_NAME)) { logOnSuccess = true; @@ -191,16 +192,21 @@ public class Register { lastRegisterSucceeded.remove(name); } else { final Boolean lastSucceeded = lastRegisterSucceeded.get(name); - if (lastSucceeded == null || lastSucceeded != !req.isError()) { + if (lastSucceeded == null) { + logOnSuccess = true; + logOnFailure = false; + } else if (lastSucceeded != !req.isError()) { logOnSuccess = true; - lastRegisterSucceeded.put(name, !req.isError()); } + lastRegisterSucceeded.put(name, !req.isError()); } } if (req.isError()) { if (req.errorCode() != ErrorCode.METHOD_FAILED) { - log.log(Level.INFO, logMessagePrefix() + " failed, will disconnect: " + req.errorMessage() + " (code " + req.errorCode() + ")"); + if (logOnFailure) { + log.log(Level.INFO, logMessagePrefix() + " failed, will disconnect: " + req.errorMessage() + " (code " + req.errorCode() + ")"); + } target.close(); target = null; } else { @@ -210,7 +216,6 @@ public class Register { log.log(logOnSuccess ? Level.INFO : Level.FINE, logMessagePrefix() + " completed successfully"); backOff.reset(); } - req = null; name = null; } @@ -219,7 +224,7 @@ public class Register { return; // current request still in progress } if (target != null && ! slobroks.contains(currSlobrok)) { - log.log(Level.INFO, "RPC server " + mySpec + ": Slobrok server " + currSlobrok + " removed, will disconnect"); + log.log(Level.INFO, "[RPC @ " + mySpec + "] location broker " + currSlobrok + " removed, will disconnect and use one of: "+slobroks); target.close(); target = null; } @@ -227,9 +232,10 @@ public class Register { currSlobrok = slobroks.nextSlobrokSpec(); if (currSlobrok == null) { double delay = backOff.get(); - Level level = backOff.shouldWarn(delay) ? Level.WARNING : Level.FINE; - log.log(level, "RPC server " + mySpec + ": All Slobrok servers tried, will retry in " + delay - + " seconds: " + slobroks); + Level level = Level.FINE; + if (backOff.shouldInform(delay)) level = Level.INFO; + if (backOff.shouldWarn(delay)) level = Level.WARNING; + log.log(level, "[RPC @ " + mySpec + "] no location brokers available, retrying: "+slobroks+" (in " + delay + " seconds)"); updateTask.schedule(delay); return; } @@ -247,8 +253,8 @@ public class Register { } if (logFine) { - log.log(Level.FINE, "RPC server " + mySpec + ": Connect to Slobrok server " + currSlobrok + - " and reregister all Slobrok names: " + namesString); + log.log(Level.FINE, "[RPC @ " + mySpec + "] Connect to location broker " + currSlobrok + + " and reregister all service names: " + namesString); } } @@ -261,7 +267,7 @@ public class Register { req = new Request(REGISTER_METHOD_NAME); } else { pending.addAll(names); - log.log(Level.FINE, "RPC server " + mySpec + ": Reregister all Slobrok names in 30 seconds: " + names); + log.log(Level.FINE, "[RPC @ " + mySpec + "] Reregister all service names in 30 seconds: " + names); updateTask.schedule(30.0); return; } @@ -274,9 +280,9 @@ public class Register { } private String logMessagePrefix() { - return "RPC server " + mySpec - + (req.methodName().equals(UNREGISTER_METHOD_NAME) ? " unregistering " : " registering ") - + name + " with Slobrok server " + currSlobrok; + return "[RPC @ " + mySpec + "] " + + (req.methodName().equals(UNREGISTER_METHOD_NAME) ? "unregistering " : "registering ") + + name + " with location broker " + currSlobrok; } private synchronized void handleRpcList(Request req) { diff --git a/jrt/src/com/yahoo/jrt/slobrok/api/SlobrokList.java b/jrt/src/com/yahoo/jrt/slobrok/api/SlobrokList.java index b524c25d8ff..10d8923d9f5 100644 --- a/jrt/src/com/yahoo/jrt/slobrok/api/SlobrokList.java +++ b/jrt/src/com/yahoo/jrt/slobrok/api/SlobrokList.java @@ -48,7 +48,7 @@ public class SlobrokList { public boolean contains(String slobrok) { checkUpdate(); for (String s : slobroks) { - if (s == slobrok) return true; + if (s.equals(slobrok)) return true; } return false; } diff --git a/slobrok/src/vespa/slobrok/CMakeLists.txt b/slobrok/src/vespa/slobrok/CMakeLists.txt index 92edaf79c7d..00ccadb1226 100644 --- a/slobrok/src/vespa/slobrok/CMakeLists.txt +++ b/slobrok/src/vespa/slobrok/CMakeLists.txt @@ -1,6 +1,7 @@ # Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. vespa_add_library(slobrok SOURCES + backoff.cpp sblist.cpp cfg.cpp sbmirror.cpp diff --git a/slobrok/src/vespa/slobrok/backoff.cpp b/slobrok/src/vespa/slobrok/backoff.cpp new file mode 100644 index 00000000000..e09e688f646 --- /dev/null +++ b/slobrok/src/vespa/slobrok/backoff.cpp @@ -0,0 +1,39 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "backoff.h" + +namespace slobrok::api { + +namespace { +constexpr size_t num_warn_intervals = 5; +const double warn_intervals[num_warn_intervals] = { 0.0, 10.0, 60.0, 600.0, 3600.0 }; +} + +BackOff::BackOff() { reset(); } + +void BackOff::reset() { + _time = 0.0; + _since_last_warn = 0.0; + _nextwarn_idx = 0; +} + +double BackOff::get() { + _since_last_warn += _time; + if (_time < 20.0) { + _time += 0.5; + } + return _time; +} + +bool BackOff::shouldWarn() { + if (_since_last_warn >= warn_intervals[_nextwarn_idx]) { + if (_nextwarn_idx + 1 < num_warn_intervals) { + ++_nextwarn_idx; + } + _since_last_warn = 0.0; + return true; + } + return false; +} + +} // namespace slobrok::api diff --git a/slobrok/src/vespa/slobrok/backoff.h b/slobrok/src/vespa/slobrok/backoff.h index 9bfc4667e6d..53193a9de6e 100644 --- a/slobrok/src/vespa/slobrok/backoff.h +++ b/slobrok/src/vespa/slobrok/backoff.h @@ -1,6 +1,8 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once +#include <stdio.h> + namespace slobrok { namespace api { @@ -8,39 +10,14 @@ class BackOff { private: double _time; - double _warntime; - double _nextwarn; + double _since_last_warn; + size_t _nextwarn_idx; public: - BackOff() : _time(0.50), _warntime(0.0), _nextwarn(4.0) {} - void reset() { _time = 0.50; _warntime = 0.0; _nextwarn = 15.0; } - double get() { - double ret = _time; - _warntime += ret; - if (_time < 5.0) { - _time += 0.5; - } else if (_time < 10.0) { - _time += 1.0; - } else if (_time < 30.0) { - _time += 5; - } else { - // max retry time is 30 seconds - _time = 30.0; - } - return ret; - } - bool shouldWarn() { - if (_warntime > _nextwarn) { - _warntime = 0.0; - _nextwarn *= 4.0; - if (_nextwarn > 86400.0) { - _nextwarn = 86400.0; - } - return true; - } else { - return false; - } - } + BackOff(); + void reset(); + double get(); + bool shouldWarn(); }; } // namespace api diff --git a/slobrok/src/vespa/slobrok/sblist.cpp b/slobrok/src/vespa/slobrok/sblist.cpp index e7528fb6db2..2cb5f0b3d60 100644 --- a/slobrok/src/vespa/slobrok/sblist.cpp +++ b/slobrok/src/vespa/slobrok/sblist.cpp @@ -60,12 +60,12 @@ SlobrokList::logString() if (_slobrokSpecs.size() == 0) { return "[empty service location broker list]"; } - std::string v; - v = _slobrokSpecs[0]; - for (size_t i = 1 ; i < _slobrokSpecs.size(); ++i) { - v += " or "; + std::string v = "["; + for (size_t i = 0 ; i < _slobrokSpecs.size(); ++i) { + if (i > 0) v += ", "; v += _slobrokSpecs[i]; } + v += "]"; return v; } diff --git a/slobrok/src/vespa/slobrok/sbmirror.cpp b/slobrok/src/vespa/slobrok/sbmirror.cpp index 460d61cb2a8..8d5a0e39c6a 100644 --- a/slobrok/src/vespa/slobrok/sbmirror.cpp +++ b/slobrok/src/vespa/slobrok/sbmirror.cpp @@ -16,6 +16,7 @@ MirrorAPI::MirrorAPI(FRT_Supervisor &orb, const ConfiguratorFactory & config) _reqPending(false), _scheduled(false), _reqDone(false), + _logOnSuccess(true), _specs(), _specsGen(), _updates(), @@ -234,6 +235,11 @@ MirrorAPI::handleReqDone() } else { _backOff.reset(); // req done OK + if (_logOnSuccess) { + LOG(info, "successfully connected to location broker %s (mirror initialized with %zu service names)", + _currSlobrok.c_str(), _specs.size()); + _logOnSuccess = false; + } return true; } } @@ -245,6 +251,7 @@ void MirrorAPI::handleReconnect() { if (_target == 0) { + _logOnSuccess = true; _currSlobrok = _slobrokSpecs.nextSlobrokSpec(); if (_currSlobrok.size() > 0) { _target = _orb.GetTarget(_currSlobrok.c_str()); @@ -256,10 +263,12 @@ MirrorAPI::handleReconnect() } double delay = _backOff.get(); reSched(delay); + std::string cps = _slobrokSpecs.logString(); + static const char * const msgfmt = "no location brokers available, retrying: %s (in %.1f seconds)"; if (_backOff.shouldWarn()) { - std::string cps = _slobrokSpecs.logString(); - LOG(warning, "cannot connect to location broker at %s (retry in %f seconds)", - cps.c_str(), delay); + LOG(warning, msgfmt, cps.c_str(), delay); + } else { + LOG(debug, msgfmt, cps.c_str(), delay); } } } diff --git a/slobrok/src/vespa/slobrok/sbmirror.h b/slobrok/src/vespa/slobrok/sbmirror.h index 437ef334af6..c1c7009ce12 100644 --- a/slobrok/src/vespa/slobrok/sbmirror.h +++ b/slobrok/src/vespa/slobrok/sbmirror.h @@ -92,6 +92,7 @@ private: bool _reqPending; bool _scheduled; bool _reqDone; + bool _logOnSuccess; SpecList _specs; vespalib::GenCnt _specsGen; vespalib::GenCnt _updates; diff --git a/slobrok/src/vespa/slobrok/sbregister.cpp b/slobrok/src/vespa/slobrok/sbregister.cpp index 72be5f69538..7ec3325a22a 100644 --- a/slobrok/src/vespa/slobrok/sbregister.cpp +++ b/slobrok/src/vespa/slobrok/sbregister.cpp @@ -58,6 +58,7 @@ RegisterAPI::RegisterAPI(FRT_Supervisor &orb, const ConfiguratorFactory & config _hooks(*this), _lock(), _reqDone(false), + _logOnSuccess(true), _busy(false), _slobrokSpecs(), _configurator(config.create(_slobrokSpecs)), @@ -146,6 +147,11 @@ RegisterAPI::handleReqDone() _req->GetErrorMessage()); } } else { + if (_logOnSuccess && (_pending.size() == 0) && (_names.size() > 0)) { + LOG(info, "[RPC @ %s] registering %s with location broker %s completed successfully", + createSpec(_orb).c_str(), _names[0].c_str(), _currSlobrok.c_str()); + _logOnSuccess = false; + } // reset backoff strategy on any successful request _backOff.reset(); } @@ -161,13 +167,14 @@ RegisterAPI::handleReconnect() if (_configurator->poll() && _target != 0) { if (! _slobrokSpecs.contains(_currSlobrok)) { vespalib::string cps = _slobrokSpecs.logString(); - LOG(warning, "current server %s not in list of location brokers: %s", - _currSlobrok.c_str(), cps.c_str()); + LOG(warning, "[RPC @ %s] location broker %s removed, will disconnect and use one of: %s", + createSpec(_orb).c_str(), _currSlobrok.c_str(), cps.c_str()); _target->SubRef(); _target = 0; } } if (_target == 0) { + _logOnSuccess = true; _currSlobrok = _slobrokSpecs.nextSlobrokSpec(); if (_currSlobrok.size() > 0) { // try next possible server. @@ -185,13 +192,13 @@ RegisterAPI::handleReconnect() // possibly with a warning. double delay = _backOff.get(); Schedule(delay); + static const char * const msgfmt = "[RPC @ %s] no location brokers available, retrying: %s (in %.1f seconds)"; + vespalib::string cps = _slobrokSpecs.logString(); if (_backOff.shouldWarn()) { - vespalib::string cps = _slobrokSpecs.logString(); - LOG(warning, "cannot connect to location broker at %s " - "(retry in %f seconds)", cps.c_str(), delay); + LOG(warning, msgfmt, createSpec(_orb).c_str(), cps.c_str(), delay); } else { - LOG(debug, "slobrok retry in %f seconds", delay); - } + LOG(debug, msgfmt, createSpec(_orb).c_str(), cps.c_str(), delay); + } return; } } diff --git a/slobrok/src/vespa/slobrok/sbregister.h b/slobrok/src/vespa/slobrok/sbregister.h index 8e8614fdf0a..8e849e1ea13 100644 --- a/slobrok/src/vespa/slobrok/sbregister.h +++ b/slobrok/src/vespa/slobrok/sbregister.h @@ -87,6 +87,7 @@ private: RPCHooks _hooks; std::mutex _lock; bool _reqDone; + bool _logOnSuccess; std::atomic<bool> _busy; SlobrokList _slobrokSpecs; Configurator::UP _configurator; |