// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.maintenance;

import com.google.common.collect.Sets;
import com.yahoo.jdisc.Metric;
import com.yahoo.lang.MutableInteger;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.lb.LoadBalancer;
import com.yahoo.vespa.hosted.provision.lb.LoadBalancer.State;
import com.yahoo.vespa.hosted.provision.lb.LoadBalancerId;
import com.yahoo.vespa.hosted.provision.lb.LoadBalancerInstance;
import com.yahoo.vespa.hosted.provision.lb.LoadBalancerService;
import com.yahoo.vespa.hosted.provision.lb.LoadBalancerSpec;
import com.yahoo.vespa.hosted.provision.persistence.CuratorDb;

import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Consumer;
import java.util.function.Predicate;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;

/**
 * Periodically expire load balancers and de-provision inactive ones.
 *
 * Load balancers expire from the following states:
 *
 * {@link LoadBalancer.State#inactive}: An application is removed and load balancers are deactivated.
 * {@link LoadBalancer.State#reserved}: A prepared application is never successfully activated, thus never activating
 *                                      any prepared load balancers.
 *
 * @author mpolden
 */
public class LoadBalancerExpirer extends NodeRepositoryMaintainer {

    private static final Logger LOG = Logger.getLogger(LoadBalancerExpirer.class.getName());

    private static final Duration reservedExpiry = Duration.ofHours(1);
    private static final Duration inactiveExpiry = Duration.ofHours(1);

    private final LoadBalancerService service;
    private final CuratorDb db;

    public LoadBalancerExpirer(NodeRepository nodeRepository, Duration interval, LoadBalancerService service,
                               Metric metric) {
        super(nodeRepository, interval, metric);
        this.service = Objects.requireNonNull(service, "service must be non-null");
        this.db = nodeRepository.database();
    }

    @Override
    protected double maintain() {
        expireReserved();
        return (deprovisionRemovable() + pruneReals()) / 2;
    }

    /** Move reserved load balancers that have expired to inactive */
    private void expireReserved() {
        Instant now = nodeRepository().clock().instant();
        Instant expiry = now.minus(reservedExpiry);
        patchLoadBalancers(lb -> canDeactivate(lb, expiry),
                           lb -> db.writeLoadBalancer(lb.with(State.inactive, now), lb.state()));
    }

    /** Deprovision removable load balancers */
    private double deprovisionRemovable() {
        MutableInteger attempts = new MutableInteger(0);
        var failed = new ArrayList<LoadBalancerId>();
        var lastException = new AtomicReference<Exception>();
        var expiry = nodeRepository().clock().instant().minus(inactiveExpiry);
        patchLoadBalancers(lb -> canRemove(lb, expiry), lb -> {
            try {
                attempts.add(1);
                LOG.log(Level.INFO, () -> "Removing expired inactive " + lb.id());
                service.remove(lb);
                db.removeLoadBalancer(lb.id());
            } catch (Exception e) {
                failed.add(lb.id());
                lastException.set(e);
            }
        });
        if (!failed.isEmpty()) {
            LOG.log(Level.WARNING, lastException.get(), () -> String.format(
                    "Failed to remove %d load balancers: %s, retrying in %s",
                    failed.size(),
                    failed.stream()
                          .map(LoadBalancerId::serializedForm)
                          .collect(Collectors.joining(", ")),
                    interval()));
        }
        return asSuccessFactorDeviation(attempts.get(), failed.size());
    }

    /** Remove reals from inactive load balancers */
    private double pruneReals() {
        var attempts = new MutableInteger(0);
        var failed = new ArrayList<LoadBalancerId>();
        var lastException = new AtomicReference<Exception>();
        patchLoadBalancers(lb -> lb.state() == State.inactive, lb -> {
            if (lb.instance().isEmpty()) return;
            var allocatedNodes = allocatedNodes(lb.id()).stream().map(Node::hostname).collect(Collectors.toSet());
            var reals = new LinkedHashSet<>(lb.instance().get().reals());
            // Remove any real no longer allocated to this application
            reals.removeIf(real -> !allocatedNodes.contains(real.hostname().value()));
            if (reals.equals(lb.instance().get().reals())) return; // Nothing to remove
            try {
                attempts.add(1);
                LOG.log(Level.INFO, () -> "Removing reals from inactive load balancer " + lb.id() + ": " +
                                          Sets.difference(lb.instance().get().reals(), reals));
                LoadBalancerInstance instance = service.configure(lb.instance().get(),
                                                                  new LoadBalancerSpec(lb.id().application(),
                                                                                       lb.id().cluster(),
                                                                                       reals,
                                                                                       lb.instance().get().settings(),
                                                                                       lb.instance().get().cloudAccount(),
                                                                                       lb.idSeed()),
                                                                  true);
                db.writeLoadBalancer(lb.with(instance), lb.state());
            } catch (Exception e) {
                failed.add(lb.id());
                lastException.set(e);
            }
        });
        if (!failed.isEmpty()) {
            LOG.log(Level.WARNING, String.format("Failed to remove reals from %d load balancers: %s, retrying in %s",
                                                 failed.size(),
                                                 failed.stream()
                                                       .map(LoadBalancerId::serializedForm)
                                                       .collect(Collectors.joining(", ")),
                                                 interval()),
                    lastException.get());
        }
        return asSuccessFactorDeviation(attempts.get(), failed.size());
    }

    /** Patch load balancers matching given filter, while holding lock */
    private void patchLoadBalancers(Predicate<LoadBalancer> filter, Consumer<LoadBalancer> patcher) {
        for (var id : db.readLoadBalancerIds()) {
            Optional<LoadBalancer> loadBalancer = db.readLoadBalancer(id);
            if (loadBalancer.isEmpty() || !filter.test(loadBalancer.get())) continue;
            try (var lock = db.lock(id.application(), Duration.ofSeconds(1))) {
                // Re-read while holding the lock, as the load balancer may have changed
                loadBalancer = db.readLoadBalancer(id);
                if (loadBalancer.isEmpty() || !filter.test(loadBalancer.get())) continue;
                patcher.accept(loadBalancer.get());
            }
        }
    }

    private boolean canRemove(LoadBalancer lb, Instant expiry) {
        return lb.state() == State.removable ||
               (lb.state() == State.inactive && lb.changedAt().isBefore(expiry) &&
                allocatedNodes(lb.id()).isEmpty());
    }

    private boolean canDeactivate(LoadBalancer lb, Instant expiry) {
        return lb.state() == State.reserved && lb.changedAt().isBefore(expiry);
    }

    private List<Node> allocatedNodes(LoadBalancerId loadBalancer) {
        return nodeRepository().nodes()
                               .list(Node.State.active, Node.State.inactive, Node.State.reserved)
                               .owner(loadBalancer.application())
                               // Always match the cluster by the effective container ID
                               // TODO(mpolden): Remove this and use NodeList::cluster once combined disappears in Vespa 9
                               .matching((node) -> node.allocation().isPresent() &&
                                                   LoadBalancer.containerId(node.allocation().get().membership().cluster())
                                                               .equals(loadBalancer.cluster()))
                               .asList();
    }

}
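
// A minimal usage sketch, kept as a comment so the source file stays valid. It only assumes the
// constructor signature declared above; the NodeRepository, LoadBalancerService and Metric instances,
// and the five-minute interval, are hypothetical placeholders chosen for illustration. In practice the
// maintenance framework constructs the maintainer and invokes maintain() on the configured interval.
//
//   LoadBalancerExpirer expirer = new LoadBalancerExpirer(nodeRepository,      // assumed to be available
//                                                         Duration.ofMinutes(5), // illustrative interval
//                                                         loadBalancerService,  // assumed implementation
//                                                         metric);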