From bd42627870b4066b4b8085e17cf67cc7656468f0 Mon Sep 17 00:00:00 2001 From: Valerij Fredriksen Date: Sat, 4 Nov 2023 09:25:36 +0100 Subject: Revert "Move node-admin" --- node-admin/.gitignore | 1 + node-admin/CMakeLists.txt | 7 + node-admin/OWNERS | 1 + node-admin/README.md | 4 + node-admin/pom.xml | 175 ++++ node-admin/src/main/application/services.xml | 12 + .../vespa/hosted/node/admin/cgroup/Cgroup.java | 166 ++++ .../vespa/hosted/node/admin/cgroup/CgroupCore.java | 34 + .../hosted/node/admin/cgroup/CpuController.java | 111 +++ .../hosted/node/admin/cgroup/IoController.java | 111 +++ .../hosted/node/admin/cgroup/MemoryController.java | 92 +++ .../yahoo/vespa/hosted/node/admin/cgroup/Size.java | 68 ++ .../hosted/node/admin/cgroup/package-info.java | 9 + .../node/admin/component/ConfigServerInfo.java | 47 ++ .../node/admin/component/IdempotentTask.java | 43 + .../hosted/node/admin/component/TaskContext.java | 48 ++ .../node/admin/component/TestTaskContext.java | 35 + .../hosted/node/admin/component/package-info.java | 5 + .../node/admin/configserver/ConfigServerApi.java | 78 ++ .../admin/configserver/ConfigServerApiImpl.java | 273 +++++++ .../admin/configserver/ConfigServerClients.java | 32 + .../admin/configserver/ConfigServerException.java | 10 + .../admin/configserver/ConnectionException.java | 43 + .../node/admin/configserver/HttpException.java | 72 ++ .../configserver/RealConfigServerClients.java | 70 ++ .../configserver/StandardConfigServerResponse.java | 22 + .../admin/configserver/cores/CoreDumpMetadata.java | 96 +++ .../node/admin/configserver/cores/Cores.java | 16 + .../node/admin/configserver/cores/CoresImpl.java | 26 + .../cores/bindings/ReportCoreDumpRequest.java | 97 +++ .../admin/configserver/cores/package-info.java | 9 + .../configserver/flags/RealFlagRepository.java | 29 + .../admin/configserver/flags/package-info.java | 5 + .../admin/configserver/noderepository/Acl.java | 236 ++++++ .../admin/configserver/noderepository/AddNode.java | 86 ++ 
.../admin/configserver/noderepository/Event.java | 54 ++ .../noderepository/NoSuchNodeException.java | 8 + .../noderepository/NodeAttributes.java | 192 +++++ .../noderepository/NodeMembership.java | 115 +++ .../configserver/noderepository/NodeReports.java | 103 +++ .../noderepository/NodeRepository.java | 38 + .../noderepository/NodeRepositoryException.java | 10 + .../configserver/noderepository/NodeSpec.java | 880 ++++++++++++++++++++ .../configserver/noderepository/NodeState.java | 13 + .../noderepository/OrchestratorStatus.java | 23 + .../noderepository/RealNodeRepository.java | 406 ++++++++++ .../noderepository/TrustStoreItem.java | 48 ++ .../noderepository/bindings/GetAclResponse.java | 114 +++ .../noderepository/bindings/GetNodesResponse.java | 25 + .../bindings/GetWireguardResponse.java | 50 ++ .../bindings/NodeRepositoryNode.java | 279 +++++++ .../configserver/noderepository/package-info.java | 5 + .../noderepository/reports/BaseReport.java | 147 ++++ .../reports/DropDocumentsReport.java | 55 ++ .../noderepository/reports/package-info.java | 5 + .../configserver/orchestrator/Orchestrator.java | 36 + .../orchestrator/OrchestratorException.java | 16 + .../orchestrator/OrchestratorImpl.java | 143 ++++ .../OrchestratorNotFoundException.java | 9 + .../configserver/orchestrator/package-info.java | 5 + .../node/admin/configserver/package-info.java | 5 + .../node/admin/configserver/state/HealthCode.java | 32 + .../node/admin/configserver/state/State.java | 12 + .../node/admin/configserver/state/StateImpl.java | 29 + .../state/bindings/HealthResponse.java | 36 + .../admin/configserver/state/package-info.java | 5 + .../hosted/node/admin/container/Container.java | 85 ++ .../node/admin/container/ContainerEngine.java | 70 ++ .../hosted/node/admin/container/ContainerId.java | 36 + .../hosted/node/admin/container/ContainerName.java | 59 ++ .../node/admin/container/ContainerNetworkMode.java | 29 + .../node/admin/container/ContainerOperations.java | 153 ++++ 
.../node/admin/container/ContainerResources.java | 135 ++++ .../node/admin/container/ContainerStats.java | 94 +++ .../admin/container/ContainerStatsCollector.java | 168 ++++ .../node/admin/container/PartialContainer.java | 139 ++++ .../node/admin/container/RegistryCredentials.java | 25 + .../container/RegistryCredentialsProvider.java | 13 + .../container/image/ContainerImageDownloader.java | 66 ++ .../container/image/ContainerImagePruner.java | 164 ++++ .../hosted/node/admin/container/image/Image.java | 50 ++ .../node/admin/container/image/package-info.java | 8 + .../node/admin/container/metrics/Counter.java | 28 + .../admin/container/metrics/DimensionMetrics.java | 76 ++ .../node/admin/container/metrics/Dimensions.java | 30 + .../hosted/node/admin/container/metrics/Gauge.java | 24 + .../node/admin/container/metrics/MetricValue.java | 9 + .../node/admin/container/metrics/Metrics.java | 139 ++++ .../node/admin/container/metrics/package-info.java | 5 + .../hosted/node/admin/container/package-info.java | 5 + .../admin/maintenance/ContainerWireguardTask.java | 16 + .../node/admin/maintenance/StorageMaintainer.java | 196 +++++ .../node/admin/maintenance/acl/AclMaintainer.java | 138 ++++ .../maintenance/acl/FilterTableLineEditor.java | 61 ++ .../admin/maintenance/acl/NatTableLineEditor.java | 49 ++ .../node/admin/maintenance/acl/package-info.java | 5 + .../admin/maintenance/coredump/CoreCollector.java | 132 +++ .../maintenance/coredump/CoredumpHandler.java | 338 ++++++++ .../coredump/SecretSharedKeySupplier.java | 17 + .../admin/maintenance/coredump/package-info.java | 5 + .../maintenance/disk/CoredumpCleanupRule.java | 106 +++ .../node/admin/maintenance/disk/DiskCleanup.java | 59 ++ .../admin/maintenance/disk/DiskCleanupRule.java | 20 + .../admin/maintenance/disk/LinearCleanupRule.java | 48 ++ .../node/admin/maintenance/disk/package-info.java | 5 + .../identity/AthenzCredentialsMaintainer.java | 433 ++++++++++ .../identity/CredentialsMaintainer.java | 29 + 
.../admin/maintenance/identity/package-info.java | 8 + .../node/admin/maintenance/package-info.java | 5 + .../admin/maintenance/servicedump/Artifact.java | 55 ++ .../maintenance/servicedump/ArtifactProducer.java | 37 + .../maintenance/servicedump/ArtifactProducers.java | 109 +++ .../maintenance/servicedump/ConfigDumper.java | 35 + .../admin/maintenance/servicedump/JvmDumper.java | 103 +++ .../maintenance/servicedump/PerfReporter.java | 40 + .../maintenance/servicedump/PmapReporter.java | 24 + .../maintenance/servicedump/ServiceDumpReport.java | 143 ++++ .../maintenance/servicedump/VespaLogDumper.java | 47 ++ .../servicedump/VespaServiceDumper.java | 13 + .../servicedump/VespaServiceDumperImpl.java | 269 +++++++ .../servicedump/ZooKeeperSnapshotDumper.java | 27 + .../maintenance/servicedump/package-info.java | 8 + .../node/admin/maintenance/sync/SyncClient.java | 22 + .../node/admin/maintenance/sync/SyncFileInfo.java | 143 ++++ .../sync/ZstdCompressingInputStream.java | 83 ++ .../node/admin/maintenance/sync/package-info.java | 8 + .../node/admin/nodeadmin/ConvergenceException.java | 41 + .../hosted/node/admin/nodeadmin/NodeAdmin.java | 56 ++ .../hosted/node/admin/nodeadmin/NodeAdminImpl.java | 261 ++++++ .../admin/nodeadmin/NodeAdminStateUpdater.java | 180 +++++ .../hosted/node/admin/nodeadmin/ProcMeminfo.java | 12 + .../node/admin/nodeadmin/ProcMeminfoReader.java | 42 + .../hosted/node/admin/nodeadmin/package-info.java | 5 + .../hosted/node/admin/nodeagent/ContainerData.java | 42 + .../hosted/node/admin/nodeagent/HealthChecker.java | 15 + .../hosted/node/admin/nodeagent/NodeAgent.java | 43 + .../node/admin/nodeagent/NodeAgentContext.java | 66 ++ .../admin/nodeagent/NodeAgentContextFactory.java | 13 + .../node/admin/nodeagent/NodeAgentContextImpl.java | 283 +++++++ .../admin/nodeagent/NodeAgentContextManager.java | 124 +++ .../admin/nodeagent/NodeAgentContextSupplier.java | 20 + .../node/admin/nodeagent/NodeAgentFactory.java | 10 + 
.../hosted/node/admin/nodeagent/NodeAgentImpl.java | 633 +++++++++++++++ .../node/admin/nodeagent/NodeAgentScheduler.java | 25 + .../hosted/node/admin/nodeagent/NodeAgentTask.java | 31 + .../hosted/node/admin/nodeagent/PathScope.java | 57 ++ .../hosted/node/admin/nodeagent/UserNamespace.java | 67 ++ .../hosted/node/admin/nodeagent/UserScope.java | 52 ++ .../hosted/node/admin/nodeagent/package-info.java | 5 + .../node/admin/provider/DebugHandlerHelper.java | 50 ++ .../node/admin/provider/NodeAdminDebugHandler.java | 18 + .../hosted/node/admin/provider/package-info.java | 5 + .../node/admin/task/util/DefaultEnvWriter.java | 116 +++ .../hosted/node/admin/task/util/editor/Cursor.java | 96 +++ .../node/admin/task/util/editor/CursorImpl.java | 356 +++++++++ .../node/admin/task/util/editor/FileEditor.java | 58 ++ .../hosted/node/admin/task/util/editor/Mark.java | 52 ++ .../hosted/node/admin/task/util/editor/Match.java | 53 ++ .../node/admin/task/util/editor/Position.java | 72 ++ .../node/admin/task/util/editor/StringEditor.java | 30 + .../node/admin/task/util/editor/TextBuffer.java | 175 ++++ .../admin/task/util/editor/TextBufferImpl.java | 117 +++ .../node/admin/task/util/editor/TextUtil.java | 59 ++ .../node/admin/task/util/editor/Version.java | 52 ++ .../node/admin/task/util/file/AttributeSync.java | 125 +++ .../hosted/node/admin/task/util/file/DiskSize.java | 71 ++ .../hosted/node/admin/task/util/file/Editor.java | 135 ++++ .../node/admin/task/util/file/EditorFactory.java | 13 + .../node/admin/task/util/file/FileAttributes.java | 37 + .../admin/task/util/file/FileAttributesCache.java | 32 + .../admin/task/util/file/FileContentCache.java | 35 + .../node/admin/task/util/file/FileDeleter.java | 34 + .../node/admin/task/util/file/FileFinder.java | 272 +++++++ .../node/admin/task/util/file/FileMover.java | 55 ++ .../node/admin/task/util/file/FileSnapshot.java | 83 ++ .../hosted/node/admin/task/util/file/FileSync.java | 107 +++ .../node/admin/task/util/file/FileWriter.java 
| 92 +++ .../node/admin/task/util/file/IOExceptionUtil.java | 33 + .../hosted/node/admin/task/util/file/LineEdit.java | 40 + .../node/admin/task/util/file/LineEditor.java | 21 + .../node/admin/task/util/file/MakeDirectory.java | 70 ++ .../node/admin/task/util/file/PartialFileData.java | 67 ++ .../node/admin/task/util/file/StoredBoolean.java | 53 ++ .../node/admin/task/util/file/StoredDouble.java | 46 ++ .../node/admin/task/util/file/StoredInteger.java | 42 + .../hosted/node/admin/task/util/file/Template.java | 58 ++ .../hosted/node/admin/task/util/file/UnixPath.java | 350 ++++++++ .../hosted/node/admin/task/util/file/UnixUser.java | 58 ++ .../node/admin/task/util/file/package-info.java | 5 + .../task/util/fs/ContainerAttributeViews.java | 81 ++ .../admin/task/util/fs/ContainerFileSystem.java | 98 +++ .../task/util/fs/ContainerFileSystemProvider.java | 348 ++++++++ .../node/admin/task/util/fs/ContainerPath.java | 224 ++++++ .../fs/ContainerUserPrincipalLookupService.java | 130 +++ .../node/admin/task/util/fs/package-info.java | 5 + .../node/admin/task/util/network/IPAddresses.java | 142 ++++ .../admin/task/util/network/IPAddressesImpl.java | 21 + .../node/admin/task/util/network/IPVersion.java | 85 ++ .../task/util/network/VersionedIpAddress.java | 85 ++ .../node/admin/task/util/network/package-info.java | 5 + .../hosted/node/admin/task/util/package-info.java | 8 + .../admin/task/util/process/ChildProcess2.java | 16 + .../admin/task/util/process/ChildProcess2Impl.java | 139 ++++ .../task/util/process/ChildProcessException.java | 47 ++ .../util/process/ChildProcessFailureException.java | 15 + .../node/admin/task/util/process/CommandLine.java | 382 +++++++++ .../admin/task/util/process/CommandResult.java | 92 +++ .../process/LargeOutputChildProcessException.java | 15 + .../node/admin/task/util/process/ProcessApi2.java | 17 + .../admin/task/util/process/ProcessApi2Impl.java | 36 + .../admin/task/util/process/ProcessFactory.java | 10 + 
.../task/util/process/ProcessFactoryImpl.java | 106 +++ .../admin/task/util/process/ProcessStarter.java | 10 + .../task/util/process/ProcessStarterImpl.java | 26 + .../node/admin/task/util/process/Terminal.java | 14 + .../node/admin/task/util/process/TerminalImpl.java | 26 + .../admin/task/util/process/TestChildProcess2.java | 52 ++ .../task/util/process/TestProcessFactory.java | 113 +++ .../node/admin/task/util/process/TestTerminal.java | 67 ++ .../util/process/TimeoutChildProcessException.java | 18 + .../util/process/UnexpectedOutputException.java | 26 + .../process/UnkillableChildProcessException.java | 21 + .../node/admin/task/util/process/package-info.java | 5 + .../node/admin/task/util/systemd/SystemCtl.java | 227 ++++++ .../admin/task/util/systemd/SystemCtlTester.java | 91 +++ .../node/admin/task/util/systemd/package-info.java | 5 + .../task/util/template/BadTemplateException.java | 13 + .../hosted/node/admin/task/util/template/Form.java | 32 + .../node/admin/task/util/template/IfSection.java | 69 ++ .../node/admin/task/util/template/ListElement.java | 17 + .../node/admin/task/util/template/ListSection.java | 61 ++ .../admin/task/util/template/LiteralSection.java | 26 + .../NameAlreadyExistsTemplateException.java | 14 + .../util/template/NoSuchNameTemplateException.java | 13 + .../template/NotBooleanValueTemplateException.java | 11 + .../node/admin/task/util/template/Section.java | 35 + .../node/admin/task/util/template/SectionList.java | 69 ++ .../node/admin/task/util/template/Template.java | 114 +++ .../admin/task/util/template/TemplateBuilder.java | 76 ++ .../task/util/template/TemplateDescriptor.java | 42 + .../task/util/template/TemplateException.java | 18 + .../util/template/TemplateNameNotSetException.java | 13 + .../admin/task/util/template/TemplateParser.java | 156 ++++ .../node/admin/task/util/template/Token.java | 60 ++ .../admin/task/util/template/VariableSection.java | 37 + .../admin/task/util/template/package-info.java | 5 + 
.../hosted/node/admin/task/util/text/Cursor.java | 165 ++++ .../node/admin/task/util/text/CursorRange.java | 38 + .../node/admin/task/util/text/TextLocation.java | 30 + .../node/admin/task/util/text/package-info.java | 5 + .../vespa/hosted/node/admin/task/util/yum/Yum.java | 85 ++ .../node/admin/task/util/yum/YumCommand.java | 305 +++++++ .../node/admin/task/util/yum/YumPackageName.java | 245 ++++++ .../hosted/node/admin/task/util/yum/YumTester.java | 178 +++++ .../node/admin/task/util/yum/package-info.java | 5 + .../hosted/node/admin/wireguard/WireguardPeer.java | 30 + .../hosted/node/admin/wireguard/package-info.java | 5 + node-admin/src/main/sh/node-admin.sh | 112 +++ .../vespa/hosted/node/admin/cgroup/CgroupTest.java | 162 ++++ .../hosted/node/admin/cgroup/IoControllerTest.java | 19 + .../configserver/ConfigServerApiImplTest.java | 194 +++++ .../node/admin/configserver/cores/CoresTest.java | 151 ++++ .../configserver/flags/RealFlagRepositoryTest.java | 40 + .../admin/configserver/noderepository/AclTest.java | 182 +++++ .../configserver/noderepository/NodeStateTest.java | 26 + .../noderepository/RealNodeRepositoryTest.java | 249 ++++++ .../bindings/NodeRepositoryNodeTest.java | 72 ++ .../noderepository/reports/BaseReportTest.java | 73 ++ .../orchestrator/OrchestratorImplTest.java | 172 ++++ .../configserver/state/HealthResponseTest.java | 54 ++ .../admin/configserver/state/StateImplTest.java | 39 + .../node/admin/container/ContainerEngineMock.java | 256 ++++++ .../node/admin/container/ContainerNameTest.java | 52 ++ .../admin/container/ContainerOperationsTest.java | 70 ++ .../admin/container/ContainerResourcesTest.java | 49 ++ .../container/ContainerStatsCollectorTest.java | 147 ++++ .../image/ContainerImageDownloaderTest.java | 37 + .../container/image/ContainerImagePrunerTest.java | 184 +++++ .../node/admin/container/metrics/MetricsTest.java | 99 +++ .../node/admin/integration/ContainerFailTest.java | 52 ++ .../node/admin/integration/ContainerTester.java | 182 
+++++ .../node/admin/integration/MultiContainerTest.java | 58 ++ .../node/admin/integration/NodeRepoMock.java | 91 +++ .../hosted/node/admin/integration/RebootTest.java | 44 + .../hosted/node/admin/integration/RestartTest.java | 50 ++ .../admin/maintenance/StorageMaintainerTest.java | 178 +++++ .../admin/maintenance/acl/AclMaintainerTest.java | 351 ++++++++ .../maintenance/acl/FilterTableLineEditorTest.java | 88 ++ .../maintenance/acl/NatTableLineEditorTest.java | 96 +++ .../maintenance/coredump/CoreCollectorTest.java | 234 ++++++ .../maintenance/coredump/CoredumpHandlerTest.java | 300 +++++++ .../maintenance/disk/CoredumpCleanupRuleTest.java | 103 +++ .../admin/maintenance/disk/DiskCleanupTest.java | 129 +++ .../maintenance/disk/LinearCleanupRuleTest.java | 58 ++ .../servicedump/ArtifactProducersTest.java | 31 + .../servicedump/VespaServiceDumperImplTest.java | 319 ++++++++ .../admin/maintenance/sync/SyncFileInfoTest.java | 134 ++++ .../sync/ZstdCompressingInputStreamTest.java | 58 ++ .../node/admin/nodeadmin/NodeAdminImplTest.java | 166 ++++ .../admin/nodeadmin/NodeAdminStateUpdaterTest.java | 277 +++++++ .../admin/nodeagent/NodeAgentContextImplTest.java | 103 +++ .../nodeagent/NodeAgentContextManagerTest.java | 152 ++++ .../node/admin/nodeagent/NodeAgentImplTest.java | 889 +++++++++++++++++++++ .../node/admin/nodeagent/UserNamespaceTest.java | 29 + .../admin/provider/DebugHandlerHelperTest.java | 28 + .../node/admin/task/util/DefaultEnvWriterTest.java | 68 ++ .../admin/task/util/editor/StringEditorTest.java | 148 ++++ .../admin/task/util/editor/TextBufferImplTest.java | 59 ++ .../node/admin/task/util/file/DiskSizeTest.java | 26 + .../node/admin/task/util/file/EditorTest.java | 122 +++ .../task/util/file/FileAttributesCacheTest.java | 39 + .../admin/task/util/file/FileAttributesTest.java | 20 + .../admin/task/util/file/FileContentCacheTest.java | 62 ++ .../node/admin/task/util/file/FileDeleterTest.java | 27 + .../node/admin/task/util/file/FileFinderTest.java | 
238 ++++++ .../node/admin/task/util/file/FileMoverTest.java | 73 ++ .../admin/task/util/file/FileSnapshotTest.java | 60 ++ .../node/admin/task/util/file/FileSyncTest.java | 79 ++ .../node/admin/task/util/file/FileWriterTest.java | 62 ++ .../admin/task/util/file/MakeDirectoryTest.java | 87 ++ .../admin/task/util/file/StoredBooleanTest.java | 52 ++ .../node/admin/task/util/file/TemplateTest.java | 39 + .../node/admin/task/util/file/UnixPathTest.java | 199 +++++ .../task/util/fs/ContainerFileSystemTest.java | 211 +++++ .../node/admin/task/util/fs/ContainerPathTest.java | 120 +++ .../ContainerUserPrincipalLookupServiceTest.java | 41 + .../admin/task/util/network/IPAddressesMock.java | 32 + .../admin/task/util/network/IPAddressesTest.java | 78 ++ .../task/util/network/VersionedIpAddressTest.java | 69 ++ .../task/util/process/ChildProcess2ImplTest.java | 147 ++++ .../admin/task/util/process/CommandLineTest.java | 190 +++++ .../task/util/process/ProcessFactoryImplTest.java | 88 ++ .../admin/task/util/systemd/SystemCtlTest.java | 149 ++++ .../task/util/systemd/SystemCtlTesterTest.java | 52 ++ .../admin/task/util/template/TemplateTest.java | 218 +++++ .../admin/task/util/yum/YumPackageNameTest.java | 194 +++++ .../hosted/node/admin/task/util/yum/YumTest.java | 335 ++++++++ .../node/admin/task/util/yum/YumTesterTest.java | 80 ++ .../node/admin/wireguard/WireguardPeerTest.java | 39 + .../src/test/resources/default-env-example.txt | 5 + .../src/test/resources/default-env-rewritten.txt | 4 + node-admin/src/test/resources/template1.tmp | 10 + node-admin/src/test/resources/template2.tmp | 4 + node-admin/src/test/resources/template3.tmp | 6 + 344 files changed, 29790 insertions(+) create mode 100644 node-admin/.gitignore create mode 100644 node-admin/CMakeLists.txt create mode 100644 node-admin/OWNERS create mode 100644 node-admin/README.md create mode 100644 node-admin/pom.xml create mode 100644 node-admin/src/main/application/services.xml create mode 100644 
node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/cgroup/Cgroup.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/cgroup/CgroupCore.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/cgroup/CpuController.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/cgroup/IoController.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/cgroup/MemoryController.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/cgroup/Size.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/cgroup/package-info.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/component/ConfigServerInfo.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/component/IdempotentTask.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/component/TaskContext.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/component/TestTaskContext.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/component/package-info.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApi.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImpl.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerClients.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerException.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConnectionException.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/HttpException.java create mode 100644 
node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/RealConfigServerClients.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/StandardConfigServerResponse.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/cores/CoreDumpMetadata.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/cores/Cores.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/cores/CoresImpl.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/cores/bindings/ReportCoreDumpRequest.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/cores/package-info.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/flags/RealFlagRepository.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/flags/package-info.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/Acl.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/AddNode.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/Event.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NoSuchNodeException.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeAttributes.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeMembership.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeReports.java create mode 100644 
node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeRepository.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeRepositoryException.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeSpec.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeState.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/OrchestratorStatus.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/RealNodeRepository.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/TrustStoreItem.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/bindings/GetAclResponse.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/bindings/GetNodesResponse.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/bindings/GetWireguardResponse.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/bindings/NodeRepositoryNode.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/package-info.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/reports/BaseReport.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/reports/DropDocumentsReport.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/reports/package-info.java create mode 100644 
node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/Orchestrator.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorException.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorNotFoundException.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/package-info.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/package-info.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/state/HealthCode.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/state/State.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/state/StateImpl.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/state/bindings/HealthResponse.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/state/package-info.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/Container.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerEngine.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerId.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerName.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerNetworkMode.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperations.java create mode 100644 
node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerResources.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStats.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollector.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/PartialContainer.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/RegistryCredentials.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/RegistryCredentialsProvider.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/ContainerImageDownloader.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/ContainerImagePruner.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/Image.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/package-info.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/metrics/Counter.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/metrics/DimensionMetrics.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/metrics/Dimensions.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/metrics/Gauge.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/metrics/MetricValue.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/metrics/Metrics.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/metrics/package-info.java create mode 100644 
node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/package-info.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/ContainerWireguardTask.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/AclMaintainer.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/FilterTableLineEditor.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/NatTableLineEditor.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/package-info.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollector.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/SecretSharedKeySupplier.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/package-info.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/disk/CoredumpCleanupRule.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/disk/DiskCleanup.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/disk/DiskCleanupRule.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/disk/LinearCleanupRule.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/disk/package-info.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/identity/AthenzCredentialsMaintainer.java create mode 100644 
node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/identity/CredentialsMaintainer.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/identity/package-info.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/package-info.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/Artifact.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/ArtifactProducer.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/ArtifactProducers.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/ConfigDumper.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/JvmDumper.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/PerfReporter.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/PmapReporter.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/ServiceDumpReport.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/VespaLogDumper.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/VespaServiceDumper.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/VespaServiceDumperImpl.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/ZooKeeperSnapshotDumper.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/package-info.java create mode 100644 
node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/sync/SyncClient.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/sync/SyncFileInfo.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/sync/ZstdCompressingInputStream.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/sync/package-info.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/ConvergenceException.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdmin.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/ProcMeminfo.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/ProcMeminfoReader.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/package-info.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/ContainerData.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/HealthChecker.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContext.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextFactory.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextImpl.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextManager.java create mode 100644 
node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextSupplier.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentFactory.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentScheduler.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentTask.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/PathScope.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/UserNamespace.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/UserScope.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/package-info.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/DebugHandlerHelper.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/NodeAdminDebugHandler.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/package-info.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/DefaultEnvWriter.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/Cursor.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/CursorImpl.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/FileEditor.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/Mark.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/Match.java create mode 100644 
node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/Position.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/StringEditor.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/TextBuffer.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/TextBufferImpl.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/TextUtil.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/Version.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/AttributeSync.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/DiskSize.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/Editor.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/EditorFactory.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileAttributes.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileAttributesCache.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileContentCache.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileDeleter.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileFinder.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileMover.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileSnapshot.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileSync.java create mode 100644 
node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileWriter.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/IOExceptionUtil.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/LineEdit.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/LineEditor.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/MakeDirectory.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/PartialFileData.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/StoredBoolean.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/StoredDouble.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/StoredInteger.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/Template.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/UnixPath.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/UnixUser.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/package-info.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerAttributeViews.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerFileSystem.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerFileSystemProvider.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerPath.java create mode 100644 
node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerUserPrincipalLookupService.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/package-info.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/network/IPAddresses.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/network/IPAddressesImpl.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/network/IPVersion.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/network/VersionedIpAddress.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/network/package-info.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/package-info.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ChildProcess2.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ChildProcess2Impl.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ChildProcessException.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ChildProcessFailureException.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/CommandLine.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/CommandResult.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/LargeOutputChildProcessException.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ProcessApi2.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ProcessApi2Impl.java create mode 
100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ProcessFactory.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ProcessFactoryImpl.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ProcessStarter.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ProcessStarterImpl.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/Terminal.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/TerminalImpl.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/TestChildProcess2.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/TestProcessFactory.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/TestTerminal.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/TimeoutChildProcessException.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/UnexpectedOutputException.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/UnkillableChildProcessException.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/package-info.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/systemd/SystemCtl.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/systemd/SystemCtlTester.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/systemd/package-info.java create mode 100644 
node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/template/BadTemplateException.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/template/Form.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/template/IfSection.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/template/ListElement.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/template/ListSection.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/template/LiteralSection.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/template/NameAlreadyExistsTemplateException.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/template/NoSuchNameTemplateException.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/template/NotBooleanValueTemplateException.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/template/Section.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/template/SectionList.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/template/Template.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/template/TemplateBuilder.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/template/TemplateDescriptor.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/template/TemplateException.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/template/TemplateNameNotSetException.java create mode 100644 
node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/template/TemplateParser.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/template/Token.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/template/VariableSection.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/template/package-info.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/text/Cursor.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/text/CursorRange.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/text/TextLocation.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/text/package-info.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/yum/Yum.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/yum/YumCommand.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/yum/YumPackageName.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/yum/YumTester.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/yum/package-info.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/wireguard/WireguardPeer.java create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/wireguard/package-info.java create mode 100755 node-admin/src/main/sh/node-admin.sh create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/cgroup/CgroupTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/cgroup/IoControllerTest.java create mode 100644 
node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImplTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/configserver/cores/CoresTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/configserver/flags/RealFlagRepositoryTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/AclTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeStateTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/RealNodeRepositoryTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/bindings/NodeRepositoryNodeTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/reports/BaseReportTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImplTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/configserver/state/HealthResponseTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/configserver/state/StateImplTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ContainerEngineMock.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ContainerNameTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperationsTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ContainerResourcesTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollectorTest.java create mode 100644 
node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/image/ContainerImageDownloaderTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/image/ContainerImagePrunerTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/container/metrics/MetricsTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integration/ContainerFailTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integration/ContainerTester.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integration/MultiContainerTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integration/NodeRepoMock.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integration/RebootTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integration/RestartTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainerTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/AclMaintainerTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/FilterTableLineEditorTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/NatTableLineEditorTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollectorTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandlerTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/disk/CoredumpCleanupRuleTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/disk/DiskCleanupTest.java create mode 100644 
node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/disk/LinearCleanupRuleTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/ArtifactProducersTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/VespaServiceDumperImplTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/sync/SyncFileInfoTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/sync/ZstdCompressingInputStreamTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImplTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdaterTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextImplTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextManagerTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/UserNamespaceTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/provider/DebugHandlerHelperTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/DefaultEnvWriterTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/StringEditorTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/TextBufferImplTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/file/DiskSizeTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/file/EditorTest.java create 
mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileAttributesCacheTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileAttributesTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileContentCacheTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileDeleterTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileFinderTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileMoverTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileSnapshotTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileSyncTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileWriterTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/file/MakeDirectoryTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/file/StoredBooleanTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/file/TemplateTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/file/UnixPathTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerFileSystemTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerPathTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerUserPrincipalLookupServiceTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/network/IPAddressesMock.java create mode 100644 
node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/network/IPAddressesTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/network/VersionedIpAddressTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ChildProcess2ImplTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/process/CommandLineTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ProcessFactoryImplTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/systemd/SystemCtlTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/systemd/SystemCtlTesterTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/template/TemplateTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/yum/YumPackageNameTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/yum/YumTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/yum/YumTesterTest.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/wireguard/WireguardPeerTest.java create mode 100644 node-admin/src/test/resources/default-env-example.txt create mode 100644 node-admin/src/test/resources/default-env-rewritten.txt create mode 100644 node-admin/src/test/resources/template1.tmp create mode 100644 node-admin/src/test/resources/template2.tmp create mode 100644 node-admin/src/test/resources/template3.tmp (limited to 'node-admin') diff --git a/node-admin/.gitignore b/node-admin/.gitignore new file mode 100644 index 00000000000..adbb97d2d31 --- /dev/null +++ b/node-admin/.gitignore @@ -0,0 +1 @@ +data/ \ No newline at end of file diff --git a/node-admin/CMakeLists.txt 
b/node-admin/CMakeLists.txt new file mode 100644 index 00000000000..1056a09976a --- /dev/null +++ b/node-admin/CMakeLists.txt @@ -0,0 +1,7 @@ +# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +install(DIRECTORY DESTINATION logs/vespa/node-admin) +install(FILES target/node-admin-jar-with-dependencies.jar DESTINATION conf/node-admin-app/components) +install_symlink(lib/jars/application-model-jar-with-dependencies.jar conf/node-admin-app/components/application-model-jar-with-dependencies.jar) +install_symlink(lib/jars/flags-jar-with-dependencies.jar conf/node-admin-app/components/flags-jar-with-dependencies.jar) +install(FILES src/main/application/services.xml DESTINATION conf/node-admin-app) +install(PROGRAMS src/main/sh/node-admin.sh DESTINATION libexec/vespa) diff --git a/node-admin/OWNERS b/node-admin/OWNERS new file mode 100644 index 00000000000..e131dacde49 --- /dev/null +++ b/node-admin/OWNERS @@ -0,0 +1 @@ +hakonhall diff --git a/node-admin/README.md b/node-admin/README.md new file mode 100644 index 00000000000..d366400ea7e --- /dev/null +++ b/node-admin/README.md @@ -0,0 +1,4 @@ + +# Node Admin + +Manages docker containers that run different applications on a host. 
diff --git a/node-admin/pom.xml b/node-admin/pom.xml new file mode 100644 index 00000000000..75adc2fb380 --- /dev/null +++ b/node-admin/pom.xml @@ -0,0 +1,175 @@ + + + + 4.0.0 + + com.yahoo.vespa + parent + 8-SNAPSHOT + ../parent/pom.xml + + + node-admin + 8-SNAPSHOT + container-plugin + ${project.artifactId} + + + + + com.yahoo.vespa + config-provisioning + ${project.version} + provided + + + com.yahoo.vespa + container-dev + ${project.version} + provided + + + com.yahoo.vespa + vespa-athenz + ${project.version} + provided + + + com.yahoo.vespa + flags + ${project.version} + provided + + + com.fasterxml.jackson.core + jackson-databind + provided + + + com.fasterxml.jackson.core + jackson-annotations + provided + + + com.yahoo.vespa + container-apache-http-client-bundle + ${project.version} + provided + + + + + com.yahoo.vespa + orchestrator-restapi + ${project.version} + + + org.apache.velocity + velocity-engine-core + + + + org.slf4j + slf4j-api + + + + + com.yahoo.vespa + http-utils + ${project.version} + compile + + + + + org.mockito + mockito-core + test + + + org.junit.jupiter + junit-jupiter + test + + + com.yahoo.vespa + application + ${project.version} + test + + + com.yahoo.vespa + application-model + ${project.version} + test + + + com.yahoo.vespa + orchestrator + ${project.version} + test + + + com.yahoo.vespa + node-repository + ${project.version} + test + + + com.yahoo.vespa + service-monitor + ${project.version} + test + + + com.yahoo.vespa + testutil + ${project.version} + test + + + junit + junit + + + org.hamcrest + * + + + + + + com.yahoo.vespa + zkfacade + ${project.version} + test + + + com.yahoo.vespa + container-test + ${project.version} + test + + + + + + com.yahoo.vespa + bundle-plugin + true + + true + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + + diff --git a/node-admin/src/main/application/services.xml b/node-admin/src/main/application/services.xml new file mode 100644 index 00000000000..d1ea4f2b2ff --- /dev/null +++ 
b/node-admin/src/main/application/services.xml @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/cgroup/Cgroup.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/cgroup/Cgroup.java new file mode 100644 index 00000000000..034c7a381ed --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/cgroup/Cgroup.java @@ -0,0 +1,166 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.cgroup; + +import com.yahoo.vespa.defaults.Defaults; +import com.yahoo.vespa.hosted.node.admin.component.TaskContext; +import com.yahoo.vespa.hosted.node.admin.container.ContainerId; +import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixPath; + +import java.nio.file.FileSystem; +import java.nio.file.Path; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.function.Consumer; +import java.util.logging.Logger; + +/** + * Represents a cgroup in the control group v2 hierarchy, see + * Control Group v2. 
+ * + * @author hakonhall + */ +public class Cgroup { + private static final Logger logger = Logger.getLogger(Cgroup.class.getName()); + + private static final Map> cgroupDirectoryCallbacks = new HashMap<>(); + + private final Path root; + private final Path relativePath; + + public static Cgroup root(FileSystem fileSystem) { + return new Cgroup(fileSystem.getPath("/sys/fs/cgroup"), fileSystem.getPath("")); + } + + private Cgroup(Path root, Path relativePath) { + this.root = root.normalize(); + this.relativePath = this.root.relativize(this.root.resolve(relativePath).normalize()); + if (this.relativePath.toString().equals("..") || this.relativePath.toString().startsWith("../")) { + throw new IllegalArgumentException("Invalid cgroup relative path: " + relativePath); + } + } + + /** Whether this cgroup actually exists in the kernel / on the file system. */ + public boolean exists() { return unixPath().resolve("cgroup.controllers").exists(); } + + /** Creates this cgroup if it does not already exist, and return this. */ + public Cgroup create() { + if (unixPath().createDirectory()) { + // cgroup automatically creates various files in a newly created cgroup directory. A unit test may simulate + // this by registering consumers before the test is run. + Consumer callback = cgroupDirectoryCallbacks.get(relativePath.toString()); + if (callback != null) + callback.accept(unixPath()); + } + return this; + } + + /** Whether v2 cgroup is enabled on this host. */ + public boolean v2CgroupIsEnabled() { return resolveRoot().exists(); } + + /** + * Resolve the given path against the path of this cgroup, and return the resulting cgroup. + * If the given path is absolute, it is resolved against the root of the cgroup hierarchy. 
+ */ + public Cgroup resolve(String path) { + Path effectivePath = fileSystem().getPath(path); + if (effectivePath.isAbsolute()) { + return new Cgroup(root, fileSystem().getPath("/").relativize(effectivePath)); + } else { + return new Cgroup(root, relativePath.resolve(path)); + } + } + + /** Returns the root cgroup, possibly this. */ + public Cgroup resolveRoot() { return isRoot() ? this : new Cgroup(root, fileSystem().getPath("")); } + + /** Returns the cgroup of a system service assuming this is the root, e.g. vespa-host-admin -> system.slice/vespa-host-admin.service. */ + public Cgroup resolveSystemService(String name) { return resolve("system.slice").resolve(serviceNameOf(name)); } + + /** Returns the root cgroup of the given Podman container. */ + public Cgroup resolveContainer(ContainerId containerId) { return resolve("/machine.slice/libpod-" + containerId + ".scope/container"); } + + /** Returns the root cgroup of the container, or otherwise the root cgroup. */ + public Cgroup resolveRoot(Optional containerId) { return containerId.map(this::resolveContainer).orElseGet(this::resolveRoot); } + + /** Returns the absolute path to this cgroup. */ + public Path path() { return root.resolve(relativePath); } + + /** Returns the UnixPath of {@link #path()}. 
*/ + public UnixPath unixPath() { return new UnixPath(path()); } + + public String read(String filename) { + return unixPath().resolve(filename).readUtf8File(); + } + + public Optional readIfExists(String filename) { + return unixPath().resolve(filename).readUtf8FileIfExists().map(String::strip); + } + + public List readLines(String filename) { + return unixPath().resolve(filename).readUtf8File().lines().toList(); + } + + public Optional readIntIfExists(String filename) { + return unixPath().resolve(filename).readUtf8FileIfExists().map(String::strip).map(Integer::parseInt); + } + + public Size readSize(String filename) { return Size.from(read(filename).stripTrailing()); } + + public boolean convergeFileContent(TaskContext context, String filename, String content, boolean apply) { + UnixPath path = unixPath().resolve(filename); + String currentContent = path.readUtf8File(); + if (ensureSuffixNewline(currentContent).equals(ensureSuffixNewline(content))) return false; + + if (apply) { + context.recordSystemModification(logger, "Updating " + path + " from '" + currentContent.stripTrailing() + + "' to '" + content.stripTrailing() + "'"); + path.writeUtf8File(content); + } + return true; + } + + /** The kernel appears to append a newline if none exist, when writing to files in cgroupfs. */ + private static String ensureSuffixNewline(String content) { + return content.endsWith("\n") ? content : content + "\n"; + } + + /** Returns an instance representing core interface files (cgroup.* files). */ + public CgroupCore core() { return new CgroupCore(this); } + + /** Returns the CPU controller of this cgroup (cpu.* files). */ + public CpuController cpu() { return new CpuController(this); } + + /** Returns the memory controller of this cgroup (memory.* files). */ + public MemoryController memory() { return new MemoryController(this); } + + /** Returns the IO controller of this cgroup (io.* files). 
*/ + public IoController io() { return new IoController(this); } + + /** + * Wraps {@code command} to ensure it is executed in this cgroup. + * + *

WARNING: This method must be called only after vespa-cgexec has been installed.

+ */ + public String[] wrapCommandForExecutionInCgroup(String... command) { + String[] fullCommand = new String[3 + command.length]; + fullCommand[0] = Defaults.getDefaults().vespaHome() + "/bin/vespa-cgexec"; + fullCommand[1] = "-g"; + fullCommand[2] = relativePath.toString(); + System.arraycopy(command, 0, fullCommand, 3, command.length); + return fullCommand; + } + + public static void unitTesting_atCgroupCreation(String relativePath, Consumer callback) { + cgroupDirectoryCallbacks.put(relativePath, callback); + } + + private boolean isRoot() { return relativePath.toString().isEmpty(); } + + private static String serviceNameOf(String name) { + return name.indexOf('.') == -1 ? name + ".service" : name; + } + + private FileSystem fileSystem() { return root.getFileSystem(); } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/cgroup/CgroupCore.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/cgroup/CgroupCore.java new file mode 100644 index 00000000000..ecee819cc66 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/cgroup/CgroupCore.java @@ -0,0 +1,34 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.cgroup; + +import java.util.List; + +/** + * Utility methods for accessing the cgroup core interface files, i.e. all cgroup.* files. + * + * @author hakonhall + */ +public class CgroupCore { + private final Cgroup cgroup; + + CgroupCore(Cgroup cgroup) { this.cgroup = cgroup; } + + public List getPidsInCgroup() { + return cgroup.readLines("cgroup.procs") + .stream() + .map(Integer::parseInt) + .toList(); + } + + /** Whether the given PID is a member of this cgroup. */ + public boolean isMember(int pid) { + return getPidsInCgroup().contains(pid); + } + + /** Move the given PID to this cgroup, but return false if it was already a member. 
*/ + public boolean addMember(int pid) { + if (isMember(pid)) return false; + cgroup.unixPath().resolve("cgroup.procs").writeUtf8File(Integer.toString(pid)); + return true; + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/cgroup/CpuController.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/cgroup/CpuController.java new file mode 100644 index 00000000000..5ca8a84cad6 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/cgroup/CpuController.java @@ -0,0 +1,111 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.cgroup; + +import com.yahoo.collections.Pair; +import com.yahoo.vespa.hosted.node.admin.component.TaskContext; + +import java.util.Arrays; +import java.util.Map; +import java.util.Optional; +import java.util.stream.Collectors; + +import static java.lang.Integer.parseInt; + +/** + * Represents a cgroup v2 CPU controller, i.e. all cpu.* files. + * + * @author hakonhall + */ +public class CpuController { + private final Cgroup cgroup; + + CpuController(Cgroup cgroup) { + this.cgroup = cgroup; + } + + /** + * The maximum bandwidth limit of the format "QUOTA PERIOD", which indicates that the cgroup may consume + * up to QUOTA in each PERIOD duration. A quota of "max" indicates no limit. + */ + public record Max(Size quota, int period) { + public String toFileContent() { return quota + " " + period + '\n'; } + } + + /** + * Returns the maximum CPU usage, or empty if cgroup is not found. + * + * @see Max + */ + public Optional readMax() { + return cgroup.readIfExists("cpu.max") + .map(content -> { + String[] parts = content.strip().split(" "); + return new Max(Size.from(parts[0]), parseInt(parts[1])); + }); + } + + /** + * Update CPU quota and period for the given container ID. Set quota to -1 value for unlimited. 
+ * + * @see #readMax() + * @see Max + */ + public boolean updateMax(TaskContext context, int quota, int period) { + Max max = new Max(quota < 0 ? Size.max() : Size.from(quota), period); + return cgroup.convergeFileContent(context, "cpu.max", max.toFileContent(), true); + } + + /** @return The weight in the range [1, 10000], or empty if not found. */ + private Optional readWeight() { + return cgroup.readIntIfExists("cpu.weight"); + } + + /** @return The number of shares allocated to this cgroup for purposes of CPU time scheduling, or empty if not found. */ + public Optional readShares() { + return readWeight().map(CpuController::weightToShares); + } + + public boolean updateShares(TaskContext context, int shares) { + return cgroup.convergeFileContent(context, "cpu.weight", sharesToWeight(shares) + "\n", true); + } + + // Must be same as in crun: https://github.com/containers/crun/blob/72c6e60ade0e4716fe2d8353f0d97d72cc8d1510/src/libcrun/cgroup.c#L3061 + // TODO: Migrate to weights + public static int sharesToWeight(int shares) { return (int) (1 + ((shares - 2L) * 9999) / 262142); } + public static int weightToShares(int weight) { return (int) (2 + ((weight - 1L) * 262142) / 9999); } + + public enum StatField { + TOTAL_USAGE_USEC("usage_usec"), + USER_USAGE_USEC("user_usec"), + SYSTEM_USAGE_USEC("system_usec"), + TOTAL_PERIODS("nr_periods"), + THROTTLED_PERIODS("nr_throttled"), + THROTTLED_TIME_USEC("throttled_usec"); + + private final String name; + + StatField(String name) { + this.name = name; + } + + long parseValue(String value) { + return Long.parseLong(value); + } + + static Optional fromField(String fieldName) { + return Arrays.stream(values()) + .filter(field -> fieldName.equals(field.name)) + .findFirst(); + } + } + + public Map readStats() { + return cgroup.readLines("cpu.stat") + .stream() + .map(line -> line.split("\\s+")) + .filter(parts -> parts.length == 2) + .flatMap(parts -> StatField.fromField(parts[0]).stream().map(field -> new Pair<>(field, 
field.parseValue(parts[1])))) + .collect(Collectors.toMap(Pair::getFirst, Pair::getSecond)); + } + +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/cgroup/IoController.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/cgroup/IoController.java new file mode 100644 index 00000000000..f6676347605 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/cgroup/IoController.java @@ -0,0 +1,111 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.cgroup; + +import ai.vespa.validation.Validation; +import com.yahoo.vespa.hosted.node.admin.component.TaskContext; +import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixPath; + +import java.util.Map; +import java.util.Optional; +import java.util.logging.Logger; +import java.util.stream.Collectors; + +import static java.lang.Integer.parseInt; + +/** + * Represents a cgroup v2 IO controller, i.e. all io.* files. + * + * @author freva + */ +public class IoController { + private static final Logger logger = Logger.getLogger(IoController.class.getName()); + private final Cgroup cgroup; + + IoController(Cgroup cgroup) { + this.cgroup = cgroup; + } + + public record Device(int major, int minor) implements Comparable { + public Device { + // https://www.halolinux.us/kernel-architecture/representation-of-major-and-minor-numbers.html + Validation.requireInRange(major, "device major", 0, 0xFFF); + Validation.requireInRange(minor, "device minor", 0, 0xFFFFF); + } + + private String toFileContent() { return major + ":" + minor; } + private static Device fromString(String device) { + String[] parts = device.split(":"); + return new Device(parseInt(parts[0]), parseInt(parts[1])); + } + + @Override + public int compareTo(Device o) { + return major != o.major ? Integer.compare(major, o.major) : Integer.compare(minor, o.minor); + } + } + + /** + * Defines max allowed IO: + *
    + *
  • rbps: Read bytes per second
  • + *
  • riops: Read IO operations per second
  • + *
  • wbps: Write bytes per second
  • + *
  • wiops: Write IO operations per second
  • + *
. + */ + public record Max(Size rbps, Size wbps, Size riops, Size wiops) { + public static Max UNLIMITED = new Max(Size.max(), Size.max(), Size.max(), Size.max()); + + // Keys can be specified in any order, this is the order they are outputted in from io.max + // https://github.com/torvalds/linux/blob/c1a515d3c0270628df8ae5f5118ba859b85464a2/block/blk-throttle.c#L1541 + private String toFileContent() { return "rbps=%s wbps=%s riops=%s wiops=%s".formatted(rbps, wbps, riops, wiops); } + + public static Max fromString(String max) { + String[] parts = max.split(" "); + Size rbps = Size.max(), riops = Size.max(), wbps = Size.max(), wiops = Size.max(); + for (String part : parts) { + if (part.isEmpty()) continue; + String[] kv = part.split("="); + if (kv.length != 2) throw new IllegalArgumentException("Invalid io.max format: " + max); + switch (kv[0]) { + case "rbps" -> rbps = Size.from(kv[1]); + case "riops" -> riops = Size.from(kv[1]); + case "wbps" -> wbps = Size.from(kv[1]); + case "wiops" -> wiops = Size.from(kv[1]); + default -> throw new IllegalArgumentException("Unknown key " + kv[0]); + } + } + return new Max(rbps, wbps, riops, wiops); + } + } + + /** + * Returns the maximum allowed IO usage, by device, or empty if cgroup is not found. 
+ * + * @see Max + */ + public Optional> readMax() { + return cgroup.readIfExists("io.max") + .map(content -> content + .lines() + .map(line -> { + String[] parts = line.strip().split(" ", 2); + return Map.entry(Device.fromString(parts[0]), Max.fromString(parts[1])); + }) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))); + } + + public boolean updateMax(TaskContext context, Device device, Max max) { + Max prevMax = readMax() + .map(maxByDevice -> maxByDevice.get(device)) + .orElse(Max.UNLIMITED); + if (prevMax.equals(max)) return false; + + UnixPath path = cgroup.unixPath().resolve("io.max"); + context.recordSystemModification(logger, "Updating %s for device %s from '%s' to '%s'", + path, device.toFileContent(), prevMax.toFileContent(), max.toFileContent()); + path.writeUtf8File(device.toFileContent() + ' ' + max.toFileContent() + '\n'); + return true; + } + +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/cgroup/MemoryController.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/cgroup/MemoryController.java new file mode 100644 index 00000000000..28da683ea69 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/cgroup/MemoryController.java @@ -0,0 +1,92 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.cgroup; + +import java.util.List; +import java.util.Optional; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Stream; + +/** + * Represents a cgroup v2 memory controller, i.e. all memory.* files. + * + * @author hakonhall + */ +public class MemoryController { + private final Cgroup cgroup; + + MemoryController(Cgroup cgroup) { + this.cgroup = cgroup; + } + + /** @return Maximum amount of memory that can be used by the cgroup and its descendants. 
*/ + public Size readMax() { + return cgroup.readSize("memory.max"); + } + + /** @return The total amount of memory currently being used by the cgroup and its descendants, in bytes. */ + public Size readCurrent() { + return cgroup.readSize("memory.current"); + } + + /** @return The total amount of memory currently being used by the cgroup and its descendants, in bytes. */ + public Optional readCurrentIfExists() { + return cgroup.readIfExists("memory.current").map(Size::from); + } + + public Stats readStat() { + var lines = cgroup.readLines("memory.stat"); + return new Stats( + Size.from(readField(lines, "file")), Size.from(readField(lines, "sock")), Size.from(readField(lines, "slab")), + Size.from(readField(lines, "slab_reclaimable")), Size.from(readField(lines, "anon"))); + } + + public Optional readPressureIfExists() { + return cgroup.readIfExists("memory.pressure") + .map(fileContent -> + new Pressure( + readPressureField(fileContent, "some"), + readPressureField(fileContent, "full") + ) + ); + } + + private static String readField(List lines, String fieldName) { + return lines.stream() + .map(line -> line.split("\\s+")) + .filter(fields -> fields.length == 2) + .filter(fields -> fieldName.equals(fields[0])) + .map(fields -> fields[1]) + .findFirst() + .orElseThrow(() -> new IllegalArgumentException("No such field: " + fieldName)); + } + + /** + * Fetches the avg60 value from the specified type, i.e. "some" or "full". + */ + private static Double readPressureField(String fileContent, String type) { + var pattern = Pattern.compile(type + ".*avg60=(?\\d+\\.\\d+).*"); + return Stream.of(fileContent.split("\n")) + .map(pattern::matcher) + .filter(Matcher::matches) + .map(matcher -> matcher.group("avg60")) + .findFirst() + .map(Double::parseDouble) + .orElseThrow(() -> new IllegalArgumentException("No such field: " + type)); + } + + /** + * @param file Number of bytes used to cache filesystem data, including tmpfs and shared memory. 
+ * @param sock Amount of memory used in network transmission buffers. + * @param slab Amount of memory used for storing in-kernel data structures. + * @param slabReclaimable Part of "slab" that might be reclaimed, such as dentries and inodes. + * @param anon Amount of memory used in anonymous mappings such as brk(), sbrk(), and mmap(MAP_ANONYMOUS). + */ + public record Stats(Size file, Size sock, Size slab, Size slabReclaimable, Size anon) {} + + /** + * @param some The avg60 value of the "some" pressure level. + * @param full The avg60 value of the "full" pressure level. + */ + public record Pressure(double some, double full) {} +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/cgroup/Size.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/cgroup/Size.java new file mode 100644 index 00000000000..d89db56e4d2 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/cgroup/Size.java @@ -0,0 +1,68 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.cgroup; + +import java.util.Objects; + +/** + * Represents a number of bytes or possibly "max". + * + * @author hakonhall + */ +public class Size { + private static final String MAX = "max"; + private static final Size MAX_SIZE = new Size(true, 0); + + private final boolean max; + private final long value; + + public static Size max() { + return MAX_SIZE; + } + + public static Size from(long value) { + return new Size(false, value); + } + + public static Size from(String value) { + return value.equals(MAX) ? MAX_SIZE : new Size(false, Long.parseLong(value)); + } + + private Size(boolean max, long value) { + this.max = max; + this.value = value; + } + + public boolean isMax() { + return max; + } + + /** Returns the value, i.e. the number of "bytes" if applicable. Throws if this is max. 
*/ + public long value() { + if (max) throw new IllegalStateException("Value is max"); + return value; + } + + public String toFileContent() { return toString() + '\n'; } + + @Override + public String toString() { return max ? MAX : Long.toString(value); } + + public boolean isGreaterThan(Size that) { + if (that.max) return false; + if (this.max) return true; + return this.value > that.value; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Size size = (Size) o; + return max == size.max && value == size.value; + } + + @Override + public int hashCode() { + return Objects.hash(max, value); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/cgroup/package-info.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/cgroup/package-info.java new file mode 100644 index 00000000000..b4c1a5228f8 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/cgroup/package-info.java @@ -0,0 +1,9 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +/** + * @author hakonhall + */ +@ExportPackage +package com.yahoo.vespa.hosted.node.admin.cgroup; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/component/ConfigServerInfo.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/component/ConfigServerInfo.java new file mode 100644 index 00000000000..64c6b19b8bb --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/component/ConfigServerInfo.java @@ -0,0 +1,47 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.component; + +import com.yahoo.vespa.athenz.api.AthenzIdentity; + +import java.net.URI; +import java.util.List; +import java.util.function.Function; +import java.util.stream.Collectors; + +/** + * Information necessary to e.g. establish communication with the config servers + * + * @author hakon + */ +public class ConfigServerInfo { + private final URI loadBalancerEndpoint; + private final AthenzIdentity configServerIdentity; + private final Function configServerHostnameToUriMapper; + private final List configServerURIs; + + public ConfigServerInfo(URI loadBalancerEndpoint, List configServerHostNames, + AthenzIdentity configServerAthenzIdentity) { + this.loadBalancerEndpoint = loadBalancerEndpoint; + this.configServerIdentity = configServerAthenzIdentity; + this.configServerHostnameToUriMapper = hostname -> URI.create("https://" + hostname + ":4443"); + this.configServerURIs = configServerHostNames.stream() + .map(configServerHostnameToUriMapper) + .toList(); + } + + public List getConfigServerUris() { + return configServerURIs; + } + + public URI getConfigServerUri(String hostname) { + return configServerHostnameToUriMapper.apply(hostname); + } + + public URI getLoadBalancerEndpoint() { + return loadBalancerEndpoint; + } + + public AthenzIdentity getConfigServerIdentity() { + return configServerIdentity; + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/component/IdempotentTask.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/component/IdempotentTask.java new file mode 100644 index 00000000000..492020b7ae4 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/component/IdempotentTask.java @@ -0,0 +1,43 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.component; + +/** + *

This interface is not thread-safe: all method calls MUST be exclusive and serialized.

+ * + *
+ *
In a specialized environment it is possible to provide a richer context than TaskContext:
+ *
- Define a subclass T of TaskContext with the additional functionality.
+ *
- Define task classes that implement IdempotentTask<T>.
+ *
+ */ +public interface IdempotentTask { + /** + *

A short id of the task to e.g. identify the task in the log.

+ * + *

Prefer PascalCase, without whitespace.

+ * + *

Example: "EnableDocker"

+ */ + default String name() { return getClass().getSimpleName(); } + + /** + *

Execute an administrative task to converge towards some ideal state, whether it is + * system state or in-memory Java state.

+ * + *

converge() must be idempotent: it may be called any number of times, or + * interrupted at any time e.g. by `kill -9`.

+ * + *

converge() is not thread safe: The caller must ensure there is at most one invocation + * of converge() at any given time.

+ * + * @return false if already converged, i.e. was a no-op. A typical sequence of converge() + * calls on a IdempotentTask will consist of: + * - Any number of calls that throws an exception due to some issues. Assuming + * no exceptions were thrown, or the issue eventually resolved itself... + * (convergence failure) + * - Returns true once (converged just now) + * - Returns false for all further calls (already converged) + * @throws RuntimeException (or a subclass) if the task is unable to converge. + */ + boolean converge(T context); +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/component/TaskContext.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/component/TaskContext.java new file mode 100644 index 00000000000..0e8fdb6e1f6 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/component/TaskContext.java @@ -0,0 +1,48 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.component; + +import java.util.logging.Level; +import java.util.logging.Logger; + +public interface TaskContext { + /** + * Record a system modification. IdempotentTask is supposed to converge the system (files, + * directory permission, iptable rules, etc) to some wanted state. It is especially important + * to produce a truthful log of system changes to understand what may or may not be going on. + * + * All tasks should: + * 1. Record any and all modifications to the system + * 2. Avoid recording system interactions that does not actually change the system. + * 3. Record system modifications as early as possible and preferably before they are + * performed (sometimes this is not possible). + * + * @param logger Used to log the modification to help locate the source of the modification. + * @param message Description of the modification, e.g. "Changing owner of /foo from alice + * to bob". 
+ */ + void recordSystemModification(Logger logger, String message); + default void recordSystemModification(Logger logger, String messageFormat, Object... args) { + recordSystemModification(logger, String.format(messageFormat, args)); + } + + /** + * Log message at Level.INFO, scoped to denote the current task. The message may + * also be directed to status pages or similar. + * + * Please do not call this too many times as that spams the log. Typically a task may call + * this zero times, or up to a few times. + * + * Do not log a message that is also recorded with recordSystemModification. + */ + default void log(Logger logger, String message) { + log(logger, Level.INFO, message); + } + + default void log(Logger logger, String messageFormat, Object... args) { + log(logger, String.format(messageFormat, args)); + } + + void log(Logger logger, Level level, String message); + + void log(Logger logger, Level level, String message, Throwable throwable); +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/component/TestTaskContext.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/component/TestTaskContext.java new file mode 100644 index 00000000000..beedb56941a --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/component/TestTaskContext.java @@ -0,0 +1,35 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +package com.yahoo.vespa.hosted.node.admin.component; + +import java.util.ArrayList; +import java.util.List; +import java.util.logging.Level; +import java.util.logging.Logger; + +public class TestTaskContext implements TaskContext { + private final List systemModifications = new ArrayList<>(); + + @Override + public void recordSystemModification(Logger logger, String description) { + systemModifications.add(description); + } + + @Override + public void log(Logger logger, Level level, String message) { + logger.log(level, message); + } + + @Override + public void log(Logger logger, Level level, String message, Throwable throwable) { + logger.log(level, message, throwable); + } + + public List getSystemModificationLog() { + return systemModifications; + } + + public void clearSystemModificationLog() { + systemModifications.clear(); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/component/package-info.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/component/package-info.java new file mode 100644 index 00000000000..53cb32300b4 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/component/package-info.java @@ -0,0 +1,5 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.vespa.hosted.node.admin.component; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApi.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApi.java new file mode 100644 index 00000000000..b401e2f3d08 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApi.java @@ -0,0 +1,78 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.configserver; + +import java.net.URI; +import java.time.Duration; +import java.util.Optional; + +/** + * Interface to execute basic HTTP/HTTPS request against config server(s) + * + * @author freva + */ +public interface ConfigServerApi extends AutoCloseable { + + /** + * The result of sending a request to a config server results in a jackson response or exception. If a response + * is returned, an instance of this interface is conferred to discard the result and try the next config server, + * unless it was the last attempt. + * + * @param the type of the returned jackson response + */ + interface RetryPolicy { + boolean tryNextConfigServer(URI configServerEndpoint, T response); + } + + class Params { + private Optional connectionTimeout = Optional.empty(); + + private RetryPolicy retryPolicy = (configServerEndpoint, response) -> false; + + public Params() {} + + /** Set the socket connect and read timeouts. */ + public Params setConnectionTimeout(Duration connectionTimeout) { + this.connectionTimeout = Optional.of(connectionTimeout); + return this; + } + + public Optional getConnectionTimeout() { return connectionTimeout; } + + /** Set the retry policy to use against the config servers. 
*/ + public Params setRetryPolicy(RetryPolicy retryPolicy) { + this.retryPolicy = retryPolicy; + return this; + } + + public RetryPolicy getRetryPolicy() { return retryPolicy; } + } + + T get(String path, Class wantedReturnType, Params params); + default T get(String path, Class wantedReturnType) { + return get(path, wantedReturnType, new Params<>()); + } + + T post(String path, Object bodyJsonPojo, Class wantedReturnType, Params params); + default T post(String path, Object bodyJsonPojo, Class wantedReturnType) { + return post(path, bodyJsonPojo, wantedReturnType, new Params<>()); + } + + T put(String path, Optional bodyJsonPojo, Class wantedReturnType, Params params); + default T put(String path, Optional bodyJsonPojo, Class wantedReturnType) { + return put(path, bodyJsonPojo, wantedReturnType, new Params<>()); + } + + T patch(String path, Object bodyJsonPojo, Class wantedReturnType, Params params); + default T patch(String path, Object bodyJsonPojo, Class wantedReturnType) { + return patch(path, bodyJsonPojo, wantedReturnType, new Params<>()); + } + + T delete(String path, Class wantedReturnType, Params params); + default T delete(String path, Class wantedReturnType) { + return delete(path, wantedReturnType, new Params<>()); + } + + /** Close the underlying HTTP client and any threads this class might have started. */ + @Override + void close(); +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImpl.java new file mode 100644 index 00000000000..b645e993a05 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImpl.java @@ -0,0 +1,273 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.configserver;
+
+import ai.vespa.util.http.hc4.SslConnectionSocketFactory;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.yahoo.vespa.athenz.identity.ServiceIdentityProvider;
+import com.yahoo.vespa.athenz.identity.ServiceIdentitySslSocketFactory;
+import com.yahoo.vespa.hosted.node.admin.component.ConfigServerInfo;
+import com.yahoo.yolean.Exceptions;
+import org.apache.http.HttpHeaders;
+import org.apache.http.client.config.RequestConfig;
+import org.apache.http.client.methods.CloseableHttpResponse;
+import org.apache.http.client.methods.HttpDelete;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.client.methods.HttpPatch;
+import org.apache.http.client.methods.HttpPost;
+import org.apache.http.client.methods.HttpPut;
+import org.apache.http.client.methods.HttpRequestBase;
+import org.apache.http.client.methods.HttpUriRequest;
+import org.apache.http.config.Registry;
+import org.apache.http.config.RegistryBuilder;
+import org.apache.http.conn.socket.ConnectionSocketFactory;
+import org.apache.http.conn.socket.PlainConnectionSocketFactory;
+import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
+import org.apache.http.entity.ContentType;
+import org.apache.http.entity.StringEntity;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.impl.client.HttpClientBuilder;
+import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
+import org.apache.http.util.EntityUtils;
+
+import javax.net.ssl.HostnameVerifier;
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.io.UnsupportedEncodingException;
+import java.net.URI;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Optional;
+import java.util.logging.Logger;
+
+/**
+ * Sends each request to the config servers one at a time, in an order shuffled at construction,
+ * until one of them produces an answer the retry policy accepts. Assumes that all requests should
+ * be sent with content-type application/json
+ *
+ * @author dybdahl
+ * @author bjorncs
+ */
+public class ConfigServerApiImpl implements ConfigServerApi {
+
+    private static final Logger logger = Logger.getLogger(ConfigServerApiImpl.class.getName());
+    private static final RequestConfig DEFAULT_REQUEST_CONFIG = RequestConfig.custom()
+            .setConnectionRequestTimeout(1_000) // connection from connection manager
+            .setConnectTimeout(10_000) // establishment of connection
+            .setSocketTimeout(10_000) // waiting for data
+            .build();
+
+    private final ObjectMapper mapper = new ObjectMapper();
+
+    private final List<URI> configServers;
+
+    private final CloseableHttpClient client;
+
+    /** Creates an API against all config server URIs listed in {@code info}. */
+    public static ConfigServerApiImpl create(ConfigServerInfo info,
+                                             ServiceIdentityProvider provider,
+                                             HostnameVerifier hostnameVerifier) {
+        return new ConfigServerApiImpl(
+                info.getConfigServerUris(),
+                hostnameVerifier,
+                provider);
+    }
+
+    /** Creates an API that talks to the single given URI only. */
+    public static ConfigServerApiImpl createFor(URI uri,
+                                                ServiceIdentityProvider provider,
+                                                HostnameVerifier hostnameVerifier) {
+        return new ConfigServerApiImpl(List.of(uri), hostnameVerifier, provider);
+    }
+
+    private ConfigServerApiImpl(Collection<URI> configServers,
+                                HostnameVerifier verifier,
+                                ServiceIdentityProvider identityProvider) {
+        this(configServers, createClient(SslConnectionSocketFactory.of(new ServiceIdentitySslSocketFactory(identityProvider), verifier)));
+    }
+
+    private ConfigServerApiImpl(Collection<URI> configServers, CloseableHttpClient client) {
+        this.configServers = randomizeConfigServerUris(configServers);
+        this.client = client;
+    }
+
+    public static ConfigServerApiImpl createForTesting(List<URI> configServerHosts) {
+        return new ConfigServerApiImpl(configServerHosts, createClient(SslConnectionSocketFactory.of()));
+    }
+
+    static ConfigServerApiImpl createForTestingWithClient(List<URI> configServerHosts,
+                                                          CloseableHttpClient client) {
+        return new ConfigServerApiImpl(configServerHosts, client);
+    }
+
+    /** Builds the HTTP request to send to one particular config server. */
+    interface CreateRequest {
+        HttpUriRequest createRequest(URI configServerUri) throws JsonProcessingException, UnsupportedEncodingException;
+    }
+
+    /**
+     * Executes the request produced by {@code requestFactory} against the config servers in turn.
+     *
+     * <p>A response is returned immediately unless the retry policy in {@code params} asks for the
+     * next config server to be tried. A non-retryable {@link HttpException} is rethrown at once.
+     * Any other failure (including a response body that cannot be parsed) is remembered and the
+     * next config server is tried, unless there is only one config server.</p>
+     *
+     * @return the accepted result, or the last result obtained if the retry policy rejected them all
+     * @throws RuntimeException as produced by {@link ConnectionException#handleException} when no
+     *                          config server gave a result
+     */
+    private <T> T tryAllConfigServers(CreateRequest requestFactory, Class<T> wantedReturnType, Params<T> params) {
+        T lastResult = null;
+        Exception lastException = null;
+
+        for (URI configServer : configServers) {
+            var request = Exceptions.uncheck(() -> requestFactory.createRequest(configServer));
+            try (CloseableHttpResponse response = client.execute(request)) {
+                // UTF-8 is only the fallback: a charset declared by the response still takes precedence
+                var responseBody = EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8);
+                HttpException.handleStatusCode(response.getStatusLine().getStatusCode(),
+                                               request.getMethod() + " " + request.getURI() +
+                                               " failed with response '" + responseBody + "'");
+
+                T result;
+                try {
+                    result = mapper.readValue(responseBody, wantedReturnType);
+                } catch (IOException e) {
+                    throw new UncheckedIOException("Failed parse response from config server", e);
+                }
+
+                if (params.getRetryPolicy().tryNextConfigServer(configServer, result)) {
+                    lastResult = result;
+                    lastException = null;
+                } else {
+                    return result;
+                }
+            } catch (HttpException e) {
+                if (!e.isRetryable()) throw e;
+                lastResult = null;
+                lastException = e;
+            } catch (Exception e) {
+                lastResult = null;
+                lastException = e;
+                if (configServers.size() == 1) break;
+
+                // Failure to communicate with a config server is not abnormal during upgrades
+                if (ConnectionException.isKnownConnectionException(e)) {
+                    logger.info("Failed to connect to " + configServer + ", will try next: " + e.getMessage());
+                } else {
+                    logger.warning("Failed to communicate with " + configServer + ", will try next: " + e.getMessage());
+                }
+            }
+        }
+
+        if (lastResult != null) {
+            logger.warning("Giving up after trying all config servers: returning result: " + lastResult);
+            return lastResult;
+        }
+
+        String prefix = configServers.size() == 1 ?
+                "Request against " + configServers.get(0) + " failed: " :
+                "All requests against the config servers (" + configServers + ") failed, last as follows: ";
+        throw ConnectionException.handleException(prefix, lastException);
+    }
+
+    @Override
+    public <T> T put(String path, Optional<Object> bodyJsonPojo, Class<T> wantedReturnType, Params<T> params) {
+        Optional<RequestConfig> requestConfigOverride = getRequestConfigOverride(params);
+        return tryAllConfigServers(configServer -> {
+            HttpPut put = new HttpPut(configServer.resolve(path));
+            requestConfigOverride.ifPresent(put::setConfig);
+            setContentTypeToApplicationJson(put);
+            if (bodyJsonPojo.isPresent()) {
+                put.setEntity(jsonEntity(bodyJsonPojo.get()));
+            }
+            return put;
+        }, wantedReturnType, params);
+    }
+
+    @Override
+    public <T> T patch(String path, Object bodyJsonPojo, Class<T> wantedReturnType, Params<T> params) {
+        Optional<RequestConfig> requestConfigOverride = getRequestConfigOverride(params);
+        return tryAllConfigServers(configServer -> {
+            HttpPatch patch = new HttpPatch(configServer.resolve(path));
+            requestConfigOverride.ifPresent(patch::setConfig);
+            setContentTypeToApplicationJson(patch);
+            patch.setEntity(jsonEntity(bodyJsonPojo));
+            return patch;
+        }, wantedReturnType, params);
+    }
+
+    @Override
+    public <T> T delete(String path, Class<T> wantedReturnType, Params<T> params) {
+        Optional<RequestConfig> requestConfigOverride = getRequestConfigOverride(params);
+        return tryAllConfigServers(configServer -> {
+            HttpDelete delete = new HttpDelete(configServer.resolve(path));
+            requestConfigOverride.ifPresent(delete::setConfig);
+            return delete;
+        }, wantedReturnType, params);
+    }
+
+    @Override
+    public <T> T get(String path, Class<T> wantedReturnType, Params<T> params) {
+        Optional<RequestConfig> requestConfig = getRequestConfigOverride(params);
+        return tryAllConfigServers(configServer -> {
+            HttpGet get = new HttpGet(configServer.resolve(path));
+            requestConfig.ifPresent(get::setConfig);
+            return get;
+        }, wantedReturnType, params);
+    }
+
+    @Override
+    public <T> T post(String path, Object bodyJsonPojo, Class<T> wantedReturnType, Params<T> params) {
+        Optional<RequestConfig> requestConfigOverride = getRequestConfigOverride(params);
+        return tryAllConfigServers(configServer -> {
+            HttpPost post = new HttpPost(configServer.resolve(path));
+            requestConfigOverride.ifPresent(post::setConfig);
+            setContentTypeToApplicationJson(post);
+            post.setEntity(jsonEntity(bodyJsonPojo));
+            return post;
+        }, wantedReturnType, params);
+    }
+
+    @Override
+    public void close() {
+        // Need to do try and catch, using e.g. uncheck(client::close) might fail because
+        // components are deconstructed in random order and if the bundle containing uncheck has been
+        // unloaded it will fail with NoClassDefFoundError
+        try {
+            client.close();
+        } catch (IOException e) {
+            throw new UncheckedIOException(e);
+        }
+    }
+
+    /**
+     * Serializes {@code bodyJsonPojo} to JSON and wraps it in a UTF-8 encoded entity.
+     * The single-argument {@code StringEntity(String)} constructor would encode the text as
+     * ISO-8859-1 and thereby corrupt any non-ASCII character in the JSON.
+     */
+    private StringEntity jsonEntity(Object bodyJsonPojo) throws JsonProcessingException {
+        return new StringEntity(mapper.writeValueAsString(bodyJsonPojo), ContentType.APPLICATION_JSON);
+    }
+
+    private void setContentTypeToApplicationJson(HttpRequestBase request) {
+        request.setHeader(HttpHeaders.CONTENT_TYPE, "application/json");
+    }
+
+    private static CloseableHttpClient createClient(SSLConnectionSocketFactory socketFactory) {
+        Registry<ConnectionSocketFactory> socketFactoryRegistry = RegistryBuilder.<ConnectionSocketFactory>create()
+                .register("http", PlainConnectionSocketFactory.getSocketFactory())
+                .register("https", socketFactory)
+                .build();
+
+        PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager(socketFactoryRegistry);
+        cm.setMaxTotal(200); // Increase max total connections to 200, which should be enough
+
+        // Have experienced hang in socket read, which may have been because of
+        // system defaults, therefore set explicit timeouts.
+        return HttpClientBuilder.create()
+                .setDefaultRequestConfig(DEFAULT_REQUEST_CONFIG)
+                .disableAutomaticRetries()
+                .disableConnectionState() // Share connections between subsequent requests.
+                .setUserAgent("node-admin") // Node-repository depends on this value to identify agent of node-admin/host-admin requests
+                .setConnectionManager(cm)
+                .build();
+    }
+
+    /**
+     * Returns a copy of the default request config where both the connect and the socket timeout
+     * are replaced by the connection timeout of {@code params}, or empty if none is set.
+     */
+    private static Optional<RequestConfig> getRequestConfigOverride(Params<?> params) {
+        return params.getConnectionTimeout().map(connectionTimeout -> {
+            // Clamp instead of casting blindly: a plain (int) cast silently wraps around for long timeouts
+            int timeoutMillis = (int) Math.min(Integer.MAX_VALUE, connectionTimeout.toMillis());
+            return RequestConfig.copy(DEFAULT_REQUEST_CONFIG)
+                    .setConnectTimeout(timeoutMillis)
+                    .setSocketTimeout(timeoutMillis)
+                    .build();
+        });
+    }
+
+    // Shuffle config server URIs to balance load
+    private static List<URI> randomizeConfigServerUris(Collection<URI> configServerUris) {
+        List<URI> shuffledConfigServerHosts = new ArrayList<>(configServerUris);
+        Collections.shuffle(shuffledConfigServerHosts);
+        return shuffledConfigServerHosts;
+    }
+
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerClients.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerClients.java
new file mode 100644
index 00000000000..8c6212f83f4
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerClients.java
@@ -0,0 +1,32 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.configserver;
+
+import com.yahoo.vespa.flags.FlagRepository;
+import com.yahoo.vespa.hosted.node.admin.configserver.cores.Cores;
+import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeRepository;
+import com.yahoo.vespa.hosted.node.admin.configserver.orchestrator.Orchestrator;
+import com.yahoo.vespa.hosted.node.admin.configserver.state.State;
+
+/**
+ * The available (and implemented) APIs of the config server
+ *
+ * @author freva
+ */
+public interface ConfigServerClients {
+    /** Get handle to /nodes/v2/ REST API */
+    NodeRepository nodeRepository();
+
+    /** Get handle to /orchestrator/v1/ REST API */
+    Orchestrator orchestrator();
+
+    /** Get handle to the /state/v1 REST API */
+    State state();
+
+    /** Get handle to the /flags/v1 REST API */
+    FlagRepository flagRepository();
+
+    /** Get handle to the /cores/v1 REST API */
+    Cores cores();
+
+    /** Stops the clients; {@link RealConfigServerClients} closes its backing {@link ConfigServerApi} here. */
+    void stop();
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerException.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerException.java
new file mode 100644
index 00000000000..e957a56c0ae
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerException.java
@@ -0,0 +1,10 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.configserver;
+
+/**
+ * Unchecked exception used when the config server reports an error.
+ *
+ * @author hakonhall
+ */
+public class ConfigServerException extends RuntimeException {
+    public ConfigServerException(String message) {
+        super(message);
+    }
+
+    public ConfigServerException(String message, Throwable cause) {
+        super(message, cause);
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConnectionException.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConnectionException.java
new file mode 100644
index 00000000000..86c52efe282
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConnectionException.java
@@ -0,0 +1,43 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.configserver;
+
+import com.yahoo.vespa.hosted.node.admin.nodeadmin.ConvergenceException;
+import org.apache.http.NoHttpResponseException;
+
+import java.io.EOFException;
+import java.net.SocketException;
+import java.net.SocketTimeoutException;
+
+/**
+ * @author freva
+ */
+@SuppressWarnings("serial")
+public class ConnectionException extends ConvergenceException {
+
+    private ConnectionException(String message, Throwable cause) {
+        super(message, cause, true);
+    }
+
+    /**
+     * Wraps {@code t}: a {@link ConnectionException} when {@code t} (or anything in its cause chain)
+     * is one of the known connection failures, otherwise a plain RuntimeException with {@code t}
+     * as its cause.
+     */
+    public static RuntimeException handleException(String prefix, Throwable t) {
+        if (!isKnownConnectionException(t)) {
+            return new RuntimeException(prefix, t);
+        }
+        return new ConnectionException(prefix + t.getMessage(), t);
+    }
+
+    /** Walks the cause chain of {@code t} looking for a socket, timeout, no-response or EOF failure. */
+    static boolean isKnownConnectionException(Throwable t) {
+        Throwable current = t;
+        while (current != null) {
+            boolean known = current instanceof SocketException
+                    || current instanceof SocketTimeoutException
+                    || current instanceof NoHttpResponseException
+                    || current instanceof EOFException;
+            if (known) return true;
+            current = current.getCause();
+        }
+        return false;
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/HttpException.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/HttpException.java
new file mode 100644
index 00000000000..64b1ebe239d
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/HttpException.java
@@ -0,0 +1,72 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.configserver;
+
+import com.yahoo.vespa.hosted.node.admin.nodeadmin.ConvergenceException;
+
+import javax.ws.rs.core.Response;
+
+/**
+ * @author hakonhall
+ */
+@SuppressWarnings("serial")
+public class HttpException extends ConvergenceException {
+
+    private final boolean isRetryable;
+
+    private HttpException(int statusCode, String message, boolean isRetryable) {
+        super("HTTP status code " + statusCode + ": " + message, null, !isRetryable);
+        this.isRetryable = isRetryable;
+    }
+
+    private HttpException(Response.Status status, String message, boolean isRetryable) {
+        super(status.toString() + " (" + status.getStatusCode() + "): " + message, null, !isRetryable);
+        this.isRetryable = isRetryable;
+    }
+
+    boolean isRetryable() {
+        return isRetryable;
+    }
+
+    /**
+     * Returns normally for successful status codes and for 409 Conflict; throws for everything else.
+     *
+     * @throws HttpException for all non-expected status codes. Only status codes unknown to
+     *                       {@link Response.Status} yield a retryable exception.
+     */
+    static void handleStatusCode(int statusCode, String message) {
+        Response.Status status = Response.Status.fromStatusCode(statusCode);
+        if (status == null) {
+            throw new HttpException(statusCode, message, true);
+        }
+
+        Response.Status.Family family = status.getFamily();
+        if (family == Response.Status.Family.SUCCESSFUL) return;
+
+        if (family == Response.Status.Family.CLIENT_ERROR) {
+            if (status == Response.Status.FORBIDDEN) throw new ForbiddenException(message);
+            if (status == Response.Status.NOT_FOUND) throw new NotFoundException(message);
+
+            // A response body is assumed to be present, and
+            // will later be interpreted as an error.
+            if (status == Response.Status.CONFLICT) return;
+        }
+
+        // Remaining client errors, and other errors like server-side errors, are assumed to be
+        // NOT retryable, in case retries would put additional load on a bogged down server.
+        throw new HttpException(status, message, false);
+    }
+
+    public static class NotFoundException extends HttpException {
+        public NotFoundException(String message) {
+            super(Response.Status.NOT_FOUND, message, false);
+        }
+    }
+
+    public static class ForbiddenException extends HttpException {
+        public ForbiddenException(String message) {
+            super(Response.Status.FORBIDDEN, message, false);
+        }
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/RealConfigServerClients.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/RealConfigServerClients.java
new file mode 100644
index 00000000000..8ee346246ae
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/RealConfigServerClients.java
@@ -0,0 +1,70 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.configserver;
+
+import com.yahoo.vespa.flags.FlagRepository;
+import com.yahoo.vespa.hosted.node.admin.configserver.cores.Cores;
+import com.yahoo.vespa.hosted.node.admin.configserver.cores.CoresImpl;
+import com.yahoo.vespa.hosted.node.admin.configserver.flags.RealFlagRepository;
+import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeRepository;
+import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.RealNodeRepository;
+import com.yahoo.vespa.hosted.node.admin.configserver.orchestrator.Orchestrator;
+import com.yahoo.vespa.hosted.node.admin.configserver.orchestrator.OrchestratorImpl;
+import com.yahoo.vespa.hosted.node.admin.configserver.state.State;
+import com.yahoo.vespa.hosted.node.admin.configserver.state.StateImpl;
+
+/**
+ * {@link ConfigServerClients} where every client is the default implementation, all of them
+ * sharing one {@link ConfigServerApi}.
+ *
+ * @author freva
+ */
+public class RealConfigServerClients implements ConfigServerClients {
+    private final ConfigServerApi configServerApi;
+    private final NodeRepository nodeRepository;
+    private final Orchestrator orchestrator;
+    private final State state;
+    private final RealFlagRepository flagRepository;
+    private final Cores cores;
+
+    /**
+     * @param configServerApi the shared backend API; it is closed by {@link #stop()}.
+     */
+    public RealConfigServerClients(ConfigServerApi configServerApi) {
+        this.configServerApi = configServerApi;
+        this.nodeRepository = new RealNodeRepository(configServerApi);
+        this.orchestrator = new OrchestratorImpl(configServerApi);
+        this.state = new StateImpl(configServerApi);
+        this.flagRepository = new RealFlagRepository(configServerApi);
+        this.cores = new CoresImpl(configServerApi);
+    }
+
+    @Override
+    public NodeRepository nodeRepository() { return this.nodeRepository; }
+
+    @Override
+    public Orchestrator orchestrator() { return this.orchestrator; }
+
+    @Override
+    public State state() { return this.state; }
+
+    @Override
+    public FlagRepository flagRepository() { return this.flagRepository; }
+
+    @Override
+    public Cores cores() { return this.cores; }
+
+    /** Closes the backend API handed to the constructor. */
+    @Override
+    public void stop() {
+        this.configServerApi.close();
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/StandardConfigServerResponse.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/StandardConfigServerResponse.java
new file mode 100644
index 00000000000..c967091ccbf
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/StandardConfigServerResponse.java
@@ -0,0 +1,22 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.configserver;
+
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.base.Strings;
+
+/**
+ * JSON binding with a {@code message} and an {@code error-code} field.
+ *
+ * @author hakonhall
+ */
+@JsonIgnoreProperties(ignoreUnknown = true)
+@JsonInclude(JsonInclude.Include.NON_NULL)
+public class StandardConfigServerResponse {
+    @JsonProperty("message") public String message;
+    @JsonProperty("error-code") public String errorCode;
+
+    /**
+     * Does nothing when the error code is null or empty.
+     *
+     * @param detail text put first in the exception message
+     * @throws ConfigServerException if the response carries a non-empty error code
+     */
+    public void throwOnError(String detail) {
+        if (Strings.isNullOrEmpty(errorCode)) {
+            return;
+        }
+        throw new ConfigServerException(detail + ": " + message + " " + errorCode);
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/cores/CoreDumpMetadata.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/cores/CoreDumpMetadata.java
new file mode 100644
index 00000000000..2f4595ce5d1
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/cores/CoreDumpMetadata.java
@@ -0,0 +1,96 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.configserver.cores;
+
+import com.yahoo.config.provision.DockerImage;
+
+import java.nio.file.Path;
+import java.time.Instant;
+import java.util.List;
+import java.util.Objects;
+import java.util.Optional;
+
+/**
+ * Mutable holder of core dump metadata. Every field is optional: getters return an empty
+ * {@link Optional} until the corresponding setter has been called with a non-null value.
+ *
+ * @author hakonhall
+ */
+public class CoreDumpMetadata {
+    public enum Type { CORE_DUMP, JVM_HEAP, OOM }
+
+    private Type type;
+    private String binPath;
+    private Instant created;
+    private List<String> backtrace;
+    private List<String> backtraceAllThreads;
+    private Path coreDumpPath;
+    private String decryptionToken;
+    private String kernelVersion;
+    private String cpuMicrocodeVersion;
+    private DockerImage dockerImage;
+    private String vespaVersion;
+
+    public CoreDumpMetadata() {}
+
+    public Optional<Type> type() { return Optional.ofNullable(type); }
+    public Optional<String> binPath() { return Optional.ofNullable(binPath); }
+    public Optional<Instant> created() { return Optional.ofNullable(created); }
+    public Optional<List<String>> backtrace() { return Optional.ofNullable(backtrace); }
+    public Optional<List<String>> backtraceAllThreads() { return Optional.ofNullable(backtraceAllThreads); }
+    public Optional<Path> coredumpPath() { return Optional.ofNullable(coreDumpPath); }
+    public Optional<String> decryptionToken() { return Optional.ofNullable(decryptionToken); }
+    public Optional<String> kernelVersion() { return Optional.ofNullable(kernelVersion); }
+    public Optional<String> cpuMicrocodeVersion() { return Optional.ofNullable(cpuMicrocodeVersion); }
+    public Optional<DockerImage> dockerImage() { return Optional.ofNullable(dockerImage); }
+    public Optional<String> vespaVersion() { return Optional.ofNullable(vespaVersion); }
+
+    // All setters return this to allow chaining
+    public CoreDumpMetadata setType(Type type) { this.type = type; return this; }
+    public CoreDumpMetadata setBinPath(String binPath) { this.binPath = binPath; return this; }
+    public CoreDumpMetadata setCreated(Instant created) { this.created = created; return this; }
+    public CoreDumpMetadata setBacktrace(List<String> backtrace) { this.backtrace = backtrace; return this; }
+    public CoreDumpMetadata setBacktraceAllThreads(List<String> backtraceAllThreads) { this.backtraceAllThreads = backtraceAllThreads; return this; }
+    public CoreDumpMetadata setCoreDumpPath(Path coreDumpPath) { this.coreDumpPath = coreDumpPath; return this; }
+    public CoreDumpMetadata setDecryptionToken(String decryptionToken) { this.decryptionToken = decryptionToken; return this; }
+    public CoreDumpMetadata setKernelVersion(String kernelVersion) { this.kernelVersion = kernelVersion; return this; }
+    public CoreDumpMetadata setCpuMicrocodeVersion(String cpuMicrocodeVersion) { this.cpuMicrocodeVersion = cpuMicrocodeVersion; return this; }
+    public CoreDumpMetadata setDockerImage(DockerImage dockerImage) { this.dockerImage = dockerImage; return this; }
+    public CoreDumpMetadata setVespaVersion(String vespaVersion) { this.vespaVersion = vespaVersion; return this; }
+
+    // NOTE(review): the string below contains the decryption token - confirm it never ends up in logs
+    @Override
+    public String toString() {
+        return "CoreDumpMetadata{" +
+               "type=" + type +
+               ", binPath=" + binPath +
+               ", created=" + created +
+               ", backtrace=" + backtrace +
+               ", backtraceAllThreads=" + backtraceAllThreads +
+               ", coreDumpPath=" + coreDumpPath +
+               ", decryptionToken=" + decryptionToken +
+               ", kernelVersion='" + kernelVersion + '\'' +
+               ", cpuMicrocodeVersion='" + cpuMicrocodeVersion + '\'' +
+               ", dockerImage=" + dockerImage +
+               ", vespaVersion=" + vespaVersion +
+               '}';
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+        CoreDumpMetadata metadata = (CoreDumpMetadata) o;
+        return type == metadata.type &&
+               Objects.equals(binPath, metadata.binPath) &&
+               Objects.equals(created, metadata.created) &&
+               Objects.equals(backtrace, metadata.backtrace) &&
+               Objects.equals(backtraceAllThreads, metadata.backtraceAllThreads) &&
+               Objects.equals(coreDumpPath, metadata.coreDumpPath) &&
+               Objects.equals(decryptionToken, metadata.decryptionToken) &&
+               Objects.equals(kernelVersion, metadata.kernelVersion) &&
+               Objects.equals(cpuMicrocodeVersion, metadata.cpuMicrocodeVersion) &&
+               Objects.equals(dockerImage, metadata.dockerImage) &&
+               Objects.equals(vespaVersion, metadata.vespaVersion);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(type, binPath, created, backtrace, backtraceAllThreads, coreDumpPath, decryptionToken, kernelVersion,
+                            cpuMicrocodeVersion, dockerImage, vespaVersion);
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/cores/Cores.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/cores/Cores.java
new file mode 100644
index 00000000000..b168c6f6dbe
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/cores/Cores.java
@@ -0,0 +1,16 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.configserver.cores;
+
+import com.yahoo.config.provision.HostName;
+
+/**
+ * @author hakonhall
+ */
+public interface Cores {
+    /**
+     * @param hostname Hostname of the node that produced the core.
+     * @param id       The ID (aka UUID aka docid) of the core.
+     * @param metadata Core dump metadata.
+     */
+    void report(HostName hostname, String id, CoreDumpMetadata metadata);
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/cores/CoresImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/cores/CoresImpl.java
new file mode 100644
index 00000000000..200fe97283c
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/cores/CoresImpl.java
@@ -0,0 +1,26 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.configserver.cores;
+
+import com.yahoo.config.provision.HostName;
+import com.yahoo.vespa.hosted.node.admin.configserver.ConfigServerApi;
+import com.yahoo.vespa.hosted.node.admin.configserver.StandardConfigServerResponse;
+import com.yahoo.vespa.hosted.node.admin.configserver.cores.bindings.ReportCoreDumpRequest;
+
+/**
+ * {@link Cores} implementation that reports core dumps through a {@link ConfigServerApi}.
+ *
+ * @author hakonhall
+ */
+public class CoresImpl implements Cores {
+    private final ConfigServerApi configServerApi;
+
+    public CoresImpl(ConfigServerApi configServerApi) {
+        this.configServerApi = configServerApi;
+    }
+
+    /**
+     * POSTs the metadata to {@code /cores/v1/report/<hostname>/<id>} and throws if the response
+     * carries an error code.
+     */
+    @Override
+    public void report(HostName hostname, String id, CoreDumpMetadata metadata) {
+        var request = new ReportCoreDumpRequest().fillFrom(metadata);
+        String uriPath = "/cores/v1/report/" + hostname.value() + "/" + id;
+        // Unwrap the Optional: concatenating it directly would print "Optional[/path]" in the message
+        String coredumpPath = metadata.coredumpPath().map(Object::toString).orElse("<unknown path>");
+        configServerApi.post(uriPath, request, StandardConfigServerResponse.class)
+                       .throwOnError("Failed to report core dump at " + coredumpPath);
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/cores/bindings/ReportCoreDumpRequest.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/cores/bindings/ReportCoreDumpRequest.java
new file mode 100644
index 00000000000..435367cd1ca
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/cores/bindings/ReportCoreDumpRequest.java
@@ -0,0 +1,97 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.configserver.cores.bindings;
+
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.yahoo.config.provision.DockerImage;
+import com.yahoo.vespa.hosted.node.admin.configserver.cores.CoreDumpMetadata;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.nio.file.FileSystem;
+import java.nio.file.Files;
+import java.nio.file.NoSuchFileException;
+import java.nio.file.Path;
+import java.time.Instant;
+import java.util.List;
+import java.util.Optional;
+
+import static com.yahoo.yolean.Exceptions.uncheck;
+
+/**
+ * Jackson class of JSON request, with names of fields verified in unit test.
+ *
+ * @author hakonhall
+ */
+@JsonIgnoreProperties(ignoreUnknown = true)
+@JsonInclude(JsonInclude.Include.NON_NULL)
+public class ReportCoreDumpRequest {
+    private static final ObjectMapper objectMapper = new ObjectMapper();
+
+    public List<String> backtrace;
+    public List<String> backtrace_all_threads;
+    public Long created;
+    public String type;
+    public String bin_path;
+    public String coredump_path;
+    public String cpu_microcode_version;
+    public String decryption_token;
+    public String docker_image;
+    public String kernel_version;
+    public String vespa_version;
+
+    public ReportCoreDumpRequest() {}
+
+    /** Fill this from metadata and return this. Fields absent from the metadata are left untouched. */
+    @JsonIgnore
+    public ReportCoreDumpRequest fillFrom(CoreDumpMetadata metadata) {
+        metadata.type().ifPresent(type -> this.type = type.name());
+        metadata.binPath().ifPresent(binPath -> this.bin_path = binPath);
+        metadata.created().ifPresent(created -> this.created = created.toEpochMilli());
+        metadata.backtrace().ifPresent(backtrace -> this.backtrace = List.copyOf(backtrace));
+        metadata.backtraceAllThreads().ifPresent(backtraceAllThreads -> this.backtrace_all_threads = List.copyOf(backtraceAllThreads));
+        metadata.coredumpPath().ifPresent(coredumpPath -> this.coredump_path = coredumpPath.toString());
+        metadata.decryptionToken().ifPresent(decryptionToken -> this.decryption_token = decryptionToken);
+        metadata.kernelVersion().ifPresent(kernelVersion -> this.kernel_version = kernelVersion);
+        metadata.cpuMicrocodeVersion().ifPresent(cpuMicrocodeVersion -> this.cpu_microcode_version = cpuMicrocodeVersion);
+        metadata.dockerImage().ifPresent(dockerImage -> this.docker_image = dockerImage.asString());
+        metadata.vespaVersion().ifPresent(vespaVersion -> this.vespa_version = vespaVersion);
+        return this;
+    }
+
+    /**
+     * Copies every non-null field of this request into {@code metadata}.
+     *
+     * @param fileSystem used to turn {@code coredump_path} back into a {@link Path}
+     */
+    @JsonIgnore
+    public void populateMetadata(CoreDumpMetadata metadata, FileSystem fileSystem) {
+        if (type != null) metadata.setType(CoreDumpMetadata.Type.valueOf(type));
+        if (bin_path != null) metadata.setBinPath(bin_path);
+        if (created != null) metadata.setCreated(Instant.ofEpochMilli(created));
+        if (backtrace != null) metadata.setBacktrace(backtrace);
+        if (backtrace_all_threads != null) metadata.setBacktraceAllThreads(backtrace_all_threads);
+        if (coredump_path != null) metadata.setCoreDumpPath(fileSystem.getPath(coredump_path));
+        if (decryption_token != null) metadata.setDecryptionToken(decryption_token);
+        if (kernel_version != null) metadata.setKernelVersion(kernel_version);
+        if (cpu_microcode_version != null) metadata.setCpuMicrocodeVersion(cpu_microcode_version);
+        if (docker_image != null) metadata.setDockerImage(DockerImage.fromString(docker_image));
+        if (vespa_version != null) metadata.setVespaVersion(vespa_version);
+    }
+
+    /** Writes this request as JSON to {@code path}. */
+    @JsonIgnore
+    public void save(Path path) {
+        String serialized = uncheck(() -> objectMapper.writeValueAsString(this));
+        uncheck(() -> Files.writeString(path, serialized));
+    }
+
+    /**
+     * Reads a request previously written with {@link #save(Path)}.
+     *
+     * @return the deserialized request, or empty if no file exists at {@code path}
+     * @throws UncheckedIOException if the file exists but cannot be read
+     */
+    @JsonIgnore
+    public static Optional<ReportCoreDumpRequest> load(Path path) {
+        final String serialized;
+        try {
+            serialized = Files.readString(path);
+        } catch (NoSuchFileException e) {
+            return Optional.empty();
+        } catch (IOException e) {
+            throw new UncheckedIOException(e);
+        }
+        return Optional.of(uncheck(() -> objectMapper.readValue(serialized, ReportCoreDumpRequest.class)));
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/cores/package-info.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/cores/package-info.java
new file mode 100644
index 00000000000..d8a07b2b0df
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/cores/package-info.java
@@ -0,0 +1,9 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+/**
+ * @author hakonhall
+ */
+@ExportPackage
+package com.yahoo.vespa.hosted.node.admin.configserver.cores;
+
+import com.yahoo.osgi.annotation.ExportPackage;
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/flags/RealFlagRepository.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/flags/RealFlagRepository.java
new file mode 100644
index 00000000000..97c93e6a48a
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/flags/RealFlagRepository.java
@@ -0,0 +1,29 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.configserver.flags; + +import com.yahoo.vespa.flags.FlagId; +import com.yahoo.vespa.flags.FlagRepository; +import com.yahoo.vespa.flags.json.FlagData; +import com.yahoo.vespa.flags.json.wire.WireFlagDataList; +import com.yahoo.vespa.hosted.node.admin.configserver.ConfigServerApi; + +import java.util.Map; +import java.util.function.Function; +import java.util.stream.Collectors; + +/** + * @author hakonhall + */ +public class RealFlagRepository implements FlagRepository { + private final ConfigServerApi configServerApi; + + public RealFlagRepository(ConfigServerApi configServerApi) { + this.configServerApi = configServerApi; + } + + @Override + public Map getAllFlagData() { + WireFlagDataList list = configServerApi.get("/flags/v1/data?recursive=true", WireFlagDataList.class); + return FlagData.listFromWire(list).stream().collect(Collectors.toMap(FlagData::id, Function.identity())); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/flags/package-info.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/flags/package-info.java new file mode 100644 index 00000000000..b5f1bc2a3bc --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/flags/package-info.java @@ -0,0 +1,5 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.vespa.hosted.node.admin.configserver.flags; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/Acl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/Acl.java new file mode 100644 index 00000000000..dd13658ba27 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/Acl.java @@ -0,0 +1,236 @@ +// Copyright Vespa.ai. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.configserver.noderepository; + +import com.google.common.net.InetAddresses; +import com.yahoo.vespa.hosted.node.admin.task.util.network.IPVersion; + +import java.net.InetAddress; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * This class represents an ACL for a specific container instance. + * + * @author mpolden + * @author smorgrav + */ +public class Acl { + + public static final Acl EMPTY = new Acl(Set.of(), Set.of(), Set.of(), Set.of()); + + private final Set trustedNodes; + private final Set trustedPorts; + private final Set trustedUdpPorts; + private final Set trustedNetworks; + + /** + * @param trustedPorts TCP Ports to trust + * @param trustedUdpPorts UDP ports to trust + * @param trustedNodes Nodes to trust + * @param trustedNetworks Networks (in CIDR notation) to trust + */ + public Acl(Set trustedPorts, Set trustedUdpPorts, Set trustedNodes, Set trustedNetworks) { + this.trustedNodes = copyOfNullable(trustedNodes); + this.trustedPorts = copyOfNullable(trustedPorts); + this.trustedUdpPorts = copyOfNullable(trustedUdpPorts); + this.trustedNetworks = copyOfNullable(trustedNetworks); + } + + public Acl(Set trustedPorts, Set trustedNodes) { + this(trustedPorts, Set.of(), trustedNodes, Set.of()); + } + + public List toRules(IPVersion ipVersion) { + List rules = new LinkedList<>(); + + // We reject with rules instead of using policies + rules.add("-P INPUT ACCEPT"); + rules.add("-P FORWARD ACCEPT"); + rules.add("-P OUTPUT ACCEPT"); + + // Allow packets belonging to established connections + rules.add( "-A INPUT -m state --state RELATED,ESTABLISHED -j ACCEPT"); + + // Allow any loopback traffic + rules.add("-A INPUT -i 
lo -j ACCEPT"); + + // Allow ICMP packets. See http://shouldiblockicmp.com/ + rules.add("-A INPUT -p " + ipVersion.icmpProtocol() + " -j ACCEPT"); + + // Allow trusted ports if any + if (!trustedPorts.isEmpty()) { + rules.add("-A INPUT -p tcp -m multiport --dports " + joinPorts(trustedPorts) + " -j ACCEPT"); + } + + // Allow trusted UDP ports if any + if (!trustedUdpPorts.isEmpty()) { + rules.add("-A INPUT -p udp -m multiport --dports " + joinPorts(trustedUdpPorts) + " -j ACCEPT"); + } + + // Allow traffic from trusted nodes, limited to specific ports, if any + getTrustedNodes(ipVersion).stream() + .map(node -> { + StringBuilder rule = new StringBuilder(); + rule.append("-A INPUT -s ") + .append(node.inetAddressString()) + .append(ipVersion.singleHostCidr()); + if (!node.ports.isEmpty()) { + rule.append(" -p tcp -m multiport --dports ") + .append(joinPorts(node.ports())); + } + rule.append(" -j ACCEPT"); + return rule.toString(); + }) + .sorted() + .forEach(rules::add); + + // Allow traffic from trusted networks + addressesOf(ipVersion, trustedNetworks).stream() + .map(network -> "-A INPUT -s " + network + " -j ACCEPT") + .sorted() + .forEach(rules::add); + + // We reject instead of dropping to give us an easier time to figure out potential network issues + rules.add("-A INPUT -j REJECT --reject-with " + ipVersion.icmpPortUnreachable()); + + return Collections.unmodifiableList(rules); + } + + private static String joinPorts(Collection ports) { + return ports.stream().sorted().map(String::valueOf).collect(Collectors.joining(",")); + } + + public Set getTrustedNodes() { + return trustedNodes; + } + + public Set getTrustedNodes(IPVersion ipVersion) { + return trustedNodes.stream() + .filter(node -> ipVersion.match(node.inetAddress())) + .collect(Collectors.toSet()); + } + + public Set getTrustedPorts() { + return trustedPorts; + } + + public Set getTrustedUdpPorts() { + return trustedUdpPorts; + } + + @Override + public boolean equals(Object o) { + if (this == o) 
return true; + if (o == null || getClass() != o.getClass()) return false; + Acl acl = (Acl) o; + return trustedNodes.equals(acl.trustedNodes) && + trustedPorts.equals(acl.trustedPorts) && + trustedUdpPorts.equals(acl.trustedUdpPorts) && + trustedNetworks.equals(acl.trustedNetworks); + } + + @Override + public int hashCode() { + return Objects.hash(trustedNodes, trustedPorts, trustedUdpPorts, trustedNetworks); + } + + @Override + public String toString() { + return "Acl{" + + "trustedNodes=" + trustedNodes + + ", trustedPorts=" + trustedPorts + + ", trustedUdpPorts=" + trustedUdpPorts + + ", trustedNetworks=" + trustedNetworks + + '}'; + } + + private static Set addressesOf(IPVersion version, Set addresses) { + return addresses.stream() + .filter(version::match) + .collect(Collectors.toUnmodifiableSet()); + } + + private static Set copyOfNullable(Set set) { + return Optional.ofNullable(set).map(Set::copyOf).orElseGet(Set::of); + } + + public record Node(String hostname, InetAddress inetAddress, Set ports) { + + public Node(String hostname, String ipAddress, Set ports) { + this(hostname, InetAddresses.forString(ipAddress), ports); + } + + public String inetAddressString() { + return InetAddresses.toAddrString(inetAddress); + } + + @Override + public String toString() { + return "Node{" + + "hostname='" + hostname + '\'' + + ", inetAddress=" + inetAddress + + ", ports=" + ports + + '}'; + } + } + + public static class Builder { + + private final Set trustedNodes = new HashSet<>(); + private final Set trustedPorts = new HashSet<>(); + private final Set trustedUdpPorts = new HashSet<>(); + private final Set trustedNetworks = new HashSet<>(); + + public Builder() { } + + public Builder(Acl acl) { + trustedNodes.addAll(acl.trustedNodes); + trustedPorts.addAll(acl.trustedPorts); + trustedNetworks.addAll(acl.trustedNetworks); + } + + public Builder withTrustedNode(Node node) { + trustedNodes.add(node); + return this; + } + + public Builder withTrustedNode(String hostname, 
String ipAddress) { + return withTrustedNode(hostname, ipAddress, Set.of()); + } + + public Builder withTrustedNode(String hostname, String ipAddress, Set ports) { + return withTrustedNode(new Node(hostname, ipAddress, ports)); + } + + public Builder withTrustedNode(String hostname, InetAddress inetAddress, Set ports) { + return withTrustedNode(new Node(hostname, inetAddress, ports)); + } + + public Builder withTrustedPorts(Integer... ports) { + trustedPorts.addAll(List.of(ports)); + return this; + } + + public Builder withTrustedUdpPorts(Integer... ports) { + trustedUdpPorts.addAll(List.of(ports)); + return this; + } + + public Builder withTrustedNetworks(Set networks) { + trustedNetworks.addAll(networks); + return this; + } + + public Acl build() { + return new Acl(trustedPorts, trustedUdpPorts, trustedNodes, trustedNetworks); + } + } + +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/AddNode.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/AddNode.java new file mode 100644 index 00000000000..47b59414efd --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/AddNode.java @@ -0,0 +1,86 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.configserver.noderepository; + +import com.yahoo.config.provision.NodeResources; +import com.yahoo.config.provision.NodeType; +import com.yahoo.config.provision.host.FlavorOverrides; + +import java.util.Objects; +import java.util.Optional; +import java.util.Set; + +/** + * @author freva + */ +public class AddNode { + + public final String hostname; + public final String id; + public final Optional parentHostname; + public final Optional nodeFlavor; + public final Optional flavorOverrides; + public final Optional nodeResources; + public final NodeType nodeType; + public final Set ipAddresses; + public final Set additionalIpAddresses; + + public static AddNode forHost(String hostname, String id, String nodeFlavor, Optional flavorOverrides, NodeType nodeType, Set ipAddresses, Set additionalIpAddresses) { + return new AddNode(hostname, id, Optional.empty(), Optional.of(nodeFlavor), flavorOverrides, Optional.empty(), nodeType, ipAddresses, additionalIpAddresses); + } + + public static AddNode forNode(String hostname, String id, String parentHostname, NodeResources nodeResources, NodeType nodeType, Set ipAddresses) { + return new AddNode(hostname, id, Optional.of(parentHostname), Optional.empty(), Optional.empty(), Optional.of(nodeResources), nodeType, ipAddresses, Set.of()); + } + + private AddNode(String hostname, String id, Optional parentHostname, + Optional nodeFlavor, Optional flavorOverrides, + Optional nodeResources, + NodeType nodeType, Set ipAddresses, Set additionalIpAddresses) { + this.hostname = hostname; + this.id = id; + this.parentHostname = parentHostname; + this.nodeFlavor = nodeFlavor; + this.flavorOverrides = flavorOverrides; + this.nodeResources = nodeResources; + this.nodeType = nodeType; + this.ipAddresses = ipAddresses; + this.additionalIpAddresses = additionalIpAddresses; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) 
return false; + AddNode addNode = (AddNode) o; + return Objects.equals(hostname, addNode.hostname) && + Objects.equals(id, addNode.id) && + Objects.equals(parentHostname, addNode.parentHostname) && + Objects.equals(nodeFlavor, addNode.nodeFlavor) && + Objects.equals(flavorOverrides, addNode.flavorOverrides) && + Objects.equals(nodeResources, addNode.nodeResources) && + nodeType == addNode.nodeType && + Objects.equals(ipAddresses, addNode.ipAddresses) && + Objects.equals(additionalIpAddresses, addNode.additionalIpAddresses); + } + + @Override + public int hashCode() { + return Objects.hash(hostname, id, parentHostname, nodeFlavor, flavorOverrides, nodeResources, nodeType, ipAddresses, additionalIpAddresses); + } + + @Override + public String toString() { + return "AddNode{" + + "hostname='" + hostname + '\'' + + ", id=" + id + + ", parentHostname=" + parentHostname + + ", nodeFlavor='" + nodeFlavor + '\'' + + ", flavorOverrides='" + flavorOverrides + '\'' + + ", nodeResources='" + nodeResources + '\'' + + ", nodeType=" + nodeType + + ", ipAddresses=" + ipAddresses + + ", additionalIpAddresses=" + additionalIpAddresses + + '}'; + } + +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/Event.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/Event.java new file mode 100644 index 00000000000..554e9f4df13 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/Event.java @@ -0,0 +1,54 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.configserver.noderepository; + +import java.time.Instant; +import java.util.Objects; + +/** + * @author freva + */ +public class Event { + private final String agent; + private final String type; + private final Instant at; + + public Event(String agent, String type, Instant at) { + this.agent = Objects.requireNonNull(agent); + this.type = Objects.requireNonNull(type); + this.at = Objects.requireNonNull(at); + } + + public String agent() { + return agent; + } + + public String type() { + return type; + } + + public Instant at() { + return at; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Event event1 = (Event) o; + return agent.equals(event1.agent) && type.equals(event1.type) && at.equals(event1.at); + } + + @Override + public int hashCode() { + return Objects.hash(agent, type, at); + } + + @Override + public String toString() { + return "Event{" + + "agent='" + agent + '\'' + + ", type='" + type + '\'' + + ", at=" + at + + '}'; + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NoSuchNodeException.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NoSuchNodeException.java new file mode 100644 index 00000000000..4c77019f9ba --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NoSuchNodeException.java @@ -0,0 +1,8 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.configserver.noderepository; + +public class NoSuchNodeException extends NodeRepositoryException { + public NoSuchNodeException(String message) { + super(message); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeAttributes.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeAttributes.java new file mode 100644 index 00000000000..9b22de3f279 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeAttributes.java @@ -0,0 +1,192 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.configserver.noderepository; + +import com.fasterxml.jackson.databind.JsonNode; +import com.yahoo.component.Version; +import com.yahoo.config.provision.DockerImage; +import com.yahoo.config.provision.WireguardKey; +import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.reports.BaseReport; + +import java.time.Instant; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.TreeMap; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * A node in the node repository is modified by setting which attributes to modify in this class, + * and then patching the node repository node through {@link NodeRepository#updateNodeAttributes(String, NodeAttributes)}. 
+ * + * @author Haakon Dybdahl + * @author Valerij Fredriksen + */ +public class NodeAttributes { + + private Optional hostId = Optional.empty(); + private Optional restartGeneration = Optional.empty(); + private Optional rebootGeneration = Optional.empty(); + private Optional dockerImage = Optional.empty(); + private Optional vespaVersion = Optional.empty(); + private Optional currentOsVersion = Optional.empty(); + private Optional currentFirmwareCheck = Optional.empty(); + private List trustStore = List.of(); + private Optional wireguardPubkey = Optional.empty(); + /** The list of reports to patch. A null value is used to remove the report. */ + private Map reports = new TreeMap<>(); + + public NodeAttributes() { } + + public NodeAttributes withHostId(String hostId) { + this.hostId = Optional.of(hostId); + return this; + } + + public NodeAttributes withRestartGeneration(Optional restartGeneration) { + this.restartGeneration = restartGeneration; + return this; + } + + public NodeAttributes withRestartGeneration(long restartGeneration) { + return withRestartGeneration(Optional.of(restartGeneration)); + } + + public NodeAttributes withRebootGeneration(long rebootGeneration) { + this.rebootGeneration = Optional.of(rebootGeneration); + return this; + } + + public NodeAttributes withDockerImage(DockerImage dockerImage) { + this.dockerImage = Optional.of(dockerImage); + return this; + } + + public NodeAttributes withVespaVersion(Version vespaVersion) { + this.vespaVersion = Optional.of(vespaVersion); + return this; + } + + public NodeAttributes withCurrentOsVersion(Version currentOsVersion) { + this.currentOsVersion = Optional.of(currentOsVersion); + return this; + } + + public NodeAttributes withCurrentFirmwareCheck(Instant currentFirmwareCheck) { + this.currentFirmwareCheck = Optional.of(currentFirmwareCheck); + return this; + } + + public NodeAttributes withTrustStore(List trustStore) { + this.trustStore = List.copyOf(trustStore); + return this; + } + + public 
NodeAttributes withWireguardPubkey(WireguardKey wireguardPubkey) { + this.wireguardPubkey = Optional.of(wireguardPubkey); + return this; + } + + public NodeAttributes withReports(Map nodeReports) { + this.reports = new TreeMap<>(nodeReports); + return this; + } + + public NodeAttributes withReport(String reportId, JsonNode jsonNode) { + reports.put(reportId, jsonNode); + return this; + } + + public NodeAttributes withReportRemoved(String reportId) { + reports.put(reportId, null); + return this; + } + + public Optional getHostId() { + return hostId; + } + + public Optional getRestartGeneration() { + return restartGeneration; + } + + public Optional getRebootGeneration() { + return rebootGeneration; + } + + public Optional getDockerImage() { + return dockerImage; + } + + public Optional getVespaVersion() { + return vespaVersion; + } + + public Optional getCurrentOsVersion() { + return currentOsVersion; + } + + public Optional getCurrentFirmwareCheck() { + return currentFirmwareCheck; + } + + public List getTrustStore() { + return trustStore; + } + + public Optional getWireguardPubkey() { return wireguardPubkey; } + + public Map getReports() { + return reports; + } + + public Optional getReport(String reportId, Class classInstance) { + return Optional.ofNullable(reports.get(reportId)).map(jn -> BaseReport.fromJsonNode(jn, classInstance)); + } + + @Override + public int hashCode() { + return Objects.hash(hostId, restartGeneration, rebootGeneration, dockerImage, vespaVersion, currentOsVersion, + currentFirmwareCheck, trustStore, wireguardPubkey, reports); + } + + public boolean isEmpty() { + return equals(new NodeAttributes()); + } + + @Override + public boolean equals(Object o) { + if (!(o instanceof NodeAttributes other)) { + return false; + } + + return Objects.equals(hostId, other.hostId) + && Objects.equals(restartGeneration, other.restartGeneration) + && Objects.equals(rebootGeneration, other.rebootGeneration) + && Objects.equals(dockerImage, other.dockerImage) + 
&& Objects.equals(vespaVersion, other.vespaVersion) + && Objects.equals(currentOsVersion, other.currentOsVersion) + && Objects.equals(currentFirmwareCheck, other.currentFirmwareCheck) + && Objects.equals(trustStore, other.trustStore) + && Objects.equals(wireguardPubkey, other.wireguardPubkey) + && Objects.equals(reports, other.reports); + } + + @Override + public String toString() { + return Stream.of(hostId.map(id -> "hostId=" + id), + restartGeneration.map(gen -> "restartGeneration=" + gen), + rebootGeneration.map(gen -> "rebootGeneration=" + gen), + dockerImage.map(img -> "dockerImage=" + img.asString()), + vespaVersion.map(ver -> "vespaVersion=" + ver.toFullString()), + currentOsVersion.map(ver -> "currentOsVersion=" + ver.toFullString()), + currentFirmwareCheck.map(at -> "currentFirmwareCheck=" + at), + Optional.ofNullable(trustStore.isEmpty() ? null : "trustStore=" + trustStore), + Optional.ofNullable(wireguardPubkey.isEmpty() ? null : "wireguardPubkey=" + wireguardPubkey), + Optional.ofNullable(reports.isEmpty() ? null : "reports=" + reports)) + .filter(Optional::isPresent) + .map(Optional::get) + .collect(Collectors.joining(", ", "{", "}")); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeMembership.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeMembership.java new file mode 100644 index 00000000000..c70eccfa0ea --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeMembership.java @@ -0,0 +1,115 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.configserver.noderepository; + +import java.util.Objects; + +/** + * @author freva + */ +public class NodeMembership { + private final ClusterType clusterType; + private final String clusterId; + private final String group; + private final int index; + private final boolean retired; + + public NodeMembership(String clusterType, String clusterId, String group, int index, boolean retired) { + this.clusterType = new ClusterType(clusterType); + this.clusterId = clusterId; + this.group = group; + this.index = index; + this.retired = retired; + } + + public ClusterType type() { + return clusterType; + } + + public String clusterId() { + return clusterId; + } + + public String group() { + return group; + } + + public int index() { return index; } + + public boolean isRetired() { + return retired; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + NodeMembership that = (NodeMembership) o; + + if (index != that.index) return false; + if (retired != that.retired) return false; + if (!clusterType.equals(that.clusterType)) return false; + if (!clusterId.equals(that.clusterId)) return false; + return group.equals(that.group); + + } + + @Override + public int hashCode() { + int result = clusterType.hashCode(); + result = 31 * result + clusterId.hashCode(); + result = 31 * result + group.hashCode(); + result = 31 * result + index; + result = 31 * result + (retired ? 
1 : 0); + return result; + } + + @Override + public String toString() { + return "Membership {" + + " clusterType = " + clusterType + + " clusterId = " + clusterId + + " group = " + group + + " index = " + index + + " retired = " + retired + + " }"; + } + + public static class ClusterType { + private final String type; + + private ClusterType(String type) { + this.type = Objects.requireNonNull(type); + } + + public boolean isAdmin() { return "admin".equals(type); } + public boolean isContent() { return "content".equals(type); } + public boolean isCombined() { return "combined".equals(type); } + public boolean isContainer() { return "container".equals(type); } + public boolean hasContainer() { return isContainer() || isCombined(); } + public boolean hasContent() { return isContent() || isCombined(); } + + public String value() { + return type; + } + + @Override + public String toString() { + return type; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + ClusterType that = (ClusterType) o; + return type.equals(that.type); + } + + @Override + public int hashCode() { + return type.hashCode(); + } + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeReports.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeReports.java new file mode 100644 index 00000000000..c45c2dd9578 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeReports.java @@ -0,0 +1,103 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.configserver.noderepository; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.reports.BaseReport; + +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.TreeMap; +import java.util.stream.Collectors; + +import static com.yahoo.yolean.Exceptions.uncheck; + +/** + * API of node reports within node-admin. + * + * @author hakonhall + */ +public class NodeReports { + private static final ObjectMapper mapper = new ObjectMapper(); + + private final Map reports = new TreeMap<>(); + + public NodeReports() { } + + public NodeReports(NodeReports reports) { + this.reports.putAll(reports.reports); + } + + private NodeReports(Map reports) { + this.reports.putAll(Objects.requireNonNull(reports)); + } + + public static NodeReports fromMap(Map reports) { + return new NodeReports(reports); + } + + public void setReport(String reportId, JsonNode jsonNode) { + reports.put(reportId, jsonNode); + } + + public boolean hasReport(String reportId) { return reports.containsKey(reportId); } + + public Optional getReport(String reportId, Class jacksonClass) { + return Optional.ofNullable(reports.get(reportId)).map(r -> uncheck(() -> mapper.treeToValue(r, jacksonClass))); + } + + /** Gets all reports of the given types and deserialize with the given jacksonClass. */ + public TreeMap getReports(Class jacksonClass, BaseReport.Type... 
types) { + Set typeSet = Set.of(types); + + return reports.entrySet().stream() + .filter(entry -> { + JsonNode reportType = entry.getValue().findValue(BaseReport.TYPE_FIELD); + if (reportType == null || !reportType.isTextual()) return false; + Optional type = BaseReport.Type.deserialize(reportType.asText()); + return type.map(typeSet::contains).orElse(false); + }) + .collect(Collectors.toMap( + entry -> entry.getKey(), + entry -> uncheck(() -> mapper.treeToValue(entry.getValue(), jacksonClass)), + (x,y) -> x, // resolves key collisions - cannot happen. + TreeMap::new + )); + } + + public void removeReport(String reportId) { + if (reports.containsKey(reportId)) { + reports.put(reportId, null); + } + } + + public Map getRawMap() { + return new TreeMap<>(reports); + } + + /** Apply the override to this. null value means removing report. */ + public void updateFromRawMap(Map override) { + override.forEach((reportId, jsonNode) -> { + if (jsonNode == null) { + reports.remove(reportId); + } else { + reports.put(reportId, jsonNode); + } + }); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + NodeReports that = (NodeReports) o; + return Objects.equals(reports, that.reports); + } + + @Override + public int hashCode() { + return Objects.hash(reports); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeRepository.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeRepository.java new file mode 100644 index 00000000000..ac1f8ec059f --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeRepository.java @@ -0,0 +1,38 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.configserver.noderepository; + +import com.yahoo.vespa.hosted.node.admin.wireguard.WireguardPeer; + +import java.util.List; +import java.util.Map; +import java.util.Optional; + +/** + * @author stiankri + */ +public interface NodeRepository { + + void addNodes(List nodes); + + List getNodes(String baseHostName); + + default NodeSpec getNode(String hostName) { + return getOptionalNode(hostName).orElseThrow(() -> new NoSuchNodeException(hostName + " not found in node-repo")); + } + + Optional getOptionalNode(String hostName); + + Map getAcls(String hostname); + + List getExclavePeers(); + + List getConfigserverPeers(); + + void updateNodeAttributes(String hostName, NodeAttributes nodeAttributes); + + void setNodeState(String hostName, NodeState nodeState); + + default void reboot(String hostname) { + throw new UnsupportedOperationException("Rebooting not supported in " + getClass().getName()); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeRepositoryException.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeRepositoryException.java new file mode 100644 index 00000000000..f46f0c9f446 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeRepositoryException.java @@ -0,0 +1,10 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.configserver.noderepository;
+
+import com.yahoo.vespa.hosted.node.admin.nodeadmin.ConvergenceException;
+
+public class NodeRepositoryException extends ConvergenceException {
+    public NodeRepositoryException(String message) {
+        super(message, null, true);
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeSpec.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeSpec.java
new file mode 100644
index 00000000000..3700b57d169
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeSpec.java
@@ -0,0 +1,898 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.configserver.noderepository;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.yahoo.component.Version;
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.CloudAccount;
+import com.yahoo.config.provision.DockerImage;
+import com.yahoo.config.provision.NodeResources;
+import com.yahoo.config.provision.NodeType;
+import com.yahoo.config.provision.WireguardKey;
+import com.yahoo.config.provision.WireguardKeyWithTimestamp;
+import com.yahoo.vespa.hosted.node.admin.task.util.file.DiskSize;
+
+import java.net.URI;
+import java.time.Instant;
+import java.util.EnumSet;
+import java.util.List;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.Set;
+
+import static com.yahoo.config.provision.NodeResources.DiskSpeed.fast;
+import static com.yahoo.config.provision.NodeResources.DiskSpeed.slow;
+
+/**
+ * Value object holding the attributes of a single node: identity, state, wanted and current
+ * versions and generations, resources, reports, events and related metadata. Instances are
+ * created directly or through {@link Builder}.
+ *
+ * @author stiankri
+ */
+public class NodeSpec {
+
+    private final String hostname;
+    private final String id;
+    private final NodeState state;
+    private final NodeType type;
+    private final CloudAccount cloudAccount;
+    private final String flavor;
+
+    private final Optional<DockerImage> wantedDockerImage;
+    private final Optional<DockerImage> currentDockerImage;
+
+    private final Optional<Version> wantedVespaVersion;
+    private final Optional<Version> currentVespaVersion;
+
+    private final Optional<Version> wantedOsVersion;
+    private final Optional<Version> currentOsVersion;
+
+    private final Optional<Long> wantedRestartGeneration;
+    private final Optional<Long> currentRestartGeneration;
+
+    private final long wantedRebootGeneration;
+    private final long currentRebootGeneration;
+
+    private final Optional<Instant> wantedFirmwareCheck;
+    private final Optional<Instant> currentFirmwareCheck;
+
+    private final Optional<String> modelName;
+
+    private final OrchestratorStatus orchestratorStatus;
+    private final Optional<ApplicationId> owner;
+    private final Optional<NodeMembership> membership;
+
+    private final NodeResources resources;
+    private final NodeResources realResources;
+    private final Set<String> ipAddresses;
+    private final Set<String> additionalIpAddresses;
+
+    private final NodeReports reports;
+    private final List<Event> events;
+
+    private final Optional<String> parentHostname;
+    private final Optional<URI> archiveUri;
+
+    private final Optional<ApplicationId> exclusiveTo;
+
+    private final List<TrustStoreItem> trustStore;
+
+    private final Optional<WireguardKeyWithTimestamp> wireguardKeyWithTimestamp;
+
+    private final boolean wantToRebuild;
+
+    /**
+     * Creates a node spec. Object arguments must be non-null; when {@code state} is
+     * {@link NodeState#active}, owner, membership, wanted Vespa version, wanted docker image and
+     * both restart generations must additionally be present.
+     *
+     * @throws NullPointerException if a required argument is null
+     * @throws IllegalArgumentException if the node is active and one of those optionals is null or empty
+     */
+    public NodeSpec(
+            String hostname,
+            String id,
+            Optional<DockerImage> wantedDockerImage,
+            Optional<DockerImage> currentDockerImage,
+            NodeState state,
+            NodeType type,
+            CloudAccount cloudAccount,
+            String flavor,
+            Optional<Version> wantedVespaVersion,
+            Optional<Version> currentVespaVersion,
+            Optional<Version> wantedOsVersion,
+            Optional<Version> currentOsVersion,
+            OrchestratorStatus orchestratorStatus,
+            Optional<ApplicationId> owner,
+            Optional<NodeMembership> membership,
+            Optional<Long> wantedRestartGeneration,
+            Optional<Long> currentRestartGeneration,
+            long wantedRebootGeneration,
+            long currentRebootGeneration,
+            Optional<Instant> wantedFirmwareCheck,
+            Optional<Instant> currentFirmwareCheck,
+            Optional<String> modelName,
+            NodeResources resources,
+            NodeResources realResources,
+            Set<String> ipAddresses,
+            Set<String> additionalIpAddresses,
+            NodeReports reports,
+            List<Event> events,
+            Optional<String> parentHostname,
+            Optional<URI> archiveUri,
+            Optional<ApplicationId> exclusiveTo,
+            List<TrustStoreItem> trustStore,
+            Optional<WireguardKeyWithTimestamp> wireguardPubkey,
+            boolean wantToRebuild) {
+
+        if (state == NodeState.active) {
+            requireOptional(owner, "owner");
+            requireOptional(membership, "membership");
+            requireOptional(wantedVespaVersion, "wantedVespaVersion");
+            requireOptional(wantedDockerImage, "wantedDockerImage");
+            requireOptional(wantedRestartGeneration, "wantedRestartGeneration");
+            requireOptional(currentRestartGeneration, "currentRestartGeneration");
+        }
+
+        this.hostname = Objects.requireNonNull(hostname);
+        this.id = Objects.requireNonNull(id);
+        this.wantedDockerImage = Objects.requireNonNull(wantedDockerImage);
+        this.currentDockerImage = Objects.requireNonNull(currentDockerImage);
+        this.state = Objects.requireNonNull(state);
+        this.type = Objects.requireNonNull(type);
+        this.cloudAccount = Objects.requireNonNull(cloudAccount);
+        this.flavor = Objects.requireNonNull(flavor);
+        this.modelName = Objects.requireNonNull(modelName);
+        this.wantedVespaVersion = Objects.requireNonNull(wantedVespaVersion);
+        this.currentVespaVersion = Objects.requireNonNull(currentVespaVersion);
+        this.wantedOsVersion = Objects.requireNonNull(wantedOsVersion);
+        this.currentOsVersion = Objects.requireNonNull(currentOsVersion);
+        this.orchestratorStatus = Objects.requireNonNull(orchestratorStatus);
+        this.owner = Objects.requireNonNull(owner);
+        this.membership = Objects.requireNonNull(membership);
+        this.wantedRestartGeneration = Objects.requireNonNull(wantedRestartGeneration);
+        this.currentRestartGeneration = Objects.requireNonNull(currentRestartGeneration);
+        this.wantedRebootGeneration = wantedRebootGeneration;
+        this.currentRebootGeneration = currentRebootGeneration;
+        this.wantedFirmwareCheck = Objects.requireNonNull(wantedFirmwareCheck);
+        this.currentFirmwareCheck = Objects.requireNonNull(currentFirmwareCheck);
+        this.resources = Objects.requireNonNull(resources);
+        this.realResources = Objects.requireNonNull(realResources);
+        this.ipAddresses = Set.copyOf(ipAddresses);
+        this.additionalIpAddresses = Set.copyOf(additionalIpAddresses);
+        this.reports = Objects.requireNonNull(reports);
+        this.events = List.copyOf(events);
+        this.parentHostname = Objects.requireNonNull(parentHostname);
+        this.archiveUri = Objects.requireNonNull(archiveUri);
+        this.exclusiveTo = Objects.requireNonNull(exclusiveTo);
+        this.trustStore = List.copyOf(trustStore);
+        this.wireguardKeyWithTimestamp = Objects.requireNonNull(wireguardPubkey);
+        this.wantToRebuild = wantToRebuild;
+    }
+
+    public String hostname() {
+        return hostname;
+    }
+
+    /** Returns unique node ID */
+    public String id() {
+        return id;
+    }
+
+    public NodeState state() {
+        return state;
+    }
+
+    public NodeType type() {
+        return type;
+    }
+
+    public CloudAccount cloudAccount() {
+        return cloudAccount;
+    }
+
+    public String flavor() {
+        return flavor;
+    }
+
+    public Optional<DockerImage> wantedDockerImage() {
+        return wantedDockerImage;
+    }
+
+    public Optional<DockerImage> currentDockerImage() {
+        return currentDockerImage;
+    }
+
+    public Optional<Version> wantedVespaVersion() {
+        return wantedVespaVersion;
+    }
+
+    public Optional<Version> currentVespaVersion() {
+        return currentVespaVersion;
+    }
+
+    public Optional<Version> currentOsVersion() {
+        return currentOsVersion;
+    }
+
+    public Optional<Version> wantedOsVersion() {
+        return wantedOsVersion;
+    }
+
+    public Optional<Long> wantedRestartGeneration() {
+        return wantedRestartGeneration;
+    }
+
+    public Optional<Long> currentRestartGeneration() {
+        return currentRestartGeneration;
+    }
+
+    public long wantedRebootGeneration() {
+        return wantedRebootGeneration;
+    }
+
+    public long currentRebootGeneration() {
+        return currentRebootGeneration;
+    }
+
+    public Optional<Instant> wantedFirmwareCheck() {
+        return wantedFirmwareCheck;
+    }
+
+    public Optional<Instant> currentFirmwareCheck() {
+        return currentFirmwareCheck;
+    }
+
+    public Optional<String> modelName() {
+        return modelName;
+    }
+
+    public OrchestratorStatus orchestratorStatus() {
+        return orchestratorStatus;
+    }
+
+    public Optional<ApplicationId> owner() {
+        return owner;
+    }
+
+    public Optional<NodeMembership> membership() {
+        return membership;
+    }
+
+    public NodeResources resources() {
+        return resources;
+    }
+
+    public NodeResources realResources() {
+        return realResources;
+    }
+
+    public double vcpu() {
+        return realResources.vcpu();
+    }
+
+    public double memoryGb() {
+        return realResources.memoryGb();
+    }
+
+    public DiskSize diskSize() {
+        return DiskSize.of(realResources.diskGb(), DiskSize.Unit.GB);
+    }
+
+    public double diskGb() {
+        return realResources.diskGb();
+    }
+
+    public boolean isFastDisk() {
+        return realResources.diskSpeed() == fast;
+    }
+
+    public double bandwidthGbps() {
+        return realResources.bandwidthGbps();
+    }
+
+    public Set<String> ipAddresses() {
+        return ipAddresses;
+    }
+
+    public Set<String> additionalIpAddresses() {
+        return additionalIpAddresses;
+    }
+
+    public NodeReports reports() { return reports; }
+
+    public List<Event> events() {
+        return events;
+    }
+
+    public Optional<String> parentHostname() {
+        return parentHostname;
+    }
+
+    public Optional<URI> archiveUri() {
+        return archiveUri;
+    }
+
+    public Optional<ApplicationId> exclusiveTo() {
+        return exclusiveTo;
+    }
+
+    public List<TrustStoreItem> trustStore() {
+        return trustStore;
+    }
+
+    public Optional<WireguardKeyWithTimestamp> wireguardKeyWithTimestamp() { return wireguardKeyWithTimestamp; }
+
+    public boolean wantToRebuild() {
+        return wantToRebuild;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (!(o instanceof NodeSpec that)) return false;
+
+        return Objects.equals(hostname, that.hostname) &&
+                Objects.equals(id, that.id) &&
+                Objects.equals(wantedDockerImage, that.wantedDockerImage) &&
+                Objects.equals(currentDockerImage, that.currentDockerImage) &&
+                Objects.equals(state, that.state) &&
+                Objects.equals(type, that.type) &&
+                Objects.equals(cloudAccount, that.cloudAccount) &&
+                Objects.equals(flavor, that.flavor) &&
+                Objects.equals(modelName, that.modelName) &&
+                Objects.equals(wantedVespaVersion, that.wantedVespaVersion) &&
+                Objects.equals(currentVespaVersion, that.currentVespaVersion) &&
+                Objects.equals(wantedOsVersion, that.wantedOsVersion) &&
+                Objects.equals(currentOsVersion, that.currentOsVersion) &&
+                Objects.equals(orchestratorStatus, that.orchestratorStatus) &&
+                Objects.equals(owner, that.owner) &&
+                Objects.equals(membership, that.membership) &&
+                Objects.equals(wantedRestartGeneration, that.wantedRestartGeneration) &&
+                Objects.equals(currentRestartGeneration, that.currentRestartGeneration) &&
+                Objects.equals(wantedRebootGeneration, that.wantedRebootGeneration) &&
+                Objects.equals(currentRebootGeneration, that.currentRebootGeneration) &&
+                Objects.equals(wantedFirmwareCheck, that.wantedFirmwareCheck) &&
+                Objects.equals(currentFirmwareCheck, that.currentFirmwareCheck) &&
+                Objects.equals(resources, that.resources) &&
+                Objects.equals(realResources, that.realResources) &&
+                Objects.equals(ipAddresses, that.ipAddresses) &&
+                Objects.equals(additionalIpAddresses, that.additionalIpAddresses) &&
+                Objects.equals(reports, that.reports) &&
+                Objects.equals(events, that.events) &&
+                Objects.equals(parentHostname, that.parentHostname) &&
+                Objects.equals(archiveUri, that.archiveUri) &&
+                Objects.equals(exclusiveTo, that.exclusiveTo) &&
+                Objects.equals(trustStore, that.trustStore) &&
+                Objects.equals(wireguardKeyWithTimestamp, that.wireguardKeyWithTimestamp) &&
+                Objects.equals(wantToRebuild, that.wantToRebuild);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(
+                hostname,
+                id,
+                wantedDockerImage,
+                currentDockerImage,
+                state,
+                type,
+                cloudAccount,
+                flavor,
+                modelName,
+                wantedVespaVersion,
+                currentVespaVersion,
+                wantedOsVersion,
+                currentOsVersion,
+                orchestratorStatus,
+                owner,
+                membership,
+                wantedRestartGeneration,
+                currentRestartGeneration,
+                wantedRebootGeneration,
+                currentRebootGeneration,
+                wantedFirmwareCheck,
+                currentFirmwareCheck,
+                resources,
+                realResources,
+                ipAddresses,
+                additionalIpAddresses,
+                reports,
+                events,
+                parentHostname,
+                archiveUri,
+                exclusiveTo,
+                trustStore,
+                wireguardKeyWithTimestamp,
+                wantToRebuild);
+    }
+
+    @Override
+    public String toString() {
+        return getClass().getSimpleName() + " {"
+                + " hostname=" + hostname
+                + " id=" + id
+                + " wantedDockerImage=" + wantedDockerImage
+                + " currentDockerImage=" + currentDockerImage
+                + " state=" + state
+                + " type=" + type
+                + " cloudAccount=" + cloudAccount
+                + " flavor=" + flavor
+                + " modelName=" + modelName
+                + " wantedVespaVersion=" + wantedVespaVersion
+                + " currentVespaVersion=" + currentVespaVersion
+                + " wantedOsVersion=" + wantedOsVersion
+                + " currentOsVersion=" + currentOsVersion
+                + " orchestratorStatus=" + orchestratorStatus
+                + " owner=" + owner
+                + " membership=" + membership
+                + " wantedRestartGeneration=" + wantedRestartGeneration
+                + " currentRestartGeneration=" + currentRestartGeneration
+                + " wantedRebootGeneration=" + wantedRebootGeneration
+                + " currentRebootGeneration=" + currentRebootGeneration
+                + " wantedFirmwareCheck=" + wantedFirmwareCheck
+                + " currentFirmwareCheck=" + currentFirmwareCheck
+                + " resources=" + resources
+                + " realResources=" + realResources
+                + " ipAddresses=" + ipAddresses
+                + " additionalIpAddresses=" + additionalIpAddresses
+                + " reports=" + reports
+                + " events=" + events
+                + " parentHostname=" + parentHostname
+                + " archiveUri=" + archiveUri
+                + " exclusiveTo=" + exclusiveTo
+                + " trustStore=" + trustStore
+                + " wireguardPubkey=" + wireguardKeyWithTimestamp
+                + " wantToRebuild=" + wantToRebuild
+                + " }";
+    }
+
+    /** Mutable builder for {@link NodeSpec}. */
+    public static class Builder {
+        private String hostname;
+        private String id;
+        private NodeState state;
+        private NodeType type;
+        private CloudAccount cloudAccount = CloudAccount.empty;
+        private String flavor;
+        private Optional<DockerImage> wantedDockerImage = Optional.empty();
+        private Optional<DockerImage> currentDockerImage = Optional.empty();
+        private Optional<Version> wantedVespaVersion = Optional.empty();
+        private Optional<Version> currentVespaVersion = Optional.empty();
+        private Optional<Version> wantedOsVersion = Optional.empty();
+        private Optional<Version> currentOsVersion = Optional.empty();
+        private OrchestratorStatus orchestratorStatus = OrchestratorStatus.NO_REMARKS;
+        private Optional<ApplicationId> owner = Optional.empty();
+        private Optional<NodeMembership> membership = Optional.empty();
+        private Optional<Long> wantedRestartGeneration = Optional.empty();
+        private Optional<Long> currentRestartGeneration = Optional.empty();
+        private long wantedRebootGeneration;
+        private long currentRebootGeneration;
+        private Optional<Instant> wantedFirmwareCheck = Optional.empty();
+        private Optional<Instant> currentFirmwareCheck = Optional.empty();
+        private Optional<String> modelName = Optional.empty();
+        private NodeResources resources;
+        private NodeResources realResources;
+        private Set<String> ipAddresses = Set.of();
+        private Set<String> additionalIpAddresses = Set.of();
+        private NodeReports reports = new NodeReports();
+        private List<Event> events = List.of();
+        private Optional<String> parentHostname = Optional.empty();
+        private Optional<URI> archiveUri = Optional.empty();
+        private Optional<ApplicationId> exclusiveTo = Optional.empty();
+        private List<TrustStoreItem> trustStore = List.of();
+        private Optional<WireguardKeyWithTimestamp> wireguardPubkey = Optional.empty();
+        private boolean wantToRebuild = false;
+
+        public Builder() {}
+
+        /** Creates a builder initialized from the fields of the given node. */
+        public Builder(NodeSpec node) {
+            hostname(node.hostname);
+            id(node.id);
+            state(node.state);
+            type(node.type);
+            cloudAccount(node.cloudAccount);
+            flavor(node.flavor);
+            // modelName has no setter on this builder, so the field is copied directly
+            this.modelName = node.modelName;
+            resources(node.resources);
+            realResources(node.realResources);
+            ipAddresses(node.ipAddresses);
+            additionalIpAddresses(node.additionalIpAddresses);
+            wantedRebootGeneration(node.wantedRebootGeneration);
+            currentRebootGeneration(node.currentRebootGeneration);
+            orchestratorStatus(node.orchestratorStatus);
+            reports(new NodeReports(node.reports));
+            events(node.events);
+            node.wantedDockerImage.ifPresent(this::wantedDockerImage);
+            node.currentDockerImage.ifPresent(this::currentDockerImage);
+            node.wantedVespaVersion.ifPresent(this::wantedVespaVersion);
+            node.currentVespaVersion.ifPresent(this::currentVespaVersion);
+            node.wantedOsVersion.ifPresent(this::wantedOsVersion);
+            node.currentOsVersion.ifPresent(this::currentOsVersion);
+            node.owner.ifPresent(this::owner);
+            node.membership.ifPresent(this::membership);
+            node.wantedRestartGeneration.ifPresent(this::wantedRestartGeneration);
+            node.currentRestartGeneration.ifPresent(this::currentRestartGeneration);
+            node.wantedFirmwareCheck.ifPresent(this::wantedFirmwareCheck);
+            node.currentFirmwareCheck.ifPresent(this::currentFirmwareCheck);
+            node.parentHostname.ifPresent(this::parentHostname);
+            node.archiveUri.ifPresent(this::archiveUri);
+            node.exclusiveTo.ifPresent(this::exclusiveTo);
+            trustStore(node.trustStore);
+            node.wireguardKeyWithTimestamp.ifPresent(this::wireguardKeyWithTimestamp);
+            wantToRebuild(node.wantToRebuild);
+        }
+
+        public Builder hostname(String hostname) {
+            this.hostname = hostname;
+            return this;
+        }
+
+        public Builder id(String id) {
+            this.id = id;
+            return this;
+        }
+
+        public Builder wantedDockerImage(DockerImage wantedDockerImage) {
+            this.wantedDockerImage = Optional.of(wantedDockerImage);
+            return this;
+        }
+
+        public Builder currentDockerImage(DockerImage currentDockerImage) {
+            this.currentDockerImage = Optional.of(currentDockerImage);
+            return this;
+        }
+
+        public Builder state(NodeState state) {
+            this.state = state;
+            return this;
+        }
+
+        public Builder type(NodeType nodeType) {
+            this.type = nodeType;
+            return this;
+        }
+
+        public Builder cloudAccount(CloudAccount cloudAccount) {
+            this.cloudAccount = cloudAccount;
+            return this;
+        }
+
+        public Builder flavor(String flavor) {
+            this.flavor = flavor;
+            return this;
+        }
+
+        public Builder wantedVespaVersion(Version wantedVespaVersion) {
+            this.wantedVespaVersion = Optional.of(wantedVespaVersion);
+            return this;
+        }
+
+        public Builder currentVespaVersion(Version vespaVersion) {
+            this.currentVespaVersion = Optional.of(vespaVersion);
+            return this;
+        }
+
+        public Builder wantedOsVersion(Version wantedOsVersion) {
+            this.wantedOsVersion = Optional.of(wantedOsVersion);
+            return this;
+        }
+
+        public Builder currentOsVersion(Version currentOsVersion) {
+            this.currentOsVersion = Optional.of(currentOsVersion);
+            return this;
+        }
+
+        public Builder orchestratorStatus(OrchestratorStatus orchestratorStatus) {
+            this.orchestratorStatus = orchestratorStatus;
+            return this;
+        }
+
+        public Builder owner(ApplicationId owner) {
+            this.owner = Optional.of(owner);
+            return this;
+        }
+
+        public Builder membership(NodeMembership membership) {
+            this.membership = Optional.of(membership);
+            return this;
+        }
+
+        public Builder wantedRestartGeneration(long wantedRestartGeneration) {
+            this.wantedRestartGeneration = Optional.of(wantedRestartGeneration);
+            return this;
+        }
+
+        public Builder currentRestartGeneration(long currentRestartGeneration) {
+            this.currentRestartGeneration = Optional.of(currentRestartGeneration);
+            return this;
+        }
+
+        public Builder wantedRebootGeneration(long wantedRebootGeneration) {
+            this.wantedRebootGeneration = wantedRebootGeneration;
+            return this;
+        }
+
+        public Builder currentRebootGeneration(long currentRebootGeneration) {
+            this.currentRebootGeneration = currentRebootGeneration;
+            return this;
+        }
+
+        public Builder wantedFirmwareCheck(Instant wantedFirmwareCheck) {
+            this.wantedFirmwareCheck = Optional.of(wantedFirmwareCheck);
+            return this;
+        }
+
+        public Builder currentFirmwareCheck(Instant currentFirmwareCheck) {
+            this.currentFirmwareCheck = Optional.of(currentFirmwareCheck);
+            return this;
+        }
+
+        public Builder resources(NodeResources resources) {
+            this.resources = resources;
+            return this;
+        }
+
+        public Builder realResources(NodeResources realResources) {
+            this.realResources = realResources;
+            return this;
+        }
+
+        public Builder vcpu(double vcpu) {
+            return realResources(realResources.withVcpu(vcpu));
+        }
+
+        public Builder memoryGb(double memoryGb) {
+            return realResources(realResources.withMemoryGb(memoryGb));
+        }
+
+        public Builder diskGb(double diskGb) {
+            return realResources(realResources.withDiskGb(diskGb));
+        }
+
+        public Builder fastDisk(boolean fastDisk) {
+            return realResources(realResources.with(fastDisk ? fast : slow));
+        }
+
+        public Builder bandwidthGbps(double bandwidthGbps) {
+            return realResources(realResources.withBandwidthGbps(bandwidthGbps));
+        }
+
+        public Builder ipAddresses(Set<String> ipAddresses) {
+            this.ipAddresses = ipAddresses;
+            return this;
+        }
+
+        public Builder additionalIpAddresses(Set<String> additionalIpAddresses) {
+            this.additionalIpAddresses = additionalIpAddresses;
+            return this;
+        }
+
+        public Builder reports(NodeReports reports) {
+            this.reports = reports;
+            return this;
+        }
+
+        public Builder report(String reportId, JsonNode report) {
+            this.reports.setReport(reportId, report);
+            return this;
+        }
+
+        public Builder removeReport(String reportId) {
+            reports.removeReport(reportId);
+            return this;
+        }
+
+        public Builder events(List<Event> events) {
+            this.events = events;
+            return this;
+        }
+
+        public Builder parentHostname(String parentHostname) {
+            this.parentHostname = Optional.of(parentHostname);
+            return this;
+        }
+
+        public Builder archiveUri(URI archiveUri) {
+            this.archiveUri = Optional.of(archiveUri);
+            return this;
+        }
+
+        public Builder exclusiveTo(ApplicationId applicationId) {
+            this.exclusiveTo = Optional.of(applicationId);
+            return this;
+        }
+
+        public Builder trustStore(List<TrustStoreItem> trustStore) {
+            this.trustStore = List.copyOf(trustStore);
+            return this;
+        }
+
+        public Builder wireguardPubkey(WireguardKey wireguardPubkey) {
+            this.wireguardPubkey = Optional.of(new WireguardKeyWithTimestamp(wireguardPubkey, Instant.EPOCH));
+            return this;
+        }
+
+        public Builder wireguardKeyWithTimestamp(WireguardKeyWithTimestamp wireguardPubKey) {
+            this.wireguardPubkey = Optional.of(wireguardPubKey);
+            return this;
+        }
+
+        public Builder wantToRebuild(boolean wantToRebuild) {
+            this.wantToRebuild = wantToRebuild;
+            return this;
+        }
+
+        public Builder updateFromNodeAttributes(NodeAttributes attributes) {
+            attributes.getHostId().ifPresent(this::id);
+            attributes.getDockerImage().ifPresent(this::currentDockerImage);
+            attributes.getCurrentOsVersion().ifPresent(this::currentOsVersion);
+            attributes.getRebootGeneration().ifPresent(this::currentRebootGeneration);
+            attributes.getRestartGeneration().ifPresent(this::currentRestartGeneration);
+            // Always replace entire trust store
+            trustStore(attributes.getTrustStore());
+            attributes.getWireguardPubkey().ifPresent(this::wireguardPubkey);
+            this.reports.updateFromRawMap(attributes.getReports());
+
+            return this;
+        }
+
+        public String hostname() {
+            return hostname;
+        }
+
+        public Optional<DockerImage> wantedDockerImage() {
+            return wantedDockerImage;
+        }
+
+        public Optional<DockerImage> currentDockerImage() {
+            return currentDockerImage;
+        }
+
+        public NodeState state() {
+            return state;
+        }
+
+        public NodeType type() {
+            return type;
+        }
+
+        public CloudAccount cloudAccount() {
+            return cloudAccount;
+        }
+
+        public String flavor() {
+            return flavor;
+        }
+
+        public Optional<Version> wantedVespaVersion() {
+            return wantedVespaVersion;
+        }
+
+        public Optional<Version> currentVespaVersion() {
+            return currentVespaVersion;
+        }
+
+        public Optional<Version> wantedOsVersion() {
+            return wantedOsVersion;
+        }
+
+        public Optional<Version> currentOsVersion() {
+            return currentOsVersion;
+        }
+
+        public OrchestratorStatus orchestratorStatus() {
+            return orchestratorStatus;
+        }
+
+        public Optional<ApplicationId> owner() {
+            return owner;
+        }
+
+        public Optional<NodeMembership> membership() {
+            return membership;
+        }
+
+        public Optional<Long> wantedRestartGeneration() {
+            return wantedRestartGeneration;
+        }
+
+        public Optional<Long> currentRestartGeneration() {
+            return currentRestartGeneration;
+        }
+
+        public long wantedRebootGeneration() {
+            return wantedRebootGeneration;
+        }
+
+        public long currentRebootGeneration() {
+            return currentRebootGeneration;
+        }
+
+        public NodeResources resources() {
+            return resources;
+        }
+
+        public NodeResources realResources() {
+            return realResources;
+        }
+
+        public Set<String> ipAddresses() {
+            return ipAddresses;
+        }
+
+        public Set<String> additionalIpAddresses() {
+            return additionalIpAddresses;
+        }
+
+        public NodeReports reports() {
+            return reports;
+        }
+
+        public List<Event> events() {
+            return events;
+        }
+
+        public Optional<String> parentHostname() {
+            return parentHostname;
+        }
+
+        public Optional<URI> archiveUri() {
+            return archiveUri;
+        }
+
+        public NodeSpec build() {
+            return new NodeSpec(hostname, id, wantedDockerImage, currentDockerImage, state, type, cloudAccount, flavor,
+                    wantedVespaVersion, currentVespaVersion, wantedOsVersion, currentOsVersion, orchestratorStatus,
+                    owner, membership,
+                    wantedRestartGeneration, currentRestartGeneration,
+                    wantedRebootGeneration, currentRebootGeneration,
+                    wantedFirmwareCheck, currentFirmwareCheck, modelName,
+                    resources, realResources, ipAddresses, additionalIpAddresses,
+                    reports, events, parentHostname, archiveUri, exclusiveTo, trustStore,
+                    wireguardPubkey, wantToRebuild);
+        }
+
+
+        public static Builder testSpec(String hostname) {
+            return testSpec(hostname, NodeState.active);
+        }
+
+        /**
+         * Creates a NodeSpec.Builder that has the given hostname, in a given state, and some
+         * reasonable values for the remaining required NodeSpec fields.
+         */
+        public static Builder testSpec(String hostname, NodeState state) {
+            Builder builder = new Builder()
+                    .id(hostname)
+                    .hostname(hostname)
+                    .state(state)
+                    .type(NodeType.tenant)
+                    .flavor("d-2-8-50")
+                    .resources(new NodeResources(2, 8, 50, 10))
+                    .realResources(new NodeResources(2, 8, 50, 10))
+                    .events(List.of(new Event("operator", "rebooted", Instant.EPOCH)));
+
+            // Set the required allocated fields
+            if (EnumSet.of(NodeState.active, NodeState.inactive, NodeState.reserved).contains(state)) {
+                builder .owner(ApplicationId.defaultId())
+                        .membership(new NodeMembership("container", "my-id", "group", 0, false))
+                        .wantedVespaVersion(Version.fromString("7.1.1"))
+                        .wantedDockerImage(DockerImage.fromString("docker.domain.tld/repo/image:7.1.1"))
+                        .currentRestartGeneration(0)
+                        .wantedRestartGeneration(0);
+            }
+
+            return builder;
+        }
+    }
+
+    /** Throws IllegalArgumentException unless the given optional is non-null and non-empty. */
+    private static void requireOptional(Optional<?> optional, String name) {
+        if (optional == null || optional.isEmpty())
+            throw new IllegalArgumentException(name + " must be set, was " + optional);
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeState.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeState.java
new file mode 100644
index 00000000000..8e66480c92a
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeState.java
@@ -0,0 +1,13 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.configserver.noderepository;
+
+/**
+ * All the states a node can be in the node-repository.
+ *
+ * See com.yahoo.vespa.hosted.provision.NodeState
+ *
+ * @author freva
+ */
+public enum NodeState {
+    provisioned, ready, reserved, active, inactive, dirty, failed, parked, deprovisioned, breakfixed
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/OrchestratorStatus.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/OrchestratorStatus.java
new file mode 100644
index 00000000000..d8532188c64
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/OrchestratorStatus.java
@@ -0,0 +1,23 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.configserver.noderepository;
+
+import java.util.stream.Stream;
+
+public enum OrchestratorStatus {
+    NO_REMARKS, ALLOWED_TO_BE_DOWN, PERMANENTLY_DOWN, UNKNOWN;
+
+    public static OrchestratorStatus fromString(String statusString) {
+        return Stream.of(values())
+                .filter(status -> status.asString().equals(statusString))
+                .findFirst()
+                .orElse(UNKNOWN);
+    }
+
+    public String asString() {
+        return name();
+    }
+
+    public boolean isSuspended() {
+        return this != NO_REMARKS;
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/RealNodeRepository.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/RealNodeRepository.java
new file mode 100644
index 00000000000..d340aa9fd3d
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/RealNodeRepository.java
@@ -0,0 +1,406 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.configserver.noderepository; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.net.InetAddresses; +import com.yahoo.component.Version; +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.CloudAccount; +import com.yahoo.config.provision.DockerImage; +import com.yahoo.config.provision.HostName; +import com.yahoo.config.provision.NodeResources; +import com.yahoo.config.provision.NodeType; +import com.yahoo.config.provision.WireguardKey; +import com.yahoo.config.provision.WireguardKeyWithTimestamp; +import com.yahoo.config.provision.host.FlavorOverrides; +import com.yahoo.vespa.hosted.node.admin.configserver.ConfigServerApi; +import com.yahoo.vespa.hosted.node.admin.configserver.HttpException; +import com.yahoo.vespa.hosted.node.admin.configserver.StandardConfigServerResponse; +import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.bindings.GetAclResponse; +import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.bindings.GetNodesResponse; +import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.bindings.GetWireguardResponse; +import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.bindings.NodeRepositoryNode; +import com.yahoo.vespa.hosted.node.admin.task.util.network.VersionedIpAddress; +import com.yahoo.vespa.hosted.node.admin.wireguard.WireguardPeer; + +import java.net.URI; +import java.time.Instant; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.TreeMap; +import java.util.function.Consumer; +import java.util.function.Function; +import java.util.logging.Logger; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * @author stiankri + * @author dybis + */ +public class RealNodeRepository implements NodeRepository { + private static final Logger logger = 
Logger.getLogger(RealNodeRepository.class.getName()); + + private final ConfigServerApi configServerApi; + + public RealNodeRepository(ConfigServerApi configServerApi) { + this.configServerApi = configServerApi; + } + + @Override + public void addNodes(List nodes) { + List nodesToPost = nodes.stream() + .map(RealNodeRepository::nodeRepositoryNodeFromAddNode) + .toList(); + + configServerApi.post("/nodes/v2/node", nodesToPost, StandardConfigServerResponse.class) + .throwOnError("Failed to add nodes"); + } + + @Override + public List getNodes(String baseHostName) { + String path = "/nodes/v2/node/?recursive=true&parentHost=" + baseHostName; + final GetNodesResponse nodesForHost = configServerApi.get(path, GetNodesResponse.class); + + return nodesForHost.nodes.stream() + .map(RealNodeRepository::createNodeSpec) + .toList(); + } + + @Override + public Optional getOptionalNode(String hostName) { + try { + NodeRepositoryNode nodeResponse = configServerApi.get("/nodes/v2/node/" + hostName, + NodeRepositoryNode.class); + + return Optional.ofNullable(nodeResponse).map(RealNodeRepository::createNodeSpec); + } catch (HttpException.NotFoundException | HttpException.ForbiddenException e) { + // Return empty on 403 in addition to 404 as it likely means we're trying to access a node that + // has been deleted. When a node is deleted, the parent-child relationship no longer exists and + // authorization cannot be granted. + return Optional.empty(); + } + } + + /** + * Get all ACLs that belongs to a hostname. Usually this is a parent host and all + * ACLs for child nodes are returned. 
+ */ + @Override + public Map getAcls(String hostName) { + String path = String.format("/nodes/v2/acl/%s?children=true", hostName); + GetAclResponse response = configServerApi.get(path, GetAclResponse.class); + + // Group ports by container hostname that trusts them + Map> trustedPorts = response.trustedPorts.stream() + .collect(Collectors.groupingBy( + GetAclResponse.Port::getTrustedBy, + Collectors.mapping(port -> port.port, Collectors.toSet()))); + + // Group UDP ports by container hostname that trusts them + Map> trustedUdpPorts = response.trustedUdpPorts.stream() + .collect(Collectors.groupingBy( + GetAclResponse.Port::getTrustedBy, + Collectors.mapping(port -> port.port, Collectors.toSet()))); + + // Group node ip-addresses by container hostname that trusts them + Map> trustedNodes = response.trustedNodes.stream() + .collect(Collectors.groupingBy( + GetAclResponse.Node::getTrustedBy, + Collectors.mapping( + node -> new Acl.Node(node.hostname, node.ipAddress, Set.copyOf(node.ports)), + Collectors.toSet()))); + + // Group trusted networks by container hostname that trusts them + Map> trustedNetworks = response.trustedNetworks.stream() + .collect(Collectors.groupingBy(GetAclResponse.Network::getTrustedBy, + Collectors.mapping(node -> node.network, Collectors.toSet()))); + + + // For each hostname create an ACL + return Stream.of(trustedNodes.keySet(), trustedPorts.keySet(), trustedUdpPorts.keySet(), trustedNetworks.keySet()) + .flatMap(Set::stream) + .distinct() + .collect(Collectors.toMap( + Function.identity(), + hostname -> new Acl(trustedPorts.get(hostname), + trustedUdpPorts.get(hostname), + trustedNodes.get(hostname), + trustedNetworks.get(hostname)))); + } + + @Override + public List getExclavePeers() { + String path = "/nodes/v2/node/?recursive=true&enclave=true"; + final GetNodesResponse response = configServerApi.get(path, GetNodesResponse.class); + + return response.nodes.stream() + .mapMulti((NodeRepositoryNode node, Consumer consumer) -> { + var 
keyWithTimestamp = createWireguardKeyWithTimestamp(node.wireguardKeyWithTimestamp, + node.wireguardPubkey, + node.wireguardKeyTimestamp); + if (keyWithTimestamp == null) return; + + List ipAddresses = getIpAddresses(node); + if (ipAddresses.isEmpty()) return; + + consumer.accept(new WireguardPeer(HostName.of(node.hostname), ipAddresses, keyWithTimestamp)); + }) + .sorted() + .toList(); + } + + private static List getIpAddresses(NodeRepositoryNode node) { + return node.ipAddresses.stream() + .map(InetAddresses::forString) + .filter(address -> !address.isLoopbackAddress() && !address.isLinkLocalAddress() && !address.isSiteLocalAddress()) + .map(VersionedIpAddress::from) + .toList(); + } + + @Override + public List getConfigserverPeers() { + GetWireguardResponse response = configServerApi.get("/nodes/v2/wireguard", GetWireguardResponse.class); + return response.configservers.stream() + .map(RealNodeRepository::createConfigserverPeer) + .sorted(Comparator.comparing(WireguardPeer::hostname)) + .toList(); + } + + @Override + public void updateNodeAttributes(String hostName, NodeAttributes nodeAttributes) { + configServerApi.patch("/nodes/v2/node/" + hostName, + nodeRepositoryNodeFromNodeAttributes(nodeAttributes), + StandardConfigServerResponse.class) + .throwOnError("Failed to update node attributes"); + } + + @Override + public void setNodeState(String hostName, NodeState nodeState) { + String state = nodeState.name(); + StandardConfigServerResponse response = configServerApi.put("/nodes/v2/state/" + state + "/" + hostName, + Optional.empty(), /* body */ + StandardConfigServerResponse.class); + logger.info(response.message); + response.throwOnError("Failed to set node state"); + } + + @Override + public void reboot(String hostname) { + String uri = "/nodes/v2/command/reboot?hostname=" + hostname; + StandardConfigServerResponse response = configServerApi.post(uri, Optional.empty(), StandardConfigServerResponse.class); + logger.info(response.message); + 
response.throwOnError("Failed to reboot " + hostname); + } + + private static NodeSpec createNodeSpec(NodeRepositoryNode node) { + Objects.requireNonNull(node.type, "Unknown node type"); + NodeType nodeType = NodeType.valueOf(node.type); + + Objects.requireNonNull(node.state, "Unknown node state"); + NodeState nodeState = NodeState.valueOf(node.state); + + Optional membership = Optional.ofNullable(node.membership) + .map(m -> new NodeMembership(m.clusterType, m.clusterId, m.group, m.index, m.retired)); + NodeReports reports = NodeReports.fromMap(Optional.ofNullable(node.reports).orElseGet(Map::of)); + List events = node.history.stream() + .map(event -> new Event(event.agent, event.event, Optional.ofNullable(event.at).map(Instant::ofEpochMilli).orElse(Instant.EPOCH))) + .toList(); + + List trustStore = Optional.ofNullable(node.trustStore).orElse(List.of()).stream() + .map(item -> new TrustStoreItem(item.fingerprint, Instant.ofEpochMilli(item.expiry))) + .toList(); + + + return new NodeSpec( + node.hostname, + node.id, + Optional.ofNullable(node.wantedDockerImage).map(DockerImage::fromString), + Optional.ofNullable(node.currentDockerImage).map(DockerImage::fromString), + nodeState, + nodeType, + Optional.ofNullable(node.cloudAccount).map(CloudAccount::from).orElse(CloudAccount.empty), + node.flavor, + Optional.ofNullable(node.wantedVespaVersion).map(Version::fromString), + Optional.ofNullable(node.vespaVersion).map(Version::fromString), + Optional.ofNullable(node.wantedOsVersion).map(Version::fromString), + Optional.ofNullable(node.currentOsVersion).map(Version::fromString), + Optional.ofNullable(node.orchestratorStatus).map(OrchestratorStatus::fromString).orElse(OrchestratorStatus.NO_REMARKS), + Optional.ofNullable(node.owner).map(o -> ApplicationId.from(o.tenant, o.application, o.instance)), + membership, + Optional.ofNullable(node.restartGeneration), + Optional.ofNullable(node.currentRestartGeneration), + node.rebootGeneration, + node.currentRebootGeneration, + 
Optional.ofNullable(node.wantedFirmwareCheck).map(Instant::ofEpochMilli), + Optional.ofNullable(node.currentFirmwareCheck).map(Instant::ofEpochMilli), + Optional.ofNullable(node.modelName), + nodeResources(node.resources), + nodeResources(node.realResources), + node.ipAddresses, + node.additionalIpAddresses, + reports, + events, + Optional.ofNullable(node.parentHostname), + Optional.ofNullable(node.archiveUri).map(URI::create), + Optional.ofNullable(node.exclusiveTo).map(ApplicationId::fromSerializedForm), + trustStore, + Optional.ofNullable(createWireguardKeyWithTimestamp(node.wireguardKeyWithTimestamp, + node.wireguardPubkey, + node.wireguardKeyTimestamp)), + node.wantToRebuild); + } + + private static NodeResources nodeResources(NodeRepositoryNode.NodeResources nodeResources) { + return new NodeResources( + nodeResources.vcpu, + nodeResources.memoryGb, + nodeResources.diskGb, + nodeResources.bandwidthGbps, + diskSpeedFromString(nodeResources.diskSpeed), + storageTypeFromString(nodeResources.storageType), + architectureFromString(nodeResources.architecture), + gpuResourcesFrom(nodeResources)); + } + + private static NodeResources.GpuResources gpuResourcesFrom(NodeRepositoryNode.NodeResources nodeResources) { + if (nodeResources.gpuCount == null || nodeResources.gpuMemoryGb == null) return NodeResources.GpuResources.zero(); + return new NodeResources.GpuResources(nodeResources.gpuCount, nodeResources.gpuMemoryGb); + } + + private static NodeResources.DiskSpeed diskSpeedFromString(String diskSpeed) { + if (diskSpeed == null) return NodeResources.DiskSpeed.getDefault(); + return switch (diskSpeed) { + case "fast" -> NodeResources.DiskSpeed.fast; + case "slow" -> NodeResources.DiskSpeed.slow; + case "any" -> NodeResources.DiskSpeed.any; + default -> throw new IllegalArgumentException("Unknown disk speed '" + diskSpeed + "'"); + }; + } + + private static NodeResources.StorageType storageTypeFromString(String storageType) { + if (storageType == null) return 
NodeResources.StorageType.getDefault(); + return switch (storageType) { + case "remote" -> NodeResources.StorageType.remote; + case "local" -> NodeResources.StorageType.local; + case "any" -> NodeResources.StorageType.any; + default -> throw new IllegalArgumentException("Unknown storage type '" + storageType + "'"); + }; + } + + private static NodeResources.Architecture architectureFromString(String architecture) { + if (architecture == null) return NodeResources.Architecture.getDefault(); + return switch (architecture) { + case "arm64" -> NodeResources.Architecture.arm64; + case "x86_64" -> NodeResources.Architecture.x86_64; + case "any" -> NodeResources.Architecture.any; + default -> throw new IllegalArgumentException("Unknown architecture '" + architecture + "'"); + }; + } + + private static String toString(NodeResources.DiskSpeed diskSpeed) { + return switch (diskSpeed) { + case fast -> "fast"; + case slow -> "slow"; + case any -> "any"; + }; + } + + private static String toString(NodeResources.StorageType storageType) { + return switch (storageType) { + case remote -> "remote"; + case local -> "local"; + case any -> "any"; + }; + } + + private static String toString(NodeResources.Architecture architecture) { + return switch (architecture) { + case arm64 -> "arm64"; + case x86_64 -> "x86_64"; + case any -> "any"; + }; + } + + private static NodeRepositoryNode nodeRepositoryNodeFromAddNode(AddNode addNode) { + NodeRepositoryNode node = new NodeRepositoryNode(); + node.id = addNode.id; + node.hostname = addNode.hostname; + node.parentHostname = addNode.parentHostname.orElse(null); + addNode.nodeFlavor.ifPresent(f -> node.flavor = f); + addNode.flavorOverrides.flatMap(FlavorOverrides::diskGb).ifPresent(d -> { + node.resources = new NodeRepositoryNode.NodeResources(); + node.resources.diskGb = d; + }); + addNode.nodeResources.ifPresent(resources -> { + node.resources = new NodeRepositoryNode.NodeResources(); + node.resources.vcpu = resources.vcpu(); + 
node.resources.memoryGb = resources.memoryGb(); + node.resources.diskGb = resources.diskGb(); + node.resources.bandwidthGbps = resources.bandwidthGbps(); + node.resources.diskSpeed = toString(resources.diskSpeed()); + node.resources.storageType = toString(resources.storageType()); + node.resources.architecture = toString(resources.architecture()); + if (!resources.gpuResources().isZero()) { + node.resources.gpuCount = resources.gpuResources().count(); + node.resources.gpuMemoryGb = resources.gpuResources().memoryGb(); + } + }); + node.type = addNode.nodeType.name(); + node.ipAddresses = addNode.ipAddresses; + node.additionalIpAddresses = addNode.additionalIpAddresses; + return node; + } + + public static NodeRepositoryNode nodeRepositoryNodeFromNodeAttributes(NodeAttributes nodeAttributes) { + NodeRepositoryNode node = new NodeRepositoryNode(); + node.id = nodeAttributes.getHostId().orElse(null); + node.currentDockerImage = nodeAttributes.getDockerImage().map(DockerImage::asString).orElse(null); + node.currentRestartGeneration = nodeAttributes.getRestartGeneration().orElse(null); + node.currentRebootGeneration = nodeAttributes.getRebootGeneration().orElse(null); + node.vespaVersion = nodeAttributes.getVespaVersion().map(Version::toFullString).orElse(null); + node.currentOsVersion = nodeAttributes.getCurrentOsVersion().map(Version::toFullString).orElse(null); + node.currentFirmwareCheck = nodeAttributes.getCurrentFirmwareCheck().map(Instant::toEpochMilli).orElse(null); + node.trustStore = nodeAttributes.getTrustStore().stream() + .map(item -> new NodeRepositoryNode.TrustStoreItem(item.fingerprint(), item.expiry().toEpochMilli())) + .toList(); + // This is used for patching, and timestamp must only be set on the server side, hence sending EPOCH. 
+ node.wireguardKeyWithTimestamp = nodeAttributes.getWireguardPubkey() + .map(key -> new NodeRepositoryNode.WireguardKeyWithTimestamp(key.value(), 0L)) + .orElse(null); + Map reports = nodeAttributes.getReports(); + node.reports = reports == null || reports.isEmpty() ? null : new TreeMap<>(reports); + + // TODO wg: remove when all nodes are using new key+timestamp format + node.wireguardPubkey = nodeAttributes.getWireguardPubkey().map(WireguardKey::value).orElse(null); + return node; + } + + private static WireguardPeer createConfigserverPeer(GetWireguardResponse.Configserver configServer) { + return new WireguardPeer(HostName.of(configServer.hostname), + configServer.ipAddresses.stream().map(VersionedIpAddress::from).toList(), + createWireguardKeyWithTimestamp(configServer.wireguardKeyWithTimestamp, + configServer.wireguardPubkey, + configServer.wireguardKeyTimestamp)); + } + + private static WireguardKeyWithTimestamp createWireguardKeyWithTimestamp(NodeRepositoryNode.WireguardKeyWithTimestamp wirguardJson, + String oldKeyJson, Long oldTimestampJson) { + if (wirguardJson != null && wirguardJson.key != null && ! wirguardJson.key.isEmpty()) { + return new WireguardKeyWithTimestamp(WireguardKey.from(wirguardJson.key), + Instant.ofEpochMilli(wirguardJson.timestamp)); + // TODO wg: remove when all nodes are using new key+timestamp format + } else if (oldKeyJson != null) { + var timestamp = oldTimestampJson != null ? 
oldTimestampJson : 0L; + return new WireguardKeyWithTimestamp(WireguardKey.from(oldKeyJson), + Instant.ofEpochMilli(timestamp)); + // TODO END + } else return null; + + } + +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/TrustStoreItem.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/TrustStoreItem.java new file mode 100644 index 00000000000..dfec70288e3 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/TrustStoreItem.java @@ -0,0 +1,48 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.vespa.hosted.node.admin.configserver.noderepository; + +import java.time.Instant; +import java.util.Objects; + +/** + * @author mortent + */ +public class TrustStoreItem { + private final String fingerprint; + private final Instant expiry; + + public TrustStoreItem(String fingerprint, Instant expiry) { + this.fingerprint = fingerprint; + this.expiry = expiry; + } + + public String fingerprint() { + return fingerprint; + } + + public Instant expiry() { + return expiry; + } + + @Override + public String toString() { + return "TrustStoreItem{" + + "fingerprint='" + fingerprint + '\'' + + ", expiry=" + expiry + + '}'; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + TrustStoreItem that = (TrustStoreItem) o; + return Objects.equals(fingerprint, that.fingerprint) && Objects.equals(expiry, that.expiry); + } + + @Override + public int hashCode() { + return Objects.hash(fingerprint, expiry); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/bindings/GetAclResponse.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/bindings/GetAclResponse.java new file mode 100644 
index 00000000000..d20f31e256e --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/bindings/GetAclResponse.java @@ -0,0 +1,114 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.configserver.noderepository.bindings; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; + +import java.util.List; + +/** + * This class represents a response from the /nodes/v2/acl/ API. + * + * @author mpolden + */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class GetAclResponse { + + @JsonProperty("trustedNodes") + public final List trustedNodes; + + @JsonProperty("trustedNetworks") + public final List trustedNetworks; + + @JsonProperty("trustedPorts") + public final List trustedPorts; + + @JsonProperty("trustedUdpPorts") + public final List trustedUdpPorts; + + @JsonCreator + public GetAclResponse(@JsonProperty("trustedNodes") List trustedNodes, + @JsonProperty("trustedNetworks") List trustedNetworks, + @JsonProperty("trustedPorts") List trustedPorts, + @JsonProperty("trustedUdpPorts") List trustedUdpPorts) { + this.trustedNodes = trustedNodes == null ? List.of() : List.copyOf(trustedNodes); + this.trustedNetworks = trustedNetworks == null ? List.of() : List.copyOf(trustedNetworks); + this.trustedPorts = trustedPorts == null ? List.of() : List.copyOf(trustedPorts); + this.trustedUdpPorts = trustedUdpPorts == null ? 
List.of() : List.copyOf(trustedUdpPorts); + } + + @JsonIgnoreProperties(ignoreUnknown = true) + public static class Node { + + @JsonProperty("hostname") + public final String hostname; + + @JsonProperty("type") + public final String nodeType; + + @JsonProperty("ipAddress") + public final String ipAddress; + + @JsonProperty("ports") + public final List ports; + + @JsonProperty("trustedBy") + public final String trustedBy; + + @JsonCreator + public Node(@JsonProperty("hostname") String hostname, @JsonProperty("type") String nodeType, + @JsonProperty("ipAddress") String ipAddress, @JsonProperty("ports") List ports, + @JsonProperty("trustedBy") String trustedBy) { + this.hostname = hostname; + this.nodeType = nodeType; + this.ipAddress = ipAddress; + this.ports = ports == null ? List.of() : List.copyOf(ports); + this.trustedBy = trustedBy; + } + + public String getTrustedBy() { + return trustedBy; + } + } + + @JsonIgnoreProperties(ignoreUnknown = true) + public static class Network { + + @JsonProperty("network") + public final String network; + + @JsonProperty("trustedBy") + public final String trustedBy; + + @JsonCreator + public Network(@JsonProperty("network") String network, @JsonProperty("trustedBy") String trustedBy) { + this.network = network; + this.trustedBy = trustedBy; + } + + public String getTrustedBy() { + return trustedBy; + } + } + + @JsonIgnoreProperties(ignoreUnknown = true) + public static class Port { + + @JsonProperty("port") + public final Integer port; + + @JsonProperty("trustedBy") + public final String trustedBy; + + @JsonCreator + public Port(@JsonProperty("port") Integer port, @JsonProperty("trustedBy") String trustedBy) { + this.port = port; + this.trustedBy = trustedBy; + } + + public String getTrustedBy() { + return trustedBy; + } + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/bindings/GetNodesResponse.java 
b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/bindings/GetNodesResponse.java new file mode 100644 index 00000000000..b744c935247 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/bindings/GetNodesResponse.java @@ -0,0 +1,25 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.configserver.noderepository.bindings; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; + +import java.util.Collections; +import java.util.List; + +/** + * This class represents a response from the /nodes/v2/node/ API. It is designed to be + * usable by any module, by not depending itself on any module-specific classes. + */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class GetNodesResponse { + + public final List nodes; + + @JsonCreator + public GetNodesResponse(@JsonProperty("nodes") List nodes) { + this.nodes = Collections.unmodifiableList(nodes); + } + +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/bindings/GetWireguardResponse.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/bindings/GetWireguardResponse.java new file mode 100644 index 00000000000..572323d733b --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/bindings/GetWireguardResponse.java @@ -0,0 +1,50 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.configserver.noderepository.bindings; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; + +import java.util.List; + +/** + * A response from the /nodes/v2/wireguard api. + * + * @author gjoranv + */ +@JsonIgnoreProperties(ignoreUnknown = true) +@JsonInclude(JsonInclude.Include.NON_NULL) +public class GetWireguardResponse { + + public final List configservers; + + @JsonCreator + public GetWireguardResponse(@JsonProperty("configservers") List configservers) { + this.configservers = configservers; + } + + @JsonIgnoreProperties(ignoreUnknown = true) + public static class Configserver { + + @JsonProperty("hostname") + public String hostname; + + @JsonProperty("ipAddresses") + public List ipAddresses; + + @JsonProperty("wireguard") + public NodeRepositoryNode.WireguardKeyWithTimestamp wireguardKeyWithTimestamp; + + + // TODO wg: remove when all nodes use new key+timestamp format + @JsonProperty("wireguardPubkey") + @JsonInclude(JsonInclude.Include.NON_EMPTY) + public String wireguardPubkey; + @JsonProperty("wireguardKeyTimestamp") + @JsonInclude(JsonInclude.Include.NON_EMPTY) + public Long wireguardKeyTimestamp; + + } + +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/bindings/NodeRepositoryNode.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/bindings/NodeRepositoryNode.java new file mode 100644 index 00000000000..c377d521648 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/bindings/NodeRepositoryNode.java @@ -0,0 +1,279 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.configserver.noderepository.bindings; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.JsonNode; + +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * @author freva + */ +@JsonIgnoreProperties(ignoreUnknown = true) +@JsonInclude(JsonInclude.Include.NON_NULL) +public class NodeRepositoryNode { + + @JsonProperty("state") + public String state; + @JsonProperty("hostname") + public String hostname; + @JsonProperty("ipAddresses") + public Set ipAddresses; + @JsonProperty("additionalIpAddresses") + public Set additionalIpAddresses; + @JsonProperty("id") + public String id; + @JsonProperty("cloudAccount") + public String cloudAccount; + @JsonProperty("flavor") + public String flavor; + @JsonProperty("resources") + public NodeResources resources; + @JsonProperty("realResources") + public NodeResources realResources; + @JsonProperty("membership") + public Membership membership; + @JsonProperty("owner") + public Owner owner; + @JsonProperty("restartGeneration") + public Long restartGeneration; + @JsonProperty("rebootGeneration") + public Long rebootGeneration; + @JsonProperty("currentRestartGeneration") + public Long currentRestartGeneration; + @JsonProperty("currentRebootGeneration") + public Long currentRebootGeneration; + @JsonProperty("vespaVersion") + public String vespaVersion; + @JsonProperty("wantedVespaVersion") + public String wantedVespaVersion; + @JsonProperty("currentOsVersion") + public String currentOsVersion; + @JsonProperty("wantedOsVersion") + public String wantedOsVersion; + @JsonProperty("currentFirmwareCheck") + public Long currentFirmwareCheck; + @JsonProperty("wantedFirmwareCheck") + public Long wantedFirmwareCheck; + @JsonProperty("modelName") + public String modelName; + @JsonProperty("failCount") + public Integer 
failCount; + @JsonProperty("environment") + public String environment; + @JsonProperty("reservedTo") + public String reservedTo; + @JsonProperty("type") + public String type; + @JsonProperty("wantedDockerImage") + public String wantedDockerImage; + @JsonProperty("currentDockerImage") + public String currentDockerImage; + @JsonProperty("parentHostname") + public String parentHostname; + @JsonProperty("wantToRetire") + public Boolean wantToRetire; + @JsonProperty("wantToDeprovision") + public Boolean wantToDeprovision; + @JsonProperty("wantToRebuild") + public Boolean wantToRebuild; + @JsonProperty("orchestratorStatus") + public String orchestratorStatus; + @JsonProperty("archiveUri") + public String archiveUri; + @JsonProperty("exclusiveTo") + public String exclusiveTo; + @JsonProperty("history") + public List history; + @JsonProperty("trustStore") + @JsonInclude(JsonInclude.Include.NON_EMPTY) + public List trustStore; + @JsonProperty("wireguard") + public WireguardKeyWithTimestamp wireguardKeyWithTimestamp; + + // TODO wg: remove separate key and timestamp when all nodes use new keyWithTimestamp + @JsonProperty("wireguardPubkey") + @JsonInclude(JsonInclude.Include.NON_EMPTY) + public String wireguardPubkey; + @JsonProperty("wireguardKeyTimestamp") + @JsonInclude(JsonInclude.Include.NON_EMPTY) + public Long wireguardKeyTimestamp; + + @JsonProperty("reports") + public Map reports = null; + + @Override + public String toString() { + return "NodeRepositoryNode{" + + "state='" + state + '\'' + + ", hostname='" + hostname + '\'' + + ", ipAddresses=" + ipAddresses + + ", additionalIpAddresses=" + additionalIpAddresses + + ", id='" + id + '\'' + + ", flavor='" + flavor + '\'' + + ", resources=" + resources + + ", realResources=" + realResources + + ", membership=" + membership + + ", owner=" + owner + + ", restartGeneration=" + restartGeneration + + ", rebootGeneration=" + rebootGeneration + + ", currentRestartGeneration=" + currentRestartGeneration + + ", 
currentRebootGeneration=" + currentRebootGeneration + + ", vespaVersion='" + vespaVersion + '\'' + + ", wantedVespaVersion='" + wantedVespaVersion + '\'' + + ", currentOsVersion='" + currentOsVersion + '\'' + + ", wantedOsVersion='" + wantedOsVersion + '\'' + + ", currentFirmwareCheck=" + currentFirmwareCheck + + ", wantedFirmwareCheck=" + wantedFirmwareCheck + + ", modelName='" + modelName + '\'' + + ", failCount=" + failCount + + ", environment='" + environment + '\'' + + ", reservedTo='" + reservedTo + '\'' + + ", type='" + type + '\'' + + ", wantedDockerImage='" + wantedDockerImage + '\'' + + ", currentDockerImage='" + currentDockerImage + '\'' + + ", parentHostname='" + parentHostname + '\'' + + ", wantToRetire=" + wantToRetire + + ", wantToDeprovision=" + wantToDeprovision + + ", wantToRebuild=" + wantToRebuild + + ", orchestratorStatus='" + orchestratorStatus + '\'' + + ", archiveUri='" + archiveUri + '\'' + + ", exclusiveTo='" + exclusiveTo + '\'' + + ", history=" + history + + ", trustStore=" + trustStore + + ", wireguard=" + wireguardKeyTimestamp + + ", reports=" + reports + + '}'; + } + + @JsonIgnoreProperties(ignoreUnknown = true) + public static class WireguardKeyWithTimestamp { + @JsonProperty("key") + public String key; + @JsonProperty("timestamp") + public long timestamp; + + public WireguardKeyWithTimestamp(@JsonProperty("key") String key, @JsonProperty("timestamp") long timestamp) { + this.key = key; + this.timestamp = timestamp; + } + } + + @JsonIgnoreProperties(ignoreUnknown = true) + public static class Owner { + @JsonProperty("tenant") + public String tenant; + @JsonProperty("application") + public String application; + @JsonProperty("instance") + public String instance; + + public String toString() { + return "Owner {" + + " tenant = " + tenant + + " application = " + application + + " instance = " + instance + + " }"; + } + } + + @JsonIgnoreProperties(ignoreUnknown = true) + public static class Membership { + @JsonProperty("clustertype") + 
public String clusterType; + @JsonProperty("clusterid") + public String clusterId; + @JsonProperty("group") + public String group; + @JsonProperty("index") + public int index; + @JsonProperty("retired") + public boolean retired; + + @Override + public String toString() { + return "Membership {" + + " clusterType = " + clusterType + + " clusterId = " + clusterId + + " group = " + group + + " index = " + index + + " retired = " + retired + + " }"; + } + } + + @JsonIgnoreProperties(ignoreUnknown = true) + @JsonInclude(JsonInclude.Include.NON_NULL) + public static class NodeResources { + @JsonProperty + public Double vcpu; + @JsonProperty + public Double memoryGb; + @JsonProperty + public Double diskGb; + @JsonProperty + public Double bandwidthGbps; + @JsonProperty + public String diskSpeed; + @JsonProperty + public String storageType; + @JsonProperty + public String architecture; + @JsonProperty + public Integer gpuCount; + @JsonProperty + public Double gpuMemoryGb; + + @Override + public String toString() { + return "NodeResources{" + + "vcpu=" + vcpu + + ", memoryGb=" + memoryGb + + ", diskGb=" + diskGb + + ", bandwidthGbps=" + bandwidthGbps + + ", diskSpeed='" + diskSpeed + '\'' + + ", storageType='" + storageType + '\'' + + ", architecture='" + architecture + '\'' + + ", gpuCount=" + gpuCount + + ", gpuMemoryGb=" + gpuMemoryGb + + '}'; + } + } + + @JsonIgnoreProperties(ignoreUnknown = true) + @JsonInclude(JsonInclude.Include.NON_NULL) + public static class Event { + @JsonProperty + public String event; + @JsonProperty + public String agent; + @JsonProperty + public Long at; + + @Override + public String toString() { + return "Event{" + + "agent=" + agent + + ", event=" + event + + ", at=" + at + + '}'; + } + } + @JsonIgnoreProperties(ignoreUnknown = true) + @JsonInclude(JsonInclude.Include.NON_NULL) + public static class TrustStoreItem { + @JsonProperty ("fingerprint") + public String fingerprint; + @JsonProperty ("expiry") + public long expiry; + + public 
TrustStoreItem(@JsonProperty("fingerprint") String fingerprint, @JsonProperty("expiry") long expiry) { + this.fingerprint = fingerprint; + this.expiry = expiry; + } + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/package-info.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/package-info.java new file mode 100644 index 00000000000..bf83a1a4bdf --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/package-info.java @@ -0,0 +1,5 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.vespa.hosted.node.admin.configserver.noderepository; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/reports/BaseReport.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/reports/BaseReport.java new file mode 100644 index 00000000000..ccc1f469e1e --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/reports/BaseReport.java @@ -0,0 +1,147 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.configserver.noderepository.reports; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonGetter; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; + +import java.util.Objects; +import java.util.Optional; +import java.util.OptionalLong; +import java.util.stream.Stream; + +import static com.yahoo.yolean.Exceptions.uncheck; + +/** + * The most basic form of a node repository report on a node. + * + *

<p>This class can be used directly for simple reports, or can be used as a base class for richer reports. + * + *

<p><strong>Subclass requirements</strong> + * + *

<ol> + *
<li>A subclass must be a Jackson class that can be mapped to {@link JsonNode} with {@link #toJsonNode()}, + * and from {@link JsonNode} with {@link #fromJsonNode(JsonNode, Class)}.</li> + *
<li>A subclass must override {@link #updates(BaseReport)} and make sure to return true if + * {@code super.updates(current)}.</li> + *
</ol>
+ * + * @author hakonhall + */ +// @Immutable +@JsonIgnoreProperties(ignoreUnknown = true) +@JsonInclude(JsonInclude.Include.NON_NULL) +public class BaseReport { + /** The time the report was created, in milliseconds since Epoch. */ + public static final String CREATED_FIELD = "createdMillis"; + /** The description of the error (implies wanting to fail out node). */ + public static final String DESCRIPTION_FIELD = "description"; + /** The type of report, see {@link Type} enum. */ + public static final String TYPE_FIELD = "type"; + + protected static final ObjectMapper mapper = new ObjectMapper(); + + private final OptionalLong createdMillis; + private final Optional description; + private final Type type; + + public enum Type { + /** The default type if none given, or not recognized. */ + UNSPECIFIED, + /** A program to be executed once. */ + ONCE, + /** The host has a soft failure and should be parked for manual inspection. */ + SOFT_FAIL, + /** The host has a hard failure and should be given back to siteops. */ + HARD_FAIL; + + public static Optional deserialize(String typeString) { + return Stream.of(Type.values()).filter(type -> type.name().equalsIgnoreCase(typeString)).findAny(); + } + + public String serialize() { return name(); } + } + + @JsonCreator + public BaseReport(@JsonProperty(CREATED_FIELD) Long createdMillisOrNull, + @JsonProperty(DESCRIPTION_FIELD) String descriptionOrNull, + @JsonProperty(TYPE_FIELD) Type typeOrNull) { + this.createdMillis = createdMillisOrNull == null ? OptionalLong.empty() : OptionalLong.of(createdMillisOrNull); + this.description = Optional.ofNullable(descriptionOrNull); + this.type = typeOrNull == null ? Type.UNSPECIFIED : typeOrNull; + } + + public BaseReport(Long createdMillisOrNull, String descriptionOrNull) { + this(createdMillisOrNull, descriptionOrNull, Type.UNSPECIFIED); + } + + @JsonGetter(CREATED_FIELD) + public final Long getCreatedMillisOrNull() { + return createdMillis.isPresent() ? 
createdMillis.getAsLong() : null; + } + + @JsonGetter(DESCRIPTION_FIELD) + public final String getDescriptionOrNull() { + return description.orElse(null); + } + + /** null is returned on UNSPECIFIED to avoid noisy reports. */ + @JsonGetter(TYPE_FIELD) + public final Type getTypeOrNull() { + return type == Type.UNSPECIFIED ? null : type; + } + + public Type getType() { + return type; + } + + /** + * Assume {@code this} is a freshly made report, and {@code current} is the report in the node repository: + * Return true iff the node repository should be updated. + * + *

The createdMillis field is ignored in this method (unless it is earlier than {@code current}'s?). + */ + public boolean updates(BaseReport current) { + if (this == current) return false; + if (this.getClass() != current.getClass()) return true; + return !Objects.equals(description, current.description) || + !Objects.equals(type, current.type); + } + + /** A variant of {@link #updates(BaseReport)} handling possibly absent reports, whether new or old. */ + public static + boolean updates2(Optional newReport, Optional oldReport) { + if (newReport.isPresent() ^ oldReport.isPresent()) return true; + return newReport.map(r -> r.updates(oldReport.get())).orElse(false); + } + + public static BaseReport fromJsonNode(JsonNode jsonNode) { + return fromJsonNode(jsonNode, BaseReport.class); + } + + public static R fromJsonNode(JsonNode jsonNode, Class jacksonClass) { + return uncheck(() -> mapper.treeToValue(jsonNode, jacksonClass)); + } + + public static BaseReport fromJson(String json) { + return fromJson(json, BaseReport.class); + } + + public static R fromJson(String json, Class jacksonClass) { + return uncheck(() -> mapper.readValue(json, jacksonClass)); + } + + /** Returns {@code this} as a {@link JsonNode}. */ + public JsonNode toJsonNode() { + return uncheck(() -> mapper.valueToTree(this)); + } + + /** Returns {@code this} as a compact JSON string. */ + public String toJson() { + return uncheck(() -> mapper.writeValueAsString(this)); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/reports/DropDocumentsReport.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/reports/DropDocumentsReport.java new file mode 100644 index 00000000000..2bc8bea013a --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/reports/DropDocumentsReport.java @@ -0,0 +1,55 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.configserver.noderepository.reports; + +import com.fasterxml.jackson.annotation.JsonGetter; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; + +/** + * @author freva + */ +@JsonIgnoreProperties(ignoreUnknown = true) +@JsonInclude(JsonInclude.Include.NON_NULL) +public class DropDocumentsReport extends BaseReport { + private static final String REPORT_ID = "dropDocuments"; + private static final String DROPPED_AT_FIELD = "droppedAt"; + private static final String READIED_AT_FIELD = "readiedAt"; + private static final String STARTED_AT_FIELD = "startedAt"; + + private final Long droppedAt; + private final Long readiedAt; + private final Long startedAt; + + public DropDocumentsReport(@JsonProperty(CREATED_FIELD) Long createdMillisOrNull, + @JsonProperty(DROPPED_AT_FIELD) Long droppedAtOrNull, + @JsonProperty(READIED_AT_FIELD) Long readiedAtOrNull, + @JsonProperty(STARTED_AT_FIELD) Long startedAtOrNull) { + super(createdMillisOrNull, null); + this.droppedAt = droppedAtOrNull; + this.readiedAt = readiedAtOrNull; + this.startedAt = startedAtOrNull; + } + + @JsonGetter(DROPPED_AT_FIELD) + public Long droppedAt() { return droppedAt; } + + @JsonGetter(READIED_AT_FIELD) + public Long readiedAt() { return readiedAt; } + + @JsonGetter(STARTED_AT_FIELD) + public Long startedAt() { return startedAt; } + + public DropDocumentsReport withDroppedAt(long droppedAt) { + return new DropDocumentsReport(getCreatedMillisOrNull(), droppedAt, readiedAt, startedAt); + } + + public DropDocumentsReport withStartedAt(long startedAt) { + return new DropDocumentsReport(getCreatedMillisOrNull(), droppedAt, readiedAt, startedAt); + } + + public static String reportId() { + return REPORT_ID; + } + +} diff --git 
/**
 * Abstraction for communicating with Orchestrator.
 *
 * @author bakksjo
 */
public interface Orchestrator {

    /**
     * Suspends a host.
     *
     * @param hostName the name of the host to suspend
     * @throws OrchestratorException if suspend was denied
     * @throws OrchestratorNotFoundException if host is unknown to the orchestrator
     */
    void suspend(String hostName);

    /**
     * Resumes a host.
     *
     * @param hostName the name of the host to resume
     * @throws OrchestratorException if resume was denied
     * @throws OrchestratorNotFoundException if host is unknown to the orchestrator
     */
    void resume(String hostName);

    /**
     * Suspends a list of nodes on a parent.
     *
     * @param parentHostName the name of the parent host
     * @param hostNames      the names of the nodes to suspend; declared as {@code List<String>}
     *                       rather than a raw {@code List} so callers get compile-time checking
     * @throws OrchestratorException if batch suspend was denied
     */
    void suspend(String parentHostName, List<String> hostNames);

}
+package com.yahoo.vespa.hosted.node.admin.configserver.orchestrator; + +import com.yahoo.vespa.hosted.node.admin.configserver.ConfigServerApi; +import com.yahoo.vespa.hosted.node.admin.configserver.ConnectionException; +import com.yahoo.vespa.hosted.node.admin.configserver.HttpException; +import com.yahoo.vespa.hosted.node.admin.nodeadmin.ConvergenceException; +import com.yahoo.vespa.orchestrator.restapi.wire.BatchOperationResult; +import com.yahoo.vespa.orchestrator.restapi.wire.HostStateChangeDenialReason; +import com.yahoo.vespa.orchestrator.restapi.wire.UpdateHostResponse; + +import java.net.URI; +import java.time.Duration; +import java.util.List; +import java.util.Optional; +import java.util.logging.Logger; + +/** + * @author stiankri + * @author bakksjo + * @author dybis + */ +public class OrchestratorImpl implements Orchestrator { + private static final Logger logger = Logger.getLogger(OrchestratorImpl.class.getName()); + + // The server-side Orchestrator has an internal timeout of 10s. + // + // Note: A 409 has been observed to be returned after 33s in a case possibly involving + // zk leader election (which is unfortunate as it is difficult to differentiate between + // transient timeouts (do not allow suspend on timeout) and the config server being + // permanently down (allow suspend)). For now we'd like to investigate such long + // requests so keep the timeout low(er). + private static final Duration CONNECTION_TIMEOUT = Duration.ofSeconds(15); + + // TODO: Find a way to avoid duplicating this (present in orchestrator's services.xml also). 
+ private static final String ORCHESTRATOR_PATH_PREFIX = "/orchestrator"; + static final String ORCHESTRATOR_PATH_PREFIX_HOST_API + = ORCHESTRATOR_PATH_PREFIX + "/v1/hosts"; + static final String ORCHESTRATOR_PATH_PREFIX_HOST_SUSPENSION_API + = ORCHESTRATOR_PATH_PREFIX + "/v1/suspensions/hosts"; + + private final ConfigServerApi configServerApi; + + public OrchestratorImpl(ConfigServerApi configServerApi) { + this.configServerApi = configServerApi; + } + + @Override + public void suspend(final String hostName) { + UpdateHostResponse response; + try { + var params = new ConfigServerApi + .Params() + .setConnectionTimeout(CONNECTION_TIMEOUT) + .setRetryPolicy(createRetryPolicyForSuspend()); + response = configServerApi.put(getSuspendPath(hostName), Optional.empty(), UpdateHostResponse.class, params); + } catch (HttpException.NotFoundException n) { + throw new OrchestratorNotFoundException("Failed to suspend " + hostName + ", host not found"); + } catch (HttpException e) { + throw new OrchestratorException("Failed to suspend " + hostName + ": " + e); + } catch (ConnectionException e) { + throw ConvergenceException.ofTransient("Failed to suspend " + hostName + ": " + e.getMessage()); + } catch (RuntimeException e) { + throw new RuntimeException("Got error on suspend", e); + } + + Optional.ofNullable(response.reason()).ifPresent(reason -> { + throw new OrchestratorException(reason.message()); + }); + } + + private static ConfigServerApi.RetryPolicy createRetryPolicyForSuspend() { + return new ConfigServerApi.RetryPolicy<>() { + @Override + public boolean tryNextConfigServer(URI configServerEndpoint, UpdateHostResponse response) { + HostStateChangeDenialReason reason = response.reason(); + if (reason == null) { + return false; + } + + // The config server has likely just bootstrapped, so try the next. 
+ if ("unknown-service-status".equals(reason.constraintName())) { + // Warn for now and until this feature has proven to work well + logger.warning("Config server at [" + configServerEndpoint + + "] failed with transient error (will try next): " + + reason.message()); + + return true; + } + + return false; + } + }; + } + + @Override + public void suspend(String parentHostName, List hostNames) { + final BatchOperationResult batchOperationResult; + try { + var params = new ConfigServerApi.Params().setConnectionTimeout(CONNECTION_TIMEOUT); + String hostnames = String.join("&hostname=", hostNames); + String url = String.format("%s/%s?hostname=%s", ORCHESTRATOR_PATH_PREFIX_HOST_SUSPENSION_API, + parentHostName, hostnames); + batchOperationResult = configServerApi.put(url, Optional.empty(), BatchOperationResult.class, params); + } catch (HttpException e) { + throw new OrchestratorException("Failed to batch suspend for " + parentHostName + ": " + e); + } catch (ConnectionException e) { + throw ConvergenceException.ofTransient("Failed to batch suspend for " + parentHostName + ": " + e.getMessage()); + } catch (RuntimeException e) { + throw new RuntimeException("Got error on batch suspend for " + parentHostName + ", with nodes " + hostNames, e); + } + + batchOperationResult.getFailureReason().ifPresent(reason -> { + throw new OrchestratorException(reason); + }); + } + + @Override + public void resume(final String hostName) { + UpdateHostResponse response; + try { + String path = getSuspendPath(hostName); + response = configServerApi.delete(path, UpdateHostResponse.class); + } catch (HttpException.NotFoundException n) { + throw new OrchestratorNotFoundException("Failed to resume " + hostName + ", host not found"); + } catch (HttpException e) { + throw new OrchestratorException("Failed to resume " + hostName + ": " + e); + } catch (ConnectionException e) { + throw ConvergenceException.ofTransient("Failed to resume " + hostName + ": " + e.getMessage()); + } catch 
(RuntimeException e) { + throw new RuntimeException("Got error on resume", e); + } + + Optional.ofNullable(response.reason()).ifPresent(reason -> { + throw new OrchestratorException(reason.message()); + }); + } + + private String getSuspendPath(String hostName) { + return ORCHESTRATOR_PATH_PREFIX_HOST_API + "/" + hostName + "/suspended"; + } + +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorNotFoundException.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorNotFoundException.java new file mode 100644 index 00000000000..8025eb8df93 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorNotFoundException.java @@ -0,0 +1,9 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.configserver.orchestrator; + +@SuppressWarnings("serial") +public class OrchestratorNotFoundException extends OrchestratorException { + public OrchestratorNotFoundException(String message) { + super(message, true); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/package-info.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/package-info.java new file mode 100644 index 00000000000..6c89fbce90b --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/package-info.java @@ -0,0 +1,5 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+@ExportPackage +package com.yahoo.vespa.hosted.node.admin.configserver.orchestrator; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/package-info.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/package-info.java new file mode 100644 index 00000000000..af925db8b4e --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/package-info.java @@ -0,0 +1,5 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.vespa.hosted.node.admin.configserver; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/state/HealthCode.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/state/HealthCode.java new file mode 100644 index 00000000000..a82a82e56b0 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/state/HealthCode.java @@ -0,0 +1,32 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
/**
 * The healthiness of a remote Vespa server based on REST API
 *
 * @author hakon
 */
public enum HealthCode {
    DOWN("down"),
    INITIALIZING("initializing"),
    UP("up");

    /** The lower-case textual form of this code. */
    private final String code;

    HealthCode(String code) {
        this.code = code;
    }

    /**
     * Returns the health code matching the given string, ignoring case.
     *
     * <p>The upper-casing is done with {@link java.util.Locale#ROOT}: with the default locale,
     * e.g. a Turkish one maps {@code 'i'} to a dotted capital I, so {@code "initializing"} would
     * not match {@link #INITIALIZING}.</p>
     *
     * @param code the textual code, e.g. {@code "up"}
     * @return the matching constant
     * @throws IllegalArgumentException if no constant matches
     * @throws NullPointerException if {@code code} is null
     */
    public static HealthCode fromString(String code) {
        return HealthCode.valueOf(code.toUpperCase(java.util.Locale.ROOT));
    }

    /** Returns the lower-case textual form of this code. */
    public String asString() {
        return code;
    }

    @Override
    public String toString() {
        return asString();
    }
}
+package com.yahoo.vespa.hosted.node.admin.configserver.state; + +import com.yahoo.vespa.hosted.node.admin.configserver.ConfigServerApi; +import com.yahoo.vespa.hosted.node.admin.configserver.ConnectionException; +import com.yahoo.vespa.hosted.node.admin.configserver.HttpException; +import com.yahoo.vespa.hosted.node.admin.configserver.state.bindings.HealthResponse; + +/** + * @author hakon + */ +public class StateImpl implements State { + private final ConfigServerApi configServerApi; + + public StateImpl(ConfigServerApi configServerApi) { + this.configServerApi = configServerApi; + } + + @Override + public HealthCode getHealth() { + try { + HealthResponse response = configServerApi.get("/state/v1/health", HealthResponse.class); + return HealthCode.fromString(response.status.code); + } catch (ConnectionException | HttpException e) { + return HealthCode.DOWN; + } + } + +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/state/bindings/HealthResponse.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/state/bindings/HealthResponse.java new file mode 100644 index 00000000000..d0b94324941 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/state/bindings/HealthResponse.java @@ -0,0 +1,36 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.configserver.state.bindings; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; + +/** + * Response from /state/v1/health + * + * @author hakon + */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class HealthResponse { + @JsonProperty("status") + public Status status = new Status(); + + @JsonIgnoreProperties(ignoreUnknown = true) + public static class Status { + @JsonProperty("code") + public String code = "down"; + + @Override + public String toString() { + return "Status{" + + "code='" + code + '\'' + + '}'; + } + } + + @Override + public String toString() { + return "HealthResponse{" + + "status=" + status + + '}'; + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/state/package-info.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/state/package-info.java new file mode 100644 index 00000000000..fd237ec6cb4 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/state/package-info.java @@ -0,0 +1,5 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.vespa.hosted.node.admin.configserver.state; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/Container.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/Container.java new file mode 100644 index 00000000000..f6f9ebd79e9 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/Container.java @@ -0,0 +1,85 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.container; + +import com.yahoo.config.provision.DockerImage; + +import java.time.Instant; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +/** + * A Podman container. + * + * @author mpolden + */ +public class Container extends PartialContainer { + + private final String hostname; + private final ContainerResources resources; + private final int conmonPid; + private final List networks; + + public Container(ContainerId id, ContainerName name, Instant createdAt, State state, String imageId, DockerImage image, + Map labels, int pid, int conmonPid, String hostname, + ContainerResources resources, List networks, boolean managed) { + super(id, name, createdAt, state, imageId, image, labels, pid, managed); + this.hostname = Objects.requireNonNull(hostname); + this.resources = Objects.requireNonNull(resources); + this.conmonPid = conmonPid; + this.networks = List.copyOf(Objects.requireNonNull(networks)); + } + + /** The hostname of this, if any */ + public String hostname() { + return hostname; + } + + /** Resource limits for this*/ + public ContainerResources resources() { + return resources; + } + + /** Pid of the conmon process for this container */ + public int conmonPid() { + return conmonPid; + } + + /** The networks used by this */ + public List networks() { + return networks; + } + + @Override + public String toString() { + return "Container{" + + "hostname='" + hostname + '\'' + + ", resources=" + resources + + ", conmonPid=" + conmonPid + + ", networks=" + networks + + '}'; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + if (!super.equals(o)) return false; + Container that = (Container) o; + return conmonPid == that.conmonPid && hostname.equals(that.hostname) && resources.equals(that.resources) && networks.equals(that.networks); + } + + @Override + public int hashCode() { + return 
Objects.hash(super.hashCode(), hostname, resources, conmonPid, networks); + } + + /** The network of a container */ + public record Network(String name, String ipv4Address) { + public Network { + Objects.requireNonNull(name); + Objects.requireNonNull(ipv4Address); + } + } + +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerEngine.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerEngine.java new file mode 100644 index 00000000000..26c3ba2a45b --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerEngine.java @@ -0,0 +1,70 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.container; + +import com.yahoo.config.provision.DockerImage; +import com.yahoo.vespa.hosted.node.admin.component.TaskContext; +import com.yahoo.vespa.hosted.node.admin.container.image.Image; +import com.yahoo.vespa.hosted.node.admin.nodeagent.ContainerData; +import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext; +import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixUser; +import com.yahoo.vespa.hosted.node.admin.task.util.process.CommandLine; +import com.yahoo.vespa.hosted.node.admin.task.util.process.CommandResult; + +import java.time.Duration; +import java.util.List; +import java.util.Optional; + +/** + * Interface for a container engine, such as Docker or Podman. 
+ * + * @author mpolden + */ +public interface ContainerEngine { + + /** + * Create a new container + * @return ContainerData that can be used to write files inside container + */ + ContainerData createContainer(NodeAgentContext context, ContainerResources containerResources); + + /** Start a created container */ + void startContainer(NodeAgentContext context); + + /** Update an existing container with new resources */ + void updateContainer(NodeAgentContext context, ContainerId containerId, ContainerResources containerResources); + + /** Remove given container. The container will be stopped if necessary */ + void removeContainer(TaskContext context, PartialContainer container); + + /** Get container for given context */ + Optional getContainer(NodeAgentContext context); + + /** Returns all containers known by this */ + List listContainers(TaskContext context); + + /** Returns the network interface used by container in given context */ + String networkInterface(NodeAgentContext context); + + /** Execute command inside container as given user. Ignores non-zero exit code */ + CommandResult execute(NodeAgentContext context, UnixUser user, Duration timeout, String... command); + + /** Execute command inside the container's network namespace. Throws on non-zero exit code */ + CommandResult executeInNetworkNamespace(NodeAgentContext context, CommandLine.Options options, String... command); + + default CommandResult executeInNetworkNamespace(NodeAgentContext context, String... 
command) { + return executeInNetworkNamespace(context, new CommandLine.Options(), command); + } + + /** Download given image */ + void pullImage(TaskContext context, DockerImage image, RegistryCredentials registryCredentials); + + /** Returns whether given image is already downloaded */ + boolean hasImage(TaskContext context, DockerImage image); + + /** Remove image by id */ + void removeImage(TaskContext context, String id); + + /** Returns images available in this */ + List listImages(TaskContext context); + +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerId.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerId.java new file mode 100644 index 00000000000..5a800efcbd0 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerId.java @@ -0,0 +1,36 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// +package com.yahoo.vespa.hosted.node.admin.container; + +import java.util.Objects; + +/** + * The ID of a container. 
/**
 * Type-safe value wrapper for docker container names.
 *
 * <p>A name is non-null and consists of one or more ASCII letters, digits, underscores or
 * hyphens. Instances are immutable and ordered by their name.</p>
 *
 * @author bakksjo
 */
public class ContainerName implements Comparable<ContainerName> {

    private static final Pattern LEGAL_CONTAINER_NAME_PATTERN = Pattern.compile("^[a-zA-Z0-9_-]+$");
    private final String name;

    /**
     * Creates a container name.
     *
     * @param name the name, must match {@code ^[a-zA-Z0-9_-]+$}
     * @throws NullPointerException if {@code name} is null
     * @throws IllegalArgumentException if {@code name} contains illegal characters or is empty
     */
    public ContainerName(final String name) {
        this.name = Objects.requireNonNull(name);
        if (! LEGAL_CONTAINER_NAME_PATTERN.matcher(name).matches()) {
            throw new IllegalArgumentException("Illegal container name: " + name + ". Must match " +
                                               LEGAL_CONTAINER_NAME_PATTERN.pattern());
        }
    }

    /** Returns the name as a plain string. */
    public String asString() {
        return name;
    }

    /**
     * Returns the container name made from the part of the given hostname before the first dot
     * (the whole hostname if it contains no dot).
     *
     * @throws IllegalArgumentException if that part is not a legal container name
     */
    public static ContainerName fromHostname(final String hostName) {
        return new ContainerName(hostName.split("\\.", 2)[0]);
    }

    @Override
    public int hashCode() {
        return name.hashCode();
    }

    @Override
    public boolean equals(Object o) {
        if (!(o instanceof ContainerName other)) {
            return false;
        }

        return Objects.equals(name, other.name);
    }

    @Override
    public String toString() {
        return getClass().getSimpleName() + " {"
                + " name=" + name
                + " }";
    }

    /** Orders container names by the natural (lexicographic) order of their string form. */
    @Override
    public int compareTo(ContainerName o) {
        return name.compareTo(o.name);
    }

}
*/
+    HOST_NETWORK("host");
+
+    private final String networkName;
+
+    ContainerNetworkMode(String networkName) {
+        this.networkName = Objects.requireNonNull(networkName);
+    }
+
+    public String networkName() {
+        return networkName;
+    }
+
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperations.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperations.java
new file mode 100644
index 00000000000..ce26f8e69e7
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperations.java
@@ -0,0 +1,153 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.container;
+
+import com.yahoo.config.provision.DockerImage;
+import com.yahoo.jdisc.Timer;
+import com.yahoo.vespa.hosted.node.admin.cgroup.Cgroup;
+import com.yahoo.vespa.hosted.node.admin.component.TaskContext;
+import com.yahoo.vespa.hosted.node.admin.container.image.ContainerImageDownloader;
+import com.yahoo.vespa.hosted.node.admin.container.image.ContainerImagePruner;
+import com.yahoo.vespa.hosted.node.admin.nodeagent.ContainerData;
+import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext;
+import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixUser;
+import com.yahoo.vespa.hosted.node.admin.task.util.process.CommandLine;
+import com.yahoo.vespa.hosted.node.admin.task.util.process.CommandResult;
+
+import java.nio.file.FileSystem;
+import java.time.Duration;
+import java.util.List;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.Set;
+
+/**
+ * High-level interface for container operations. Code managing containers should use this and not
+ * {@link ContainerEngine} directly.
+ *
+ * @author hakonhall
+ * @author mpolden
+ */
+public class ContainerOperations {
+
+    private final ContainerEngine containerEngine;
+    private final ContainerImageDownloader imageDownloader;
+    private final ContainerImagePruner imagePruner;
+    private final ContainerStatsCollector containerStatsCollector;
+
+    public ContainerOperations(ContainerEngine containerEngine, Cgroup cgroup, FileSystem fileSystem, Timer timer) {
+        this.containerEngine = Objects.requireNonNull(containerEngine);
+        this.imageDownloader = new ContainerImageDownloader(containerEngine, timer);
+        this.imagePruner = new ContainerImagePruner(containerEngine, timer);
+        this.containerStatsCollector = new ContainerStatsCollector(containerEngine, cgroup, fileSystem);
+    }
+
+    public ContainerData createContainer(NodeAgentContext context, ContainerResources containerResources) {
+        return containerEngine.createContainer(context, containerResources);
+    }
+
+    public void startContainer(NodeAgentContext context) {
+        containerEngine.startContainer(context);
+    }
+
+    public void removeContainer(NodeAgentContext context, Container container) {
+        containerEngine.removeContainer(context, container);
+    }
+
+    public void updateContainer(NodeAgentContext context, ContainerId containerId, ContainerResources containerResources) {
+        containerEngine.updateContainer(context, containerId, containerResources);
+    }
+
+    public Optional<Container> getContainer(NodeAgentContext context) {
+        return containerEngine.getContainer(context);
+    }
+
+    /** Pull image asynchronously. Returns true if image is still downloading and false if download is complete */
+    public boolean pullImageAsyncIfNeeded(TaskContext context, DockerImage dockerImage, RegistryCredentialsProvider credentialsProvider) {
+        return !imageDownloader.get(context, dockerImage, credentialsProvider);
+    }
+
+    /** Executes a command inside container identified by given context. Does NOT throw on non-zero exit code */
+    public CommandResult executeCommandInContainer(NodeAgentContext context, UnixUser user, String... command) {
+        return executeCommandInContainer(context, user, CommandLine.DEFAULT_TIMEOUT, command);
+    }
+
+    /** Execute command inside container identified by given context. Does NOT throw on non-zero exit code */
+    public CommandResult executeCommandInContainer(NodeAgentContext context, UnixUser user, Duration timeout, String... command) {
+        return containerEngine.execute(context, user, timeout, command);
+    }
+
+    /** Execute command inside the container's network namespace, identified by given context. Throws on non-zero exit code */
+    public CommandResult executeCommandInNetworkNamespace(NodeAgentContext context, String... command) {
+        return executeCommandInNetworkNamespace(context, new CommandLine.Options(), command);
+    }
+
+    public CommandResult executeCommandInNetworkNamespace(NodeAgentContext context, CommandLine.Options options, String... command) {
+        return containerEngine.executeInNetworkNamespace(context, options, command);
+    }
+
+    /** Resume node. Resuming a node means that it is ready to receive traffic */
+    public String resumeNode(NodeAgentContext context) {
+        return executeNodeCtlInContainer(context, "resume");
+    }
+
+    /**
+     * Suspend node and return output. Suspending a node means the node should be taken temporarily offline,
+     * such that maintenance of the node can be done (upgrading, rebooting, etc).
+     */
+    public String suspendNode(NodeAgentContext context) {
+        return executeNodeCtlInContainer(context, "suspend");
+    }
+
+    /** Restart Vespa inside container. Same as running suspend, stop, start and resume */
+    public String restartVespa(NodeAgentContext context) {
+        return executeNodeCtlInContainer(context, "restart-vespa");
+    }
+
+    /** Start Vespa inside container */
+    public String startServices(NodeAgentContext context) {
+        return executeNodeCtlInContainer(context, "start");
+    }
+
+    /** Stop Vespa inside container */
+    public String stopServices(NodeAgentContext context) {
+        return executeNodeCtlInContainer(context, "stop");
+    }
+
+    /** Get container statistics; empty if the container does not exist or no statistics could be collected */
+    public Optional<ContainerStats> getContainerStats(NodeAgentContext context) {
+        String iface = containerEngine.networkInterface(context);
+        return getContainer(context).flatMap(container -> containerStatsCollector.collect(context, container.id(), container.pid(), iface));
+    }
+
+    /** Returns true if no containers managed by node-admin are running */
+    public boolean noManagedContainersRunning(TaskContext context) {
+        return containerEngine.listContainers(context).stream()
+                .filter(PartialContainer::managed)
+                .noneMatch(container -> container.state() == Container.State.running);
+    }
+
+    /**
+     * Stop and remove all managed containers except the given ones
+     *
+     * @return true if any containers were removed
+     */
+    public boolean retainManagedContainers(TaskContext context, Set<ContainerName> containerNames) {
+        List<PartialContainer> unwanted = containerEngine.listContainers(context).stream()
+                .filter(container -> container.managed() && !containerNames.contains(container.name()))
+                .toList(); // Collect first: removal must not depend on peek()/count() actually traversing the stream
+        unwanted.forEach(container -> containerEngine.removeContainer(context, container));
+        return !unwanted.isEmpty();
+    }
+
+    /** Deletes the local images that are currently not in use by any container and not recently used.
*/
+    public boolean deleteUnusedContainerImages(TaskContext context, List<DockerImage> excludes, Duration minImageAgeToDelete) {
+        List<String> excludedRefs = excludes.stream().map(DockerImage::asString).toList();
+        return imagePruner.removeUnusedImages(context, excludedRefs, minImageAgeToDelete);
+    }
+
+    private String executeNodeCtlInContainer(NodeAgentContext context, String program) {
+        String[] command = new String[] {context.paths().underVespaHome("bin/vespa-nodectl").pathInContainer(), program};
+        return executeCommandInContainer(context, context.users().vespa(), command).getOutput();
+    }
+
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerResources.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerResources.java
new file mode 100644
index 00000000000..05398e90053
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerResources.java
@@ -0,0 +1,135 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.container;
+
+import java.util.Objects;
+
+/**
+ * @author freva
+ */
+public class ContainerResources {
+
+    public static final ContainerResources UNLIMITED = ContainerResources.from(0, 0, 0);
+    public static final int CPU_PERIOD_US = 100_000; // 100 ms
+
+    /**
+     * Hard limit on container's CPU usage: Implemented using Completely Fair Scheduler (CFS) by allocating a given
+     * time within a given period, Container's processes are not bound to any specific CPU, which may create significant
+     * performance degradation as processes are scheduled on another CPU after exhausting the quota.
+     */
+    private final double cpus;
+
+    /**
+     * Soft limit on container's CPU usage: When plenty of CPU cycles are available, all containers use as much
+     * CPU as they need. It prioritizes container CPU resources for the available CPU cycles.
+     * It does not guarantee or reserve any specific CPU access.
+     */
+    private final int cpuShares;
+
+    /** The maximum amount, in bytes, of memory the container can use. */
+    private final long memoryBytes;
+
+    public ContainerResources(double cpus, int cpuShares, long memoryBytes) {
+        this.cpus = cpus;
+        this.cpuShares = cpuShares;
+        this.memoryBytes = memoryBytes;
+
+        if (cpus < 0)
+            throw new IllegalArgumentException("CPUs must be a positive number or 0 for unlimited, was " + cpus);
+        if (cpuShares != 0 && (cpuShares < 2 || cpuShares > 262_144))
+            throw new IllegalArgumentException("CPU shares must be a positive integer in [2, 262144] or 0 for unlimited, was " + cpuShares);
+        if (memoryBytes < 0)
+            throw new IllegalArgumentException("memoryBytes must be a positive integer or 0 for unlimited, was " + memoryBytes);
+    }
+
+    /**
+     * Create container resources from required fields.
+     *
+     * @param maxVcpu the amount of vcpu that allocation policies should allocate exclusively to this container.
+     *                This is a hard upper limit. To allow an unlimited amount use 0.
+     * @param minVcpu the minimal amount of vcpu dedicated to this container.
+     *                To avoid dedicating any cpu at all, use 0.
+     * @param memoryGb the amount of memory that allocation policies should allocate to this container.
+     *                 This is a hard upper limit. To allow the container to allocate an unlimited amount use 0.
+     * @return the container resources encapsulating the parameters
+     */
+    public static ContainerResources from(double maxVcpu, double minVcpu, double memoryGb) {
+        return new ContainerResources(maxVcpu,
+                (int) Math.round(32 * minVcpu),
+                (long) ((1L << 30) * memoryGb));
+    }
+
+    public double cpus() {
+        return cpus;
+    }
+
+    /** Returns the CFS CPU quota per {@link #cpuPeriod()}, or -1 if disabled. */
+    public int cpuQuota() {
+        return cpus > 0 ? (int) (cpus * CPU_PERIOD_US) : -1;
+    }
+
+    /** Duration (in µs) of a single period used as the basis for process scheduling */
+    public int cpuPeriod() {
+        return CPU_PERIOD_US;
+    }
+
+    public int cpuShares() {
+        return cpuShares;
+    }
+
+    public long memoryBytes() {
+        return memoryBytes;
+    }
+
+    /** Returns true iff the memory component(s) of this and other are equal */
+    public boolean equalsMemory(ContainerResources other) {
+        return memoryBytes == other.memoryBytes;
+    }
+
+    /** Returns true iff the CPU component(s) of this and other are equal, within a small tolerance */
+    public boolean equalsCpu(ContainerResources other) {
+        return Math.abs(other.cpus - cpus) < 0.0001 &&
+                // When using CGroups V2, CPU shares (range [2, 262144]) is mapped to CPU weight (range [1, 10000]),
+                // because there are ~26.2 shares/weight, we must allow for small deviation in cpuShares
+                // when comparing ContainerResources created from NodeResources vs one created from reading the
+                // CGroups weight file
+                Math.abs(cpuShares - other.cpuShares) < 28;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+        ContainerResources that = (ContainerResources) o;
+        return equalsMemory(that) && equalsCpu(that);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(memoryBytes); // Only the exactly-compared field: equals() tolerates CPU deviations, so CPU fields must not affect the hash
+    }
+
+
+    /** Returns only the memory component(s) of {@link #toString()} */
+    public String toStringMemory() {
+        return (memoryBytes > 0 ? memoryBytes + "B" : "unlimited") + " memory";
+    }
+
+    /** Returns only the CPU component(s) of {@link #toString()} */
+    public String toStringCpu() {
+        return (cpus > 0 ? String.format("%.2f", cpus) : "unlimited") +" CPUs, " +
+                (cpuShares > 0 ? cpuShares : "unlimited") + " CPU Shares";
+    }
+
+    @Override
+    public String toString() {
+        return toStringCpu() + ", " + toStringMemory();
+    }
+
+    public ContainerResources withMemoryBytes(long memoryBytes) {
+        return new ContainerResources(cpus, cpuShares, memoryBytes);
+    }
+
+    public ContainerResources withUnlimitedCpus() {
+        return new ContainerResources(0, 0, memoryBytes);
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStats.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStats.java
new file mode 100644
index 00000000000..9c1b8db144c
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStats.java
@@ -0,0 +1,94 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.container;
+
+import ai.vespa.validation.Validation;
+
+import java.util.Collections;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+
+/**
+ * CPU, GPU, memory and network statistics collected from a container.
+ *
+ * @author freva
+ */
+public record ContainerStats(Map<String, NetworkStats> networks,
+                             MemoryStats memoryStats,
+                             CpuStats cpuStats,
+                             List<GpuStats> gpuStats) {
+
+    public ContainerStats(Map<String, NetworkStats> networks, MemoryStats memoryStats, CpuStats cpuStats, List<GpuStats> gpuStats) {
+        this.networks = Collections.unmodifiableMap(new LinkedHashMap<>(Objects.requireNonNull(networks))); // defensive copy, keeps iteration order
+        this.memoryStats = Objects.requireNonNull(memoryStats);
+        this.cpuStats = Objects.requireNonNull(cpuStats);
+        this.gpuStats = List.copyOf(Objects.requireNonNull(gpuStats));
+    }
+
+    /**
+     * Statistics for network usage
+     *
+     * @param rxBytes received bytes
+     * @param rxDropped received bytes, which were dropped
+     * @param rxErrors received errors
+     * @param txBytes transmitted bytes
+     * @param txDropped transmitted bytes, which were dropped
+     * @param txErrors transmission errors
+     */
+    public record NetworkStats(long rxBytes, long rxDropped, long rxErrors, long txBytes, long txDropped, long txErrors) {}
+
+    /**
+     * Statistics for memory usage
+     *
+     * @param cache memory used by cache in bytes
+     * @param usage memory usage in bytes
+     * @param limit memory limit in bytes
+     * @param sock network transmission buffers in bytes
+     * @param slab in-kernel data structures in bytes
+     * @param slabReclaimable part of "slab" that might be reclaimed in bytes
+     * @param anon anonymous mappings in bytes
+     */
+    public record MemoryStats(long cache, long usage, long limit, long sock, long slab, long slabReclaimable, long anon) {
+        public MemoryStats(long cache, long usage, long limit) { this(cache, usage, limit, 0, 0, 0, 0); }
+    }
+
+    /**
+     * Statistics for CPU usage
+     *
+     * @param onlineCpus CPU cores
+     * @param systemCpuUsage Total CPU time (in µs) spent executing all the processes on this host
+     * @param totalUsage Total CPU time (in µs) spent running all the processes in this container
+     * @param usageInKernelMode Total CPU time (in µs) spent in kernel mode while executing processes in this container
+     * @param throttledTime Total CPU time (in µs) processes in this container were throttled for
+     * @param throttlingActivePeriods Number of periods with throttling enabled for this container
+     * @param throttledPeriods Number of periods this container hit the throttling limit
+     */
+    public record CpuStats(int onlineCpus,
+                           long systemCpuUsage,
+                           long totalUsage,
+                           long usageInKernelMode,
+                           long throttledTime,
+                           long throttlingActivePeriods,
+                           long throttledPeriods) {}
+
+    /**
+     * GPU statistics
+     *
+     * @param deviceNumber GPU device number
+     * @param loadPercentage Load/utilization in %
+     * @param memoryTotalBytes Total memory, in bytes
+     * @param memoryUsedBytes Memory used, in bytes
+     */
+    public record GpuStats(int deviceNumber, int loadPercentage, long memoryTotalBytes, long memoryUsedBytes) {
+
+        public GpuStats {
+            Validation.requireAtLeast(deviceNumber, "deviceNumber", 0);
+            Validation.requireAtLeast(loadPercentage, "loadPercentage", 0);
+            Validation.requireAtLeast(memoryTotalBytes, "memoryTotalBytes", 0L);
+            Validation.requireAtLeast(memoryUsedBytes, "memoryUsedBytes", 0L);
+        }
+
+    }
+
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollector.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollector.java
new file mode 100644
index 00000000000..aa6f8d8f5f6
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerStatsCollector.java
@@ -0,0 +1,168 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.container;
+
+import com.yahoo.vespa.hosted.node.admin.cgroup.Cgroup;
+import com.yahoo.vespa.hosted.node.admin.cgroup.CpuController;
+import com.yahoo.vespa.hosted.node.admin.cgroup.Size;
+import com.yahoo.vespa.hosted.node.admin.cgroup.MemoryController;
+import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext;
+import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixUser;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.nio.file.FileSystem;
+import java.nio.file.Files;
+import java.nio.file.NoSuchFileException;
+import java.nio.file.Path;
+import java.time.Duration;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.stream.Stream;
+
+/**
+ * Collects CPU, GPU, memory and network statistics for a container.
+ *
+ * Uses same approach as runc: https://github.com/opencontainers/runc/tree/master/libcontainer/cgroups/fs
+ *
+ * @author mpolden
+ */
+class ContainerStatsCollector {
+
+    private final ContainerEngine containerEngine;
+    private final FileSystem fileSystem;
+    private final Cgroup rootCgroup;
+    private final int onlineCpus;
+
+    ContainerStatsCollector(ContainerEngine containerEngine, Cgroup rootCgroup, FileSystem fileSystem) {
+        this(containerEngine, rootCgroup, fileSystem, Runtime.getRuntime().availableProcessors());
+    }
+
+    ContainerStatsCollector(ContainerEngine containerEngine, Cgroup rootCgroup, FileSystem fileSystem, int onlineCpus) {
+        this.containerEngine = Objects.requireNonNull(containerEngine);
+        this.fileSystem = Objects.requireNonNull(fileSystem);
+        this.rootCgroup = Objects.requireNonNull(rootCgroup);
+        this.onlineCpus = onlineCpus;
+    }
+
+    /** Collect statistics for given container ID and PID. Returns empty if a file being read no longer exists */
+    public Optional<ContainerStats> collect(NodeAgentContext context, ContainerId containerId, int pid, String iface) {
+        try {
+            ContainerStats.CpuStats cpuStats = collectCpuStats(containerId);
+            ContainerStats.MemoryStats memoryStats = collectMemoryStats(containerId);
+            Map<String, ContainerStats.NetworkStats> networkStats = Map.of(iface, collectNetworkStats(iface, pid));
+            List<ContainerStats.GpuStats> gpuStats = collectGpuStats(context);
+            return Optional.of(new ContainerStats(networkStats, memoryStats, cpuStats, gpuStats));
+        } catch (NoSuchFileException ignored) {
+            return Optional.empty(); // Container disappeared while we collected stats
+        } catch (UncheckedIOException e) {
+            if (e.getCause() instanceof NoSuchFileException) // instanceof is false for null, no separate null check needed
+                return Optional.empty();
+            throw e;
+        } catch (IOException e) {
+            throw new UncheckedIOException(e);
+        }
+    }
+
+    private List<ContainerStats.GpuStats> collectGpuStats(NodeAgentContext context) {
+        boolean hasGpu = Files.exists(fileSystem.getPath("/dev/nvidia0"));
+        if (!hasGpu) {
+            return List.of();
+        }
+        Stream<String> lines = containerEngine.execute(context, UnixUser.ROOT, Duration.ofSeconds(5),
+                                               "nvidia-smi",
+                                               "--query-gpu=index,utilization.gpu,memory.total,memory.free",
+                                               "--format=csv,noheader,nounits")
+                                              .getOutputLinesStream();
+        return lines.map(ContainerStatsCollector::parseGpuStats).toList();
+    }
+
+    private static ContainerStats.GpuStats parseGpuStats(String s) {
+        String[] fields = fields(s, ",\\s*");
+        if (fields.length < 4) throw new IllegalArgumentException("Could not parse GPU stats from '" + s + "'");
+        int deviceNumber = Integer.parseInt(fields[0]);
+        int loadPercentage = Integer.parseInt(fields[1]);
+        long mega = 1L << 20; // 2^20, scales the reported memory figures to bytes
+        long memoryTotalBytes = Long.parseLong(fields[2]) * mega;
+        long memoryFreeBytes = Long.parseLong(fields[3]) * mega;
+        long memoryUsedBytes = memoryTotalBytes - memoryFreeBytes;
+        return new ContainerStats.GpuStats(deviceNumber, loadPercentage, memoryTotalBytes, memoryUsedBytes);
+    }
+
+    private ContainerStats.CpuStats collectCpuStats(ContainerId containerId) throws IOException {
+        Map<CpuController.StatField, Long> cpuStats = rootCgroup.resolveContainer(containerId).cpu().readStats();
+        return new ContainerStats.CpuStats(onlineCpus,
+                                           systemCpuUsage(),
+                                           cpuStats.get(CpuController.StatField.TOTAL_USAGE_USEC),
+                                           cpuStats.get(CpuController.StatField.SYSTEM_USAGE_USEC),
+                                           cpuStats.get(CpuController.StatField.THROTTLED_TIME_USEC),
+                                           cpuStats.get(CpuController.StatField.TOTAL_PERIODS),
+                                           cpuStats.get(CpuController.StatField.THROTTLED_PERIODS));
+    }
+
+    private ContainerStats.MemoryStats collectMemoryStats(ContainerId containerId) throws IOException {
+        MemoryController memoryController = rootCgroup.resolveContainer(containerId).memory();
+        Size max = memoryController.readMax();
+        long memoryUsageInBytes = memoryController.readCurrent().value();
+        var stats = memoryController.readStat();
+        return new ContainerStats.MemoryStats(
+                stats.file().value(), memoryUsageInBytes, max.isMax() ? -1 : max.value(),
+                stats.sock().value(), stats.slab().value(), stats.slabReclaimable().value(), stats.anon().value());
+    }
+
+    private ContainerStats.NetworkStats collectNetworkStats(String iface, int containerPid) throws IOException {
+        for (var line : Files.readAllLines(netDevPath(containerPid))) {
+            String[] fields = fields(line.trim());
+            if (fields.length < 17 || !fields[0].equals(iface + ":")) continue;
+
+            long rxBytes = Long.parseLong(fields[1]);
+            long rxErrors = Long.parseLong(fields[3]);
+            long rxDropped = Long.parseLong(fields[4]);
+
+            long txBytes = Long.parseLong(fields[9]);
+            long txErrors = Long.parseLong(fields[11]);
+            long txDropped = Long.parseLong(fields[12]);
+
+            return new ContainerStats.NetworkStats(rxBytes, rxDropped, rxErrors, txBytes, txDropped, txErrors);
+        }
+        throw new IllegalArgumentException("No statistics found for interface " + iface);
+    }
+
+    /** Returns total CPU time in µs spent executing all the processes on this host */
+    private long systemCpuUsage() throws IOException {
+        long ticks = parseLong(Files.readAllLines(fileSystem.getPath("/proc/stat")), "cpu");
+        return userHzToMicroSeconds(ticks);
+    }
+
+    private long parseLong(List<String> lines, String fieldName) { // sums all numeric fields of the first line named fieldName, 0 if none
+        long value = 0;
+        for (var line : lines) {
+            String[] fields = fields(line);
+            if (fields.length < 2 || !fields[0].equals(fieldName)) continue;
+            for (int i = 1; i < fields.length; i++) {
+                value += Long.parseLong(fields[i]);
+            }
+            break;
+        }
+        return value;
+    }
+
+    private Path netDevPath(int containerPid) {
+        return fileSystem.getPath("/proc/" + containerPid + "/net/dev");
+    }
+
+    static long userHzToMicroSeconds(long ticks) {
+        // Ideally we would read this from _SC_CLK_TCK, but then we need JNI. However, in practice this is always 100 on x86 Linux
+        return ticks * 10_000;
+    }
+
+    private static String[] fields(String s) {
+        return fields(s, "\\s+");
+    }
+
+    private static String[] fields(String s, String regex) {
+        return s.trim().split(regex);
+    }
+
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/PartialContainer.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/PartialContainer.java
new file mode 100644
index 00000000000..c9310897df9
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/PartialContainer.java
@@ -0,0 +1,139 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.container;
+
+import com.yahoo.config.provision.DockerImage;
+
+import java.time.Instant;
+import java.util.Map;
+import java.util.Objects;
+
+/**
+ * A partial container, containing only fields returned by a container list command such as 'podman ps'.
+ *
+ * @author mpolden
+ */
+public class PartialContainer {
+
+    private final ContainerId id;
+    private final ContainerName name;
+    private final Instant createdAt;
+    private final State state;
+    private final String imageId;
+    private final DockerImage image;
+    private final Map<String, String> labels;
+    private final int pid;
+    private final boolean managed;
+
+    public PartialContainer(ContainerId id, ContainerName name, Instant createdAt, State state, String imageId,
+                            DockerImage image, Map<String, String> labels, int pid, boolean managed) {
+        this.id = Objects.requireNonNull(id);
+        this.name = Objects.requireNonNull(name);
+        this.createdAt = Objects.requireNonNull(createdAt);
+        this.state = Objects.requireNonNull(state);
+        this.imageId = Objects.requireNonNull(imageId);
+        this.image = Objects.requireNonNull(image);
+        this.labels = Map.copyOf(Objects.requireNonNull(labels));
+        this.pid = pid;
+        this.managed = managed;
+    }
+
+    /** A unique identifier for this. Typically generated by the container engine */
+    public ContainerId id() {
+        return id;
+    }
+
+    /** The given name of this */
+    public ContainerName name() {
+        return name;
+    }
+
+    /** Timestamp when this container was created */
+    public Instant createdAt() {
+        return createdAt;
+    }
+
+    /** Current state of this */
+    public State state() {
+        return state;
+    }
+
+    /** A unique identifier for the image in use by this */
+    public String imageId() {
+        return imageId;
+    }
+
+    /** The image in use by this */
+    public DockerImage image() {
+        return image;
+    }
+
+    /** The labels set on this, as an unmodifiable map */
+    public Map<String, String> labels() {
+        return labels;
+    }
+
+    /** The PID of this */
+    public int pid() {
+        return pid;
+    }
+
+    /** Returns whether this container is managed by node-admin */
+    public boolean managed() {
+        return managed;
+    }
+
+    /** Returns the value of given label key, or throws IllegalArgumentException if no such label is set */
+    public String label(String key) {
+        String labelValue = labels.get(key);
+        if (labelValue == null) throw new IllegalArgumentException("No such label '" + key + "'");
+        return labelValue;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+        PartialContainer that = (PartialContainer) o;
+        return pid == that.pid && managed == that.managed && id.equals(that.id) && name.equals(that.name) && createdAt.equals(that.createdAt) && state == that.state && imageId.equals(that.imageId) && image.equals(that.image) && labels.equals(that.labels);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(id, name, createdAt, state, imageId, image, labels, pid, managed);
+    }
+
+    /** The state of a container */
+    public enum State {
+
+        unknown,
+        configured,
+        created,
+        running,
+        stopped,
+        paused,
+        exited,
+        removing,
+        stopping;
+
+        public boolean isRunning() {
+            return this == running;
+        }
+
+        public static Container.State from(String state) {
+            return switch (state) {
+                case "unknown" -> unknown;
+                case "configured" -> configured;
+                case "created" -> created;
+                case "running" -> running;
+                case "stopped" -> stopped;
+                case "paused" -> paused;
+                case "exited" -> exited;
+                case "removing" -> removing;
+                case "stopping" -> stopping;
+                default -> throw new IllegalArgumentException("Invalid state '" + state + "'");
+            };
+        }
+
+    }
+
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/RegistryCredentials.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/RegistryCredentials.java
new file mode 100644
index 00000000000..7a5f46dab74
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/RegistryCredentials.java
@@ -0,0 +1,25 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.container;
+
+import java.util.Objects;
+
+/**
+ * Credentials for a container registry server.
+ *
+ * @author mpolden
+ */
+public record RegistryCredentials(String username, String password) {
+
+    public static final RegistryCredentials none = new RegistryCredentials("", "");
+
+    public RegistryCredentials {
+        Objects.requireNonNull(username);
+        Objects.requireNonNull(password);
+    }
+
+    @Override
+    public String toString() {
+        return "registry credentials [username=" + username + ",password=<hidden>]"; // never print the password itself
+    }
+
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/RegistryCredentialsProvider.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/RegistryCredentialsProvider.java
new file mode 100644
index 00000000000..8711227058a
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/RegistryCredentialsProvider.java
@@ -0,0 +1,13 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.container;
+
+/**
+ * Interface for retrieving credentials for a container registry.
+ *
+ * @author mpolden
+ */
+public interface RegistryCredentialsProvider {
+
+    RegistryCredentials get();
+
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/ContainerImageDownloader.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/ContainerImageDownloader.java
new file mode 100644
index 00000000000..ab2adc061fd
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/ContainerImageDownloader.java
@@ -0,0 +1,66 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.container.image;
+
+import com.yahoo.concurrent.DaemonThreadFactory;
+import com.yahoo.config.provision.DockerImage;
+import com.yahoo.jdisc.Timer;
+import com.yahoo.vespa.hosted.node.admin.component.TaskContext;
+import com.yahoo.vespa.hosted.node.admin.container.ContainerEngine;
+import com.yahoo.vespa.hosted.node.admin.container.RegistryCredentialsProvider;
+
+import java.time.Duration;
+import java.time.Instant;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Objects;
+import java.util.Set;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * Download a container image asynchronously.
+ *
+ * @author mpolden
+ */
+public class ContainerImageDownloader {
+
+    private static final Logger LOG = Logger.getLogger(ContainerImageDownloader.class.getName());
+
+    private final ContainerEngine containerEngine;
+    private final Timer timer;
+
+    private final ExecutorService executorService = Executors.newSingleThreadExecutor(
+            new DaemonThreadFactory("container-image-downloader")); // Download one image at a time
+    private final Set<DockerImage> pendingDownloads = Collections.synchronizedSet(new HashSet<>());
+
+    public ContainerImageDownloader(ContainerEngine containerEngine, Timer timer) {
+        this.containerEngine = Objects.requireNonNull(containerEngine);
+        this.timer = Objects.requireNonNull(timer);
+    }
+
+    /**
+     * Download given container image. The download runs in the background; this never blocks on the pull itself.
+     *
+     * @return true if the image is already present locally, false if a download is pending or was just started.
+     */
+    public boolean get(TaskContext context, DockerImage image, RegistryCredentialsProvider credentialsProvider) {
+        if (pendingDownloads.contains(image)) return false;
+        if (containerEngine.hasImage(context, image)) return true;
+        pendingDownloads.add(image); // Register before submitting: a task finishing early would otherwise remove before we add, leaving the image pending forever
+        executorService.submit(() -> {
+            try {
+                Instant start = timer.currentTime();
+                containerEngine.pullImage(context, image, credentialsProvider.get());
+                LOG.log(Level.INFO, "Downloaded container image " + image + " in " + Duration.between(start, timer.currentTime()));
+            } catch (RuntimeException e) {
+                LOG.log(Level.SEVERE, "Failed to download container image " + image, e);
+            } finally {
+                pendingDownloads.remove(image);
+            }
+        });
+        return false;
+    }
+
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/ContainerImagePruner.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/ContainerImagePruner.java
new file mode 100644
index 00000000000..51bf238fa67
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/ContainerImagePruner.java
@@ -0,0 +1,164 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.container.image; + +import com.yahoo.collections.Pair; +import com.yahoo.jdisc.Timer; +import com.yahoo.vespa.hosted.node.admin.component.TaskContext; +import com.yahoo.vespa.hosted.node.admin.container.ContainerEngine; +import com.yahoo.vespa.hosted.node.admin.container.PartialContainer; + +import java.time.Duration; +import java.time.Instant; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.function.Function; +import java.util.logging.Logger; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * This class removes container images that have not been recently used by any containers. + * + *

<p>Definitions:
+ * <ul>
+ *   <li>Every image has exactly 1 id</li>
+ *   <li>Every image has between 0..n tags, see {@code docker tag} for more</li>
+ *   <li>Every image has 0..1 parent ids</li>
+ * </ul>
+ *
+ * <p>Limitations:
+ * <ol>
+ *   <li>Image that has more than 1 tag cannot be deleted by ID</li>
+ *   <li>Deleting a tag of an image with multiple tags will only remove the tag, the image with the
+ *       remaining tags will remain</li>
+ *   <li>Deleting the last tag of an image will delete the entire image.</li>
+ *   <li>Image cannot be deleted if:
+ *     <ul>
+ *       <li>It has 1 or more children</li>
+ *       <li>A container uses it</li>
+ *     </ul>
+ *   </li>
+ * </ol>
+ * + * @author freva + * @author mpolden + */ +public class ContainerImagePruner { + + private static final Logger LOG = Logger.getLogger(ContainerImagePruner.class.getName()); + + private final Timer timer; + private final ContainerEngine containerEngine; + + private final Map lastTimeUsedByImageId = new ConcurrentHashMap<>(); + + public ContainerImagePruner(ContainerEngine containerEngine, Timer timer) { + this.containerEngine = Objects.requireNonNull(containerEngine); + this.timer = Objects.requireNonNull(timer); + } + + /** + * Remove unused container images. + * + * Note: This method must be called frequently enough to see all containers to know which images are being used. + * + * @param excludedRefs List of image references (tag or id) to keep, regardless of their status + * @param minAge Minimum age of for image to be removed + * @return true if any image was remove + */ + public boolean removeUnusedImages(TaskContext context, List excludedRefs, Duration minAge) { + List images = containerEngine.listImages(context); + List containers = containerEngine.listContainers(context); + + Map imageByImageId = images.stream().collect(Collectors.toMap(Image::id, Function.identity())); + + // The set of images that we want to keep is: + // 1. The images that were recently used + // 2. The images that were explicitly excluded + Set imagesToKeep = Stream + .concat( + updateRecentlyUsedImageIds(images, containers, minAge).stream(), // 1 + referencesToImages(excludedRefs, images).stream()) // 2 + .collect(Collectors.toSet()); + + // Now take all the images we have locally + List imagesToRemove = imageByImageId.keySet().stream() + // filter out images we want to keep + .filter(imageId -> !imagesToKeep.contains(imageId)) + .map(imageByImageId::get) + .collect(Collectors.toCollection(ArrayList::new)); + + // We cannot delete an image that is referenced by other images as parent. 
Computing parent image is complicated, see + // https://github.com/containers/podman/blob/d7b2f03f8a5d0e3789ac185ea03989463168fb76/vendor/github.com/containers/common/libimage/layer_tree.go#L235:L299 + // https://github.com/containers/podman/blob/d7b2f03f8a5d0e3789ac185ea03989463168fb76/vendor/github.com/containers/common/libimage/oci.go#L30:L97 + // In practice, our images do not have any parents on prod machines, so we should be able to delete in any + // order. In case we ever do get a parent on a host somehow, we could get stuck if we always attempt to delete + // in wrong order, so shuffle first to ensure this eventually converges + Collections.shuffle(imagesToRemove); + + imagesToRemove.forEach(image -> { + // Deleting an image by image ID with multiple tags will fail -> delete by tags instead + referencesOf(image).forEach(imageReference -> { + LOG.info("Deleting unused image " + imageReference); + containerEngine.removeImage(context, imageReference); + }); + lastTimeUsedByImageId.remove(image.id()); + }); + return !imagesToRemove.isEmpty(); + } + + private Set updateRecentlyUsedImageIds(List images, List containers, Duration minImageAgeToDelete) { + final Instant now = timer.currentTime(); + + // Add any already downloaded image to the list once + images.forEach(image -> lastTimeUsedByImageId.putIfAbsent(image.id(), now)); + + // Update last used time for all current containers + containers.forEach(container -> lastTimeUsedByImageId.put(container.imageId(), now)); + + // Return list of images that have been used within minImageAgeToDelete + return lastTimeUsedByImageId.entrySet().stream() + .filter(entry -> Duration.between(entry.getValue(), now).minus(minImageAgeToDelete).isNegative()) + .map(Map.Entry::getKey) + .collect(Collectors.toSet()); + } + + /** + * Map given references (image tags or ids) to images. + * + * This only works if the given tag is actually present locally. This is fine, because if it isn't - we can't delete + * it, so no harm done. 
+ */ + private Set referencesToImages(List references, List images) { + Map imageIdByImageTag = images.stream() + .flatMap(image -> referencesOf(image).stream() + .map(repoTag -> new Pair<>(repoTag, image.id()))) + .collect(Collectors.toMap(Pair::getFirst, Pair::getSecond)); + + return references.stream() + .map(ref -> imageIdByImageTag.getOrDefault(ref, ref)) + .collect(Collectors.toUnmodifiableSet()); + } + + /** + * Returns list of references to given image, preferring image tag(s), if any exist. + * + * If image is untagged, its ID is returned instead. + */ + private static List referencesOf(Image image) { + if (image.names().isEmpty()) { + return List.of(image.id()); + } + return image.names().stream() + .map(tag -> { + if (":".equals(tag)) return image.id(); + return tag; + }) + .toList(); + } + +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/Image.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/Image.java new file mode 100644 index 00000000000..223304f058e --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/image/Image.java @@ -0,0 +1,50 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.container.image; + +import java.util.List; +import java.util.Objects; + +/** + * This represents a container image that exists locally. 
/**
 * This represents a container image that exists locally.
 *
 * Instances are immutable: the list of names is copied on construction.
 *
 * @author mpolden
 */
public class Image {

    private final String id;
    private final List<String> names;

    /**
     * @param id the identifier of the image, must be non-null
     * @param names names of the image, must be non-null and free of null elements; copied
     * @throws NullPointerException if any argument, or any element of names, is null
     */
    public Image(String id, List<String> names) {
        this.id = Objects.requireNonNull(id);
        this.names = List.copyOf(Objects.requireNonNull(names));
    }

    /** The identifier of this image */
    public String id() {
        return id;
    }

    /** Names for this image, such as tags or digests. The returned list is unmodifiable. */
    public List<String> names() {
        return names;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        Image image = (Image) o;
        return id.equals(image.id) && names.equals(image.names);
    }

    @Override
    public int hashCode() {
        return Objects.hash(id, names);
    }

    @Override
    public String toString() {
        return "image " + id;
    }

}
+package com.yahoo.vespa.hosted.node.admin.container.metrics; + +/** + * @author freva + */ +public class Counter implements MetricValue { + private final Object lock = new Object(); + + private long value = 0; + + public void increment() { + add(1L); + } + + public void add(long n) { + synchronized (lock) { + value += n; + } + } + + @Override + public Number getValue() { + synchronized (lock) { + return value; + } + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/metrics/DimensionMetrics.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/metrics/DimensionMetrics.java new file mode 100644 index 00000000000..724432431cd --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/metrics/DimensionMetrics.java @@ -0,0 +1,76 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.container.metrics; + +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; + +/** + * @author freva + */ +public class DimensionMetrics { + + private final String application; + private final Dimensions dimensions; + private final Map metrics; + + DimensionMetrics(String application, Dimensions dimensions, Map metrics) { + this.application = Objects.requireNonNull(application); + this.dimensions = Objects.requireNonNull(dimensions); + this.metrics = metrics.entrySet().stream() + .filter(DimensionMetrics::metricIsFinite) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + } + + public String getApplication() { + return application; + } + + public Dimensions getDimensions() { + return dimensions; + } + + public Map getMetrics() { + return metrics; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + DimensionMetrics that = 
(DimensionMetrics) o; + return application.equals(that.application) && + dimensions.equals(that.dimensions) && + metrics.equals(that.metrics); + } + + @Override + public int hashCode() { + return Objects.hash(application, dimensions, metrics); + } + + private static boolean metricIsFinite(Map.Entry metric) { + return ! (metric.getValue() instanceof Double) || Double.isFinite((double) metric.getValue()); + } + + public static class Builder { + private final String application; + private final Dimensions dimensions; + private final Map metrics = new HashMap<>(); + + public Builder(String application, Dimensions dimensions) { + this.application = application; + this.dimensions = dimensions; + } + + public Builder withMetric(String metricName, Number metricValue) { + metrics.put(metricName, metricValue); + return this; + } + + public DimensionMetrics build() { + return new DimensionMetrics(application, dimensions, metrics); + } + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/metrics/Dimensions.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/metrics/Dimensions.java new file mode 100644 index 00000000000..0f9144b9ca1 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/metrics/Dimensions.java @@ -0,0 +1,30 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
/**
 * An immutable set of dimension name to dimension value pairs.
 *
 * @param dimensionsMap dimension name to value; copied on construction, so neither null keys nor null
 *                      values are accepted and the map returned by the accessor is unmodifiable
 * @author freva
 */
public record Dimensions(Map<String, String> dimensionsMap) {

    /** The empty set of dimensions */
    public static final Dimensions NONE = new Dimensions(Map.of());

    public Dimensions {
        dimensionsMap = Map.copyOf(dimensionsMap);
    }

    /** Mutable builder; each call to {@link #build()} takes a snapshot of the dimensions added so far */
    public static class Builder {
        private final Map<String, String> dimensionsMap = new HashMap<>();

        /** Adds the given dimension, replacing any value previously added under the same name */
        public Dimensions.Builder add(String dimensionName, String dimensionValue) {
            dimensionsMap.put(dimensionName, dimensionValue);
            return this;
        }

        public Dimensions build() {
            return new Dimensions(dimensionsMap);
        }
    }
}
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.container.metrics; + +/** + * @author freva + */ +public interface MetricValue { + Number getValue(); +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/metrics/Metrics.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/metrics/Metrics.java new file mode 100644 index 00000000000..e144f3a91e3 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/metrics/Metrics.java @@ -0,0 +1,139 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.container.metrics; + +import com.yahoo.component.annotation.Inject; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.stream.Collectors; + +/** + * Stores the latest metric for the given application, name, dimension triplet in memory + * + * @author freva + */ +public class Metrics { + // Application names used + public static final String APPLICATION_HOST = "vespa.host"; + public static final String APPLICATION_NODE = "vespa.node"; + + private final Object monitor = new Object(); + private final Map> metrics = new HashMap<>(); + + @Inject + public Metrics() { } + + /** + * Creates a counter metric under vespa.host application, with no dimensions and default dimension type + * See {@link #declareCounter(String, String, Dimensions, DimensionType)} + */ + public Counter declareCounter(String name) { + return declareCounter(name, Dimensions.NONE); + } + + /** + * Creates a counter metric under vespa.host application, with the given dimensions and default dimension type + * See {@link #declareCounter(String, String, Dimensions, DimensionType)} + */ + public Counter declareCounter(String name, 
Dimensions dimensions) { + return declareCounter(APPLICATION_HOST, name, dimensions, DimensionType.DEFAULT); + } + + /** Creates a counter metric. This method is idempotent. */ + public Counter declareCounter(String application, String name, Dimensions dimensions, DimensionType type) { + synchronized (monitor) { + return (Counter) getOrCreateApplicationMetrics(application, type) + .computeIfAbsent(dimensions, d -> new HashMap<>()) + .computeIfAbsent(name, n -> new Counter()); + } + } + + /** + * Creates a gauge metric under vespa.host application, with no dimensions and default dimension type + * See {@link #declareGauge(String, String, Dimensions, DimensionType)} + */ + public Gauge declareGauge(String name) { + return declareGauge(name, Dimensions.NONE); + } + + /** + * Creates a gauge metric under vespa.host application, with the given dimensions and default dimension type + * See {@link #declareGauge(String, String, Dimensions, DimensionType)} + */ + public Gauge declareGauge(String name, Dimensions dimensions) { + return declareGauge(APPLICATION_HOST, name, dimensions, DimensionType.DEFAULT); + } + + /** Creates a gauge metric. 
This method is idempotent */ + public Gauge declareGauge(String application, String name, Dimensions dimensions, DimensionType type) { + synchronized (monitor) { + return (Gauge) getOrCreateApplicationMetrics(application, type) + .computeIfAbsent(dimensions, d -> new HashMap<>()) + .computeIfAbsent(name, n -> new Gauge()); + } + } + + public List getDefaultMetrics() { + return getMetricsByType(DimensionType.DEFAULT); + } + + public List getMetricsByType(DimensionType type) { + synchronized (monitor) { + List dimensionMetrics = new ArrayList<>(); + metrics.getOrDefault(type, Map.of()) + .forEach((application, applicationMetrics) -> applicationMetrics.metricsByDimensions().entrySet().stream() + .map(entry -> new DimensionMetrics(application, entry.getKey(), + entry.getValue().entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, value -> value.getValue().getValue())))) + .forEach(dimensionMetrics::add)); + return dimensionMetrics; + } + } + + public void deleteMetricByDimension(String name, Dimensions dimensionsToRemove, DimensionType type) { + synchronized (monitor) { + Optional.ofNullable(metrics.get(type)) + .map(m -> m.get(name)) + .map(ApplicationMetrics::metricsByDimensions) + .ifPresent(m -> m.remove(dimensionsToRemove)); + } + } + + public void deleteMetricByName(String application, String metricName, DimensionType type) { + synchronized (monitor) { + Optional.ofNullable(metrics.get(type)) + .map(m -> m.get(application)) + .map(ApplicationMetrics::metricsByDimensions) + .ifPresent(dims -> + dims.values().forEach(metrics -> metrics.remove(metricName)) + ); + } + } + + Map> getOrCreateApplicationMetrics(String application, DimensionType type) { + return metrics.computeIfAbsent(type, m -> new HashMap<>()) + .computeIfAbsent(application, app -> new ApplicationMetrics()) + .metricsByDimensions(); + } + + // "Application" is the monitoring application, not Vespa application + private static class ApplicationMetrics { + private final Map> 
metricsByDimensions = new LinkedHashMap<>(); + + Map> metricsByDimensions() { + return metricsByDimensions; + } + } + + // Used to distinguish whether metrics have been populated with all tag values + public enum DimensionType { + /** Default metrics get added default dimensions set in check config */ + DEFAULT, + + /** Pretagged metrics will only get the dimensions explicitly set when creating the counter/gauge */ + PRETAGGED + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/metrics/package-info.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/metrics/package-info.java new file mode 100644 index 00000000000..e6ddfa2f4c8 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/metrics/package-info.java @@ -0,0 +1,5 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.vespa.hosted.node.admin.container.metrics; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/package-info.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/package-info.java new file mode 100644 index 00000000000..86f3c31ff39 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/package-info.java @@ -0,0 +1,5 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+@ExportPackage +package com.yahoo.vespa.hosted.node.admin.container; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/ContainerWireguardTask.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/ContainerWireguardTask.java new file mode 100644 index 00000000000..332a225bda3 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/ContainerWireguardTask.java @@ -0,0 +1,16 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.maintenance; + +import com.yahoo.vespa.hosted.node.admin.container.ContainerId; +import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext; + +/** + * Wireguard task for containers. + * + * @author gjoranv + */ +public interface ContainerWireguardTask { + + void converge(NodeAgentContext context, ContainerId containerId); + +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java new file mode 100644 index 00000000000..8bfb3f86aa7 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java @@ -0,0 +1,196 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.maintenance; + +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.NodeType; +import com.yahoo.jdisc.Timer; +import com.yahoo.vespa.hosted.node.admin.component.TaskContext; +import com.yahoo.vespa.hosted.node.admin.container.Container; +import com.yahoo.vespa.hosted.node.admin.container.ContainerName; +import com.yahoo.vespa.hosted.node.admin.maintenance.coredump.CoredumpHandler; +import com.yahoo.vespa.hosted.node.admin.maintenance.disk.CoredumpCleanupRule; +import com.yahoo.vespa.hosted.node.admin.maintenance.disk.DiskCleanup; +import com.yahoo.vespa.hosted.node.admin.maintenance.disk.DiskCleanupRule; +import com.yahoo.vespa.hosted.node.admin.maintenance.disk.LinearCleanupRule; +import com.yahoo.vespa.hosted.node.admin.maintenance.sync.SyncClient; +import com.yahoo.vespa.hosted.node.admin.maintenance.sync.SyncFileInfo; +import com.yahoo.vespa.hosted.node.admin.nodeadmin.ConvergenceException; +import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext; +import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentTask; +import com.yahoo.vespa.hosted.node.admin.task.util.file.DiskSize; +import com.yahoo.vespa.hosted.node.admin.task.util.file.FileFinder; +import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixPath; +import com.yahoo.vespa.hosted.node.admin.task.util.fs.ContainerPath; +import com.yahoo.vespa.hosted.node.admin.task.util.process.Terminal; + +import java.net.URI; +import java.nio.file.Files; +import java.nio.file.Path; +import java.time.Duration; +import java.time.Instant; +import java.time.ZoneOffset; +import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.TimeUnit; +import java.util.function.Function; +import java.util.logging.Level; +import 
java.util.logging.Logger; + +import static com.yahoo.vespa.hosted.node.admin.maintenance.disk.DiskCleanupRule.Priority; + +/** + * @author freva + */ +public class StorageMaintainer { + private static final Logger logger = Logger.getLogger(StorageMaintainer.class.getName()); + private static final DateTimeFormatter DATE_TIME_FORMATTER = DateTimeFormatter + .ofPattern("yyyyMMddHHmmss").withZone(ZoneOffset.UTC); + + private final Terminal terminal; + private final CoredumpHandler coredumpHandler; + private final DiskCleanup diskCleanup; + private final SyncClient syncClient; + private final Timer timer; + private final Path archiveContainerStoragePath; + + // We cache disk usage to avoid doing expensive disk operations so often + private final Cache diskUsage = CacheBuilder.newBuilder() + .maximumSize(100) + .expireAfterWrite(5, TimeUnit.MINUTES) + .build(); + + public StorageMaintainer(Terminal terminal, CoredumpHandler coredumpHandler, DiskCleanup diskCleanup, + SyncClient syncClient, Timer timer, Path archiveContainerStoragePath) { + this.terminal = terminal; + this.coredumpHandler = coredumpHandler; + this.diskCleanup = diskCleanup; + this.syncClient = syncClient; + this.timer = timer; + this.archiveContainerStoragePath = archiveContainerStoragePath; + } + + public boolean syncLogs(NodeAgentContext context, boolean throttle) { + Optional archiveUri = context.node().archiveUri(); + if (archiveUri.isEmpty()) return false; + ApplicationId owner = context.node().owner().orElseThrow(); + + List syncFileInfos = FileFinder.files(context.paths().underVespaHome("logs/vespa")) + .maxDepth(2) + .stream() + .sorted(Comparator.comparing(FileFinder.FileAttributes::lastModifiedTime)) + .flatMap(fa -> SyncFileInfo.forLogFile(archiveUri.get(), fa.path(), throttle, owner).stream()) + .toList(); + + return syncClient.sync(context, syncFileInfos, throttle ? 
1 : 100); + } + + public Optional diskUsageFor(NodeAgentContext context) { + try { + DiskSize cachedDiskUsage = diskUsage.getIfPresent(context.containerName()); + if (cachedDiskUsage != null) return Optional.of(cachedDiskUsage); + + DiskSize diskUsageBytes = getDiskUsed(context, context.paths().of("/").pathOnHost()); + diskUsage.put(context.containerName(), diskUsageBytes); + return Optional.of(diskUsageBytes); + } catch (Exception e) { + context.log(logger, Level.WARNING, "Failed to get disk usage", e); + return Optional.empty(); + } + } + + DiskSize getDiskUsed(TaskContext context, Path pathOnHost) { + if (!Files.exists(pathOnHost)) return DiskSize.ZERO; + + String output = terminal.newCommandLine(context) + .add("du", "-xsk", pathOnHost.toString()) + .setTimeout(Duration.ofSeconds(60)) + .executeSilently() + .getOutput(); + + String[] results = output.split("\t"); + if (results.length != 2) { + throw ConvergenceException.ofError("Result from disk usage command not as expected: " + output); + } + + return DiskSize.of(Long.parseLong(results[0]), DiskSize.Unit.kiB); + } + + public boolean cleanDiskIfFull(NodeAgentContext context) { + if (context.isDisabled(NodeAgentTask.DiskCleanup)) return false; + + double totalBytes = context.node().diskSize().bytes(); + // Delete enough bytes to get below 70% disk usage, but only if we are already using more than 80% disk + long bytesToRemove = diskUsageFor(context) + .map(diskUsage -> (long) (diskUsage.bytes() - 0.7 * totalBytes)) + .filter(bytes -> bytes > totalBytes * 0.1) + .orElse(0L); + + if (bytesToRemove > 0 && diskCleanup.cleanup(context, createCleanupRules(context), bytesToRemove)) { + diskUsage.invalidate(context.containerName()); + return true; + } + return false; + } + + private List createCleanupRules(NodeAgentContext context) { + Instant start = timer.currentTime(); + double oneMonthSeconds = Duration.ofDays(30).getSeconds(); + Function monthNormalizer = instant -> Duration.between(instant, start).getSeconds() / 
oneMonthSeconds; + List rules = new ArrayList<>(); + + rules.add(CoredumpCleanupRule.forContainer(context.paths().underVespaHome("var/crash"))); + + rules.add(new LinearCleanupRule(() -> FileFinder.files(context.paths().underVespaHome("var/tmp")).list(), + fa -> monthNormalizer.apply(fa.lastModifiedTime()), Priority.LOWEST, Priority.HIGHEST)); + + if (context.node().membership().map(m -> m.type().hasContainer()).orElse(false)) { + rules.add(new LinearCleanupRule(() -> FileFinder.files(context.paths().underVespaHome("logs/vespa/access")).list(), + fa -> monthNormalizer.apply(fa.lastModifiedTime()), Priority.LOWEST, Priority.HIGHEST)); + } + if (context.nodeType() == NodeType.tenant && context.node().membership().map(m -> m.type().isAdmin()).orElse(false)) + rules.add(new LinearCleanupRule(() -> FileFinder.files(context.paths().underVespaHome("logs/vespa/logarchive")).list(), + fa -> monthNormalizer.apply(fa.lastModifiedTime()), Priority.LOWEST, Priority.HIGHEST)); + + if (context.nodeType() == NodeType.proxy) + rules.add(new LinearCleanupRule(() -> FileFinder.files(context.paths().underVespaHome("logs/nginx")).list(), + fa -> monthNormalizer.apply(fa.lastModifiedTime()), Priority.LOWEST, Priority.MEDIUM)); + + return rules; + } + + /** Checks if container has any new coredumps, reports and archives them if so */ + public void handleCoreDumpsForContainer(NodeAgentContext context, Optional container, boolean throwIfCoreBeingWritten) { + if (context.isDisabled(NodeAgentTask.CoreDumps)) return; + coredumpHandler.converge(context, container.map(Container::image), throwIfCoreBeingWritten); + } + + /** + * Prepares the container-storage for the next container by deleting/archiving all the data of the current container. 
+ * Removes old files, reports coredumps and archives container data, runs when container enters state "dirty" + */ + public void archiveNodeStorage(NodeAgentContext context) { + ContainerPath logsDirInContainer = context.paths().underVespaHome("logs"); + Path containerLogsInArchiveDir = archiveContainerStoragePath + .resolve(context.containerName().asString() + "_" + DATE_TIME_FORMATTER.format(timer.currentTime()) + logsDirInContainer.pathInContainer()); + + // Files.move() does not support moving non-empty directories across providers, move using host paths + UnixPath containerLogsOnHost = new UnixPath(logsDirInContainer.pathOnHost()); + if (containerLogsOnHost.exists()) { + new UnixPath(containerLogsInArchiveDir).createParents(); + containerLogsOnHost.moveIfExists(containerLogsInArchiveDir); + } + new UnixPath(context.paths().of("/")).deleteRecursively(); + + // Operations on ContainerPath will fail if Container FS root doesn't exist, it is therefore important that + // it exists as long as NodeAgent is running. Normally the root is only created when NodeAgent is first + // started. Because non-tenant nodes are never removed from node-repo, we immediately re-create the new root + // after archiving the previous + if (context.nodeType() != NodeType.tenant) + context.paths().of("/").getFileSystem().createRoot(); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/AclMaintainer.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/AclMaintainer.java new file mode 100644 index 00000000000..99715e6cad9 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/AclMaintainer.java @@ -0,0 +1,138 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.maintenance.acl; + +import com.google.common.net.InetAddresses; +import com.yahoo.vespa.hosted.node.admin.container.ContainerOperations; +import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext; +import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentTask; +import com.yahoo.vespa.hosted.node.admin.task.util.file.Editor; +import com.yahoo.vespa.hosted.node.admin.task.util.file.LineEditor; +import com.yahoo.vespa.hosted.node.admin.task.util.network.IPAddresses; +import com.yahoo.vespa.hosted.node.admin.task.util.network.IPVersion; +import com.yahoo.vespa.hosted.node.admin.task.util.process.CommandLine; + +import java.io.IOException; +import java.net.InetAddress; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; +import java.util.function.Consumer; +import java.util.function.Supplier; +import java.util.logging.Level; +import java.util.logging.Logger; + +import static com.yahoo.yolean.Exceptions.uncheck; + +/** + * This class maintains the iptables (ipv4 and ipv6) for all running containers. + * The filter table is synced with ACLs fetched from the Node repository while the nat table + * is synched with the proper redirect rule. + *

+ * If an ACL cannot be configured (e.g. iptables process execution fails) we attempted to flush the rules + * rendering the firewall open. + *

+ * This class currently assumes control over the filter and nat table. + *

+ * The configuration will be retried the next time the maintainer runs. + * + * @author mpolden + * @author smorgrav + */ +public class AclMaintainer { + private static final Logger logger = Logger.getLogger(AclMaintainer.class.getName()); + + private final ContainerOperations containerOperations; + private final IPAddresses ipAddresses; + + public AclMaintainer(ContainerOperations containerOperations, IPAddresses ipAddresses) { + this.containerOperations = containerOperations; + this.ipAddresses = ipAddresses; + } + + // ip(6)tables operate while having the xtables lock, run with synchronized to prevent multiple NodeAgents + // invoking ip(6)tables concurrently. + public synchronized void converge(NodeAgentContext context) { + if (context.isDisabled(NodeAgentTask.AclMaintainer)) return; + + // Apply acl to the filter table + editFlushOnError(context, IPVersion.IPv4, "filter", FilterTableLineEditor.from(context.acl(), IPVersion.IPv4)); + editFlushOnError(context, IPVersion.IPv6, "filter", FilterTableLineEditor.from(context.acl(), IPVersion.IPv6)); + + ipAddresses.getAddress(context.hostname().value(), IPVersion.IPv4).ifPresent(addr -> applyRedirect(context, addr)); + ipAddresses.getAddress(context.hostname().value(), IPVersion.IPv6).ifPresent(addr -> applyRedirect(context, addr)); + } + + private void applyRedirect(NodeAgentContext context, InetAddress address) { + IPVersion ipVersion = IPVersion.get(address); + // Necessary to avoid the routing packets destined for the node's own public IP address + // via the bridge, which is illegal. 
+ String redirectRule = "-A OUTPUT -d " + InetAddresses.toAddrString(address) + ipVersion.singleHostCidr() + " -j REDIRECT"; + editLogOnError(context, ipVersion, "nat", NatTableLineEditor.from(redirectRule)); + } + + private boolean editFlushOnError(NodeAgentContext context, IPVersion ipVersion, String table, LineEditor lineEditor) { + return edit(context, table, ipVersion, lineEditor, true); + } + + private boolean editLogOnError(NodeAgentContext context, IPVersion ipVersion, String table, LineEditor lineEditor) { + return edit(context, table, ipVersion, lineEditor, false); + } + + private boolean edit(NodeAgentContext context, String table, IPVersion ipVersion, LineEditor lineEditor, boolean flush) { + Editor editor = new Editor( + ipVersion.iptablesCmd() + "-" + table, + listTable(context, table, ipVersion), + restoreTable(context, table, ipVersion, flush), + lineEditor); + return editor.edit(message -> context.log(logger, message)); + } + + private Supplier> listTable(NodeAgentContext context, String table, IPVersion ipVersion) { + return () -> containerOperations + .executeCommandInNetworkNamespace(context, new CommandLine.Options().setSilent(true), ipVersion.iptablesCmd(), "-S", "-t", table) + .mapEachLine(String::trim); + } + + private Consumer> restoreTable(NodeAgentContext context, String table, IPVersion ipVersion, boolean flush) { + return list -> { + try (TemporaryIpTablesFileHandler fileHandler = new TemporaryIpTablesFileHandler(table)) { + String rules = String.join("\n", list); + String fileContent = "*" + table + "\n" + rules + "\nCOMMIT\n"; + fileHandler.writeUtf8Content(fileContent); + containerOperations.executeCommandInNetworkNamespace(context, ipVersion.iptablesRestore(), fileHandler.absolutePath()); + } catch (Exception e) { + if (flush) { + context.log(logger, Level.SEVERE, "Exception occurred while syncing iptable " + table + ", attempting rollback", e); + try { + containerOperations.executeCommandInNetworkNamespace(context, 
ipVersion.iptablesCmd(), "-F", "-t", table); + } catch (Exception ne) { + context.log(logger, Level.SEVERE, "Rollback of table " + table + " failed, giving up", ne); + } + } else { + context.log(logger, Level.WARNING, "Unable to sync iptables for " + table, e); + } + } + }; + } + + private static class TemporaryIpTablesFileHandler implements AutoCloseable { + private final Path path; + + private TemporaryIpTablesFileHandler(String table) { + this.path = uncheck(() -> Files.createTempFile("iptables-restore", "." + table)); + } + + private void writeUtf8Content(String content) throws IOException { + Files.writeString(path, content); + } + + private String absolutePath() { + return path.toAbsolutePath().toString(); + } + + @Override + public void close() throws IOException { + Files.deleteIfExists(path); + } + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/FilterTableLineEditor.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/FilterTableLineEditor.java new file mode 100644 index 00000000000..4b831745f27 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/FilterTableLineEditor.java @@ -0,0 +1,61 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.maintenance.acl; + +import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.Acl; +import com.yahoo.vespa.hosted.node.admin.task.util.file.LineEdit; +import com.yahoo.vespa.hosted.node.admin.task.util.file.LineEditor; +import com.yahoo.vespa.hosted.node.admin.task.util.network.IPVersion; + +import java.util.List; + +/** + * An editor that assumes all rules in the filter table are exactly as the wanted rules + * + * @author smorgrav + */ +class FilterTableLineEditor implements LineEditor { + + private final List wantedRules; + private int position = 0; + + private FilterTableLineEditor(List wantedRules) { + this.wantedRules = List.copyOf(wantedRules); + } + + static FilterTableLineEditor from(Acl acl, IPVersion ipVersion) { + List rules = acl.toRules(ipVersion); + return new FilterTableLineEditor(rules); + } + + @Override + public LineEdit edit(String line) { + int index = indexOf(wantedRules, line, position); + // Unwanted rule, remove + if (index < 0) return LineEdit.remove(); + + // Wanted rule at the expected position, no diff + if (index == position) { + position++; + return LineEdit.none(); + } + + // Insert the rules between position and index before index + List toInsert = wantedRules.subList(position, index); + position = ++index; + return LineEdit.insertBefore(toInsert); + } + + @Override + public List onComplete() { + return wantedRules.subList(position, wantedRules.size()); + } + + private static int indexOf(List list, T value, int startPos) { + for (int i = startPos; i < list.size(); i++) { + if (value.equals(list.get(i))) + return i; + } + + return -1; + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/NatTableLineEditor.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/NatTableLineEditor.java new file mode 100644 index 00000000000..9eff816d467 --- /dev/null +++ 
b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/NatTableLineEditor.java @@ -0,0 +1,49 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.maintenance.acl; + +import com.yahoo.vespa.hosted.node.admin.task.util.file.LineEdit; +import com.yahoo.vespa.hosted.node.admin.task.util.file.LineEditor; + +import java.util.List; + +/** + * An editor that only cares about the REDIRECT statement + * + * @author smorgrav + */ +class NatTableLineEditor implements LineEditor { + + private final String redirectRule; + private boolean redirectExists; + + private NatTableLineEditor(String redirectRule) { + this.redirectRule = redirectRule; + } + + static NatTableLineEditor from(String redirectRule) { + return new NatTableLineEditor(redirectRule); + } + + @Override + public LineEdit edit(String line) { + if (line.endsWith("REDIRECT")) { + if (redirectExists) { + // Only allow one redirect rule + return LineEdit.remove(); + } else { + redirectExists = true; + if (line.equals(redirectRule)) { + return LineEdit.none(); + } else { + return LineEdit.replaceWith(redirectRule); + } + } + } else return LineEdit.none(); + } + + @Override + public List onComplete() { + if (redirectExists) return List.of(); + return List.of(redirectRule); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/package-info.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/package-info.java new file mode 100644 index 00000000000..f98a32ba488 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/package-info.java @@ -0,0 +1,5 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+@ExportPackage +package com.yahoo.vespa.hosted.node.admin.maintenance.acl; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollector.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollector.java new file mode 100644 index 00000000000..0028784eec8 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollector.java @@ -0,0 +1,132 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.maintenance.coredump; + +import com.yahoo.vespa.hosted.node.admin.configserver.cores.CoreDumpMetadata; +import com.yahoo.vespa.hosted.node.admin.container.ContainerOperations; +import com.yahoo.vespa.hosted.node.admin.nodeadmin.ConvergenceException; +import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext; +import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixPath; +import com.yahoo.vespa.hosted.node.admin.task.util.fs.ContainerPath; +import com.yahoo.vespa.hosted.node.admin.task.util.process.CommandResult; + +import java.nio.file.Path; +import java.util.Arrays; +import java.util.List; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Takes in an uncompressed core dump and collects relevant metadata. 
+ * + * @author freva + */ +public class CoreCollector { + private static final Logger logger = Logger.getLogger(CoreCollector.class.getName()); + + private static final Pattern JAVA_HEAP_DUMP_PATTERN = Pattern.compile("java_pid.*\\.hprof$"); + private static final Pattern CORE_GENERATOR_PATH_PATTERN = Pattern.compile("(?m)^Core was generated by `(?.*?)'\\."); + private static final Pattern EXECFN_PATH_PATTERN = Pattern.compile("^.* execfn: '(?.*?)'"); + private static final Pattern FROM_PATH_PATTERN = Pattern.compile("^.* from '(?.*?)'"); + static final String GDB_PATH_RHEL8 = "/opt/rh/gcc-toolset-12/root/bin/gdb"; + + private final ContainerOperations container; + + public CoreCollector(ContainerOperations container) { + this.container = container; + } + + String readBinPathFallback(NodeAgentContext context, ContainerPath coredumpPath) { + String[] command = {GDB_PATH_RHEL8, "-n", "-batch", "-core", coredumpPath.pathInContainer()}; + CommandResult result = container.executeCommandInContainer(context, context.users().root(), command); + + Matcher matcher = CORE_GENERATOR_PATH_PATTERN.matcher(result.getOutput()); + if (! 
matcher.find()) { + throw ConvergenceException.ofError(String.format("Failed to extract binary path from GDB, result: %s, command: %s", + asString(result), Arrays.toString(command))); + } + return matcher.group("path").split(" ")[0]; + } + + String readBinPath(NodeAgentContext context, ContainerPath coredumpPath) { + String[] command = {"file", coredumpPath.pathInContainer()}; + try { + CommandResult result = container.executeCommandInContainer(context, context.users().root(), command); + if (result.getExitCode() != 0) { + throw ConvergenceException.ofError("file command failed with " + asString(result)); + } + + Matcher execfnMatcher = EXECFN_PATH_PATTERN.matcher(result.getOutput()); + if (execfnMatcher.find()) { + return execfnMatcher.group("path").split(" ")[0]; + } + + Matcher fromMatcher = FROM_PATH_PATTERN.matcher(result.getOutput()); + if (fromMatcher.find()) { + return fromMatcher.group("path").split(" ")[0]; + } + } catch (RuntimeException e) { + context.log(logger, Level.WARNING, String.format("Failed getting bin path, command: %s. " + + "Trying fallback instead", Arrays.toString(command)), e); + } + + return readBinPathFallback(context, coredumpPath); + } + + List readBacktrace(NodeAgentContext context, ContainerPath coredumpPath, String binPath, boolean allThreads) { + String threads = allThreads ? 
"thread apply all bt" : "bt"; + String[] command = {GDB_PATH_RHEL8, "-n", "-ex", "set print frame-arguments none", + "-ex", threads, "-batch", binPath, coredumpPath.pathInContainer()}; + + CommandResult result = container.executeCommandInContainer(context, context.users().root(), command); + if (result.getExitCode() != 0) + throw ConvergenceException.ofError("Failed to read backtrace " + asString(result) + ", Command: " + Arrays.toString(command)); + + return List.of(result.getOutput().split("\n")); + } + + List readJstack(NodeAgentContext context, ContainerPath coredumpPath, String binPath) { + String[] command = {"jhsdb", "jstack", "--exe", binPath, "--core", coredumpPath.pathInContainer()}; + + CommandResult result = container.executeCommandInContainer(context, context.users().root(), command); + if (result.getExitCode() != 0) + throw ConvergenceException.ofError("Failed to read jstack " + asString(result) + ", Command: " + Arrays.toString(command)); + + return List.of(result.getOutput().split("\n")); + } + + CoreDumpMetadata collect(NodeAgentContext context, ContainerPath coredumpPath) { + var metadata = new CoreDumpMetadata() + .setCreated(new UnixPath(coredumpPath).getLastModifiedTime()); + + if (JAVA_HEAP_DUMP_PATTERN.matcher(coredumpPath.getFileName().toString()).find()) { + metadata.setType(CoreDumpMetadata.Type.JVM_HEAP) + .setBinPath("java") + .setBacktrace(List.of("Heap dump, no backtrace available")); + return metadata; + } + + try { + String binPath = context.paths().underVespaHome("").resolve(readBinPath(context, coredumpPath)).pathInContainer(); + metadata.setType(CoreDumpMetadata.Type.CORE_DUMP).setBinPath(binPath); + + if (Path.of(binPath).getFileName().toString().equals("java")) { + metadata.setBacktraceAllThreads(readJstack(context, coredumpPath, binPath)); + } else { + metadata.setBacktrace(readBacktrace(context, coredumpPath, binPath, false)); + metadata.setBacktraceAllThreads(readBacktrace(context, coredumpPath, binPath, true)); + } + } catch 
(ConvergenceException e) { + context.log(logger, Level.WARNING, "Failed to extract backtrace: " + e.getMessage()); + } catch (RuntimeException e) { + context.log(logger, Level.WARNING, "Failed to extract backtrace", e); + } + return metadata; + } + + private String asString(CommandResult result) { + return "exit status " + result.getExitCode() + ", output '" + result.getOutput() + "'"; + } + +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java new file mode 100644 index 00000000000..a3386a3032f --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java @@ -0,0 +1,338 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.maintenance.coredump; + +import com.yahoo.config.provision.DockerImage; +import com.yahoo.jdisc.Timer; +import com.yahoo.security.KeyId; +import com.yahoo.security.SecretSharedKey; +import com.yahoo.vespa.flags.FetchVector; +import com.yahoo.vespa.flags.FlagSource; +import com.yahoo.vespa.flags.Flags; +import com.yahoo.vespa.flags.StringFlag; +import com.yahoo.vespa.hosted.node.admin.configserver.cores.CoreDumpMetadata; +import com.yahoo.vespa.hosted.node.admin.configserver.cores.Cores; +import com.yahoo.vespa.hosted.node.admin.configserver.cores.bindings.ReportCoreDumpRequest; +import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeSpec; +import com.yahoo.vespa.hosted.node.admin.container.metrics.Dimensions; +import com.yahoo.vespa.hosted.node.admin.container.metrics.Metrics; +import com.yahoo.vespa.hosted.node.admin.maintenance.sync.ZstdCompressingInputStream; +import com.yahoo.vespa.hosted.node.admin.nodeadmin.ConvergenceException; +import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext; 
+import com.yahoo.vespa.hosted.node.admin.task.util.file.FileDeleter; +import com.yahoo.vespa.hosted.node.admin.task.util.file.FileFinder; +import com.yahoo.vespa.hosted.node.admin.task.util.file.FileMover; +import com.yahoo.vespa.hosted.node.admin.task.util.file.MakeDirectory; +import com.yahoo.vespa.hosted.node.admin.task.util.fs.ContainerPath; + +import java.io.IOException; +import java.io.OutputStream; +import java.io.UncheckedIOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Comparator; +import java.util.List; +import java.util.Optional; +import java.util.UUID; +import java.util.function.Predicate; +import java.util.function.Supplier; +import java.util.logging.Logger; +import java.util.regex.Pattern; +import java.util.stream.IntStream; + +import static com.yahoo.vespa.hosted.node.admin.task.util.file.FileFinder.nameEndsWith; +import static com.yahoo.vespa.hosted.node.admin.task.util.file.FileFinder.nameMatches; +import static com.yahoo.vespa.hosted.node.admin.task.util.file.FileFinder.nameStartsWith; +import static com.yahoo.yolean.Exceptions.uncheck; + +/** + * Finds coredumps, collects metadata and reports them + * + * @author freva + */ +public class CoredumpHandler { + + public static final String COREDUMP_FILENAME_PREFIX = "dump_"; + + private static final Logger logger = Logger.getLogger(CoredumpHandler.class.getName()); + private static final Pattern HS_ERR_PATTERN = Pattern.compile("hs_err_pid[0-9]+\\.log"); + private static final String PROCESSING_DIRECTORY_NAME = "processing"; + private static final String METADATA2_FILE_NAME = "metadata2.json"; + private static final String COMPRESSED_EXTENSION = ".zst"; + private static final String ENCRYPTED_EXTENSION = ".enc"; + + private final CoreCollector coreCollector; + private final Cores cores; + private final String crashPatchInContainer; + private final Path doneCoredumpsPath; + private final Metrics metrics; + private final Timer timer; + private final Supplier 
coredumpIdSupplier; + private final SecretSharedKeySupplier secretSharedKeySupplier; + private final StringFlag coreEncryptionPublicKeyIdFlag; + + /** + * @param crashPathInContainer path inside the container where core dump are dumped + * @param doneCoredumpsPath path on host where processed core dumps are stored + */ + public CoredumpHandler(CoreCollector coreCollector, Cores cores, + String crashPathInContainer, Path doneCoredumpsPath, Metrics metrics, Timer timer, + SecretSharedKeySupplier secretSharedKeySupplier, FlagSource flagSource) { + this(coreCollector, cores, crashPathInContainer, doneCoredumpsPath, + metrics, timer, () -> UUID.randomUUID().toString(), secretSharedKeySupplier, + flagSource); + } + + CoredumpHandler(CoreCollector coreCollector, Cores cores, + String crashPathInContainer, Path doneCoredumpsPath, Metrics metrics, + Timer timer, Supplier coredumpIdSupplier, + SecretSharedKeySupplier secretSharedKeySupplier, FlagSource flagSource) { + this.coreCollector = coreCollector; + this.cores = cores; + this.crashPatchInContainer = crashPathInContainer; + this.doneCoredumpsPath = doneCoredumpsPath; + this.metrics = metrics; + this.timer = timer; + this.coredumpIdSupplier = coredumpIdSupplier; + this.secretSharedKeySupplier = secretSharedKeySupplier; + this.coreEncryptionPublicKeyIdFlag = Flags.CORE_ENCRYPTION_PUBLIC_KEY_ID.bindTo(flagSource); + } + + + public void converge(NodeAgentContext context, Optional dockerImage, boolean throwIfCoreBeingWritten) { + ContainerPath containerCrashPath = context.paths().of(crashPatchInContainer, context.users().vespa()); + ContainerPath containerProcessingPath = containerCrashPath.resolve(PROCESSING_DIRECTORY_NAME); + + updateMetrics(context, containerCrashPath); + + if (throwIfCoreBeingWritten) { + List pendingCores = FileFinder.files(containerCrashPath) + .match(fileAttributes -> !isReadyForProcessing(fileAttributes)) + .maxDepth(1).stream() + .map(FileFinder.FileAttributes::filename) + .toList(); + if 
(!pendingCores.isEmpty()) + throw ConvergenceException.ofError(String.format("Cannot process %s coredumps: Still being written", + pendingCores.size() < 5 ? pendingCores : pendingCores.size())); + } + + // Check if we have already started to process a core dump or we can enqueue a new core one + getCoredumpToProcess(context, containerCrashPath, containerProcessingPath) + .ifPresent(path -> processAndReportSingleCoreDump(context, path, dockerImage)); + } + + /** @return path to directory inside processing directory that contains a core dump file to process */ + Optional getCoredumpToProcess(NodeAgentContext context, ContainerPath containerCrashPath, ContainerPath containerProcessingPath) { + return FileFinder.directories(containerProcessingPath).stream() + .map(FileFinder.FileAttributes::path) + .findAny() + .map(ContainerPath.class::cast) + .or(() -> enqueueCoredump(context, containerCrashPath, containerProcessingPath)); + } + + /** + * Moves a coredump and related hs_err file(s) to a new directory under the processing/ directory. + * Limit to only processing one coredump at the time, starting with the oldest. + * + * Assumption: hs_err files are much smaller than core files and are written (last modified time) + * before the core file. 
+ * + * @return path to directory inside processing directory which contains the enqueued core dump file + */ + Optional enqueueCoredump(NodeAgentContext context, ContainerPath containerCrashPath, ContainerPath containerProcessingPath) { + Predicate isCoreDump = filename -> !HS_ERR_PATTERN.matcher(filename).matches(); + + List toProcess = FileFinder.files(containerCrashPath) + .match(attributes -> { + if (isReadyForProcessing(attributes)) { + return true; + } else { + if (isCoreDump.test(attributes.filename())) + context.log(logger, attributes.path() + " is still being written"); + return false; + } + }) + .maxDepth(1) + .stream() + .sorted(Comparator.comparing(FileFinder.FileAttributes::lastModifiedTime)) + .map(FileFinder.FileAttributes::path) + .toList(); + + int coredumpIndex = IntStream.range(0, toProcess.size()) + .filter(i -> isCoreDump.test(toProcess.get(i).getFileName().toString())) + .findFirst() + .orElse(-1); + + // Either there are no files in crash directory, or all the files are hs_err files. + if (coredumpIndex == -1) return Optional.empty(); + + ContainerPath enqueuedDir = containerProcessingPath.resolve(coredumpIdSupplier.get()); + new MakeDirectory(enqueuedDir).createParents().converge(context); + IntStream.range(0, coredumpIndex + 1) + .forEach(i -> { + Path path = toProcess.get(i); + String prefix = i == coredumpIndex ? 
COREDUMP_FILENAME_PREFIX : ""; + new FileMover(path, enqueuedDir.resolve(prefix + path.getFileName())).converge(context); + }); + return Optional.of(enqueuedDir); + } + + private String corePublicKeyFlagValue(NodeAgentContext context) { + return coreEncryptionPublicKeyIdFlag.with(FetchVector.Dimension.NODE_TYPE, context.nodeType().name()).value(); + } + + static OutputStream wrapWithEncryption(OutputStream wrappedStream, SecretSharedKey sharedCoreKey) { + return sharedCoreKey.makeEncryptionCipher().wrapOutputStream(wrappedStream); + } + + /** + * Compresses and, if a key is provided, encrypts core file (and deletes the uncompressed core), then moves + * the entire core dump processing directory to {@link #doneCoredumpsPath} for archive + */ + private void finishProcessing(NodeAgentContext context, ContainerPath coredumpDirectory, SecretSharedKey sharedCoreKey) { + ContainerPath coreFile = findCoredumpFileInProcessingDirectory(coredumpDirectory); + String extension = COMPRESSED_EXTENSION + ENCRYPTED_EXTENSION; + ContainerPath compressedCoreFile = coreFile.resolveSibling(coreFile.getFileName() + extension); + + try (ZstdCompressingInputStream zcis = new ZstdCompressingInputStream(Files.newInputStream(coreFile)); + OutputStream fos = wrapWithEncryption(Files.newOutputStream(compressedCoreFile), sharedCoreKey)) { + zcis.transferTo(fos); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + new FileDeleter(coreFile).converge(context); + + Path newCoredumpDirectory = doneCoredumpsPath.resolve(context.containerName().asString()); + new MakeDirectory(newCoredumpDirectory).createParents().converge(context); + // Files.move() does not support moving non-empty directories across providers, move using host paths + new FileMover(coredumpDirectory.pathOnHost(), newCoredumpDirectory.resolve(coredumpDirectory.getFileName().toString())) + .converge(context); + } + + ContainerPath findCoredumpFileInProcessingDirectory(ContainerPath coredumpProccessingDirectory) { + 
return (ContainerPath) FileFinder.files(coredumpProccessingDirectory) + .match(nameStartsWith(COREDUMP_FILENAME_PREFIX).and(nameEndsWith(COMPRESSED_EXTENSION).negate()) + .and(nameEndsWith(ENCRYPTED_EXTENSION).negate())) + .maxDepth(1) + .stream() + .map(FileFinder.FileAttributes::path) + .findFirst() + .orElseThrow(() -> new IllegalStateException( + "No coredump file found in processing directory " + coredumpProccessingDirectory)); + } + + void updateMetrics(NodeAgentContext context, ContainerPath containerCrashPath) { + Dimensions dimensions = generateDimensions(context); + + // Unprocessed coredumps + int numberOfUnprocessedCoredumps = FileFinder.files(containerCrashPath) + .match(nameStartsWith(".").negate()) + .match(nameMatches(HS_ERR_PATTERN).negate()) + .match(nameEndsWith(COMPRESSED_EXTENSION).negate()) + .match(nameEndsWith(ENCRYPTED_EXTENSION).negate()) + .match(nameStartsWith("metadata").negate()) + .list().size(); + + metrics.declareGauge(Metrics.APPLICATION_NODE, "coredumps.enqueued", dimensions, Metrics.DimensionType.PRETAGGED).sample(numberOfUnprocessedCoredumps); + + // Processed coredumps + Path processedCoredumpsPath = doneCoredumpsPath.resolve(context.containerName().asString()); + int numberOfProcessedCoredumps = FileFinder.directories(processedCoredumpsPath) + .maxDepth(1) + .list().size(); + + metrics.declareGauge(Metrics.APPLICATION_NODE, "coredumps.processed", dimensions, Metrics.DimensionType.PRETAGGED).sample(numberOfProcessedCoredumps); + } + + private Dimensions generateDimensions(NodeAgentContext context) { + NodeSpec node = context.node(); + Dimensions.Builder dimensionsBuilder = new Dimensions.Builder() + .add("host", node.hostname()) + .add("flavor", node.flavor()) + .add("state", node.state().toString()) + .add("zone", context.zone().getId().value()); + + node.owner().ifPresent(owner -> + dimensionsBuilder + .add("tenantName", owner.tenant().value()) + .add("applicationName", owner.application().value()) + .add("instanceName", 
owner.instance().value()) + .add("app", String.join(".", owner.application().value(), owner.instance().value())) + .add("applicationId", owner.toFullString()) + ); + + node.membership().ifPresent(membership -> + dimensionsBuilder + .add("clustertype", membership.type().value()) + .add("clusterid", membership.clusterId()) + ); + + node.parentHostname().ifPresent(parent -> dimensionsBuilder.add("parentHostname", parent)); + dimensionsBuilder.add("system", context.zone().getSystemName().value()); + + return dimensionsBuilder.build(); + } + + private boolean isReadyForProcessing(FileFinder.FileAttributes fileAttributes) { + // Wait at least a minute until we start processing a core/heap dump to ensure that + // kernel/JVM has finished writing it + return timer.currentTime().minusSeconds(60).isAfter(fileAttributes.lastModifiedTime()); + } + + void processAndReportSingleCoreDump(NodeAgentContext context, ContainerPath coreDumpDirectory, + Optional dockerImage) { + CoreDumpMetadata metadata = gatherMetadata(context, coreDumpDirectory); + dockerImage.ifPresent(metadata::setDockerImage); + dockerImage.flatMap(DockerImage::tag).ifPresent(metadata::setVespaVersion); + dockerImage.ifPresent(metadata::setDockerImage); + SecretSharedKey sharedCoreKey = Optional.of(corePublicKeyFlagValue(context)) + .filter(k -> !k.isEmpty()) + .map(KeyId::ofString) + .flatMap(secretSharedKeySupplier::create) + .orElseThrow(() -> ConvergenceException.ofError("No core dump encryption key provided")); + metadata.setDecryptionToken(sharedCoreKey.sealedSharedKey().toTokenString()); + + String coreDumpId = coreDumpDirectory.getFileName().toString(); + cores.report(context.hostname(), coreDumpId, metadata); + context.log(logger, "Core dump reported: " + coreDumpId); + finishProcessing(context, coreDumpDirectory, sharedCoreKey); + } + + CoreDumpMetadata gatherMetadata(NodeAgentContext context, ContainerPath coreDumpDirectory) { + ContainerPath metadataPath = 
coreDumpDirectory.resolve(METADATA2_FILE_NAME); + Optional request = ReportCoreDumpRequest.load(metadataPath); + if (request.isPresent()) { + var metadata = new CoreDumpMetadata(); + request.get().populateMetadata(metadata, doneCoredumpsPath.getFileSystem()); + return metadata; + } + + ContainerPath coreDumpFile = findCoredumpFileInProcessingDirectory(coreDumpDirectory); + CoreDumpMetadata metadata = coreCollector.collect(context, coreDumpFile); + metadata.setCpuMicrocodeVersion(getMicrocodeVersion()) + .setKernelVersion(System.getProperty("os.version")) + .setCoreDumpPath(doneCoredumpsPath.resolve(context.containerName().asString()) + .resolve(coreDumpDirectory.getFileName().toString()) + .resolve(coreDumpFile.getFileName().toString())); + + ReportCoreDumpRequest requestInstance = new ReportCoreDumpRequest(); + requestInstance.fillFrom(metadata); + requestInstance.save(metadataPath); + context.log(logger, "Wrote " + metadataPath.pathOnHost()); + return metadata; + } + + private String getMicrocodeVersion() { + String output = uncheck(() -> Files.readAllLines(doneCoredumpsPath.getFileSystem().getPath("/proc/cpuinfo")).stream() + .filter(line -> line.startsWith("microcode")) + .findFirst() + .orElse("microcode : UNKNOWN")); + + String[] results = output.split(":"); + if (results.length != 2) { + throw ConvergenceException.ofError("Result from detect microcode command not as expected: " + output); + } + + return results[1].trim(); + } + +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/SecretSharedKeySupplier.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/SecretSharedKeySupplier.java new file mode 100644 index 00000000000..e5291c837a2 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/SecretSharedKeySupplier.java @@ -0,0 +1,17 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.maintenance.coredump; + +import com.yahoo.security.KeyId; +import com.yahoo.security.SecretSharedKey; + +import java.util.Optional; + +/** + * @author vekterli + */ +@FunctionalInterface +public interface SecretSharedKeySupplier { + + Optional create(KeyId publicKeyId); + +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/package-info.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/package-info.java new file mode 100644 index 00000000000..0b6b3d18b01 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/package-info.java @@ -0,0 +1,5 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.vespa.hosted.node.admin.maintenance.coredump; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/disk/CoredumpCleanupRule.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/disk/CoredumpCleanupRule.java new file mode 100644 index 00000000000..50cd16f5617 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/disk/CoredumpCleanupRule.java @@ -0,0 +1,106 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.maintenance.disk; + +import com.yahoo.vespa.hosted.node.admin.maintenance.coredump.CoredumpHandler; +import com.yahoo.vespa.hosted.node.admin.task.util.file.FileFinder; + +import java.nio.file.Path; +import java.time.Instant; +import java.time.ZoneOffset; +import java.time.temporal.ChronoField; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import java.util.stream.Stream; + +import static com.yahoo.vespa.hosted.node.admin.maintenance.disk.DiskCleanupRule.PrioritizedFileAttributes; +import static com.yahoo.vespa.hosted.node.admin.maintenance.disk.DiskCleanupRule.Priority; +import static com.yahoo.vespa.hosted.node.admin.task.util.file.FileFinder.FileAttributes; +import static com.yahoo.vespa.hosted.node.admin.task.util.file.FileFinder.nameStartsWith; + +/** + * @author freva + */ +public class CoredumpCleanupRule { + + private static final Comparator CORE_DUMP_FILE_ATTRIBUTE_COMPARATOR = Comparator + .comparing((FileAttributes fa) -> !fa.filename().contains("vespa-")) + .thenComparing(FileAttributes::lastModifiedTime); + + public static DiskCleanupRule forContainer(Path containerCrashPath) { + return new ContainerCoredumpCleanupRule(containerCrashPath); + } + + public static DiskCleanupRule forHost(Path processedCoredumpsPath) { + return new HostCoredumpCleanupRule(processedCoredumpsPath); + } + + /** Assigns MEDIUM priority to the oldest, unprocessed coredump and HIGHEST for the remaining. Files are ordered so that names containing {@code vespa-} come first, then by ascending last modified time */ + private static class ContainerCoredumpCleanupRule implements DiskCleanupRule { + private final Path containerCrashPath; + + private ContainerCoredumpCleanupRule(Path containerCrashPath) { + this.containerCrashPath = containerCrashPath; + } + + @Override + public Collection prioritize() { + List fileAttributes = FileFinder.files(containerCrashPath) + .maxDepth(1).stream() +
.sorted(CORE_DUMP_FILE_ATTRIBUTE_COMPARATOR) + .toList(); + + return mapFirstAndRemaining(fileAttributes, Priority.MEDIUM, Priority.HIGHEST).toList(); + } + } + + /** Assigns MEDIUM priority to the first coredump of the day (by last modified time, in UTC) for each container, HIGH for the remaining */ + private static class HostCoredumpCleanupRule implements DiskCleanupRule { + private final Path processedCoredumpsPath; + + private HostCoredumpCleanupRule(Path processedCoredumpsPath) { + this.processedCoredumpsPath = processedCoredumpsPath; + } + + @Override + public Collection prioritize() { + Map> fileAttributesByContainerDay = FileFinder.files(processedCoredumpsPath) + .match(nameStartsWith(CoredumpHandler.COREDUMP_FILENAME_PREFIX)) + .stream() + .sorted(CORE_DUMP_FILE_ATTRIBUTE_COMPARATOR) + .collect(Collectors.groupingBy( + // Group FileAttributes by string [container-name]_[day of year], e.g. zt00534-v6-2_234 + fa -> containerNameFromProcessedCoredumpPath(fa.path()) + "_" + dayOfYear(fa.lastModifiedTime()), + Collectors.collectingAndThen( + Collectors.toCollection(ArrayList::new), + l -> { l.sort(CORE_DUMP_FILE_ATTRIBUTE_COMPARATOR); return l; } ))); + + return fileAttributesByContainerDay.values().stream() + .flatMap(fa -> mapFirstAndRemaining(fa, Priority.MEDIUM, Priority.HIGH)) + .toList(); + } + } + + /** + * Maps list of FileAttributes into stream of PrioritizedFileAttributes where the first FileAttribute is given + * {@code first} priority, while the remaining FileAttributes are given {@code remaining} priority */ + private static Stream mapFirstAndRemaining(List fileAttributes, Priority first, Priority remaining) { + return IntStream.range(0, fileAttributes.size()) + .mapToObj(i -> new PrioritizedFileAttributes(fileAttributes.get(i), i == 0 ?
first : remaining)); + } + + /** Extracts container-name, i.e. the third to last name element of the given path under processed-coredumps, or empty string if the path has fewer than 3 name elements */ + private static String containerNameFromProcessedCoredumpPath(Path path) { + if (path.getNameCount() < 3) return ""; // Path is too short + return path.getName(path.getNameCount() - 3).toString(); + } + + /** Returns day number of the year in UTC (1-365 (or 366 for leap years)) */ + private static int dayOfYear(Instant instant) { + return instant.atOffset(ZoneOffset.UTC).get(ChronoField.DAY_OF_YEAR); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/disk/DiskCleanup.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/disk/DiskCleanup.java new file mode 100644 index 00000000000..54cf9324909 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/disk/DiskCleanup.java @@ -0,0 +1,59 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.maintenance.disk; + +import com.yahoo.vespa.hosted.node.admin.component.TaskContext; +import com.yahoo.vespa.hosted.node.admin.task.util.file.DiskSize; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; +import java.util.logging.Logger; + +import static com.yahoo.vespa.hosted.node.admin.maintenance.disk.DiskCleanupRule.PrioritizedFileAttributes; +import static com.yahoo.yolean.Exceptions.uncheck; + +/** + * @author freva + */ +public class DiskCleanup { + + private static final Logger logger = Logger.getLogger(DiskCleanup.class.getName()); + private static final Comparator PRIORITIZED_FILE_ATTRIBUTES_COMPARATOR = Comparator + .comparing(PrioritizedFileAttributes::priority) + .thenComparingLong(f -> f.fileAttributes().size()) + .reversed(); + + public boolean cleanup(TaskContext context, List rules, long bytesToRemove) { + if (bytesToRemove <= 0) return false; + + long[] btr = new long[] { bytesToRemove }; + List deletedPaths = new ArrayList<>(); + try { + rules.stream() + .flatMap(rule -> rule.prioritize().stream()) + .sorted(PRIORITIZED_FILE_ATTRIBUTES_COMPARATOR) + .takeWhile(fa -> btr[0] > 0) + .forEach(pfa -> { + if (uncheck(() -> Files.deleteIfExists(pfa.fileAttributes().path()))) { + btr[0] -= pfa.fileAttributes().size(); + deletedPaths.add(pfa.fileAttributes().path()); + } + }); + + } finally { + String wantedDeleteSize = DiskSize.of(bytesToRemove).asString(); + String deletedSize = DiskSize.of(bytesToRemove - btr[0]).asString(); + if (deletedPaths.size() > 20) { + context.log(logger, "Deleted %d files (%s) because disk was getting full", deletedPaths.size(), deletedSize); + } else if (deletedPaths.size() > 0) { + context.log(logger, "Deleted %s because disk was getting full from: %s", deletedSize, deletedPaths); + } else { + context.log(logger, "Wanted to delete %s, but failed to find any files to delete", wantedDeleteSize); 
+ } + } + + return !deletedPaths.isEmpty(); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/disk/DiskCleanupRule.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/disk/DiskCleanupRule.java new file mode 100644 index 00000000000..88b89f1f201 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/disk/DiskCleanupRule.java @@ -0,0 +1,20 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.maintenance.disk; + +import com.yahoo.vespa.hosted.node.admin.task.util.file.FileFinder; + +import java.util.Collection; + +/** + * @author freva + */ +public interface DiskCleanupRule { + + Collection prioritize(); + + enum Priority { + LOWEST, LOW, MEDIUM, HIGH, HIGHEST + } + + record PrioritizedFileAttributes(FileFinder.FileAttributes fileAttributes, Priority priority) { } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/disk/LinearCleanupRule.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/disk/LinearCleanupRule.java new file mode 100644 index 00000000000..961d978bfcf --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/disk/LinearCleanupRule.java @@ -0,0 +1,48 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.maintenance.disk; + +import java.util.Collection; +import java.util.List; +import java.util.function.Function; +import java.util.function.Supplier; +import java.util.stream.Collectors; + +import static com.yahoo.vespa.hosted.node.admin.task.util.file.FileFinder.FileAttributes; + +/** + * Prioritizes files by first scoring them with the given scoring function and then mapping the scores to a + * priority within the given range. 
+ * The priority room is evenly split between given lowest and highest priority for range [0, 1.0). Scores below 0 + * are assigned lowest, while scores at or higher than 1 are assigned highest priority. + * + * Typical use-case is for log files. The scoring function calculates the file age and normalizes it by dividing it + * by expected max age of log files. The oldest logs will then be prioritized by highest given priority. + * + * @author freva + */ +public class LinearCleanupRule implements DiskCleanupRule { + private final Supplier> lister; + private final Function prioritizer; + + public LinearCleanupRule(Supplier> lister, + Function scorer, Priority lowest, Priority highest) { + if (lowest.ordinal() > highest.ordinal()) + throw new IllegalArgumentException("Lowest priority: " + lowest + " is higher than highest priority: " + highest); + + this.lister = lister; + + Priority[] values = Priority.values(); + int range = highest.ordinal() - lowest.ordinal() + 1; + this.prioritizer = fa -> { + int ordinal = (int) (lowest.ordinal() + scorer.apply(fa) * range); + return values[Math.max(lowest.ordinal(), Math.min(highest.ordinal(), ordinal))]; + }; + } + + @Override + public Collection prioritize() { + return lister.get().stream() + .map(fa -> new PrioritizedFileAttributes(fa, prioritizer.apply(fa))) + .toList(); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/disk/package-info.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/disk/package-info.java new file mode 100644 index 00000000000..6b5f60a66c7 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/disk/package-info.java @@ -0,0 +1,5 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+@ExportPackage +package com.yahoo.vespa.hosted.node.admin.maintenance.disk; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/identity/AthenzCredentialsMaintainer.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/identity/AthenzCredentialsMaintainer.java new file mode 100644 index 00000000000..f46950aa448 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/identity/AthenzCredentialsMaintainer.java @@ -0,0 +1,433 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.maintenance.identity; + +import com.yahoo.component.Version; +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.jdisc.Timer; +import com.yahoo.security.KeyAlgorithm; +import com.yahoo.security.KeyUtils; +import com.yahoo.security.Pkcs10Csr; +import com.yahoo.security.SslContextBuilder; +import com.yahoo.security.X509CertificateUtils; +import com.yahoo.vespa.athenz.api.AthenzIdentity; +import com.yahoo.vespa.athenz.api.AthenzRole; +import com.yahoo.vespa.athenz.client.zts.DefaultZtsClient; +import com.yahoo.vespa.athenz.client.zts.InstanceIdentity; +import com.yahoo.vespa.athenz.client.zts.ZtsClient; +import com.yahoo.vespa.athenz.client.zts.ZtsClientException; +import com.yahoo.vespa.athenz.identity.ServiceIdentityProvider; +import com.yahoo.vespa.athenz.identityprovider.api.EntityBindingsMapper; +import com.yahoo.vespa.athenz.identityprovider.api.IdentityDocument; +import com.yahoo.vespa.athenz.identityprovider.api.IdentityDocumentClient; +import com.yahoo.vespa.athenz.identityprovider.api.SignedIdentityDocument; +import com.yahoo.vespa.athenz.identityprovider.client.CsrGenerator; +import com.yahoo.vespa.athenz.identityprovider.client.DefaultIdentityDocumentClient; +import com.yahoo.vespa.athenz.tls.AthenzIdentityVerifier; +import 
com.yahoo.vespa.athenz.utils.SiaUtils; +import com.yahoo.vespa.flags.BooleanFlag; +import com.yahoo.vespa.flags.FetchVector; +import com.yahoo.vespa.flags.FlagSource; +import com.yahoo.vespa.flags.Flags; +import com.yahoo.vespa.hosted.node.admin.component.ConfigServerInfo; +import com.yahoo.vespa.hosted.node.admin.container.ContainerName; +import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext; +import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentTask; +import com.yahoo.vespa.hosted.node.admin.task.util.file.FileFinder; +import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixPath; +import com.yahoo.vespa.hosted.node.admin.task.util.fs.ContainerPath; + +import javax.net.ssl.HostnameVerifier; +import javax.net.ssl.SSLContext; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.net.URI; +import java.nio.file.Files; +import java.nio.file.Path; +import java.security.KeyPair; +import java.security.PrivateKey; +import java.security.cert.X509Certificate; +import java.time.Duration; +import java.time.Instant; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.logging.Level; +import java.util.logging.Logger; + +import static com.yahoo.vespa.hosted.node.admin.maintenance.identity.AthenzCredentialsMaintainer.IdentityType.NODE; +import static com.yahoo.vespa.hosted.node.admin.maintenance.identity.AthenzCredentialsMaintainer.IdentityType.TENANT; + +/** + * A maintainer that is responsible for providing and refreshing Athenz credentials for a container. 
+ * + * @author bjorncs + */ +public class AthenzCredentialsMaintainer implements CredentialsMaintainer { + + private static final Logger logger = Logger.getLogger(AthenzCredentialsMaintainer.class.getName()); + + private static final Duration EXPIRY_MARGIN = Duration.ofDays(1); + private static final Duration REFRESH_PERIOD = Duration.ofDays(1); + private static final Duration REFRESH_BACKOFF = Duration.ofHours(1); // Backoff when refresh fails to ensure ZTS is not DDoS'ed. + + private static final String CONTAINER_SIA_DIRECTORY = "/var/lib/sia"; + private static final String LEGACY_SIA_DIRECTORY = "/opt/vespa/var/vespa/sia"; + + private final Path ztsTrustStorePath; + private final Timer timer; + private final String certificateDnsSuffix; + private final ServiceIdentityProvider hostIdentityProvider; + private final IdentityDocumentClient identityDocumentClient; + + // Used as an optimization to ensure ZTS is not DDoS'ed on continuously failing refresh attempts + private final Map lastRefreshAttempt = new ConcurrentHashMap<>(); + + public AthenzCredentialsMaintainer(Path ztsTrustStorePath, + ConfigServerInfo configServerInfo, + String certificateDnsSuffix, + ServiceIdentityProvider hostIdentityProvider, + Timer timer) { + this.ztsTrustStorePath = ztsTrustStorePath; + this.certificateDnsSuffix = certificateDnsSuffix; + this.hostIdentityProvider = hostIdentityProvider; + this.identityDocumentClient = new DefaultIdentityDocumentClient( + configServerInfo.getLoadBalancerEndpoint(), + hostIdentityProvider, + new AthenzIdentityVerifier(Set.of(configServerInfo.getConfigServerIdentity()))); + this.timer = timer; + } + + public boolean converge(NodeAgentContext context) { + var modified = false; + modified |= maintain(context, NODE); + + if (context.zone().getSystemName().isPublic()) + return modified; + + modified |= maintain(context, TENANT); + return modified; + } + + private boolean maintain(NodeAgentContext context, IdentityType identityType) { + if 
(context.isDisabled(NodeAgentTask.CredentialsMaintainer)) return false; + + try { + var modified = false; + context.log(logger, Level.FINE, "Checking certificate"); + ContainerPath siaDirectory = context.paths().of(CONTAINER_SIA_DIRECTORY, context.users().vespa()); + ContainerPath identityDocumentFile = siaDirectory.resolve(identityType.getIdentityDocument()); + Optional optionalAthenzIdentity = getAthenzIdentity(context, identityType, identityDocumentFile); + if (optionalAthenzIdentity.isEmpty()) + return false; + AthenzIdentity athenzIdentity = optionalAthenzIdentity.get(); + ContainerPath privateKeyFile = (ContainerPath) SiaUtils.getPrivateKeyFile(siaDirectory, athenzIdentity); + ContainerPath certificateFile = (ContainerPath) SiaUtils.getCertificateFile(siaDirectory, athenzIdentity); + if (!Files.exists(privateKeyFile) || !Files.exists(certificateFile) || !Files.exists(identityDocumentFile)) { + context.log(logger, "Certificate/private key/identity document file does not exist"); + Files.createDirectories(privateKeyFile.getParent()); + Files.createDirectories(certificateFile.getParent()); + Files.createDirectories(identityDocumentFile.getParent()); + registerIdentity(context, privateKeyFile, certificateFile, identityDocumentFile, identityType, athenzIdentity); + modified = true; + } + + X509Certificate certificate = readCertificateFromFile(certificateFile); + Instant now = timer.currentTime(); + Instant expiry = certificate.getNotAfter().toInstant(); + var doc = EntityBindingsMapper.readSignedIdentityDocumentFromFile(identityDocumentFile); + if (refreshIdentityDocument(doc, context)) { + context.log(logger, "Identity document is outdated (version=%d)", doc.documentVersion()); + registerIdentity(context, privateKeyFile, certificateFile, identityDocumentFile, identityType, athenzIdentity); + modified = true; + } else if (isCertificateExpired(expiry, now)) { + context.log(logger, "Certificate has expired (expiry=%s)", expiry.toString()); + 
registerIdentity(context, privateKeyFile, certificateFile, identityDocumentFile, identityType, athenzIdentity); + modified = true; + } + + Duration age = Duration.between(certificate.getNotBefore().toInstant(), now); + if (shouldRefreshCredentials(age)) { + context.log(logger, "Certificate is ready to be refreshed (age=%s)", age.toString()); + if (shouldThrottleRefreshAttempts(context.containerName(), now)) { + context.log(logger, Level.WARNING, String.format( + "Skipping refresh attempt as last refresh was on %s (less than %s ago)", + lastRefreshAttempt.get(context.containerName()).toString(), REFRESH_BACKOFF.toString())); + } else { + lastRefreshAttempt.put(context.containerName(), now); + refreshIdentity(context, privateKeyFile, certificateFile, identityDocumentFile, doc.identityDocument(), identityType, athenzIdentity); + modified = true; + } + } + + if (identityType == TENANT) { + modified |= maintainRoleCertificates(context, siaDirectory, privateKeyFile, certificateFile, athenzIdentity, doc.identityDocument()); + copyCredsToLegacyPath(context, privateKeyFile, certificateFile); + } + return modified; + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + private boolean maintainRoleCertificates(NodeAgentContext context, + ContainerPath siaDirectory, + ContainerPath privateKeyFile, + ContainerPath certificateFile, + AthenzIdentity identity, + IdentityDocument identityDocument) { + var modified = false; + + for (var role : getRoleList(context)) { + try { + var roleCertificatePath = siaDirectory.resolve("certs") + .resolve(String.format("%s.cert.pem", role)); + var roleKeyPath = siaDirectory.resolve("keys") + .resolve(String.format("%s.key.pem", role)); + if (Files.notExists(roleCertificatePath)) { + writeRoleCredentials(context, privateKeyFile, certificateFile, roleCertificatePath, roleKeyPath, identity, identityDocument, role); + modified = true; + } else if (shouldRefreshCertificate(context, roleCertificatePath)) { + 
writeRoleCredentials(context, privateKeyFile, certificateFile, roleCertificatePath, roleKeyPath, identity, identityDocument, role); + modified = true; + } + } catch (IOException e) { + context.log(logger, Level.WARNING, "Failed to maintain role certificate " + role, e); + } + } + return modified; + } + + private boolean shouldRefreshCertificate(NodeAgentContext context, ContainerPath certificatePath) throws IOException { + var certificate = readCertificateFromFile(certificatePath); + var now = timer.currentTime(); + var shouldRefresh = now.isAfter(certificate.getNotAfter().toInstant()) || + now.isAfter(certificate.getNotBefore().toInstant().plus(REFRESH_PERIOD)); + return !shouldThrottleRefreshAttempts(context.containerName(), now) && + shouldRefresh; + } + + private void writeRoleCredentials(NodeAgentContext context, + ContainerPath privateKeyFile, + ContainerPath certificateFile, + ContainerPath roleCertificatePath, + ContainerPath roleKeyPath, + AthenzIdentity identity, + IdentityDocument identityDocument, + String role) throws IOException { + HostnameVerifier ztsHostNameVerifier = (hostname, sslSession) -> true; + var keyPair = KeyUtils.generateKeypair(KeyAlgorithm.RSA); + var athenzRole = AthenzRole.fromResourceNameString(role); + + try (ZtsClient ztsClient = ztsClient(identityDocument.ztsUrl(), privateKeyFile, certificateFile, ztsHostNameVerifier)) { + var csrGenerator = new CsrGenerator(certificateDnsSuffix, identityDocument.providerService().getFullName()); + var csr = csrGenerator.generateRoleCsr( + identity, athenzRole, identityDocument.providerUniqueId(), identityDocument.clusterType(), keyPair); + var roleCertificate = ztsClient.getRoleCertificate(athenzRole, csr); + writePrivateKeyAndCertificate(roleKeyPath, keyPair.getPrivate(), roleCertificatePath, roleCertificate); + context.log(logger, "Role certificate successfully retrieved written to file " + roleCertificatePath.pathInContainer()); + } + } + + private boolean 
refreshIdentityDocument(SignedIdentityDocument signedIdentityDocument, NodeAgentContext context) { + int expectedVersion = documentVersion(context); + return signedIdentityDocument.outdated() || signedIdentityDocument.documentVersion() != expectedVersion; + } + + public void clearCredentials(NodeAgentContext context) { + FileFinder.files(context.paths().of(CONTAINER_SIA_DIRECTORY)) + .deleteRecursively(context); + lastRefreshAttempt.remove(context.containerName()); + } + + @Override + public Duration certificateLifetime(NodeAgentContext context) { + ContainerPath containerSiaDirectory = context.paths().of(CONTAINER_SIA_DIRECTORY); + ContainerPath certificateFile = (ContainerPath) SiaUtils.getCertificateFile(containerSiaDirectory, context.identity()); + try { + X509Certificate certificate = readCertificateFromFile(certificateFile); + Instant now = timer.currentTime(); + Instant expiry = certificate.getNotAfter().toInstant(); + return Duration.between(now, expiry); + } catch (IOException e) { + context.log(logger, Level.SEVERE, "Unable to read certificate at " + certificateFile, e); + return Duration.ZERO; + } + } + + @Override + public String name() { + return "node-certificate"; + } + + private boolean shouldRefreshCredentials(Duration age) { + return age.compareTo(REFRESH_PERIOD) >= 0; + } + + private boolean shouldThrottleRefreshAttempts(ContainerName containerName, Instant now) { + return REFRESH_BACKOFF.compareTo( + Duration.between( + lastRefreshAttempt.getOrDefault(containerName, Instant.EPOCH), + now)) > 0; + } + + private void registerIdentity(NodeAgentContext context, ContainerPath privateKeyFile, ContainerPath certificateFile, ContainerPath identityDocumentFile, IdentityType identityType, AthenzIdentity identity) { + KeyPair keyPair = KeyUtils.generateKeypair(KeyAlgorithm.RSA); + SignedIdentityDocument signedDoc = signedIdentityDocument(context, identityType); + IdentityDocument doc = signedDoc.identityDocument(); + CsrGenerator csrGenerator = new 
CsrGenerator(certificateDnsSuffix, doc.providerService().getFullName()); + Pkcs10Csr csr = csrGenerator.generateInstanceCsr( + identity, doc.providerUniqueId(), doc.ipAddresses(), doc.clusterType(), keyPair); + + // Allow all zts hosts while removing SIS + HostnameVerifier ztsHostNameVerifier = (hostname, sslSession) -> true; + try (ZtsClient ztsClient = ztsClient(doc.ztsUrl(), hostIdentityProvider.privateKeyPath(), hostIdentityProvider.certificatePath(), ztsHostNameVerifier)) { + InstanceIdentity instanceIdentity = + ztsClient.registerInstance( + doc.providerService(), + identity, + EntityBindingsMapper.toAttestationData(signedDoc), + csr); + EntityBindingsMapper.writeSignedIdentityDocumentToFile(identityDocumentFile, signedDoc); + writePrivateKeyAndCertificate(privateKeyFile, keyPair.getPrivate(), certificateFile, instanceIdentity.certificate()); + context.log(logger, "Instance successfully registered and credentials written to file"); + } + } + + private void refreshIdentity(NodeAgentContext context, ContainerPath privateKeyFile, ContainerPath certificateFile, + ContainerPath identityDocumentFile, IdentityDocument doc, IdentityType identityType, AthenzIdentity identity) { + try { + // Do not rotate private key on every refresh. 
+ // TODO: rotate key pair only on Vespa upgrade or similar + PrivateKey privateKey = readPrivateKeyFromFile(privateKeyFile); + KeyPair keyPair = KeyUtils.toKeyPair(privateKey); + CsrGenerator csrGenerator = new CsrGenerator(certificateDnsSuffix, doc.providerService().getFullName()); + Pkcs10Csr csr = csrGenerator.generateInstanceCsr( + identity, doc.providerUniqueId(), doc.ipAddresses(), doc.clusterType(), keyPair); + + // Allow all zts hosts while removing SIS + HostnameVerifier ztsHostNameVerifier = (hostname, sslSession) -> true; + try (ZtsClient ztsClient = ztsClient(doc.ztsUrl(), privateKeyFile, certificateFile, ztsHostNameVerifier)) { + InstanceIdentity instanceIdentity = + ztsClient.refreshInstance( + doc.providerService(), + identity, + doc.providerUniqueId().asDottedString(), + csr); + writePrivateKeyAndCertificate(privateKeyFile, keyPair.getPrivate(), certificateFile, instanceIdentity.certificate()); + context.log(logger, "Instance successfully refreshed and credentials written to file"); + } catch (ZtsClientException e) { + if (e.getErrorCode() == 403 && e.getDescription().startsWith("Certificate revoked")) { + context.log(logger, Level.SEVERE, "Certificate cannot be refreshed as it is revoked by ZTS - re-registering the instance now", e); + registerIdentity(context, privateKeyFile, certificateFile, identityDocumentFile, identityType, identity); + } else { + throw e; + } + } + } catch (Exception e) { + context.log(logger, Level.SEVERE, "Certificate refresh failed: " + e.getMessage(), e); + } + } + + + private static void writePrivateKeyAndCertificate(ContainerPath privateKeyFile, + PrivateKey privateKey, + ContainerPath certificateFile, + X509Certificate certificate) { + writeFile(privateKeyFile, KeyUtils.toPem(privateKey)); + writeFile(certificateFile, X509CertificateUtils.toPem(certificate)); + } + + private static void writeFile(ContainerPath path, String utf8Content) { + new UnixPath(path.resolveSibling(path.getFileName() + ".tmp")) + 
.writeUtf8File(utf8Content, "r--------") + .atomicMove(path); + } + + private static X509Certificate readCertificateFromFile(ContainerPath certificateFile) throws IOException { + String pemEncodedCertificate = new String(Files.readAllBytes(certificateFile)); + return X509CertificateUtils.fromPem(pemEncodedCertificate); + } + + private static PrivateKey readPrivateKeyFromFile(ContainerPath privateKeyFile) throws IOException { + String pemEncodedKey = new String(Files.readAllBytes(privateKeyFile)); + return KeyUtils.fromPemEncodedPrivateKey(pemEncodedKey); + } + + private static boolean isCertificateExpired(Instant expiry, Instant now) { + return now.isAfter(expiry.minus(EXPIRY_MARGIN)); + } + + private SignedIdentityDocument signedIdentityDocument(NodeAgentContext context, IdentityType identityType) { + return switch (identityType) { + case NODE -> identityDocumentClient.getNodeIdentityDocument(context.hostname().value(), documentVersion(context)); + case TENANT -> identityDocumentClient.getTenantIdentityDocument(context.hostname().value(), documentVersion(context)).get(); + }; + } + + private Optional getAthenzIdentity(NodeAgentContext context, IdentityType identityType, ContainerPath identityDocumentFile) { + return switch (identityType) { + case NODE -> Optional.of(context.identity()); + case TENANT -> getTenantIdentity(context, identityDocumentFile); + }; + } + + private Optional getTenantIdentity(NodeAgentContext context, ContainerPath identityDocumentFile) { + if (Files.exists(identityDocumentFile)) { + return Optional.of(EntityBindingsMapper.readSignedIdentityDocumentFromFile(identityDocumentFile).identityDocument().serviceIdentity()); + } else { + return identityDocumentClient.getTenantIdentityDocument(context.hostname().value(), documentVersion(context)) + .map(doc -> doc.identityDocument().serviceIdentity()); + } + } + + private void copyCredsToLegacyPath(NodeAgentContext context, ContainerPath privateKeyFile, ContainerPath certificateFile) throws 
IOException { + var legacySiaDirectory = context.paths().of(LEGACY_SIA_DIRECTORY, context.users().vespa()); + var keysDirectory = legacySiaDirectory.resolve("keys"); + var certsDirectory = legacySiaDirectory.resolve("certs"); + Files.createDirectories(keysDirectory); + Files.createDirectories(certsDirectory); + writeFile(certsDirectory.resolve(certificateFile.getFileName()), Files.readString(certificateFile)); + writeFile(keysDirectory.resolve(privateKeyFile.getFileName()), Files.readString(privateKeyFile)); + } + + /** Get the document version to ask for */ + private int documentVersion(NodeAgentContext context) { + return SignedIdentityDocument.DEFAULT_DOCUMENT_VERSION; + } + + private ZtsClient ztsClient(URI ztsEndpoint, Path privateKeyFile, Path certificateFile, HostnameVerifier hostnameVerifier) { + SSLContext sslContext = new SslContextBuilder() + .withKeyStore(privateKeyFile, certificateFile) + .withTrustStore(ztsTrustStorePath) + .build(); + return new DefaultZtsClient.Builder(ztsEndpoint) + .withSslContext(sslContext) + .withHostnameVerifier(hostnameVerifier) + .build(); + } + + private List getRoleList(NodeAgentContext context) { + try { + return identityDocumentClient.getNodeRoles(context.hostname().value()); + } catch (Exception e) { + context.log(logger, Level.WARNING, "Failed to retrieve role list", e); + return List.of(); + } + } + + enum IdentityType { + NODE("vespa-node-identity-document.json"), + TENANT("vespa-tenant-identity-document.json"); + + private final String identityDocument; + IdentityType(String identityDocument) { + this.identityDocument = identityDocument; + } + + public String getIdentityDocument() { + return identityDocument; + } + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/identity/CredentialsMaintainer.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/identity/CredentialsMaintainer.java new file mode 100644 index 00000000000..0e387ac2731 --- /dev/null +++ 
b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/identity/CredentialsMaintainer.java @@ -0,0 +1,29 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.maintenance.identity; + +import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext; + +import java.time.Duration; + +/** + * A maintainer that is responsible for providing and refreshing credentials for a container. + * + * @author freva + */ +public interface CredentialsMaintainer { + + /** + * Creates/refreshes credentials for the given NodeAgentContext. Called for every NodeAgent tick. + * @return false if already converged, i.e. was a no-op. + */ + boolean converge(NodeAgentContext context); + + /** Remove any existing credentials. This method is called just before container data is archived. */ + void clearCredentials(NodeAgentContext context); + + /** Get time until the certificate expires. Invoked each time metrics are collected. */ + Duration certificateLifetime(NodeAgentContext context); + + /** Name used when reporting metrics */ + String name(); +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/identity/package-info.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/identity/package-info.java new file mode 100644 index 00000000000..a48f4f45aa8 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/identity/package-info.java @@ -0,0 +1,8 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+/** + * @author bjorncs + */ +@ExportPackage +package com.yahoo.vespa.hosted.node.admin.maintenance.identity; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/package-info.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/package-info.java new file mode 100644 index 00000000000..2ef78aa9c54 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/package-info.java @@ -0,0 +1,5 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.vespa.hosted.node.admin.maintenance; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/Artifact.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/Artifact.java new file mode 100644 index 00000000000..ee8da84e9cb --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/Artifact.java @@ -0,0 +1,55 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.maintenance.servicedump; + +import com.yahoo.vespa.hosted.node.admin.task.util.fs.ContainerPath; + +import java.util.Optional; + +/** + * An artifact file produced by a {@link ArtifactProducer}. 
+ * + * @author bjorncs + */ +class Artifact { + + enum Classification { + CONFIDENTIAL("confidential"), + INTERNAL("internal"); + + private final String value; + Classification(String value) { this.value = value; } + public String value() { return value; } + } + + private final Classification classification; + private final ContainerPath file; + private final boolean compressOnUpload; + + private Artifact(Builder builder) { + if (builder.file == null) { + throw new IllegalArgumentException("No file specified"); + } + this.file = builder.file; + this.classification = builder.classification; + this.compressOnUpload = Boolean.TRUE.equals(builder.compressOnUpload); + } + + static Builder newBuilder() { return new Builder(); } + + Optional classification() { return Optional.ofNullable(classification); } + ContainerPath file() { return file; } + boolean compressOnUpload() { return compressOnUpload; } + + static class Builder { + private Classification classification; + private ContainerPath file; + private Boolean compressOnUpload; + + private Builder() {} + + Builder classification(Classification c) { this.classification = c; return this; } + Builder file(ContainerPath f) { this.file = f; return this; } + Builder compressOnUpload() { this.compressOnUpload = true; return this; } + Artifact build() { return new Artifact(this); } + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/ArtifactProducer.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/ArtifactProducer.java new file mode 100644 index 00000000000..87ab1ef8bf5 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/ArtifactProducer.java @@ -0,0 +1,37 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.maintenance.servicedump; + +import com.yahoo.vespa.hosted.node.admin.task.util.fs.ContainerPath; +import com.yahoo.vespa.hosted.node.admin.task.util.process.CommandResult; + +import java.util.List; +import java.util.OptionalDouble; + +/** + * Produces service dump artifacts. + * + * @author bjorncs + */ +interface ArtifactProducer { + + String artifactName(); + String description(); + List produceArtifacts(Context ctx); + + interface Context { + String serviceId(); + int servicePid(); + CommandResult executeCommandInNode(List command, boolean logOutput); + ContainerPath outputContainerPath(); + ContainerPath containerPathUnderVespaHome(String relativePath); + Options options(); + + interface Options { + OptionalDouble duration(); + boolean callGraphRecording(); + boolean sendProfilingSignal(); + } + } + + +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/ArtifactProducers.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/ArtifactProducers.java new file mode 100644 index 00000000000..939bebc5fac --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/ArtifactProducers.java @@ -0,0 +1,109 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.maintenance.servicedump; + +import com.yahoo.yolean.concurrent.Sleeper; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Function; +import java.util.stream.Collectors; + +/** + * @author bjorncs + */ +class ArtifactProducers { + + private final Map producers; + private final Map> aliases; + + private ArtifactProducers(Set producers, + Map>> aliases) { + var producerMap = producers.stream() + .collect(Collectors.toMap(ArtifactProducer::artifactName, Function.identity())); + Map> aliasMap = new HashMap<>(); + aliases.forEach((alias, mapping) -> { + List concreteMapping = mapping.stream() + .map(type -> producers.stream() + .filter(p -> p.getClass().equals(type)) + .findAny() + .orElseThrow(() -> new IllegalArgumentException("No producer of type " + type))) + .toList(); + if (producerMap.containsKey(alias)) { + throw new IllegalStateException("Alias name '" + alias + "' conflicts with producer"); + } + aliasMap.put(alias, concreteMapping); + }); + this.producers = producerMap; + this.aliases = aliasMap; + } + + static ArtifactProducers createDefault(Sleeper sleeper) { + var producers = Set.of( + new PerfReporter(), + new JvmDumper.JavaFlightRecorder(sleeper), + new JvmDumper.HeapDump(), + new JvmDumper.Jmap(), + new JvmDumper.Jstat(), + new JvmDumper.Jstack(), + new PmapReporter(), + new VespaLogDumper(sleeper), + new ZooKeeperSnapshotDumper(), + new ConfigDumper()); + var aliases = + Map.of( + "jvm-dump", + List.of( + JvmDumper.HeapDump.class, JvmDumper.Jmap.class, JvmDumper.Jstat.class, + JvmDumper.Jstack.class, VespaLogDumper.class) + ); + return new ArtifactProducers(producers, aliases); + } + + static ArtifactProducers createCustom(Set producers, + Map>> aliases) { + return new ArtifactProducers(producers, aliases); + } + + List resolve(List requestedArtifacts) { + List resolved = new ArrayList<>(); + for (String 
artifact : requestedArtifacts) { + if (aliases.containsKey(artifact)) { + aliases.get(artifact).stream() + .filter(p -> !resolved.contains(p)) + .forEach(resolved::add); + } else if (producers.containsKey(artifact)) { + ArtifactProducer producer = producers.get(artifact); + if (!resolved.contains(producer)) { + resolved.add(producer); + } + } else { + throw createInvalidArtifactException(artifact); + } + } + return resolved; + } + + private IllegalArgumentException createInvalidArtifactException(String artifact) { + String producersString = producers.keySet().stream() + .map(a -> "'" + a + "'") + .sorted() + .collect(Collectors.joining(", ", "[", "]")); + String aliasesString = aliases.entrySet().stream() + .map(e -> String.format( + "'%s': %s", + e.getKey(), + e.getValue().stream() + .map(p -> "'" + p.artifactName() + "'") + .sorted() + .collect(Collectors.joining(", ", "[", "]"))) + ) + .collect(Collectors.joining(", ", "[", "]")); + String msg = String.format( + "Invalid artifact type '%s'. Valid types are %s and valid aliases are %s", + artifact, producersString, aliasesString); + return new IllegalArgumentException(msg); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/ConfigDumper.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/ConfigDumper.java new file mode 100644 index 00000000000..8eadabf07cf --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/ConfigDumper.java @@ -0,0 +1,35 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.maintenance.servicedump; + +import com.yahoo.vespa.hosted.node.admin.task.util.fs.ContainerPath; + +import java.util.List; + +import static com.yahoo.vespa.hosted.node.admin.maintenance.servicedump.Artifact.Classification.CONFIDENTIAL; +import static com.yahoo.vespa.hosted.node.admin.maintenance.servicedump.Artifact.Classification.INTERNAL; + +/** + * Performs dump of config on a node. + * + * @author hmusum + */ +class ConfigDumper implements ArtifactProducer { + @Override public String artifactName() { return "config-dump"; } + @Override public String description() { return "Dumps config"; } + + @Override + public List produceArtifacts(Context ctx) { + ContainerPath dir = ctx.outputContainerPath().resolve("config"); + ContainerPath configDump = ctx.outputContainerPath().resolve("config-dump.tar.zst"); + List cmd = List.of("bash", "-c", + String.format("mkdir -p %s; /opt/vespa/bin/vespa-configproxy-cmd -m dumpcache %s; tar cvf %s.tar %s; zstd %s.tar -o %s", + dir.pathInContainer(), + dir.pathInContainer(), + dir.pathInContainer(), + dir.pathInContainer(), + dir.pathInContainer(), + configDump.pathInContainer())); + ctx.executeCommandInNode(cmd, true); + return List.of(Artifact.newBuilder().classification(INTERNAL).file(configDump).build()); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/JvmDumper.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/JvmDumper.java new file mode 100644 index 00000000000..360a212646f --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/JvmDumper.java @@ -0,0 +1,103 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.maintenance.servicedump; + +import com.yahoo.vespa.hosted.node.admin.task.util.fs.ContainerPath; +import com.yahoo.yolean.concurrent.Sleeper; + +import java.time.Duration; +import java.util.List; + +import static com.yahoo.vespa.hosted.node.admin.maintenance.servicedump.Artifact.Classification.CONFIDENTIAL; +import static com.yahoo.vespa.hosted.node.admin.maintenance.servicedump.Artifact.Classification.INTERNAL; + +/** + * @author bjorncs + */ +class JvmDumper { + private JvmDumper() {} + + static class HeapDump implements ArtifactProducer { + @Override public String artifactName() { return "jvm-heap-dump"; } + @Override public String description() { return "JVM heap dump"; } + + @Override + public List produceArtifacts(Context ctx) { + ContainerPath heapDumpFile = ctx.outputContainerPath().resolve("jvm-heap-dump.hprof"); + List cmd = List.of( + "jmap", "-dump:live,format=b,file=" + heapDumpFile.pathInContainer(), Integer.toString(ctx.servicePid())); + ctx.executeCommandInNode(cmd, true); + return List.of( + Artifact.newBuilder().classification(CONFIDENTIAL).file(heapDumpFile).compressOnUpload().build()); + } + } + + static class Jmap implements ArtifactProducer { + @Override public String artifactName() { return "jvm-jmap"; } + @Override public String description() { return "JVM jmap output"; } + + @Override + public List produceArtifacts(Context ctx) { + ContainerPath jmapReport = ctx.outputContainerPath().resolve("jvm-jmap.txt"); + List cmd = List.of("bash", "-c", "jhsdb jmap --heap --pid " + ctx.servicePid() + " > " + jmapReport.pathInContainer()); + ctx.executeCommandInNode(cmd, true); + return List.of(Artifact.newBuilder().classification(INTERNAL).file(jmapReport).build()); + } + } + + static class Jstat implements ArtifactProducer { + @Override public String artifactName() { return "jvm-jstat"; } + @Override public String description() { return "JVM jstat output"; } + + @Override + public List 
produceArtifacts(Context ctx) { + ContainerPath jstatReport = ctx.outputContainerPath().resolve("jvm-jstat.txt"); + List cmd = List.of("bash", "-c", "jstat -gcutil " + ctx.servicePid() + " > " + jstatReport.pathInContainer()); + ctx.executeCommandInNode(cmd, true); + return List.of(Artifact.newBuilder().classification(INTERNAL).file(jstatReport).build()); + } + } + + static class Jstack implements ArtifactProducer { + @Override public String artifactName() { return "jvm-jstack"; } + @Override public String description() { return "JVM jstack output"; } + + @Override + public List produceArtifacts(Context ctx) { + ContainerPath jstackReport = ctx.outputContainerPath().resolve("jvm-jstack.txt"); + ctx.executeCommandInNode(List.of("bash", "-c", "jstack " + ctx.servicePid() + " > " + jstackReport.pathInContainer()), true); + return List.of(Artifact.newBuilder().classification(INTERNAL).file(jstackReport).build()); + } + } + + static class JavaFlightRecorder implements ArtifactProducer { + private final Sleeper sleeper; + + JavaFlightRecorder(Sleeper sleeper) { this.sleeper = sleeper; } + + @Override public String artifactName() { return "jvm-jfr"; } + @Override public String description() { return "Java Flight Recorder recording"; } + + @Override + public List produceArtifacts(ArtifactProducer.Context ctx) { + int seconds = (int) (ctx.options().duration().orElse(30.0)); + ContainerPath outputFile = ctx.outputContainerPath().resolve("recording.jfr"); + List startCommand = List.of("jcmd", Integer.toString(ctx.servicePid()), "JFR.start", "name=host-admin", + "path-to-gc-roots=true", "settings=profile", "filename=" + outputFile.pathInContainer(), "duration=" + seconds + "s"); + ctx.executeCommandInNode(startCommand, true); + sleeper.sleep(Duration.ofSeconds(seconds).plusSeconds(1)); + int maxRetries = 10; + List checkCommand = List.of("jcmd", Integer.toString(ctx.servicePid()), "JFR.check", "name=host-admin"); + for (int i = 0; i < maxRetries; i++) { + boolean stillRunning 
= ctx.executeCommandInNode(checkCommand, true).getOutputLines().stream() + .anyMatch(l -> l.contains("name=host-admin") && l.contains("running")); + if (!stillRunning) { + Artifact a = Artifact.newBuilder() + .classification(CONFIDENTIAL).file(outputFile).compressOnUpload().build(); + return List.of(a); + } + sleeper.sleep(Duration.ofSeconds(1)); + } + throw new RuntimeException("Failed to wait for JFR dump to complete after " + maxRetries + " retries"); + } + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/PerfReporter.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/PerfReporter.java new file mode 100644 index 00000000000..f4b4307b0d7 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/PerfReporter.java @@ -0,0 +1,40 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.maintenance.servicedump; + +import com.yahoo.vespa.hosted.node.admin.task.util.fs.ContainerPath; + +import java.util.ArrayList; +import java.util.List; + +import static com.yahoo.vespa.hosted.node.admin.maintenance.servicedump.Artifact.Classification.CONFIDENTIAL; +import static com.yahoo.vespa.hosted.node.admin.maintenance.servicedump.Artifact.Classification.INTERNAL; + +/** + * @author bjorncs + */ +class PerfReporter implements ArtifactProducer { + + PerfReporter() {} + + @Override public String artifactName() { return "perf-report"; } + @Override public String description() { return "Perf recording and report"; } + + @Override + public List produceArtifacts(Context ctx) { + int duration = (int)ctx.options().duration().orElse(30.0); + List perfRecordCommand = new ArrayList<>(List.of("perf", "record")); + if (ctx.options().callGraphRecording()) { + perfRecordCommand.add("-g"); + } + ContainerPath recordFile = 
ctx.outputContainerPath().resolve("perf-record.bin"); + perfRecordCommand.addAll( + List.of("--output=" + recordFile.pathInContainer(), + "--pid=" + ctx.servicePid(), "sleep", Integer.toString(duration))); + ctx.executeCommandInNode(perfRecordCommand, true); + ContainerPath reportFile = ctx.outputContainerPath().resolve("perf-report.txt"); + ctx.executeCommandInNode(List.of("bash", "-c", "perf report --input=" + recordFile.pathInContainer() + " > " + reportFile.pathInContainer()), true); + return List.of( + Artifact.newBuilder().classification(CONFIDENTIAL).file(recordFile).compressOnUpload().build(), + Artifact.newBuilder().classification(INTERNAL).file(reportFile).build()); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/PmapReporter.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/PmapReporter.java new file mode 100644 index 00000000000..8f8feb57c27 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/PmapReporter.java @@ -0,0 +1,24 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.maintenance.servicedump; + +import com.yahoo.vespa.hosted.node.admin.task.util.fs.ContainerPath; + +import java.util.List; + +import static com.yahoo.vespa.hosted.node.admin.maintenance.servicedump.Artifact.Classification.INTERNAL; + +/** + * @author bjorncs + */ +class PmapReporter implements ArtifactProducer { + @Override public String artifactName() { return "pmap"; } + @Override public String description() { return "Pmap report"; } + + @Override + public List produceArtifacts(Context ctx) { + ContainerPath pmapReport = ctx.outputContainerPath().resolve("pmap.txt"); + List cmd = List.of("bash", "-c", "pmap -x " + ctx.servicePid() + " > " + pmapReport.pathInContainer()); + ctx.executeCommandInNode(cmd, true); + return List.of(Artifact.newBuilder().classification(INTERNAL).file(pmapReport).build()); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/ServiceDumpReport.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/ServiceDumpReport.java new file mode 100644 index 00000000000..744eeefca07 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/ServiceDumpReport.java @@ -0,0 +1,143 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.maintenance.servicedump; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonGetter; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.reports.BaseReport; + +import java.net.URI; +import java.time.Instant; +import java.util.List; + +/** + * JSON representation of Vespa service dump report. + * + * @author bjorncs + */ +@JsonInclude(JsonInclude.Include.NON_NULL) +@JsonIgnoreProperties(ignoreUnknown = true) +class ServiceDumpReport extends BaseReport { + + public static final String REPORT_ID = "serviceDump"; + + private static final String STARTED_AT_FIELD = "startedAt"; + private static final String COMPLETED_AT_FIELD = "completedAt"; + private static final String FAILED_AT_FIELD = "failedAt"; + private static final String LOCATION_FIELD = "location"; + private static final String CONFIG_ID_FIELD = "configId"; + private static final String EXPIRE_AT_FIELD = "expireAt"; + private static final String ERROR_FIELD = "error"; + private static final String ARTIFACTS_FIELD = "artifacts"; + private static final String DUMP_OPTIONS_FIELD = "dumpOptions"; + + private final Long startedAt; + private final Long completedAt; + private final Long failedAt; + private final String location; + private final String configId; + private final Long expireAt; + private final String error; + private final List artifacts; + private final DumpOptions dumpOptions; + + @JsonCreator + public ServiceDumpReport(@JsonProperty(CREATED_FIELD) Long createdAt, + @JsonProperty(STARTED_AT_FIELD) Long startedAt, + @JsonProperty(COMPLETED_AT_FIELD) Long completedAt, + @JsonProperty(FAILED_AT_FIELD) Long failedAt, + @JsonProperty(LOCATION_FIELD) String location, + 
@JsonProperty(CONFIG_ID_FIELD) String configId, + @JsonProperty(EXPIRE_AT_FIELD) Long expireAt, + @JsonProperty(ERROR_FIELD) String error, + @JsonProperty(ARTIFACTS_FIELD) List artifacts, + @JsonProperty(DUMP_OPTIONS_FIELD) DumpOptions dumpOptions) { + super(createdAt, null); + this.startedAt = startedAt; + this.completedAt = completedAt; + this.failedAt = failedAt; + this.location = location; + this.configId = configId; + this.expireAt = expireAt; + this.error = error; + this.artifacts = artifacts; + this.dumpOptions = dumpOptions; + } + + public static ServiceDumpReport createRequestReport(Instant createdAt, Instant expireAt, String configId, + List artifacts, DumpOptions options) { + return new ServiceDumpReport( + createdAt.toEpochMilli(), null, null, null, null, configId, + expireAt != null ? expireAt.toEpochMilli() : null, null, artifacts, options); + } + + public static ServiceDumpReport createStartedReport(ServiceDumpReport request, Instant startedAt) { + return new ServiceDumpReport( + request.getCreatedMillisOrNull(), startedAt.toEpochMilli(), null, null, null, request.configId(), + request.expireAt(), null, request.artifacts(), request.dumpOptions()); + } + + public static ServiceDumpReport createSuccessReport( + ServiceDumpReport request, Instant startedAt, Instant completedAt, URI location) { + return new ServiceDumpReport( + request.getCreatedMillisOrNull(), startedAt.toEpochMilli(), completedAt.toEpochMilli(), null, + location.toString(), request.configId(), request.expireAt(), null, request.artifacts(), + request.dumpOptions()); + } + + public static ServiceDumpReport createErrorReport( + ServiceDumpReport reqOrNull, Instant startedAt, Instant failedAt, String message) { + Long createdAt = reqOrNull != null ? reqOrNull.getCreatedMillisOrNull() : Long.valueOf(startedAt.toEpochMilli()); + String configId = reqOrNull != null ? reqOrNull.configId() : "unknown"; + Long expireAt = reqOrNull != null ? 
reqOrNull.expireAt() : null; + List artifacts = reqOrNull != null ? reqOrNull.artifacts() : List.of(); + DumpOptions dumpOptions = reqOrNull != null ? reqOrNull.dumpOptions() : null; + return new ServiceDumpReport( + createdAt, startedAt.toEpochMilli(), null, failedAt.toEpochMilli(), null, + configId, expireAt, message, artifacts, dumpOptions); + } + + @JsonGetter(STARTED_AT_FIELD) public Long startedAt() { return startedAt; } + @JsonGetter(COMPLETED_AT_FIELD) public Long completedAt() { return completedAt; } + @JsonGetter(FAILED_AT_FIELD) public Long failedAt() { return failedAt; } + @JsonGetter(LOCATION_FIELD) public String location() { return location; } + @JsonGetter(CONFIG_ID_FIELD) public String configId() { return configId; } + @JsonGetter(EXPIRE_AT_FIELD) public Long expireAt() { return expireAt; } + @JsonGetter(ERROR_FIELD) public String error() { return error; } + @JsonGetter(ARTIFACTS_FIELD) public List artifacts() { return artifacts; } + @JsonGetter(DUMP_OPTIONS_FIELD) public DumpOptions dumpOptions() { return dumpOptions; } + + @JsonInclude(JsonInclude.Include.NON_NULL) + @JsonIgnoreProperties(ignoreUnknown = true) + public static class DumpOptions { + + private static final String CALL_GRAPH_RECORDING_FIELD = "callGraphRecording"; + private static final String DURATION_FIELD = "duration"; + private static final String SEND_PROFILING_SIGNAL_FIELD = "sendProfilingSignal"; + + private final Boolean callGraphRecording; + private final Double duration; + private final Boolean sendProfilingSignal; + + @JsonCreator + public DumpOptions(@JsonProperty(CALL_GRAPH_RECORDING_FIELD) Boolean callGraphRecording, + @JsonProperty(DURATION_FIELD) Double duration, + @JsonProperty(SEND_PROFILING_SIGNAL_FIELD) Boolean sendProfilingSignal) { + this.callGraphRecording = callGraphRecording; + this.duration = duration; + this.sendProfilingSignal = sendProfilingSignal; + } + + @JsonGetter(CALL_GRAPH_RECORDING_FIELD) public Boolean callGraphRecording() { return 
callGraphRecording; } + @JsonGetter(DURATION_FIELD) public Double duration() { return duration; } + @JsonGetter(SEND_PROFILING_SIGNAL_FIELD) public Boolean sendProfilingSignal() { return sendProfilingSignal; } + } + + @JsonIgnore public boolean isCompletedOrFailed() { return !isNullTimestamp(failedAt) || !isNullTimestamp(completedAt); } + + public static boolean isNullTimestamp(Long timestamp) { return timestamp == null || timestamp == 0; } + +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/VespaLogDumper.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/VespaLogDumper.java new file mode 100644 index 00000000000..32814e38d39 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/VespaLogDumper.java @@ -0,0 +1,47 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.maintenance.servicedump; + +import com.yahoo.vespa.hosted.node.admin.task.util.fs.ContainerPath; +import com.yahoo.yolean.concurrent.Sleeper; + +import java.nio.file.Files; +import java.time.Duration; +import java.util.List; +import java.util.logging.Logger; + +import static com.yahoo.vespa.hosted.node.admin.maintenance.servicedump.Artifact.Classification.CONFIDENTIAL; +import static com.yahoo.yolean.Exceptions.uncheck; + +/** + * @author bjorncs + */ +class VespaLogDumper implements ArtifactProducer { + + private static final Logger log = Logger.getLogger(VespaLogDumper.class.getName()); + + private final Sleeper sleeper; + + VespaLogDumper(Sleeper sleeper) { this.sleeper = sleeper; } + + @Override public String artifactName() { return "vespa-log"; } + @Override public String description() { return "Current Vespa logs"; } + + @Override + public List produceArtifacts(Context ctx) { + if (ctx.options().sendProfilingSignal()) { + log.info("Sending SIGPROF to process to 
include vespa-malloc dump in Vespa log"); + ctx.executeCommandInNode(List.of("kill", "-SIGPROF", Integer.toString(ctx.servicePid())), true); + sleeper.sleep(Duration.ofSeconds(3)); + } + ContainerPath vespaLogFile = ctx.containerPathUnderVespaHome("logs/vespa/vespa.log"); + ContainerPath destination = ctx.outputContainerPath().resolve("vespa.log"); + if (Files.exists(vespaLogFile)) { + uncheck(() -> Files.copy(vespaLogFile, destination)); + return List.of( + Artifact.newBuilder().classification(CONFIDENTIAL).file(destination).compressOnUpload().build()); + } else { + log.info("Log file '" + vespaLogFile + "' does not exist"); + return List.of(); + } + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/VespaServiceDumper.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/VespaServiceDumper.java new file mode 100644 index 00000000000..1f474295660 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/VespaServiceDumper.java @@ -0,0 +1,13 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.maintenance.servicedump; + +import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext; + +/** + * @author bjorncs + */ +public interface VespaServiceDumper { + void processServiceDumpRequest(NodeAgentContext context); + + VespaServiceDumper DUMMY_INSTANCE = context -> {}; +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/VespaServiceDumperImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/VespaServiceDumperImpl.java new file mode 100644 index 00000000000..1279d9a4b28 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/VespaServiceDumperImpl.java @@ -0,0 +1,269 @@ +// Copyright Vespa.ai. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.maintenance.servicedump; + +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.CloudName; +import com.yahoo.jdisc.Timer; +import com.yahoo.text.Lowercase; +import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeAttributes; +import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeRepository; +import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeSpec; +import com.yahoo.vespa.hosted.node.admin.container.ContainerOperations; +import com.yahoo.vespa.hosted.node.admin.maintenance.sync.SyncClient; +import com.yahoo.vespa.hosted.node.admin.maintenance.sync.SyncFileInfo; +import com.yahoo.vespa.hosted.node.admin.maintenance.sync.SyncFileInfo.Compression; +import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext; +import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixPath; +import com.yahoo.vespa.hosted.node.admin.task.util.fs.ContainerPath; +import com.yahoo.vespa.hosted.node.admin.task.util.process.CommandResult; +import com.yahoo.yolean.concurrent.Sleeper; + +import java.io.UncheckedIOException; +import java.net.URI; +import java.time.Instant; +import java.time.temporal.ChronoUnit; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import java.util.OptionalDouble; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.stream.Collectors; + +import static com.yahoo.vespa.hosted.node.admin.maintenance.servicedump.ServiceDumpReport.isNullTimestamp; + +/** + * Generates dumps for Vespa services and uploads resulting files to S3. 
+ *
+ * @author bjorncs
+ */
+public class VespaServiceDumperImpl implements VespaServiceDumper {
+
+    private static final Logger log = Logger.getLogger(VespaServiceDumperImpl.class.getName());
+
+    private final ContainerOperations container;
+    private final SyncClient syncClient;
+    private final NodeRepository nodeRepository;
+    private final Timer timer;
+    private final ArtifactProducers artifactProducers;
+
+    public VespaServiceDumperImpl(ContainerOperations container, SyncClient syncClient, NodeRepository nodeRepository, Timer timer) {
+        this(ArtifactProducers.createDefault(Sleeper.DEFAULT), container, syncClient, nodeRepository, timer);
+    }
+
+    // For unit testing: lets a test inject the artifact producers instead of the default set
+    VespaServiceDumperImpl(ArtifactProducers producers, ContainerOperations container, SyncClient syncClient,
+                           NodeRepository nodeRepository, Timer timer) {
+        this.container = container;
+        this.syncClient = syncClient;
+        this.nodeRepository = nodeRepository;
+        this.timer = timer;
+        this.artifactProducers = producers;
+    }
+
+    @Override
+    public void processServiceDumpRequest(NodeAgentContext context) {
+        if (context.zone().getCloudName().equals(CloudName.GCP)) return; // dump requests are ignored in GCP zones
+
+        Instant startedAt = timer.currentTime();
+        NodeSpec nodeSpec = context.node();
+        ServiceDumpReport request;
+        try {
+            request = nodeSpec.reports().getReport(ServiceDumpReport.REPORT_ID, ServiceDumpReport.class)
+                    .orElse(null);
+        } catch (IllegalArgumentException | UncheckedIOException e) {
+            handleFailure(context, null, startedAt, e, "Invalid JSON in service dump request");
+            return;
+        }
+        if (request == null || request.isCompletedOrFailed()) {
+            context.log(log, Level.FINE, "No service dump requested or dump already completed/failed");
+            return;
+        }
+        if (isNullTimestamp(request.getCreatedMillisOrNull())) {
+            handleFailure(context, request, startedAt, "'createdMillis' is missing or null");
+            return;
+        }
+        String configId = request.configId();
+        if (configId == null) {
+            handleFailure(context, request, startedAt, "Service config id is missing from request");
+            return;
+        }
+        Instant expiry = expireAt(startedAt, request);
+        if (expiry.isBefore(startedAt)) {
+            handleFailure(context, request, startedAt, "Request already expired");
+            return;
+        }
+        List<String> requestedArtifacts = request.artifacts();
+        if (requestedArtifacts == null || requestedArtifacts.isEmpty()) {
+            handleFailure(context, request, startedAt, "No artifacts requested");
+            return;
+        }
+        ContainerPath directory = context.paths().underVespaHome("var/tmp/vespa-service-dump-" + request.getCreatedMillisOrNull());
+        UnixPath unixPathDirectory = new UnixPath(directory);
+        try {
+            context.log(log, Level.INFO,
+                    "Creating service dump for " + configId + " requested at "
+                            + Instant.ofEpochMilli(request.getCreatedMillisOrNull()));
+            storeReport(context, ServiceDumpReport.createStartedReport(request, startedAt));
+            if (unixPathDirectory.exists()) {
+                context.log(log, Level.INFO, "Removing existing directory '" + unixPathDirectory +"'.");
+                unixPathDirectory.deleteRecursively();
+            }
+            context.log(log, Level.INFO, "Creating '" + unixPathDirectory +"'.");
+            unixPathDirectory.createDirectory("rwxr-x---");
+            URI destination = serviceDumpDestination(nodeSpec, createDumpId(request));
+            ProducerContext producerCtx = new ProducerContext(context, directory, request);
+            List<Artifact> producedArtifacts = new ArrayList<>();
+            for (ArtifactProducer producer : artifactProducers.resolve(requestedArtifacts)) {
+                context.log(log, "Producing artifact of type '" + producer.artifactName() + "'");
+                producedArtifacts.addAll(producer.produceArtifacts(producerCtx));
+            }
+            uploadArtifacts(context, destination, producedArtifacts);
+            storeReport(context, ServiceDumpReport.createSuccessReport(request, startedAt, timer.currentTime(), destination));
+        } catch (Exception e) { // any failure is stored as an error report; the finally block still removes the directory
+            handleFailure(context, request, startedAt, e, e.getMessage());
+        } finally {
+            if (unixPathDirectory.exists()) {
+                context.log(log, Level.INFO, "Deleting directory '" + unixPathDirectory +"'.");
+                unixPathDirectory.deleteRecursively();
+            }
+        }
+    }
+
+    private void uploadArtifacts(NodeAgentContext ctx, URI destination,
+                                 List<Artifact> producedArtifacts) {
+        ApplicationId owner = ctx.node().owner().orElseThrow();
+        List<SyncFileInfo> filesToUpload = producedArtifacts.stream()
+                .map(a -> {
+                    Compression compression = a.compressOnUpload() ? Compression.ZSTD : Compression.NONE;
+                    String classification = a.classification().map(Artifact.Classification::value).orElse(null);
+                    return SyncFileInfo.forServiceDump(destination, a.file(), compression, owner, classification);
+                })
+                .toList();
+        ctx.log(log, Level.INFO,
+                "Uploading " + filesToUpload.size() + " file(s) with destination " + destination);
+        if (!syncClient.sync(ctx, filesToUpload, Integer.MAX_VALUE)) { // MAX_VALUE: no cap on the number of files
+            throw new RuntimeException("Unable to upload all files");
+        }
+        ctx.log(log, Level.INFO, "Upload complete");
+    }
+
+    private static Instant expireAt(Instant startedAt, ServiceDumpReport request) {
+        return isNullTimestamp(request.expireAt())
+                ? startedAt.plus(7, ChronoUnit.DAYS) // request carries no expiry: use 7 days from the start time
+                : Instant.ofEpochMilli(request.expireAt());
+    }
+
+    private void handleFailure(NodeAgentContext context, ServiceDumpReport requestOrNull, Instant startedAt,
+                               Exception failure, String message) {
+        context.log(log, Level.WARNING, failure.toString(), failure);
+        ServiceDumpReport report = ServiceDumpReport.createErrorReport(requestOrNull, startedAt, timer.currentTime(), message);
+        storeReport(context, report);
+    }
+
+    private void handleFailure(NodeAgentContext context, ServiceDumpReport requestOrNull, Instant startedAt, String message) {
+        context.log(log, Level.WARNING, message);
+        ServiceDumpReport report = ServiceDumpReport.createErrorReport(requestOrNull, startedAt, timer.currentTime(), message);
+        storeReport(context, report);
+    }
+
+    private void storeReport(NodeAgentContext context, ServiceDumpReport report) {
+        NodeAttributes nodeAttributes = new NodeAttributes();
+        nodeAttributes.withReport(ServiceDumpReport.REPORT_ID, report.toJsonNode());
+        nodeRepository.updateNodeAttributes(context.hostname().value(), nodeAttributes);
+    }
+
+    static String createDumpId(ServiceDumpReport request) {
+        String sanitizedConfigId = Lowercase.toLowerCase(request.configId()).replaceAll("[^a-z_0-9]", "-");
+        return sanitizedConfigId + "-" + request.getCreatedMillisOrNull().toString();
+    }
+
+    private static URI serviceDumpDestination(NodeSpec spec, String dumpId) {
+        URI archiveUri = spec.archiveUri()
+                .orElseThrow(() -> new IllegalStateException("Archive URI is missing for " + spec.hostname()));
+        String targetDirectory = "service-dump/" + dumpId + "/";
+        return archiveUri.resolve(targetDirectory);
+    }
+
+    private class ProducerContext implements ArtifactProducer.Context, ArtifactProducer.Context.Options {
+
+        final NodeAgentContext nodeAgentCtx;
+        final ContainerPath path;
+        final ServiceDumpReport request;
+        volatile int pid = -1; // -1 until servicePid() has looked the pid up
+
+        ProducerContext(NodeAgentContext nodeAgentCtx, ContainerPath path, ServiceDumpReport request) {
+            this.nodeAgentCtx = nodeAgentCtx;
+            this.path = path;
+            this.request = request;
+        }
+
+        @Override public String serviceId() { return request.configId(); }
+
+        @Override
+        public int servicePid() {
+            if (pid == -1) {
+                pid = findServicePid(serviceId());
+            }
+            return pid;
+        }
+
+        private int findServicePid(String serviceId) {
+            ContainerPath findPidBinary = nodeAgentCtx.paths().underVespaHome("libexec/vespa/find-pid");
+            CommandResult findPidResult = executeCommandInNode(List.of(findPidBinary.pathInContainer(), serviceId), true);
+            return Integer.parseInt(findPidResult.getOutput().trim()); // trim: parseInt rejects surrounding whitespace
+        }
+
+        @Override
+        public CommandResult executeCommandInNode(List<String> command, boolean logOutput) {
+            CommandResult result = container.executeCommandInContainer(nodeAgentCtx, nodeAgentCtx.users().vespa(), command.toArray(new String[0]));
+            String cmdString = command.stream().map(s -> "'" + s + "'").collect(Collectors.joining(" ", "\"", "\""));
+            int exitCode = result.getExitCode();
+            String output = result.getOutput().trim();
+            String prefixedOutput = output.contains("\n")
+                    ? "\n" + output
+                    : (output.isEmpty() ? "<no output>" : output); // placeholder so the message never ends in a bare colon
+            if (exitCode > 0) {
+                String errorMsg = logOutput
+                        ? String.format("Failed to execute %s (exited with code %d): %s", cmdString, exitCode, prefixedOutput)
+                        : String.format("Failed to execute %s (exited with code %d)", cmdString, exitCode);
+                throw new RuntimeException(errorMsg);
+            } else {
+                String logMsg = logOutput
+                        ? String.format("Executed command %s. Exited with code %d and output: %s", cmdString, exitCode, prefixedOutput)
+                        : String.format("Executed command %s. Exited with code %d.", cmdString, exitCode);
+                nodeAgentCtx.log(log, logMsg);
+            }
+            return result;
+        }
+
+        @Override public ContainerPath outputContainerPath() { return path; }
+
+        @Override
+        public ContainerPath containerPathUnderVespaHome(String relativePath) {
+            return nodeAgentCtx.paths().underVespaHome(relativePath);
+        }
+
+        @Override public Options options() { return this; }
+
+        @Override
+        public OptionalDouble duration() {
+            Double duration = dumpOptions()
+                    .map(ServiceDumpReport.DumpOptions::duration)
+                    .orElse(null);
+            return duration != null ? OptionalDouble.of(duration) : OptionalDouble.empty();
+        }
+
+        @Override
+        public boolean callGraphRecording() {
+            return dumpOptions().map(ServiceDumpReport.DumpOptions::callGraphRecording).orElse(false);
+        }
+
+        @Override
+        public boolean sendProfilingSignal() {
+            return dumpOptions().map(ServiceDumpReport.DumpOptions::sendProfilingSignal).orElse(false);
+        }
+
+        Optional<ServiceDumpReport.DumpOptions> dumpOptions() { return Optional.ofNullable(request.dumpOptions()); }
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/ZooKeeperSnapshotDumper.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/ZooKeeperSnapshotDumper.java
new file mode 100644
index 00000000000..c8f930464e0
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/ZooKeeperSnapshotDumper.java
@@ -0,0 +1,27 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.maintenance.servicedump;
+
+import com.yahoo.vespa.hosted.node.admin.task.util.fs.ContainerPath;
+
+import java.util.List;
+
+import static com.yahoo.vespa.hosted.node.admin.maintenance.servicedump.Artifact.Classification.CONFIDENTIAL;
+
+/**
+ * Performs dump of ZooKeeper snapshots. Can be used for controllers, config servers, cluster controllers and tenant containers
+ * where zookeeper is configured.
+ *
+ * @author hmusum
+ */
+class ZooKeeperSnapshotDumper implements ArtifactProducer {
+    @Override public String artifactName() { return "zookeeper-snapshot"; }
+    @Override public String description() { return "Dumps ZooKeeper snapshots"; }
+
+    @Override
+    public List<Artifact> produceArtifacts(Context ctx) {
+        ContainerPath zookeeperSnapshot = ctx.outputContainerPath().resolve("zookeeper-snapshot.tgz");
+        List<String> cmd = List.of("bash", "-c", String.format("/opt/vespa/bin/vespa-backup-zk-data.sh -o %s -k -f", zookeeperSnapshot.pathInContainer()));
+        ctx.executeCommandInNode(cmd, true); // second argument is the 'logOutput' flag
+        return List.of(Artifact.newBuilder().classification(CONFIDENTIAL).file(zookeeperSnapshot).build());
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/package-info.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/package-info.java
new file mode 100644
index 00000000000..3ea43b6129a
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/package-info.java
@@ -0,0 +1,8 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author bjorncs
+ */
+@ExportPackage
+package com.yahoo.vespa.hosted.node.admin.maintenance.servicedump;
+
+import com.yahoo.osgi.annotation.ExportPackage;
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/sync/SyncClient.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/sync/SyncClient.java
new file mode 100644
index 00000000000..b1e467ad446
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/sync/SyncClient.java
@@ -0,0 +1,22 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.maintenance.sync;
+
+import com.yahoo.vespa.hosted.node.admin.component.TaskContext;
+
+import java.util.List;
+
+/**
+ * @author freva
+ */
+public interface SyncClient {
+
+    /**
+     * Syncs the given files, will only upload each file once.
+     *
+     * @param context context used to log which files were synced
+     * @param syncFileInfos list of files and their metadata to sync
+     * @param limit max number of files to upload for this invocation, to avoid blocking for too long
+     * @return true iff any files were uploaded (NOTE(review): VespaServiceDumperImpl treats false as "Unable to upload all files" - confirm the contract)
+     */
+    boolean sync(TaskContext context, List<SyncFileInfo> syncFileInfos, int limit);
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/sync/SyncFileInfo.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/sync/SyncFileInfo.java
new file mode 100644
index 00000000000..c65f2abb6fd
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/sync/SyncFileInfo.java
@@ -0,0 +1,143 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.maintenance.sync;
+
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixPath;
+
+import java.net.URI;
+import java.nio.file.Path;
+import java.time.Duration;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Optional;
+import java.util.function.Function;
+
+/**
+ * @author freva
+ */
+public class SyncFileInfo {
+
+    private static final DateTimeFormatter DATE_TIME_FORMATTER = DateTimeFormatter
+            .ofPattern("yyyy-MM-dd.HH-mm-ss").withZone(ZoneOffset.UTC);
+
+    private final Path source;
+    private final Function<String, URI> destination; // maps a basename suffix to the full remote URI
+    private final Compression uploadCompression;
+    private final Map<String, String> tags;
+    private final Optional<Duration> minDurationBetweenSync;
+
+    private SyncFileInfo(Path source, Function<String, URI> destination, Compression uploadCompression,
+                         Map<String, String> tags, Duration minDurationBetweenSyncOrNull) {
+        this.source = source;
+        this.destination = destination;
+        this.uploadCompression = uploadCompression;
+        this.tags = Map.copyOf(tags);
+        this.minDurationBetweenSync = Optional.ofNullable(minDurationBetweenSyncOrNull);
+    }
+
+    /** Source path of the file to sync */
+    public Path source() {
+        return source;
+    }
+
+    /** Remote URI to store the file at */
+    public URI destination() {
+        return destination.apply("");
+    }
+
+    /** Returns a destination URI after adding a suffix to the base name of the filename. */
+    public URI destinationWithBasenameSuffix(String suffix) {
+        return destination.apply(suffix);
+    }
+
+    /** Compression algorithm to use when uploading the file */
+    public Compression uploadCompression() {
+        return uploadCompression;
+    }
+
+    public Map<String, String> tags() { return tags; } // unmodifiable: copied with Map.copyOf in the constructor
+
+    public Optional<Duration> minDurationBetweenSync() { return minDurationBetweenSync; }
+
+    public static Optional<SyncFileInfo> forLogFile(URI uri, Path logFile, boolean rotatedOnly, ApplicationId owner) {
+        String filename = logFile.getFileName().toString();
+        Compression compression;
+        final String dir; // stays null for files that should not be synced
+        String remoteFilename = logFile.getFileName().toString();
+        Duration minDurationBetweenSync = null;
+
+        if (filename.startsWith("vespa.log")) {
+            dir = "logs/vespa/";
+            compression = Compression.ZSTD;
+            if (filename.length() == 9) { // i.e. the name is exactly "vespa.log"
+                if (!rotatedOnly) remoteFilename = "vespa.log-" + DATE_TIME_FORMATTER.format(new UnixPath(logFile).getLastModifiedTime());
+                minDurationBetweenSync = rotatedOnly ? Duration.ofHours(1) : Duration.ZERO;
+            }
+        } else if (filename.startsWith("zookeeper.") && filename.endsWith(".log")) {
+            compression = Compression.ZSTD;
+            dir = "logs/zookeeper/";
+            remoteFilename = rotatedOnly && filename.endsWith(".0.log") ? "zookeeper.log" :
+                    "zookeeper.log-" + DATE_TIME_FORMATTER.format(new UnixPath(logFile).getLastModifiedTime());
+            minDurationBetweenSync = filename.endsWith(".0.log") ? (rotatedOnly ? Duration.ofHours(1) : Duration.ZERO) : null;
+        } else if (filename.startsWith("start-services.out-")) {
+            compression = Compression.ZSTD;
+            dir = "logs/start-services/";
+        } else if (filename.startsWith("nginx-error")) {
+            compression = Compression.ZSTD;
+            if ("nginx-error.log".equals(filename)) {
+                if (!rotatedOnly) remoteFilename = "nginx-error.log"; // NOTE(review): no-op, remoteFilename already equals this; confirm intent
+                minDurationBetweenSync = rotatedOnly ? Duration.ofHours(1) : Duration.ZERO;
+            }
+            dir = "logs/nginx/";
+        } else {
+            compression = filename.endsWith(".zst") ? Compression.NONE : Compression.ZSTD;
+            if (rotatedOnly && compression != Compression.NONE)
+                dir = null;
+            else if (filename.contains(".metrics-proxy.")) // See AccessLogComponent.java for filename.
+                dir = null;
+            else if (filename.startsWith("JsonAccessLog.") || filename.startsWith("access"))
+                dir = "logs/access/";
+            else if (filename.startsWith("ConnectionLog."))
+                dir = "logs/connection/";
+            else
+                dir = null;
+        }
+
+        if (dir == null) return Optional.empty();
+        String finalRemoteFilename = remoteFilename;
+        Function<String, URI> destination = suffix -> uri.resolve(dir + finalRemoteFilename + suffix + compression.extension);
+        return Optional.of(new SyncFileInfo(logFile, destination, compression, defaultTags(owner), minDurationBetweenSync));
+    }
+
+    public static SyncFileInfo forServiceDump(URI destinationDir, Path file, Compression compression,
+                                              ApplicationId owner, String assetClassification) {
+        String filename = file.getFileName().toString();
+        Function<String, URI> location = suffix -> destinationDir.resolve(filename + suffix + compression.extension);
+        Map<String, String> tags = defaultTags(owner);
+        if (assetClassification != null) {
+            tags.put("vespa:AssetClassification", assetClassification);
+        }
+        return new SyncFileInfo(file, location, compression, tags, null);
+    }
+
+    private static Map<String, String> defaultTags(ApplicationId owner) {
+        var tags = new HashMap<String, String>(); // mutable on purpose: forServiceDump() adds an entry to it
+        tags.put("corp:Application", owner.toFullString());
+        return tags;
+    }
+
+    public boolean overwriteIfExists() {
+        return minDurationBetweenSync.isPresent();
+    }
+
+    public enum Compression {
+        NONE(""), ZSTD(".zst");
+
+        private final String extension;
+        Compression(String extension) {
+            this.extension = extension;
+        }
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/sync/ZstdCompressingInputStream.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/sync/ZstdCompressingInputStream.java
new file mode 100644
index 00000000000..eeec7e1b59c
--- /dev/null
+++ 
b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/sync/ZstdCompressingInputStream.java
@@ -0,0 +1,98 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.maintenance.sync;
+
+import com.yahoo.compress.ZstdCompressor;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * InputStream that outputs the content of a given InputStream compressed with ZStandard.
+ * Input is consumed in chunks of at most the input buffer size, and each chunk is compressed
+ * with a separate call to the compressor.
+ *
+ * @author freva
+ */
+public class ZstdCompressingInputStream extends InputStream {
+
+    public static final int DEFAULT_INPUT_BUFFER_SIZE = 8 * 1024;
+    private final ZstdCompressor compressor = new ZstdCompressor();
+
+    private final InputStream is;
+    private final byte[] inputBuffer;
+    private final byte[] outputBuffer;
+
+    private boolean firstRead = true;
+    private boolean eof = false;
+    private int outputPosition = 0;
+    private int outputLength = 0;
+    private boolean isClosed = false;
+
+    public ZstdCompressingInputStream(InputStream is, int inputBufferSize) {
+        this.is = is;
+        this.inputBuffer = new byte[inputBufferSize];
+        this.outputBuffer = new byte[ZstdCompressor.getMaxCompressedLength(inputBufferSize)];
+    }
+
+    public ZstdCompressingInputStream(InputStream is) {
+        this(is, DEFAULT_INPUT_BUFFER_SIZE);
+    }
+
+    /** Returns the next compressed byte, refilling the output buffer from the wrapped stream when it is drained. */
+    @Override
+    public int read() throws IOException {
+        throwIfClosed();
+
+        if (outputPosition >= outputLength) {
+            int readLength = eof ? -1 : is.read(inputBuffer);
+            if (readLength == -1) {
+                if (!firstRead)
+                    return -1;
+                // zstd compressing an empty file results in a 13 bytes file.
+                eof = true;
+                readLength = 0;
+            }
+            firstRead = false;
+
+            outputLength = compressor.compress(inputBuffer, 0, readLength, outputBuffer, 0, outputBuffer.length);
+            outputPosition = 0;
+        }
+
+        return Byte.toUnsignedInt(outputBuffer[outputPosition++]);
+    }
+
+    /**
+     * Reads up to {@code len} compressed bytes into {@code b}, starting at {@code off}.
+     * May return fewer bytes than requested: only what is left of the current output buffer is copied.
+     *
+     * @return the number of bytes read, 0 if {@code len} is 0, or -1 at end of stream
+     * @throws IndexOutOfBoundsException if {@code off} or {@code len} is negative, or {@code len > b.length - off}
+     */
+    @Override
+    public int read(byte[] b, int off, int len) throws IOException {
+        if (off < 0 || len < 0 || len > b.length - off)
+            throw new IndexOutOfBoundsException("off=" + off + ", len=" + len + ", b.length=" + b.length);
+        if (len == 0) return 0;
+
+        int first = read();
+        if (first == -1) return -1;
+
+        b[off++] = (byte) first;
+        // One of the requested bytes is already delivered, so at most len - 1 more may be copied.
+        int remaining = Math.min(len - 1, outputLength - outputPosition);
+        System.arraycopy(outputBuffer, outputPosition, b, off, remaining);
+        outputPosition += remaining;
+        return remaining + 1;
+    }
+
+    @Override
+    public void close() throws IOException {
+        throwIfClosed();
+        is.close();
+        isClosed = true;
+    }
+
+    private void throwIfClosed() {
+        if (isClosed) throw new IllegalArgumentException("Input stream is already closed");
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/sync/package-info.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/sync/package-info.java
new file mode 100644
index 00000000000..becf11945e3
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/sync/package-info.java
@@ -0,0 +1,8 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author freva
+ */
+@ExportPackage
+package com.yahoo.vespa.hosted.node.admin.maintenance.sync;
+
+import com.yahoo.osgi.annotation.ExportPackage;
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/ConvergenceException.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/ConvergenceException.java
new file mode 100644
index 00000000000..686c32fd5ee
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/ConvergenceException.java
@@ -0,0 +1,41 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.nodeadmin;
+
+/**
+ * Exception specially handled to avoid dumping full stack trace on convergence failure.
+ *
+ * @author hakonhall
+ */
+@SuppressWarnings("serial")
+public class ConvergenceException extends RuntimeException {
+    /** Create an exception that will NOT increment the monitored unhandled_exceptions metric. */
+    public static ConvergenceException ofTransient(String message) { return ofTransient(message, null); }
+
+    /** Create an exception that will NOT increment the monitored unhandled_exceptions metric. */
+    public static ConvergenceException ofTransient(String message, Throwable t) { return new ConvergenceException(message, t, false); }
+
+    /** Create an exception that increments the monitored unhandled_exceptions metric. */
+    public static ConvergenceException ofError(String message) { return ofError(message, null); }
+
+    /** Create an exception that increments the monitored unhandled_exceptions metric. */
+    public static ConvergenceException ofError(String message, Throwable t) { return new ConvergenceException(message, t, true); }
+
+    /** Create an exception with the same transient/error as the cause. */
+    public static ConvergenceException ofNested(String message, ConvergenceException cause) { return new ConvergenceException(message, cause, cause.isError); }
+
+    private final boolean isError;
+
+    /** @param isError whether the exception should increment the monitored unhandled_exceptions metric. */
+    protected ConvergenceException(String message, boolean isError) {
+        this(message, null, isError);
+    }
+
+    /** @param isError whether the exception should increment the monitored unhandled_exceptions metric. */
+    protected ConvergenceException(String message, Throwable t, boolean isError) {
+        super(message, t);
+        this.isError = isError;
+    }
+
+    /** Whether the exception signals an error someone may want to look at, or whether it is expected to be transient (false). */
+    public boolean isError() { return isError; }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdmin.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdmin.java
new file mode 100644
index 00000000000..986f6b4eebc
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdmin.java
@@ -0,0 +1,56 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.nodeadmin;
+
+import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext;
+
+import java.time.Duration;
+import java.util.Set;
+
+/**
+ * NodeAdmin manages the life cycle of NodeAgents.
+ * @author Haakon Dybdahl
+ */
+public interface NodeAdmin {
+
+    /** Start/stop NodeAgents and schedule next NodeAgent ticks with the given NodeAgentContexts */
+    void refreshContainersToRun(Set<NodeAgentContext> nodeAgentContexts);
+
+    /** Update node admin metrics */
+    void updateMetrics(boolean isSuspended);
+
+    /**
+     * Attempts to freeze/unfreeze all NodeAgents and itself. To freeze a NodeAgent means that
+     * it will not pick up any changes from NodeRepository.
+     *
+     * @param frozen whether NodeAgents and NodeAdmin should be frozen
+     * @return true if all the NodeAgents and NodeAdmin have converged to the desired state
+     */
+    boolean setFrozen(boolean frozen);
+
+    /**
+     * Returns whether NodeAdmin itself is currently frozen, meaning it will not pick up any changes
+     * from NodeRepository.
+     */
+    boolean isFrozen();
+
+    /**
+     * Returns an upper bound on the time some or all parts of the node admin (including agents)
+     * have been frozen. Returns 0 if neither frozen nor trying to freeze.
+     */
+    Duration subsystemFreezeDuration();
+
+    /**
+     * Stop all services on these nodes
+     */
+    void stopNodeAgentServices();
+
+    /**
+     * Start node-admin schedulers.
+     */
+    void start();
+
+    /**
+     * Stop the NodeAgents. Will not delete the storage or stop the container.
+     */
+    void stop();
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java
new file mode 100644
index 00000000000..446f21d53e7
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java
@@ -0,0 +1,261 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.nodeadmin;
+
+import ai.vespa.metrics.ContainerMetrics;
+import com.yahoo.jdisc.Timer;
+import com.yahoo.vespa.hosted.node.admin.container.ContainerStats;
+import com.yahoo.vespa.hosted.node.admin.container.metrics.Counter;
+import com.yahoo.vespa.hosted.node.admin.container.metrics.Dimensions;
+import com.yahoo.vespa.hosted.node.admin.container.metrics.Gauge;
+import com.yahoo.vespa.hosted.node.admin.container.metrics.Metrics;
+import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgent;
+import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext;
+import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContextManager;
+import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentFactory;
+import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentScheduler;
+
+import java.nio.file.FileSystem;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+/**
+ * Administers a host (for now only docker hosts) and its nodes (docker containers nodes).
+ * + * @author stiankri + */ +public class NodeAdminImpl implements NodeAdmin { + private static final Duration NODE_AGENT_FREEZE_TIMEOUT = Duration.ofSeconds(5); + private static final Duration NODE_AGENT_SPREAD = Duration.ofSeconds(3); + + private final NodeAgentWithSchedulerFactory nodeAgentWithSchedulerFactory; + + private final Timer timer; + private final Duration freezeTimeout; + private final Duration spread; + private boolean previousWantFrozen; + private boolean isFrozen; + private Instant startOfFreezeConvergence; + private final Map nodeAgentWithSchedulerByHostname = new ConcurrentHashMap<>(); + + private final ProcMeminfoReader procMeminfoReader; + private final Gauge jvmHeapUsed; + private final Gauge jvmHeapFree; + private final Gauge jvmHeapTotal; + private final Gauge containerCount; + private final Counter numberOfUnhandledExceptions; + private final Metrics metrics; + private Dimensions previousMemoryOverheadDimensions = null; + + public NodeAdminImpl(NodeAgentFactory nodeAgentFactory, Metrics metrics, Timer timer, FileSystem fileSystem) { + this(nodeAgentContext -> create(timer, nodeAgentFactory, nodeAgentContext), + metrics, timer, NODE_AGENT_FREEZE_TIMEOUT, NODE_AGENT_SPREAD, new ProcMeminfoReader(fileSystem)); + } + + public NodeAdminImpl(NodeAgentFactory nodeAgentFactory, Metrics metrics, + Timer timer, Duration freezeTimeout, Duration spread, ProcMeminfoReader procMeminfoReader) { + this(nodeAgentContext -> create(timer, nodeAgentFactory, nodeAgentContext), + metrics, timer, freezeTimeout, spread, procMeminfoReader); + } + + NodeAdminImpl(NodeAgentWithSchedulerFactory nodeAgentWithSchedulerFactory, + Metrics metrics, Timer timer, Duration freezeTimeout, Duration spread, + ProcMeminfoReader procMeminfoReader) { + this.nodeAgentWithSchedulerFactory = nodeAgentWithSchedulerFactory; + this.timer = timer; + this.freezeTimeout = freezeTimeout; + this.spread = spread; + this.previousWantFrozen = true; + this.isFrozen = true; + 
this.startOfFreezeConvergence = timer.currentTime(); + + this.numberOfUnhandledExceptions = metrics.declareCounter("unhandled_exceptions", + new Dimensions(Map.of("src", "node-agents"))); + + this.procMeminfoReader = procMeminfoReader; + this.jvmHeapUsed = metrics.declareGauge(ContainerMetrics.MEM_HEAP_USED.baseName()); + this.jvmHeapFree = metrics.declareGauge(ContainerMetrics.MEM_HEAP_FREE.baseName()); + this.jvmHeapTotal = metrics.declareGauge(ContainerMetrics.MEM_HEAP_TOTAL.baseName()); + this.containerCount = metrics.declareGauge("container.count"); + this.metrics = metrics; + } + + @Override + public void refreshContainersToRun(Set nodeAgentContexts) { + Map nodeAgentContextsByHostname = nodeAgentContexts.stream() + .collect(Collectors.toMap(ctx -> ctx.node().id(), Function.identity())); + + // Stop and remove NodeAgents that should no longer be running + diff(nodeAgentWithSchedulerByHostname.keySet(), nodeAgentContextsByHostname.keySet()) + .forEach(hostname -> nodeAgentWithSchedulerByHostname.remove(hostname).stopForRemoval()); + + // Start NodeAgent for hostnames that should be running, but aren't yet + diff(nodeAgentContextsByHostname.keySet(), nodeAgentWithSchedulerByHostname.keySet()).forEach(hostname -> { + NodeAgentWithScheduler naws = nodeAgentWithSchedulerFactory.create(nodeAgentContextsByHostname.get(hostname)); + naws.start(); + nodeAgentWithSchedulerByHostname.put(hostname, naws); + }); + + Duration timeBetweenNodeAgents = spread.dividedBy(Math.max(nodeAgentContextsByHostname.size() - 1, 1)); + Instant nextAgentStart = timer.currentTime(); + // At this point, nodeAgentContextsByHostname and nodeAgentWithSchedulerByHostname should have the same keys + for (Map.Entry entry : nodeAgentContextsByHostname.entrySet()) { + nodeAgentWithSchedulerByHostname.get(entry.getKey()).scheduleTickWith(entry.getValue(), nextAgentStart); + nextAgentStart = nextAgentStart.plus(timeBetweenNodeAgents); + } + } + + @Override + public void updateMetrics(boolean 
isSuspended) { + int numContainers = 0; + long totalContainerMemoryBytes = 0; + + for (NodeAgentWithScheduler nodeAgentWithScheduler : nodeAgentWithSchedulerByHostname.values()) { + int count = nodeAgentWithScheduler.getAndResetNumberOfUnhandledExceptions(); + if (!isSuspended) numberOfUnhandledExceptions.add(count); + Optional containerStats = nodeAgentWithScheduler.updateContainerNodeMetrics(isSuspended); + if (containerStats.isPresent()) { + ++numContainers; + totalContainerMemoryBytes += containerStats.get().memoryStats().usage(); + } + } + + Runtime runtime = Runtime.getRuntime(); + runtime.gc(); + long freeMemory = runtime.freeMemory(); + long totalMemory = runtime.totalMemory(); + long usedMemory = totalMemory - freeMemory; + jvmHeapFree.sample(freeMemory); + jvmHeapUsed.sample(usedMemory); + jvmHeapTotal.sample(totalMemory); + + // No container stats are found while suspended, so skip setting these if so. + if (!isSuspended) { + containerCount.sample(numContainers); + ProcMeminfo meminfo = procMeminfoReader.read(); + updateMemoryOverheadMetric(numContainers, meminfo.memTotalBytes() - meminfo.memAvailableBytes() - totalContainerMemoryBytes); + } + } + + private void updateMemoryOverheadMetric(int numContainers, double memoryOverhead) { + final String name = "mem.system.overhead"; + Dimensions dimensions = new Dimensions(Map.of("containers", Integer.toString(numContainers))); + metrics.declareGauge(Metrics.APPLICATION_HOST, name, dimensions, Metrics.DimensionType.DEFAULT) + .sample(memoryOverhead); + if (previousMemoryOverheadDimensions != null && !previousMemoryOverheadDimensions.equals(dimensions)) + metrics.deleteMetricByDimension(name, previousMemoryOverheadDimensions, Metrics.DimensionType.DEFAULT); + previousMemoryOverheadDimensions = dimensions; + } + + @Override + public boolean setFrozen(boolean wantFrozen) { + if (wantFrozen != previousWantFrozen) { + if (wantFrozen) { + this.startOfFreezeConvergence = timer.currentTime(); + } else { + 
this.startOfFreezeConvergence = null; + } + + previousWantFrozen = wantFrozen; + } + + // Use filter with count instead of allMatch() because allMatch() will short circuit on first non-match + boolean allNodeAgentsConverged = parallelStreamOfNodeAgentWithScheduler() + .filter(nodeAgentScheduler -> !nodeAgentScheduler.setFrozen(wantFrozen, freezeTimeout)) + .count() == 0; + + if (wantFrozen) { + if (allNodeAgentsConverged) isFrozen = true; + } else isFrozen = false; + + return allNodeAgentsConverged; + } + + @Override + public boolean isFrozen() { + return isFrozen; + } + + @Override + public Duration subsystemFreezeDuration() { + if (startOfFreezeConvergence == null) { + return Duration.ZERO; + } else { + return Duration.between(startOfFreezeConvergence, timer.currentTime()); + } + } + + @Override + public void stopNodeAgentServices() { + // Each container may spend 1-1:30 minutes stopping + parallelStreamOfNodeAgentWithScheduler().forEach(NodeAgentWithScheduler::stopForHostSuspension); + } + + @Override + public void start() { + + } + + @Override + public void stop() { + // Stop all node-agents in parallel, will block until the last NodeAgent is stopped + parallelStreamOfNodeAgentWithScheduler().forEach(NodeAgentWithScheduler::stopForRemoval); + } + + /** + * Returns a parallel stream of NodeAgentWithScheduler. + * + *

Why not just call nodeAgentWithSchedulerByHostname.values().parallelStream()? Experiments + * with Java 11 have shown that with 10 nodes and forEach(), that stream uses at most 3 concurrent + * threads. A HashMap yields 5 and a List yields 10, hence the copy into a List before streaming.

+ */ + private Stream parallelStreamOfNodeAgentWithScheduler() { + return List.copyOf(nodeAgentWithSchedulerByHostname.values()).parallelStream(); + } + + // Set-difference. Returns minuend minus subtrahend. + private static Set diff(Set minuend, Set subtrahend) { + var result = new HashSet<>(minuend); + result.removeAll(subtrahend); + return result; + } + + static class NodeAgentWithScheduler implements NodeAgentScheduler { + private final NodeAgent nodeAgent; + private final NodeAgentScheduler nodeAgentScheduler; + + private NodeAgentWithScheduler(NodeAgent nodeAgent, NodeAgentScheduler nodeAgentScheduler) { + this.nodeAgent = nodeAgent; + this.nodeAgentScheduler = nodeAgentScheduler; + } + + void start() { nodeAgent.start(currentContext()); } + void stopForHostSuspension() { nodeAgent.stopForHostSuspension(currentContext()); } + void stopForRemoval() { nodeAgent.stopForRemoval(currentContext()); } + Optional updateContainerNodeMetrics(boolean isSuspended) { return nodeAgent.updateContainerNodeMetrics(currentContext(), isSuspended); } + int getAndResetNumberOfUnhandledExceptions() { return nodeAgent.getAndResetNumberOfUnhandledExceptions(); } + + @Override public void scheduleTickWith(NodeAgentContext context, Instant at) { nodeAgentScheduler.scheduleTickWith(context, at); } + @Override public boolean setFrozen(boolean frozen, Duration timeout) { return nodeAgentScheduler.setFrozen(frozen, timeout); } + @Override public NodeAgentContext currentContext() { return nodeAgentScheduler.currentContext(); } + } + + @FunctionalInterface + interface NodeAgentWithSchedulerFactory { + NodeAgentWithScheduler create(NodeAgentContext context); + } + + private static NodeAgentWithScheduler create(Timer timer, NodeAgentFactory nodeAgentFactory, NodeAgentContext context) { + NodeAgentContextManager contextManager = new NodeAgentContextManager(timer, context); + NodeAgent nodeAgent = nodeAgentFactory.create(contextManager, context); + return new NodeAgentWithScheduler(nodeAgent, 
contextManager); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java new file mode 100644 index 00000000000..dc10eaee46c --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java @@ -0,0 +1,180 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.nodeadmin; + +import com.yahoo.concurrent.ThreadFactoryFactory; +import com.yahoo.config.provision.HostName; +import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.Acl; +import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeRepository; +import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeSpec; +import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeState; +import com.yahoo.vespa.hosted.node.admin.configserver.orchestrator.Orchestrator; +import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext; +import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContextFactory; +import com.yahoo.yolean.Exceptions; + +import java.time.Duration; +import java.util.ArrayList; +import java.util.EnumSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.stream.Collectors; + +import static com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminStateUpdater.State.RESUMED; +import static com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminStateUpdater.State.SUSPENDED; +import static com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminStateUpdater.State.SUSPENDED_NODE_ADMIN; +import static 
com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminStateUpdater.State.TRANSITIONING; + +/** + * Pulls information from node repository and forwards containers to run to node admin. + * + * @author dybis, stiankri + */ +public class NodeAdminStateUpdater { + private static final Logger log = Logger.getLogger(NodeAdminStateUpdater.class.getName()); + private static final Duration FREEZE_CONVERGENCE_TIMEOUT = Duration.ofMinutes(5); + + private final ScheduledExecutorService metricsScheduler = + Executors.newScheduledThreadPool(1, ThreadFactoryFactory.getDaemonThreadFactory("metricsscheduler")); + + private final NodeAgentContextFactory nodeAgentContextFactory; + private final NodeRepository nodeRepository; + private final Orchestrator orchestrator; + private final NodeAdmin nodeAdmin; + private final String hostHostname; + + public enum State { TRANSITIONING, RESUMED, SUSPENDED_NODE_ADMIN, SUSPENDED } + + private volatile State currentState = SUSPENDED_NODE_ADMIN; + + public NodeAdminStateUpdater( + NodeAgentContextFactory nodeAgentContextFactory, + NodeRepository nodeRepository, + Orchestrator orchestrator, + NodeAdmin nodeAdmin, + HostName hostHostname) { + this.nodeAgentContextFactory = nodeAgentContextFactory; + this.nodeRepository = nodeRepository; + this.orchestrator = orchestrator; + this.nodeAdmin = nodeAdmin; + this.hostHostname = hostHostname.value(); + } + + public void start() { + nodeAdmin.start(); + + EnumSet suspendedStates = EnumSet.of(SUSPENDED_NODE_ADMIN, SUSPENDED); + metricsScheduler.scheduleAtFixedRate(() -> { + try { + nodeAdmin.updateMetrics(suspendedStates.contains(currentState)); + } catch (Throwable e) { + log.log(Level.WARNING, "Metric fetcher scheduler failed", e); + } + }, 10, 55, TimeUnit.SECONDS); + } + + public void stop() { + metricsScheduler.shutdown(); + + // Stop all node-agents in parallel, will block until the last NodeAgent is stopped + nodeAdmin.stop(); + + do { + try { + metricsScheduler.awaitTermination(Long.MAX_VALUE, 
TimeUnit.NANOSECONDS); + } catch (InterruptedException e) { + log.info("Was interrupted while waiting for metricsScheduler and shutdown"); + } + } while (!metricsScheduler.isTerminated()); + } + + /** + * This method attempts to converge node-admin w/agents to a {@link State} + * with respect to: freeze, Orchestrator, and services running. + */ + public void converge(State wantedState) { + NodeSpec node = nodeRepository.getNode(hostHostname); + boolean hostIsActiveInNR = node.state() == NodeState.active; + if (wantedState == RESUMED) { + adjustNodeAgentsToRunFromNodeRepository(); + } else if (currentState == TRANSITIONING && nodeAdmin.subsystemFreezeDuration().compareTo(FREEZE_CONVERGENCE_TIMEOUT) > 0) { + // We have spent too much time trying to freeze and node admin is still not frozen. + // To avoid node agents stalling for too long, we'll force unfrozen ticks now. + adjustNodeAgentsToRunFromNodeRepository(); + nodeAdmin.setFrozen(false); + + if (hostIsActiveInNR) orchestrator.resume(hostHostname); + + throw ConvergenceException.ofTransient("Timed out trying to freeze all nodes: will force an unfrozen tick"); + } + + boolean wantFrozen = wantedState != RESUMED; + if (currentState == wantedState && wantFrozen == node.orchestratorStatus().isSuspended()) return; + currentState = TRANSITIONING; + + if (!nodeAdmin.setFrozen(wantFrozen)) + throw ConvergenceException.ofTransient("NodeAdmin is not yet " + (wantFrozen ? "frozen" : "unfrozen")); + + switch (wantedState) { + case RESUMED: + if (hostIsActiveInNR) orchestrator.resume(hostHostname); + break; + case SUSPENDED_NODE_ADMIN: + if (hostIsActiveInNR) orchestrator.suspend(hostHostname); + break; + case SUSPENDED: + // Fetch active nodes from node repo before suspending nodes. + // It is only possible to suspend active nodes, + // the orchestrator will fail if trying to suspend nodes in other states. 
+ // Even though state is frozen we need to interact with node repo, but + // the data from node repo should not be used for anything else. + // We should also suspend host's hostname to suspend node-admin + List nodesInActiveState = getNodesInActiveState(); + + List nodesToSuspend = new ArrayList<>(nodesInActiveState); + if (hostIsActiveInNR) nodesToSuspend.add(hostHostname); + if (!nodesToSuspend.isEmpty()) { + orchestrator.suspend(hostHostname, nodesToSuspend); + log.info("Orchestrator allows suspension of " + nodesToSuspend); + } + + // The node agent services are stopped by this thread, which is OK only + // because the node agents are frozen (see above). + nodeAdmin.stopNodeAgentServices(); + break; + default: + throw new IllegalStateException("Unknown wanted state " + wantedState); + } + + log.info("State changed from " + currentState + " to " + wantedState); + currentState = wantedState; + } + + void adjustNodeAgentsToRunFromNodeRepository() { + try { + Map aclByHostname = nodeRepository.getAcls(hostHostname); + + Set nodeAgentContexts = nodeRepository.getNodes(hostHostname).stream() + .map(node -> nodeAgentContextFactory.create(node, aclByHostname.getOrDefault(node.hostname(), Acl.EMPTY))) + .collect(Collectors.toSet()); + nodeAdmin.refreshContainersToRun(nodeAgentContexts); + } catch (ConvergenceException e) { + log.log(Level.WARNING, "Failed to update which containers should be running: " + Exceptions.toMessageString(e)); + } catch (RuntimeException e) { + log.log(Level.WARNING, "Failed to update which containers should be running", e); + } + } + + private List getNodesInActiveState() { + return nodeRepository.getNodes(hostHostname) + .stream() + .filter(node -> node.state() == NodeState.active) + .map(NodeSpec::hostname) + .toList(); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/ProcMeminfo.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/ProcMeminfo.java new file mode 100644 index 
00000000000..0c0d8dc348c --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/ProcMeminfo.java @@ -0,0 +1,12 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.nodeadmin; + +/** + * Represents /proc/meminfo, see proc(5). + * + * @param memTotalBytes Total usable RAM (i.e., physical RAM minus a few reserved bits and the kernel binary code). + * @param memAvailableBytes An estimate of how much memory is available for starting new applications, without swapping. + * + * @author hakon + */ +public record ProcMeminfo(long memTotalBytes, long memAvailableBytes) { } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/ProcMeminfoReader.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/ProcMeminfoReader.java new file mode 100644 index 00000000000..d13aa1ea03c --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/ProcMeminfoReader.java @@ -0,0 +1,42 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.nodeadmin; + +import com.yahoo.yolean.Exceptions; + +import java.nio.file.FileSystem; +import java.nio.file.Files; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Reads /proc/meminfo, see proc(5). 
+ * + * @author hakon + */ +public class ProcMeminfoReader { + private static final String PROC_MEMINFO = "/proc/meminfo"; + private static final Pattern MEM_TOTAL_PATTERN = Pattern.compile("MemTotal: *([0-9]+) kB"); + private static final Pattern MEM_AVAILABLE_PATTERN = Pattern.compile("MemAvailable: *([0-9]+) kB"); + + private final FileSystem fileSystem; + + public ProcMeminfoReader(FileSystem fileSystem) { + this.fileSystem = fileSystem; + } + + public ProcMeminfo read() { + return read(Exceptions.uncheck(() -> Files.readString(fileSystem.getPath(PROC_MEMINFO)))); + } + + static ProcMeminfo read(String meminfoContent) { + return new ProcMeminfo(readKbGroup(meminfoContent, MEM_TOTAL_PATTERN), + readKbGroup(meminfoContent, MEM_AVAILABLE_PATTERN)); + } + + private static long readKbGroup(String string, Pattern pattern) { + Matcher matcher = pattern.matcher(string); + if (!matcher.find()) + throw new IllegalArgumentException(pattern + " did not match anything in " + PROC_MEMINFO); + return Long.parseLong(matcher.group(1)) * 1024; + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/package-info.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/package-info.java new file mode 100644 index 00000000000..68af4e59d45 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/package-info.java @@ -0,0 +1,5 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+@ExportPackage +package com.yahoo.vespa.hosted.node.admin.nodeadmin; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/ContainerData.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/ContainerData.java new file mode 100644 index 00000000000..3f7ff63c90b --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/ContainerData.java @@ -0,0 +1,42 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.nodeagent; + +import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixPath; +import com.yahoo.vespa.hosted.node.admin.task.util.fs.ContainerPath; + +import java.nio.file.Path; + +/** + * Utility for manipulating the initial file system the Docker container will start with. + * + * @author hakon + */ +public interface ContainerData { + + /** Add or overwrite file in container at path. */ + void addFile(ContainerPath path, String data); + + /** + * @param path Container path to write + * @param data UTF-8 file content + * @param permissions file permissions, see {@link UnixPath#setPermissions(String)} for format. + */ + void addFile(ContainerPath path, String data, String permissions); + + /** + * @param path Container path to create directory at + * @param permissions optional file permissions, see {@link UnixPath#setPermissions(String)} for format. + */ + void addDirectory(ContainerPath path, String... permissions); + + /** + * Symlink to a file in container at path. 
+ * @param symlink The path to the symlink inside the container + * @param target The path to the target file for the symbolic link inside the container + */ + void addSymlink(ContainerPath symlink, Path target); + + /** Writes all the files, directories and symlinks that were previously added */ + void converge(NodeAgentContext context); +} + diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/HealthChecker.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/HealthChecker.java new file mode 100644 index 00000000000..78c907ad277 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/HealthChecker.java @@ -0,0 +1,15 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.nodeagent; + +/** + * Interface for verifying the health of the node. + * + * @author hakonhall + */ +public interface HealthChecker extends AutoCloseable { + /** Verify the health of an active node, just before updating the node repo and calling Orchestrator resume. */ + void verifyHealth(NodeAgentContext context); + + @Override + void close(); +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java new file mode 100644 index 00000000000..b37b4dd665a --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java @@ -0,0 +1,43 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.nodeagent; + +import com.yahoo.vespa.hosted.node.admin.container.ContainerStats; + +import java.util.Optional; + +/** + * Responsible for management of a single node over its lifecycle. + * May own its own resources, threads etc. 
Runs independently, but receives signals + * on state changes in the environment that may trigger this agent to take actions. + * + * @author bakksjo + */ +public interface NodeAgent { + /** + * Starts the agent. After this method is called, the agent will asynchronously maintain the node, continuously + * striving to make the current state equal to the wanted state. + */ + void start(NodeAgentContext context); + + /** + * Stop the node in anticipation of host suspension, e.g. reboot or docker upgrade. + */ + void stopForHostSuspension(NodeAgentContext context); + + /** + * Signals to the agent that the node is at the end of its lifecycle and no longer needs a managing agent. + * Cleans up any resources the agent owns, such as threads, connections etc. Cleanup is synchronous; when this + * method returns, no more actions will be taken by the agent. + */ + void stopForRemoval(NodeAgentContext context); + + /** + * Updates metric receiver with the latest node-agent stats, and returns the container stats if available. + */ + default Optional updateContainerNodeMetrics(NodeAgentContext context, boolean isSuspended) { return Optional.empty(); } + + /** + * Returns and resets number of unhandled exceptions + */ + int getAndResetNumberOfUnhandledExceptions(); +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContext.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContext.java new file mode 100644 index 00000000000..9409ae2bee1 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContext.java @@ -0,0 +1,66 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.nodeagent; + +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.HostName; +import com.yahoo.config.provision.NodeType; +import com.yahoo.config.provision.zone.ZoneApi; +import com.yahoo.vespa.athenz.api.AthenzIdentity; +import com.yahoo.vespa.hosted.node.admin.component.TaskContext; +import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.Acl; +import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeSpec; +import com.yahoo.vespa.hosted.node.admin.container.ContainerName; +import com.yahoo.vespa.hosted.node.admin.container.ContainerNetworkMode; + +import java.util.Optional; + +public interface NodeAgentContext extends TaskContext { + + /** @return node specification from node-repository */ + NodeSpec node(); + + /** @return node ACL from node-repository */ + Acl acl(); + + /** @return name of the linux container this context applies to */ + ContainerName containerName(); + + /** @return hostname of the linux container this context applies to */ + default HostName hostname() { + return HostName.of(node().hostname()); + } + + default NodeType nodeType() { + return node().type(); + } + + AthenzIdentity identity(); + + ContainerNetworkMode networkMode(); + + ZoneApi zone(); + + /** @return information about users/user namespace of the linux container this context applies to */ + UserScope users(); + + /** @return methods to resolve paths within container's file system */ + PathScope paths(); + + default boolean isDisabled(NodeAgentTask task) { + return false; + } + + /** + * The vcpu value in NodeSpec is the number of vcpus required by the node on a fixed historical + * baseline machine. However the current host has a faster per-vcpu performance by a scale factor + * (see flavors.def cpuSpeedup), and therefore do not need to set aside the full number of vcpus + * to run the node. This method returns that reduced number of vcpus. 
+ * + * @return the vcpus required by the node on this host. + */ + double vcpuOnThisHost(); + + Optional hostExclusiveTo(); + + boolean exclave(); +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextFactory.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextFactory.java new file mode 100644 index 00000000000..4e8db239867 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextFactory.java @@ -0,0 +1,13 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.nodeagent; + +import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.Acl; +import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeSpec; + +/** + * @author freva + */ +@FunctionalInterface +public interface NodeAgentContextFactory { + NodeAgentContext create(NodeSpec nodeSpec, Acl acl); +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextImpl.java new file mode 100644 index 00000000000..21d1cfd632c --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextImpl.java @@ -0,0 +1,283 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.nodeagent;
+
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.CloudName;
+import com.yahoo.config.provision.SystemName;
+import com.yahoo.config.provision.zone.ZoneApi;
+import com.yahoo.config.provision.zone.ZoneId;
+import com.yahoo.vespa.athenz.api.AthenzIdentity;
+import com.yahoo.vespa.athenz.api.AthenzService;
+import com.yahoo.vespa.flags.FetchVector;
+import com.yahoo.vespa.flags.FlagSource;
+import com.yahoo.vespa.flags.InMemoryFlagSource;
+import com.yahoo.vespa.flags.PermanentFlags;
+import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.Acl;
+import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeSpec;
+import com.yahoo.vespa.hosted.node.admin.container.ContainerName;
+import com.yahoo.vespa.hosted.node.admin.container.ContainerNetworkMode;
+import com.yahoo.vespa.hosted.node.admin.task.util.fs.ContainerFileSystem;
+
+import java.nio.file.FileSystem;
+import java.nio.file.Path;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.Set;
+import java.util.function.Function;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * Value-holding implementation of {@link NodeAgentContext}: every field is final and assigned once
+ * in the constructor. All log messages emitted through this context are prefixed with the container
+ * name followed by {@code ": "}.
+ *
+ * @author freva
+ */
+public class NodeAgentContextImpl implements NodeAgentContext {
+
+    private final String logPrefix;
+    private final NodeSpec node;
+    private final Acl acl;
+    private final ContainerName containerName;
+    private final AthenzIdentity identity;
+    private final ContainerNetworkMode containerNetworkMode;
+    private final ZoneApi zone;
+    private final UserScope userScope;
+    private final PathScope pathScope;
+    private final double cpuSpeedup;
+    private final Set<NodeAgentTask> disabledNodeAgentTasks;
+    private final Optional<ApplicationId> hostExclusiveTo;
+    private final boolean exclave;
+
+    /**
+     * Creates a context for the given node.
+     *
+     * <p>The container name is derived from the node's hostname, and the set of disabled node agent
+     * tasks is resolved once, here, from {@code PermanentFlags.DISABLED_HOST_ADMIN_TASKS} using the
+     * node's hostname and node type as flag dimensions.
+     *
+     * @param cpuSpeedup divisor applied to the node's vcpu in {@link #vcpuOnThisHost()}; must be positive
+     * @throws IllegalArgumentException if {@code cpuSpeedup} is zero or negative
+     * @throws NullPointerException if node, acl, identity, containerNetworkMode, zone, userScope
+     *                              or pathScope is null
+     */
+    public NodeAgentContextImpl(NodeSpec node, Acl acl, AthenzIdentity identity,
+                                ContainerNetworkMode containerNetworkMode, ZoneApi zone,
+                                FlagSource flagSource, UserScope userScope, PathScope pathScope,
+                                double cpuSpeedup, Optional<ApplicationId> hostExclusiveTo, boolean exclave) {
+        if (cpuSpeedup <= 0)
+            throw new IllegalArgumentException("cpuSpeedUp must be positive, was: " + cpuSpeedup);
+
+        this.node = Objects.requireNonNull(node);
+        this.acl = Objects.requireNonNull(acl);
+        this.containerName = ContainerName.fromHostname(node.hostname());
+        this.identity = Objects.requireNonNull(identity);
+        this.containerNetworkMode = Objects.requireNonNull(containerNetworkMode);
+        this.zone = Objects.requireNonNull(zone);
+        this.userScope = Objects.requireNonNull(userScope);
+        this.pathScope = Objects.requireNonNull(pathScope);
+        this.logPrefix = containerName.asString() + ": ";
+        this.cpuSpeedup = cpuSpeedup;
+        this.disabledNodeAgentTasks = NodeAgentTask.fromString(
+                PermanentFlags.DISABLED_HOST_ADMIN_TASKS.bindTo(flagSource)
+                        .with(FetchVector.Dimension.HOSTNAME, node.hostname())
+                        .with(FetchVector.Dimension.NODE_TYPE, node.type().name()).value());
+        this.hostExclusiveTo = hostExclusiveTo;
+        this.exclave = exclave;
+    }
+
+    @Override
+    public NodeSpec node() {
+        return node;
+    }
+
+    @Override
+    public Acl acl() {
+        return acl;
+    }
+
+    @Override
+    public ContainerName containerName() {
+        return containerName;
+    }
+
+    @Override
+    public AthenzIdentity identity() {
+        return identity;
+    }
+
+    @Override
+    public ContainerNetworkMode networkMode() {
+        return containerNetworkMode;
+    }
+
+    @Override
+    public ZoneApi zone() {
+        return zone;
+    }
+
+    @Override
+    public UserScope users() {
+        return userScope;
+    }
+
+    @Override
+    public PathScope paths() {
+        return pathScope;
+    }
+
+    /** Returns whether the given task was listed in the disabled-tasks flag when this context was created. */
+    @Override
+    public boolean isDisabled(NodeAgentTask task) {
+        return disabledNodeAgentTasks.contains(task);
+    }
+
+    /** Returns the node's vcpu divided by the CPU speedup given at construction. */
+    @Override
+    public double vcpuOnThisHost() {
+        return node.vcpu() / cpuSpeedup;
+    }
+
+    @Override
+    public Optional<ApplicationId> hostExclusiveTo() {
+        return hostExclusiveTo;
+    }
+
+    /** Records a system modification by logging the message through this context. */
+    @Override
+    public void recordSystemModification(Logger logger, String message) {
+        log(logger, message);
+    }
+
+    /** Logs the message at the given level, prefixed with this context's log prefix. */
+    @Override
+    public void log(Logger logger, Level level, String message) {
+        logger.log(level, logPrefix + message);
+    }
+
+    /** Logs the message and throwable at the given level, prefixed with this context's log prefix. */
+    @Override
+    public void log(Logger logger, Level level, String message, Throwable throwable) {
+        logger.log(level, logPrefix + message, throwable);
+    }
+
+    @Override
+    public boolean exclave() {
+        return exclave;
+    }
+
+    /** Creates a builder whose NodeSpec starts out as a copy of the given node. */
+    public static NodeAgentContextImpl.Builder builder(NodeSpec node) {
+        return new Builder(new NodeSpec.Builder(node));
+    }
+
+    /**
+     * Creates a NodeAgentContext.Builder with a NodeSpec that has the given hostname and some
+     * reasonable values for the remaining required NodeSpec fields. Use {@link #builder(NodeSpec)}
+     * if you want to control the entire NodeSpec.
+     */
+    public static NodeAgentContextImpl.Builder builder(String hostname) {
+        return new Builder(NodeSpec.Builder.testSpec(hostname));
+    }
+
+    /**
+     * For testing only!
+     *
+     * <p>Every setting except the container storage path has a default that is filled in by
+     * {@link #build()}; one of {@link #containerStorage(Path)} or {@link #fileSystem(FileSystem)}
+     * must be called before building.
+     */
+    public static class Builder {
+        private static final Path DEFAULT_CONTAINER_STORAGE = Path.of("/data/vespa/storage");
+
+        private NodeSpec.Builder nodeSpecBuilder;
+        private Acl acl;
+        private AthenzIdentity identity;
+        private ContainerNetworkMode containerNetworkMode;
+        private ZoneApi zone;
+        private UserNamespace userNamespace;
+        private Path containerStorage;
+        private FlagSource flagSource;
+        private double cpuSpeedUp = 1;
+        private Optional<ApplicationId> hostExclusiveTo = Optional.empty();
+        private boolean exclave = false;
+
+        private Builder(NodeSpec.Builder nodeSpecBuilder) {
+            this.nodeSpecBuilder = nodeSpecBuilder;
+        }
+
+        /** Replaces the current NodeSpec builder with the result of applying the given modifier to it. */
+        public Builder nodeSpecBuilder(Function<NodeSpec.Builder, NodeSpec.Builder> nodeSpecBuilderModifier) {
+            this.nodeSpecBuilder = nodeSpecBuilderModifier.apply(nodeSpecBuilder);
+            return this;
+        }
+
+        public Builder acl(Acl acl) {
+            this.acl = acl;
+            return this;
+        }
+
+        public Builder identity(AthenzIdentity identity) {
+            this.identity = identity;
+            return this;
+        }
+
+        public Builder networkMode(ContainerNetworkMode containerNetworkMode) {
+            this.containerNetworkMode = containerNetworkMode;
+            return this;
+        }
+
+        public Builder zone(ZoneApi zone) {
+            this.zone = zone;
+            return this;
+        }
+
+        public Builder userNamespace(UserNamespace userNamespace) {
+            this.userNamespace = userNamespace;
+            return this;
+        }
+
+        /** Sets the file system to use for paths. */
+        public Builder fileSystem(FileSystem fileSystem) {
+            return containerStorage(fileSystem.getPath(DEFAULT_CONTAINER_STORAGE.toString()));
+        }
+
+        public Builder flagSource(FlagSource flagSource) {
+            this.flagSource = flagSource;
+            return this;
+        }
+
+        public Builder cpuSpeedUp(double cpuSpeedUp) {
+            this.cpuSpeedUp = cpuSpeedUp;
+            return this;
+        }
+
+        public Builder containerStorage(Path path) {
+            this.containerStorage = path;
+            return this;
+        }
+
+        /** Sets the application this host is exclusive to; {@code null} means not exclusive. */
+        public Builder hostExclusiveTo(ApplicationId applicationId) {
+            this.hostExclusiveTo = Optional.ofNullable(applicationId);
+            return this;
+        }
+
+        public Builder exclave(boolean exclave) {
+            this.exclave = exclave;
+            return this;
+        }
+
+        /**
+         * Builds the context, substituting defaults for everything that was not set, and creates the
+         * root of the container file system under the container storage path.
+         *
+         * @throws NullPointerException if neither containerStorage nor fileSystem has been set
+         */
+        public NodeAgentContextImpl build() {
+            Objects.requireNonNull(containerStorage, "Must set one of containerStorage or fileSystem");
+
+            UserScope userScope = UserScope.create(
+                    Optional.ofNullable(userNamespace).orElseGet(() -> new UserNamespace(100000, 100000, 100000)));
+            // The container's directory is named after the first label of the hostname
+            ContainerFileSystem containerFs = ContainerFileSystem.create(containerStorage
+                    .resolve(nodeSpecBuilder.hostname().split("\\.")[0]), userScope);
+            containerFs.createRoot();
+
+            return new NodeAgentContextImpl(
+                    nodeSpecBuilder.build(),
+                    Optional.ofNullable(acl).orElse(Acl.EMPTY),
+                    Optional.ofNullable(identity).orElseGet(() -> new AthenzService("domain", "service")),
+                    Optional.ofNullable(containerNetworkMode).orElse(ContainerNetworkMode.HOST_NETWORK),
+                    Optional.ofNullable(zone).orElseGet(() -> new ZoneApi() {
+                        @Override
+                        public SystemName getSystemName() {
+                            return SystemName.defaultSystem();
+                        }
+
+                        @Override
+                        public ZoneId getId() {
+                            return ZoneId.defaultId();
+                        }
+
+                        @Override
+                        public CloudName getCloudName() {
+                            return CloudName.DEFAULT;
+                        }
+
+                        @Override
+                        public String getCloudNativeRegionName() {
+                            return getId().region().value();
+                        }
+                    }),
+                    Optional.ofNullable(flagSource).orElseGet(InMemoryFlagSource::new),
+                    userScope,
+                    new PathScope(containerFs, "/opt/vespa"),
+                    cpuSpeedUp, hostExclusiveTo, exclave);
+        }
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextManager.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextManager.java
new file mode 100644
index 00000000000..ee3c86b838f
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextManager.java
@@ -0,0 +1,124 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.nodeagent;
+
+import com.yahoo.jdisc.Timer;
+
+import java.time.Duration;
+import java.time.Instant;
+import java.util.Objects;
+
+/**
+ * Hands contexts from a scheduler to a supplier. All mutable state is read and written while holding
+ * a single private monitor, and every state change is announced with {@code notifyAll()} on it.
+ *
+ * This class should be used by exactly 2 threads, 1 for each interface it implements.
+ *
+ * @author freva
+ */
+public class NodeAgentContextManager implements NodeAgentContextSupplier, NodeAgentScheduler {
+
+    private final Object monitor = new Object();
+    private final Timer timer;
+
+    private NodeAgentContext currentContext;
+    private NodeAgentContext nextContext;
+    private Instant nextContextAt;
+    private boolean wantFrozen = false;
+    private boolean isFrozen = true;
+    private boolean interrupted = false;
+    private boolean isWaitingForNextContext = false;
+
+    public NodeAgentContextManager(Timer timer, NodeAgentContext context) {
+        this.timer = timer;
+        this.currentContext = context;
+    }
+
+    /**
+     * Stores the context to hand out next and the instant from which it may be handed out,
+     * then wakes up any thread waiting on the monitor.
+     *
+     * @throws NullPointerException if context or at is null
+     */
+    @Override
+    public void scheduleTickWith(NodeAgentContext context, Instant at) {
+        synchronized (monitor) {
+            nextContext = Objects.requireNonNull(context);
+            nextContextAt = Objects.requireNonNull(at);
+            monitor.notifyAll(); // Notify of new context
+        }
+    }
+
+    /**
+     * Requests the given frozen state and waits, for at most the given timeout measured with the
+     * injected timer, until the supplier side has adopted it.
+     *
+     * @return true if the actual frozen state equals the wanted one when this method returns
+     */
+    @Override
+    public boolean setFrozen(boolean frozen, Duration timeout) {
+        synchronized (monitor) {
+            if (wantFrozen != frozen) {
+                wantFrozen = frozen;
+                monitor.notifyAll(); // Notify the supplier of the wantFrozen change
+            }
+
+            boolean successful;
+            long remainder;
+            long end = timer.currentTime().plus(timeout).toEpochMilli();
+            while (!(successful = isFrozen == frozen) && (remainder = end - timer.currentTimeMillis()) > 0) {
+                try {
+                    monitor.wait(remainder); // Wait with timeout until the supplier has reached wanted frozen state
+                } catch (InterruptedException ignored) { } // Loop re-checks the state and the remaining time
+            }
+
+            return successful;
+        }
+    }
+
+    /**
+     * Discards any previously scheduled context, then blocks until this manager is not frozen, a new
+     * context has been scheduled, and the instant it was scheduled for has been reached.
+     *
+     * <p>NOTE: the scheduled instant is compared against {@code Instant.now()}, not the injected timer.
+     * The interrupted flag is not cleared here, so once {@link #interrupt()} has been called every
+     * call to this method throws.
+     *
+     * @throws ContextSupplierInterruptedException if {@link #interrupt()} has been called
+     */
+    @Override
+    public NodeAgentContext nextContext() throws ContextSupplierInterruptedException {
+        synchronized (monitor) {
+            nextContext = null; // Reset any previous context and wait for the next one
+            isWaitingForNextContext = true;
+            monitor.notifyAll();
+            Duration untilNextContext = Duration.ZERO;
+            while (true) {
+                if (interrupted) throw new ContextSupplierInterruptedException();
+
+                // Adopts the wanted frozen state as a side effect, also when the other conditions fail
+                if (!setAndGetIsFrozen(wantFrozen) &&
+                        nextContext != null &&
+                        (untilNextContext = Duration.between(Instant.now(), nextContextAt)).toMillis() <= 0)
+                    break;
+
+                try {
+                    // A timeout of 0 means wait until notified
+                    monitor.wait(Math.max(untilNextContext.toMillis(), 0L)); // Wait until scheduler provides a new context
+                } catch (InterruptedException ignored) { } // Loop re-checks all conditions
+            }
+
+            isWaitingForNextContext = false;
+            currentContext = nextContext;
+            return currentContext;
+        }
+    }
+
+    /** Returns the context most recently returned by {@link #nextContext()}, or the initial one if none. */
+    @Override
+    public NodeAgentContext currentContext() {
+        synchronized (monitor) {
+            return currentContext;
+        }
+    }
+
+    /** Sets the interrupted flag and wakes up any thread waiting on the monitor. */
+    @Override
+    public void interrupt() {
+        synchronized (monitor) {
+            interrupted = true;
+            monitor.notifyAll();
+        }
+    }
+
+    /** Updates the actual frozen state, notifying waiters if it changed, and returns the new state. */
+    private boolean setAndGetIsFrozen(boolean isFrozen) {
+        synchronized (monitor) {
+            if (this.isFrozen != isFrozen) {
+                this.isFrozen = isFrozen;
+                monitor.notifyAll(); // Notify the scheduler of the isFrozen change
+            }
+            return this.isFrozen;
+        }
+    }
+
+    /** FOR TESTING ONLY */
+    void waitUntilWaitingForNextContext() {
+        synchronized (monitor) {
+            while (!isWaitingForNextContext) {
+                try {
+                    monitor.wait();
+                } catch (InterruptedException ignored) { }
+            }
+        }
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextSupplier.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextSupplier.java
new file mode 100644
index 00000000000..a4450626766
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextSupplier.java
@@ -0,0 +1,20 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.nodeagent;
+
+/**
+ * Source of {@link NodeAgentContext}s that a consumer pulls one at a time with a blocking call.
+ *
+ * @author freva
+ */
+public interface NodeAgentContextSupplier {
+
+    /**
+     * Blocks until the next context is ready
+     * @return context
+     * @throws ContextSupplierInterruptedException if {@link #interrupt()} was called before this method returned
+     */
+    NodeAgentContext nextContext() throws ContextSupplierInterruptedException;
+
+    /** Interrupts the thread(s) currently waiting in {@link #nextContext()} */
+    void interrupt();
+
+    /** Unchecked exception thrown from {@link #nextContext()} to signal that {@link #interrupt()} was called. */
+    class ContextSupplierInterruptedException extends RuntimeException { }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentFactory.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentFactory.java
new file mode 100644
index 00000000000..ef67ff88471
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentFactory.java
@@ -0,0 +1,10 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.nodeagent;
+
+/**
+ * Factory for {@link NodeAgent} instances; a single-method interface, so it can be given as a lambda.
+ *
+ * @author freva
+ */
+@FunctionalInterface
+public interface NodeAgentFactory {
+    /**
+     * Creates a node agent.
+     *
+     * @param contextSupplier the supplier the agent gets its contexts from
+     * @param nodeAgentContext presumably the initial context of the node the agent is for - TODO confirm against implementations
+     */
+    NodeAgent create(NodeAgentContextSupplier contextSupplier, NodeAgentContext nodeAgentContext);
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
new file mode 100644
index 00000000000..43dc3d72c46
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
@@ -0,0 +1,633 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.nodeagent; + +import com.yahoo.component.Version; +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.DockerImage; +import com.yahoo.config.provision.NodeType; +import com.yahoo.config.provision.zone.ZoneApi; +import com.yahoo.jdisc.Timer; +import com.yahoo.vespa.flags.DoubleFlag; +import com.yahoo.vespa.flags.FlagSource; +import com.yahoo.vespa.flags.PermanentFlags; +import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeAttributes; +import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeMembership; +import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeRepository; +import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeSpec; +import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeState; +import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.reports.DropDocumentsReport; +import com.yahoo.vespa.hosted.node.admin.configserver.orchestrator.Orchestrator; +import com.yahoo.vespa.hosted.node.admin.container.Container; +import com.yahoo.vespa.hosted.node.admin.container.ContainerOperations; +import com.yahoo.vespa.hosted.node.admin.container.ContainerResources; +import com.yahoo.vespa.hosted.node.admin.container.RegistryCredentialsProvider; +import com.yahoo.vespa.hosted.node.admin.maintenance.ContainerWireguardTask; +import com.yahoo.vespa.hosted.node.admin.maintenance.StorageMaintainer; +import com.yahoo.vespa.hosted.node.admin.maintenance.acl.AclMaintainer; +import com.yahoo.vespa.hosted.node.admin.maintenance.identity.CredentialsMaintainer; +import com.yahoo.vespa.hosted.node.admin.maintenance.servicedump.VespaServiceDumper; +import com.yahoo.vespa.hosted.node.admin.nodeadmin.ConvergenceException; +import com.yahoo.vespa.hosted.node.admin.task.util.file.FileFinder; + +import java.time.Duration; +import java.time.Instant; +import java.util.ArrayList; +import java.util.List; +import 
java.util.Objects; +import java.util.Optional; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.Function; +import java.util.logging.Level; +import java.util.logging.Logger; + +import static com.yahoo.vespa.flags.FetchVector.Dimension.INSTANCE_ID; +import static com.yahoo.vespa.flags.FetchVector.Dimension.CLUSTER_ID; +import static com.yahoo.vespa.flags.FetchVector.Dimension.CLUSTER_TYPE; +import static com.yahoo.vespa.flags.FetchVector.Dimension.HOSTNAME; +import static com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContextSupplier.ContextSupplierInterruptedException; +import static com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentImpl.ContainerState.ABSENT; +import static com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentImpl.ContainerState.STARTING; +import static com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentImpl.ContainerState.UNKNOWN; + +/** + * @author dybis + * @author bakksjo + */ +public class NodeAgentImpl implements NodeAgent { + + // Container is started with uncapped CPU and is kept that way until the first successful health check + this duration + // Subtract 1 second to avoid warmup coming in lockstep with tick time and always end up using an extra tick when there are just a few ms left + private static final Duration DEFAULT_WARM_UP_DURATION = Duration.ofSeconds(90).minus(Duration.ofSeconds(1)); + + private static final Logger logger = Logger.getLogger(NodeAgentImpl.class.getName()); + + private final NodeAgentContextSupplier contextSupplier; + private final NodeRepository nodeRepository; + private final Orchestrator orchestrator; + private final ContainerOperations containerOperations; + private final RegistryCredentialsProvider registryCredentialsProvider; + private final StorageMaintainer storageMaintainer; + private final List credentialsMaintainers; + private final Optional aclMaintainer; + private final Optional healthChecker; + private final Timer timer; + private final Duration warmUpDuration; 
+ private final DoubleFlag containerCpuCap; + private final VespaServiceDumper serviceDumper; + private final List wireguardTasks; + + private Thread loopThread; + private ContainerState containerState = UNKNOWN; + private NodeSpec lastNode; + + private final AtomicBoolean terminated = new AtomicBoolean(false); + private boolean hasResumedNode = false; + private boolean hasStartedServices = true; + private Optional firstSuccessfulHealthCheckInstant = Optional.empty(); + private boolean suspendedInOrchestrator = false; + + private int numberOfUnhandledException = 0; + private long currentRebootGeneration = 0; + private Optional currentRestartGeneration = Optional.empty(); + + /** + * ABSENT means container is definitely absent - A container that was absent will not suddenly appear without + * NodeAgent explicitly starting it. + * STARTING state is set just before we attempt to start a container, if successful we move to the next state. + * Otherwise we can't be certain. A container that was running a minute ago may no longer be running without + * NodeAgent doing anything (container could have crashed). Therefore we always have to ask docker daemon + * to get updated state of the container. 
+ */ + enum ContainerState { + ABSENT, + STARTING, + UNKNOWN + } + + + public NodeAgentImpl(NodeAgentContextSupplier contextSupplier, NodeRepository nodeRepository, + Orchestrator orchestrator, ContainerOperations containerOperations, + RegistryCredentialsProvider registryCredentialsProvider, StorageMaintainer storageMaintainer, + FlagSource flagSource, List credentialsMaintainers, + Optional aclMaintainer, Optional healthChecker, Timer timer, + VespaServiceDumper serviceDumper, List wireguardTasks) { + this(contextSupplier, nodeRepository, orchestrator, containerOperations, registryCredentialsProvider, + storageMaintainer, flagSource, credentialsMaintainers, aclMaintainer, healthChecker, timer, + DEFAULT_WARM_UP_DURATION, serviceDumper, wireguardTasks); + } + + public NodeAgentImpl(NodeAgentContextSupplier contextSupplier, NodeRepository nodeRepository, + Orchestrator orchestrator, ContainerOperations containerOperations, + RegistryCredentialsProvider registryCredentialsProvider, StorageMaintainer storageMaintainer, + FlagSource flagSource, List credentialsMaintainers, + Optional aclMaintainer, Optional healthChecker, Timer timer, + Duration warmUpDuration, VespaServiceDumper serviceDumper, + List wireguardTasks) { + this.contextSupplier = contextSupplier; + this.nodeRepository = nodeRepository; + this.orchestrator = orchestrator; + this.containerOperations = containerOperations; + this.registryCredentialsProvider = registryCredentialsProvider; + this.storageMaintainer = storageMaintainer; + this.credentialsMaintainers = credentialsMaintainers; + this.aclMaintainer = aclMaintainer; + this.healthChecker = healthChecker; + this.timer = timer; + this.warmUpDuration = warmUpDuration; + this.containerCpuCap = PermanentFlags.CONTAINER_CPU_CAP.bindTo(flagSource); + this.serviceDumper = serviceDumper; + this.wireguardTasks = new ArrayList<>(wireguardTasks); + } + + @Override + public void start(NodeAgentContext initialContext) { + if (loopThread != null) + throw new 
IllegalStateException("Can not re-start a node agent."); + + loopThread = new Thread(() -> { + while (!terminated.get()) { + try { + converge(contextSupplier.nextContext()); + } catch (ContextSupplierInterruptedException ignored) { } + } + }); + loopThread.setName("tick-" + initialContext.hostname()); + loopThread.start(); + } + + @Override + public void stopForRemoval(NodeAgentContext context) { + if (!terminated.compareAndSet(false, true)) + throw new IllegalStateException("Can not re-stop a node agent."); + + contextSupplier.interrupt(); + + do { + try { + loopThread.join(); + } catch (InterruptedException ignored) { } + } while (loopThread.isAlive()); + + context.log(logger, "Stopped"); + } + + void startServicesIfNeeded(NodeAgentContext context) { + if (!hasStartedServices) { + context.log(logger, "Invoking vespa-nodectl to start services"); + String output = containerOperations.startServices(context); + if (!output.isBlank()) { + context.log(logger, "Start services output: " + output); + } + hasStartedServices = true; + } + } + + void resumeNodeIfNeeded(NodeAgentContext context) { + if (!hasResumedNode) { + context.log(logger, "Invoking vespa-nodectl to resume services"); + String output = containerOperations.resumeNode(context); + if (!output.isBlank()) { + context.log(logger, "Resume services output: " + output); + } + hasResumedNode = true; + } + } + + private void updateNodeRepoWithCurrentAttributes(NodeAgentContext context, Optional containerCreatedAt) { + final NodeAttributes currentNodeAttributes = new NodeAttributes(); + final NodeAttributes newNodeAttributes = new NodeAttributes(); + boolean changed = false; + + if (context.node().wantedRestartGeneration().isPresent() && + !Objects.equals(context.node().currentRestartGeneration(), currentRestartGeneration)) { + currentNodeAttributes.withRestartGeneration(context.node().currentRestartGeneration()); + newNodeAttributes.withRestartGeneration(currentRestartGeneration); + changed = true; + } + + boolean 
createdAtAfterRebootedEvent = context.node().events().stream() + .filter(event -> event.type().equals("rebooted")) + .map(event -> containerCreatedAt + .map(createdAt -> createdAt.isAfter(event.at())) + .orElse(false)) // Container not created + .findFirst() + .orElse(containerCreatedAt.isPresent()); // No rebooted event + if (!Objects.equals(context.node().currentRebootGeneration(), currentRebootGeneration) || createdAtAfterRebootedEvent) { + currentNodeAttributes.withRebootGeneration(context.node().currentRebootGeneration()); + newNodeAttributes.withRebootGeneration(currentRebootGeneration); + changed = true; + } + + Optional wantedDockerImage = context.node().wantedDockerImage().filter(n -> containerState == UNKNOWN); + if (!Objects.equals(context.node().currentDockerImage(), wantedDockerImage)) { + DockerImage currentImage = context.node().currentDockerImage().orElse(DockerImage.EMPTY); + DockerImage newImage = wantedDockerImage.orElse(DockerImage.EMPTY); + + currentNodeAttributes.withDockerImage(currentImage); + currentNodeAttributes.withVespaVersion(context.node().currentVespaVersion().orElse(Version.emptyVersion)); + newNodeAttributes.withDockerImage(newImage); + newNodeAttributes.withVespaVersion(context.node().wantedVespaVersion().orElse(Version.emptyVersion)); + changed = true; + } + + Optional report = context.node().reports().getReport(DropDocumentsReport.reportId(), DropDocumentsReport.class); + if (report.isPresent() && report.get().startedAt() == null && report.get().readiedAt() != null) { + newNodeAttributes.withReport(DropDocumentsReport.reportId(), report.get().withStartedAt(timer.currentTimeMillis()).toJsonNode()); + changed = true; + } + + if (changed) { + context.log(logger, "Publishing new set of attributes to node repo: %s -> %s", + currentNodeAttributes, newNodeAttributes); + nodeRepository.updateNodeAttributes(context.hostname().value(), newNodeAttributes); + } + } + + private Container startContainer(NodeAgentContext context) { + 
ContainerResources wantedResources = warmUpDuration(context).isNegative() ? + getContainerResources(context) : getContainerResources(context).withUnlimitedCpus(); + ContainerData containerData = containerOperations.createContainer(context, wantedResources); + writeContainerData(context, containerData); + containerOperations.startContainer(context); + + currentRebootGeneration = context.node().wantedRebootGeneration(); + currentRestartGeneration = context.node().wantedRestartGeneration(); + hasStartedServices = true; // Automatically started with the container + hasResumedNode = false; + context.log(logger, "Container successfully started, new containerState is " + containerState); + return containerOperations.getContainer(context).orElseThrow(() -> + ConvergenceException.ofError("Did not find container that was just started")); + } + + private Optional removeContainerIfNeededUpdateContainerState( + NodeAgentContext context, Optional existingContainer) { + if (existingContainer.isPresent()) { + List reasons = shouldRemoveContainer(context, existingContainer.get()); + if (!reasons.isEmpty()) { + removeContainer(context, existingContainer.get(), reasons, false); + return Optional.empty(); + } + + shouldRestartServices(context, existingContainer.get()).ifPresent(restartReason -> { + context.log(logger, "Invoking vespa-nodectl to restart services: " + restartReason); + orchestratorSuspendNode(context); + + ContainerResources currentResources = existingContainer.get().resources(); + ContainerResources wantedResources = currentResources.withUnlimitedCpus(); + if ( ! warmUpDuration(context).isNegative() && ! wantedResources.equals(currentResources)) { + context.log(logger, "Updating container resources: %s -> %s", + existingContainer.get().resources().toStringCpu(), wantedResources.toStringCpu()); + containerOperations.updateContainer(context, existingContainer.get().id(), wantedResources); + } + + String output = containerOperations.restartVespa(context); + if ( ! 
output.isBlank()) { + context.log(logger, "Restart services output: " + output); + } + currentRestartGeneration = context.node().wantedRestartGeneration(); + firstSuccessfulHealthCheckInstant = Optional.empty(); + }); + } + + return existingContainer; + } + + private Optional shouldRestartServices(NodeAgentContext context, Container existingContainer) { + NodeSpec node = context.node(); + if (!existingContainer.state().isRunning() || node.state() != NodeState.active) return Optional.empty(); + + // Restart generation is only optional because it does not exist for unallocated nodes + if (currentRestartGeneration.get() < node.wantedRestartGeneration().get()) { + return Optional.of("Restart requested - wanted restart generation has been bumped: " + + currentRestartGeneration.get() + " -> " + node.wantedRestartGeneration().get()); + } + + return Optional.empty(); + } + + private void stopServicesIfNeeded(NodeAgentContext context) { + if (hasStartedServices && context.node().owner().isEmpty()) + stopServices(context); + } + + private void stopServices(NodeAgentContext context) { + context.log(logger, "Stopping services"); + if (containerState == ABSENT) return; + hasStartedServices = hasResumedNode = false; + firstSuccessfulHealthCheckInstant = Optional.empty(); + containerOperations.stopServices(context); + } + + @Override + public void stopForHostSuspension(NodeAgentContext context) { + getContainer(context).ifPresent(container -> removeContainer(context, container, List.of("Suspending host"), true)); + } + + public void suspend(NodeAgentContext context) { + if (containerState == ABSENT) return; + try { + hasResumedNode = false; + context.log(logger, "Invoking vespa-nodectl to suspend services"); + String output = containerOperations.suspendNode(context); + if (!output.isBlank()) { + context.log(logger, "Suspend services output: " + output); + } + } catch (RuntimeException e) { + // It's bad to continue as-if nothing happened, but on the other hand if we do not 
proceed to + // remove container, we will not be able to upgrade to fix any problems in the suspend logic! + context.log(logger, Level.WARNING, "Failed trying to suspend container", e); + } + } + + private List shouldRemoveContainer(NodeAgentContext context, Container existingContainer) { + final NodeState nodeState = context.node().state(); + List reasons = new ArrayList<>(); + if (nodeState == NodeState.dirty || nodeState == NodeState.provisioned) + reasons.add("Node in state " + nodeState + ", container should no longer be running"); + + if (context.node().wantedDockerImage().isPresent() && + !context.node().wantedDockerImage().get().equals(existingContainer.image())) { + reasons.add("The node is supposed to run a new Docker image: " + + existingContainer.image().asString() + " -> " + context.node().wantedDockerImage().get().asString()); + } + + if (!existingContainer.state().isRunning()) + reasons.add("Container no longer running"); + + if (currentRebootGeneration < context.node().wantedRebootGeneration()) { + reasons.add(String.format("Container reboot wanted. 
Current: %d, Wanted: %d", + currentRebootGeneration, context.node().wantedRebootGeneration())); + } + + ContainerResources wantedContainerResources = getContainerResources(context); + if (!wantedContainerResources.equalsMemory(existingContainer.resources())) { + reasons.add("Container should be running with different memory allocation, wanted: " + + wantedContainerResources.toStringMemory() + ", actual: " + existingContainer.resources().toStringMemory()); + } + + if (containerState == STARTING) + reasons.add("Container failed to start"); + + return reasons; + } + + private void removeContainer(NodeAgentContext context, Container existingContainer, List reasons, boolean alreadySuspended) { + context.log(logger, "Will remove container: " + String.join(", ", reasons)); + + if (existingContainer.state().isRunning()) { + if (!alreadySuspended) { + orchestratorSuspendNode(context); + } + + try { + if (context.node().state() == NodeState.active) { + suspend(context); + } + stopServices(context); + } catch (Exception e) { + context.log(logger, Level.WARNING, "Failed stopping services, ignoring", e); + } + } + + storageMaintainer.handleCoreDumpsForContainer(context, Optional.of(existingContainer), true); + containerOperations.removeContainer(context, existingContainer); + containerState = ABSENT; + context.log(logger, "Container successfully removed, new containerState is " + containerState); + } + + + private Container updateContainerIfNeeded(NodeAgentContext context, Container existingContainer) { + ContainerResources wantedContainerResources = getContainerResources(context); + + if (healthChecker.isPresent() && firstSuccessfulHealthCheckInstant + .map(timer.currentTime().minus(warmUpDuration(context))::isBefore) + .orElse(true)) + return existingContainer; + + if (wantedContainerResources.equalsCpu(existingContainer.resources())) return existingContainer; + context.log(logger, "Container should be running with different CPU allocation, wanted: %s, current: %s", + 
wantedContainerResources.toStringCpu(), existingContainer.resources().toStringCpu()); + + // Only update CPU resources + containerOperations.updateContainer(context, existingContainer.id(), wantedContainerResources.withMemoryBytes(existingContainer.resources().memoryBytes())); + return containerOperations.getContainer(context).orElseThrow(() -> + ConvergenceException.ofError("Did not find container that was just updated")); + } + + private ContainerResources getContainerResources(NodeAgentContext context) { + double cpuCap = context.vcpuOnThisHost() * containerCpuCap + .with(INSTANCE_ID, context.node().owner().map(ApplicationId::serializedForm)) + .with(CLUSTER_ID, context.node().membership().map(NodeMembership::clusterId)) + .with(CLUSTER_TYPE, context.node().membership().map(membership -> membership.type().value())) + .with(HOSTNAME, context.node().hostname()) + .value(); + + return ContainerResources.from(cpuCap, context.vcpuOnThisHost(), context.node().memoryGb()); + } + + private boolean downloadImageIfNeeded(NodeAgentContext context, Optional container) { + NodeSpec node = context.node(); + if (node.wantedDockerImage().equals(container.map(c -> c.image()))) return false; + + return node.wantedDockerImage() + .map(image -> containerOperations.pullImageAsyncIfNeeded(context, image, registryCredentialsProvider)) + .orElse(false); + } + + private void dropDocsIfNeeded(NodeAgentContext context, Optional container) { + Optional report = context.node().reports() + .getReport(DropDocumentsReport.reportId(), DropDocumentsReport.class); + if (report.isEmpty() || report.get().readiedAt() != null) return; + + if (report.get().droppedAt() == null) { + container.ifPresent(c -> removeContainer(context, c, List.of("Dropping documents"), true)); + FileFinder.from(context.paths().underVespaHome("var/db/vespa/search")).deleteRecursively(context); + nodeRepository.updateNodeAttributes(context.node().hostname(), + new NodeAttributes().withReport(DropDocumentsReport.reportId(), 
report.get().withDroppedAt(timer.currentTimeMillis()).toJsonNode())); + } + + throw ConvergenceException.ofTransient("Documents already dropped, waiting for signal to start the container"); + } + + public void converge(NodeAgentContext context) { + try { + doConverge(context); + context.log(logger, Level.INFO, "Converged"); + } catch (ConvergenceException e) { + context.log(logger, e.getMessage()); + if (e.isError()) + numberOfUnhandledException++; + } catch (Throwable e) { + numberOfUnhandledException++; + context.log(logger, Level.SEVERE, "Unhandled exception, ignoring", e); + } + } + + // Non-private for testing + void doConverge(NodeAgentContext context) { + NodeSpec node = context.node(); + Optional container = getContainer(context); + + // Current reboot generation uninitialized or incremented from outside to cancel reboot + if (currentRebootGeneration < node.currentRebootGeneration()) + currentRebootGeneration = node.currentRebootGeneration(); + + // Either we have changed allocation status (restart gen. only available to allocated nodes), or + // restart generation has been incremented from outside to cancel restart + if (currentRestartGeneration.isPresent() != node.currentRestartGeneration().isPresent() || + currentRestartGeneration.map(current -> current < node.currentRestartGeneration().get()).orElse(false)) + currentRestartGeneration = node.currentRestartGeneration(); + + if (!node.equals(lastNode)) { + logChangesToNodeSpec(context, lastNode, node); + lastNode = node; + } + + // Run this here and now, even though we may immediately remove the container below. + // This ensures these maintainers are run even if something fails or returns early. + // These maintainers should also run immediately after starting the container (see below). 
+ container.filter(c -> c.state().isRunning()) + .ifPresent(c -> runImportantContainerMaintainers(context, c)); + + switch (node.state()) { + case ready, reserved, failed, inactive, parked -> { + storageMaintainer.syncLogs(context, true); + if (node.state() == NodeState.reserved) downloadImageIfNeeded(context, container); + removeContainerIfNeededUpdateContainerState(context, container); + updateNodeRepoWithCurrentAttributes(context, Optional.empty()); + stopServicesIfNeeded(context); + } + case active -> { + storageMaintainer.syncLogs(context, true); + storageMaintainer.cleanDiskIfFull(context); + storageMaintainer.handleCoreDumpsForContainer(context, container, false); + + if (downloadImageIfNeeded(context, container)) { + context.log(logger, "Waiting for image to download " + context.node().wantedDockerImage().get().asString()); + return; + } + dropDocsIfNeeded(context, container); + container = removeContainerIfNeededUpdateContainerState(context, container); + credentialsMaintainers.forEach(maintainer -> maintainer.converge(context)); + if (container.isEmpty()) { + containerState = STARTING; + container = Optional.of(startContainer(context)); + containerState = UNKNOWN; + runImportantContainerMaintainers(context, container.get()); + } else { + container = Optional.of(updateContainerIfNeeded(context, container.get())); + } + + serviceDumper.processServiceDumpRequest(context); + + startServicesIfNeeded(context); + resumeNodeIfNeeded(context); + if (healthChecker.isPresent()) { + healthChecker.get().verifyHealth(context); + if (firstSuccessfulHealthCheckInstant.isEmpty()) + firstSuccessfulHealthCheckInstant = Optional.of(timer.currentTime()); + + Duration timeLeft = Duration.between(timer.currentTime(), firstSuccessfulHealthCheckInstant.get().plus(warmUpDuration(context))); + if ( ! container.get().resources().equalsCpu(getContainerResources(context)) + && context.node().currentDockerImage().isPresent()) // Immediately resume first-time deployments, when healthy. 
+ throw ConvergenceException.ofTransient("Refusing to resume until warm up period ends (" + + (timeLeft.isNegative() ? "next tick" : "in " + timeLeft) + ")"); + } + + // Because it's more important to stop a bad release from rolling out in prod, + // we put the resume call last. So if we fail after updating the node repo attributes + // but before resume, the app may go through the tenant pipeline but will halt in prod. + // + // Note that this problem exists only because there are 2 different mechanisms + // that should really be parts of a single mechanism: + // - The content of node repo is used to determine whether a new Vespa+application + // has been successfully rolled out. + // - Slobrok and internal orchestrator state is used to determine whether + // to allow upgrade (suspend). + updateNodeRepoWithCurrentAttributes(context, container.map(Container::createdAt)); + if (suspendedInOrchestrator || node.orchestratorStatus().isSuspended()) { + context.log(logger, "Call resume against Orchestrator"); + orchestrator.resume(context.hostname().value()); + suspendedInOrchestrator = false; + } + } + case dirty -> { + removeContainerIfNeededUpdateContainerState(context, container); + context.log(logger, "State is " + node.state() + ", will delete application storage and mark node as ready"); + credentialsMaintainers.forEach(maintainer -> maintainer.clearCredentials(context)); + storageMaintainer.syncLogs(context, false); + storageMaintainer.archiveNodeStorage(context); + updateNodeRepoWithCurrentAttributes(context, Optional.empty()); + nodeRepository.setNodeState(context.hostname().value(), NodeState.ready); + } + default -> throw ConvergenceException.ofError("Unexpected state " + node.state().name()); + } + } + + private void runImportantContainerMaintainers(NodeAgentContext context, Container runningContainer) { + aclMaintainer.ifPresent(maintainer -> maintainer.converge(context)); + wireguardTasks.forEach(task -> task.converge(context, runningContainer.id())); + } 
+
+    /**
+     * Logs at INFO level what changed between the previously seen node spec and the current one.
+     * Only the {@code state} field is compared.
+     *
+     * @param lastNode the previously seen spec, may be {@code null}
+     * @param node     the current spec
+     */
+    private static void logChangesToNodeSpec(NodeAgentContext context, NodeSpec lastNode, NodeSpec node) {
+        StringBuilder builder = new StringBuilder();
+        appendIfDifferent(builder, "state", lastNode, node, NodeSpec::state);
+        if (builder.length() > 0) {
+            context.log(logger, Level.INFO, "Changes to node: " + builder);
+        }
+    }
+
+    /** Returns {@code value.toString()}, or {@code "[absent]"} when the value is {@code null}. */
+    private static <T> String fieldDescription(T value) {
+        return value == null ? "[absent]" : value.toString();
+    }
+
+    /**
+     * Appends "name old -> new" to the builder when the getter yields different values
+     * (compared with {@link Objects#equals}) for the two specs. Entries are separated by ", ".
+     *
+     * @param oldNode the previous spec; when {@code null} the old value is treated as {@code null}
+     * @param getter  extracts the field to compare from a spec
+     */
+    private static <T> void appendIfDifferent(StringBuilder builder, String name, NodeSpec oldNode, NodeSpec newNode, Function<NodeSpec, T> getter) {
+        T oldValue = oldNode == null ? null : getter.apply(oldNode);
+        T newValue = getter.apply(newNode);
+        if (!Objects.equals(oldValue, newValue)) {
+            if (builder.length() > 0) {
+                builder.append(", ");
+            }
+            builder.append(name).append(" ").append(fieldDescription(oldValue)).append(" -> ").append(fieldDescription(newValue));
+        }
+    }
+
+    /**
+     * Returns the container of this node agent, if any. While {@code containerState} is ABSENT
+     * no lookup is made; a lookup that finds no container sets {@code containerState} to ABSENT.
+     */
+    private Optional<Container> getContainer(NodeAgentContext context) {
+        if (containerState == ABSENT) return Optional.empty();
+        Optional<Container> container = containerOperations.getContainer(context);
+        if (container.isEmpty()) containerState = ABSENT;
+        return container;
+    }
+
+    /** Returns the current value of the unhandled exception counter and resets the counter to 0. */
+    @Override
+    public int getAndResetNumberOfUnhandledExceptions() {
+        int temp = numberOfUnhandledException;
+        numberOfUnhandledException = 0;
+        return temp;
+    }
+
+    /** Asks the Orchestrator to suspend this host and records that it did; does nothing unless the node is active. */
+    private void orchestratorSuspendNode(NodeAgentContext context) {
+        if (context.node().state() != NodeState.active) return;
+
+        context.log(logger, "Ask Orchestrator for permission to suspend node");
+        orchestrator.suspend(context.hostname().value());
+        suspendedInOrchestrator = true;
+    }
+
+    /** Hook for subclasses; this implementation does nothing. */
+    protected void writeContainerData(NodeAgentContext context, ContainerData containerData) { }
+
+    protected List credentialsMaintainers() {
+        return credentialsMaintainers;
+    }
+
+    /**
+     * Returns the warm-up period to use for this node: -1 seconds in test environments, for node types
+     * other than tenant, and for members of clusters that neither have a container nor are admin clusters.
+     * Otherwise the configured warm-up duration, divided by 3 in CD systems.
+     */
+    private Duration warmUpDuration(NodeAgentContext context) {
+        ZoneApi zone = context.zone();
+        Optional<NodeMembership> membership = context.node().membership();
+        return zone.getEnvironment().isTest()
+                || context.nodeType() != NodeType.tenant
+                || membership.map(mem -> ! (mem.type().hasContainer() || mem.type().isAdmin())).orElse(false)
+                ? Duration.ofSeconds(-1)
+                : warmUpDuration.dividedBy(zone.getSystemName().isCd() ? 3 : 1);
+    }
+
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentScheduler.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentScheduler.java
new file mode 100644
index 00000000000..59b3086988e
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentScheduler.java
@@ -0,0 +1,25 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.nodeagent;
+
+import java.time.Duration;
+import java.time.Instant;
+
+/**
+ * @author freva
+ */
+public interface NodeAgentScheduler {
+
+    /** Schedule a tick for NodeAgent to run with the given NodeAgentContext, at no earlier than given instant */
+    void scheduleTickWith(NodeAgentContext context, Instant at);
+
+    /**
+     * Will eventually freeze/unfreeze the node agent
+     * @param frozen whether node agent should be frozen
+     * @param timeout maximum duration this method should block while waiting for NodeAgent to reach target state
+     * @return True if node agent has converged to the desired state
+     */
+    boolean setFrozen(boolean frozen, Duration timeout);
+
+    /** @return the last scheduled context or a default value */
+    NodeAgentContext currentContext();
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentTask.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentTask.java
new file mode 100644
index 00000000000..3e7895c1ebd
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentTask.java
@@ -0,0 +1,31 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license.
See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.nodeagent;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * Named node agent tasks. The full name of a task is its enum name prefixed with {@code node>}.
+ */
+public enum NodeAgentTask {
+
+    // The full task name is prefixed with 'node>', e.g. 'node>DiskCleanup'
+    DiskCleanup,
+    CoreDumps,
+    CredentialsMaintainer,
+    AclMaintainer;
+
+    /** Immutable lookup from full task name to task, covering every constant. */
+    private static final Map<String, NodeAgentTask> tasksByName = Arrays.stream(NodeAgentTask.values())
+            .collect(Collectors.toUnmodifiableMap(NodeAgentTask::taskName, n -> n));
+
+    private final String taskName;
+    NodeAgentTask() {
+        this.taskName = "node>" + name();
+    }
+
+    /** Returns the full task name, e.g. {@code node>DiskCleanup}. */
+    public String taskName() { return taskName; }
+
+    /**
+     * Returns the tasks whose full task name occurs in the given list.
+     * Names that do not match any task are ignored, and duplicates collapse.
+     *
+     * @param tasks full task names
+     * @return an unmodifiable set of the matching tasks
+     */
+    public static Set<NodeAgentTask> fromString(List<String> tasks) {
+        return tasks.stream().filter(tasksByName::containsKey).map(tasksByName::get).collect(Collectors.toUnmodifiableSet());
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/PathScope.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/PathScope.java
new file mode 100644
index 00000000000..a8effa19b27
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/PathScope.java
@@ -0,0 +1,57 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.nodeagent;
+
+import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixUser;
+import com.yahoo.vespa.hosted.node.admin.task.util.fs.ContainerFileSystem;
+import com.yahoo.vespa.hosted.node.admin.task.util.fs.ContainerPath;
+
+import java.nio.file.Path;
+import java.util.Objects;
+
+/**
+ * Creates {@link ContainerPath}s in one container file system.
+ *
+ * @author freva
+ */
+public class PathScope {
+
+    private final ContainerFileSystem containerFs;
+    private final String pathToVespaHome;
+    private final UserScope users;
+
+    /**
+     * @param containerFs     the file system all paths are created in, must not be null
+     * @param pathToVespaHome the directory {@link #underVespaHome(String)} resolves against, must not be null
+     */
+    public PathScope(ContainerFileSystem containerFs, String pathToVespaHome) {
+        this.containerFs = Objects.requireNonNull(containerFs);
+        this.pathToVespaHome = Objects.requireNonNull(pathToVespaHome);
+        this.users = containerFs.getUserPrincipalLookupService().userScope();
+    }
+
+    /** Returns the given path inside the container, created with the root user of this scope. */
+    public ContainerPath of(String pathInNode) {
+        UnixUser rootUser = users.root();
+        return of(pathInNode, rootUser);
+    }
+
+    /** Returns the given path inside the container, created with the given user. */
+    public ContainerPath of(String pathInNode, UnixUser user) {
+        Path inContainer = Path.of(pathInNode);
+        return ContainerPath.fromPathInContainer(containerFs, inContainer, user);
+    }
+
+    /**
+     * Returns the path below the Vespa home directory, created with the vespa user of this scope.
+     *
+     * @throws IllegalArgumentException if the given path starts with '/'
+     */
+    public ContainerPath underVespaHome(String relativePath) {
+        boolean absolute = relativePath.startsWith("/");
+        if (absolute) {
+            throw new IllegalArgumentException("Expected a relative path to the Vespa home, got: " + relativePath);
+        }
+
+        Path inContainer = Path.of(pathToVespaHome, relativePath);
+        return ContainerPath.fromPathInContainer(containerFs, inContainer, users.vespa());
+    }
+
+    /** Returns the container path for the given path on the host, created with the root user of this scope. */
+    public ContainerPath fromPathOnHost(Path pathOnHost) {
+        UnixUser rootUser = users.root();
+        return ContainerPath.fromPathOnHost(containerFs, pathOnHost, rootUser);
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (o == this) return true;
+        if (o == null || o.getClass() != getClass()) return false;
+        PathScope that = (PathScope) o;
+        return containerFs.equals(that.containerFs)
+                && pathToVespaHome.equals(that.pathToVespaHome)
+                && users.equals(that.users);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(containerFs, pathToVespaHome, users);
+    }
+}
diff --git
a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/UserNamespace.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/UserNamespace.java
new file mode 100644
index 00000000000..f44a19de36e
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/UserNamespace.java
@@ -0,0 +1,67 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.nodeagent;
+
+import java.util.Objects;
+
+/**
+ * Translates user and group IDs between a container and its host: a container ID in
+ * {@code [0, idRangeSize)} corresponds to the host ID {@code offset + containerId}.
+ *
+ * @author freva
+ */
+public class UserNamespace {
+
+    /**
+     * IDs outside the ID range are translated to the overflow ID before being written to disk:
+     * https://github.com/torvalds/linux/blob/5bfc75d92efd494db37f5c4c173d3639d4772966/Documentation/admin-guide/sysctl/fs.rst#overflowgid--overflowuid
+     * Real value in /proc/sys/fs/overflowuid or overflowgid, hardcode default value*/
+    private static final int OVERFLOW_ID = 65_534;
+
+    private final int uidOffset;
+    private final int gidOffset;
+    private final int idRangeSize;
+
+    /**
+     * @param uidOffset   host user ID that container user ID 0 maps to
+     * @param gidOffset   host group ID that container group ID 0 maps to
+     * @param idRangeSize number of consecutive IDs in the mapped range
+     */
+    public UserNamespace(int uidOffset, int gidOffset, int idRangeSize) {
+        this.uidOffset = uidOffset;
+        this.gidOffset = gidOffset;
+        this.idRangeSize = idRangeSize;
+    }
+
+    /** Returns the host user ID of the given container user ID, see {@link #toHostId}. */
+    public int userIdOnHost(int containerUid) { return toHostId(containerUid, uidOffset, idRangeSize); }
+    /** Returns the host group ID of the given container group ID, see {@link #toHostId}. */
+    public int groupIdOnHost(int containerGid) { return toHostId(containerGid, gidOffset, idRangeSize); }
+    /** Returns the container user ID of the given host user ID, or the overflow ID if it is outside the range. */
+    public int userIdInContainer(int hostUid) { return toContainerId(hostUid, uidOffset, idRangeSize); }
+    /** Returns the container group ID of the given host group ID, or the overflow ID if it is outside the range. */
+    public int groupIdInContainer(int hostGid) { return toContainerId(hostGid, gidOffset, idRangeSize); }
+
+    public int idRangeSize() { return idRangeSize; }
+    public int overflowId() { return OVERFLOW_ID; }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+        UserNamespace that = (UserNamespace) o;
+        return uidOffset == that.uidOffset && gidOffset == that.gidOffset && idRangeSize == that.idRangeSize;
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(uidOffset, gidOffset, idRangeSize);
+    }
+
+    @Override
+    public String toString() {
+        return "UserNamespace{" +
+                "uidOffset=" + uidOffset +
+                ", gidOffset=" + gidOffset +
+                ", idRangeSize=" + idRangeSize +
+                '}';
+    }
+
+    /**
+     * Maps a container ID to the host ID {@code idOffset + containerId}.
+     *
+     * @throws IllegalArgumentException if the container ID is outside {@code [0, idRangeSize)}
+     */
+    private static int toHostId(int containerId, int idOffset, int idRangeSize) {
+        // The range holds idRangeSize IDs starting at 0, so idRangeSize itself is the first ID outside it.
+        // This matches toContainerId, which treats host IDs at or beyond idOffset + idRangeSize as out of range.
+        if (containerId < 0 || containerId >= idRangeSize)
+            throw new IllegalArgumentException("Invalid container id: " + containerId);
+        return idOffset + containerId;
+    }
+
+    /** Maps a host ID to {@code hostId - idOffset}, or to the overflow ID if that is outside {@code [0, idRangeSize)}. */
+    private static int toContainerId(int hostId, int idOffset, int idRangeSize) {
+        hostId = hostId - idOffset;
+        return hostId < 0 || hostId >= idRangeSize ? OVERFLOW_ID : hostId;
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/UserScope.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/UserScope.java
new file mode 100644
index 00000000000..508adde5902
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/UserScope.java
@@ -0,0 +1,52 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.nodeagent;
+
+import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixUser;
+
+import java.util.Objects;
+
+/**
+ * Holds a root user, a vespa user and the {@link UserNamespace} they belong to.
+ *
+ * @author freva
+ */
+public class UserScope {
+
+    private final UnixUser root;
+    private final UnixUser vespa;
+    private final UserNamespace namespace;
+
+    /** All arguments must be non-null. */
+    private UserScope(UnixUser root, UnixUser vespa, UserNamespace namespace) {
+        this.root = Objects.requireNonNull(root);
+        this.vespa = Objects.requireNonNull(vespa);
+        this.namespace = Objects.requireNonNull(namespace);
+    }
+
+    /** Returns the root user of this scope. */
+    public UnixUser root() { return root; }
+
+    /** Returns the vespa user of this scope. */
+    public UnixUser vespa() { return vespa; }
+
+    /** Returns the user namespace of this scope. */
+    public UserNamespace namespace() { return namespace; }
+
+    @Override
+    public boolean equals(Object o) {
+        if (o == this) return true;
+        if (o == null || o.getClass() != getClass()) return false;
+        UserScope that = (UserScope) o;
+        return root.equals(that.root)
+                && vespa.equals(that.vespa)
+                && namespace.equals(that.namespace);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(root, vespa, namespace);
+    }
+
+    /** Creates user scope with default root and vespa user */
+    public static UserScope create(UserNamespace namespace) {
+        UnixUser defaultRoot = UnixUser.ROOT;
+        UnixUser defaultVespa = UnixUser.VESPA;
+        return new UserScope(defaultRoot, defaultVespa, namespace);
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/package-info.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/package-info.java
new file mode 100644
index 00000000000..42310c7233f
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/package-info.java
@@ -0,0 +1,5 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+@ExportPackage
+package com.yahoo.vespa.hosted.node.admin.nodeagent;
+
+import com.yahoo.osgi.annotation.ExportPackage;
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/DebugHandlerHelper.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/DebugHandlerHelper.java
new file mode 100644
index 00000000000..59040abc4bf
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/DebugHandlerHelper.java
@@ -0,0 +1,50 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+package com.yahoo.vespa.hosted.node.admin.provider;
+
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+import java.util.function.Supplier;
+import java.util.stream.Collectors;
+
+/**
+ * Class to make it easier to implement a NodeAdminDebugHandler:
+ * - Forward to sub-NodeAdminDebugHandlers with addHandler,
+ * - Specify constants with addConstant
+ * - Forwarding to methods that dynamically build debug objects with addThreadSafeSupplier.
+ *
+ * @author hakonhall
+ */
+public class DebugHandlerHelper implements NodeAdminDebugHandler {
+    /** Suppliers of debug values, keyed by the name they were registered under. */
+    private final ConcurrentMap<String, Supplier<Object>> suppliers = new ConcurrentHashMap<>();
+
+    /**
+     * Registers a supplier whose current value is included in the debug page under the given name.
+     *
+     * @throws IllegalArgumentException if the name is already registered
+     */
+    public void addThreadSafeSupplier(String name, Supplier<Object> threadSafeSupplier) {
+        Supplier<Object> previousSupplier = suppliers.putIfAbsent(name, threadSafeSupplier);
+        if (previousSupplier != null) {
+            throw new IllegalArgumentException(name + " is already registered");
+        }
+    }
+
+    /** Registers the debug page of the given handler under the given name. */
+    public void addHandler(String name, NodeAdminDebugHandler handler) {
+        addThreadSafeSupplier(name, handler::getDebugPage);
+    }
+
+    /** Registers a fixed value under the given name. */
+    public void addConstant(String name, String value) {
+        addThreadSafeSupplier(name, () -> value);
+    }
+
+    /**
+     * Removes the registration with the given name.
+     *
+     * @throws IllegalArgumentException if the name is not registered
+     */
+    public void remove(String name) {
+        Supplier<Object> supplier = suppliers.remove(name);
+        if (supplier == null) {
+            throw new IllegalArgumentException(name + " is not registered");
+        }
+    }
+
+    /** Returns a new map from each registered name to the value its supplier returns now. */
+    @Override
+    public Map<String, Object> getDebugPage() {
+        return suppliers.entrySet().stream().collect(Collectors.toMap(
+                Map.Entry::getKey,
+                entry -> entry.getValue().get()));
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/NodeAdminDebugHandler.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/NodeAdminDebugHandler.java
new file mode 100644
index 00000000000..2c38422e127
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/NodeAdminDebugHandler.java
@@ -0,0 +1,18 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+package com.yahoo.vespa.hosted.node.admin.provider;
+
+import java.util.Map;
+
+/**
+ * Interface for supporting debug info to introspect e.g. internal state.
+ *
+ * @author hakonhall
+ */
+public interface NodeAdminDebugHandler {
+    /**
+     * The Object in the map values must be serializable with Jackson's ObjectMapper.
+     * May be called concurrently by different threads.
+     */
+    Map<String, Object> getDebugPage();
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/package-info.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/package-info.java
new file mode 100644
index 00000000000..8c8dd618869
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/package-info.java
@@ -0,0 +1,5 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+@ExportPackage
+package com.yahoo.vespa.hosted.node.admin.provider;
+
+import com.yahoo.osgi.annotation.ExportPackage;
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/DefaultEnvWriter.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/DefaultEnvWriter.java
new file mode 100644
index 00000000000..baf0142df4d
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/DefaultEnvWriter.java
@@ -0,0 +1,116 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.task.util;
+
+import com.yahoo.vespa.hosted.node.admin.component.TaskContext;
+
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.logging.Logger;
+
+import static com.yahoo.vespa.hosted.node.admin.task.util.file.IOExceptionUtil.ifExists;
+import static com.yahoo.yolean.Exceptions.uncheck;
+import static java.nio.file.StandardCopyOption.ATOMIC_MOVE;
+import static java.util.stream.Collectors.joining;
+
+/**
+ * Rewrites default-env.txt files.
+ *
+ * @author bjorncs
+ */
+public class DefaultEnvWriter {
+
+    private static final Logger logger = Logger.getLogger(DefaultEnvWriter.class.getName());
+
+    /** Operations to apply, keyed by variable name, in the order they were added. */
+    private final Map<String, Operation> operations = new LinkedHashMap<>();
+
+    /** Adds an "override" line for the variable; at most one operation may be added per variable. */
+    public DefaultEnvWriter addOverride(String name, String value) {
+        return addOperation("override", name, value);
+    }
+
+    /** Adds a "fallback" line for the variable; at most one operation may be added per variable. */
+    public DefaultEnvWriter addFallback(String name, String value) {
+        return addOperation("fallback", name, value);
+    }
+
+    /** Adds an "unset" line for the variable; at most one operation may be added per variable. */
+    public DefaultEnvWriter addUnset(String name) {
+        return addOperation("unset", name, null);
+    }
+
+    /**
+     * Registers an operation for a variable.
+     *
+     * @throws IllegalArgumentException if an operation is already registered for the variable
+     */
+    private DefaultEnvWriter addOperation(String action, String name, String value) {
+        if (operations.containsKey(name)) {
+            throw new IllegalArgumentException(String.format("Operation on variable '%s' already added", name));
+        }
+        operations.put(name, new Operation(action, name, value));
+        return this;
+    }
+
+    /**
+     * Updates or creates a default-env.txt file
+     *
+     * @return true if the file was modified
+     */
+    public boolean updateFile(TaskContext context, Path defaultEnvFile) {
+        List<String> currentDefaultEnvLines = ifExists(() -> Files.readAllLines(defaultEnvFile)).orElse(List.of());
+        List<String> newDefaultEnvLines = generateContent(currentDefaultEnvLines);
+        if (currentDefaultEnvLines.equals(newDefaultEnvLines)) {
+            return false;
+        } else {
+            context.log(logger, "Updating " + defaultEnvFile.toString());
+            // Write to a sibling temp file and move it into place with ATOMIC_MOVE
+            Path tempFile = defaultEnvFile.resolveSibling(defaultEnvFile.getFileName() + ".tmp");
+            uncheck(() -> Files.write(tempFile, newDefaultEnvLines));
+            uncheck(() -> Files.move(tempFile, defaultEnvFile, ATOMIC_MOVE));
+            return true;
+        }
+    }
+
+    /**
+     * @return generated default-env.txt content
+     */
+    public String generateContent() {
+        return generateContent(List.of()).stream()
+                .collect(joining(System.lineSeparator(), "", System.lineSeparator()));
+    }
+
+    /**
+     * Returns the given lines with the registered operations applied: the first line for a variable with a
+     * registered operation is replaced by that operation's line, later lines for an already seen variable
+     * are dropped, and operations for variables not present are appended in registration order.
+     *
+     * @throws IllegalArgumentException if a line has fewer than two space-separated items
+     */
+    private List<String> generateContent(List<String> currentDefaultEnvLines) {
+        List<String> newDefaultEnvLines = new ArrayList<>();
+        Set<String> seenNames = new TreeSet<>();
+        for (String line : currentDefaultEnvLines) {
+            String[] items = line.split(" ");
+            if (items.length < 2) {
+                // NOTE(review): the first '%s' is filled with the full list of lines, not a file name
+                throw new IllegalArgumentException(String.format("Invalid line in file '%s': %s", currentDefaultEnvLines, line));
+            }
+            String name = items[1];
+            if (!seenNames.contains(name)) { // implicitly removes duplicated variables
+                seenNames.add(name);
+                Operation operation = operations.get(name);
+                if (operation != null) {
+                    newDefaultEnvLines.add(operation.toLine());
+                } else {
+                    newDefaultEnvLines.add(line);
+                }
+            }
+        }
+        for (var operation : operations.values()) {
+            if (!seenNames.contains(operation.name)) {
+                newDefaultEnvLines.add(operation.toLine());
+            }
+        }
+        return newDefaultEnvLines;
+    }
+
+    /** One line of a default-env.txt file: an action, a variable name, and (except for "unset") a value. */
+    private record Operation(String action, String name, String value) {
+        /** Returns "unset name" for the unset action, otherwise "action name value". */
+        String toLine() {
+            if (action.equals("unset")) {
+                return "unset " + name;
+            }
+            return action + " " + name + " " + value;
+        }
+    }
+}
+
+
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/Cursor.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/Cursor.java
new file mode 100644
index 00000000000..4e9998bd40f
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/Cursor.java
@@ -0,0 +1,96 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+package com.yahoo.vespa.hosted.node.admin.task.util.editor;
+
+import java.util.Optional;
+import java.util.function.Function;
+import java.util.regex.Pattern;
+
+/**
+ * Simulates an editor cursor.
+ *
+ * @author hakon
+ */
+public interface Cursor {
+    // CURSOR AND BUFFER QUERIES
+
+    String getBufferText();
+    String getLine();
+    String getPrefix();
+    String getSuffix();
+    String getTextTo(Mark mark);
+
+    Position getPosition();
+    Mark createMark();
+
+    // CURSOR MOVEMENT
+
+    Cursor moveToStartOfBuffer();
+    Cursor moveToEndOfBuffer();
+
+    Cursor moveToStartOfLine();
+    Cursor moveToStartOfPreviousLine();
+    Cursor moveToStartOfNextLine();
+    Cursor moveToStartOf(int lineIndex);
+
+    Cursor moveToEndOfLine();
+    Cursor moveToEndOfPreviousLine();
+    Cursor moveToEndOfNextLine();
+    Cursor moveToEndOf(int lineIndex);
+
+    Cursor moveForward();
+    Cursor moveForward(int times);
+    Cursor moveBackward();
+    Cursor moveBackward(int times);
+
+    Cursor moveTo(Mark mark);
+    Cursor moveTo(Position position);
+    Cursor moveTo(int lineIndex, int columnIndex);
+
+    Optional moveForwardToStartOfMatch(Pattern pattern);
+    Optional moveForwardToEndOfMatch(Pattern pattern);
+
+    /** If the text returned by {@link #getPrefix()} ends with {@code text}, moves the cursor back over it and returns true. */
+    boolean skipBackward(String text);
+    /** If the text returned by {@link #getSuffix()} starts with {@code text}, moves the cursor forward over it and returns true. */
+    boolean skipForward(String text);
+
+    // BUFFER MODIFICATIONS
+
+    Cursor write(String text);
+    Cursor writeLine(String line);
+    Cursor writeLines(String... lines);
+    Cursor writeLines(Iterable lines);
+
+    Cursor writeNewline();
+    Cursor writeNewlineAfter();
+
+    Cursor deleteAll();
+    Cursor deleteLine();
+    Cursor deletePrefix();
+    Cursor deleteSuffix();
+
+    Cursor deleteForward();
+    Cursor deleteForward(int times);
+    Cursor deleteBackward();
+    Cursor deleteBackward(int times);
+
+    Cursor deleteTo(Mark mark);
+
+    /** Replaces the next match of the pattern, if any, with the replacer's result. Returns whether a match was found. */
+    boolean replaceMatch(Pattern pattern, Function replacer);
+
+    /**
+     * Replace matches of a pattern.
+     *
+     * <p>The search for {@code pattern} starts at cursor and matches against the remaining line,
+     * and the full line for the following lines. Each match is replaced by a String returned by
+     * {@code replacer::apply}.
+     *
+     * <p>The cursor is unchanged without any matches, or moved to the end of the last replacement.
+     *
+     * <p>To replace all matches in a buffer, first call {@link #moveToStartOfBuffer()} to
+     * position the cursor at the beginning of the buffer.
+     *
+     * @see #moveForwardToStartOfMatch(Pattern)
+     * @see #moveForwardToEndOfMatch(Pattern)
+     */
+    int replaceMatches(Pattern pattern, Function replacer);
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/CursorImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/CursorImpl.java
new file mode 100644
index 00000000000..501db764d05
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/CursorImpl.java
@@ -0,0 +1,356 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.task.util.editor;
+
+import java.util.List;
+import java.util.Optional;
+import java.util.function.Consumer;
+import java.util.function.Function;
+import java.util.regex.Pattern;
+
+import static com.yahoo.collections.Comparables.max;
+import static com.yahoo.collections.Comparables.min;
+
+/**
+ * @author hakon
+ */
+public class CursorImpl implements Cursor {
+    private final TextBuffer textBuffer;
+    private final Object unique = new Object();
+
+    private Position position;
+
+    /**
+     * Creates a cursor to a text buffer.
+     *
+     * WARNING: The text buffer MUST NOT be accessed outside this cursor. This cursor
+     * takes sole ownership of the text buffer.
+     *
+     * @param textBuffer the text buffer this cursor owns and operates on
+     */
+    CursorImpl(TextBuffer textBuffer) {
+        this.textBuffer = textBuffer;
+        position = textBuffer.getStartOfText();
+    }
+
+    @Override
+    public Position getPosition() {
+        return position;
+    }
+
+    /** Creates a mark at the current position, tied to this cursor and the current version of the text buffer. */
+    @Override
+    public Mark createMark() {
+        return new Mark(position, textBuffer.getVersion(), unique);
+    }
+
+    @Override
+    public String getBufferText() {
+        return textBuffer.getString();
+    }
+
+    @Override
+    public String getLine() {
+        return textBuffer.getLine(position);
+    }
+
+    @Override
+    public String getPrefix() {
+        return textBuffer.getLinePrefix(position);
+    }
+
+    @Override
+    public String getSuffix() {
+        return textBuffer.getLineSuffix(position);
+    }
+
+    /** Returns the text between the mark and the cursor, whichever comes first. */
+    @Override
+    public String getTextTo(Mark mark) {
+        validateMark(mark);
+
+        Position start = min(mark.position(), position);
+        Position end = max(mark.position(), position);
+
+        return textBuffer.getSubstring(start, end);
+    }
+
+    @Override
+    public Cursor moveToStartOfBuffer() {
+        position = textBuffer.getStartOfText();
+        return this;
+    }
+
+    @Override
+    public Cursor moveToEndOfBuffer() {
+        position = textBuffer.getEndOfText();
+        return this;
+    }
+
+    @Override
+    public Cursor moveToStartOfLine() {
+        position = textBuffer.getStartOfLine(position);
+        return this;
+    }
+
+    @Override
+    public Cursor moveToStartOfPreviousLine() {
+        position = textBuffer.getStartOfPreviousLine(position);
+        return this;
+    }
+
+    @Override
+    public Cursor moveToStartOfNextLine() {
+        position = textBuffer.getStartOfNextLine(position);
+        return this;
+    }
+
+    @Override
+    public Cursor moveToStartOf(int lineIndex) {
+        validateLineIndex(lineIndex);
+        position = new Position(lineIndex, 0);
+        return this;
+    }
+
+    @Override
+    public Cursor moveToEndOfLine() {
+        position = textBuffer.getEndOfLine(position);
+        return this;
+    }
+
+    @Override
+    public Cursor moveToEndOfPreviousLine() {
+        return moveToStartOfPreviousLine().moveToEndOfLine();
+    }
+
+    @Override
+    public Cursor moveToEndOfNextLine() {
+        return moveToStartOfNextLine().moveToEndOfLine();
+    }
+
+    @Override
+    public Cursor moveToEndOf(int lineIndex) {
+        return moveToStartOf(lineIndex).moveToEndOfLine();
+    }
+
+    @Override
+    public Cursor moveForward() {
+        return moveForward(1);
+    }
+
+    @Override
+    public Cursor moveForward(int times) {
+        position = textBuffer.forward(position, times);
+        return this;
+    }
+
+    @Override
+    public Cursor moveBackward() {
+        return moveBackward(1);
+    }
+
+    @Override
+    public Cursor moveBackward(int times) {
+        position = textBuffer.backward(position, times);
+        return this;
+    }
+
+    @Override
+    public Cursor moveTo(Mark mark) {
+        validateMark(mark);
+        position = mark.position();
+        return this;
+    }
+
+    /** Moves the cursor back over {@code text} and returns true if the line prefix ends with it, otherwise returns false. */
+    @Override
+    public boolean skipBackward(String text) {
+        String prefix = getPrefix();
+        if (prefix.endsWith(text)) {
+            position = new Position(position.lineIndex(), position.columnIndex() - text.length());
+            return true;
+        } else {
+            return false;
+        }
+    }
+
+    /** Moves the cursor forward over {@code text} and returns true if the line suffix starts with it, otherwise returns false. */
+    @Override
+    public boolean skipForward(String text) {
+        String suffix = getSuffix();
+        if (suffix.startsWith(text)) {
+            position = new Position(position.lineIndex(), position.columnIndex() + text.length());
+            return true;
+        } else {
+            return false;
+        }
+    }
+
+    @Override
+    public Optional moveForwardToStartOfMatch(Pattern pattern) {
+        return moveForwardToXOfMatch(pattern, match -> position = match.startOfMatch());
+    }
+
+    @Override
+    public Optional moveForwardToEndOfMatch(Pattern pattern) {
+        return moveForwardToXOfMatch(pattern, match -> position = match.endOfMatch());
+    }
+
+    /** Searches forward from the cursor and, if a match is found, passes it to the callback. Returns the match, if any. */
+    private Optional moveForwardToXOfMatch(Pattern pattern, Consumer callback) {
+        Optional match = textBuffer.findForward(position, pattern);
+        match.ifPresent(callback);
+        return match;
+    }
+
+    @Override
+    public Cursor moveTo(Position position) {
+        validatePosition(position);
+        this.position = position;
+        return this;
+    }
+
+    @Override
+    public Cursor moveTo(int lineIndex, int columnIndex) {
+        return moveTo(new Position(lineIndex, columnIndex));
+    }
+
+    /** Writes the text at the cursor and moves the cursor to the position returned by the text buffer. */
+    @Override
+    public Cursor write(String text) {
+        position = textBuffer.write(position, text);
+        return this;
+    }
+
+    @Override
+    public Cursor writeLine(String line) {
+        return write(line).write("\n");
+    }
+
+    @Override
+    public Cursor writeLines(String... lines) {
+        return writeLines(List.of(lines));
+    }
+
+    /** Writes the lines joined by "\n", followed by a final "\n". */
+    @Override
+    public Cursor writeLines(Iterable lines) {
+        return writeLine(String.join("\n", lines));
+    }
+
+    @Override
+    public Cursor writeNewline() {
+        return write("\n");
+    }
+
+    /** Writes a newline and then moves the cursor one step backward. */
+    @Override
+    public Cursor writeNewlineAfter() {
+        return writeNewline().moveBackward();
+    }
+
+    @Override
+    public Cursor deleteAll() {
+        moveToStartOfBuffer();
+        textBuffer.clear();
+        return this;
+    }
+
+    /** Deletes from the start of the current line to the start of the next line, leaving the cursor at the start of the line. */
+    @Override
+    public Cursor deleteLine() {
+        moveToStartOfLine();
+        textBuffer.delete(position, textBuffer.getStartOfNextLine(position));
+        return this;
+    }
+
+    @Override
+    public Cursor deletePrefix() {
+        Position originalPosition = position;
+        moveToStartOfLine();
+        textBuffer.delete(position, originalPosition);
+        return this;
+    }
+
+    @Override
+    public Cursor deleteSuffix() {
+        textBuffer.delete(position, textBuffer.getEndOfLine(position));
+        return this;
+    }
+
+    @Override
+    public Cursor deleteForward() {
+        return deleteForward(1);
+    }
+
+    @Override
+    public Cursor deleteForward(int times) {
+        Position end = textBuffer.forward(position, times);
+        textBuffer.delete(position, end);
+        return this;
+    }
+
+    @Override
+    public Cursor deleteBackward() {
+        return deleteBackward(1);
+    }
+
+    @Override
+    public Cursor deleteBackward(int times) {
+        Position end = position;
+        moveBackward(times);
+        textBuffer.delete(position, end);
+        return this;
+    }
+
+    /** Deletes the text between the mark and the cursor, leaving the cursor at the start of the deleted range. */
+    @Override
+    public Cursor deleteTo(Mark mark) {
+        validateMark(mark);
+        Position start = min(mark.position(), position);
+        Position end = max(mark.position(), position);
+
+        textBuffer.delete(start, end);
+        // When the mark preceded the cursor, the old cursor position pointed past the deleted text.
+        // Leave the cursor at the start of the deleted range, as the other delete methods do.
+        position = start;
+        return this;
+    }
+
+    /**
+     * Moves to the start of the next match, deletes the matched text and writes the replacement there.
+     *
+     * @return false, with the cursor unchanged, if there is no match
+     */
+    @Override
+    public boolean replaceMatch(Pattern pattern, Function replacer) {
+        Optional match = moveForwardToStartOfMatch(pattern);
+        if (match.isEmpty()) {
+            return false;
+        }
+
+        textBuffer.delete(match.get().startOfMatch(), match.get().endOfMatch());
+        write(replacer.apply(match.get()));
+        return true;
+    }
+
+    /** Calls {@link #replaceMatch} until it returns false, and returns the number of replacements made. */
+    @Override
+    public int replaceMatches(Pattern pattern, Function replacer) {
+        int count = 0;
+
+        for (; replaceMatch(pattern, replacer); ++count) {
+            // empty
+        }
+
+        return count;
+    }
+
+    /** Throws IndexOutOfBoundsException unless the position is on an existing line, at a column in [0, line length]. */
+    private void validatePosition(Position position) {
+        validateLineIndex(position.lineIndex());
+
+        int maxColumnIndex = textBuffer.getLine(position.lineIndex()).length();
+        if (position.columnIndex() < 0 || position.columnIndex() > maxColumnIndex) {
+            throw new IndexOutOfBoundsException("Column index of " + position.coordinateString() +
+                    " is not in permitted range [0," + maxColumnIndex + "]");
+        }
+    }
+
+    /** Throws IndexOutOfBoundsException unless the line index is in [0, max line index of the buffer]. */
+    private void validateLineIndex(int lineIndex) {
+        int maxLineIndex = textBuffer.getMaxLineIndex();
+        if (lineIndex < 0 || lineIndex > maxLineIndex) {
+            throw new IndexOutOfBoundsException("Line index " + lineIndex +
+                    " not in permitted range [0," + maxLineIndex + "]");
+        }
+    }
+
+    /** Throws IllegalArgumentException unless the mark was created by this cursor for the current buffer version. */
+    private void validateMark(Mark mark) {
+        if (mark.secret() != unique) {
+            throw new IllegalArgumentException("Unknown mark " + mark);
+        }
+
+        if (!mark.version().equals(textBuffer.getVersion())) {
+            throw new IllegalArgumentException("Mark " + mark + " is outdated");
+        }
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/FileEditor.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/FileEditor.java
new file mode 100644
index 00000000000..fb09482a43a
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/FileEditor.java
@@ -0,0 +1,58 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ +package com.yahoo.vespa.hosted.node.admin.task.util.editor; + +import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixPath; + +import java.nio.file.Path; + +/** + * @author hakon + */ +public class FileEditor { + private final UnixPath path; + private final StringEditor stringEditor; + + private String fileText; + private Version fileVersion; + + public static FileEditor open(Path path) { + UnixPath unixPath = new UnixPath(path); + String text = unixPath.readUtf8File(); + StringEditor stringEditor = new StringEditor(text); + return new FileEditor(unixPath, text, stringEditor); + } + + private FileEditor(UnixPath path, String fileText, StringEditor stringEditor) { + this.path = path; + this.fileText = fileText; + this.stringEditor = stringEditor; + fileVersion = stringEditor.bufferVersion(); + } + + public Cursor cursor() { + return stringEditor.cursor(); + } + + public void reloadFile() { + fileText = path.readUtf8File(); + stringEditor.cursor().deleteAll().write(fileText); + fileVersion = stringEditor.bufferVersion(); + } + + public boolean save() { + Version bufferVersion = stringEditor.bufferVersion(); + if (bufferVersion.equals(fileVersion)) { + return false; + } + + String newText = stringEditor.cursor().getBufferText(); + if (newText.equals(fileText)) { + return false; + } + + path.writeUtf8File(newText); + fileVersion = bufferVersion; + return true; + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/Mark.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/Mark.java new file mode 100644 index 00000000000..616c98c5b76 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/Mark.java @@ -0,0 +1,52 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +package com.yahoo.vespa.hosted.node.admin.task.util.editor; + +import java.util.Objects; + +/** + * @author hakon + */ +public class Mark { + private final Position position; + private final Version version; + private final Object token; + + Mark(Position position, Version version, Object token) { + this.position = position; + this.version = version; + this.token = token; + } + + public Position position() { + return position; + } + + public Version version() { + return version; + } + + public Object secret() { + return token; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Mark mark = (Mark) o; + return Objects.equals(position, mark.position) && + Objects.equals(version, mark.version) && + token == mark.token; + } + + @Override + public int hashCode() { + return Objects.hash(position, version, token); + } + + @Override + public String toString() { + return position.coordinateString() + "@" + version; + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/Match.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/Match.java new file mode 100644 index 00000000000..32e058c1067 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/Match.java @@ -0,0 +1,53 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +package com.yahoo.vespa.hosted.node.admin.task.util.editor; + +import java.util.regex.Matcher; + +/** + * Represents a pattern match of a line + * + * @author hakon + */ +public class Match { + private final int lineIndex; + private final String line; + private final Matcher matcher; + + Match(int lineIndex, String line, Matcher matcher) { + this.lineIndex = lineIndex; + this.line = line; + this.matcher = matcher; + } + + /** The part of the line before the match */ + public String prefix() { + return line.substring(0, matcher.start()); + } + + /** The part of the line that matched */ + public String match() { + return matcher.group(); + } + + /** The part of the line that followed the match */ + public String suffix() { + return line.substring(matcher.end()); + } + + public Position startOfMatch() { + return new Position(lineIndex, matcher.start()); + } + + public Position endOfMatch() { + return new Position(lineIndex, matcher.end()); + } + + public int groupCount() { + return matcher.groupCount(); + } + + public String group(int groupnr) { + return matcher.group(groupnr); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/Position.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/Position.java new file mode 100644 index 00000000000..95aa778d57e --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/Position.java @@ -0,0 +1,72 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.task.util.editor; + +import java.util.Comparator; +import java.util.Objects; + +/** + * Represents a position in the buffer + * + * @author hakon + */ +public class Position implements Comparable { + private static final Position START_POSITION = new Position(0, 0); + + private static final Comparator COMPARATOR = Comparator + .comparingInt(Position::lineIndex) + .thenComparingInt(Position::columnIndex); + + private final int lineIndex; + private final int columnIndex; + + /** Returns the first position at line index 0 and column index 0 */ + public static Position start() { + return START_POSITION; + } + + Position(int lineIndex, int columnIndex) { + this.lineIndex = lineIndex; + this.columnIndex = columnIndex; + } + + public int lineIndex() { + return lineIndex; + } + + public int columnIndex() { + return columnIndex; + } + + @Override + public int compareTo(Position that) { + return COMPARATOR.compare(this, that); + } + + public boolean isAfter(Position that) { return compareTo(that) > 0; } + public boolean isNotAfter(Position that) { return !isAfter(that); } + public boolean isBefore(Position that) { return compareTo(that) < 0; } + public boolean isNotBefore(Position that) { return !isBefore(that); } + + public String coordinateString() { + return "(" + lineIndex + "," + columnIndex + ")"; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Position position = (Position) o; + return lineIndex == position.lineIndex && + columnIndex == position.columnIndex; + } + + @Override + public int hashCode() { + return Objects.hash(lineIndex, columnIndex); + } + + @Override + public String toString() { + return coordinateString(); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/StringEditor.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/StringEditor.java 
// ---- file in this patch: node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/StringEditor.java ----
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

package com.yahoo.vespa.hosted.node.admin.task.util.editor;

/**
 * Edits multi-line text.
 *
 * @author hakon
 */
public class StringEditor {
    private final TextBuffer textBuffer;
    private final Cursor cursor;

    /** Creates an editor on an empty text. */
    public StringEditor() {
        this("");
    }

    /** Creates an editor on {@code text}, with a single cursor operating on it. */
    public StringEditor(String text) {
        textBuffer = new TextBufferImpl(text);
        cursor = new CursorImpl(textBuffer);
    }

    /** Returns the cursor of this editor; the same instance is returned on every call. */
    public Cursor cursor() {
        return cursor;
    }

    /** Returns the current version of the underlying buffer. */
    public Version bufferVersion() {
        return textBuffer.getVersion();
    }
}
// ---- file in this patch: node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/TextBuffer.java ----
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

package com.yahoo.vespa.hosted.node.admin.task.util.editor;

import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * A line-based text buffer addressed by {@link Position}. Concrete classes implement the
 * primitive operations in the first section; navigation and search are derived from them.
 *
 * @author hakon
 */
interface TextBuffer {
    // INTERFACE TO IMPLEMENT BY CONCRETE CLASS

    /** Get the version of the buffer - edits increment the version. */
    Version getVersion();

    /** Return the text as a single String (likely) with embedded newlines. */
    String getString();

    /** Return the maximum line index (the minimum line index is 0). */
    int getMaxLineIndex();

    /** @param lineIndex must be in {@code [0, getMaxLineIndex()]} */
    String getLine(int lineIndex);

    /** Insert the possibly multi-line text at position and return the end position. */
    Position write(Position position, String text);

    /** Delete everything. */
    void clear();

    /** Delete range. */
    void delete(Position start, Position end);

    // DERIVED IMPLEMENTATION

    /**
     * Return the Position closest to {@code position} which is in the range
     * {@code [getStartOfText(), getEndOfText()]}.
     */
    default Position getValidPositionClosestTo(Position position) {
        if (position.isBefore(getStartOfText())) {
            return getStartOfText();
        } else if (position.isAfter(getEndOfText())) {
            return getEndOfText();
        } else {
            return position;
        }
    }

    default String getLine(Position position) { return getLine(position.lineIndex()); }

    /** Return the part of the line at {@code position} that is before the column of {@code position}. */
    default String getLinePrefix(Position position) {
        return getLine(position.lineIndex()).substring(0, position.columnIndex());
    }

    /** Return the part of the line at {@code position} from the column of {@code position} and out. */
    default String getLineSuffix(Position position) {
        return getLine(position.lineIndex()).substring(position.columnIndex());
    }

    /**
     * Return the text in {@code [start, end>}, with lines joined by {@code '\n'}.
     *
     * @throws IllegalArgumentException if {@code end} is before {@code start}
     */
    default String getSubstring(Position start, Position end) {
        if (start.lineIndex() < end.lineIndex()) {
            StringBuilder builder = new StringBuilder(getLineSuffix(start));
            for (int i = start.lineIndex() + 1; i < end.lineIndex(); ++i) {
                builder.append('\n');
                builder.append(getLine(i));
            }
            return builder.append('\n').append(getLinePrefix(end)).toString();
        } else if (start.lineIndex() == end.lineIndex() && start.columnIndex() <= end.columnIndex()) {
            return getLine(start).substring(start.columnIndex(), end.columnIndex());
        }

        throw new IllegalArgumentException(
                "Bad range [" + start.coordinateString() + "," + end.coordinateString() + ">");
    }

    default Position getStartOfText() { return Position.start(); } // aka (0,0)

    default Position getEndOfText() {
        int maxLineIndex = getMaxLineIndex();
        return new Position(maxLineIndex, getLine(maxLineIndex).length());
    }

    default Position getStartOfLine(Position position) {
        return new Position(position.lineIndex(), 0);
    }

    default Position getEndOfLine(Position position) {
        return new Position(position.lineIndex(), getLine(position).length());
    }

    /** Return the start of the next line, or the end of the text if {@code position} is on the last line. */
    default Position getStartOfNextLine(Position position) {
        if (position.lineIndex() < getMaxLineIndex()) {
            return new Position(position.lineIndex() + 1, 0);
        } else {
            return getEndOfText();
        }
    }

    /** Return the start of the previous line, or the start of the text if {@code position} is on the first line. */
    default Position getStartOfPreviousLine(Position position) {
        int lineIndex = position.lineIndex();
        if (lineIndex > 0) {
            return new Position(lineIndex - 1, 0);
        } else {
            return getStartOfText();
        }
    }

    /**
     * Return the position {@code length} characters after {@code position}, counting each line
     * break as one character, and stopping at the end of the text.
     */
    default Position forward(Position position, int length) {
        int lineIndex = position.lineIndex();
        int columnIndex = position.columnIndex();

        int offsetLeft = length;
        do {
            String line = getLine(lineIndex);
            int columnIndexWithInfiniteLine = columnIndex + offsetLeft;
            if (columnIndexWithInfiniteLine <= line.length()) {
                return new Position(lineIndex, columnIndexWithInfiniteLine);
            } else if (lineIndex >= getMaxLineIndex()) {
                // End of text
                return new Position(lineIndex, line.length());
            }

            offsetLeft -= line.length() - columnIndex;

            // advance past newline
            --offsetLeft;
            ++lineIndex;
            columnIndex = 0;

            // At this point: offsetLeft is guaranteed to be >= 0, and lineIndex <= max line index
        } while (true);
    }

    /**
     * Return the position {@code length} characters before {@code position}, counting each line
     * break as one character, and stopping at the start of the text.
     */
    default Position backward(Position position, int length) {
        int lineIndex = position.lineIndex();
        int columnIndex = position.columnIndex();

        int offsetLeft = length;
        do {
            int columnIndexWithInfiniteLine = columnIndex - offsetLeft;
            if (columnIndexWithInfiniteLine >= 0) {
                return new Position(lineIndex, columnIndexWithInfiniteLine);
            } else if (lineIndex <= 0) {
                // Start of text
                return new Position(0, 0);
            }

            offsetLeft -= columnIndex;

            // step back past the newline ending the previous line
            --offsetLeft;
            --lineIndex;
            columnIndex = getLine(lineIndex).length();

            // At this point: offsetLeft is guaranteed to be >= 0 (it was > columnIndex before the
            // two subtractions above), and lineIndex >= 0
        } while (true);
    }

    /**
     * Search for {@code pattern} from {@code startPosition} and forward, one line at a time:
     * from the start column on the first line, and from the line start as given by
     * {@link #getStartOfNextLine} on later lines. A match never spans lines.
     *
     * @return the first match, or empty if no line matched
     */
    default Optional<Match> findForward(Position startPosition, Pattern pattern) {
        for (Position position = startPosition; ; position = getStartOfNextLine(position)) {
            String line = getLine(position);
            Matcher matcher = pattern.matcher(line);
            if (matcher.find(position.columnIndex())) {
                return Optional.of(new Match(position.lineIndex(), line, matcher));
            }

            if (position.lineIndex() == getMaxLineIndex()) {
                // search failed - no lines matched
                return Optional.empty();
            }
        }
    }
}
// ---- file in this patch: node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/TextBufferImpl.java ----
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

package com.yahoo.vespa.hosted.node.admin.task.util.editor;

import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;

import static com.yahoo.vespa.hosted.node.admin.task.util.editor.TextUtil.splitString;

/**
 * A {@link TextBuffer} keeping the text as a list of lines.
 *
 * @author hakon
 */
public class TextBufferImpl implements TextBuffer {
    /** Invariant: {@code size() >= 1}. An empty text buffer {@code => [""]} */
    private final ArrayList<String> lines = new ArrayList<>();

    private Version version = new Version();

    TextBufferImpl() {
        lines.add("");
    }

    TextBufferImpl(String text) {
        this();
        write(getStartOfText(), text);
        // reset version
        version = new Version();
    }

    @Override
    public Version getVersion() {
        return version;
    }

    @Override
    public String getString() {
        return String.join("\n", lines);
    }

    @Override
    public int getMaxLineIndex() {
        return lines.size() - 1;
    }

    @Override
    public String getLine(int lineIndex) {
        return lines.get(lineIndex);
    }

    @Override
    public Position write(Position position, String text) {
        List<String> linesToInsert = new LinkedList<>(splitString(text, true, false));
        if (linesToInsert.isEmpty()) {
            // Empty text: nothing is written and the version is left unchanged.
            return position;
        }

        // The position splits that line in two, and both prefix and suffix must be preserved
        linesToInsert.set(0, getLinePrefix(position) + linesToInsert.get(0));
        String lastLine = linesToInsert.get(linesToInsert.size() - 1);
        int endColumnIndex = lastLine.length();
        linesToInsert.set(linesToInsert.size() - 1, lastLine + getLineSuffix(position));

        // Set the first line at lineIndex, insert the rest.
        int lineIndex = position.lineIndex();
        int endLineIndex = lineIndex + linesToInsert.size() - 1;
        lines.set(lineIndex, linesToInsert.remove(0));
        lines.addAll(lineIndex + 1, linesToInsert);

        incrementVersion();

        return new Position(endLineIndex, endColumnIndex);
    }

    @Override
    public void clear() {
        lines.clear();
        lines.add("");

        // Clearing is an edit like any other, so the version must change with it.
        incrementVersion();
    }

    /**
     * Deletes the text in {@code [start, end>}.
     *
     * @throws IllegalArgumentException if {@code start} is after {@code end}
     */
    @Override
    public void delete(Position start, Position end) {
        if (start.isAfter(end)) {
            throw new IllegalArgumentException("start position " + start +
                    " is after end position " + end);
        }

        String prefix = getLinePrefix(start);
        String suffix = getLineSuffix(end);
        String stitchedLine = prefix + suffix;

        lines.set(start.lineIndex(), stitchedLine);

        deleteLines(start.lineIndex() + 1, end.lineIndex() + 1);

        incrementVersion();
    }

    /** Removes the lines with index in {@code [startIndex, endIndex>}. */
    private void deleteLines(int startIndex, int endIndex) {
        lines.subList(startIndex, endIndex).clear();
    }

    private void incrementVersion() {
        version = version.next();
    }
}
// ---- next file in this patch: node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/TextUtil.java ----
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ +package com.yahoo.vespa.hosted.node.admin.task.util.editor; + +import java.util.ArrayList; +import java.util.List; +import java.util.function.Consumer; + +/** + * @author hakon + */ +public class TextUtil { + private TextUtil() {} + + /** + * Splits {@code text} by newline (LF {@code '\n'}). + * + * @param text the text to split into lines + * @param empty whether an empty text implies an empty List (true), or a List with one + * empty String element (false) + * @param prune whether a text ending with a newline will result in a List ending with the + * preceding line (true), or to add an empty String element (false) + */ + public static List splitString(String text, boolean empty, boolean prune) { + List lines = new ArrayList<>(); + splitString(text, empty, prune, lines::add); + return lines; + } + + /** + * Splits text by newline, passing each line to a consumer. + * + * @see #splitString(String, boolean, boolean) + */ + public static void splitString(String text, + boolean empty, + boolean prune, + Consumer consumer) { + if (text.isEmpty()) { + if (!empty) { + consumer.accept(text); + } + return; + } + + final int endIndex = text.length(); + + int start = 0; + for (int end = text.indexOf('\n'); + end != -1; + start = end + 1, end = text.indexOf('\n', start)) { + consumer.accept(text.substring(start, end)); + } + + if (start < endIndex || !prune) { + consumer.accept(text.substring(start)); + } + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/Version.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/Version.java new file mode 100644 index 00000000000..97d8cbb6a50 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/editor/Version.java @@ -0,0 +1,52 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.vespa.hosted.node.admin.task.util.editor;

import java.util.Objects;

/**
 * Represents a snapshot of the TextBuffer, between two edits (or the initial or final state)
 *
 * @author hakon
 */
public class Version {
    private final int version;

    /** Creates the initial version, 0. */
    Version() {
        this(0);
    }

    private Version(int version) {
        this.version = version;
    }

    public boolean isBefore(Version that) {
        return version < that.version;
    }

    public int asInt() {
        return version;
    }

    /** Returns the version following this one. */
    public Version next() {
        return new Version(version + 1);
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        Version that = (Version) o;
        return version == that.version;
    }

    @Override
    public int hashCode() {
        return Objects.hash(version);
    }

    @Override
    public String toString() {
        return String.valueOf(version);
    }
}
// ---- file in this patch: node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/AttributeSync.java ----
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.node.admin.task.util.file;

import com.yahoo.vespa.hosted.node.admin.component.TaskContext;

import java.nio.file.Path;
import java.util.Objects;
import java.util.Optional;
import java.util.function.Consumer;
import java.util.function.Supplier;
import java.util.logging.Logger;

/**
 * Class to converge file/directory attributes like owner and permissions to wanted values.
 * Typically used by higher abstraction layers working on files (FileSync/FileWriter) or
 * directories (MakeDirectory).
 *
 * @author hakonhall
 */
public class AttributeSync {
    private static final Logger logger = Logger.getLogger(AttributeSync.class.getName());

    private final UnixPath path;

    // Wanted values: an empty Optional means the attribute is left as it is.
    private Optional<Integer> ownerId = Optional.empty();
    private Optional<Integer> groupId = Optional.empty();
    private Optional<String> permissions = Optional.empty();

    public AttributeSync(Path path) {
        this.path = new UnixPath(path);
    }

    public Optional<String> getPermissions() {
        return permissions;
    }

    public AttributeSync withPermissions(String permissions) {
        this.permissions = Optional.of(permissions);
        return this;
    }

    public Optional<Integer> ownerId() {
        return ownerId;
    }

    public AttributeSync withOwnerId(int ownerId) {
        this.ownerId = Optional.of(ownerId);
        return this;
    }

    public Optional<Integer> groupId() {
        return groupId;
    }

    public AttributeSync withGroupId(int groupId) {
        this.groupId = Optional.of(groupId);
        return this;
    }

    /** Replaces all three wanted values with those of {@code fileData}, including the unset ones. */
    public AttributeSync with(PartialFileData fileData) {
        ownerId = fileData.getOwnerId();
        groupId = fileData.getGroupId();
        permissions = fileData.getPermissions();
        return this;
    }

    /** Converges using a fresh {@link FileAttributesCache} for the path. */
    public boolean converge(TaskContext context) {
        return converge(context, new FileAttributesCache(path));
    }

    /**
     * Path must exist before calling converge.
     *
     * @return true if any attribute was changed
     */
    public boolean converge(TaskContext context, FileAttributesCache currentAttributes) {
        boolean systemModified = updateAttribute(
                context,
                "user ID",
                ownerId,
                () -> currentAttributes.getOrThrow().ownerId(),
                path::setOwnerId);

        // Non-short-circuiting |= so that every attribute is converged, also after a modification.
        systemModified |= updateAttribute(
                context,
                "group ID",
                groupId,
                () -> currentAttributes.getOrThrow().groupId(),
                path::setGroupId);

        systemModified |= updateAttribute(
                context,
                "permissions",
                permissions,
                () -> currentAttributes.getOrThrow().permissions(),
                path::setPermissions);

        return systemModified;
    }

    /**
     * Sets one attribute to its wanted value, unless no value is wanted or it already has that value.
     * The current value is only fetched when a value is wanted.
     *
     * @return true if the attribute was changed
     */
    private <T> boolean updateAttribute(TaskContext context,
                                        String attributeName,
                                        Optional<T> wantedValue,
                                        Supplier<T> currentValueSupplier,
                                        Consumer<T> valueSetter) {
        if (wantedValue.isEmpty()) {
            return false;
        }

        T wanted = wantedValue.get();
        T currentValue = currentValueSupplier.get();
        if (Objects.equals(currentValue, wanted)) {
            return false;
        }

        context.recordSystemModification(
                logger,
                String.format("Changing %s of %s from %s to %s",
                        attributeName,
                        path,
                        currentValue,
                        wanted));

        valueSetter.accept(wanted);

        return true;
    }
}
// ---- next file in this patch: node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/DiskSize.java ----
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.node.admin.task.util.file;

import java.util.Locale;
import java.util.Objects;

/**
 * A size in bytes, convertible to and from decimal (kB, MB, ...) and binary (kiB, MiB, ...) units.
 *
 * @author freva
 */
public class DiskSize implements Comparable<DiskSize> {

    public static final DiskSize ZERO = DiskSize.of(0);
    private static final char[] UNITS = "kMGTPE".toCharArray();

    public enum Unit {
        kB(1_000), kiB(1 << 10),
        MB(1_000_000), MiB(1 << 20),
        GB(1_000_000_000), GiB(1 << 30),
        TB(1_000_000_000_000L), TiB(1L << 40),
        PB(1_000_000_000_000_000L), PiB(1L << 50);

        private final long size;
        Unit(long size) { this.size = size; }
    }

    private final long bytes;
    private DiskSize(long bytes) { this.bytes = bytes; }

    public long bytes() { return bytes; }

    /** Returns the size in the given unit, truncated by integer division. */
    public long as(Unit unit) { return bytes / unit.size; }
    public double asDouble(Unit unit) { return (double) bytes / unit.size; }

    public DiskSize add(DiskSize other) { return new DiskSize(bytes + other.bytes); }

    public static DiskSize of(long bytes) { return new DiskSize(bytes); }
    public static DiskSize of(double bytes, Unit unit) { return new DiskSize((long) (bytes * unit.size)); }
    public static DiskSize of(long bytes, Unit unit) { return new DiskSize(bytes * unit.size); }

    public String asString() { return asString(0); }

    /**
     * Returns the size with a decimal (power of 1000) unit and the given number of decimals,
     * or as a plain byte count if the size is less than 1000 bytes.
     */
    public String asString(int decimals) {
        if (bytes < 1000) return bytes + " bytes";

        // unit ends up as the index into UNITS of the largest unit giving a value >= 1
        int unit = -1;
        double remaining = bytes;
        for (; remaining >= 1000; unit++) remaining /= 1000;
        return String.format(Locale.ENGLISH, "%." + decimals + "f %sB", remaining, UNITS[unit]);
    }

    @Override
    public int compareTo(DiskSize rhs) {
        return Long.compare(this.bytes, rhs.bytes);
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        DiskSize size = (DiskSize) o;
        return bytes == size.bytes;
    }

    @Override
    public int hashCode() {
        return Objects.hash(bytes);
    }

    @Override
    public String toString() {
        return asString();
    }
}
// ---- file in this patch: node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/Editor.java ----
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.node.admin.task.util.file;

import com.yahoo.vespa.hosted.node.admin.component.TaskContext;

import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.LinkedList;
import java.util.List;
import java.util.function.Consumer;
import java.util.function.Supplier;
import java.util.logging.Logger;

import static com.yahoo.vespa.hosted.node.admin.task.util.file.IOExceptionUtil.ifExists;
import static com.yahoo.yolean.Exceptions.uncheck;

/**
 * An editor meant to edit small line-based files like /etc/fstab.
 *
 * @author hakonhall
 */
public class Editor {
    private static final Logger logger = Logger.getLogger(Editor.class.getName());
    private static final Charset ENCODING = StandardCharsets.UTF_8;

    /** The maximum size of the diff, in characters, that is included in the log message. */
    private static final int MAX_LENGTH = 500;

    private final Supplier<List<String>> supplier;
    private final Consumer<List<String>> consumer;
    private final String name;
    private final LineEditor editor;

    /** The size of the diff of the {@link #edit} call in progress (or last completed). */
    private int diffSize = 0;

    /**
     * Creates an editor of the file at {@code path}, which must be encoded in UTF-8.
     * {@link #edit} reads the file (a file found missing by {@code ifExists} is treated as empty),
     * uses the LineEditor to edit it, and if any modifications were made, writes it back.
     */
    public Editor(Path path, LineEditor editor) {
        this(path.toString(),
             () -> ifExists(() -> Files.readAllLines(path, ENCODING)).orElseGet(List::of),
             (newLines) -> uncheck(() -> Files.write(path, newLines, ENCODING)),
             editor);
    }

    /**
     * @param name     The name of what is being edited - used in logging
     * @param supplier Supplies the editor with a list of lines to edit
     * @param consumer Consumes the lines to persist if any changes are detected
     * @param editor   The line operations to execute on the lines supplied
     */
    public Editor(String name,
                  Supplier<List<String>> supplier,
                  Consumer<List<String>> consumer,
                  LineEditor editor) {
        this.supplier = supplier;
        this.consumer = consumer;
        this.name = name;
        this.editor = editor;
    }

    /**
     * Runs the line editor over the supplied lines, and hands the resulting lines to the consumer
     * if anything was modified.
     *
     * @param logConsumer receives one message describing the modification, if any
     * @return true if the lines were modified
     */
    public boolean edit(Consumer<String> logConsumer) {
        // The diff size is per edit: start from zero so that repeated calls do not accumulate.
        diffSize = 0;

        List<String> lines = supplier.get();
        List<String> newLines = new LinkedList<>();
        StringBuilder diff = new StringBuilder();
        boolean modified = false;

        for (String line : lines) {
            LineEdit edit = editor.edit(line);
            if (!edit.prependLines().isEmpty()) {
                modified = true;
                maybeAdd(diff, edit.prependLines());
                newLines.addAll(edit.prependLines());
            }

            switch (edit.getType()) {
                case REPLACE -> {
                    // The original line is dropped; replacements come from the prepend/append lines.
                    modified = true;
                    maybeRemove(diff, line);
                }
                case NONE -> newLines.add(line);
                default -> throw new IllegalArgumentException("Unknown EditType " + edit.getType());
            }

            if (!edit.appendLines().isEmpty()) {
                modified = true;
                maybeAdd(diff, edit.appendLines());
                newLines.addAll(edit.appendLines());
            }
        }

        List<String> linesToAppend = editor.onComplete();
        if (!linesToAppend.isEmpty()) {
            modified = true;
            newLines.addAll(linesToAppend);
            maybeAdd(diff, linesToAppend);
        }

        if (!modified) {
            return false;
        }

        String diffDescription = diffTooLarge() ? ": Diff too large (" + diffSize + ")" : ":\n" + diff;
        logConsumer.accept("Patching file " + name + diffDescription);
        consumer.accept(newLines);
        return true;
    }

    /** Runs {@link #edit}, recording any modification as a system modification on {@code context}. */
    public boolean converge(TaskContext context) {
        return this.edit(line -> context.recordSystemModification(logger, line));
    }

    /** Accounts for the added lines in the diff size, and includes them in the diff while it is small enough. */
    private void maybeAdd(StringBuilder diff, List<String> lines) {
        for (String line : lines) {
            // 2 for '+' and '\n'
            diffSize += 2 + line.length();
            if (!diffTooLarge()) {
                diff.append('+').append(line).append('\n');
            }
        }
    }

    /** Accounts for the removed line in the diff size, and includes it in the diff while it is small enough. */
    private void maybeRemove(StringBuilder diff, String line) {
        // 2 for '-' and '\n'
        diffSize += 2 + line.length();
        if (!diffTooLarge()) {
            diff.append('-').append(line).append('\n');
        }
    }

    private boolean diffTooLarge() {
        return diffSize > MAX_LENGTH;
    }
}
// ---- next file in this patch: node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/EditorFactory.java ----
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.task.util.file;
+
+import java.nio.file.Path;
+
+/**
+ * @author hakonhall
+ */
+public class EditorFactory {
+    public Editor create(Path path, LineEditor editor) {
+        return new Editor(path, editor);
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileAttributes.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileAttributes.java
new file mode 100644
index 00000000000..06490bac3a4
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileAttributes.java
@@ -0,0 +1,37 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.task.util.file;
+
+import java.nio.file.attribute.FileTime;
+import java.nio.file.attribute.PosixFilePermission;
+import java.nio.file.attribute.PosixFilePermissions;
+import java.time.Instant;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * A wrapper around PosixFileAttributes.
+ *
+ * @author hakonhall
+ */
+public record FileAttributes(Instant lastModifiedTime, int ownerId, int groupId, String permissions,
+                             boolean isRegularFile, boolean isDirectory, long size, int deviceMajor, int deviceMinor) {
+
+    @SuppressWarnings("unchecked")
+    static FileAttributes fromAttributes(Map<String, Object> attributes) {
+        long dev_t = (long) attributes.get("dev");
+
+        return new FileAttributes(
+                ((FileTime) attributes.get("lastModifiedTime")).toInstant(),
+                (int) attributes.get("uid"),
+                (int) attributes.get("gid"),
+                PosixFilePermissions.toString(((Set<PosixFilePermission>) attributes.get("permissions"))),
+                (boolean) attributes.get("isRegularFile"),
+                (boolean) attributes.get("isDirectory"),
+                (long) attributes.get("size"),
+                deviceMajor(dev_t), deviceMinor(dev_t));
+    }
+
+    // Encoded as MMMM Mmmm mmmM MMmm, where M is a hex digit of the major number and m is a hex digit of the minor number.
+    static int deviceMajor(long dev_t) { return (int) (((dev_t & 0xFFFFF00000000000L) >> 32) | ((dev_t & 0xFFF00) >> 8)); }
+    static int deviceMinor(long dev_t) { return (int) (((dev_t & 0x00000FFFFFF00000L) >> 12) | (dev_t & 0x000FF)); }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileAttributesCache.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileAttributesCache.java
new file mode 100644
index 00000000000..ca81669adcc
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileAttributesCache.java
@@ -0,0 +1,32 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.task.util.file;
+
+import java.util.Optional;
+
+// @ThreadUnsafe
+public class FileAttributesCache {
+    private final UnixPath path;
+
+    private Optional<FileAttributes> attributes = Optional.empty();
+
+    public FileAttributesCache(UnixPath path) {
+        this.path = path;
+    }
+
+    public Optional<FileAttributes> get() {
+        if (attributes.isEmpty()) {
+            attributes = path.getAttributesIfExists();
+        }
+
+        return attributes;
+    }
+
+    public FileAttributes getOrThrow() {
+        return get().orElseThrow();
+    }
+
+    public Optional<FileAttributes> forceGet() {
+        attributes = Optional.empty();
+        return get();
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileContentCache.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileContentCache.java
new file mode 100644
index 00000000000..0a081ac53b4
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileContentCache.java
@@ -0,0 +1,35 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.task.util.file;
+
+import java.time.Instant;
+import java.util.Optional;
+
+/**
+ * Class to avoid repeated reads of file content when the file seldom changes.
+ *
+ * @author hakonhall
+ */
+class FileContentCache {
+    private final UnixPath path;
+
+    private Optional<byte[]> value = Optional.empty();
+    private Optional<Instant> modifiedTime = Optional.empty();
+
+    FileContentCache(UnixPath path) {
+        this.path = path;
+    }
+
+    byte[] get(Instant lastModifiedTime) {
+        if (modifiedTime.isEmpty() || lastModifiedTime.isAfter(modifiedTime.get())) {
+            value = Optional.of(path.readBytes());
+            modifiedTime = Optional.of(lastModifiedTime);
+        }
+
+        return value.get();
+    }
+
+    void updateWith(byte[] content, Instant modifiedTime) {
+        this.value = Optional.of(content);
+        this.modifiedTime = Optional.of(modifiedTime);
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileDeleter.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileDeleter.java
new file mode 100644
index 00000000000..a443e683df0
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileDeleter.java
@@ -0,0 +1,34 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.task.util.file;
+
+import com.yahoo.vespa.hosted.node.admin.component.TaskContext;
+
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.logging.Logger;
+
+import static com.yahoo.yolean.Exceptions.uncheck;
+
+/**
+ * Deletes a file or empty directory.
+ *
+ * @author hakonhall
+ */
+public class FileDeleter {
+    private static final Logger logger = Logger.getLogger(FileDeleter.class.getName());
+
+    private final Path path;
+
+    public FileDeleter(Path path) {
+        this.path = path;
+    }
+
+    public boolean converge(TaskContext context) {
+        boolean deleted = uncheck(() -> Files.deleteIfExists(path));
+        if (deleted) {
+            context.recordSystemModification(logger, "Deleted " + path);
+        }
+
+        return deleted;
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileFinder.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileFinder.java
new file mode 100644
index 00000000000..1b3fa1854e7
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileFinder.java
@@ -0,0 +1,276 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.task.util.file;
+
+import com.yahoo.lang.MutableInteger;
+import com.yahoo.vespa.hosted.node.admin.component.TaskContext;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.nio.file.FileVisitResult;
+import java.nio.file.Files;
+import java.nio.file.NoSuchFileException;
+import java.nio.file.Path;
+import java.nio.file.SimpleFileVisitor;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.ArrayDeque;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Deque;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Set;
+import java.util.function.Consumer;
+import java.util.function.Predicate;
+import java.util.logging.Logger;
+import java.util.regex.Pattern;
+import java.util.stream.Stream;
+
+/**
+ * Helper class to find and list or deleteRecursively files and directories. Follows the general syntax of command line
+ * tool `find`.
+ *
+ * @author freva
+ */
+public class FileFinder {
+    private static final Logger logger = Logger.getLogger(FileFinder.class.getName());
+
+    private final Path basePath;
+    private final Set<Path> pruned = new HashSet<>();
+    private Predicate<FileAttributes> matcher;
+    private int maxDepth = Integer.MAX_VALUE;
+
+    private FileFinder(Path basePath, Predicate<FileAttributes> initialMatcher) {
+        this.basePath = basePath;
+        this.matcher = initialMatcher;
+    }
+
+    /** Creates a FileFinder at the given basePath */
+    public static FileFinder from(Path basePath) {
+        return new FileFinder(basePath, attrs -> true);
+    }
+
+    /** Creates a FileFinder at the given basePath that will match all files */
+    public static FileFinder files(Path basePath) {
+        return new FileFinder(basePath, FileAttributes::isRegularFile);
+    }
+
+
+    /** Creates a FileFinder at the given basePath that will match all directories */
+    public static FileFinder directories(Path basePath) {
+        return new FileFinder(basePath, FileAttributes::isDirectory);
+    }
+
+
+    /**
+     * Predicate that will be used to match files and directories under the base path.
+     *
+     * NOTE: Consecutive calls to this method are ANDed (this includes the initial filter from
+     * {@link #files(Path)} or {@link #directories(Path)}).
+     */
+    public FileFinder match(Predicate<FileAttributes> matcher) {
+        this.matcher = this.matcher.and(matcher);
+        return this;
+    }
+
+    /**
+     * Path for which whole directory tree will be skipped, including the path itself.
+     * The path must be under {@code basePath} or be relative to {@code basePath}.
+     */
+    public FileFinder prune(Path path) {
+        if (!path.isAbsolute())
+            path = basePath.resolve(path);
+
+        if (!path.startsWith(basePath))
+            throw new IllegalArgumentException("Prune path " + path + " is not under base path " + basePath);
+
+        this.pruned.add(path);
+        return this;
+    }
+
+    /** Convenience method for pruning multiple paths, see {@link #prune(Path)}. */
+    public FileFinder prune(Collection<Path> paths) {
+        paths.forEach(this::prune);
+        return this;
+    }
+
+    /**
+     * Maximum depth (relative to basePath) where contents should be matched with the given filters.
+     * Default is unlimited.
+     */
+    public FileFinder maxDepth(int maxDepth) {
+        this.maxDepth = maxDepth;
+        return this;
+    }
+
+    /**
+     * Recursively deletes all matching elements
+     *
+     * @return true iff anything was matched and deleted
+     */
+    public boolean deleteRecursively(TaskContext context) {
+        final int maxNumberOfDeletedPathsToLog = 20;
+        MutableInteger numDeleted = new MutableInteger(0);
+        List<Path> deletedPaths = new ArrayList<>();
+
+        try {
+            forEach(attributes -> {
+                if (attributes.unixPath().deleteRecursively()) {
+                    if (numDeleted.next() <= maxNumberOfDeletedPathsToLog) deletedPaths.add(attributes.path());
+                }
+            });
+        } finally {
+            if (numDeleted.get() > maxNumberOfDeletedPathsToLog) {
+                context.log(logger, "Deleted " + numDeleted.get() + " paths under " + basePath);
+            } else if (deletedPaths.size() > 0) {
+                List<Path> paths = deletedPaths.stream()
+                        .map(basePath::relativize)
+                        .sorted()
+                        .toList();
+                context.log(logger, "Deleted these paths in " + basePath + ": " + paths);
+            }
+        }
+
+        return deletedPaths.size() > 0;
+    }
+
+    public List<FileAttributes> list() {
+        LinkedList<FileAttributes> list = new LinkedList<>();
+        forEach(list::add);
+        return list;
+    }
+
+    public Stream<FileAttributes> stream() {
+        return list().stream();
+    }
+
+    public void forEachPath(Consumer<Path> action) {
+        forEach(attributes -> action.accept(attributes.path()));
+    }
+
+    /** Applies a given consumer to all the matching {@link FileFinder.FileAttributes} */
+    public void forEach(Consumer<FileAttributes> action) {
+        applyForEachToMatching(basePath, matcher, maxDepth, action);
+    }
+
+
+    /**
+     * <p> This method walks a file tree rooted at a given starting file. The file tree traversal is
+     * depth-first: The filter function is applied in pre-order (NLR), but the given
+     * {@link Consumer} will be called in post-order (LRN).
+     */
+    private void applyForEachToMatching(Path basePath, Predicate<FileAttributes> matcher,
+                                        int maxDepth, Consumer<FileAttributes> action) {
+        try {
+            // Only need to traverse as deep as we want to match, unless we want to match everything in directories
+            // already matched
+            Files.walkFileTree(basePath, Set.of(), maxDepth, new SimpleFileVisitor<>() {
+                private final Deque<FileAttributes> matchingDirectoryStack = new ArrayDeque<>();
+                private int currentLevel = -1;
+
+                @Override
+                public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) {
+                    if (pruned.contains(dir)) return FileVisitResult.SKIP_SUBTREE;
+
+                    currentLevel++;
+
+                    FileAttributes attributes = new FileAttributes(dir, attrs);
+                    if (currentLevel > 0 && matcher.test(attributes))
+                        matchingDirectoryStack.push(attributes);
+
+                    return FileVisitResult.CONTINUE;
+                }
+
+                @Override
+                public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) {
+                    // When we find a directory at the max depth given to Files.walkFileTree, the directory
+                    // will be passed to visitFile() rather than (pre|post)VisitDirectory
+                    if (attrs.isDirectory()) {
+                        // A pruned directory was never entered, so it must not be left either
+                        if (preVisitDirectory(file, attrs) == FileVisitResult.SKIP_SUBTREE) return FileVisitResult.CONTINUE;
+                        return postVisitDirectory(file, null);
+                    }
+
+                    FileAttributes attributes = new FileAttributes(file, attrs);
+                    if (matcher.test(attributes))
+                        action.accept(attributes);
+
+                    return FileVisitResult.CONTINUE;
+                }
+
+                @Override
+                public FileVisitResult postVisitDirectory(Path dir, IOException exc) {
+                    // Only act on the directory being left; otherwise leaving a non-matching subdirectory would
+                    // hand its matching ancestor to the action before the ancestor has been fully traversed
+                    FileAttributes top = matchingDirectoryStack.peek();
+                    if (top != null && top.path().equals(dir))
+                        action.accept(matchingDirectoryStack.pop());
+
+                    currentLevel--;
+                    return FileVisitResult.CONTINUE;
+                }
+            });
+        } catch (NoSuchFileException ignored) {
+
+        } catch (IOException e) {
+            throw new UncheckedIOException(e);
+        }
+    }
+
+
+    // Ideally, we would reuse the FileAttributes in this package, but unfortunately we only get
+    // BasicFileAttributes and not PosixFileAttributes from FileVisitor
+    public static class FileAttributes {
+        private final Path path;
+        private final BasicFileAttributes attributes;
+
+        public FileAttributes(Path path, BasicFileAttributes attributes) {
+            this.path = path;
+            this.attributes = attributes;
+        }
+
+        public Path path() { return path; }
+        public UnixPath unixPath() { return new UnixPath(path); }
+        public String filename() { return path.getFileName().toString(); }
+        public Instant lastModifiedTime() { return attributes.lastModifiedTime().toInstant(); }
+        public boolean isRegularFile() { return attributes.isRegularFile(); }
+        public boolean isDirectory() { return attributes.isDirectory(); }
+        public long size() { return attributes.size(); }
+    }
+
+
+    // Filters
+    public static Predicate<FileAttributes> olderThan(Duration duration) {
+        return attrs -> Duration.between(attrs.lastModifiedTime(), Instant.now()).compareTo(duration) > 0;
+    }
+
+    public static Predicate<FileAttributes> youngerThan(Duration duration) {
+        return olderThan(duration).negate();
+    }
+
+    public static Predicate<FileAttributes> largerThan(long sizeInBytes) {
+        return attrs -> attrs.size() > sizeInBytes;
+    }
+
+    public static Predicate<FileAttributes> smallerThan(long sizeInBytes) {
+        return largerThan(sizeInBytes).negate();
+    }
+
+    public static Predicate<FileAttributes> nameMatches(Pattern pattern) {
+        return attrs -> pattern.matcher(attrs.filename()).matches();
+    }
+
+    public static Predicate<FileAttributes> nameStartsWith(String string) {
+        return attrs -> attrs.filename().startsWith(string);
+    }
+
+    public static Predicate<FileAttributes> nameEndsWith(String string) {
+        return attrs -> attrs.filename().endsWith(string);
+    }
+
+    public static Predicate<FileAttributes> all() {
+        return attrs -> true;
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileMover.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileMover.java
new file mode 100644
index 00000000000..3c53609b84e
--- /dev/null
+++ 
b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileMover.java
@@ -0,0 +1,55 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.task.util.file;
+
+import com.yahoo.vespa.hosted.node.admin.component.TaskContext;
+
+import java.nio.file.CopyOption;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.logging.Logger;
+
+import static com.yahoo.yolean.Exceptions.uncheck;
+
+/**
+ * Utility for idempotent move of (any type of) file.
+ *
+ * @author hakonhall
+ */
+public class FileMover {
+    private static final Logger logger = Logger.getLogger(FileMover.class.getName());
+
+    private final Path source;
+    private final Path destination;
+    private final Set<CopyOption> moveOptions = new HashSet<>();
+
+    public FileMover(Path source, Path destination) {
+        this.source = source;
+        this.destination = destination;
+    }
+
+    public FileMover replaceExisting() {
+        moveOptions.add(StandardCopyOption.REPLACE_EXISTING);
+        return this;
+    }
+
+    public FileMover atomic() {
+        moveOptions.add(StandardCopyOption.ATOMIC_MOVE);
+        return this;
+    }
+
+    /**
+     * Move file.
+     *
+     * @return false if the source doesn't exist while the destination does.
+     * @see Files#move(Path, Path, CopyOption...) Files.move()
+     */
+    public boolean converge(TaskContext context) {
+        if (!Files.exists(source) && Files.exists(destination)) return false;
+        uncheck(() -> Files.move(source, destination, moveOptions.toArray(CopyOption[]::new)));
+        context.recordSystemModification(logger, "Moved " + source + " to " + destination);
+        return true;
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileSnapshot.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileSnapshot.java
new file mode 100644
index 00000000000..b466b878ce5
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileSnapshot.java
@@ -0,0 +1,83 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.task.util.file;
+
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.time.Instant;
+import java.util.Optional;
+
+/**
+ * A snapshot of the attributes of the file for a given path, and file content if it is a regular file.
+ *
+ * @author hakonhall
+ */
+public class FileSnapshot {
+    private final Path path;
+    private final Optional<FileAttributes> attributes;
+    private final Optional<byte[]> content;
+
+    public static FileSnapshot forPath(Path path) { return forNonExistingFile(path).snapshot(); }
+
+    /** Guaranteed to not throw any exceptions. */
+    public static FileSnapshot forNonExistingFile(Path path) {
+        return new FileSnapshot(path, Optional.empty(), Optional.empty());
+    }
+
+    private static FileSnapshot forRegularFile(Path path, FileAttributes attributes, byte[] content) {
+        if (!attributes.isRegularFile()) throw new IllegalArgumentException(path + " is not a regular file");
+        return new FileSnapshot(path, Optional.of(attributes), Optional.of(content));
+    }
+
+    private static FileSnapshot forOtherFile(Path path, FileAttributes attributes) {
+        if (attributes.isRegularFile()) throw new IllegalArgumentException(path + " is a regular file");
+        return new FileSnapshot(path, Optional.of(attributes), Optional.empty());
+    }
+
+    private FileSnapshot(Path path, Optional<FileAttributes> attributes, Optional<byte[]> content) {
+        this.path = path;
+        this.attributes = attributes;
+        this.content = content;
+    }
+
+    public Path path() { return path; }
+
+    /** Whether there was a file (or directory) at path. */
+    public boolean exists() { return attributes.isPresent(); }
+
+    /** Returns the file attributes if the file exists. */
+    public Optional<FileAttributes> attributes() { return attributes; }
+
+    /** Returns the file content if the file exists and is a regular file. */
+    public Optional<byte[]> content() { return content; }
+
+    /** Returns the file UTF-8 content if it exists and is a regular file. */
+    public Optional<String> utf8Content() { return content.map(c -> new String(c, StandardCharsets.UTF_8)); }
+
+    /** Returns an up-to-date snapshot of the path, possibly {@code this} if last modified time has not changed. */
+    public FileSnapshot snapshot() {
+        Optional<FileAttributes> currentAttributes = new UnixPath(path).getAttributesIfExists();
+        if (currentAttributes.isPresent()) {
+
+            // 'this' may still be valid, depending on last modified times.
+            if (attributes.isPresent()) {
+                Instant previousModifiedTime = attributes.get().lastModifiedTime();
+                Instant currentModifiedTime = currentAttributes.get().lastModifiedTime();
+                if (currentModifiedTime.compareTo(previousModifiedTime) <= 0) {
+                    return this;
+                }
+            }
+
+            if (currentAttributes.get().isRegularFile()) {
+                Optional<byte[]> content = IOExceptionUtil.ifExists(() -> Files.readAllBytes(path));
+                return content.map(bytes -> FileSnapshot.forRegularFile(path, currentAttributes.get(), bytes))
+                        // File was removed after getting attributes and before getting content.
+                        .orElseGet(() -> FileSnapshot.forNonExistingFile(path));
+            } else {
+                return FileSnapshot.forOtherFile(path, currentAttributes.get());
+            }
+        } else {
+            return attributes.isPresent() ? FileSnapshot.forNonExistingFile(path) : this /* avoid allocation */;
+        }
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileSync.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileSync.java
new file mode 100644
index 00000000000..bc572ce82a9
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileSync.java
@@ -0,0 +1,107 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.task.util.file;
+
+import com.yahoo.vespa.hosted.node.admin.component.TaskContext;
+
+import java.nio.file.Path;
+import java.time.Instant;
+import java.util.Arrays;
+import java.util.Optional;
+import java.util.logging.Logger;
+
+/**
+ * Class to minimize resource usage with repetitive and mostly identical, idempotent, and
+ * mutating file operations, e.g. setting file content, setting owner, etc.
+ *
+ * Only changes to the file are logged.
+ *
+ * @author hakonhall
+ */
+// @ThreadUnsafe
+public class FileSync {
+    private static final Logger logger = Logger.getLogger(FileSync.class.getName());
+
+    private final UnixPath path;
+    private final FileContentCache contentCache;
+    private final FileAttributesCache attributesCache;
+
+    public FileSync(Path path) {
+        this.path = new UnixPath(path);
+        this.contentCache = new FileContentCache(this.path);
+        this.attributesCache = new FileAttributesCache(this.path);
+    }
+
+    public boolean convergeTo(TaskContext taskContext, PartialFileData partialFileData) {
+        return convergeTo(taskContext, partialFileData, false);
+    }
+
+    /**
+     * CPU, I/O, and memory usage is optimized for repeated calls with the same arguments.
+     *
+     * @param atomicWrite Whether to write updates to a temporary file in the same directory, and atomically move it
+     *                    to path. Ensures the file cannot be read while in the middle of writing it.
+     * @return true if the system was modified: content was written, or owner was set, etc.
+     *         The system is only modified if necessary (different).
+     */
+    public boolean convergeTo(TaskContext taskContext, PartialFileData partialFileData, boolean atomicWrite) {
+        boolean modifiedSystem = false;
+
+        if (partialFileData.getContent().isPresent()) {
+            modifiedSystem |= convergeTo(taskContext, partialFileData.getContent().get(), atomicWrite, partialFileData.getPermissions());
+        }
+
+        AttributeSync attributeSync = new AttributeSync(path.toPath()).with(partialFileData);
+        modifiedSystem |= attributeSync.converge(taskContext, this.attributesCache);
+
+        return modifiedSystem;
+    }
+
+    /**
+     * CPU, I/O, and memory usage is optimized for repeated calls with the same argument.
+     *
+     * @param atomicWrite Whether to write updates to a temporary file in the same directory, and atomically move it
+     *                    to path. Ensures the file cannot be read while in the middle of writing it.
+     * @param permissions Permissions if the file is created.
+     * @return true if the content was written. Only modified if necessary (different).
+     */
+    public boolean convergeTo(TaskContext taskContext, byte[] content, boolean atomicWrite, Optional<String> permissions) {
+        Optional<Instant> lastModifiedTime = attributesCache.forceGet().map(FileAttributes::lastModifiedTime);
+
+        if (lastModifiedTime.isEmpty()) {
+            taskContext.recordSystemModification(logger, "Creating file " + path +
+                    permissions.map(p -> " with permissions " + p).orElse(""));
+            path.createParents();
+            writeBytes(content, atomicWrite, permissions);
+            contentCache.updateWith(content, attributesCache.forceGet().orElseThrow().lastModifiedTime());
+            return true;
+        }
+
+        if (Arrays.equals(content, contentCache.get(attributesCache.getOrThrow().lastModifiedTime()))) {
+            return false;
+        } else {
+            taskContext.recordSystemModification(logger, "Patching file " + path);
+            // empty permissions here, because the file already exists and won't be applied anyway
+            writeBytes(content, atomicWrite, Optional.empty());
+            contentCache.updateWith(content, attributesCache.forceGet().orElseThrow().lastModifiedTime());
+            return true;
+        }
+    }
+
+    private void writeBytes(byte[] content, boolean atomic, Optional<String> permissions) {
+        if (atomic) {
+            UnixPath tmpPath = new UnixPath(path.toPath().getFileSystem().getPath(path.toPath().toString() + ".FileSyncTmp"));
+            if (permissions.isPresent()) {
+                tmpPath.writeBytes(content, permissions.get());
+            } else {
+                tmpPath.writeBytes(content);
+            }
+            tmpPath.atomicMove(path.toPath());
+        } else {
+            if (permissions.isPresent()) {
+                path.writeBytes(content, permissions.get());
+            } else {
+                path.writeBytes(content);
+            }
+        }
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileWriter.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileWriter.java
new file mode 100644
index 00000000000..aa6364f2a98
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileWriter.java
@@ -0,0 +1,92 @@
+// Copyright
Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.task.util.file;
+
+import com.yahoo.vespa.hosted.node.admin.component.TaskContext;
+
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Optional;
+import java.util.function.Supplier;
+
+/**
+ * Write a file
+ *
+ * @author hakonhall
+ */
+public class FileWriter {
+    private final Path path;
+    private final FileSync fileSync;
+    private final PartialFileData.Builder fileDataBuilder = PartialFileData.builder();
+    private final Optional<ByteArraySupplier> contentProducer;
+
+    private boolean atomicWrite = false;
+    private boolean overwriteExistingFile = true;
+
+    public FileWriter(Path path) {
+        this(path, Optional.empty());
+    }
+
+    public FileWriter(Path path, Supplier<String> contentProducer) {
+        this(path, () -> contentProducer.get().getBytes(StandardCharsets.UTF_8));
+    }
+
+    public FileWriter(Path path, ByteArraySupplier contentProducer) {
+        this(path, Optional.of(contentProducer));
+    }
+
+    private FileWriter(Path path, Optional<ByteArraySupplier> contentProducer) {
+        this.path = path;
+        this.fileSync = new FileSync(path);
+        this.contentProducer = contentProducer;
+    }
+
+    public Path path() { return path; }
+
+    public FileWriter withOwnerId(int ownerId) {
+        fileDataBuilder.withOwnerId(ownerId);
+        return this;
+    }
+
+    public FileWriter withGroupId(int groupId) {
+        fileDataBuilder.withGroupId(groupId);
+        return this;
+    }
+
+    /** @see UnixPath#setPermissions */
+    public FileWriter withPermissions(String permissions) {
+        fileDataBuilder.withPermissions(permissions);
+        return this;
+    }
+
+    public FileWriter atomicWrite(boolean atomicWrite) {
+        this.atomicWrite = atomicWrite;
+        return this;
+    }
+
+    public FileWriter onlyIfFileDoesNotAlreadyExist() {
+        overwriteExistingFile = false;
+        return this;
+    }
+
+    public boolean converge(TaskContext context) {
+        return converge(context, contentProducer.orElseThrow().get());
+    }
+
+    public boolean converge(TaskContext context, String utf8Content) {
+        return converge(context, utf8Content.getBytes(StandardCharsets.UTF_8));
+    }
+
+    public boolean converge(TaskContext context, byte[] content) {
+        if (!overwriteExistingFile && Files.isRegularFile(path)) {
+            return false;
+        }
+
+        fileDataBuilder.withContent(content);
+        PartialFileData fileData = fileDataBuilder.create();
+        return fileSync.convergeTo(context, fileData, atomicWrite);
+    }
+
+    @FunctionalInterface
+    public interface ByteArraySupplier extends Supplier<byte[]> { }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/IOExceptionUtil.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/IOExceptionUtil.java
new file mode 100644
index 00000000000..a0db5a3cb16
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/IOExceptionUtil.java
@@ -0,0 +1,33 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.task.util.file;
+
+import com.yahoo.yolean.Exceptions;
+
+import java.io.UncheckedIOException;
+import java.nio.file.NoSuchFileException;
+import java.util.Optional;
+
+import static com.yahoo.yolean.Exceptions.uncheck;
+
+/**
+ * Utils related to IOException.
+ *
+ * @author hakonhall
+ */
+public class IOExceptionUtil {
+    /**
+     * Useful if it's not known whether a file or directory exists, in case e.g.
+     * NoSuchFileException is thrown and the caller wants an Optional.empty() in that case.
+     */
+    public static <T> Optional<T> ifExists(Exceptions.SupplierThrowingIOException<T> supplier) {
+        try {
+            return Optional.ofNullable(uncheck(supplier));
+        } catch (UncheckedIOException e) {
+            if (e.getCause() instanceof NoSuchFileException) {
+                return Optional.empty();
+            }
+
+            throw e;
+        }
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/LineEdit.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/LineEdit.java
new file mode 100644
index 00000000000..88b403ba443
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/LineEdit.java
@@ -0,0 +1,40 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.task.util.file;
+
+import java.util.List;
+
+import static com.yahoo.vespa.hosted.node.admin.task.util.file.LineEdit.Type.REPLACE;
+import static com.yahoo.vespa.hosted.node.admin.task.util.file.LineEdit.Type.NONE;
+
+/**
+ * @author hakonhall
+ */
+public class LineEdit {
+    enum Type { NONE, REPLACE }
+
+    public static LineEdit none() { return insert(List.of(), List.of()); }
+    public static LineEdit remove() { return replaceWith(List.of()); }
+
+    public static LineEdit insertBefore(String... prepend) { return insertBefore(List.of(prepend)); }
+    public static LineEdit insertBefore(List<String> prepend) { return insert(prepend, List.of()); }
+    public static LineEdit insertAfter(String... append) { return insertAfter(List.of(append)); }
+    public static LineEdit insertAfter(List<String> append) { return insert(List.of(), append); }
+    public static LineEdit insert(List<String> prepend, List<String> append) { return new LineEdit(NONE, prepend, append); }
+
+    public static LineEdit replaceWith(String... lines) { return replaceWith(List.of(lines)); }
+    public static LineEdit replaceWith(List<String> insertLines) { return new LineEdit(REPLACE, List.of(), insertLines); }
+
+    private final Type type;
+    private final List<String> prependLines;
+    private final List<String> appendLines;
+
+    private LineEdit(Type type, List<String> prependLines, List<String> appendLines) {
+        this.type = type;
+        this.prependLines = List.copyOf(prependLines);
+        this.appendLines = List.copyOf(appendLines);
+    }
+
+    public Type getType() { return type; }
+    public List<String> prependLines() { return prependLines; }
+    public List<String> appendLines() { return appendLines; }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/LineEditor.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/LineEditor.java
new file mode 100644
index 00000000000..a7dcb4dd32a
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/LineEditor.java
@@ -0,0 +1,21 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.task.util.file;
+
+import java.util.List;
+
+/**
+ * @author hakonhall
+ */
+public interface LineEditor {
+    /**
+     * @param line The line of a file.
+     * @return The edit to apply to the line, e.g. {@link LineEdit#none()} to keep it or {@link LineEdit#remove()} to remove it.
+     */
+    LineEdit edit(String line);
+
+    /**
+     * Called after edit() has been called on all lines in the file.
+     * @return Lines to append to the file.
+     */
+    List<String> onComplete();
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/MakeDirectory.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/MakeDirectory.java
new file mode 100644
index 00000000000..24c2ae8543d
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/MakeDirectory.java
@@ -0,0 +1,70 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license.
See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.task.util.file;
+
+import com.yahoo.vespa.hosted.node.admin.component.TaskContext;
+
+import java.io.UncheckedIOException;
+import java.nio.file.NotDirectoryException;
+import java.nio.file.Path;
+import java.util.Optional;
+import java.util.logging.Logger;
+
+/**
+ * Class to ensure a directory exists with the correct owner, group, and permissions.
+ *
+ * @author hakonhall
+ */
+public class MakeDirectory {
+    private static final Logger logger = Logger.getLogger(MakeDirectory.class.getName());
+
+    private final UnixPath path;
+    private final AttributeSync attributeSync;
+    private final FileAttributesCache attributesCache;
+
+    private boolean createParents = false;
+
+    public MakeDirectory(Path path) {
+        this.path = new UnixPath(path);
+        this.attributeSync = new AttributeSync(path);
+        this.attributesCache = new FileAttributesCache(this.path);
+    }
+
+    /**
+     * Warning: The owner, group, and permissions of any created parent directories are NOT modified
+     */
+    public MakeDirectory createParents() { this.createParents = true; return this; }
+
+    public MakeDirectory withOwnerId(int ownerId) { attributeSync.withOwnerId(ownerId); return this; }
+    public MakeDirectory withGroupId(int groupId) { attributeSync.withGroupId(groupId); return this; }
+    public MakeDirectory withPermissions(String permissions) {
+        attributeSync.withPermissions(permissions);
+        return this;
+    }
+
+    /**
+     * Creates the directory if it is missing, then converges its attributes.
+     *
+     * @return true if the directory was created or {@code attributeSync.converge} reported a change
+     * @throws UncheckedIOException wrapping NotDirectoryException if the path exists but is not a directory
+     */
+    public boolean converge(TaskContext context) {
+        boolean systemModified = false;
+
+        Optional<FileAttributes> attributes = attributesCache.forceGet();
+        if (attributes.isPresent()) {
+            if (!attributes.get().isDirectory()) {
+                throw new UncheckedIOException(new NotDirectoryException(path.toString()));
+            }
+        } else {
+            Optional<String> permissions = attributeSync.getPermissions();
+            if (createParents) {
+                // We'll skip logging system modification here, as we'll log about the creation
+                // of the directory next.
+                // Both branches call the varargs createParents: with the permissions, or with none.
+                permissions.ifPresentOrElse(path::createParents, path::createParents);
+            }
+
+            context.recordSystemModification(logger, "Creating directory " + path);
+            systemModified = true;
+
+            permissions.ifPresentOrElse(path::createDirectory, path::createDirectory);
+        }
+
+        systemModified |= attributeSync.converge(context, attributesCache);
+
+        return systemModified;
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/PartialFileData.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/PartialFileData.java
new file mode 100644
index 00000000000..b1d56b131bb
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/PartialFileData.java
@@ -0,0 +1,67 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.task.util.file;
+
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.util.Optional;
+
+/**
+ * Represents a subset of a file's content, owner, group, and permissions.
+ *
+ * @author hakonhall
+ */
+// @Immutable
+public class PartialFileData {
+    // Each part is optional: an empty value means that part of the file is not specified.
+    private final Optional<byte[]> content;
+    private final Optional<Integer> ownerId;
+    private final Optional<Integer> groupId;
+    private final Optional<String> permissions;
+
+    /** Returns a builder with every part unset. */
+    public static Builder builder() {
+        return new Builder();
+    }
+
+    private PartialFileData(Optional<byte[]> content,
+                            Optional<Integer> ownerId,
+                            Optional<Integer> groupId,
+                            Optional<String> permissions) {
+        this.content = content;
+        this.ownerId = ownerId;
+        this.groupId = groupId;
+        this.permissions = permissions;
+    }
+
+    /** The content bytes, if set. The array is the one given to the builder, not a copy. */
+    public Optional<byte[]> getContent() {
+        return content;
+    }
+
+    public Optional<Integer> getOwnerId() {
+        return ownerId;
+    }
+
+    public Optional<Integer> getGroupId() {
+        return groupId;
+    }
+
+    public Optional<String> getPermissions() {
+        return permissions;
+    }
+
+    /** Mutable builder; each {@code with*} call overwrites any earlier value for that part. */
+    public static class Builder {
+        private Optional<byte[]> content = Optional.empty();
+        private Optional<Integer> ownerId = Optional.empty();
+        private Optional<Integer> groupId = Optional.empty();
+        private Optional<String> permissions = Optional.empty();
+
+        public Builder withContent(byte[] content) { this.content = Optional.of(content); return this; }
+        public Builder withContent(String content, Charset charset) { return withContent(content.getBytes(charset)); }
+        /** Sets the content to the UTF-8 encoding of the given string. */
+        public Builder withContent(String content) { return withContent(content, StandardCharsets.UTF_8); }
+        public Builder withOwnerId(int ownerId) { this.ownerId = Optional.of(ownerId); return this; }
+        public Builder withGroupId(int groupId) { this.groupId = Optional.of(groupId); return this; }
+        public Builder withPermissions(String permissions) { this.permissions = Optional.of(permissions); return this; }
+
+        public PartialFileData create() {
+            return new PartialFileData(content, ownerId, groupId, permissions);
+        }
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/StoredBoolean.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/StoredBoolean.java
new file mode 100644
index 00000000000..50ca5db9d3d
--- /dev/null
+++
b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/StoredBoolean.java
@@ -0,0 +1,53 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.task.util.file;
+
+import com.yahoo.vespa.hosted.node.admin.component.TaskContext;
+
+import java.nio.file.Path;
+import java.util.logging.Logger;
+
+/**
+ * Class wrapping a boolean stored on disk.
+ *
+ * <p>The implementation is compatible with {@link StoredInteger} when absence or 0 means false.
+ *
+ * @author hakonhall
+ */
+public class StoredBoolean {
+    private static final Logger logger = Logger.getLogger(StoredBoolean.class.getName());
+
+    private final UnixPath path;
+
+    /** The parent directory must exist. Value is false by default. */
+    public StoredBoolean(Path path) {
+        this.path = new UnixPath(path);
+    }
+
+    /** False when the file is missing or its trimmed content is "0"; true for any other content. */
+    public boolean value() {
+        return path.readUtf8FileIfExists()
+                   .map(content -> !content.trim().equals("0"))
+                   .orElse(false);
+    }
+
+    /** Sets value to true. The file is only written if the current value is false. */
+    public void set(TaskContext context) {
+        if (value()) return;
+
+        context.log(logger, "Writes " + path);
+        path.writeUtf8File("1");
+    }
+
+    /** Sets the value to the given boolean by delegating to set or clear. */
+    public void set(TaskContext context, boolean value) {
+        if (!value) {
+            clear(context);
+            return;
+        }
+        set(context);
+    }
+
+    /** Sets value to false. The file is only deleted if the current value is true. */
+    public void clear(TaskContext context) {
+        if (!value()) return;
+
+        context.log(logger, "Deletes " + path);
+        path.deleteIfExists();
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/StoredDouble.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/StoredDouble.java
new file mode 100644
index 00000000000..19e8bcfcf93
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/StoredDouble.java
@@ -0,0 +1,46 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.task.util.file;
+
+import com.yahoo.vespa.hosted.node.admin.component.TaskContext;
+
+import java.nio.file.Path;
+import java.time.Instant;
+import java.util.Optional;
+import java.util.OptionalDouble;
+import java.util.function.Supplier;
+import java.util.logging.Logger;
+
+/**
+ * Class wrapping a double stored on disk
+ *
+ * @author freva
+ */
+public class StoredDouble implements Supplier<OptionalDouble> {
+
+    private static final Logger logger = Logger.getLogger(StoredDouble.class.getName());
+
+    private final UnixPath path;
+
+    public StoredDouble(Path path) {
+        this.path = new UnixPath(path);
+    }
+
+    /**
+     * Returns the stored value, or empty if the file does not exist.
+     *
+     * @throws NumberFormatException if the file content is not a parsable double
+     */
+    @Override
+    public OptionalDouble get() {
+        return path.readUtf8FileIfExists().stream().mapToDouble(Double::parseDouble).findAny();
+    }
+
+    /** Writes the value to the file, then logs it. */
+    public void write(TaskContext taskContext, double value) {
+        path.writeUtf8File(Double.toString(value));
+        taskContext.log(logger, "Stored new double in %s: %f", path, value);
+    }
+
+    /** Deletes the backing file if it exists, after which {@link #get()} returns empty. */
+    public void clear() {
+        path.deleteIfExists();
+    }
+
+    /** Last modified time of the backing file, or empty if its attributes cannot be found. */
+    public Optional<Instant> getLastModifiedTime() {
+        return path.getAttributesIfExists().map(FileAttributes::lastModifiedTime);
+    }
+
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/StoredInteger.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/StoredInteger.java
new file mode 100644
index 00000000000..ec4d64db0e3
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/StoredInteger.java
@@ -0,0 +1,42 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.task.util.file;
+
+import com.yahoo.vespa.hosted.node.admin.component.TaskContext;
+
+import java.nio.file.Path;
+import java.time.Instant;
+import java.util.Optional;
+import java.util.OptionalInt;
+import java.util.function.Supplier;
+import java.util.logging.Logger;
+
+/**
+ * Class wrapping an integer stored on disk
+ *
+ * @author freva
+ */
+public class StoredInteger implements Supplier<OptionalInt> {
+
+    private static final Logger logger = Logger.getLogger(StoredInteger.class.getName());
+
+    private final UnixPath path;
+
+    public StoredInteger(Path path) {
+        this.path = new UnixPath(path);
+    }
+
+    /**
+     * Returns the stored value, or empty if the file does not exist.
+     *
+     * @throws NumberFormatException if the trimmed file content is not a parsable int
+     */
+    @Override
+    public OptionalInt get() {
+        // Integer.parseInt rejects surrounding whitespace (e.g. a trailing newline), so trim first.
+        return path.readUtf8FileIfExists().map(String::trim).stream().mapToInt(Integer::parseInt).findAny();
+    }
+
+    /** Writes the value to the file, then logs it. */
+    public void write(TaskContext taskContext, int value) {
+        path.writeUtf8File(Integer.toString(value));
+        taskContext.log(logger, "Stored new integer in %s: %d", path, value);
+    }
+
+    /** Last modified time of the backing file, or empty if its attributes cannot be found. */
+    public Optional<Instant> getLastModifiedTime() {
+        return path.getAttributesIfExists().map(FileAttributes::lastModifiedTime);
+    }
+
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/Template.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/Template.java
new file mode 100644
index 00000000000..2436ba306ac
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/Template.java
@@ -0,0 +1,58 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.task.util.file;
+
+import org.apache.velocity.VelocityContext;
+import org.apache.velocity.app.Velocity;
+import org.apache.velocity.runtime.RuntimeConstants;
+import org.slf4j.helpers.NOPLogger;
+
+import java.io.StringWriter;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+import static com.yahoo.yolean.Exceptions.uncheck;
+
+/**
+ * Uses the Velocity engine to render a template, to and from both String and Path objects.
+ *
+ * @author hakonhall
+ * @author jonmv
+ */
+public class Template {
+
+    static {
+        // Install a no-op logger before initializing the shared Velocity engine.
+        Velocity.addProperty(RuntimeConstants.RUNTIME_LOG_INSTANCE, NOPLogger.NOP_LOGGER);
+        Velocity.init();
+    }
+
+    private final VelocityContext velocityContext = new VelocityContext();
+    private final String template;
+
+    private Template(String template) {
+        this.template = template;
+    }
+
+    /**
+     * Reads the template text from the given file, decoded as UTF-8.
+     *
+     * The charset is explicit so the result does not depend on the platform default charset.
+     */
+    public static Template at(Path templatePath) {
+        return of(uncheck(() -> new String(Files.readAllBytes(templatePath), StandardCharsets.UTF_8)));
+    }
+
+    /** Creates a template from the given template text. */
+    public static Template of(String template) {
+        return new Template(template);
+    }
+
+    /** Binds a value to a name in this template's context and returns this for chaining. */
+    public Template set(String name, Object value) {
+        velocityContext.put(name, value);
+        return this;
+    }
+
+    /** Returns a FileWriter for the destination, supplied with {@link #render()} as its content source. */
+    public FileWriter getFileWriterTo(Path destinationPath) {
+        return new FileWriter(destinationPath, this::render);
+    }
+
+    /** Evaluates the template against the values set so far and returns the output. */
+    public String render() {
+        StringWriter writer = new StringWriter();
+        Velocity.evaluate(velocityContext, writer, "Template", template);
+        return writer.toString();
+    }
+
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/UnixPath.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/UnixPath.java
new file mode 100644
index 00000000000..1983e94e6f5
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/UnixPath.java
@@ -0,0 +1,350 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.task.util.file; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.ByteBuffer; +import java.nio.channels.SeekableByteChannel; +import java.nio.charset.StandardCharsets; +import java.nio.file.FileAlreadyExistsException; +import java.nio.file.Files; +import java.nio.file.NoSuchFileException; +import java.nio.file.NotDirectoryException; +import java.nio.file.OpenOption; +import java.nio.file.Path; +import java.nio.file.StandardCopyOption; +import java.nio.file.StandardOpenOption; +import java.nio.file.attribute.FileAttribute; +import java.nio.file.attribute.FileTime; +import java.nio.file.attribute.GroupPrincipal; +import java.nio.file.attribute.PosixFileAttributeView; +import java.nio.file.attribute.PosixFilePermission; +import java.nio.file.attribute.PosixFilePermissions; +import java.nio.file.attribute.UserPrincipal; +import java.nio.file.attribute.UserPrincipalLookupService; +import java.time.Instant; +import java.util.List; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Stream; + +import static com.yahoo.vespa.hosted.node.admin.task.util.file.IOExceptionUtil.ifExists; +import static com.yahoo.yolean.Exceptions.uncheck; + +/** + * Thin wrapper around java.nio.file.Path, especially nice for UNIX-specific features. 
+ *
+ * @author hakonhall
+ */
+// @Immutable
+public class UnixPath {
+
+    /** Options used by the write methods when the caller passes none: create, truncate, write. */
+    private static final Set<OpenOption> DEFAULT_OPEN_OPTIONS =
+            Set.of(StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE);
+
+    private final Path path;
+
+    public UnixPath(Path path) { this.path = path; }
+    public UnixPath(String path) { this(Path.of(path)); }
+
+    public Path toPath() { return path; }
+    public UnixPath resolve(String relativeOrAbsolutePath) { return new UnixPath(path.resolve(relativeOrAbsolutePath)); }
+
+    /**
+     * Returns the parent directory of this path.
+     *
+     * @throws IllegalStateException if this path has no parent
+     */
+    public UnixPath getParent() {
+        Path parentPath = path.getParent();
+        if (parentPath == null) {
+            throw new IllegalStateException("Path has no parent directory: '" + path + "'");
+        }
+
+        return new UnixPath(parentPath);
+    }
+
+    /**
+     * Returns the last name element of this path.
+     *
+     * @throws IllegalStateException if this path has no filename
+     */
+    public String getFilename() {
+        Path filename = path.getFileName();
+        if (filename == null) {
+            // E.g. "/".
+            throw new IllegalStateException("Path has no filename: '" + path + "'");
+        }
+
+        return filename.toString();
+    }
+
+    public boolean exists() { return Files.exists(path); }
+
+    /** Reads the whole file and decodes it as UTF-8. */
+    public String readUtf8File() {
+        return new String(readBytes(), StandardCharsets.UTF_8);
+    }
+
+    /** Reads the whole file as a UTF-8 string, or returns empty if the file does not exist. */
+    public Optional<String> readUtf8FileIfExists() {
+        try {
+            return Optional.of(Files.readString(path));
+        } catch (NoSuchFileException ignored) {
+            return Optional.empty();
+        } catch (IOException e) {
+            throw new UncheckedIOException(e);
+        }
+    }
+
+    public byte[] readBytes() {
+        return uncheck(() -> Files.readAllBytes(path));
+    }
+
+    /** Reads and returns all bytes contained in this path, if any such path exists. */
+    public Optional<byte[]> readBytesIfExists() {
+        try {
+            return Optional.of(Files.readAllBytes(path));
+        } catch (NoSuchFileException ignored) {
+            return Optional.empty();
+        } catch (IOException e) {
+            throw new UncheckedIOException(e);
+        }
+    }
+
+    public List<String> readLines() {
+        return uncheck(() -> Files.readAllLines(path));
+    }
+
+    /** Create an empty file and return true, or false if the file already exists (the file may not be regular).
*/
+    public boolean create() {
+        try {
+            Files.createFile(path);
+            return true;
+        } catch (FileAlreadyExistsException ignored) {
+            return false;
+        } catch (IOException e) {
+            throw new UncheckedIOException(e);
+        }
+    }
+
+    /** Writes the UTF-8 encoding of the content, see {@link #writeBytes(byte[], String, OpenOption...)}. */
+    public UnixPath writeUtf8File(String content, OpenOption... options) {
+        return writeBytes(content.getBytes(StandardCharsets.UTF_8), options);
+    }
+
+    /** Writes the UTF-8 encoding of the content, see {@link #writeBytes(byte[], String, OpenOption...)}. */
+    public UnixPath writeUtf8File(String content, String permissions, OpenOption... options) {
+        return writeBytes(content.getBytes(StandardCharsets.UTF_8), permissions, options);
+    }
+
+    /** Writes the content without passing any permission attribute when opening the file. */
+    public UnixPath writeBytes(byte[] content, OpenOption... options) {
+        return writeBytes(content, null, options);
+    }
+
+    /**
+     * Writes the content to this path.
+     *
+     * @param content     the bytes to write
+     * @param permissions permissions string passed as a file attribute when opening the file,
+     *                    or null to pass no attribute
+     * @param options     open options; create, truncate and write are used if none are given
+     * @return this
+     * @throws UncheckedIOException if opening or writing fails
+     */
+    public UnixPath writeBytes(byte[] content, String permissions, OpenOption... options) {
+        FileAttribute<?>[] attributes = Optional.ofNullable(permissions)
+                .map(this::permissionsAsFileAttributes)
+                .orElseGet(() -> new FileAttribute<?>[0]);
+
+        Set<OpenOption> optionsSet = options.length == 0 ? DEFAULT_OPEN_OPTIONS : Set.of(options);
+
+        try (SeekableByteChannel channel = Files.newByteChannel(path, optionsSet, attributes)) {
+            ByteBuffer buffer = ByteBuffer.wrap(content);
+            // A single write() is allowed to consume only part of the buffer, so loop until drained.
+            while (buffer.hasRemaining()) {
+                channel.write(buffer);
+            }
+        } catch (IOException e) {
+            throw new UncheckedIOException(e);
+        }
+        return this;
+    }
+
+    /** Write a file to the same dir as this, and then atomically move it to this' path. */
+    public UnixPath atomicWriteBytes(byte[] content) {
+        // The temporary file sits next to the target, so the final move stays within one directory.
+        UnixPath temporaryPath = getParent().resolve(getFilename() + ".10Ia2f4N5");
+        temporaryPath.writeBytes(content);
+        temporaryPath.atomicMove(path);
+        return this;
+    }
+
+    public String getPermissions() {
+        return getAttributes().permissions();
+    }
+
+    /**
+     * @param permissions Example: "rwxr-x---" means rwx for owner, rx for group,
+     * and no permissions for others.
+     */
+    public UnixPath setPermissions(String permissions) {
+        Set<PosixFilePermission> permissionSet = getPosixFilePermissionsFromString(permissions);
+        uncheck(() -> Files.setPosixFilePermissions(path, permissionSet));
+        return this;
+    }
+
+    public int getOwnerId() {
+        return getAttributes().ownerId();
+    }
+
+    public UnixPath setOwner(String user) { return setOwner(user, "user"); }
+    public UnixPath setOwnerId(int uid) { return setOwner(String.valueOf(uid), "uid"); }
+    /** Looks up the principal named {@code owner}; {@code type} is only used in the error message. */
+    private UnixPath setOwner(String owner, String type) {
+        UserPrincipalLookupService service = path.getFileSystem().getUserPrincipalLookupService();
+        UserPrincipal principal = uncheck(
+                () -> service.lookupPrincipalByName(owner),
+                "While looking up %s %s", type, owner);
+        uncheck(() -> Files.setOwner(path, principal));
+        return this;
+    }
+
+    public int getGroupId() {
+        return getAttributes().groupId();
+    }
+
+    public UnixPath setGroup(String group) { return setGroup(group, "group"); }
+    public UnixPath setGroupId(int gid) { return setGroup(String.valueOf(gid), "gid"); }
+    /** Looks up the group principal named {@code group}; {@code type} is only used in the error message. */
+    private UnixPath setGroup(String group, String type) {
+        UserPrincipalLookupService service = path.getFileSystem().getUserPrincipalLookupService();
+        // Same message shape as setOwner: type already says "group" or "gid", so no extra "group" word.
+        GroupPrincipal principal = uncheck(
+                () -> service.lookupPrincipalByGroupName(group),
+                "While looking up %s %s", type, group);
+        uncheck(() -> Files.getFileAttributeView(path, PosixFileAttributeView.class).setGroup(principal));
+        return this;
+    }
+
+    public Instant getLastModifiedTime() {
+        return getAttributes().lastModifiedTime();
+    }
+
+    /** Sets the last modified time to the current time. */
+    public UnixPath updateLastModifiedTime() {
+        return setLastModifiedTime(Instant.now());
+    }
+
+    public UnixPath setLastModifiedTime(Instant instant) {
+        uncheck(() -> Files.setLastModifiedTime(path, FileTime.from(instant)));
+        return this;
+    }
+
+    /** Reads all attributes of the "unix" attribute view for this path. */
+    public FileAttributes getAttributes() {
+        return uncheck(() -> FileAttributes.fromAttributes(Files.readAttributes(path, "unix:*")));
+    }
+
+    /** Like {@link #getAttributes()}, but returns empty if the path does not exist. */
+    public Optional<FileAttributes> getAttributesIfExists() {
+        return ifExists(this::getAttributes);
+    }
+
+    public UnixPath createNewFile(String... permissions) {
+        uncheck(() -> Files.createFile(path, permissionsAsFileAttributes(permissions)));
+        return this;
+    }
+
+    /** Creates the parent directory of this path, and any missing ancestors of it. */
+    public UnixPath createParents(String... permissions) {
+        getParent().createDirectories(permissions);
+        return this;
+    }
+
+    /** Create directory with given permissions and return true, or false if it already exists. */
+    public boolean createDirectory(String... permissions) {
+        try {
+            Files.createDirectory(path, permissionsAsFileAttributes(permissions));
+        } catch (FileAlreadyExistsException ignore) {
+            return false;
+        } catch (IOException e) {
+            throw new UncheckedIOException(e);
+        }
+        return true;
+    }
+
+    public UnixPath createDirectories(String... permissions) {
+        uncheck(() -> Files.createDirectories(path, permissionsAsFileAttributes(permissions)));
+        return this;
+    }
+
+    /**
+     * Returns whether this path is a directory. Symlinks are followed, so this returns true for symlinks pointing to a
+     * directory.
+     */
+    public boolean isDirectory() {
+        return uncheck(() -> Files.isDirectory(path));
+    }
+
+    /** Returns whether this is a symlink */
+    public boolean isSymbolicLink() {
+        return Files.isSymbolicLink(path);
+    }
+
+    /**
+     * Similar to rm -rf file:
+     *  - It's not an error if file doesn't exist
+     *  - If file is a directory, it and all content is removed
+     *  - For symlinks: Only the symlink is removed, not what the symlink points to
+     */
+    public boolean deleteRecursively() {
+        if (!isSymbolicLink() && isDirectory()) {
+            // The listing is closed before the directory itself is deleted below.
+            try (Stream<UnixPath> paths = listContentsOfDirectory()) {
+                paths.forEach(UnixPath::deleteRecursively);
+            }
+        }
+        return uncheck(() -> Files.deleteIfExists(path));
+    }
+
+    public boolean deleteIfExists() {
+        return uncheck(() -> Files.deleteIfExists(path));
+    }
+
+    /** @return false if path does not exist, is not a directory, or has at least one entry.
*/
+    public boolean isEmptyDirectory() {
+        try (var entryStream = Files.list(path)) {
+            return entryStream.findAny().isEmpty();
+        } catch (NotDirectoryException | NoSuchFileException e) {
+            return false;
+        } catch (IOException e) {
+            throw new UncheckedIOException(e);
+        }
+    }
+
+    /** Lists the contents of this as a stream. Callers should use try-with to ensure that the stream is closed */
+    public Stream<UnixPath> listContentsOfDirectory() {
+        try {
+            // Avoid the temptation to collect the stream here as collecting a directory with a high number of entries
+            // can quickly lead to out of memory conditions
+            return Files.list(path).map(UnixPath::new);
+        } catch (NoSuchFileException ignored) {
+            return Stream.empty();
+        } catch (IOException e) {
+            throw new UncheckedIOException("Failed to list contents of directory " + path.toAbsolutePath(), e);
+        }
+    }
+
+    /** This path must be on the same file system as the to-path. Returns UnixPath of 'to'. */
+    public UnixPath atomicMove(Path to) {
+        uncheck(() -> Files.move(path, to, StandardCopyOption.ATOMIC_MOVE, StandardCopyOption.REPLACE_EXISTING));
+        return new UnixPath(to);
+    }
+
+    /** Moves this path to 'to' and returns true, or returns false if this path does not exist. */
+    public boolean moveIfExists(Path to) {
+        try {
+            Files.move(path, to);
+            return true;
+        } catch (NoSuchFileException ignored) {
+            return false;
+        } catch (IOException e) {
+            throw new UncheckedIOException(e);
+        }
+    }
+
+    /**
+     * Creates a symbolic link from {@code link} to {@code this} (the target)
+     * @param link the path for the symbolic link
+     * @return the path to the symbolic link
+     */
+    public UnixPath createSymbolicLink(Path link) {
+        uncheck(() -> Files.createSymbolicLink(link, path));
+        return new UnixPath(link);
+    }
+
+    @Override
+    public String toString() {
+        return path.toString();
+    }
+
+    /**
+     * Converts zero or one permission strings into the attribute array the Files methods take.
+     *
+     * @throws IllegalArgumentException if more than one permission string is given
+     */
+    private FileAttribute<?>[] permissionsAsFileAttributes(String... permissions) {
+        if (permissions.length == 0) return new FileAttribute<?>[0];
+        if (permissions.length > 1)
+            throw new IllegalArgumentException("Expected permissions to not be set or be a single string");
+
+        return new FileAttribute<?>[]{PosixFilePermissions.asFileAttribute(getPosixFilePermissionsFromString(permissions[0]))};
+    }
+
+    /** Parses a permission string such as "rwxr-x---", adding the string and path to the error on failure. */
+    private Set<PosixFilePermission> getPosixFilePermissionsFromString(String permissions) {
+        try {
+            return PosixFilePermissions.fromString(permissions);
+        } catch (IllegalArgumentException e) {
+            throw new IllegalArgumentException("Failed to set permissions '" +
+                    permissions + "' on path " + path, e);
+        }
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/UnixUser.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/UnixUser.java
new file mode 100644
index 00000000000..93ad0f21fe0
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/UnixUser.java
@@ -0,0 +1,58 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.task.util.file;
+
+import java.util.Objects;
+
+/**
+ * A regular UNIX-style user and its primary group.
+ *
+ * @author mpolden
+ */
+public class UnixUser {
+
+    public static final UnixUser ROOT = new UnixUser("root", 0, "root", 0);
+    public static final UnixUser VESPA = new UnixUser("vespa", 1000, "vespa", 1000);
+
+    private final String name;
+    private final int uid;
+    private final String group;
+    private final int gid;
+
+    /** Creates a user from its name and ID, and the name and ID of its primary group. */
+    public UnixUser(String name, int uid, String group, int gid) {
+        this.name = name;
+        this.uid = uid;
+        this.group = group;
+        this.gid = gid;
+    }
+
+    /** Username of this */
+    public String name() {
+        return name;
+    }
+
+    /** User ID of this */
+    public int uid() {
+        return uid;
+    }
+
+    /** Primary group of this */
+    public String group() {
+        return group;
+    }
+
+    /** Primary group ID of this */
+    public int gid() {
+        return gid;
+    }
+
+    @Override
+    public String toString() {
+        return "user " + name + ":" + group;
+    }
+
+    /** Two users are equal when they are of the same class and all four fields match. */
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+        UnixUser other = (UnixUser) o;
+        if (uid != other.uid) return false;
+        if (!name.equals(other.name)) return false;
+        if (gid != other.gid) return false;
+        return group.equals(other.group);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(uid, name, gid, group);
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/package-info.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/package-info.java
new file mode 100644
index 00000000000..a15b918913a
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/package-info.java
@@ -0,0 +1,5 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+@ExportPackage
+package com.yahoo.vespa.hosted.node.admin.task.util.file;
+
+import com.yahoo.osgi.annotation.ExportPackage;
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerAttributeViews.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerAttributeViews.java
new file mode 100644
index 00000000000..77978e65f42
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerAttributeViews.java
@@ -0,0 +1,81 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.task.util.fs;
+
+import java.io.IOException;
+import java.nio.file.ProviderMismatchException;
+import java.nio.file.attribute.FileTime;
+import java.nio.file.attribute.GroupPrincipal;
+import java.nio.file.attribute.PosixFileAttributeView;
+import java.nio.file.attribute.PosixFileAttributes;
+import java.nio.file.attribute.PosixFilePermission;
+import java.nio.file.attribute.UserPrincipal;
+import java.util.Map;
+import java.util.Set;
+
+import static com.yahoo.vespa.hosted.node.admin.task.util.fs.ContainerUserPrincipalLookupService.ContainerGroupPrincipal;
+import static com.yahoo.vespa.hosted.node.admin.task.util.fs.ContainerUserPrincipalLookupService.ContainerUserPrincipal;
+
+/**
+ * @author freva
+ */
+class ContainerAttributeViews {
+
+    /**
+     * POSIX attribute view that reads from pre-computed container attributes and forwards
+     * writes to a wrapped view, translating container principals to base file system principals.
+     */
+    static class ContainerPosixFileAttributeView implements PosixFileAttributeView {
+        private final PosixFileAttributeView posixFileAttributeView;
+        private final ContainerPosixFileAttributes fileAttributes;
+
+        ContainerPosixFileAttributeView(PosixFileAttributeView posixFileAttributeView,
+                                        ContainerPosixFileAttributes fileAttributes) {
+            this.posixFileAttributeView = posixFileAttributeView;
+            this.fileAttributes = fileAttributes;
+        }
+
+        @Override public String name() { return "posix"; }
+        @Override public UserPrincipal getOwner() { return fileAttributes.owner(); }
+        @Override public PosixFileAttributes readAttributes() { return fileAttributes; }
+
+        /** @throws ProviderMismatchException if the owner is not a ContainerUserPrincipal */
+        @Override
+        public void setOwner(UserPrincipal owner) throws IOException {
+            if (!(owner instanceof ContainerUserPrincipal)) throw new ProviderMismatchException();
+            posixFileAttributeView.setOwner(((ContainerUserPrincipal) owner).baseFsPrincipal());
+        }
+
+        /** @throws ProviderMismatchException if the group is not a ContainerGroupPrincipal */
+        @Override
+        public void setGroup(GroupPrincipal group) throws IOException {
+            if (!(group instanceof ContainerGroupPrincipal)) throw new ProviderMismatchException();
+            posixFileAttributeView.setGroup(((ContainerGroupPrincipal) group).baseFsPrincipal());
+        }
+
+        @Override
+        public void setTimes(FileTime lastModifiedTime, FileTime lastAccessTime, FileTime createTime) throws IOException {
+            posixFileAttributeView.setTimes(lastModifiedTime, lastAccessTime, createTime);
+        }
+
+        @Override
+        public void setPermissions(Set<PosixFilePermission> perms) throws IOException {
+            posixFileAttributeView.setPermissions(perms);
+        }
+    }
+
+    /** POSIX attributes backed by a map from attribute name to value; each getter casts the stored value. */
+    static class ContainerPosixFileAttributes implements PosixFileAttributes {
+        private final Map<String, Object> attributes;
+
+        ContainerPosixFileAttributes(Map<String, Object> attributes) {
+            this.attributes = attributes;
+        }
+
+        @SuppressWarnings("unchecked")
+        @Override public Set<PosixFilePermission> permissions() { return (Set<PosixFilePermission>) attributes.get("permissions"); }
+        @Override public ContainerUserPrincipal owner() { return (ContainerUserPrincipal) attributes.get("owner"); }
+        @Override public ContainerGroupPrincipal group() { return (ContainerGroupPrincipal) attributes.get("group"); }
+        @Override public FileTime lastModifiedTime() { return (FileTime) attributes.get("lastModifiedTime"); }
+        @Override public FileTime lastAccessTime() { return (FileTime) attributes.get("lastAccessTime"); }
+        @Override public FileTime creationTime() { return (FileTime) attributes.get("creationTime"); }
+        @Override public boolean isRegularFile() { return (boolean) attributes.get("isRegularFile"); }
+        @Override public boolean isDirectory() { return (boolean) attributes.get("isDirectory"); }
+        @Override public boolean isSymbolicLink() { return (boolean) attributes.get("isSymbolicLink"); }
+        @Override public boolean isOther() { return (boolean) attributes.get("isOther"); }
+        @Override public long size() { return (long) attributes.get("size"); }
+        @Override public Object fileKey() { return attributes.get("fileKey"); }
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerFileSystem.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerFileSystem.java
new file mode 100644
index 00000000000..3329a646671
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerFileSystem.java
@@ -0,0 +1,98 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.task.util.fs;
+
+import com.yahoo.vespa.hosted.node.admin.nodeagent.UserScope;
+
+import java.io.IOException;
+import java.nio.file.FileStore;
+import java.nio.file.FileSystem;
+import java.nio.file.Path;
+import java.nio.file.PathMatcher;
+import java.nio.file.WatchService;
+import java.util.Set;
+
+/**
+ * File system for a container, associated with a root directory on the host. It is always
+ * open and writable; closing, root/store listing, path matching and watching are unsupported.
+ *
+ * @author freva
+ */
+public class ContainerFileSystem extends FileSystem {
+
+    private final ContainerFileSystemProvider containerFsProvider;
+    private final Path containerRootOnHost;
+
+    ContainerFileSystem(ContainerFileSystemProvider containerFsProvider, Path containerRootOnHost) {
+        this.containerFsProvider = containerFsProvider;
+        this.containerRootOnHost = containerRootOnHost;
+    }
+
+    public Path containerRootOnHost() {
+        return containerRootOnHost;
+    }
+
+    /** Delegates to the provider's {@code createFileSystemRoot()}. */
+    public void createRoot() {
+        provider().createFileSystemRoot();
+    }
+
+    @Override
+    public ContainerFileSystemProvider provider() {
+        return containerFsProvider;
+    }
+
+    @Override
+    public boolean isOpen() {
+        return true;
+    }
+
+    @Override
+    public boolean isReadOnly() {
+        return false;
+    }
+
+    @Override
+    public String getSeparator() {
+        return "/";
+    }
+
+    @Override
+    public Set<String> supportedFileAttributeViews() {
+        return Set.of("basic", "posix", "unix", "owner");
+    }
+
+    @Override
+    public ContainerUserPrincipalLookupService getUserPrincipalLookupService() {
+        return containerFsProvider.userPrincipalLookupService();
+    }
+
+    /** Returns the container path for the given path inside the container, using the root user of the user scope. */
+    @Override
+    public ContainerPath getPath(String first, String... more) {
+        return ContainerPath.fromPathInContainer(this, Path.of(first, more), getUserPrincipalLookupService().userScope().root());
+    }
+
+    @Override
+    public void close() {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public Iterable<Path> getRootDirectories() {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public Iterable<FileStore> getFileStores() {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public PathMatcher getPathMatcher(String syntaxAndPattern) {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public WatchService newWatchService() {
+        throw new UnsupportedOperationException();
+    }
+
+    /** Creates a provider for the given storage root and user scope and returns its file system. */
+    public static ContainerFileSystem create(Path containerStorageRoot, UserScope userScope) {
+        return new ContainerFileSystemProvider(containerStorageRoot, userScope).getFileSystem(null);
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerFileSystemProvider.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerFileSystemProvider.java
new file mode 100644
index 00000000000..469ddd89ea3
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerFileSystemProvider.java
@@ -0,0 +1,348 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.task.util.fs; + +import com.yahoo.vespa.hosted.node.admin.nodeagent.UserScope; +import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixUser; + +import java.io.IOException; +import java.net.URI; +import java.nio.channels.SeekableByteChannel; +import java.nio.file.AccessMode; +import java.nio.file.CopyOption; +import java.nio.file.DirectoryStream; +import java.nio.file.FileStore; +import java.nio.file.FileSystem; +import java.nio.file.FileSystemAlreadyExistsException; +import java.nio.file.Files; +import java.nio.file.LinkOption; +import java.nio.file.OpenOption; +import java.nio.file.Path; +import java.nio.file.ProviderMismatchException; +import java.nio.file.SecureDirectoryStream; +import java.nio.file.attribute.BasicFileAttributeView; +import java.nio.file.attribute.BasicFileAttributes; +import java.nio.file.attribute.FileAttribute; +import java.nio.file.attribute.FileAttributeView; +import java.nio.file.attribute.GroupPrincipal; +import java.nio.file.attribute.PosixFileAttributeView; +import java.nio.file.attribute.PosixFileAttributes; +import java.nio.file.attribute.PosixFilePermission; +import java.nio.file.attribute.PosixFilePermissions; +import java.nio.file.attribute.UserPrincipal; +import java.nio.file.spi.FileSystemProvider; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.Set; + +import static com.yahoo.vespa.hosted.node.admin.task.util.fs.ContainerAttributeViews.ContainerPosixFileAttributeView; +import static com.yahoo.vespa.hosted.node.admin.task.util.fs.ContainerAttributeViews.ContainerPosixFileAttributes; +import static com.yahoo.vespa.hosted.node.admin.task.util.fs.ContainerUserPrincipalLookupService.ContainerGroupPrincipal; +import static com.yahoo.vespa.hosted.node.admin.task.util.fs.ContainerUserPrincipalLookupService.ContainerUserPrincipal; +import static com.yahoo.yolean.Exceptions.uncheck; + +/** + * @author freva + */ 
+class ContainerFileSystemProvider extends FileSystemProvider { + + private static final FileAttribute DEFAULT_FILE_PERMISSIONS = PosixFilePermissions.asFileAttribute(Set.of( // 0640 + PosixFilePermission.OWNER_READ, PosixFilePermission.OWNER_WRITE, PosixFilePermission.GROUP_READ)); + private static final FileAttribute DEFAULT_DIRECTORY_PERMISSIONS = PosixFilePermissions.asFileAttribute(Set.of( // 0750 + PosixFilePermission.OWNER_READ, PosixFilePermission.OWNER_WRITE, PosixFilePermission.OWNER_EXECUTE, PosixFilePermission.GROUP_READ, PosixFilePermission.GROUP_EXECUTE)); + + private final ContainerFileSystem containerFs; + private final ContainerUserPrincipalLookupService userPrincipalLookupService; + + ContainerFileSystemProvider(Path containerRootOnHost, UserScope userScope) { + this.containerFs = new ContainerFileSystem(this, containerRootOnHost); + this.userPrincipalLookupService = new ContainerUserPrincipalLookupService( + containerRootOnHost.getFileSystem().getUserPrincipalLookupService(), userScope); + } + + public ContainerUserPrincipalLookupService userPrincipalLookupService() { + return userPrincipalLookupService; + } + + @Override + public String getScheme() { + return "file"; + } + + @Override + public FileSystem newFileSystem(URI uri, Map env) { + throw new FileSystemAlreadyExistsException(); + } + + @Override + public ContainerFileSystem getFileSystem(URI uri) { + return containerFs; + } + + @Override + public Path getPath(URI uri) { + throw new UnsupportedOperationException(); + } + + @Override + public SeekableByteChannel newByteChannel(Path path, Set options, FileAttribute... 
attrs) throws IOException { + Path pathOnHost = pathOnHost(path); + try (SecureDirectoryStream sds = leafDirectoryStream(pathOnHost)) { + boolean existedBefore = Files.exists(pathOnHost); + SeekableByteChannel seekableByteChannel = sds.newByteChannel( + pathOnHost.getFileName(), addNoFollow(options), addPermissions(DEFAULT_FILE_PERMISSIONS, attrs)); + if (!existedBefore) fixOwnerToContainerRoot(toContainerPath(path)); + return seekableByteChannel; + } + } + + @Override + public DirectoryStream newDirectoryStream(Path dir, DirectoryStream.Filter filter) throws IOException { + Path pathOnHost = pathOnHost(dir); + return new ContainerDirectoryStream(provider(pathOnHost).newDirectoryStream(pathOnHost, filter), + toContainerPath(dir).user()); + } + + @Override + public void createDirectory(Path dir, FileAttribute... attrs) throws IOException { + Path pathOnHost = pathOnHost(dir); + boolean existedBefore = Files.exists(pathOnHost); + provider(pathOnHost).createDirectory(pathOnHost, addPermissions(DEFAULT_DIRECTORY_PERMISSIONS, attrs)); + if (!existedBefore) fixOwnerToContainerRoot(toContainerPath(dir)); + } + + @Override + public void delete(Path path) throws IOException { + Path pathOnHost = pathOnHost(path); + provider(pathOnHost).delete(pathOnHost); + } + + @Override + public void copy(Path source, Path target, CopyOption... options) throws IOException { + // Only called when both 'source' and 'target' have 'this' as the FS provider + Path targetPathOnHost = pathOnHost(target); + provider(targetPathOnHost).copy(pathOnHost(source), targetPathOnHost, addNoFollow(options)); + } + + @Override + public void move(Path source, Path target, CopyOption... 
options) throws IOException { + // Only called when both 'source' and 'target' have 'this' as the FS provider + Path targetPathOnHost = pathOnHost(target); + provider(targetPathOnHost).move(pathOnHost(source), targetPathOnHost, addNoFollow(options)); + } + + @Override + public void createSymbolicLink(Path link, Path target, FileAttribute... attrs) throws IOException { + Path pathOnHost = pathOnHost(link); + boolean existedBefore = Files.exists(pathOnHost, LinkOption.NOFOLLOW_LINKS); + if (target instanceof ContainerPath) + target = pathOnHost.getFileSystem().getPath(toContainerPath(target).pathInContainer()); + provider(pathOnHost).createSymbolicLink(pathOnHost, target, attrs); + if (!existedBefore) fixOwnerToContainerRoot(toContainerPath(link)); + } + + @Override + public Path readSymbolicLink(Path link) throws IOException { + Path pathOnHost = pathOnHost(link); + return provider(pathOnHost).readSymbolicLink(pathOnHost); + } + + @Override + public boolean isSameFile(Path path, Path path2) throws IOException { + // 'path' FS provider should be 'this' + if (path2 instanceof ContainerPath) + path2 = pathOnHost(path2); + Path pathOnHost = pathOnHost(path); + return provider(pathOnHost).isSameFile(pathOnHost, path2); + } + + @Override + public boolean isHidden(Path path) throws IOException { + Path pathOnHost = pathOnHost(path); + return provider(pathOnHost).isHidden(pathOnHost); + } + + @Override + public FileStore getFileStore(Path path) { + throw new UnsupportedOperationException(); + } + + @Override + public void checkAccess(Path path, AccessMode... modes) throws IOException { + Path pathOnHost = pathOnHost(path); + provider(pathOnHost).checkAccess(pathOnHost, modes); + } + + @Override + @SuppressWarnings("unchecked") + public V getFileAttributeView(Path path, Class type, LinkOption... 
options) { + if (!type.isAssignableFrom(PosixFileAttributeView.class)) return null; + Path pathOnHost = pathOnHost(path); + FileSystemProvider provider = pathOnHost.getFileSystem().provider(); + if (type == BasicFileAttributeView.class) // Basic view doesn't have owner/group fields, forward to base FS provider + return provider.getFileAttributeView(pathOnHost, type, addNoFollow(options)); + + PosixFileAttributeView view = provider.getFileAttributeView(pathOnHost, PosixFileAttributeView.class, addNoFollow(options)); + return (V) new ContainerPosixFileAttributeView(view, + uncheck(() -> new ContainerPosixFileAttributes(readAttributes(path, "unix:*", addNoFollow(options))))); + } + + @Override + @SuppressWarnings("unchecked") + public A readAttributes(Path path, Class type, LinkOption... options) throws IOException { + if (!type.isAssignableFrom(PosixFileAttributes.class)) throw new UnsupportedOperationException(); + Path pathOnHost = pathOnHost(path); + if (type == BasicFileAttributes.class) + return pathOnHost.getFileSystem().provider().readAttributes(pathOnHost, type, addNoFollow(options)); + + // Non-basic requests need to be upgraded to unix:* to get owner,group,uid,gid fields, which are then re-mapped + return (A) new ContainerPosixFileAttributes(readAttributes(path, "unix:*", addNoFollow(options))); + } + + @Override + public Map readAttributes(Path path, String attributes, LinkOption... 
options) throws IOException { + Path pathOnHost = pathOnHost(path); + int index = attributes.indexOf(':'); + if (index < 0 || attributes.startsWith("basic:")) + return provider(pathOnHost).readAttributes(pathOnHost, attributes, addNoFollow(options)); + + Map attrs = new HashMap<>(provider(pathOnHost).readAttributes(pathOnHost, "unix:*", addNoFollow(options))); + int uid = userPrincipalLookupService.userIdInContainer((int) attrs.get("uid")); + int gid = userPrincipalLookupService.groupIdInContainer((int) attrs.get("gid")); + attrs.put("uid", uid); + attrs.put("gid", gid); + attrs.put("owner", userPrincipalLookupService.userPrincipal(uid, (UserPrincipal) attrs.get("owner"))); + attrs.put("group", userPrincipalLookupService.groupPrincipal(gid, (GroupPrincipal) attrs.get("group"))); + return attrs; + } + + @Override + public void setAttribute(Path path, String attribute, Object value, LinkOption... options) throws IOException { + Path pathOnHost = pathOnHost(path); + provider(pathOnHost).setAttribute(pathOnHost, attribute, fixAttributeValue(attribute, value), addNoFollow(options)); + } + + private Object fixAttributeValue(String attribute, Object value) { + int index = attribute.indexOf(':'); + if (index > 0) { + switch (attribute.substring(index + 1)) { + case "owner": return cast(value, ContainerUserPrincipal.class).baseFsPrincipal(); + case "group": return cast(value, ContainerGroupPrincipal.class).baseFsPrincipal(); + case "uid": return userPrincipalLookupService.userIdOnHost(cast(value, Integer.class)); + case "gid": return userPrincipalLookupService.groupIdOnHost(cast(value, Integer.class)); + } + } // else basic file attribute + return value; + } + + void createFileSystemRoot() { + ContainerPath root = containerFs.getPath("/"); + if (!Files.exists(root)) { + uncheck(() -> { + Files.createDirectories(root.pathOnHost()); + fixOwnerToContainerRoot(root); + }); + } + } + + private void fixOwnerToContainerRoot(ContainerPath path) throws IOException { + 
setAttribute(path, "unix:uid", path.user().uid(), LinkOption.NOFOLLOW_LINKS); + setAttribute(path, "unix:gid", path.user().gid(), LinkOption.NOFOLLOW_LINKS); + } + + private SecureDirectoryStream leafDirectoryStream(Path pathOnHost) throws IOException { + Path containerRoot = containerFs.containerRootOnHost(); + SecureDirectoryStream sds = ((SecureDirectoryStream) Files.newDirectoryStream(containerRoot)); + for (int i = containerRoot.getNameCount(); i < pathOnHost.getNameCount() - 1; i++) { + SecureDirectoryStream next = sds.newDirectoryStream(pathOnHost.getName(i), LinkOption.NOFOLLOW_LINKS); + sds.close(); + sds = next; + } + return sds; + } + + private class ContainerDirectoryStream implements DirectoryStream { + private final DirectoryStream hostDirectoryStream; + private final UnixUser user; + + private ContainerDirectoryStream(DirectoryStream hostDirectoryStream, UnixUser user) { + this.hostDirectoryStream = hostDirectoryStream; + this.user = user; + } + + @Override + public Iterator iterator() { + Iterator hostPathIterator = hostDirectoryStream.iterator(); + return new Iterator<>() { + @Override + public boolean hasNext() { + return hostPathIterator.hasNext(); + } + + @Override + public Path next() { + Path pathOnHost = hostPathIterator.next(); + return ContainerPath.fromPathOnHost(containerFs, pathOnHost, user); + } + }; + } + + @Override + public void close() throws IOException { + hostDirectoryStream.close(); + } + } + + static ContainerPath toContainerPath(Path path) { + return cast(path, ContainerPath.class); + } + + private static T cast(Object value, Class type) { + if (type.isInstance(value)) return type.cast(value); + throw new ProviderMismatchException("Expected " + type.getSimpleName() + ", was " + value.getClass().getName()); + } + + private static Path pathOnHost(Path path) { + return toContainerPath(path).pathOnHost(); + } + + private static FileSystemProvider provider(Path path) { + return path.getFileSystem().provider(); + } + + private 
static Set addNoFollow(Set options) { + if (options.contains(LinkOption.NOFOLLOW_LINKS)) return options; + Set copy = new HashSet<>(options); + copy.add(LinkOption.NOFOLLOW_LINKS); + return copy; + } + + private static LinkOption[] addNoFollow(LinkOption... options) { + if (Set.of(options).contains(LinkOption.NOFOLLOW_LINKS)) return options; + LinkOption[] copy = new LinkOption[options.length + 1]; + System.arraycopy(options, 0, copy, 0, options.length); + copy[options.length] = LinkOption.NOFOLLOW_LINKS; + return copy; + } + + private static CopyOption[] addNoFollow(CopyOption... options) { + if (Set.of(options).contains(LinkOption.NOFOLLOW_LINKS)) return options; + CopyOption[] copy = new CopyOption[options.length + 1]; + System.arraycopy(options, 0, copy, 0, options.length); + copy[options.length] = LinkOption.NOFOLLOW_LINKS; + return copy; + } + + private static FileAttribute[] addPermissions(FileAttribute defaultPermissions, FileAttribute... attrs) { + for (FileAttribute attr : attrs) { + if (attr.name().equals("posix:permissions") || attr.name().equals("unix:permissions")) + return attrs; + } + + FileAttribute[] copy = new FileAttribute[attrs.length + 1]; + System.arraycopy(attrs, 0, copy, 0, attrs.length); + copy[attrs.length] = defaultPermissions; + return copy; + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerPath.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerPath.java new file mode 100644 index 00000000000..314e7cde5e2 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerPath.java @@ -0,0 +1,224 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.task.util.fs; + +import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixUser; + +import java.io.IOException; +import java.net.URI; +import java.nio.file.LinkOption; +import java.nio.file.Path; +import java.nio.file.WatchEvent; +import java.nio.file.WatchKey; +import java.nio.file.WatchService; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Objects; + +import static com.yahoo.vespa.hosted.node.admin.task.util.fs.ContainerFileSystemProvider.toContainerPath; + +/** + * Represents a path in container that is mapped in from the host. ContainerPaths are always normalized and absolute. + * + * @author freva + */ +public class ContainerPath implements Path { + private final ContainerFileSystem containerFs; + private final Path pathOnHost; + private final String[] parts; + private final UnixUser user; + + private ContainerPath(ContainerFileSystem containerFs, Path pathOnHost, String[] parts, UnixUser user) { + this.containerFs = Objects.requireNonNull(containerFs); + this.pathOnHost = Objects.requireNonNull(pathOnHost); + this.parts = Objects.requireNonNull(parts); + this.user = Objects.requireNonNull(user); + + if (!pathOnHost.isAbsolute()) + throw new IllegalArgumentException("Path host must be absolute: " + pathOnHost); + Path containerRootOnHost = containerFs.containerRootOnHost(); + if (!pathOnHost.startsWith(containerRootOnHost)) + throw new IllegalArgumentException("Path on host (" + pathOnHost + ") must start with container root on host (" + containerRootOnHost + ")"); + } + + public Path pathOnHost() { return pathOnHost; } + public String pathInContainer() { return '/' + String.join("/", parts); } + public ContainerPath withUser(UnixUser user) { return new ContainerPath(containerFs, pathOnHost, parts, user); } + public UnixUser user() { return user; } + + @Override + public ContainerFileSystem getFileSystem() { + return containerFs; + } + + @Override + public 
ContainerPath getRoot() { + return resolve(containerFs, new String[0], Path.of("/"), user); + } + + @Override + public Path getFileName() { + if (parts.length == 0) return null; + return Path.of(parts[parts.length - 1]); + } + + @Override + public ContainerPath getParent() { + if (parts.length == 0) return null; + return new ContainerPath(containerFs, pathOnHost.getParent(), Arrays.copyOf(parts, parts.length-1), user); + } + + @Override + public int getNameCount() { + return parts.length; + } + + @Override + public Path getName(int index) { + return Path.of(parts[index]); + } + + @Override + public Path subpath(int beginIndex, int endIndex) { + if (beginIndex < 0 || beginIndex >= endIndex || endIndex > parts.length) + throw new IllegalArgumentException(); + if (endIndex - beginIndex == 1) return getName(beginIndex); + + String[] rest = new String[endIndex - beginIndex - 1]; + System.arraycopy(parts, beginIndex + 1, rest, 0, rest.length); + return Path.of(parts[beginIndex], rest); + } + + @Override public ContainerPath resolve(Path other) { return resolve(containerFs, parts, other, user); } + @Override public ContainerPath resolve(String other) { return resolve(Path.of(other)); } + @Override public ContainerPath resolveSibling(String other) { return resolve(Path.of("..", other)); } + + @Override + public boolean startsWith(Path other) { + if (other.getFileSystem() != containerFs) return false; + String[] otherParts = toContainerPath(other).parts; + if (parts.length < otherParts.length) return false; + + for (int i = 0; i < otherParts.length; i++) { + if ( ! parts[i].equals(otherParts[i])) return false; + } + return true; + } + + @Override + public boolean endsWith(Path other) { + int offset = parts.length - other.getNameCount(); + // If the other path is longer than this, or the other path is absolute and shorter than this + if (offset < 0 || (other.isAbsolute() && offset > 0)) return false; + + for (int i = 0; i < other.getNameCount(); i++) { + if ( ! 
parts[offset + i].equals(other.getName(i).toString())) return false; + } + return true; + } + + @Override + public boolean isAbsolute() { + // All container paths are normalized and absolute + return true; + } + + @Override + public ContainerPath normalize() { + // All container paths are normalized and absolute + return this; + } + + @Override + public ContainerPath toAbsolutePath() { + // All container paths are normalized and absolute + return this; + } + + @Override + public ContainerPath toRealPath(LinkOption... options) throws IOException { + Path realPathOnHost = pathOnHost.toRealPath(options); + if (realPathOnHost.equals(pathOnHost)) return this; + return fromPathOnHost(containerFs, realPathOnHost, user); + } + + @Override + public Path relativize(Path other) { + return pathOnHost.relativize(toContainerPath(other).pathOnHost); + } + + @Override + public URI toUri() { + throw new UnsupportedOperationException(); + } + + @Override + public WatchKey register(WatchService watcher, WatchEvent.Kind[] events, WatchEvent.Modifier... 
modifiers) throws IOException { + return pathOnHost.register(watcher, events, modifiers); + } + + @Override + public int compareTo(Path other) { + return pathOnHost.compareTo(toContainerPath(other).pathOnHost); // compare host paths; a host Path cannot be compared with a ContainerPath + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + ContainerPath paths = (ContainerPath) o; + return containerFs.equals(paths.containerFs) && pathOnHost.equals(paths.pathOnHost) && Arrays.equals(parts, paths.parts); + } + + @Override + public int hashCode() { + int result = Objects.hash(containerFs, pathOnHost); + result = 31 * result + Arrays.hashCode(parts); + return result; + } + + @Override + public String toString() { + return containerFs.containerRootOnHost().getFileName() + ":" + pathInContainer(); + } + + private static ContainerPath resolve(ContainerFileSystem containerFs, String[] currentParts, Path other, UnixUser user) { + List parts = other.isAbsolute() ? new ArrayList<>() : new ArrayList<>(Arrays.asList(currentParts)); + for (int i = 0; i < other.getNameCount(); i++) { + String part = other.getName(i).toString(); + if (part.isEmpty() || part.equals(".")) continue; + if (part.equals("..")) { + if (!parts.isEmpty()) parts.remove(parts.size() - 1); + continue; + } + parts.add(part); + } + + return new ContainerPath(containerFs, + containerFs.containerRootOnHost().resolve(String.join("/", parts)), + parts.toArray(String[]::new), + user); + } + + public static ContainerPath fromPathInContainer(ContainerFileSystem containerFs, Path pathInContainer, UnixUser user) { + if (!pathInContainer.isAbsolute()) + throw new IllegalArgumentException("Path in container must be absolute: " + pathInContainer); + return resolve(containerFs, new String[0], pathInContainer, user); + } + + public static ContainerPath fromPathOnHost(ContainerFileSystem containerFs, Path pathOnHost, UnixUser user) { + pathOnHost = pathOnHost.normalize(); + Path containerRootOnHost = 
containerFs.containerRootOnHost(); + Path pathUnderContainerStorage = containerRootOnHost.relativize(pathOnHost); + + if (pathUnderContainerStorage.getNameCount() == 0 || pathUnderContainerStorage.getName(0).toString().isEmpty()) + return new ContainerPath(containerFs, pathOnHost, new String[0], user); + if (pathUnderContainerStorage.getName(0).toString().equals("..")) + throw new IllegalArgumentException("Path " + pathOnHost + " is not under container root " + containerRootOnHost); + + List parts = new ArrayList<>(); + for (int i = 0; i < pathUnderContainerStorage.getNameCount(); i++) + parts.add(pathUnderContainerStorage.getName(i).toString()); + return new ContainerPath(containerFs, pathOnHost, parts.toArray(String[]::new), user); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerUserPrincipalLookupService.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerUserPrincipalLookupService.java new file mode 100644 index 00000000000..1a9b9b60cd4 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerUserPrincipalLookupService.java @@ -0,0 +1,130 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.task.util.fs; + +import com.yahoo.vespa.hosted.node.admin.nodeagent.UserScope; +import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixUser; + +import java.io.IOException; +import java.nio.file.attribute.GroupPrincipal; +import java.nio.file.attribute.UserPrincipal; +import java.nio.file.attribute.UserPrincipalLookupService; +import java.nio.file.attribute.UserPrincipalNotFoundException; +import java.util.Objects; +import java.util.function.Function; + +/** + * @author freva + */ +public class ContainerUserPrincipalLookupService extends UserPrincipalLookupService { + + private final UserPrincipalLookupService baseFsUserPrincipalLookupService; + private final UserScope userScope; + + ContainerUserPrincipalLookupService(UserPrincipalLookupService baseFsUserPrincipalLookupService, UserScope userScope) { + this.baseFsUserPrincipalLookupService = Objects.requireNonNull(baseFsUserPrincipalLookupService); + this.userScope = Objects.requireNonNull(userScope); + } + + public UserScope userScope() { return userScope; } + + public int userIdOnHost(int containerUid) { return userScope.namespace().userIdOnHost(containerUid); } + public int groupIdOnHost(int containerGid) { return userScope.namespace().groupIdOnHost(containerGid); } + public int userIdInContainer(int hostUid) { return userScope.namespace().userIdInContainer(hostUid); } + public int groupIdInContainer(int hostGid) { return userScope.namespace().groupIdInContainer(hostGid); } + + @Override + public ContainerUserPrincipal lookupPrincipalByName(String name) throws IOException { + int containerUid = resolveName(name, UnixUser::uid, UnixUser::name); + String user = resolveId(containerUid, UnixUser::uid, UnixUser::name); + String hostUid = String.valueOf(userIdOnHost(containerUid)); + return new ContainerUserPrincipal(containerUid, user, baseFsUserPrincipalLookupService.lookupPrincipalByName(hostUid)); + } + + @Override + public ContainerGroupPrincipal 
lookupPrincipalByGroupName(String group) throws IOException { + int containerGid = resolveName(group, UnixUser::gid, UnixUser::group); + String name = resolveId(containerGid, UnixUser::gid, UnixUser::group); + String hostGid = String.valueOf(groupIdOnHost(containerGid)); + return new ContainerGroupPrincipal(containerGid, name, baseFsUserPrincipalLookupService.lookupPrincipalByGroupName(hostGid)); + } + + public ContainerUserPrincipal userPrincipal(int uid, UserPrincipal baseFsPrincipal) { + String name = resolveId(uid, UnixUser::uid, UnixUser::name); + return new ContainerUserPrincipal(uid, name, baseFsPrincipal); + } + + public ContainerGroupPrincipal groupPrincipal(int gid, GroupPrincipal baseFsPrincipal) { + String name = resolveId(gid, UnixUser::gid, UnixUser::group); + return new ContainerGroupPrincipal(gid, name, baseFsPrincipal); + } + + private String resolveId(int id, Function idExtractor, Function nameExtractor) { + if (idExtractor.apply(userScope.root()) == id) return nameExtractor.apply(userScope.root()); + if (idExtractor.apply(userScope.vespa()) == id) return nameExtractor.apply(userScope.vespa()); + return String.valueOf(id); + } + + private int resolveName(String name, Function idExtractor, Function nameExtractor) throws UserPrincipalNotFoundException { + if (name.equals(nameExtractor.apply(userScope.root()))) return idExtractor.apply(userScope.root()); + if (name.equals(nameExtractor.apply(userScope.vespa()))) return idExtractor.apply(userScope.vespa()); + + try { + return Integer.parseInt(name); + } catch (NumberFormatException ignored) { + throw new UserPrincipalNotFoundException(name); + } + } + + private abstract static class NamedPrincipal implements UserPrincipal { + private final int id; + private final String name; + private final UserPrincipal baseFsPrincipal; + + private NamedPrincipal(int id, String name, UserPrincipal baseFsPrincipal) { + this.id = id; + this.name = Objects.requireNonNull(name); + this.baseFsPrincipal = 
Objects.requireNonNull(baseFsPrincipal); + } + + @Override + public final String getName() { + return name; + } + + public int id() { + return id; + } + + public UserPrincipal baseFsPrincipal() { + return baseFsPrincipal; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + NamedPrincipal that = (NamedPrincipal) o; + return id == that.id && baseFsPrincipal.equals(that.baseFsPrincipal); + } + + @Override + public int hashCode() { + return Objects.hash(id, baseFsPrincipal); + } + + @Override + public String toString() { + return "{id=" + id + ", baseFsPrincipal=" + baseFsPrincipal + '}'; + } + } + + static final class ContainerUserPrincipal extends NamedPrincipal { + private ContainerUserPrincipal(int id, String name, UserPrincipal baseFsPrincipal) { super(id, name, baseFsPrincipal); } + } + + static final class ContainerGroupPrincipal extends NamedPrincipal implements GroupPrincipal { + private ContainerGroupPrincipal(int id, String name, GroupPrincipal baseFsPrincipal) { super(id, name, baseFsPrincipal); } + + @Override public GroupPrincipal baseFsPrincipal() { return (GroupPrincipal) super.baseFsPrincipal(); } + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/package-info.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/package-info.java new file mode 100644 index 00000000000..6891089ff71 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/package-info.java @@ -0,0 +1,5 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+@ExportPackage +package com.yahoo.vespa.hosted.node.admin.task.util.fs; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/network/IPAddresses.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/network/IPAddresses.java new file mode 100644 index 00000000000..965cd9942d6 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/network/IPAddresses.java @@ -0,0 +1,142 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.task.util.network; + +import ai.vespa.net.CidrBlock; +import com.google.common.net.InetAddresses; +import com.yahoo.vespa.hosted.node.admin.nodeadmin.ConvergenceException; + +import java.io.UncheckedIOException; +import java.net.Inet4Address; +import java.net.Inet6Address; +import java.net.InetAddress; +import java.net.UnknownHostException; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * IP addresses - IP utilities to retrieve and manipulate addresses for docker host and docker containers in a + * multi-home environment. + *

+ * The assumption is that DNS is the source of truth for which addresses are assigned to the host and which + * belong to the containers. Only one address should be assigned to each. + *

+ * The behavior with respect to site-local addresses is distinct for IPv4 and IPv6. For IPv4 we choose + * the site-local address (assuming the public one is a NAT address not assigned to the host interface, the typical AWS setup). + *

+ * For IPv6 we disregard any site-local addresses (these are normally not in DNS anyway). + *

+ * This class also provides some utilities for prefix translation. + * + * @author smorgrav + */ +public interface IPAddresses { + CidrBlock gcpInternalBlock = CidrBlock.fromString("2600:2d00::/32"); + + InetAddress[] getAddresses(String hostname); + + default Optional getAddress(String hostname, IPVersion ipVersion) { + return ipVersion == IPVersion.IPv6 + ? getIPv6Address(hostname).map(InetAddress.class::cast) + : getIPv4Address(hostname).map(InetAddress.class::cast); + } + + /** + * Returns a list of string representation of the IP addresses (RFC 5952 compact format) + */ + default List getAddresses(String hostname, IPVersion ipVersion) { + return Stream.of(getAddresses(hostname)) + .filter(inetAddress -> isOfType(inetAddress, ipVersion)) + .map(InetAddresses::toAddrString) + .toList(); + } + + /** + * Get the IPv6 address for the host if any. + * + * @throws ConvergenceException if multiple addresses are found + */ + default Optional getIPv6Address(String hostname) { + List ipv6addresses = Stream.of(getAddresses(hostname)) + .filter(Inet6Address.class::isInstance) + .filter(inetAddress -> !inetAddress.isLoopbackAddress()) + .map(Inet6Address.class::cast) + .filter(inetAddress -> !inetAddress.isLinkLocalAddress()) + .filter(inetAddress -> !inetAddress.isSiteLocalAddress()) + .filter(inet6Address -> !gcpInternalBlock.contains(inet6Address)) + .toList(); + + if (ipv6addresses.size() <= 1) return ipv6addresses.stream().findFirst(); + + String addresses = ipv6addresses.stream().map(InetAddresses::toAddrString).collect(Collectors.joining(",")); + throw ConvergenceException.ofError( + String.format( + "Multiple IPv6 addresses found: %s. 
Perhaps a missing DNS entry or multiple AAAA records in DNS?", + addresses)); + } + + /** Returns the hostname of given inetAddress */ + default String getHostname(InetAddress inetAddress) { + String hostname = inetAddress.getHostName(); + if (hostname.equals(inetAddress.getHostAddress())) { + throw new IllegalArgumentException("Could not find hostname for address " + inetAddress.getHostAddress()); + } + return hostname; + } + + /** + * Get the IPv4 address for the host if any. + * + * @throws ConvergenceException if multiple site-local addresses are found + */ + default Optional getIPv4Address(String hostname) { + List ipv4Addresses = Stream.of(getAddresses(hostname)) + .filter(Inet4Address.class::isInstance) + .filter(inetAddress -> !inetAddress.isLoopbackAddress()) + .map(Inet4Address.class::cast) + .toList(); + + if (ipv4Addresses.size() <= 1) return ipv4Addresses.stream().findFirst(); + + List siteLocalIPv4Addresses = ipv4Addresses.stream() + .filter(InetAddress::isSiteLocalAddress) + .toList(); + + if (siteLocalIPv4Addresses.size() == 1) return Optional.of(siteLocalIPv4Addresses.get(0)); + + String addresses = ipv4Addresses.stream().map(InetAddresses::toAddrString).collect(Collectors.joining(",")); + throw ConvergenceException.ofError( + String.format( + "Multiple IPv4 addresses found: %s. Perhaps a missing DNS entry or multiple A records in DNS?", + addresses)); + } + + static boolean isOfType(InetAddress address, IPVersion ipVersion) { + if (ipVersion.equals(IPVersion.IPv4) && address instanceof Inet4Address) return true; + if (ipVersion.equals(IPVersion.IPv6) && address instanceof Inet6Address) return true; + return false; + } + + /** + * For NPTed networks we want to find the private address from a public. 
+ * + * @param address The original address to translate + * @param prefix The prefix address + * @param subnetSizeInBytes in bytes - e.g. a /64 subnet equals 8 bytes + * @return The translated address + * @throws UncheckedIOException if the resulting byte array is of illegal length for an IP address + */ + static InetAddress prefixTranslate(InetAddress address, InetAddress prefix, int subnetSizeInBytes) { + return prefixTranslate(address.getAddress(), prefix.getAddress(), subnetSizeInBytes); + } + + static InetAddress prefixTranslate(byte[] address, byte[] prefix, int nofBytes) { + System.arraycopy(prefix, 0, address, 0, nofBytes); + try { + return InetAddress.getByAddress(address); + } catch (UnknownHostException e) { + throw new UncheckedIOException(e); + } + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/network/IPAddressesImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/network/IPAddressesImpl.java new file mode 100644 index 00000000000..4680502cee7 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/network/IPAddressesImpl.java @@ -0,0 +1,21 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.task.util.network; + +import java.io.UncheckedIOException; +import java.net.InetAddress; +import java.net.UnknownHostException; + +/** + * @author smorgrav + */ +public class IPAddressesImpl implements IPAddresses { + + @Override + public InetAddress[] getAddresses(String hostname) { + try { + return InetAddress.getAllByName(hostname); + } catch (UnknownHostException e) { + throw new UncheckedIOException(e); + } + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/network/IPVersion.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/network/IPVersion.java new file mode 100644 index 00000000000..eb92cbdd303 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/network/IPVersion.java @@ -0,0 +1,85 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.task.util.network; + +import com.google.common.net.InetAddresses; + +import java.net.Inet4Address; +import java.net.InetAddress; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Strong type IPv4 and IPv6 with common executables for ip related commands. 
+ * + * @author smorgrav + */ +public enum IPVersion { + + IPv6(6, "ip6tables", "ip -6", "ipv6-icmp", 128, "icmp6-port-unreachable", "ip6tables-restore", "fe80::/10"), + IPv4(4, "iptables", "ip", "icmp", 32, "icmp-port-unreachable", "iptables-restore", "169.254.0.0/16"); + + private static final Pattern cidrNotationPattern = Pattern.compile("/\\d+$"); + + IPVersion(int version, String iptablesCmd, String ipCmd, + String icmpProtocol, int size, String icmpPortUnreachable, + String iptablesRestore, String linkLocalCidr) { + this.version = version; + this.ipCmd = ipCmd; + this.iptablesCmd = iptablesCmd; + this.icmpProtocol = icmpProtocol; + this.size = size; + this.icmpPortUnreachable = icmpPortUnreachable; + this.iptablesRestore = iptablesRestore; + this.linkLocalCidr = linkLocalCidr; + } + + private final int version; + private final String iptablesCmd; + private final String ipCmd; + private final String icmpProtocol; + private final int size; + private final String icmpPortUnreachable; + private final String iptablesRestore; + private final String linkLocalCidr; + + /** The ID of the IP version, either IPv4 or IPv6. */ + public String id() { return "IPv" + version; } + + /** The IP version, either 4 or 6 */ + public int version() { return version; } + + public String versionString() { return String.valueOf(version); } + public String iptablesCmd() { return iptablesCmd; } + public String iptablesRestore() { return iptablesRestore;} + public String ipCmd() { return ipCmd; } + public String icmpProtocol() { return icmpProtocol; } + public String singleHostCidr() { return "/" + size; } + public String icmpPortUnreachable() { return icmpPortUnreachable; } + + /** The address size (in bits) of the IP version: 32 or 128. */ + public int addressSize() { return size; } + + /** Both IPv4 and IPv6 have exactly one link-local address space: 169.254.0.0/16 or fe80::/10. 
*/ + public String linkLocalAddressCidr() { return linkLocalCidr; } + + public boolean match(InetAddress address) { + return this == IPVersion.get(address); + } + + public boolean match(String address) { + return this == IPVersion.get(address); + } + + public static IPVersion get(String address) { + Matcher matcher = cidrNotationPattern.matcher(address); + if (matcher.find()) { + address = matcher.replaceFirst(""); + } + return get(InetAddresses.forString(address)); + } + + public static IPVersion get(InetAddress address) { + return address instanceof Inet4Address ? IPv4 : IPv6; + } + +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/network/VersionedIpAddress.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/network/VersionedIpAddress.java new file mode 100644 index 00000000000..1186a58f53d --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/network/VersionedIpAddress.java @@ -0,0 +1,85 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.task.util.network; + +import com.google.common.net.InetAddresses; + +import java.net.Inet4Address; +import java.net.Inet6Address; +import java.net.InetAddress; +import java.util.Objects; + +/** + * Encapsulates an IP address and its version along with some convenience methods. + * Default sorting is by version (IPv6 first), then by address. 
+ * + * @author gjoranv + */ +public class VersionedIpAddress implements Comparable { + + private final InetAddress address; + private final IPVersion version; + + private VersionedIpAddress(InetAddress address) { + this.address = Objects.requireNonNull(address); + version = getVersionOrThrow(address); + } + + public static VersionedIpAddress from(InetAddress address) { + return new VersionedIpAddress(address); + } + + public static VersionedIpAddress from(String address) { + return from(InetAddresses.forString(address)); + } + + public IPVersion version() { + return version; + } + + public String asString() { + return InetAddresses.toAddrString(address); + } + + public String asEndpoint(int port) { + var format = (version == IPVersion.IPv6) ? "[%s]:%d" : "%s:%d"; + return String.format(format, asString(), port); + } + + @Override + public int compareTo(VersionedIpAddress o) { + int version = version().compareTo(o.version()); + return (version != 0) ? version : asString().compareTo(o.asString()); + } + + @Override + public String toString() { + return "VersionedIpAddress{" + + "address=" + address + + ", version=" + version + + '}'; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + VersionedIpAddress that = (VersionedIpAddress) o; + return address.equals(that.address) && version == that.version; + } + + @Override + public int hashCode() { + return Objects.hash(address, version); + } + + private static IPVersion getVersionOrThrow(InetAddress address) { + if (address instanceof Inet4Address) { + return IPVersion.IPv4; + } else if (address instanceof Inet6Address) { + return IPVersion.IPv6; + } else { + throw new IllegalArgumentException("Unknown IP version for " + InetAddresses.toAddrString(address) + " of class " + address.getClass().getName()); + } + } + +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/network/package-info.java 
b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/network/package-info.java new file mode 100644 index 00000000000..9533b7240c4 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/network/package-info.java @@ -0,0 +1,5 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.vespa.hosted.node.admin.task.util.network; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/package-info.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/package-info.java new file mode 100644 index 00000000000..572182f7991 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/package-info.java @@ -0,0 +1,8 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * @author bjorncs + */ +@ExportPackage +package com.yahoo.vespa.hosted.node.admin.task.util; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ChildProcess2.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ChildProcess2.java new file mode 100644 index 00000000000..007547aa41b --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ChildProcess2.java @@ -0,0 +1,16 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.vespa.hosted.node.admin.task.util.process; + +/** + * @author hakonhall + */ +public interface ChildProcess2 extends AutoCloseable { + void waitForTermination(); + int exitCode(); + String getOutput(); + + /** Close/cleanup any resources held. Must not throw an exception. 
*/ + @Override + void close(); +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ChildProcess2Impl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ChildProcess2Impl.java new file mode 100644 index 00000000000..8574028b6d7 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ChildProcess2Impl.java @@ -0,0 +1,139 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.vespa.hosted.node.admin.task.util.process; + +import com.yahoo.jdisc.Timer; +import java.util.logging.Level; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.time.Duration; +import java.time.Instant; +import java.util.concurrent.TimeUnit; +import java.util.logging.Logger; + +import static com.yahoo.yolean.Exceptions.uncheck; + +/** + * @author hakonhall + */ +public class ChildProcess2Impl implements ChildProcess2 { + private static final Logger logger = Logger.getLogger(ChildProcess2Impl.class.getName()); + + private final CommandLine commandLine; + private final ProcessApi2 process; + private final Path outputPath; + private final Timer timer; + + public ChildProcess2Impl(CommandLine commandLine, + ProcessApi2 process, + Path outputPath, + Timer timer) { + this.commandLine = commandLine; + this.process = process; + this.outputPath = outputPath; + this.timer = timer; + } + + @Override + public void waitForTermination() { + Duration timeoutDuration = commandLine.getTimeout(); + Instant timeout = timer.currentTime().plus(timeoutDuration); + long maxOutputBytes = commandLine.getMaxOutputBytes(); + + // How frequently do we want to wake up and check the output file size? 
+ final Duration pollInterval = Duration.ofSeconds(10); + + boolean hasTerminated = false; + while (!hasTerminated) { + Instant now = timer.currentTime(); + long sleepPeriodMillis = pollInterval.toMillis(); + if (now.plusMillis(sleepPeriodMillis).isAfter(timeout)) { + sleepPeriodMillis = Duration.between(now, timeout).toMillis(); + + if (sleepPeriodMillis <= 0) { + gracefullyKill(); + throw new TimeoutChildProcessException( + timeoutDuration, commandLine.toString(), getOutput()); + } + } + + try { + hasTerminated = process.waitFor(sleepPeriodMillis, TimeUnit.MILLISECONDS); + } catch (InterruptedException e) { + // Ignore, just loop around. + continue; + } + + // Always check output file size to ensure we don't load too much into memory. + long sizeInBytes = uncheck(() -> Files.size(outputPath)); + if (sizeInBytes > maxOutputBytes) { + gracefullyKill(); + throw new LargeOutputChildProcessException( + sizeInBytes, commandLine.toString(), getOutput()); + } + } + } + + @Override + public int exitCode() { + return process.exitValue(); + } + + @Override + public String getOutput() { + byte[] bytes = uncheck(() -> Files.readAllBytes(outputPath)); + return new String(bytes, commandLine.getOutputEncoding()); + } + + @Override + public void close() { + try { + if (commandLine.getOutputFile().isEmpty()) + Files.delete(outputPath); + } catch (Throwable t) { + logger.log(Level.WARNING, "Failed to delete " + outputPath, t); + } + } + + Path getOutputPath() { + return outputPath; + } + + private void gracefullyKill() { + process.destroy(); + + Duration maxWaitAfterSigTerm = commandLine.getSigTermGracePeriod(); + Instant timeout = timer.currentTime().plus(maxWaitAfterSigTerm); + if (!waitForTermination(timeout)) { + process.destroyForcibly(); + + // If waiting for the process now takes a long time, it's probably a kernel issue + // or huge core is getting dumped. 
+ Duration maxWaitAfterSigKill = commandLine.getSigKillGracePeriod(); + if (!waitForTermination(timer.currentTime().plus(maxWaitAfterSigKill))) { + throw new UnkillableChildProcessException( + maxWaitAfterSigTerm, + maxWaitAfterSigKill, + commandLine.toString(), + getOutput()); + } + } + } + + /** @return true if process terminated, false on timeout. */ + private boolean waitForTermination(Instant timeout) { + while (true) { + long waitDurationMillis = Duration.between(timer.currentTime(), timeout).toMillis(); + if (waitDurationMillis <= 0) { + return false; + } + + try { + return process.waitFor(waitDurationMillis, TimeUnit.MILLISECONDS); + } catch (InterruptedException e) { + // ignore + } + } + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ChildProcessException.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ChildProcessException.java new file mode 100644 index 00000000000..9a0c08a8596 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ChildProcessException.java @@ -0,0 +1,47 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.vespa.hosted.node.admin.task.util.process; + +import com.yahoo.text.internal.SnippetGenerator; + +/** + * Base class for child process related exceptions, with a util to build an error message + * that includes a large part of the output. + * + * @author hakonhall + */ +@SuppressWarnings("serial") +public abstract class ChildProcessException extends RuntimeException { + private static final SnippetGenerator snippetGenerator = new SnippetGenerator(); + + /** + * An exception with a message of the following format: + * Command 'COMMANDLINE' PROBLEM: stdout/stderr: 'OUTPUT' + * + * If the output of the terminated command is too large it will be sampled. + * + * @param problem E.g. 
"terminated with exit code 1" + * @param commandLine The command that failed in a concise (e.g. shell-like) format + * @param possiblyHugeOutput The output of the command + */ + protected ChildProcessException(String problem, String commandLine, String possiblyHugeOutput) { + super(makeSnippet(problem, commandLine, possiblyHugeOutput)); + } + + protected ChildProcessException(RuntimeException cause, + String problem, + String commandLine, + String possiblyHugeOutput) { + super(makeSnippet(problem, commandLine, possiblyHugeOutput), cause); + } + + private static String makeSnippet(String problem, String commandLine, String possiblyHugeOutput) { + return "Command '" + + commandLine + + "' " + + problem + + ": stdout/stderr: '" + + snippetGenerator.makeSnippet(possiblyHugeOutput, 500) + + "'"; + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ChildProcessFailureException.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ChildProcessFailureException.java new file mode 100644 index 00000000000..2d1fe1f24bd --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ChildProcessFailureException.java @@ -0,0 +1,15 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.vespa.hosted.node.admin.task.util.process; + +/** + * The child process terminated with a non-zero exit code. 
+ * + * @author hakonhall + */ +@SuppressWarnings("serial") +public class ChildProcessFailureException extends ChildProcessException { + ChildProcessFailureException(int exitCode, String commandLine, String possiblyHugeOutput) { + super("terminated with exit code " + exitCode, commandLine, possiblyHugeOutput); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/CommandLine.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/CommandLine.java new file mode 100644 index 00000000000..516b50dc601 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/CommandLine.java @@ -0,0 +1,382 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.vespa.hosted.node.admin.task.util.process; + +import com.yahoo.vespa.hosted.node.admin.component.TaskContext; + +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.nio.file.Path; +import java.time.Duration; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.TreeMap; +import java.util.function.Predicate; +import java.util.logging.Logger; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +/** + * A CommandLine is used to specify and execute a shell-like program in a child process, + * and capture its output. + * + * @author hakonhall + */ +public class CommandLine { + private static final Logger logger = Logger.getLogger(CommandLine.class.getName()); + private static final Pattern UNESCAPED_ARGUMENT_PATTERN = Pattern.compile("^[a-zA-Z0-9=!@%/+:.,_-]+$"); + + /** The default timeout. See setTimeout() for details. 
*/ + public static final Duration DEFAULT_TIMEOUT = Duration.ofMinutes(10); + + /** The default maximum number of output bytes. See setMaxOutputBytes() for details. */ + public static final long DEFAULT_MAX_OUTPUT_BYTES = 1024 * 1024 * 1024; // 1 Gb + + /** + * The default grace period after SIGTERM has been sent during a graceful kill. + * See setSigTermGracePeriod for details. + */ + public static final Duration DEFAULT_SIGTERM_GRACE_PERIOD = Duration.ofMinutes(1); + + /** + * The default grace period after SIGKILL has been sent during a graceful kill. + * See setSigKillGracePeriod for details. + */ + public static final Duration DEFAULT_SIGKILL_GRACE_PERIOD = Duration.ofMinutes(30); + + private final List arguments = new ArrayList<>(); + private final Set censoredArgumentIndices = new HashSet<>(); + private final TreeMap environment = new TreeMap<>(); + private final TaskContext taskContext; + private final ProcessFactory processFactory; + + private boolean redirectStderrToStdoutInsteadOfDiscard = true; + private boolean executeSilentlyCalled = false; + private Optional outputFile = Optional.empty(); + private Charset outputEncoding = StandardCharsets.UTF_8; + private Duration timeout = DEFAULT_TIMEOUT; + private long maxOutputBytes = DEFAULT_MAX_OUTPUT_BYTES; + private Duration sigTermGracePeriod = DEFAULT_SIGTERM_GRACE_PERIOD; + private Duration sigKillGracePeriod = DEFAULT_SIGKILL_GRACE_PERIOD; + private Predicate successfulExitCodePredicate = code -> code == 0; + private boolean waitForTermination = true; + + public CommandLine(TaskContext taskContext, ProcessFactory processFactory) { + this.taskContext = taskContext; + this.processFactory = processFactory; + } + + /** Add arguments to the command. The first argument in the first call to add() is the program. */ + public CommandLine add(String... arguments) { return add(List.of(arguments)); } + + /** Add arguments to the command. The first argument in the first call to add() is the program. 
*/ + public CommandLine add(Collection arguments) { + this.arguments.addAll(arguments); + return this; + } + + /** Add arguments by splitting arguments by space. */ + public CommandLine addTokens(String arguments) { + return add(arguments.split("\\s+")); + } + + /** Set an environment variable, overriding any existing. */ + public CommandLine setEnvironmentVariable(String name, String value) { + if (name.indexOf('=') != -1) { + throw new IllegalArgumentException("name contains '=': " + name); + } + Objects.requireNonNull(value, "cannot set environment variable to null"); + + environment.put(name, value); + return this; + } + + public CommandLine removeEnvironmentVariable(String name) { + if (name.indexOf('=') != -1) { + throw new IllegalArgumentException("name contains '=': " + name); + } + environment.put(name, null); + return this; + } + + /** Censor (prevent logging of) the last argument added to this */ + public CommandLine censorArgument() { + censoredArgumentIndices.add(arguments.size() - 1); + return this; + } + + /** + * Execute a shell-like program in a child process: + * - the program is recorded and logged as modifying the system, but see executeSilently(). + * - the program's stderr is redirected to stdout, but see discardStderr(). + * - the program's output is assumed to be UTF-8, but see setOutputEncoding(). + * - the program must terminate with exit code 0, but see ignoreExitCode(). + * - the output of the program will be accessible in the returned CommandResult. + * + * Footnote 1: As a safety measure the size of the output is capped, and the program is + * only allowed to execute up to a timeout. The defaults are set high so you typically do + * not have to worry about reaching these limits, but otherwise see setMaxOutputBytes() + * and setTimeout(), respectively. 
+ * + * Footnote 2: If the child process is forced to be killed due to footnote 1, then + * setSigTermGracePeriod() and setSigKillGracePeriod() can be used to tweak how much time + * is given to the program to shut down. Again, the defaults should be reasonable. + */ + public CommandResult execute() { + taskContext.recordSystemModification(logger, "Executing command: " + toString()); + return doExecute(); + } + + /** + * Same as execute(), except it will not record the program as modifying the system. + * + * If the program is later found to have modified the system, or otherwise worthy of + * a record, call recordSilentExecutionAsSystemModification(). + */ + public CommandResult executeSilently() { + executeSilentlyCalled = true; + return doExecute(); + } + + public static class Options { + private boolean silent = false; + + public Options() {} + + /** Invoke {@link #executeSilently()} instead of {@link #execute()} (default). */ + public Options setSilent(boolean silent) { + this.silent = silent; + return this; + } + } + + /** Convenience method to bundle up a bunch of calls on this into an options object. */ + public CommandResult execute(Options options) { + return options.silent ? executeSilently() : execute(); + } + + /** + * Record an already executed executeSilently() as having modified the system. + * For instance with YUM it is not known until after a 'yum install' whether it + * modified the system. + */ + public void recordSilentExecutionAsSystemModification() { + if (!executeSilentlyCalled) { + throw new IllegalStateException("executeSilently has not been called"); + } + // Disallow multiple consecutive calls to this method without an intervening call + // to executeSilently(). 
+ executeSilentlyCalled = false; + + taskContext.recordSystemModification(logger, "Executed command: " + toString()); + } + + /** + * The first argument of the command specifies the program and is either the program's + * filename (in case the environment variable PATH will be used to search for the program + * file) or a path with the last component being the program's filename. + * + * @return The filename of the program. + */ + public String programName() { + if (arguments.isEmpty()) { + throw new IllegalStateException( + "The program name cannot be determined yet as no arguments have been given"); + } + String path = arguments.get(0); + int lastIndex = path.lastIndexOf('/'); + if (lastIndex == -1) { + return path; + } else { + return path.substring(lastIndex + 1); + } + } + + /** Returns a shell-like representation of the command. */ + @Override + public String toString() { + return toString(true); + } + + String toString(boolean censor) { + var command = new StringBuilder(); + + if (!environment.isEmpty()) { + // Pretend environment is propagated through the env program for display purposes + command.append(environment.entrySet().stream() + .map(entry -> { + if (entry.getValue() == null) { + return "-u " + maybeEscapeArgument(entry.getKey()); + } else { + return maybeEscapeArgument(entry.getKey() + "=" + entry.getValue()); + } + }) + .collect(Collectors.joining(" ", "env ", " "))); + } + + for (int i = 0; i < arguments.size(); i++) { + if (censor && censoredArgumentIndices.contains(i)) { + command.append(""); + } else { + command.append(maybeEscapeArgument(arguments.get(i))); + } + if (i < arguments.size() - 1) { + command.append(" "); + } + } + + // Note: Both of these cannot be confused with an argument since they would + // require escaping. + command.append(redirectStderrToStdoutInsteadOfDiscard ? " 2>&1" : " 2>/dev/null"); + + return command.toString(); + } + + + /** + * By default, stderr is redirected to stdout. This method will instead discard stderr. 
+ */ + public CommandLine discardStderr() { + this.redirectStderrToStdoutInsteadOfDiscard = false; + return this; + } + + /** + * By default, a non-zero exit code will cause the command execution to fail. This method + * will instead ignore the exit code. + */ + public CommandLine ignoreExitCode() { + this.successfulExitCodePredicate = code -> true; + return this; + } + + /** + * By default, a non-zero exit code causes the command execution to fail. This method + * will override that predicate. + */ + public CommandLine setSuccessfulExitCodePredicate(Predicate successPredicate) { + successfulExitCodePredicate = successPredicate; + return this; + } + + /** + * By default, the output of the command is parsed as UTF-8. This method will set a + * different encoding. + */ + public CommandLine setOutputEncoding(Charset outputEncoding) { + this.outputEncoding = outputEncoding; + return this; + } + + /** + * By default, the output of the command is piped to a temporary file, which is deleted + * when execution ends. This method will cause output to be piped to the given path + * instead, and the file will not be removed. + */ + public CommandLine setOutputFile(Path outputFile) { + this.outputFile = Optional.of(outputFile); + return this; + } + + /** + * By default, the command will be gracefully killed after DEFAULT_TIMEOUT. This method + * overrides that default. + */ + public CommandLine setTimeout(Duration timeout) { + this.timeout = timeout; + return this; + } + + /** + * By default, the command will be gracefully killed if it ever outputs more than + * DEFAULT_MAX_OUTPUT_BYTES. This method overrides that default. + */ + public CommandLine setMaxOutputBytes(long maxOutputBytes) { + this.maxOutputBytes = maxOutputBytes; + return this; + } + + /** + * By default, if the program needs to be gracefully killed it will wait up to + * DEFAULT_SIGTERM_GRACE_PERIOD for the program to exit after it has been killed with + * the SIGTERM signal. 
+ */ + public CommandLine setSigTermGracePeriod(Duration period) { + this.sigTermGracePeriod = period; + return this; + } + + public CommandLine setSigKillGracePeriod(Duration period) { + this.sigKillGracePeriod = period; + return this; + } + + /** + * WARNING: This will leave the child as a zombie process until this process dies. + * I.e. only use this just before or a limited number of times per host admin restart. + */ + public CommandLine doNotWaitForTermination() { + this.waitForTermination = false; + return this; + } + + public List getArguments() { return Collections.unmodifiableList(arguments); } + + /** Returns a copy of the environment overrides. A null value means the environment variable should be removed. */ + public TreeMap getEnvironmentOverrides() { return new TreeMap<>(environment); } + + // Accessor fields necessary for classes in this package. Could be public if necessary. + boolean getRedirectStderrToStdoutInsteadOfDiscard() { return redirectStderrToStdoutInsteadOfDiscard; } + Predicate getSuccessfulExitCodePredicate() { return successfulExitCodePredicate; } + Optional getOutputFile() { return outputFile; } + Charset getOutputEncoding() { return outputEncoding; } + Duration getTimeout() { return timeout; } + long getMaxOutputBytes() { return maxOutputBytes; } + Duration getSigTermGracePeriod() { return sigTermGracePeriod; } + Duration getSigKillGracePeriod() { return sigKillGracePeriod; } + + private CommandResult doExecute() { + try (ChildProcess2 child = processFactory.spawn(this)) { + if (!waitForTermination) { + return new CommandResult(this, 0, ""); + } + + child.waitForTermination(); + int exitCode = child.exitCode(); + if (!successfulExitCodePredicate.test(exitCode)) { + throw new ChildProcessFailureException(exitCode, toString(), child.getOutput()); + } + + String output = child.getOutput(); + return new CommandResult(this, exitCode, output); + } + } + + private static String maybeEscapeArgument(String argument) { + if 
(UNESCAPED_ARGUMENT_PATTERN.matcher(argument).matches()) { + return argument; + } else { + return escapeArgument(argument); + } + } + + private static String escapeArgument(String argument) { + StringBuilder doubleQuoteEscaped = new StringBuilder(argument.length() + 10); + + for (int i = 0; i < argument.length(); ++i) { + char c = argument.charAt(i); + switch (c) { + case '"', '\\' -> doubleQuoteEscaped.append("\\").append(c); + default -> doubleQuoteEscaped.append(c); + } + } + + return "\"" + doubleQuoteEscaped + "\""; + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/CommandResult.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/CommandResult.java new file mode 100644 index 00000000000..c4f3229792b --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/CommandResult.java @@ -0,0 +1,92 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.vespa.hosted.node.admin.task.util.process; + +import java.util.List; +import java.util.function.Function; +import java.util.regex.Pattern; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * A CommandResult is the result of the execution of a CommandLine. + * + * @author hakonhall + */ +public class CommandResult { + private static final Pattern NEWLINE = Pattern.compile("\\n"); + + private final CommandLine commandLine; + private final int exitCode; + private final String output; + + public CommandResult(CommandLine commandLine, int exitCode, String output) { + this.commandLine = commandLine; + this.exitCode = exitCode; + this.output = output; + } + + public int getExitCode() { + return exitCode; + } + + /** Returns the output with leading and trailing white-space removed. 
*/ + public String getOutput() { return output.trim(); } + + public String getUntrimmedOutput() { return output; } + + /** Returns the output lines of the command, omitting trailing empty lines. */ + public List getOutputLines() { + return getOutputLinesStream().toList(); + } + + /** Returns the output lines as a stream, omitting trailing empty lines. */ + public Stream getOutputLinesStream() { + if (output.isEmpty()) { + // For some reason an empty string => one-element list. + return Stream.empty(); + } + + // For some reason this removes trailing empty elements, but that's OK with us. + return NEWLINE.splitAsStream(output); + } + + /** + * Map this CommandResult to an instance of type R. + * + * If a RuntimeException is thrown by the mapper, it is wrapped in an + * UnexpectedOutputException that includes a snippet of the output in the message. + * + * This method is intended to be used as part of the verification of the output. + */ + public R map(Function mapper) { + try { + return mapper.apply(this); + } catch (RuntimeException e) { + throw new UnexpectedOutputException(e, "Failed to map output", commandLine.toString(), output); + } + } + + /** + * Map the output to an instance of type R according to mapper, wrapping any + * RuntimeException in UnexpectedOutputException w/output snippet. See map() for details. + */ + public R mapOutput(Function mapper) { return map(result -> mapper.apply(result.getOutput())); } + + /** + * Map each output line to an instance of type R according to mapper, wrapping any + * RuntimeException in UnexpectedOutputException w/output snippet. See map() for details. + */ + public List mapEachLine(Function mapper) { + return map(result -> result.getOutputLinesStream().map(mapper).toList()); + } + + /** + * Convenience method for getting the CommandLine, whose execution resulted in + * this CommandResult instance. + * + * Warning: the CommandLine is mutable and may be changed by the caller of the execution + * through other references! 
This is just a convenience method for getting that instance. + */ + public CommandLine getCommandLine() { return commandLine; } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/LargeOutputChildProcessException.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/LargeOutputChildProcessException.java new file mode 100644 index 00000000000..440928b5762 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/LargeOutputChildProcessException.java @@ -0,0 +1,15 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.vespa.hosted.node.admin.task.util.process; + +/** + * Exception thrown if the output of the child process is larger than the maximum limit. + * + * @author hakonhall + */ +@SuppressWarnings("serial") +public class LargeOutputChildProcessException extends ChildProcessException { + LargeOutputChildProcessException(long maxFileSize, String commandLine, String possiblyHugeOutput) { + super("output more than " + maxFileSize + " bytes", commandLine, possiblyHugeOutput); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ProcessApi2.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ProcessApi2.java new file mode 100644 index 00000000000..006f1373e0f --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ProcessApi2.java @@ -0,0 +1,17 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.vespa.hosted.node.admin.task.util.process; + +import java.util.concurrent.TimeUnit; + +/** + * Process abstraction. 
+ * + * @author hakonhall + */ +public interface ProcessApi2 { + boolean waitFor(long timeout, TimeUnit unit) throws InterruptedException; + int exitValue(); + void destroy(); + void destroyForcibly(); +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ProcessApi2Impl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ProcessApi2Impl.java new file mode 100644 index 00000000000..0e4bc799007 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ProcessApi2Impl.java @@ -0,0 +1,36 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.vespa.hosted.node.admin.task.util.process; + +import java.util.concurrent.TimeUnit; + +/** + * @author hakonhall + */ +public class ProcessApi2Impl implements ProcessApi2 { + private final Process process; + + ProcessApi2Impl(Process process) { + this.process = process; + } + + @Override + public boolean waitFor(long timeout, TimeUnit unit) throws InterruptedException { + return process.waitFor(timeout, unit); + } + + @Override + public int exitValue() { + return process.exitValue(); + } + + @Override + public void destroy() { + process.destroy(); + } + + @Override + public void destroyForcibly() { + process.destroyForcibly(); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ProcessFactory.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ProcessFactory.java new file mode 100644 index 00000000000..c09d6b543c3 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ProcessFactory.java @@ -0,0 +1,10 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +package com.yahoo.vespa.hosted.node.admin.task.util.process; + +/** + * @author hakonhall + */ +public interface ProcessFactory { + ChildProcess2 spawn(CommandLine commandLine); +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ProcessFactoryImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ProcessFactoryImpl.java new file mode 100644 index 00000000000..f4bef260ec0 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ProcessFactoryImpl.java @@ -0,0 +1,106 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.vespa.hosted.node.admin.task.util.process; + +import com.yahoo.jdisc.Timer; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.attribute.FileAttribute; +import java.nio.file.attribute.PosixFilePermission; +import java.nio.file.attribute.PosixFilePermissions; +import java.util.List; +import java.util.Set; +import java.util.logging.Level; +import java.util.logging.Logger; + +import static com.yahoo.yolean.Exceptions.uncheck; + +/** + * @author hakonhall + */ +public class ProcessFactoryImpl implements ProcessFactory { + private static final Logger logger = Logger.getLogger(ProcessFactoryImpl.class.getName()); + private static final File DEV_NULL = new File("/dev/null"); + + private final ProcessStarter processStarter; + private final Timer timer; + + ProcessFactoryImpl(ProcessStarter processStarter, Timer timer) { + this.processStarter = processStarter; + this.timer = timer; + } + + @Override + public ChildProcess2Impl spawn(CommandLine commandLine) { + List arguments = commandLine.getArguments(); + if (arguments.isEmpty()) { + throw new IllegalArgumentException("No arguments specified - missing program to spawn"); + } + + ProcessBuilder processBuilder = new 
ProcessBuilder(arguments); + + for (var entry : commandLine.getEnvironmentOverrides().entrySet()) { + if (entry.getValue() == null) { + processBuilder.environment().remove(entry.getKey()); + } else { + processBuilder.environment().put(entry.getKey(), entry.getValue()); + } + } + + if (commandLine.getRedirectStderrToStdoutInsteadOfDiscard()) { + processBuilder.redirectErrorStream(true); + } else { + processBuilder.redirectError(ProcessBuilder.Redirect.to(DEV_NULL)); + } + + // The output is redirected to a file (temporary or user-defined) because: + // - We could read continuously from process.getInputStream, but that may block + // indefinitely with a faulty program. + // - If we don't read continuously from process.getInputStream, then because + // the underlying channel may be a pipe, the child may be stopped because the pipe + // is full. + // - To honor the timeout, no API can be used that may end up blocking indefinitely. + // + // Therefore, we redirect the output to a file and use waitFor w/timeout. This also + // has the benefit of allowing for inspection of the file during execution, and + // allowing the inspection of the file if it e.g. gets too large to hold in-memory. 
+ + FileAttribute> fileAttribute = PosixFilePermissions.asFileAttribute( + PosixFilePermissions.fromString("rw-------")); + + Path outputFile = commandLine.getOutputFile() + .map(file -> { + uncheck(() -> Files.deleteIfExists(file)); + uncheck(() -> Files.createFile(file, fileAttribute)); + return file; + }) + .orElseGet(() -> { + String temporaryFilePrefix = + ProcessFactoryImpl.class.getSimpleName() + "-" + commandLine.programName() + "-"; + + return uncheck(() -> Files.createTempFile( + temporaryFilePrefix, + ".out", + fileAttribute)); + }); + + try { + processBuilder.redirectOutput(outputFile.toFile()); + ProcessApi2 process = processStarter.start(processBuilder); + return new ChildProcess2Impl(commandLine, process, outputFile, timer); + } catch (RuntimeException | Error throwable) { + try { + if (commandLine.getOutputFile().isEmpty()) + Files.delete(outputFile); + } catch (IOException ioException) { + logger.log(Level.WARNING, "Failed to delete temporary file at " + + outputFile, ioException); + } + throw throwable; + } + + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ProcessStarter.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ProcessStarter.java new file mode 100644 index 00000000000..fc78b5d3e72 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ProcessStarter.java @@ -0,0 +1,10 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +package com.yahoo.vespa.hosted.node.admin.task.util.process; + +/** + * @author hakonhall + */ +public interface ProcessStarter { + ProcessApi2 start(ProcessBuilder processBuilder); +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ProcessStarterImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ProcessStarterImpl.java new file mode 100644 index 00000000000..644e5876eb7 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/ProcessStarterImpl.java @@ -0,0 +1,26 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.vespa.hosted.node.admin.task.util.process; + +import java.util.logging.Level; + +import java.util.logging.Logger; + +import static com.yahoo.yolean.Exceptions.uncheck; + +/** + * @author hakonhall + */ +public class ProcessStarterImpl implements ProcessStarter { + private static final Logger logger = Logger.getLogger(ProcessStarterImpl.class.getName()); + + @Override + public ProcessApi2 start(ProcessBuilder processBuilder) { + if (logger.isLoggable(Level.FINE)) { + logger.log(Level.FINE, "Spawning process: " + processBuilder.command()); + } + + Process process = uncheck(processBuilder::start); + return new ProcessApi2Impl(process); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/Terminal.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/Terminal.java new file mode 100644 index 00000000000..1cf6b533d5a --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/Terminal.java @@ -0,0 +1,14 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +package com.yahoo.vespa.hosted.node.admin.task.util.process; + +import com.yahoo.vespa.hosted.node.admin.component.TaskContext; + +/** + * A Terminal is a light-weight terminal-like interface for executing shell-like programs. + * + * @author hakonhall + */ +public interface Terminal { + CommandLine newCommandLine(TaskContext taskContext); +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/TerminalImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/TerminalImpl.java new file mode 100644 index 00000000000..e13e30d9c75 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/TerminalImpl.java @@ -0,0 +1,26 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.task.util.process; + +import com.yahoo.jdisc.Timer; +import com.yahoo.vespa.hosted.node.admin.component.TaskContext; + +/** + * @author hakonhall + */ +public class TerminalImpl implements Terminal { + private final ProcessFactory processFactory; + + public TerminalImpl(Timer timer) { + this(new ProcessFactoryImpl(new ProcessStarterImpl(), timer)); + } + + /** For testing. */ + public TerminalImpl(ProcessFactory processFactory) { + this.processFactory = processFactory; + } + + @Override + public CommandLine newCommandLine(TaskContext taskContext) { + return new CommandLine(taskContext, processFactory); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/TestChildProcess2.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/TestChildProcess2.java new file mode 100644 index 00000000000..8490bc01f56 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/TestChildProcess2.java @@ -0,0 +1,52 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.task.util.process; + +import java.util.Optional; + +/** + * @author hakonhall + */ +public class TestChildProcess2 implements ChildProcess2 { + private final int exitCode; + private final String output; + private Optional exceptionToThrowInWaitForTermination = Optional.empty(); + private boolean closeCalled = false; + + public TestChildProcess2(int exitCode, String output) { + this.exitCode = exitCode; + this.output = output; + } + + public void throwInWaitForTermination(RuntimeException e) { + this.exceptionToThrowInWaitForTermination = Optional.of(e); + } + + @Override + public void waitForTermination() { + if (exceptionToThrowInWaitForTermination.isPresent()) { + throw exceptionToThrowInWaitForTermination.get(); + } + } + + @Override + public int exitCode() { + return exitCode; + } + + @Override + public String getOutput() { + return output; + } + + @Override + public void close() { + if (closeCalled) { + throw new IllegalStateException("close already called"); + } + closeCalled = true; + } + + public boolean closeCalled() { + return closeCalled; + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/TestProcessFactory.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/TestProcessFactory.java new file mode 100644 index 00000000000..4e831dc2865 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/TestProcessFactory.java @@ -0,0 +1,113 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.task.util.process; + +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; +import java.util.function.Function; + +/** + * @author hakonhall + */ +public class TestProcessFactory implements ProcessFactory { + private static class SpawnCall { + private final String commandDescription; + private final Function callback; + + private SpawnCall(String commandDescription, + Function callback) { + this.commandDescription = commandDescription; + this.callback = callback; + } + } + private final List expectedSpawnCalls = new ArrayList<>(); + private final List spawnCommandLines = new ArrayList<>(); + + private boolean muteVerifyAllCommandsExecuted = false; + + /** Forward call to spawn() to callback. */ + public TestProcessFactory interceptSpawn(String commandDescription, + Function callback) { + expectedSpawnCalls.add(new SpawnCall(commandDescription, callback)); + return this; + } + + // Expect a spawn() of exactly the given command line string, and make that spawn() return toReturn. + public TestProcessFactory expectSpawn(String commandLineString, TestChildProcess2 toReturn) { + int commandIndex = expectedSpawnCalls.size(); + return interceptSpawn( + commandLineString, + commandLine -> defaultSpawn(commandLine, commandLineString, toReturn, commandIndex)); + } + + // Convenience method for the caller to avoid having to create a TestChildProcess2 instance. + public TestProcessFactory expectSpawn(String commandLine, int exitCode, String output) { + return expectSpawn(commandLine, new TestChildProcess2(exitCode, output)); + } + + /** Ignore the CommandLine passed to spawn(), just return successfully with the given output. 
*/ + public TestProcessFactory ignoreSpawn(String output) { + return interceptSpawn( + "[call index " + expectedSpawnCalls.size() + "]", + commandLine -> new TestChildProcess2(0, output)); + } + + public TestProcessFactory ignoreSpawn() { + return ignoreSpawn(""); + } + + public void verifyAllCommandsExecuted() { + if (muteVerifyAllCommandsExecuted) return; + + if (spawnCommandLines.size() < expectedSpawnCalls.size()) { + int missingCommandIndex = spawnCommandLines.size(); + throw new IllegalStateException("Command #" + missingCommandIndex + + " never executed: " + + expectedSpawnCalls.get(missingCommandIndex).commandDescription + + "\nExpected commands:\n" + getExpectedCommandLines() + + "\nActual commands:\n" + spawnCommandLines); + } + } + + /** + * WARNING: CommandLine is mutable, and e.g. reusing a CommandLine for the next call + * would make the CommandLine in this list no longer reflect the original CommandLine. + */ + public List getMutableCommandLines() { + return spawnCommandLines; + } + + @Override + public ChildProcess2 spawn(CommandLine commandLine) { + String commandLineString = commandLine.toString(false); + if (spawnCommandLines.size() + 1 > expectedSpawnCalls.size()) { + throw new IllegalStateException("Too many invocations: " + commandLineString); + } + spawnCommandLines.add(commandLine); + + return expectedSpawnCalls.get(spawnCommandLines.size() - 1).callback.apply(commandLine); + } + + private ChildProcess2 defaultSpawn(CommandLine commandLine, + String expectedCommandLineString, + ChildProcess2 toReturn, + int commandSequenceNumber) { + String actualCommandLineString = commandLine.toString(false); + if (!Objects.equals(actualCommandLineString, expectedCommandLineString)) { + muteVerifyAllCommandsExecuted = true; + throw new IllegalArgumentException("Expected command #" + commandSequenceNumber + " to be: \n" + + " \"" + expectedCommandLineString + "\"\n" + + "but got:\n" + + " \"" + actualCommandLineString + "\""); + } + + return toReturn; + } + 
+ private List getExpectedCommandLines() { + return expectedSpawnCalls.stream() + .map(spawnCall -> spawnCall.commandDescription) + .toList(); + } + +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/TestTerminal.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/TestTerminal.java new file mode 100644 index 00000000000..bf231b7c35b --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/TestTerminal.java @@ -0,0 +1,67 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.task.util.process; + +import com.yahoo.vespa.hosted.node.admin.component.TaskContext; + +import java.util.function.Function; + +/** + * @author hakonhall + */ +public class TestTerminal implements Terminal { + private final TerminalImpl realTerminal; + private final TestProcessFactory testProcessFactory = new TestProcessFactory(); + + public TestTerminal() { + this.realTerminal = new TerminalImpl(testProcessFactory); + } + + /** Get the TestProcessFactory the terminal was started with. */ + public TestProcessFactory getTestProcessFactory() { return testProcessFactory; } + + /** Forward call to spawn() to callback. */ + public TestTerminal interceptCommand(String commandDescription, + Function callback) { + testProcessFactory.interceptSpawn(commandDescription, callback); + return this; + } + + /** Wraps expectSpawn in TestProcessFactory, provided here as convenience. */ + public TestTerminal expectCommand(String commandLine, TestChildProcess2 toReturn) { + testProcessFactory.expectSpawn(commandLine, toReturn); + return this; + } + + /** Wraps expectSpawn in TestProcessFactory, provided here as convenience. 
*/ + public TestTerminal expectCommand(String commandLine, int exitCode, String output) { + testProcessFactory.expectSpawn(commandLine, new TestChildProcess2(exitCode, output)); + return this; + } + + /** Verifies command line matches commandLine, and returns successfully with output "". */ + public TestTerminal expectCommand(String commandLine) { + expectCommand(commandLine, 0, ""); + return this; + } + + /** Wraps ignoreSpawn in TestProcessFactory, provided here as convenience. */ + public TestTerminal ignoreCommand(String output) { + testProcessFactory.ignoreSpawn(output); + return this; + } + + /** Wraps ignoreSpawn in TestProcessFactory, provided here as convenience. */ + public TestTerminal ignoreCommand() { + testProcessFactory.ignoreSpawn(); + return this; + } + + public void verifyAllCommandsExecuted() { + testProcessFactory.verifyAllCommandsExecuted(); + } + + @Override + public CommandLine newCommandLine(TaskContext taskContext) { + return realTerminal.newCommandLine(taskContext); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/TimeoutChildProcessException.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/TimeoutChildProcessException.java new file mode 100644 index 00000000000..c4c59073de8 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/TimeoutChildProcessException.java @@ -0,0 +1,18 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.vespa.hosted.node.admin.task.util.process; + +import java.time.Duration; + +/** + * Exception thrown when a child process has taken too long to terminate, in case it has been + * forcibly killed. 
+ * + * @author hakonhall + */ +@SuppressWarnings("serial") +public class TimeoutChildProcessException extends ChildProcessException { + TimeoutChildProcessException(Duration timeout, String commandLine, String possiblyHugeOutput) { + super("timed out after " + timeout, commandLine, possiblyHugeOutput); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/UnexpectedOutputException.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/UnexpectedOutputException.java new file mode 100644 index 00000000000..1829df96601 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/UnexpectedOutputException.java @@ -0,0 +1,26 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.vespa.hosted.node.admin.task.util.process; + +/** + * @author hakonhall + */ +@SuppressWarnings("serial") +public class UnexpectedOutputException extends ChildProcessException { + /** + * @param problem Problem description, e.g. "Output is not of the form ^NAME=VALUE$" + */ + public UnexpectedOutputException(String problem, String commandLine, String possiblyHugeOutput) { + super("output was not of the expected format: " + problem, commandLine, possiblyHugeOutput); + } + + /** + * @param problem Problem description, e.g. 
"Output is not of the form ^NAME=VALUE$" + */ + public UnexpectedOutputException(RuntimeException cause, + String problem, + String commandLine, + String possiblyHugeOutput) { + super(cause, "output was not of the expected format: " + problem, commandLine, possiblyHugeOutput); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/UnkillableChildProcessException.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/UnkillableChildProcessException.java new file mode 100644 index 00000000000..1b847380b47 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/UnkillableChildProcessException.java @@ -0,0 +1,21 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.vespa.hosted.node.admin.task.util.process; + +import java.time.Duration; + +/** + * @author hakonhall + */ +@SuppressWarnings("serial") +public class UnkillableChildProcessException extends ChildProcessException { + public UnkillableChildProcessException(Duration waitForSigTerm, + Duration waitForSigKill, + String commandLine, + String possiblyHugeOutput) { + super("did not terminate even after SIGTERM, +" + waitForSigTerm + + ", SIGKILL, and +" + waitForSigKill, + commandLine, + possiblyHugeOutput); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/package-info.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/package-info.java new file mode 100644 index 00000000000..d03eb80af50 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/process/package-info.java @@ -0,0 +1,5 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+@ExportPackage +package com.yahoo.vespa.hosted.node.admin.task.util.process; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/systemd/SystemCtl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/systemd/SystemCtl.java new file mode 100644 index 00000000000..55c7b23b1e8 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/systemd/SystemCtl.java @@ -0,0 +1,227 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.task.util.systemd; + +import com.yahoo.vespa.hosted.node.admin.component.TaskContext; +import com.yahoo.vespa.hosted.node.admin.task.util.process.CommandLine; +import com.yahoo.vespa.hosted.node.admin.task.util.process.CommandResult; +import com.yahoo.vespa.hosted.node.admin.task.util.process.Terminal; + +import java.util.Objects; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Control the systemd system and service manager + * + * @author hakonhall + */ +public class SystemCtl { + + // Valid systemd property names from looking at a couple of services. + private static final Pattern PROPERTY_NAME_PATTERN = Pattern.compile("^[a-zA-Z]+$"); + + // Last line of `systemctl list-unit-files ` prints '0 unit files listed.' + private static final Pattern UNIT_FILES_LISTED_PATTERN = Pattern.compile("([0-9]+) unit files listed\\."); + + private static final Pattern ACTIVE_STATE_PROPERTY_PATTERN = createPropertyPattern("ActiveState"); + + private final Terminal terminal; + private boolean useSudo = false; + + private static Pattern createPropertyPattern(String propertyName) { + if (!PROPERTY_NAME_PATTERN.matcher(propertyName).matches()) { + throw new IllegalArgumentException("Property name does not match " + PROPERTY_NAME_PATTERN); + } + + // Make ^ and $ match beginning and end of lines. 
+ String regex = String.format("(?md)^%s=(.*)$", propertyName); + + return Pattern.compile(regex); + } + + public SystemCtl(Terminal terminal) { + this.terminal = terminal; + } + + /** Call all commands through sudo */ + public SystemCtl withSudo() { + this.useSudo = true; + return this; + } + + /** Returns whether this is configured to use sudo */ + public boolean useSudo() { + return useSudo; + } + + public void daemonReload(TaskContext taskContext) { + newCommandLine(taskContext).add("systemctl", "daemon-reload") + .execute(); + } + + public SystemCtlEnable enable(String unit) { return new SystemCtlEnable(unit); } + public SystemCtlDisable disable(String unit) { return new SystemCtlDisable(unit); } + public SystemCtlStart start(String unit) { return new SystemCtlStart(unit); } + public SystemCtlStop stop(String unit) { return new SystemCtlStop(unit); } + public SystemCtlRestart restart(String unit) { return new SystemCtlRestart(unit); } + public SystemCtlReload reload(String unit) { return new SystemCtlReload(unit); } + + public boolean serviceExists(TaskContext context, String unit) { + return newCommandLine(context) + .add("systemctl", "list-unit-files", unit + ".service").executeSilently() + .mapOutput(output -> { + // Last line of the form: "1 unit files listed." + Matcher matcher = UNIT_FILES_LISTED_PATTERN.matcher(output); + if (!matcher.find()) { + throw new IllegalArgumentException(); + } + + return !matcher.group(1).equals("0"); + }); + } + + /** Returns true if the unit exists and is active (i.e. running). unit is e.g. "docker". 
*/ + public boolean isActive(TaskContext context, String unit) { + return newCommandLine(context) + .add("systemctl", "--quiet", "is-active", unit + ".service") + .ignoreExitCode() + .executeSilently() + .map(CommandResult::getExitCode) == 0; + } + + public String getServiceProperty(TaskContext context, String unit, String property) { + return newCommandLine(context) + .add("systemctl", "show", "--property", property, "--value", unit + ".service") + .executeSilently() + .getOutput(); + } + + private CommandLine newCommandLine(TaskContext context) { + var commandLine = terminal.newCommandLine(context); + if (useSudo) { + commandLine.add("sudo"); + } + return commandLine; + } + + public class SystemCtlEnable extends SystemCtlCommand { + private SystemCtlEnable(String unit) { + super("enable", unit); + } + + protected boolean isAlreadyConverged(TaskContext context) { + return isUnitEnabled(context); + } + } + + public class SystemCtlDisable extends SystemCtlCommand { + private SystemCtlDisable(String unit) { + super("disable", unit); + } + + protected boolean isAlreadyConverged(TaskContext context) { + return !isUnitEnabled(context); + } + } + + public class SystemCtlStart extends SystemCtlCommand { + private SystemCtlStart(String unit) { + super("start", unit); + } + + protected boolean isAlreadyConverged(TaskContext context) { + String activeState = getSystemCtlProperty(context, ACTIVE_STATE_PROPERTY_PATTERN); + return Objects.equals(activeState, "active"); + } + } + + public class SystemCtlStop extends SystemCtlCommand { + private SystemCtlStop(String unit) { + super("stop", unit); + } + + protected boolean isAlreadyConverged(TaskContext context) { + String activeState = getSystemCtlProperty(context, ACTIVE_STATE_PROPERTY_PATTERN); + return Objects.equals(activeState, "inactive"); + } + } + + public class SystemCtlRestart extends SystemCtlCommand { + private SystemCtlRestart(String unit) { + super("restart", unit); + } + + protected boolean 
isAlreadyConverged(TaskContext context) { + return false; + } + } + + public class SystemCtlReload extends SystemCtlCommand { + private SystemCtlReload(String unit) { + super("reload", unit); + } + + protected boolean isAlreadyConverged(TaskContext context) { + return false; + } + } + + public abstract class SystemCtlCommand { + + private final String command; + private final String unit; + + private SystemCtlCommand(String command, String unit) { + this.command = command; + this.unit = unit; + } + + protected abstract boolean isAlreadyConverged(TaskContext context); + + public boolean converge(TaskContext context) { + if (isAlreadyConverged(context)) { + return false; + } + newCommandLine(context).add("systemctl", command, unit) + .execute(); + return true; + } + + /** Returns true if unit is enabled */ + boolean isUnitEnabled(TaskContext context) { + return newCommandLine(context).add("systemctl", "--quiet", "is-enabled", unit) + .ignoreExitCode() + .executeSilently() + .map(CommandResult::getExitCode) == 0; + } + + /** + * @param propertyPattern Pattern to match the output of systemctl show command with + * exactly 1 group. The matching group must exist. + * @return The matched group from the 'systemctl show' output. + */ + String getSystemCtlProperty(TaskContext context, Pattern propertyPattern) { + return newCommandLine(context).add("systemctl", "show", unit) + .executeSilently() + .mapOutput(output -> extractProperty(output, propertyPattern)); + } + } + + + /** + * Find the systemd property value of the property (given by propertyPattern) + * matching the 'systemctl show' output (given by showOutput). 
+ */ + private static String extractProperty(String showOutput, Pattern propertyPattern) { + Matcher matcher = propertyPattern.matcher(showOutput); + if (!matcher.find()) { + throw new IllegalArgumentException("Pattern '" + propertyPattern + + "' didn't match output"); + } else if (matcher.groupCount() != 1) { + throw new IllegalArgumentException("Property pattern must have exactly 1 group"); + } + + return matcher.group(1); + } + +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/systemd/SystemCtlTester.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/systemd/SystemCtlTester.java new file mode 100644 index 00000000000..32da4f455c1 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/systemd/SystemCtlTester.java @@ -0,0 +1,91 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.task.util.systemd; + +import com.yahoo.vespa.hosted.node.admin.task.util.process.TestTerminal; + +import java.util.function.Consumer; + +/** + * A {@link SystemCtl} tester that simplifies testing interaction with systemd units. + * + * @author mpolden + */ +public class SystemCtlTester extends SystemCtl { + + private final TestTerminal terminal; + + public SystemCtlTester(TestTerminal terminal) { + super(terminal); + this.terminal = terminal; + } + + public Expectation expectServiceExists(String unit) { + return new Expectation(wantedReturn -> + expectCommand("systemctl list-unit-files " + unit + ".service 2>&1", 0, (wantedReturn ? 1 : 0) + " unit files listed.")); + } + + public Expectation expectIsActive(String unit) { + return new Expectation(wantedReturn -> { + expectCommand("systemctl --quiet is-active " + unit + ".service 2>&1", wantedReturn ? 
0 : 1, ""); + }); + } + + public Expectation expectEnable(String unit) { return forChangeEnabledState(unit, true); } + public Expectation expectDisable(String unit) { return forChangeEnabledState(unit, false); } + public Expectation expectStart(String unit) { return forChangeRunningState(unit, true); } + public Expectation expectStop(String unit) { return forChangeRunningState(unit, false); } + + public SystemCtlTester expectRestart(String unit) { + expectCommand("systemctl restart " + unit + " 2>&1", 0, ""); + return this; + } + + public SystemCtlTester expectReload(String unit) { + expectCommand("systemctl reload " + unit + " 2>&1", 0, ""); + return this; + } + + public SystemCtlTester expectDaemonReload() { + expectCommand("systemctl daemon-reload 2>&1", 0, ""); + return this; + } + + public SystemCtlTester expectGetServiceProperty(String unit, String property, String output) { + expectCommand("systemctl show --property " + property + " --value " + unit + ".service 2>&1", 0, output); + return this; + } + + private void expectCommand(String command, int exitCode, String output) { + terminal.expectCommand((useSudo() ? "sudo " : "") + command, exitCode, output); + } + + private Expectation forChangeEnabledState(String unit, boolean enable) { + return new Expectation(wantedReturn -> { + expectCommand("systemctl --quiet is-enabled " + unit + " 2>&1", enable != wantedReturn ? 0 : 1, ""); + if (wantedReturn) + expectCommand("systemctl " + (enable ? "enable" : "disable") + " " + unit + " 2>&1", 0, ""); + }); + } + + private Expectation forChangeRunningState(String unit, boolean start) { + return new Expectation(wantedReturn -> { + expectCommand("systemctl show " + unit + " 2>&1", 0, "ActiveState=" + (start != wantedReturn ? "active" : "inactive")); + if (wantedReturn) + expectCommand("systemctl " + (start ? 
"start" : "stop") + " " + unit + " 2>&1", 0, ""); + }); + } + + public class Expectation { + private final Consumer converger; + public Expectation(Consumer converger) { + this.converger = converger; + } + + /** Mock the return value of the converge(TaskContext) method for this operation (true iff system was modified) */ + public SystemCtlTester andReturn(boolean value) { + converger.accept(value); + return SystemCtlTester.this; + } + } + +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/systemd/package-info.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/systemd/package-info.java new file mode 100644 index 00000000000..465cec3c026 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/systemd/package-info.java @@ -0,0 +1,5 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.vespa.hosted.node.admin.task.util.systemd; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/template/BadTemplateException.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/template/BadTemplateException.java new file mode 100644 index 00000000000..2d907f79e2d --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/template/BadTemplateException.java @@ -0,0 +1,13 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.task.util.template; + +import com.yahoo.vespa.hosted.node.admin.task.util.text.Cursor; + +/** + * @author hakonhall + */ +public class BadTemplateException extends TemplateException { + public BadTemplateException(Cursor location, String message) { + super(message + " at " + location.calculateLocation().lineAndColumnText()); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/template/Form.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/template/Form.java new file mode 100644 index 00000000000..3ebac3322b4 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/template/Form.java @@ -0,0 +1,32 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.task.util.template; + +import java.util.Locale; + +/** + * Public methods common to both Template and ListElement. + * + * @author hakonhall + */ +public interface Form { + /** Set the value of a variable, e.g. %{=color}. */ + Template set(String name, String value); + + /** Set the value of a variable and/or if-condition. */ + default Template set(String name, boolean value) { return set(name, Boolean.toString(value)); } + + default Template set(String name, int value) { return set(name, Integer.toString(value)); } + default Template set(String name, long value) { return set(name, Long.toString(value)); } + + default Template set(String name, String format, Object first, Object... 
rest) { + var args = new Object[1 + rest.length]; + args[0] = first; + System.arraycopy(rest, 0, args, 1, rest.length); + var value = String.format(Locale.US, format, args); + + return set(name, value); + } + + /** Add an instance of a list section after any previously added (for the given name) */ + ListElement add(String name); +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/template/IfSection.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/template/IfSection.java new file mode 100644 index 00000000000..d00b66c9b24 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/template/IfSection.java @@ -0,0 +1,69 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.task.util.template; + +import com.yahoo.vespa.hosted.node.admin.task.util.text.Cursor; +import com.yahoo.vespa.hosted.node.admin.task.util.text.CursorRange; + +import java.util.Optional; + +/** + * @author hakonhall + */ +class IfSection extends Section { + private final boolean negated; + private final String name; + private final Cursor nameOffset; + private final SectionList ifSections; + private final Optional elseSections; + + IfSection(CursorRange range, boolean negated, String name, Cursor nameOffset, + SectionList ifSections, Optional elseSections) { + super("if", range); + this.negated = negated; + this.name = name; + this.nameOffset = nameOffset; + this.ifSections = ifSections; + this.elseSections = elseSections; + } + + String name() { return name; } + Cursor nameOffset() { return nameOffset; } + + @Override + void appendTo(StringBuilder buffer) { + Optional stringValue = template().getVariableValue(name); + if (stringValue.isEmpty()) + throw new TemplateNameNotSetException(name, nameOffset); + + final boolean value; + if (stringValue.get().equals("true")) { + value = true; + } else if 
(stringValue.get().equals("false")) { + value = false; + } else { + throw new NotBooleanValueTemplateException(name); + } + + boolean condition = negated ? !value : value; + if (condition) { + ifSections.sections().forEach(section -> section.appendTo(buffer)); + } else if (elseSections.isPresent()) { + elseSections.get().sections().forEach(section -> section.appendTo(buffer)); + } + } + + @Override + void appendCopyTo(SectionList sectionList) { + SectionList ifSectionCopy = new SectionList(ifSections.range().start(), sectionList.templateBuilder()); + ifSections.sections().forEach(section -> section.appendCopyTo(ifSectionCopy)); + + Optional elseSectionCopy = elseSections.map(elseSections2 -> { + SectionList elseSectionCopy2 = new SectionList(elseSections2.range().start(), + sectionList.templateBuilder()); + elseSections2.sections().forEach(section -> section.appendCopyTo(elseSectionCopy2)); + return elseSectionCopy2; + }); + + sectionList.appendIfSection(negated, name, nameOffset, range().end(), ifSectionCopy, elseSectionCopy); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/template/ListElement.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/template/ListElement.java new file mode 100644 index 00000000000..e8b96d4a6b8 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/template/ListElement.java @@ -0,0 +1,17 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.task.util.template; + +/** + * @author hakonhall + */ +public class ListElement implements Form { + private final Template template; + + ListElement(Template template) { this.template = template; } + + @Override + public Template set(String name, String value) { return template.set(name, value); } + + @Override + public ListElement add(String name) { return new ListElement(template.addElement(name)); } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/template/ListSection.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/template/ListSection.java new file mode 100644 index 00000000000..512518c3a42 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/template/ListSection.java @@ -0,0 +1,61 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.task.util.template; + +import com.yahoo.vespa.hosted.node.admin.task.util.text.Cursor; +import com.yahoo.vespa.hosted.node.admin.task.util.text.CursorRange; + +import java.util.ArrayList; +import java.util.List; + +/** + * @author hakonhall + */ +class ListSection extends Section { + private final String name; + private final Cursor nameOffset; + private final Template body; + private final List