diff options
37 files changed, 474 insertions, 131 deletions
diff --git a/bootstrap.sh b/bootstrap.sh index b0c3fc12ca6..b48f4b77836 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -42,7 +42,7 @@ echo "Using maven command: ${MAVEN_CMD}" echo "Using maven extra opts: ${MAVEN_EXTRA_OPTS}" mvn_install() { - ${MAVEN_CMD} --no-snapshot-updates clean install ${MAVEN_EXTRA_OPTS} "$@" + ${MAVEN_CMD} --no-snapshot-updates -Dmaven.wagon.http.retryHandler.count=5 clean install ${MAVEN_EXTRA_OPTS} "$@" } # Generate vtag map diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java b/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java index 490ab988048..dff70e6481e 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java @@ -678,7 +678,7 @@ public class ApplicationRepository implements com.yahoo.config.provision.Deploye public void deleteExpiredLocalSessions() { Map<Tenant, List<LocalSession>> sessionsPerTenant = new HashMap<>(); - tenantRepository.getAllTenants().forEach(tenant -> sessionsPerTenant.put(tenant, tenant.getLocalSessionRepo().listSessions())); + tenantRepository.getAllTenants().forEach(tenant -> sessionsPerTenant.put(tenant, tenant.getLocalSessionRepo().getSessions())); Set<ApplicationId> applicationIds = new HashSet<>(); sessionsPerTenant.values().forEach(sessionList -> sessionList.forEach(s -> applicationIds.add(s.getApplicationId()))); diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/session/LocalSessionRepo.java b/configserver/src/main/java/com/yahoo/vespa/config/server/session/LocalSessionRepo.java index d276a59327d..3347ba4a63e 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/session/LocalSessionRepo.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/session/LocalSessionRepo.java @@ -28,15 +28,17 @@ import java.util.logging.Level; import java.util.logging.Logger; /** - * File-based session repository for LocalSessions. Contains state for the local instance of the configserver. + * + * Contains state for the local instance of the configserver. * * @author Ulf Lilleengen */ -public class LocalSessionRepo extends SessionRepo<LocalSession> { +public class LocalSessionRepo { private static final Logger log = Logger.getLogger(LocalSessionRepo.class.getName()); private static final FilenameFilter sessionApplicationsFilter = (dir, name) -> name.matches("\\d+"); + private final SessionCache<LocalSession> sessionCache; private final Map<Long, LocalSessionStateWatcher> sessionStateWatchers = new HashMap<>(); private final long sessionLifetime; // in seconds private final Clock clock; @@ -52,6 +54,7 @@ public class LocalSessionRepo extends SessionRepo<LocalSession> { // Constructor public only for testing public LocalSessionRepo(TenantName tenantName, GlobalComponentRegistry componentRegistry) { + sessionCache = new SessionCache<>(); this.clock = componentRegistry.getClock(); this.curator = componentRegistry.getCurator(); this.sessionLifetime = componentRegistry.getConfigserverConfig().sessionLifetime(); @@ -60,15 +63,22 @@ public class LocalSessionRepo extends SessionRepo<LocalSession> { this.expiryTimeFlag = Flags.CONFIGSERVER_LOCAL_SESSIONS_EXPIRY_INTERVAL_IN_DAYS.bindTo(componentRegistry.getFlagSource()); } - @Override public synchronized void addSession(LocalSession session) { - super.addSession(session); + sessionCache.addSession(session); Path sessionsPath = TenantRepository.getSessionsPath(session.getTenantName()); long sessionId = session.getSessionId(); Curator.FileCache fileCache = curator.createFileCache(sessionsPath.append(String.valueOf(sessionId)).append(ConfigCurator.SESSIONSTATE_ZK_SUBPATH).getAbsolute(), false); sessionStateWatchers.put(sessionId, new LocalSessionStateWatcher(fileCache, session, this, zkWatcherExecutor)); } + public LocalSession getSession(long sessionId) { + return sessionCache.getSession(sessionId); + } + + public List<LocalSession> getSessions() { + return sessionCache.getSessions(); + } + private void loadSessions(LocalSessionLoader loader) { File[] sessions = tenantFileSystemDirs.sessionsPath().listFiles(sessionApplicationsFilter); if (sessions == null) { @@ -87,7 +97,7 @@ public class LocalSessionRepo extends SessionRepo<LocalSession> { public void deleteExpiredSessions(Map<ApplicationId, Long> activeSessions) { log.log(Level.FINE, "Purging old sessions"); try { - for (LocalSession candidate : listSessions()) { + for (LocalSession candidate : sessionCache.getSessions()) { Instant createTime = Instant.ofEpochSecond(candidate.getCreateTime()); log.log(Level.FINE, "Candidate session for deletion: " + candidate.getSessionId() + ", created: " + createTime); @@ -125,7 +135,7 @@ public class LocalSessionRepo extends SessionRepo<LocalSession> { log.log(Level.FINE, "Deleting local session " + sessionId); LocalSessionStateWatcher watcher = sessionStateWatchers.remove(sessionId); if (watcher != null) watcher.close(); - removeSession(sessionId); + sessionCache.removeSession(sessionId); NestedTransaction transaction = new NestedTransaction(); session.delete(transaction); transaction.commit(); @@ -137,7 +147,7 @@ public class LocalSessionRepo extends SessionRepo<LocalSession> { } private void deleteAllSessions() { - List<LocalSession> sessions = new ArrayList<>(listSessions()); + List<LocalSession> sessions = new ArrayList<>(sessionCache.getSessions()); for (LocalSession session : sessions) { deleteSession(session); } diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/session/RemoteSessionRepo.java b/configserver/src/main/java/com/yahoo/vespa/config/server/session/RemoteSessionRepo.java index 859f9af5144..d1d234cbfdf 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/session/RemoteSessionRepo.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/session/RemoteSessionRepo.java @@ -34,6 +34,7 @@ import java.util.logging.Logger; import java.util.stream.Collectors; /** + * Session repository for RemoteSessions. There is one such repo per tenant. * Will watch/prepare sessions (applications) based on watched nodes in ZooKeeper. The zookeeper state watched in * this class is shared between all config servers, so it should not modify any global state, because the operation * will be performed on all servers. The repo can be regarded as read only from the POV of the configserver. @@ -41,7 +42,7 @@ import java.util.stream.Collectors; * @author Vegard Havdal * @author Ulf Lilleengen */ -public class RemoteSessionRepo extends SessionRepo<RemoteSession> { +public class RemoteSessionRepo { private static final Logger log = Logger.getLogger(RemoteSessionRepo.class.getName()); @@ -55,12 +56,14 @@ public class RemoteSessionRepo extends SessionRepo<RemoteSession> { private final Curator.DirectoryCache directoryCache; private final TenantApplications applicationRepo; private final Executor zkWatcherExecutor; + private final SessionCache<RemoteSession> sessionCache; public RemoteSessionRepo(GlobalComponentRegistry componentRegistry, RemoteSessionFactory remoteSessionFactory, ReloadHandler reloadHandler, TenantName tenantName, TenantApplications applicationRepo) { + this.sessionCache = new SessionCache<>(); this.curator = componentRegistry.getCurator(); this.sessionsPath = TenantRepository.getSessionsPath(tenantName); this.applicationRepo = applicationRepo; @@ -76,14 +79,22 @@ public class RemoteSessionRepo extends SessionRepo<RemoteSession> { this.directoryCache.start(); } + public RemoteSession getSession(long sessionId) { + return sessionCache.getSession(sessionId); + } + public List<Long> getSessions() { return getSessionList(curator.getChildren(sessionsPath)); } + public void addSession(RemoteSession session) { + sessionCache.addSession(session); + } + public int deleteExpiredSessions(Clock clock, Duration expiryTime) { int deleted = 0; for (long sessionId : getSessions()) { - RemoteSession session = getSession(sessionId); + RemoteSession session = sessionCache.getSession(sessionId); if (session == null) continue; // Internal sessions not in synch with zk, continue if (session.getStatus() == Session.Status.ACTIVATE) continue; Instant created = Instant.ofEpochSecond(session.getCreateTime()); @@ -121,14 +132,14 @@ public class RemoteSessionRepo extends SessionRepo<RemoteSession> { } private void checkForRemovedSessions(List<Long> sessions) { - for (RemoteSession session : listSessions()) + for (RemoteSession session : sessionCache.getSessions()) if ( ! sessions.contains(session.getSessionId())) sessionRemoved(session.getSessionId()); } private void checkForAddedSessions(List<Long> sessions) { for (Long sessionId : sessions) - if (getSession(sessionId) == null) + if (sessionCache.getSession(sessionId) == null) sessionAdded(sessionId); } @@ -152,7 +163,7 @@ public class RemoteSessionRepo extends SessionRepo<RemoteSession> { private void sessionRemoved(long sessionId) { RemoteSessionStateWatcher watcher = sessionStateWatchers.remove(sessionId); if (watcher != null) watcher.close(); - removeSession(sessionId); + sessionCache.removeSession(sessionId); metrics.incRemovedSessions(); } @@ -182,7 +193,7 @@ public class RemoteSessionRepo extends SessionRepo<RemoteSession> { private void nodeChanged() { zkWatcherExecutor.execute(() -> { Multiset<Session.Status> sessionMetrics = HashMultiset.create(); - for (RemoteSession session : listSessions()) { + for (RemoteSession session : sessionCache.getSessions()) { sessionMetrics.add(session.getStatus()); } metrics.setNewSessions(sessionMetrics.count(Session.Status.NEW)); @@ -213,7 +224,7 @@ public class RemoteSessionRepo extends SessionRepo<RemoteSession> { private void synchronizeOnNew(List<Long> sessionList) { for (long sessionId : sessionList) { - RemoteSession session = getSession(sessionId); + RemoteSession session = sessionCache.getSession(sessionId); if (session == null) continue; // session might have been deleted after getting session list log.log(Level.FINE, () -> session.logPre() + "Confirming upload for session " + sessionId); session.confirmUpload(); diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepo.java b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionCache.java index 0cc166dc6e3..8808dc0cf75 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepo.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionCache.java @@ -1,4 +1,4 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.config.server.session; import java.util.ArrayList; @@ -6,12 +6,12 @@ import java.util.HashMap; import java.util.List; /** - * A generic session repository that can store any type of session that extends the abstract interface. + * A session cache that can store any type of {@link Session}. * * @author Ulf Lilleengen + * @author hmusum */ -// TODO: This is a ZK cache. We should probably remove it, or make that explicit -public class SessionRepo<SESSIONTYPE extends Session> { +public class SessionCache<SESSIONTYPE extends Session> { private final HashMap<Long, SESSIONTYPE> sessions = new HashMap<>(); @@ -37,7 +37,7 @@ public class SessionRepo<SESSIONTYPE extends Session> { return sessions.get(id); } - public synchronized List<SESSIONTYPE> listSessions() { + public synchronized List<SESSIONTYPE> getSessions() { return new ArrayList<>(sessions.values()); } diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/ApplicationRepositoryTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/ApplicationRepositoryTest.java index 27705e9c1e0..f26fa05baf8 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/ApplicationRepositoryTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/ApplicationRepositoryTest.java @@ -333,13 +333,13 @@ public class ApplicationRepositoryTest { // No change to active session id assertEquals(activeSessionId, tester.tenant().getApplicationRepo().requireActiveSessionOf(tester.applicationId())); LocalSessionRepo localSessionRepo = tester.tenant().getLocalSessionRepo(); - assertEquals(3, localSessionRepo.listSessions().size()); + assertEquals(3, localSessionRepo.getSessions().size()); clock.advance(Duration.ofHours(1)); // longer than session lifetime // All sessions except 3 should be removed after the call to deleteExpiredLocalSessions tester.applicationRepository().deleteExpiredLocalSessions(); - Collection<LocalSession> sessions = localSessionRepo.listSessions(); + Collection<LocalSession> sessions = localSessionRepo.getSessions(); assertEquals(1, sessions.size()); ArrayList<LocalSession> localSessions = new ArrayList<>(sessions); LocalSession localSession = localSessions.get(0); @@ -353,9 +353,9 @@ public class ApplicationRepositoryTest { assertTrue(deployment4.isPresent()); deployment4.get().prepare(); // session 5 (not activated) - assertEquals(2, localSessionRepo.listSessions().size()); + assertEquals(2, localSessionRepo.getSessions().size()); localSessionRepo.deleteSession(localSession); - assertEquals(1, localSessionRepo.listSessions().size()); + assertEquals(1, localSessionRepo.getSessions().size()); // Check that trying to expire when there are no active sessions works tester.applicationRepository().deleteExpiredLocalSessions(); diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/session/RemoteSessionRepoTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/session/RemoteSessionRepoTest.java index 15ebe425e45..ecff6bec979 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/session/RemoteSessionRepoTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/session/RemoteSessionRepoTest.java @@ -103,9 +103,9 @@ public class RemoteSessionRepoTest { .build(); curator.create(TenantRepository.getSessionsPath(mytenant)); remoteSessionRepo = tenant.getRemoteSessionRepo(); - assertThat(remoteSessionRepo.listSessions().size(), is(0)); + assertThat(remoteSessionRepo.getSessions().size(), is(0)); createSession(sessionId, true, mytenant); - assertThat(remoteSessionRepo.listSessions().size(), is(1)); + assertThat(remoteSessionRepo.getSessions().size(), is(1)); } private void assertStatusChange(long sessionId, Session.Status status) throws Exception { diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/session/SessionRepoTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/session/SessionRepoTest.java deleted file mode 100644 index b8a3d0bc401..00000000000 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/session/SessionRepoTest.java +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.config.server.session; - -import com.yahoo.vespa.curator.mock.MockCurator; -import org.junit.Test; - -import com.yahoo.config.provision.TenantName; - -import static org.hamcrest.core.Is.is; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertThat; - -/** - * @author hmusum - */ -public class SessionRepoTest { - @Test - public void require_that_sessionrepo_is_initialized() { - SessionRepo<TestSession> sessionRepo = new SessionRepo<>(); - assertNull(sessionRepo.getSession(1L)); - sessionRepo.addSession(new TestSession(1)); - assertThat(sessionRepo.getSession(1L).getSessionId(), is(1L)); - } - - @Test(expected = IllegalArgumentException.class) - public void require_that_adding_existing_session_fails() { - SessionRepo<TestSession> sessionRepo = new SessionRepo<>(); - final TestSession session = new TestSession(1); - sessionRepo.addSession(session); - sessionRepo.addSession(session); - } - - private class TestSession extends Session { - TestSession(long sessionId) { - super(TenantName.defaultName(), - sessionId, - new MockSessionZKClient(new MockCurator(), TenantName.defaultName(), sessionId)); - } - } -} diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/certificate/EndpointCertificateManager.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/certificate/EndpointCertificateManager.java index 513709aefc8..64549825b04 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/certificate/EndpointCertificateManager.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/certificate/EndpointCertificateManager.java @@ -224,7 +224,7 @@ public class EndpointCertificateManager { // If not deploying to a dev or perf zone, require all prod zones in deployment spec + test and staging if (!deploymentZone.environment().isManuallyDeployed()) { zoneCandidateList.stream() - .filter(z -> z.environment().isTest() || instanceSpec.isPresent() && instanceSpec.get().deploysTo(Environment.prod, z.region())) + .filter(z -> z.environment().isTest() || instanceSpec.isPresent() && instanceSpec.get().deploysTo(z.environment(), z.region())) .forEach(requiredZones::add); } diff --git a/persistence/src/vespa/persistence/conformancetest/conformancetest.cpp b/persistence/src/vespa/persistence/conformancetest/conformancetest.cpp index b45a4131e7c..036a2b87692 100644 --- a/persistence/src/vespa/persistence/conformancetest/conformancetest.cpp +++ b/persistence/src/vespa/persistence/conformancetest/conformancetest.cpp @@ -625,6 +625,7 @@ TEST_F(ConformanceTest, testPutNewDocumentVersion) EXPECT_EQ(Result::ErrorType::NONE, gr.getErrorCode()); EXPECT_EQ(Timestamp(4), gr.getTimestamp()); + EXPECT_FALSE(gr.is_tombstone()); if (!((*doc2)==gr.getDocument())) { std::cerr << "Document returned is not the expected one: \n" @@ -676,6 +677,7 @@ TEST_F(ConformanceTest, testPutOlderDocumentVersion) EXPECT_EQ(Result::ErrorType::NONE, gr.getErrorCode()); EXPECT_EQ(Timestamp(5), gr.getTimestamp()); EXPECT_EQ(*doc1, gr.getDocument()); + EXPECT_FALSE(gr.is_tombstone()); } TEST_F(ConformanceTest, testPutDuplicate) @@ -798,8 +800,9 @@ TEST_F(ConformanceTest, testRemove) context); EXPECT_EQ(Result::ErrorType::NONE, getResult.getErrorCode()); - EXPECT_EQ(Timestamp(0), getResult.getTimestamp()); - EXPECT_TRUE(!getResult.hasDocument()); + EXPECT_EQ(Timestamp(9), getResult.getTimestamp()); + EXPECT_TRUE(getResult.is_tombstone()); + EXPECT_FALSE(getResult.hasDocument()); } TEST_F(ConformanceTest, testRemoveMerge) @@ -939,6 +942,7 @@ TEST_F(ConformanceTest, testUpdate) EXPECT_EQ(Result::ErrorType::NONE, result.getErrorCode()); EXPECT_EQ(Timestamp(4), result.getTimestamp()); + EXPECT_FALSE(result.is_tombstone()); EXPECT_EQ(document::IntFieldValue(42), static_cast<document::IntFieldValue&>( *result.getDocument().getValue("headerval"))); @@ -953,8 +957,9 @@ TEST_F(ConformanceTest, testUpdate) context); EXPECT_EQ(Result::ErrorType::NONE, result.getErrorCode()); - EXPECT_EQ(Timestamp(0), result.getTimestamp()); - EXPECT_TRUE(!result.hasDocument()); + EXPECT_EQ(Timestamp(5), result.getTimestamp()); + EXPECT_FALSE(result.hasDocument()); + EXPECT_TRUE(result.is_tombstone()); } { @@ -968,8 +973,9 @@ TEST_F(ConformanceTest, testUpdate) { GetResult result = spi->get(bucket, document::AllFields(), doc1->getId(), context); EXPECT_EQ(Result::ErrorType::NONE, result.getErrorCode()); - EXPECT_EQ(Timestamp(0), result.getTimestamp()); - EXPECT_TRUE(!result.hasDocument()); + EXPECT_EQ(Timestamp(5), result.getTimestamp()); + EXPECT_FALSE(result.hasDocument()); + EXPECT_TRUE(result.is_tombstone()); } update->setCreateIfNonExistent(true); @@ -985,6 +991,7 @@ TEST_F(ConformanceTest, testUpdate) GetResult result = spi->get(bucket, document::AllFields(), doc1->getId(), context); EXPECT_EQ(Result::ErrorType::NONE, result.getErrorCode()); EXPECT_EQ(Timestamp(7), result.getTimestamp()); + EXPECT_FALSE(result.is_tombstone()); EXPECT_EQ(document::IntFieldValue(42), reinterpret_cast<document::IntFieldValue&>( *result.getDocument().getValue("headerval"))); @@ -1008,6 +1015,7 @@ TEST_F(ConformanceTest, testGet) EXPECT_EQ(Result::ErrorType::NONE, result.getErrorCode()); EXPECT_EQ(Timestamp(0), result.getTimestamp()); + EXPECT_FALSE(result.is_tombstone()); } spi->put(bucket, Timestamp(3), doc1, context); @@ -1017,6 +1025,7 @@ TEST_F(ConformanceTest, testGet) doc1->getId(), context); EXPECT_EQ(*doc1, result.getDocument()); EXPECT_EQ(Timestamp(3), result.getTimestamp()); + EXPECT_FALSE(result.is_tombstone()); } spi->remove(bucket, Timestamp(4), doc1->getId(), context); @@ -1026,7 +1035,8 @@ TEST_F(ConformanceTest, testGet) doc1->getId(), context); EXPECT_EQ(Result::ErrorType::NONE, result.getErrorCode()); - EXPECT_EQ(Timestamp(0), result.getTimestamp()); + EXPECT_EQ(Timestamp(4), result.getTimestamp()); + EXPECT_TRUE(result.is_tombstone()); } } diff --git a/persistence/src/vespa/persistence/dummyimpl/dummypersistence.cpp b/persistence/src/vespa/persistence/dummyimpl/dummypersistence.cpp index 5720a0ba662..92fea5ff6e4 100644 --- a/persistence/src/vespa/persistence/dummyimpl/dummypersistence.cpp +++ b/persistence/src/vespa/persistence/dummyimpl/dummypersistence.cpp @@ -533,7 +533,10 @@ DummyPersistence::get(const Bucket& b, const document::FieldSet& fieldSet, const if (!bc.get()) { } else { DocEntry::SP entry((*bc)->getEntry(did)); - if (entry.get() == 0 || entry->isRemove()) { + if (!entry) { + return GetResult(); + } else if (entry->isRemove()) { + return GetResult::make_for_tombstone(entry->getTimestamp()); } else { Document::UP doc(entry->getDocument()->clone()); if (fieldSet.getType() != document::FieldSet::ALL) { diff --git a/persistence/src/vespa/persistence/spi/result.cpp b/persistence/src/vespa/persistence/spi/result.cpp index f5131f08b1f..8de6245a8d2 100644 --- a/persistence/src/vespa/persistence/spi/result.cpp +++ b/persistence/src/vespa/persistence/spi/result.cpp @@ -30,8 +30,17 @@ std::ostream & operator << (std::ostream & os, const Result::ErrorType &errorCod GetResult::GetResult(Document::UP doc, Timestamp timestamp) : Result(), _timestamp(timestamp), - _doc(std::move(doc)) -{ } + _doc(std::move(doc)), + _is_tombstone(false) +{ +} + +GetResult::GetResult(Timestamp removed_at_ts) + : _timestamp(removed_at_ts), + _doc(), + _is_tombstone(true) +{ +} GetResult::~GetResult() = default; BucketIdListResult::~BucketIdListResult() = default; diff --git a/persistence/src/vespa/persistence/spi/result.h b/persistence/src/vespa/persistence/spi/result.h index fe8e74706bb..714d71e37ee 100644 --- a/persistence/src/vespa/persistence/spi/result.h +++ b/persistence/src/vespa/persistence/spi/result.h @@ -156,13 +156,20 @@ public: */ GetResult(ErrorType error, const vespalib::string& errorMessage) : Result(error, errorMessage), - _timestamp(0) { } + _timestamp(0), + _is_tombstone(false) + { + } /** * Constructor to use when we didn't find the document in question. */ GetResult() - : _timestamp(0) { } + : _timestamp(0), + _doc(), + _is_tombstone(false) + { + } /** * Constructor to use when we found the document asked for. @@ -172,12 +179,22 @@ public: */ GetResult(DocumentUP doc, Timestamp timestamp); - ~GetResult(); + static GetResult make_for_tombstone(Timestamp removed_at_ts) { + return GetResult(removed_at_ts); + } + + ~GetResult() override; - Timestamp getTimestamp() const { return _timestamp; } + [[nodiscard]] Timestamp getTimestamp() const { + return _timestamp; + } - bool hasDocument() const { - return _doc.get() != NULL; + [[nodiscard]] bool hasDocument() const { + return (_doc.get() != nullptr); + } + + [[nodiscard]] bool is_tombstone() const noexcept { + return _is_tombstone; } const Document& getDocument() const { @@ -193,8 +210,12 @@ public: } private: + // Explicitly creates a tombstone (remove entry) GetResult with no document. + explicit GetResult(Timestamp removed_at_ts); + Timestamp _timestamp; DocumentSP _doc; + bool _is_tombstone; }; class BucketIdListResult : public Result { diff --git a/searchcore/src/tests/proton/persistenceengine/persistenceengine_test.cpp b/searchcore/src/tests/proton/persistenceengine/persistenceengine_test.cpp index 2927457a6a5..d6a3fea4735 100644 --- a/searchcore/src/tests/proton/persistenceengine/persistenceengine_test.cpp +++ b/searchcore/src/tests/proton/persistenceengine/persistenceengine_test.cpp @@ -686,6 +686,7 @@ TEST_F("require that get returns the first document found", SimpleFixture) { EXPECT_EQUAL(tstamp1, result.getTimestamp()); ASSERT_TRUE(result.hasDocument()); EXPECT_EQUAL(*doc1, result.getDocument()); + EXPECT_FALSE(result.is_tombstone()); } TEST_F("require that createIterator does", SimpleFixture) { diff --git a/searchcore/src/vespa/searchcore/proton/persistenceengine/persistenceengine.cpp b/searchcore/src/vespa/searchcore/proton/persistenceengine/persistenceengine.cpp index 91ccac6fce1..1dc5199557a 100644 --- a/searchcore/src/vespa/searchcore/proton/persistenceengine/persistenceengine.cpp +++ b/searchcore/src/vespa/searchcore/proton/persistenceengine/persistenceengine.cpp @@ -434,7 +434,7 @@ PersistenceEngine::get(const Bucket& b, const document::FieldSet& fields, const search::DocumentMetaData meta = retriever.getDocumentMetaData(did); if (meta.timestamp != 0 && meta.bucketId == b.getBucketId()) { if (meta.removed) { - return GetResult(); + return GetResult::make_for_tombstone(meta.timestamp); } document::Document::UP doc = retriever.getDocument(meta.lid); if (!doc || doc->getId().getGlobalId() != meta.gid) { diff --git a/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt b/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt index 723af890f0e..5f8c2514f49 100644 --- a/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt @@ -19,6 +19,7 @@ vespa_add_library(searchlib_queryeval OBJECT fake_search.cpp fake_searchable.cpp field_spec.cpp + full_search.cpp get_weight_from_node.cpp global_filter.cpp hitcollector.cpp diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp index ff19041e16c..8881bc7da05 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp @@ -3,6 +3,8 @@ #include "blueprint.h" #include "leaf_blueprints.h" #include "intermediate_blueprints.h" +#include "emptysearch.h" +#include "full_search.h" #include "field_spec.hpp" #include <vespa/searchlib/fef/termfieldmatchdataarray.h> #include <vespa/vespalib/objects/visit.hpp> @@ -119,6 +121,17 @@ Blueprint::root() const return *bp; } +SearchIterator::UP +Blueprint::createFilterSearch(bool /*strict*/, FilterConstraint constraint) const +{ + if (constraint == FilterConstraint::UPPER_BOUND) { + return std::make_unique<FullSearch>(); + } else { + LOG_ASSERT(constraint == FilterConstraint::LOWER_BOUND); + return std::make_unique<EmptySearch>(); + } +} + vespalib::string Blueprint::asString() const { diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.h b/searchlib/src/vespa/searchlib/queryeval/blueprint.h index 927f57907f8..10a68e45d27 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.h @@ -164,6 +164,12 @@ public: virtual bool check(const Blueprint & bp) const = 0; }; + // Signal if createFilterSearch should ensure the returned + // iterator is an upper bound (yielding a hit on at least + // all matching documents) or a lower bound (never yielding a + // hit that isn't certain to be a match). + enum class FilterConstraint { UPPER_BOUND, LOWER_BOUND }; + Blueprint(); Blueprint(const Blueprint &) = delete; Blueprint &operator=(const Blueprint &) = delete; @@ -199,6 +205,7 @@ public: bool frozen() const { return _frozen; } virtual SearchIteratorUP createSearch(fef::MatchData &md, bool strict) const = 0; + virtual SearchIteratorUP createFilterSearch(bool strict, FilterConstraint constraint) const; // for debug dumping vespalib::string asString() const; diff --git a/searchlib/src/vespa/searchlib/queryeval/emptysearch.cpp b/searchlib/src/vespa/searchlib/queryeval/emptysearch.cpp index bab047827ad..782c6fc1946 100644 --- a/searchlib/src/vespa/searchlib/queryeval/emptysearch.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/emptysearch.cpp @@ -2,8 +2,7 @@ #include "emptysearch.h" -namespace search { -namespace queryeval { +namespace search::queryeval { void EmptySearch::doSeek(uint32_t) @@ -15,6 +14,25 @@ EmptySearch::doUnpack(uint32_t) { } +void +EmptySearch::or_hits_into(BitVector &, uint32_t) +{ + // nop +} + +void +EmptySearch::and_hits_into(BitVector &result, uint32_t begin_id) +{ + result.clearInterval(begin_id, getEndId()); +} + +BitVector::UP +EmptySearch::get_hits(uint32_t begin_id) +{ + auto result = BitVector::create(begin_id, getEndId()); + return result; +} + EmptySearch::Trinary EmptySearch::is_strict() const { @@ -30,5 +48,4 @@ EmptySearch::~EmptySearch() { } -} // namespace queryeval -} // namespace search +} // namespace diff --git a/searchlib/src/vespa/searchlib/queryeval/emptysearch.h b/searchlib/src/vespa/searchlib/queryeval/emptysearch.h index 12d7430922c..3a9c6684db3 100644 --- a/searchlib/src/vespa/searchlib/queryeval/emptysearch.h +++ b/searchlib/src/vespa/searchlib/queryeval/emptysearch.h @@ -3,15 +3,20 @@ #pragma once #include "searchiterator.h" +#include <vespa/searchlib/common/bitvector.h> namespace search { namespace queryeval { +/** Search iterator that never yields any hits. */ class EmptySearch : public SearchIterator { protected: void doSeek(uint32_t) override; void doUnpack(uint32_t) override; + void or_hits_into(BitVector &result, uint32_t begin_id) override; + void and_hits_into(BitVector &result, uint32_t begin_id) override; + BitVector::UP get_hits(uint32_t begin_id) override; void initRange(uint32_t begin, uint32_t end) override { SearchIterator::initRange(begin, end); setAtEnd(); @@ -25,4 +30,3 @@ public: } // namespace queryeval } // namespace search - diff --git a/searchlib/src/vespa/searchlib/queryeval/full_search.cpp b/searchlib/src/vespa/searchlib/queryeval/full_search.cpp new file mode 100644 index 00000000000..6f387b0ab41 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/full_search.cpp @@ -0,0 +1,44 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "full_search.h" + +namespace search::queryeval { + +void +FullSearch::doSeek(uint32_t docid) +{ + setDocId(docid); +} + +void +FullSearch::doUnpack(uint32_t) +{ +} + +void +FullSearch::or_hits_into(BitVector &result, uint32_t begin_id) +{ + result.setInterval(begin_id, getEndId()); +} + +void +FullSearch::and_hits_into(BitVector &, uint32_t) +{ + // nop +} + +BitVector::UP +FullSearch::get_hits(uint32_t begin_id) +{ + auto result = BitVector::create(begin_id, getEndId()); + result->setInterval(begin_id, getEndId()); + return result; +} + +FullSearch::FullSearch() : SearchIterator() +{ +} + +FullSearch::~FullSearch() = default; + +} // namespace diff --git a/searchlib/src/vespa/searchlib/queryeval/full_search.h b/searchlib/src/vespa/searchlib/queryeval/full_search.h new file mode 100644 index 00000000000..734c3a1443e --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/full_search.h @@ -0,0 +1,30 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "searchiterator.h" +#include <vespa/searchlib/common/bitvector.h> + +namespace search::queryeval { + +/** + * Search iterator that hits all documents. + * Note that it does not search any field, and + * does not unpack any ranking information. + **/ +class FullSearch : public SearchIterator +{ +private: + Trinary is_strict() const override { return Trinary::True; } + void doSeek(uint32_t) override; + void doUnpack(uint32_t) override; + void or_hits_into(BitVector &result, uint32_t begin_id) override; + void and_hits_into(BitVector &result, uint32_t begin_id) override; + BitVector::UP get_hits(uint32_t begin_id) override; + +public: + FullSearch(); + ~FullSearch(); +}; + +} diff --git a/searchlib/src/vespa/searchlib/queryeval/searchiterator.h b/searchlib/src/vespa/searchlib/queryeval/searchiterator.h index afc1bc8ce15..3ce808de7a1 100644 --- a/searchlib/src/vespa/searchlib/queryeval/searchiterator.h +++ b/searchlib/src/vespa/searchlib/queryeval/searchiterator.h @@ -160,14 +160,14 @@ public: /** * Find all hits in the currently searched range (specified by - * initRange) and OR them into the given temporary result. This + * initRange) and AND them into the given temporary result. This * function will perform term-at-a-time evaluation and should only * be used for terms not needed for ranking. Calling this function * will exhaust this iterator and no more results will be * available in the currently searched range after this function * returns. * - * @param result result to be augmented by adding hits from this + * @param result result to be augmented by clearing non-hits from this * iterator. * @param begin_id the lowest document id that may be a hit * (we might not remember beginId from initRange) diff --git a/searchlib/src/vespa/searchlib/queryeval/truesearch.h b/searchlib/src/vespa/searchlib/queryeval/truesearch.h index 6cf1e4dec02..35a609cb3a7 100644 --- a/searchlib/src/vespa/searchlib/queryeval/truesearch.h +++ b/searchlib/src/vespa/searchlib/queryeval/truesearch.h @@ -7,6 +7,10 @@ namespace search::queryeval { +/** + * Search iterator for testing, yielding a hit on all documents. + * Unpacks (sets docid) to the given TermFieldMatchData. + **/ class TrueSearch : public SearchIterator { private: diff --git a/storage/src/tests/distributor/getoperationtest.cpp b/storage/src/tests/distributor/getoperationtest.cpp index f14b78094d1..d4d14314790 100644 --- a/storage/src/tests/distributor/getoperationtest.cpp +++ b/storage/src/tests/distributor/getoperationtest.cpp @@ -66,7 +66,9 @@ struct GetOperationTest : Test, DistributorTestUtil { void sendReply(uint32_t idx, api::ReturnCode::Result result, - std::string authorVal, uint32_t timestamp) + std::string authorVal, + uint32_t timestamp, + bool is_tombstone = false) { if (idx == LastCommand) { idx = _sender.commands().size() - 1; @@ -75,7 +77,8 @@ struct GetOperationTest : Test, DistributorTestUtil { std::shared_ptr<api::StorageCommand> msg2 = _sender.command(idx); ASSERT_EQ(api::MessageType::GET, msg2->getType()); - auto* tmp = static_cast<api::GetCommand*>(msg2.get()); + auto* tmp = dynamic_cast<api::GetCommand*>(msg2.get()); + assert(tmp != nullptr); document::Document::SP doc; if (!authorVal.empty()) { @@ -86,12 +89,16 @@ struct GetOperationTest : Test, DistributorTestUtil { document::StringFieldValue(authorVal)); } - auto reply = std::make_shared<api::GetReply>(*tmp, doc, timestamp); + auto reply = std::make_shared<api::GetReply>(*tmp, doc, timestamp, false, is_tombstone); reply->setResult(result); op->receive(_sender, reply); } + void reply_with_tombstone(uint32_t idx, uint32_t tombstone_ts) { + sendReply(idx, api::ReturnCode::OK, "", tombstone_ts, true); + } + void replyWithFailure() { sendReply(LastCommand, api::ReturnCode::IO_FAILURE, "", 0); } @@ -126,6 +133,13 @@ struct GetOperationTest : Test, DistributorTestUtil { return dynamic_cast<api::GetReply&>(msg).had_consistent_replicas(); } + bool last_reply_has_document() { + assert(!_sender.replies().empty()); + auto& msg = *_sender.replies().back(); + assert(msg.getType() == api::MessageType::GET_REPLY); + return (dynamic_cast<api::GetReply&>(msg).getDocument().get() != nullptr); + } + void setClusterState(const std::string& clusterState) { enableDistributorClusterState(clusterState); } @@ -138,8 +152,8 @@ GetOperationTest::~GetOperationTest() = default; namespace { -NewestReplica replica_of(api::Timestamp ts, const document::BucketId& bucket_id, uint16_t node) { - return NewestReplica::of(ts, bucket_id, node); +NewestReplica replica_of(api::Timestamp ts, const document::BucketId& bucket_id, uint16_t node, bool is_tombstone) { + return NewestReplica::of(ts, bucket_id, node, is_tombstone); } } @@ -161,7 +175,7 @@ TEST_F(GetOperationTest, simple) { EXPECT_FALSE(op->any_replicas_failed()); EXPECT_TRUE(last_reply_had_consistent_replicas()); ASSERT_TRUE(op->newest_replica().has_value()); - EXPECT_EQ(replica_of(api::Timestamp(100), bucketId, 0), *op->newest_replica()); + EXPECT_EQ(replica_of(api::Timestamp(100), bucketId, 0, false), *op->newest_replica()); } TEST_F(GetOperationTest, ask_all_checksum_groups_if_inconsistent_even_if_trusted_replica_available) { @@ -182,7 +196,7 @@ TEST_F(GetOperationTest, ask_all_checksum_groups_if_inconsistent_even_if_trusted EXPECT_FALSE(op->any_replicas_failed()); EXPECT_FALSE(last_reply_had_consistent_replicas()); ASSERT_TRUE(op->newest_replica().has_value()); - EXPECT_EQ(replica_of(api::Timestamp(2), bucketId, 0), *op->newest_replica()); + EXPECT_EQ(replica_of(api::Timestamp(2), bucketId, 0, false), *op->newest_replica()); } TEST_F(GetOperationTest, ask_all_nodes_if_bucket_is_inconsistent) { @@ -205,7 +219,7 @@ TEST_F(GetOperationTest, ask_all_nodes_if_bucket_is_inconsistent) { EXPECT_FALSE(op->any_replicas_failed()); EXPECT_FALSE(last_reply_had_consistent_replicas()); ASSERT_TRUE(op->newest_replica().has_value()); - EXPECT_EQ(replica_of(api::Timestamp(2), bucketId, 1), *op->newest_replica()); + EXPECT_EQ(replica_of(api::Timestamp(2), bucketId, 1, false), *op->newest_replica()); } TEST_F(GetOperationTest, send_to_all_invalid_copies) { @@ -273,7 +287,7 @@ TEST_F(GetOperationTest, inconsistent_split) { EXPECT_FALSE(last_reply_had_consistent_replicas()); // Bucket with highest timestamp should be returned. In this case it's the one on node 0. ASSERT_TRUE(op->newest_replica().has_value()); - EXPECT_EQ(replica_of(api::Timestamp(2), BucketId(16, 0x0593), 0), *op->newest_replica()); + EXPECT_EQ(replica_of(api::Timestamp(2), BucketId(16, 0x0593), 0, false), *op->newest_replica()); } TEST_F(GetOperationTest, multi_inconsistent_bucket_not_found) { @@ -317,7 +331,7 @@ TEST_F(GetOperationTest, multi_inconsistent_bucket_not_found_deleted) { EXPECT_FALSE(op->any_replicas_failed()); EXPECT_FALSE(last_reply_had_consistent_replicas()); ASSERT_TRUE(op->newest_replica().has_value()); - EXPECT_EQ(replica_of(api::Timestamp(3), bucketId, 1), *op->newest_replica()); + EXPECT_EQ(replica_of(api::Timestamp(3), bucketId, 1, false), *op->newest_replica()); } TEST_F(GetOperationTest, multi_inconsistent_bucket) { @@ -367,7 +381,7 @@ TEST_F(GetOperationTest, multi_inconsistent_bucket_fail) { EXPECT_FALSE(last_reply_had_consistent_replicas()); ASSERT_TRUE(op->newest_replica().has_value()); // First send to node 2 fails, second is to node 3 which returned the highest timestamp - EXPECT_EQ(replica_of(api::Timestamp(100), bucketId, 3), *op->newest_replica()); + EXPECT_EQ(replica_of(api::Timestamp(100), bucketId, 3, false), *op->newest_replica()); } TEST_F(GetOperationTest, return_not_found_when_bucket_not_in_db) { @@ -408,7 +422,7 @@ TEST_F(GetOperationTest, not_found) { // the caller may want to perform special logic if all replicas are in sync // but are missing the document. // FIXME make sure all callers are aware of this! - EXPECT_EQ(replica_of(api::Timestamp(0), bucketId, 0), *op->newest_replica()); + EXPECT_EQ(replica_of(api::Timestamp(0), bucketId, 0, false), *op->newest_replica()); } TEST_F(GetOperationTest, not_found_on_subset_of_replicas_marks_get_as_inconsistent) { @@ -453,7 +467,7 @@ TEST_F(GetOperationTest, resend_on_storage_failure) { // Replica had read failure, but they're still in sync. An immutable Get won't change that fact. EXPECT_TRUE(last_reply_had_consistent_replicas()); ASSERT_TRUE(op->newest_replica().has_value()); - EXPECT_EQ(replica_of(api::Timestamp(100), bucketId, 2), *op->newest_replica()); + EXPECT_EQ(replica_of(api::Timestamp(100), bucketId, 2, false), *op->newest_replica()); } TEST_F(GetOperationTest, storage_failure_of_out_of_sync_replica_is_tracked_as_inconsistent) { @@ -470,7 +484,7 @@ TEST_F(GetOperationTest, storage_failure_of_out_of_sync_replica_is_tracked_as_in EXPECT_TRUE(op->any_replicas_failed()); EXPECT_FALSE(last_reply_had_consistent_replicas()); ASSERT_TRUE(op->newest_replica().has_value()); - EXPECT_EQ(replica_of(api::Timestamp(3), bucketId, 2), *op->newest_replica()); + EXPECT_EQ(replica_of(api::Timestamp(3), bucketId, 2, false), *op->newest_replica()); } TEST_F(GetOperationTest, resend_on_storage_failure_all_fail) { @@ -519,7 +533,7 @@ TEST_F(GetOperationTest, send_to_ideal_copy_if_bucket_in_sync) { _sender.getLastReply()); EXPECT_TRUE(last_reply_had_consistent_replicas()); ASSERT_TRUE(op->newest_replica().has_value()); - EXPECT_EQ(replica_of(api::Timestamp(100), bucketId, 1), *op->newest_replica()); + EXPECT_EQ(replica_of(api::Timestamp(100), bucketId, 1, false), *op->newest_replica()); } TEST_F(GetOperationTest, multiple_copies_with_failure_on_local_node) { @@ -550,7 +564,7 @@ TEST_F(GetOperationTest, multiple_copies_with_failure_on_local_node) { EXPECT_TRUE(op->any_replicas_failed()); EXPECT_TRUE(last_reply_had_consistent_replicas()); ASSERT_TRUE(op->newest_replica().has_value()); - EXPECT_EQ(replica_of(api::Timestamp(3), BucketId(16, 0x0593), 2), *op->newest_replica()); + EXPECT_EQ(replica_of(api::Timestamp(3), BucketId(16, 0x0593), 2, false), *op->newest_replica()); } TEST_F(GetOperationTest, can_get_documents_when_all_replica_nodes_retired) { @@ -580,4 +594,58 @@ TEST_F(GetOperationTest, can_send_gets_with_weak_internal_read_consistency) { do_test_read_consistency_is_propagated(api::InternalReadConsistency::Weak); } +TEST_F(GetOperationTest, replicas_considered_consistent_if_all_equal_tombstone_timestamps) { + setClusterState("distributor:1 storage:4"); + addNodesToBucketDB(bucketId, "0=100,2=100,1=200,3=200"); + sendGet(); + ASSERT_EQ("Get => 0,Get => 1", _sender.getCommands(true)); + + ASSERT_NO_FATAL_FAILURE(reply_with_tombstone(0, 100)); + ASSERT_NO_FATAL_FAILURE(reply_with_tombstone(1, 100)); + + ASSERT_EQ("GetReply(BucketId(0x0000000000000000), id:ns:text/html::uri, timestamp 0) ReturnCode(NONE)", + _sender.getLastReply()); + + EXPECT_FALSE(op->any_replicas_failed()); + EXPECT_TRUE(last_reply_had_consistent_replicas()); + EXPECT_FALSE(last_reply_has_document()); + EXPECT_EQ(replica_of(api::Timestamp(100), bucketId, 0, true), *op->newest_replica()); +} + +TEST_F(GetOperationTest, newer_tombstone_hides_older_document) { + setClusterState("distributor:1 storage:4"); + addNodesToBucketDB(bucketId, "0=100,2=100,1=200,3=200"); + sendGet(); + ASSERT_EQ("Get => 0,Get => 1", _sender.getCommands(true)); + + ASSERT_NO_FATAL_FAILURE(reply_with_tombstone(1, 200)); + ASSERT_NO_FATAL_FAILURE(sendReply(0, api::ReturnCode::OK, "newauthor", 100)); + + ASSERT_EQ("GetReply(BucketId(0x0000000000000000), id:ns:text/html::uri, timestamp 0) ReturnCode(NONE)", + _sender.getLastReply()); + + EXPECT_FALSE(op->any_replicas_failed()); + EXPECT_FALSE(last_reply_had_consistent_replicas()); + EXPECT_FALSE(last_reply_has_document()); + EXPECT_EQ(replica_of(api::Timestamp(200), bucketId, 1, true), *op->newest_replica()); +} + +TEST_F(GetOperationTest, older_tombstone_does_not_hide_newer_document) { + setClusterState("distributor:1 storage:4"); + addNodesToBucketDB(bucketId, "0=100,2=100,1=200,3=200"); + sendGet(); + ASSERT_EQ("Get => 0,Get => 1", _sender.getCommands(true)); + + ASSERT_NO_FATAL_FAILURE(reply_with_tombstone(1, 100)); + ASSERT_NO_FATAL_FAILURE(sendReply(0, api::ReturnCode::OK, "newauthor", 200)); + + ASSERT_EQ("GetReply(BucketId(0x0000000000000000), id:ns:text/html::uri, timestamp 200) ReturnCode(NONE)", + _sender.getLastReply()); + + EXPECT_FALSE(op->any_replicas_failed()); + EXPECT_FALSE(last_reply_had_consistent_replicas()); + EXPECT_TRUE(last_reply_has_document()); + EXPECT_EQ(replica_of(api::Timestamp(200), bucketId, 0, false), *op->newest_replica()); +} + } diff --git a/storage/src/tests/distributor/twophaseupdateoperationtest.cpp b/storage/src/tests/distributor/twophaseupdateoperationtest.cpp index 962ce085cb0..e42e7684f81 100644 --- a/storage/src/tests/distributor/twophaseupdateoperationtest.cpp +++ b/storage/src/tests/distributor/twophaseupdateoperationtest.cpp @@ -90,6 +90,12 @@ struct TwoPhaseUpdateOperationTest : Test, DistributorTestUtil { api::ReturnCode::Result result = api::ReturnCode::OK, const std::string& trace_msg = ""); + void reply_to_get_with_tombstone( + Operation& callback, + DistributorMessageSenderStub& sender, + uint32_t index, + uint64_t old_timestamp); + struct UpdateOptions { bool _makeInconsistentSplit; bool _createIfNonExistent; @@ -253,6 +259,18 @@ TwoPhaseUpdateOperationTest::reply_to_metadata_get( callback.receive(sender, reply); } +void +TwoPhaseUpdateOperationTest::reply_to_get_with_tombstone( + Operation& callback, + DistributorMessageSenderStub& sender, + uint32_t index, + uint64_t old_timestamp) +{ + auto& get = dynamic_cast<const api::GetCommand&>(*sender.command(index)); + auto reply = std::make_shared<api::GetReply>(get, std::shared_ptr<Document>(), old_timestamp, false, true); + callback.receive(sender, reply); +} + namespace { struct DummyTransportContext : api::TransportContext { @@ -1317,6 +1335,45 @@ TEST_F(ThreePhaseUpdateTest, single_full_get_reply_received_after_close_is_no_op ASSERT_EQ("", _sender.getCommands(true, false, 3)); // Nothing new sent. } +TEST_F(ThreePhaseUpdateTest, single_full_get_tombstone_is_no_op_without_auto_create) { + setupDistributor(2, 2, "storage:2 distributor:1"); + getConfig().set_enable_metadata_only_fetch_phase_for_inconsistent_updates(true); + getConfig().set_update_fast_path_restart_enabled(true); + auto cb = sendUpdate("0=1/2/3,1=2/3/4"); + cb->start(_sender, framework::MilliSecTime(0)); + + ASSERT_EQ("Get => 0,Get => 1", _sender.getCommands(true)); + reply_to_metadata_get(*cb, _sender, 0, 1000U); + reply_to_metadata_get(*cb, _sender, 1, 2000U); + ASSERT_EQ("Get => 1", _sender.getCommands(true, false, 2)); + reply_to_get_with_tombstone(*cb, _sender, 2, 2000U); + // No puts should be sent, as Get returned a tombstone and no auto-create is set. + ASSERT_EQ("", _sender.getCommands(true, false, 3)); + // Nothing was updated. + EXPECT_EQ("UpdateReply(id:ns:testdoctype1::1, " + "BucketId(0x0000000000000000), " + "timestamp 0, timestamp of updated doc: 0) " + "ReturnCode(NONE)", + _sender.getLastReply(true)); +} + +TEST_F(ThreePhaseUpdateTest, single_full_get_tombstone_sends_puts_with_auto_create) { + setupDistributor(2, 2, "storage:2 distributor:1"); + getConfig().set_enable_metadata_only_fetch_phase_for_inconsistent_updates(true); + getConfig().set_update_fast_path_restart_enabled(true); + auto cb = sendUpdate("0=1/2/3,1=2/3/4", UpdateOptions().createIfNonExistent(true)); + cb->start(_sender, framework::MilliSecTime(0)); + + ASSERT_EQ("Get => 0,Get => 1", _sender.getCommands(true)); + reply_to_metadata_get(*cb, _sender, 0, 1000U); + reply_to_metadata_get(*cb, _sender, 1, 2000U); + ASSERT_EQ("Get => 1", _sender.getCommands(true, false, 2)); + reply_to_get_with_tombstone(*cb, _sender, 2, 2000U); + // Tombstone is treated as Not Found in this case, which auto-creates a new + // document version locally and pushes it out with Puts as expected. + ASSERT_EQ("Put => 1,Put => 0", _sender.getCommands(true, false, 3)); +} + // XXX currently differs in behavior from content nodes in that updates for // document IDs without explicit doctypes will _not_ be auto-failed on the // distributor. diff --git a/storage/src/vespa/storage/distributor/operations/external/getoperation.cpp b/storage/src/vespa/storage/distributor/operations/external/getoperation.cpp index b59405a73cd..bbe477be9ef 100644 --- a/storage/src/vespa/storage/distributor/operations/external/getoperation.cpp +++ b/storage/src/vespa/storage/distributor/operations/external/getoperation.cpp @@ -168,8 +168,9 @@ GetOperation::onReceive(DistributorMessageSender& sender, const std::shared_ptr< if (!_newest_replica.has_value() || getreply->getLastModifiedTimestamp() > _newest_replica->timestamp) { _returnCode = getreply->getResult(); assert(response.second[i].to_node != UINT16_MAX); - _newest_replica = NewestReplica::of(getreply->getLastModifiedTimestamp(), bucket_id, send_state.to_node); - _doc = getreply->getDocument(); + _newest_replica = NewestReplica::of(getreply->getLastModifiedTimestamp(), bucket_id, + send_state.to_node, getreply->is_tombstone()); + _doc = getreply->getDocument(); // May be empty (tombstones). } } else { _any_replicas_failed = true; @@ -228,7 +229,12 @@ void GetOperation::sendReply(DistributorMessageSender& sender) { if (_msg.get()) { - const auto timestamp = _newest_replica.value_or(NewestReplica::make_empty()).timestamp; + const auto newest = _newest_replica.value_or(NewestReplica::make_empty()); + // If the newest entry is a tombstone (remove entry), the externally visible + // behavior is as if the document was not found. In this case _doc will also + // be empty. This means we also currently don't propagate tombstone status outside + // of this operation (except via the newest_replica() functionality). + const auto timestamp = (newest.is_tombstone ? api::Timestamp(0) : newest.timestamp); auto repl = std::make_shared<api::GetReply>(*_msg, _doc, timestamp, !_has_replica_inconsistency); repl->setResult(_returnCode); update_internal_metrics(); diff --git a/storage/src/vespa/storage/distributor/operations/external/newest_replica.cpp b/storage/src/vespa/storage/distributor/operations/external/newest_replica.cpp index 8ca3b9bf411..2520db3e57a 100644 --- a/storage/src/vespa/storage/distributor/operations/external/newest_replica.cpp +++ b/storage/src/vespa/storage/distributor/operations/external/newest_replica.cpp @@ -7,7 +7,9 @@ namespace storage::distributor { std::ostream& operator<<(std::ostream& os, const NewestReplica& nr) { os << "NewestReplica(timestamp " << nr.timestamp << ", bucket_id " << nr.bucket_id - << ", node " << nr.node << ')'; + << ", node " << nr.node + << ", is_tombstone " << nr.is_tombstone + << ')'; return os; } diff --git a/storage/src/vespa/storage/distributor/operations/external/newest_replica.h b/storage/src/vespa/storage/distributor/operations/external/newest_replica.h index 9eb9c1b8bd0..ec2b73e52ab 100644 --- a/storage/src/vespa/storage/distributor/operations/external/newest_replica.h +++ b/storage/src/vespa/storage/distributor/operations/external/newest_replica.h @@ -17,21 +17,24 @@ struct NewestReplica { api::Timestamp timestamp {0}; document::BucketId bucket_id; uint16_t node {UINT16_MAX}; + bool is_tombstone {false}; static NewestReplica of(api::Timestamp timestamp, const document::BucketId& bucket_id, - uint16_t node) noexcept { - return {timestamp, bucket_id, node}; + uint16_t node, + bool is_tombstone) noexcept { + return {timestamp, bucket_id, node, is_tombstone}; } static NewestReplica make_empty() { - return {api::Timestamp(0), document::BucketId(), 0}; + return {api::Timestamp(0), document::BucketId(), 0, false}; } bool operator==(const NewestReplica& rhs) const noexcept { return ((timestamp == rhs.timestamp) && (bucket_id == rhs.bucket_id) && - (node == rhs.node)); + (node == rhs.node) && + (is_tombstone == rhs.is_tombstone)); } bool operator!=(const NewestReplica& rhs) const noexcept { return !(*this == rhs); diff --git a/storage/src/vespa/storage/distributor/operations/external/twophaseupdateoperation.cpp b/storage/src/vespa/storage/distributor/operations/external/twophaseupdateoperation.cpp index 2e6fe4020c2..43cf43b02b6 100644 --- a/storage/src/vespa/storage/distributor/operations/external/twophaseupdateoperation.cpp +++ b/storage/src/vespa/storage/distributor/operations/external/twophaseupdateoperation.cpp @@ -463,6 +463,9 @@ void TwoPhaseUpdateOperation::handle_safe_path_received_metadata_get( // Timestamps were not in sync, so we have to fetch the document from the highest // timestamped replica, apply the update to it and then explicitly Put the result // to all replicas. + // Note that this timestamp may be for a tombstone (remove) entry, in which case + // conditional create-if-missing behavior kicks in as usual. + // TODO avoid sending the Get at all if the newest replica is marked as a tombstone. _single_get_latency_timer.emplace(_manager.getClock()); document::Bucket bucket(_updateCmd->getBucket().getBucketSpace(), newest_replica->bucket_id); LOG(debug, "Update(%s): sending single payload Get to %s on node %u (had timestamp %" PRIu64 ")", diff --git a/storage/src/vespa/storage/persistence/persistencethread.cpp b/storage/src/vespa/storage/persistence/persistencethread.cpp index d8f41008c8a..76c241d0627 100644 --- a/storage/src/vespa/storage/persistence/persistencethread.cpp +++ b/storage/src/vespa/storage/persistence/persistencethread.cpp @@ -295,7 +295,8 @@ PersistenceThread::handleGet(api::GetCommand& cmd, MessageTracker::UP tracker) if (!result.hasDocument()) { _env._metrics.get[cmd.getLoadType()].notFound.inc(); } - tracker->setReply(std::make_shared<api::GetReply>(cmd, result.getDocumentPtr(), result.getTimestamp())); + tracker->setReply(std::make_shared<api::GetReply>(cmd, result.getDocumentPtr(), result.getTimestamp(), + false, result.is_tombstone())); } return tracker; diff --git a/storageapi/src/tests/mbusprot/storageprotocoltest.cpp b/storageapi/src/tests/mbusprot/storageprotocoltest.cpp index 0f628f59aac..37159ab0011 100644 --- a/storageapi/src/tests/mbusprot/storageprotocoltest.cpp +++ b/storageapi/src/tests/mbusprot/storageprotocoltest.cpp @@ -291,6 +291,7 @@ TEST_P(StorageProtocolTest, get) { EXPECT_EQ(_testDoc->getId(), reply2->getDocumentId()); EXPECT_EQ(Timestamp(123), reply2->getBeforeTimestamp()); EXPECT_EQ(Timestamp(100), reply2->getLastModifiedTimestamp()); + EXPECT_FALSE(reply2->is_tombstone()); EXPECT_NO_FATAL_FAILURE(assert_bucket_info_reply_fields_propagated(*reply2)); } @@ -316,6 +317,40 @@ TEST_P(StorageProtocolTest, can_set_internal_read_consistency_on_get_commands) { EXPECT_EQ(cmd2->internal_read_consistency(), InternalReadConsistency::Strong); } +TEST_P(StorageProtocolTest, tombstones_propagated_for_gets) { + // Only supported on protocol version 7+. + if (GetParam().getMajor() < 7) { + return; + } + auto cmd = std::make_shared<GetCommand>(_bucket, _testDocId, "foo,bar", 123); + auto reply = std::make_shared<GetReply>(*cmd, std::shared_ptr<Document>(), 100, false, true); + set_dummy_bucket_info_reply_fields(*reply); + auto reply2 = copyReply(reply); + + EXPECT_TRUE(reply2->getDocument().get() == nullptr); + EXPECT_EQ(_testDoc->getId(), reply2->getDocumentId()); + EXPECT_EQ(Timestamp(123), reply2->getBeforeTimestamp()); + EXPECT_EQ(Timestamp(100), reply2->getLastModifiedTimestamp()); // In this case, the tombstone timestamp. + EXPECT_TRUE(reply2->is_tombstone()); +} + +// TODO remove this once pre-protobuf serialization is removed +TEST_P(StorageProtocolTest, old_serialization_format_treats_tombstone_get_replies_as_not_found) { + if (GetParam().getMajor() >= 7) { + return; + } + auto cmd = std::make_shared<GetCommand>(_bucket, _testDocId, "foo,bar", 123); + auto reply = std::make_shared<GetReply>(*cmd, std::shared_ptr<Document>(), 100, false, true); + set_dummy_bucket_info_reply_fields(*reply); + auto reply2 = copyReply(reply); + + EXPECT_TRUE(reply2->getDocument().get() == nullptr); + EXPECT_EQ(_testDoc->getId(), reply2->getDocumentId()); + EXPECT_EQ(Timestamp(123), reply2->getBeforeTimestamp()); + EXPECT_EQ(Timestamp(0), reply2->getLastModifiedTimestamp()); + EXPECT_FALSE(reply2->is_tombstone()); // Protocol version doesn't understand explicit tombstones. +} + TEST_P(StorageProtocolTest, remove) { auto cmd = std::make_shared<RemoveCommand>(_bucket, _testDocId, 159); auto cmd2 = copyCommand(cmd); diff --git a/storageapi/src/vespa/storageapi/mbusprot/protobuf/feed.proto b/storageapi/src/vespa/storageapi/mbusprot/protobuf/feed.proto index 12dbaf59146..f38c5bfb56a 100644 --- a/storageapi/src/vespa/storageapi/mbusprot/protobuf/feed.proto +++ b/storageapi/src/vespa/storageapi/mbusprot/protobuf/feed.proto @@ -73,6 +73,9 @@ message GetResponse { uint64 last_modified_timestamp = 2; BucketInfo bucket_info = 3; BucketId remapped_bucket_id = 4; + // Note: last_modified_timestamp and tombstone_timestamp are mutually exclusive. + // Tracked separately (rather than being a flag bool) to avoid issues during rolling upgrades. + uint64 tombstone_timestamp = 5; } message RevertRequest { diff --git a/storageapi/src/vespa/storageapi/mbusprot/protocolserialization5_0.cpp b/storageapi/src/vespa/storageapi/mbusprot/protocolserialization5_0.cpp index aeb6d382997..8a0204c8fc9 100644 --- a/storageapi/src/vespa/storageapi/mbusprot/protocolserialization5_0.cpp +++ b/storageapi/src/vespa/storageapi/mbusprot/protocolserialization5_0.cpp @@ -168,7 +168,8 @@ ProtocolSerialization5_0::onDecodeUpdateReply(const SCmd& cmd, BBuf& buf) const void ProtocolSerialization5_0::onEncode(GBBuf& buf, const api::GetReply& msg) const { SH::putDocument(msg.getDocument().get(), buf); - buf.putLong(msg.getLastModifiedTimestamp()); + // Old protocol version doesn't understand tombstones. Make it appear as Not Found. + buf.putLong(msg.is_tombstone() ? api::Timestamp(0) : msg.getLastModifiedTimestamp()); onEncodeBucketInfoReply(buf, msg); } diff --git a/storageapi/src/vespa/storageapi/mbusprot/protocolserialization7.cpp b/storageapi/src/vespa/storageapi/mbusprot/protocolserialization7.cpp index 90c8d1c7d2a..8ea946eede4 100644 --- a/storageapi/src/vespa/storageapi/mbusprot/protocolserialization7.cpp +++ b/storageapi/src/vespa/storageapi/mbusprot/protocolserialization7.cpp @@ -567,7 +567,17 @@ void ProtocolSerialization7::onEncode(GBBuf& buf, const api::GetReply& msg) cons if (msg.getDocument()) { set_document(*res.mutable_document(), *msg.getDocument()); } - res.set_last_modified_timestamp(msg.getLastModifiedTimestamp()); + if (!msg.is_tombstone()) { + res.set_last_modified_timestamp(msg.getLastModifiedTimestamp()); + } else { + // This field will be ignored by older versions, making the behavior as if + // a timestamp of zero was returned for tombstones, as it the legacy behavior. + res.set_tombstone_timestamp(msg.getLastModifiedTimestamp()); + // Will not be encoded onto the wire, but we include it here to hammer down the + // point that it's intentional to have the last modified time appear as a not + // found document for older versions. + res.set_last_modified_timestamp(0); + } }); } @@ -585,8 +595,12 @@ api::StorageReply::UP ProtocolSerialization7::onDecodeGetReply(const SCmd& cmd, return decode_bucket_info_response<protobuf::GetResponse>(buf, [&](auto& res) { try { auto document = get_document(res.document(), type_repo()); + const bool is_tombstone = (res.tombstone_timestamp() != 0); + const auto effective_timestamp = (is_tombstone ? res.tombstone_timestamp() + : res.last_modified_timestamp()); return std::make_unique<api::GetReply>(static_cast<const api::GetCommand&>(cmd), - std::move(document), res.last_modified_timestamp()); + std::move(document), effective_timestamp, + false, is_tombstone); } catch (std::exception& e) { auto reply = std::make_unique<api::GetReply>(static_cast<const api::GetCommand&>(cmd), std::shared_ptr<document::Document>(), 0u); diff --git a/storageapi/src/vespa/storageapi/message/persistence.cpp b/storageapi/src/vespa/storageapi/message/persistence.cpp index 7fd789a8c81..af0b7ae0288 100644 --- a/storageapi/src/vespa/storageapi/message/persistence.cpp +++ b/storageapi/src/vespa/storageapi/message/persistence.cpp @@ -211,14 +211,16 @@ GetCommand::print(std::ostream& out, bool verbose, const std::string& indent) co GetReply::GetReply(const GetCommand& cmd, const DocumentSP& doc, Timestamp lastModified, - bool had_consistent_replicas) + bool had_consistent_replicas, + bool is_tombstone) : BucketInfoReply(cmd), _docId(cmd.getDocumentId()), _fieldSet(cmd.getFieldSet()), _doc(doc), _beforeTimestamp(cmd.getBeforeTimestamp()), _lastModifiedTime(lastModified), - _had_consistent_replicas(had_consistent_replicas) + _had_consistent_replicas(had_consistent_replicas), + _is_tombstone(is_tombstone) { } diff --git a/storageapi/src/vespa/storageapi/message/persistence.h b/storageapi/src/vespa/storageapi/message/persistence.h index e6fe2b6dae5..24601803266 100644 --- a/storageapi/src/vespa/storageapi/message/persistence.h +++ b/storageapi/src/vespa/storageapi/message/persistence.h @@ -224,24 +224,27 @@ class GetReply : public BucketInfoReply { Timestamp _beforeTimestamp; Timestamp _lastModifiedTime; bool _had_consistent_replicas; - + bool _is_tombstone; public: - GetReply(const GetCommand& cmd, - const DocumentSP& doc = DocumentSP(), - Timestamp lastModified = 0, - bool had_consistent_replicas = false); + explicit GetReply(const GetCommand& cmd, + const DocumentSP& doc = DocumentSP(), + Timestamp lastModified = 0, + bool had_consistent_replicas = false, + bool is_tombstone = false); + ~GetReply() override; const DocumentSP& getDocument() const { return _doc; } const document::DocumentId& getDocumentId() const { return _docId; } const vespalib::string& getFieldSet() const { return _fieldSet; } - Timestamp getLastModifiedTimestamp() const { return _lastModifiedTime; } - Timestamp getBeforeTimestamp() const { return _beforeTimestamp; } + Timestamp getLastModifiedTimestamp() const noexcept { return _lastModifiedTime; } + Timestamp getBeforeTimestamp() const noexcept { return _beforeTimestamp; } - bool had_consistent_replicas() const noexcept { return _had_consistent_replicas; } + [[nodiscard]] bool had_consistent_replicas() const noexcept { return _had_consistent_replicas; } + [[nodiscard]] bool is_tombstone() const noexcept { return _is_tombstone; } - bool wasFound() const { return (_doc.get() != 0); } + bool wasFound() const { return (_doc.get() != nullptr); } void print(std::ostream& out, bool verbose, const std::string& indent) const override; DECLARE_STORAGEREPLY(GetReply, onGetReply) }; |