aboutsummaryrefslogtreecommitdiffstats
path: root/searchcore/src/vespa/searchcore/proton/server/docstorevalidator.cpp
blob: c14863137e45ae25f031b3c35843283fbdec2739 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#include "docstorevalidator.h"
#include "feedhandler.h"
#include <vespa/searchcore/proton/feedoperation/removeoperation.h>
#include <vespa/searchlib/common/bitvector.h>
#include <vespa/document/fieldvalue/document.h>
#include <vespa/document/datatype/documenttype.h>
#include <vespa/searchcore/proton/common/feedtoken.h>
#include <vespa/searchcore/proton/feedoperation/lidvectorcontext.h>

#include <vespa/log/log.h>
LOG_SETUP(".server.docstorevalidator");

namespace proton {

/**
 * Create a validator for the given document meta store.
 *
 * Both bit vectors are created from the committed doc id limit sampled
 * here. Every lid in [1, _docIdLimit) that the meta store reports as valid
 * starts out flagged in _invalid; visit(lid, doc) clears the flag again
 * when the stored document's gid matches the meta store entry.
 *
 * @param dms document meta store to validate against
 */
DocStoreValidator::DocStoreValidator(IDocumentMetaStore &dms)
    : _dms(dms),
      _docIdLimit(dms.getCommittedDocIdLimit()),
      _invalid(search::BitVector::create(_docIdLimit)),
      _orphans(search::BitVector::create(_docIdLimit)),
      _visitCount(0u),
      _visitEmptyCount(0u)
{
    // The scan starts at lid 1; lid 0 is never flagged.
    for (uint32_t lid = 1; lid < _docIdLimit; ++lid) {
        if (!_dms.validLid(lid)) {
            continue;
        }
        _invalid->setBit(lid);
    }
}


void
DocStoreValidator::visit(uint32_t lid, const std::shared_ptr<document::Document> &doc)
{
    if (lid == 0 || lid >= _docIdLimit)
        return;
    ++_visitCount;
    if (!_dms.validLid(lid)) {
        _orphans->setBit(lid);
        return;
    }
    const document::DocumentId &docId(doc->getId());
    const document::GlobalId &gid = docId.getGlobalId();
    const RawDocumentMetaData &meta = _dms.getRawMetaData(lid);
    const document::GlobalId &dmsGid = meta.getGid();
    if (gid == dmsGid) {
        _invalid->clearBit(lid);
    } else {
        _invalid->setBit(lid);
    }
}


/**
 * Visit callback for a lid that is reported without a document.
 *
 * Lids outside [1, _docIdLimit) are ignored and not counted. If the meta
 * store does not consider the lid valid, any orphan flag for it is cleared;
 * otherwise the lid is flagged as invalid.
 *
 * @param lid local document id being visited
 */
void
DocStoreValidator::visit(uint32_t lid)
{
    const bool inRange = (lid != 0) && (lid < _docIdLimit);
    if (!inRange) {
        return;
    }
    ++_visitEmptyCount;
    if (_dms.validLid(lid)) {
        _invalid->setBit(lid);
    } else {
        _orphans->clearBit(lid);
    }
}


/**
 * Called when visiting has finished.
 *
 * For each bit vector the cached bit count is invalidated and the bits are
 * counted again; the returned counts are deliberately discarded.
 * NOTE(review): presumably this refreshes the cached count used by later
 * count queries - confirm against BitVector.
 */
void
DocStoreValidator::visitDone()
{
    _invalid->invalidateCachedCount();
    (void) _invalid->countTrueBits();

    _orphans->invalidateCachedCount();
    (void) _orphans->countTrueBits();
}

/**
 * @return number of bits currently set in the invalid bit vector
 */
uint32_t
DocStoreValidator::getInvalidCount() const
{
    const uint32_t invalidCount = _invalid->countTrueBits();
    return invalidCount;
}

/**
 * @return number of bits currently set in the orphan bit vector
 */
uint32_t
DocStoreValidator::getOrphanCount() const
{
    const uint32_t orphanCount = _orphans->countTrueBits();
    return orphanCount;
}

/**
 * Remove every lid flagged as an orphan from the given document store.
 *
 * @param store     document store to remove the orphaned lids from
 * @param serialNum serial number passed along with each remove
 */
void
DocStoreValidator::killOrphans(search::IDocumentStore &store,
                               search::SerialNum serialNum)
{
    for (uint32_t lid = 1; lid < _docIdLimit; ++lid) {
        if (!_orphans->testBit(lid)) {
            continue;
        }
        // An orphan must not be known as a valid lid by the meta store.
        assert(!_dms.validLid(lid));
        store.remove(serialNum, lid);
    }
}


/**
 * Collect all lids flagged as invalid into a lid vector context.
 *
 * The context is created directly with std::make_shared, since the result
 * is handed out as a shared pointer; this avoids the separate control block
 * allocation needed when converting from a unique pointer.
 *
 * @return context created with _docIdLimit, holding every lid in
 *         [1, _docIdLimit) whose invalid bit is set, in ascending order
 */
std::shared_ptr<LidVectorContext>
DocStoreValidator::getInvalidLids() const
{
    auto res = std::make_shared<LidVectorContext>(_docIdLimit);
    assert(_invalid->size() == _docIdLimit);
    for (search::DocumentIdT lid(_invalid->getFirstTrueBit(1));
         lid < _docIdLimit;
         lid = _invalid->getNextTrueBit(lid + 1))
    {
        res->addLid(lid);
    }
    return res;
}

/**
 * Feed a remove operation for every lid flagged as invalid.
 *
 * For each flagged lid the gid and meta data are looked up in the meta
 * store and the document is read from the document store; the document
 * supplies the id used in the log message and the document type name used
 * in the remove operation.
 *
 * Each assert is paired with a runtime guard where a failure would
 * otherwise lead to a null dereference, so a build with asserts compiled
 * out skips the lid instead of crashing.
 *
 * @param feedHandler handler that performs the generated remove operations
 * @param store       document store the documents are read from
 * @param repo        document type repo used when reading documents
 */
void
DocStoreValidator::performRemoves(FeedHandler & feedHandler, const search::IDocumentStore &store, const document::DocumentTypeRepo & repo) const {
    for (search::DocumentIdT lid(_invalid->getFirstTrueBit(1));
         lid < _docIdLimit;
         lid = _invalid->getNextTrueBit(lid + 1))
    {
        document::GlobalId gid;
        bool found = _dms.getGid(lid, gid);
        assert(found);
        if (!found) {
            continue;
        }
        search::DocumentMetaData metaData = _dms.getMetaData(gid);
        assert(metaData.valid());
        document::Document::UP document = store.read(lid, repo);
        assert(document);
        if (!document) {
            // Without the document neither its id nor its type name is available.
            LOG(warning, "Unable to read document with lid %u and gid %s in bucket %s, skipping remove", lid, metaData.gid.toString().c_str(), metaData.bucketId.toString().c_str());
            continue;
        }
        LOG(info, "Removing document with id %s and lid %u with gid %s in bucket %s", document->getId().toString().c_str(), lid, metaData.gid.toString().c_str(), metaData.bucketId.toString().c_str());
        auto remove = std::make_unique<RemoveOperationWithGid>(metaData.bucketId, storage::spi::Timestamp(metaData.timestamp), gid, document->getType().getName());
        feedHandler.performOperation(FeedToken(), std::move(remove));
    }
}

} // namespace proton