diff options
9 files changed, 26 insertions, 155 deletions
diff --git a/searchcommon/src/tests/schema/schema_test.cpp b/searchcommon/src/tests/schema/schema_test.cpp index e9997c2e70d..790e5703109 100644 --- a/searchcommon/src/tests/schema/schema_test.cpp +++ b/searchcommon/src/tests/schema/schema_test.cpp @@ -26,9 +26,7 @@ void assertIndexField(const Schema::IndexField & exp, const Schema::IndexField & act) { assertField(exp, act); - EXPECT_EQUAL(exp.hasPrefix(), act.hasPrefix()); - EXPECT_EQUAL(exp.hasPhrases(), act.hasPhrases()); - EXPECT_EQUAL(exp.hasPositions(), act.hasPositions()); + EXPECT_EQUAL(exp.getAvgElemLen(), act.getAvgElemLen()); } void assertSet(const Schema::FieldSet &exp, @@ -94,9 +92,6 @@ TEST("testBasic") { EXPECT_EQUAL("foo", s.getIndexField(0).getName()); EXPECT_EQUAL(DataType::STRING, s.getIndexField(0).getDataType()); EXPECT_EQUAL(CollectionType::SINGLE, s.getIndexField(0).getCollectionType()); - EXPECT_TRUE(!s.getIndexField(0).hasPrefix()); - EXPECT_TRUE(!s.getIndexField(0).hasPhrases()); - EXPECT_TRUE(s.getIndexField(0).hasPositions()); EXPECT_EQUAL("bar", s.getIndexField(1).getName()); EXPECT_EQUAL(DataType::INT32, s.getIndexField(1).getDataType()); @@ -178,9 +173,7 @@ TEST("testLoadAndSave") { EXPECT_EQUAL(3u, s.getNumIndexFields()); assertIndexField(SIF("a", SDT::STRING), s.getIndexField(0)); assertIndexField(SIF("b", SDT::INT64), s.getIndexField(1)); - assertIndexField(SIF("c", SDT::STRING).setPrefix(true) - .setPhrases(false).setPositions(false), - s.getIndexField(2)); + assertIndexField(SIF("c", SDT::STRING), s.getIndexField(2)); EXPECT_EQUAL(9u, s.getNumAttributeFields()); assertField(SAF("a", SDT::STRING, SCT::SINGLE), diff --git a/searchcommon/src/vespa/searchcommon/common/schema.cpp b/searchcommon/src/vespa/searchcommon/common/schema.cpp index cef74409024..4cd95423155 100644 --- a/searchcommon/src/vespa/searchcommon/common/schema.cpp +++ b/searchcommon/src/vespa/searchcommon/common/schema.cpp @@ -131,9 +131,6 @@ Schema::Field::operator!=(const Field &rhs) const Schema::IndexField::IndexField(vespalib::stringref name, DataType dt) : Field(name, dt), - _prefix(false), - _phrases(false), - _positions(true), _avgElemLen(512) { } @@ -141,18 +138,12 @@ Schema::IndexField::IndexField(vespalib::stringref name, DataType dt) Schema::IndexField::IndexField(vespalib::stringref name, DataType dt, CollectionType ct) : Field(name, dt, ct), - _prefix(false), - _phrases(false), - _positions(true), _avgElemLen(512) { } Schema::IndexField::IndexField(const std::vector<vespalib::string> &lines) : Field(lines), - _prefix(ConfigParser::parse<bool>("prefix", lines)), - _phrases(ConfigParser::parse<bool>("phrases", lines)), - _positions(ConfigParser::parse<bool>("positions", lines)), _avgElemLen(ConfigParser::parse<int32_t>("averageelementlen", lines)) { } @@ -161,9 +152,6 @@ void Schema::IndexField::write(vespalib::asciistream & os, vespalib::stringref prefix) const { Field::write(os, prefix); - os << prefix << "prefix " << (_prefix ? "true" : "false") << "\n"; - os << prefix << "phrases " << (_phrases ? "true" : "false") << "\n"; - os << prefix << "positions " << (_positions ? "true" : "false") << "\n"; os << prefix << "averageelementlen " << static_cast<int32_t>(_avgElemLen) << "\n"; } @@ -171,9 +159,6 @@ bool Schema::IndexField::operator==(const IndexField &rhs) const { return Field::operator==(rhs) && - _prefix == rhs._prefix && - _phrases == rhs._phrases && - _positions == rhs._positions && _avgElemLen == rhs._avgElemLen; } @@ -181,9 +166,6 @@ bool Schema::IndexField::operator!=(const IndexField &rhs) const { return Field::operator!=(rhs) || - _prefix != rhs._prefix || - _phrases != rhs._phrases || - _positions != rhs._positions || _avgElemLen != rhs._avgElemLen; } @@ -337,9 +319,6 @@ cloneIndexField(const Schema::IndexField &field, return Schema::IndexField(field.getName() + suffix, field.getDataType(), field.getCollectionType()). - setPrefix(field.hasPrefix()). - setPhrases(field.hasPhrases()). - setPositions(field.hasPositions()). setAvgElemLen(field.getAvgElemLen()); } diff --git a/searchcommon/src/vespa/searchcommon/common/schema.h b/searchcommon/src/vespa/searchcommon/common/schema.h index 90cf099f2d8..10ab8f47856 100644 --- a/searchcommon/src/vespa/searchcommon/common/schema.h +++ b/searchcommon/src/vespa/searchcommon/common/schema.h @@ -74,11 +74,8 @@ public: * A representation of an index field with extra information on * how the index should be generated. **/ - class IndexField : public Field - { - bool _prefix; - bool _phrases; - bool _positions; + class IndexField : public Field { + private: uint32_t _avgElemLen; public: @@ -89,20 +86,11 @@ public: **/ IndexField(const std::vector<vespalib::string> &lines); - IndexField &setPrefix(bool value) { _prefix = value; return *this; } - IndexField &setPhrases(bool value) { _phrases = value; return *this; } - IndexField &setPositions(bool value) - { _positions = value; return *this; } - IndexField &setAvgElemLen(uint32_t avgElemLen) - { _avgElemLen = avgElemLen; return *this; } + IndexField &setAvgElemLen(uint32_t avgElemLen) { _avgElemLen = avgElemLen; return *this; } - void - write(vespalib::asciistream &os, - vespalib::stringref prefix) const override; + void write(vespalib::asciistream &os, + vespalib::stringref prefix) const override; - bool hasPrefix() const { return _prefix; } - bool hasPhrases() const { return _phrases; } - bool hasPositions() const { return _positions; } uint32_t getAvgElemLen() const { return _avgElemLen; } bool operator==(const IndexField &rhs) const; diff --git a/searchcommon/src/vespa/searchcommon/common/schemaconfigurer.cpp b/searchcommon/src/vespa/searchcommon/common/schemaconfigurer.cpp index d56f3c747c1..a357cc3538f 100644 --- a/searchcommon/src/vespa/searchcommon/common/schemaconfigurer.cpp +++ b/searchcommon/src/vespa/searchcommon/common/schemaconfigurer.cpp @@ -144,9 +144,6 @@ SchemaBuilder::build(const IndexschemaConfig &cfg, Schema &schema) const IndexschemaConfig::Indexfield & f = cfg.indexfield[i]; schema.addIndexField(Schema::IndexField(f.name, convertIndexDataType(f.datatype), convertIndexCollectionType(f.collectiontype)). - setPrefix(f.prefix). - setPhrases(f.phrases). - setPositions(f.positions). setAvgElemLen(f.averageelementlen)); } for (size_t i = 0; i < cfg.fieldset.size(); ++i) { diff --git a/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp b/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp index 64a54187254..b6d843e4e3c 100644 --- a/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp @@ -182,9 +182,8 @@ DiskIndex::setup(const TuneFileSearch &tuneFileSearch, if (settings.hasError()) { return false; } - bool hasPhraseOcc = settings.hasPhrases(); SchemaUtil::IndexIterator oItr(oldSchema, itr); - if (!itr.hasMatchingOldFields(oldSchema, hasPhraseOcc) || !oItr.isValid()) { + if (!itr.hasMatchingOldFields(oldSchema) || !oItr.isValid()) { if (!openField(fieldDir, tuneFileSearch)) { return false; } diff --git a/searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp b/searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp index a41f0412294..8da590654da 100644 --- a/searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp @@ -199,10 +199,10 @@ FieldReader::allocFieldReader(const SchemaUtil::IndexIterator &index, const Schema &oldSchema) { assert(index.isValid()); - if (index.hasMatchingOldFields(oldSchema, false)) { + if (index.hasMatchingOldFields(oldSchema)) { return std::make_unique<FieldReader>(); // The common case } - if (!index.hasOldFields(oldSchema, false)) { + if (!index.hasOldFields(oldSchema)) { return std::make_unique<FieldReaderEmpty>(index); // drop data } // field exists in old schema with different collection type setting diff --git a/searchlib/src/vespa/searchlib/diskindex/fusion.cpp b/searchlib/src/vespa/searchlib/diskindex/fusion.cpp index fc198e3b74e..ed311b682e6 100644 --- a/searchlib/src/vespa/searchlib/diskindex/fusion.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/fusion.cpp @@ -102,7 +102,7 @@ Fusion::openInputWordReaders(const SchemaUtil::IndexIterator &index, vespalib::string fieldDir(oldindexpath + "/" + index.getName()); vespalib::string dictName(fieldDir + "/dictionary"); const Schema &oldSchema = oi.getSchema(); - if (!index.hasOldFields(oldSchema, false)) { + if (!index.hasOldFields(oldSchema)) { continue; // drop data } bool res = reader->open(dictName, @@ -296,7 +296,7 @@ Fusion::openInputFieldReaders(const SchemaUtil::IndexIterator &index, for (auto &i : _oldIndexes) { OldIndex &oi = *i; const Schema &oldSchema = oi.getSchema(); - if (!index.hasOldFields(oldSchema, false)) { + if (!index.hasOldFields(oldSchema)) { continue; // drop data } auto reader = FieldReader::allocFieldReader(index, oldSchema); @@ -413,7 +413,7 @@ Fusion::ReadMappingFiles(const SchemaUtil::IndexIterator *index) wordNumMapping.noMappingFile(); continue; } - if (index && !index->hasOldFields(oldSchema, false)) { + if (index && !index->hasOldFields(oldSchema)) { continue; // drop data } diff --git a/searchlib/src/vespa/searchlib/index/schemautil.cpp b/searchlib/src/vespa/searchlib/index/schemautil.cpp index 62f6cd08510..7f3b7c8c2a9 100644 --- a/searchlib/src/vespa/searchlib/index/schemautil.cpp +++ b/searchlib/src/vespa/searchlib/index/schemautil.cpp @@ -15,47 +15,21 @@ SchemaUtil::IndexSettings SchemaUtil::getIndexSettings(const Schema &schema, const uint32_t index) { - IndexSettings ret; Schema::DataType indexDataType(DataType::STRING); bool error = false; - bool somePrefixes = false; - bool someNotPrefixes = false; - bool somePhrases = false; - bool someNotPhrases = false; - bool somePositions = false; - bool someNotPositions = false; const Schema::IndexField &iField = schema.getIndexField(index); - if (iField.hasPhrases()) { - somePhrases = true; - } else { - someNotPhrases = true; - } - if (iField.hasPrefix()) { - somePrefixes = true; - } else { - someNotPrefixes = true; - } - if (iField.hasPositions()) { - somePositions = true; - } else { - someNotPositions = true; - } indexDataType = iField.getDataType(); if (indexDataType != DataType::STRING) { error = true; LOG(error, "Field %s has bad data type", iField.getName().c_str()); } - return IndexSettings(indexDataType, error, - somePrefixes && !someNotPrefixes, - somePhrases && !someNotPhrases, - somePositions && !someNotPositions); + return IndexSettings(indexDataType, error); } bool -SchemaUtil::IndexIterator::hasOldFields(const Schema &oldSchema, - bool phrases) const +SchemaUtil::IndexIterator::hasOldFields(const Schema &oldSchema) const { assert(isValid()); const Schema::IndexField &newField = @@ -70,15 +44,11 @@ SchemaUtil::IndexIterator::hasOldFields(const Schema &oldSchema, if (oldField.getDataType() != newField.getDataType()) { return false; // wrong data type } - if (!phrases) { - return true; - } - return oldField.hasPhrases(); + return true; } bool -SchemaUtil::IndexIterator::hasMatchingOldFields(const Schema &oldSchema, - bool phrases) const +SchemaUtil::IndexIterator::hasMatchingOldFields(const Schema &oldSchema) const { assert(isValid()); const Schema::IndexField &newField = @@ -88,18 +58,13 @@ SchemaUtil::IndexIterator::hasMatchingOldFields(const Schema &oldSchema, if (oldFieldId == Schema::UNKNOWN_FIELD_ID) { return false; } - if (phrases) { - IndexIterator oldIterator(oldSchema, oldFieldId); - IndexSettings settings = oldIterator.getIndexSettings(); - if (!settings.hasPhrases()) { - return false; - } - } const Schema::IndexField &oldField = oldSchema.getIndexField(oldFieldId); if (oldField.getDataType() != newField.getDataType() || oldField.getCollectionType() != newField.getCollectionType()) + { return false; + } return true; } @@ -113,32 +78,6 @@ SchemaUtil::validateIndexField(const Schema::IndexField &field) field.getName().c_str()); ok = false; } - if (field.getDataType() != DataType::STRING) { - if (field.hasPrefix()) { - LOG(error, - "Field %s is non-string but has prefix", - field.getName().c_str()); - ok = false; - } - if (field.hasPhrases()) { - LOG(error, - "Field %s is non-string but has phrases", - field.getName().c_str()); - ok = false; - } - if (field.hasPositions()) { - LOG(error, - "Field %s is non-string but has positions", - field.getName().c_str()); - ok = false; - } - } - if (field.hasPhrases() && !field.hasPositions()) { - LOG(error, - "Field %s has phrases but not positions", - field.getName().c_str()); - ok = false; - } return ok; } diff --git a/searchlib/src/vespa/searchlib/index/schemautil.h b/searchlib/src/vespa/searchlib/index/schemautil.h index a678e335ebb..c8fe8e4fe32 100644 --- a/searchlib/src/vespa/searchlib/index/schemautil.h +++ b/searchlib/src/vespa/searchlib/index/schemautil.h @@ -12,9 +12,6 @@ public: class IndexSettings { schema::DataType _dataType; bool _error; // Schema is bad. - bool _prefix; - bool _phrases; - bool _positions; public: const schema::DataType & getDataType() const { @@ -22,36 +19,21 @@ public: } bool hasError() const { return _error; } - bool hasPrefix() const { return _prefix; } - bool hasPhrases() const { return _phrases; } - bool hasPositions() const { return _positions; } IndexSettings() : _dataType(schema::DataType::STRING), - _error(false), - _prefix(false), - _phrases(false), - _positions(false) + _error(false) { } IndexSettings(const IndexSettings &rhs) : _dataType(rhs._dataType), - _error(rhs._error), - _prefix(rhs._prefix), - _phrases(rhs._phrases), - _positions(rhs._positions) + _error(rhs._error) { } IndexSettings(schema::DataType dataType, - bool error, - bool prefix, - bool phrases, - bool positions) + bool error) : _dataType(dataType), - _error(error), - _prefix(prefix), - _phrases(phrases), - _positions(positions) + _error(error) { } IndexSettings & operator=(const IndexSettings &rhs) { @@ -63,9 +45,6 @@ public: void swap(IndexSettings &rhs) { std::swap(_dataType, rhs._dataType); std::swap(_error, rhs._error); - std::swap(_prefix, rhs._prefix); - std::swap(_phrases, rhs._phrases); - std::swap(_positions, rhs._positions); } }; @@ -121,13 +100,11 @@ public: /** * Return if old schema has at least one usable input field - * with matching data type. If we want phrases then all input - * fields usable for terms must also be usable for phrases. + * with matching data type. * * @param oldSchema old schema, present in an input index - * @param phrases ask for phrase files */ - bool hasOldFields(const Schema &oldSchema, bool phrases) const; + bool hasOldFields(const Schema &oldSchema) const; /** * Return if fields in old schema matches fields in new @@ -136,9 +113,8 @@ public: * also match between new and old schema. * * @param oldSchema old schema, present in an input index - * @param phrases ask for phrase files */ - bool hasMatchingOldFields(const Schema &oldSchema, bool phrases) const; + bool hasMatchingOldFields(const Schema &oldSchema) const; }; static IndexSettings getIndexSettings(const Schema &schema, const uint32_t index); |