diff options
75 files changed, 466 insertions, 822 deletions
diff --git a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java index a492ee47030..62e78db0e6f 100644 --- a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java +++ b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java @@ -74,6 +74,7 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea private String mergeThrottlingPolicy = "STATIC"; private double persistenceThrottlingWsDecrementFactor = 1.2; private double persistenceThrottlingWsBackoff = 0.95; + private boolean useV8GeoPositions = false; @Override public ModelContext.FeatureFlags featureFlags() { return this; } @Override public boolean multitenant() { return multitenant; } @@ -127,6 +128,7 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea @Override public String mergeThrottlingPolicy() { return mergeThrottlingPolicy; } @Override public double persistenceThrottlingWsDecrementFactor() { return persistenceThrottlingWsDecrementFactor; } @Override public double persistenceThrottlingWsBackoff() { return persistenceThrottlingWsBackoff; } + @Override public boolean useV8GeoPositions() { return useV8GeoPositions; } public TestProperties maxUnCommittedMemory(int maxUnCommittedMemory) { this.maxUnCommittedMemory = maxUnCommittedMemory; @@ -334,6 +336,11 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea return this; } + public TestProperties setUseV8GeoPositions(boolean value) { + this.useV8GeoPositions = value; + return this; + } + public static class Spec implements ConfigServerSpec { private final String hostName; diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/processing/AdjustPositionSummaryFields.java b/config-model/src/main/java/com/yahoo/searchdefinition/processing/AdjustPositionSummaryFields.java index 766b6ed3fec..254b3743f52 100644 --- 
a/config-model/src/main/java/com/yahoo/searchdefinition/processing/AdjustPositionSummaryFields.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/processing/AdjustPositionSummaryFields.java @@ -2,6 +2,7 @@ package com.yahoo.searchdefinition.processing; import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.config.model.api.ModelContext; import com.yahoo.document.ArrayDataType; import com.yahoo.document.DataType; import com.yahoo.document.PositionDataType; @@ -26,6 +27,14 @@ public class AdjustPositionSummaryFields extends Processor { super(schema, deployLogger, rankProfileRegistry, queryProfiles); } + private boolean useV8GeoPositions = false; + + @Override + public void process(boolean validate, boolean documentsOnly, ModelContext.Properties properties) { + this.useV8GeoPositions = properties.featureFlags().useV8GeoPositions(); + process(validate, documentsOnly); + } + @Override public void process(boolean validate, boolean documentsOnly) { for (DocumentSummary summary : schema.getSummaries().values()) { @@ -80,6 +89,7 @@ public class AdjustPositionSummaryFields extends Processor { private void ensureSummaryField(DocumentSummary summary, String fieldName, DataType dataType, Source source, SummaryTransform transform) { SummaryField oldField = schema.getSummaryField(fieldName); if (oldField == null) { + if (useV8GeoPositions) return; SummaryField newField = new SummaryField(fieldName, dataType, transform); newField.addSource(source); summary.add(newField); @@ -94,6 +104,7 @@ public class AdjustPositionSummaryFields extends Processor { if (oldField.getSourceCount() != 1 || !oldField.getSingleSource().equals(source.getName())) { fail(oldField, "has source '" + oldField.getSources().toString() + "', should have source '" + source + "'"); } + if (useV8GeoPositions) return; summary.add(oldField); } diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/processing/CreatePositionZCurve.java 
b/config-model/src/main/java/com/yahoo/searchdefinition/processing/CreatePositionZCurve.java index f5c1d8d8197..0bb1b7da769 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/processing/CreatePositionZCurve.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/processing/CreatePositionZCurve.java @@ -2,6 +2,7 @@ package com.yahoo.searchdefinition.processing; import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.config.model.api.ModelContext; import com.yahoo.searchdefinition.RankProfileRegistry; import com.yahoo.document.ArrayDataType; import com.yahoo.document.DataType; @@ -38,6 +39,14 @@ public class CreatePositionZCurve extends Processor { super(schema, deployLogger, rankProfileRegistry, queryProfiles); } + private boolean useV8GeoPositions = false; + + @Override + public void process(boolean validate, boolean documentsOnly, ModelContext.Properties properties) { + this.useV8GeoPositions = properties.featureFlags().useV8GeoPositions(); + process(validate, documentsOnly); + } + @Override public void process(boolean validate, boolean documentsOnly) { for (SDField field : schema.allConcreteFields()) { @@ -63,14 +72,16 @@ public class CreatePositionZCurve extends Processor { // configure summary Collection<String> summaryTo = removeSummaryTo(field); - ensureCompatibleSummary(field, zName, - PositionDataType.getPositionSummaryFieldName(fieldName), - DataType.getArray(DataType.STRING), // will become "xmlstring" - SummaryTransform.POSITIONS, summaryTo, validate); - ensureCompatibleSummary(field, zName, - PositionDataType.getDistanceSummaryFieldName(fieldName), - DataType.INT, - SummaryTransform.DISTANCE, summaryTo, validate); + if (! 
useV8GeoPositions) { + ensureCompatibleSummary(field, zName, + PositionDataType.getPositionSummaryFieldName(fieldName), + DataType.getArray(DataType.STRING), // will become "xmlstring" + SummaryTransform.POSITIONS, summaryTo, validate); + ensureCompatibleSummary(field, zName, + PositionDataType.getDistanceSummaryFieldName(fieldName), + DataType.INT, + SummaryTransform.DISTANCE, summaryTo, validate); + } // clear indexing script field.setIndexingScript(null); SDField posX = field.getStructField(PositionDataType.FIELD_X); diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/processing/ImplicitSummaries.java b/config-model/src/main/java/com/yahoo/searchdefinition/processing/ImplicitSummaries.java index 9eb8b921e81..0db6f4f05ba 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/processing/ImplicitSummaries.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/processing/ImplicitSummaries.java @@ -48,7 +48,7 @@ public class ImplicitSummaries extends Processor { sdField.addSummaryFieldSources(summaryField); } - private void collectSummaries(SDField field , Schema schema, boolean validate) { + private void collectSummaries(SDField field, Schema schema, boolean validate) { SummaryField addedSummaryField = null; // Implicit @@ -91,9 +91,16 @@ public class ImplicitSummaries extends Processor { if (field.doesSummarying()) { for (Attribute attribute : field.getAttributes().values()) { if ( ! 
attribute.isPosition()) continue; - DocumentSummary attributePrefetchSummary = getOrCreateAttributePrefetchSummary(schema); - attributePrefetchSummary.add(field.getSummaryField(PositionDataType.getDistanceSummaryFieldName(fieldName))); - attributePrefetchSummary.add(field.getSummaryField(PositionDataType.getPositionSummaryFieldName(fieldName))); + var distField = field.getSummaryField(PositionDataType.getDistanceSummaryFieldName(fieldName)); + if (distField != null) { + DocumentSummary attributePrefetchSummary = getOrCreateAttributePrefetchSummary(schema); + attributePrefetchSummary.add(distField); + } + var posField = field.getSummaryField(PositionDataType.getPositionSummaryFieldName(fieldName)); + if (posField != null) { + DocumentSummary attributePrefetchSummary = getOrCreateAttributePrefetchSummary(schema); + attributePrefetchSummary.add(posField); + } } } @@ -104,7 +111,6 @@ public class ImplicitSummaries extends Processor { if (attribute != null && summaryField.getTransform() == SummaryTransform.NONE) { summaryField.setTransform(SummaryTransform.ATTRIBUTE); } - if (isValid(summaryField, schema, validate)) { addToDestinations(summaryField, schema); } @@ -203,8 +209,9 @@ public class ImplicitSummaries extends Processor { addToDestination("default", summaryField, schema); } else { - for (String destinationName : summaryField.getDestinations()) + for (String destinationName : summaryField.getDestinations()) { addToDestination(destinationName, summaryField, schema); + } } } diff --git a/config-model/src/test/derived/position_nosummary/summary.cfg b/config-model/src/test/derived/position_nosummary/summary.cfg index cd7c295ab11..2c46031bdad 100644 --- a/config-model/src/test/derived/position_nosummary/summary.cfg +++ b/config-model/src/test/derived/position_nosummary/summary.cfg @@ -1,12 +1,8 @@ -defaultsummaryid 1727020212 -usev8geopositions false -classes[].id 1727020212 +defaultsummaryid 1151071433 +usev8geopositions true +classes[].id 1151071433 classes[].name 
"default" classes[].omitsummaryfeatures false -classes[].fields[].name "pos.position" -classes[].fields[].type "xmlstring" -classes[].fields[].name "pos.distance" -classes[].fields[].type "integer" classes[].fields[].name "rankfeatures" classes[].fields[].type "featuredata" classes[].fields[].name "summaryfeatures" diff --git a/config-model/src/test/derived/position_nosummary/summarymap.cfg b/config-model/src/test/derived/position_nosummary/summarymap.cfg index cc1c14a6614..0a36f98c6ca 100644 --- a/config-model/src/test/derived/position_nosummary/summarymap.cfg +++ b/config-model/src/test/derived/position_nosummary/summarymap.cfg @@ -1,10 +1,4 @@ defaultoutputclass -1 -override[].field "pos.position" -override[].command "positions" -override[].arguments "pos_zcurve" -override[].field "pos.distance" -override[].command "absdist" -override[].arguments "pos_zcurve" override[].field "rankfeatures" override[].command "rankfeatures" override[].arguments "" diff --git a/config-model/src/test/derived/position_summary/summary.cfg b/config-model/src/test/derived/position_summary/summary.cfg index 7b8bf16287f..7fda1ca0c05 100644 --- a/config-model/src/test/derived/position_summary/summary.cfg +++ b/config-model/src/test/derived/position_summary/summary.cfg @@ -1,14 +1,10 @@ -defaultsummaryid 230670304 -usev8geopositions false -classes[].id 230670304 +defaultsummaryid 644855547 +usev8geopositions true +classes[].id 644855547 classes[].name "default" classes[].omitsummaryfeatures false classes[].fields[].name "pos" classes[].fields[].type "jsonstring" -classes[].fields[].name "pos.position" -classes[].fields[].type "xmlstring" -classes[].fields[].name "pos.distance" -classes[].fields[].type "integer" classes[].fields[].name "rankfeatures" classes[].fields[].type "featuredata" classes[].fields[].name "summaryfeatures" diff --git a/config-model/src/test/derived/position_summary/summarymap.cfg b/config-model/src/test/derived/position_summary/summarymap.cfg index 
de48a19b6db..2223d3380a0 100644 --- a/config-model/src/test/derived/position_summary/summarymap.cfg +++ b/config-model/src/test/derived/position_summary/summarymap.cfg @@ -2,12 +2,6 @@ defaultoutputclass -1 override[].field "pos" override[].command "geopos" override[].arguments "pos_zcurve" -override[].field "pos.position" -override[].command "positions" -override[].arguments "pos_zcurve" -override[].field "pos.distance" -override[].command "absdist" -override[].arguments "pos_zcurve" override[].field "rankfeatures" override[].command "rankfeatures" override[].arguments "" diff --git a/config-model/src/test/derived/position_summary/vsmsummary.cfg b/config-model/src/test/derived/position_summary/vsmsummary.cfg index d7d45782843..56405069131 100644 --- a/config-model/src/test/derived/position_summary/vsmsummary.cfg +++ b/config-model/src/test/derived/position_summary/vsmsummary.cfg @@ -2,12 +2,6 @@ outputclass "" fieldmap[].summary "pos" fieldmap[].document[].field "pos" fieldmap[].command NONE -fieldmap[].summary "pos.position" -fieldmap[].document[].field "pos_zcurve" -fieldmap[].command NONE -fieldmap[].summary "pos.distance" -fieldmap[].document[].field "pos_zcurve" -fieldmap[].command NONE fieldmap[].summary "rankfeatures" fieldmap[].command NONE fieldmap[].summary "summaryfeatures" diff --git a/config-model/src/test/java/com/yahoo/searchdefinition/derived/AbstractExportingTestCase.java b/config-model/src/test/java/com/yahoo/searchdefinition/derived/AbstractExportingTestCase.java index a88ad929e80..456efdb08ae 100644 --- a/config-model/src/test/java/com/yahoo/searchdefinition/derived/AbstractExportingTestCase.java +++ b/config-model/src/test/java/com/yahoo/searchdefinition/derived/AbstractExportingTestCase.java @@ -100,6 +100,11 @@ public abstract class AbstractExportingTestCase extends AbstractSchemaTestCase { DeployLogger logger) throws IOException, ParseException { return assertCorrectDeriving(dirName, searchDefinitionName, new TestProperties(), logger); } + 
protected DerivedConfiguration assertCorrectDeriving(String dirName, + TestProperties properties) throws IOException, ParseException + { + return assertCorrectDeriving(dirName, null, properties, new TestableDeployLogger()); + } protected DerivedConfiguration assertCorrectDeriving(String dirName, String searchDefinitionName, diff --git a/config-model/src/test/java/com/yahoo/searchdefinition/derived/ExportingTestCase.java b/config-model/src/test/java/com/yahoo/searchdefinition/derived/ExportingTestCase.java index ddb627216b7..27ba4e906a1 100644 --- a/config-model/src/test/java/com/yahoo/searchdefinition/derived/ExportingTestCase.java +++ b/config-model/src/test/java/com/yahoo/searchdefinition/derived/ExportingTestCase.java @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.searchdefinition.derived; +import com.yahoo.config.model.deploy.TestProperties; import com.yahoo.searchdefinition.ApplicationBuilder; import com.yahoo.searchdefinition.parser.ParseException; import org.junit.Test; @@ -23,27 +24,32 @@ public class ExportingTestCase extends AbstractExportingTestCase { @Test public void testPositionArray() throws IOException, ParseException { - assertCorrectDeriving("position_array"); + assertCorrectDeriving("position_array", + new TestProperties().setUseV8GeoPositions(true)); } @Test public void testPositionAttribute() throws IOException, ParseException { - assertCorrectDeriving("position_attribute"); + assertCorrectDeriving("position_attribute", + new TestProperties().setUseV8GeoPositions(true)); } @Test public void testPositionExtra() throws IOException, ParseException { - assertCorrectDeriving("position_extra"); + assertCorrectDeriving("position_extra", + new TestProperties().setUseV8GeoPositions(true)); } @Test public void testPositionNoSummary() throws IOException, ParseException { - assertCorrectDeriving("position_nosummary"); + assertCorrectDeriving("position_nosummary", + 
new TestProperties().setUseV8GeoPositions(true)); } @Test public void testPositionSummary() throws IOException, ParseException { - assertCorrectDeriving("position_summary"); + assertCorrectDeriving("position_summary", + new TestProperties().setUseV8GeoPositions(true)); } @Test diff --git a/fastlib/src/vespa/fastlib/io/bufferedfile.cpp b/fastlib/src/vespa/fastlib/io/bufferedfile.cpp index d93dbd74ee1..56ffbf40eca 100644 --- a/fastlib/src/vespa/fastlib/io/bufferedfile.cpp +++ b/fastlib/src/vespa/fastlib/io/bufferedfile.cpp @@ -13,7 +13,7 @@ const size_t MIN_ALIGNMENT = 0x1000; } void -Fast_BufferedFile::flushWriteBuf(void) +Fast_BufferedFile::flushWriteBuf() { if (_bufi != buf()) { _file->WriteBuf(buf(), _bufi - buf()); @@ -31,7 +31,7 @@ Fast_BufferedFile::flushWriteBuf(void) } void -Fast_BufferedFile::fillReadBuf(void) +Fast_BufferedFile::fillReadBuf() { size_t toread = std::min(static_cast<int64_t>(_buf.size()), _fileleft); if (toread > 0) { @@ -71,20 +71,14 @@ Fast_BufferedFile::addNum(unsigned int num, int fieldw, char fill) } } -uint64_t -Fast_BufferedFile::BytesLeft(void) const -{ - return _fileleft + (_bufe - _bufi); -} - bool -Fast_BufferedFile::Eof(void) const +Fast_BufferedFile::Eof() const { return _fileleft == 0 && _bufi == _bufe; } int64_t -Fast_BufferedFile::GetSize (void) +Fast_BufferedFile::GetSize() { return _file->GetSize(); } @@ -101,41 +95,41 @@ Fast_BufferedFile::SetSize (int64_t s) } bool -Fast_BufferedFile::IsOpened (void) const +Fast_BufferedFile::IsOpened () const { return _file->IsOpened(); } bool -Fast_BufferedFile::Sync(void) +Fast_BufferedFile::Sync() { Flush(); return _file->Sync(); } time_t -Fast_BufferedFile::GetModificationTime(void) +Fast_BufferedFile::GetModificationTime() { time_t retval = _file->GetModificationTime(); return retval; } void -Fast_BufferedFile::EnableDirectIO(void) +Fast_BufferedFile::EnableDirectIO() { _file->EnableDirectIO(); _directIOEnabled = true; } void -Fast_BufferedFile::EnableSyncWrites(void) 
+Fast_BufferedFile::EnableSyncWrites() { FastOS_FileInterface::EnableSyncWrites(); _file->EnableSyncWrites(); } int64_t -Fast_BufferedFile::GetPosition(void) +Fast_BufferedFile::GetPosition() { if (_file->IsWriteMode()) { int64_t filePosition = _file->GetPosition(); @@ -147,7 +141,7 @@ Fast_BufferedFile::GetPosition(void) void -Fast_BufferedFile::Flush(void) +Fast_BufferedFile::Flush() { if (_file->IsWriteMode()) { flushWriteBuf(); @@ -202,7 +196,7 @@ Fast_BufferedFile::SetPosition(const int64_t s) } const char * -Fast_BufferedFile::GetFileName(void) const +Fast_BufferedFile::GetFileName() const { return _file ? _file->GetFileName() : ""; } @@ -233,7 +227,7 @@ Fast_BufferedFile::ReadLine(char *line, size_t buflen) continue; } *p++ = *_bufi++; - *p++ = 0; + *p = 0; return line; } } @@ -294,22 +288,11 @@ Fast_BufferedFile::WriteByte(char byte) *_bufi++ = byte; } -int -Fast_BufferedFile::GetByte(void) -{ - if (_bufi < _bufe) - return *reinterpret_cast<unsigned char *>(_bufi++); - fillReadBuf(); - if (_bufi < _bufe) - return *reinterpret_cast<unsigned char *>(_bufi++); - return -1; -} - void Fast_BufferedFile::ReadOpenExisting(const char *name) { - Close(); - bool ok = _file->OpenReadOnlyExisting(true, name); + bool ok = Close(); + ok &= _file->OpenReadOnlyExisting(true, name); if (!ok) { fprintf(stderr, "ERROR opening %s for read: %s", _file->GetFileName(), getLastErrorString().c_str()); @@ -325,8 +308,8 @@ Fast_BufferedFile::ReadOpenExisting(const char *name) void Fast_BufferedFile::ReadOpen(const char *name) { - Close(); - bool ok = _file->OpenReadOnly(name); + bool ok = Close(); + ok &= _file->OpenReadOnly(name); if (!ok) { fprintf(stderr, "ERROR opening %s for read: %s", _file->GetFileName(), getLastErrorString().c_str()); @@ -345,8 +328,8 @@ Fast_BufferedFile::ReadOpen(const char *name) void Fast_BufferedFile::WriteOpen(const char *name) { - Close(); - bool ok = _file->OpenWriteOnly(name); + bool ok = Close(); + ok &= _file->OpenWriteOnly(name); if (!ok) { 
fprintf(stderr, "ERROR opening %s for write: %s", _file->GetFileName(), getLastErrorString().c_str()); @@ -377,7 +360,7 @@ namespace { size_t computeBufLen(size_t buflen) { - size_t bitCount(0); + size_t bitCount; for ( bitCount = 1; buflen >> bitCount; bitCount++); buflen = 1 << (bitCount - 1); @@ -402,21 +385,21 @@ Fast_BufferedFile::Fast_BufferedFile(FastOS_FileInterface *file, size_t bufferSi ResetBuf(); } -Fast_BufferedFile::~Fast_BufferedFile(void) +Fast_BufferedFile::~Fast_BufferedFile() { bool close_ok = Close(); assert(close_ok); } void -Fast_BufferedFile::ResetBuf(void) +Fast_BufferedFile::ResetBuf() { _bufi = buf(); _bufe = _bufi; } bool -Fast_BufferedFile::Close(void) +Fast_BufferedFile::Close() { Flush(); _openFlags = 0; @@ -426,14 +409,14 @@ Fast_BufferedFile::Close(void) bool Fast_BufferedFile::Open(unsigned int openFlags, const char * name) { - bool ok = false; + bool ok; if (openFlags & FASTOS_FILE_OPEN_READ) { - Close(); + ok = Close(); _filepos = 0; _fileleft = 0; ResetBuf(); - ok = _file->Open(openFlags, name); + ok &= _file->Open(openFlags, name); if (ok) { _openFlags = openFlags; //CASTWARN @@ -442,10 +425,10 @@ bool Fast_BufferedFile::Open(unsigned int openFlags, const char * name) // caller will have to check return value } } else { - Close(); + ok = Close(); _filepos = 0; ResetBuf(); - ok = _file->Open(FASTOS_FILE_OPEN_WRITE | openFlags, name); + ok &= _file->Open(FASTOS_FILE_OPEN_WRITE | openFlags, name); if (ok) { _openFlags = FASTOS_FILE_OPEN_WRITE | openFlags; } else { diff --git a/fastlib/src/vespa/fastlib/io/bufferedfile.h b/fastlib/src/vespa/fastlib/io/bufferedfile.h index 78c19ef8169..48f90262ad9 100644 --- a/fastlib/src/vespa/fastlib/io/bufferedfile.h +++ b/fastlib/src/vespa/fastlib/io/bufferedfile.h @@ -26,7 +26,6 @@ private: /** True if the file should be read using direct IO */ bool _directIOEnabled; - void setupDirectIOAlign(); char * buf() { return static_cast<char *>(_buf.get()); } protected: /** The file instance used for 
low-level file access. */ @@ -51,7 +50,7 @@ public: /** * Delete the file instance used for low-level file access. **/ - virtual ~Fast_BufferedFile(void); + virtual ~Fast_BufferedFile(); /** * Open an existing file for reading. * @@ -75,13 +74,13 @@ public: * Reset the internal start and end pointers to the * head of the buffer, thus "emptying" it. */ - void ResetBuf(void); + void ResetBuf(); /** * Write the buffer to the file. Caution: Uses obsolete * FastOS_FileInterface::WriteBuf. * Allocates a 32kB buffer if not previously allocated. */ - void flushWriteBuf(void); + void flushWriteBuf(); /** * Read from the file into the buffer. Allocates a 32kB * buffer if not previously allocated. Fills the buffer, @@ -90,7 +89,7 @@ public: * Caution: If the amount read is smaller than the expected * amount, the method will abort. */ - void fillReadBuf(void); + void fillReadBuf(); /** * Read the next line of the buffered file into a buffer, * reading from the file as necessary. @@ -131,13 +130,7 @@ public: * @param byte The byte to write. */ void WriteByte(char byte); - /** - * Get one byte from the buffered file, reading from - * the file if necessary. - * - * @return int The byte read, or -1 if not read. - */ - int GetByte(void); + /** * Add an unsigned int number as ASCII text in base 10 to the buffered * file using a fixed width with a designated fill character. @@ -148,22 +141,14 @@ public: * for instance '0' or ' '. */ void addNum(unsigned int num, int fieldw, char fill); - /** - * Get the number of bytes left to read from the buffered - * file. This is the sum of bytes left in the buffer, and - * the number of bytes left in the file that has not yet - * been read into the buffer. - * - * @return The number of bytes left. - */ - uint64_t BytesLeft(void) const; + /** * Test for end of file. * * @return bool True if all bytes have been read from the * buffered file. */ - bool Eof(void) const; + bool Eof() const; /** * Get the size of the file. 
* diff --git a/searchlib/src/tests/attribute/attribute_test.cpp b/searchlib/src/tests/attribute/attribute_test.cpp index bd84a6ca419..a26e751ae73 100644 --- a/searchlib/src/tests/attribute/attribute_test.cpp +++ b/searchlib/src/tests/attribute/attribute_test.cpp @@ -5,16 +5,13 @@ #include <vespa/document/update/arithmeticvalueupdate.h> #include <vespa/document/update/assignvalueupdate.h> #include <vespa/document/update/mapvalueupdate.h> -#include <vespa/fastlib/io/bufferedfile.h> #include <vespa/searchlib/attribute/address_space_components.h> #include <vespa/searchlib/attribute/attribute.h> #include <vespa/searchlib/attribute/attributefactory.h> #include <vespa/searchlib/attribute/attributeguard.h> #include <vespa/searchlib/attribute/attributememorysavetarget.h> #include <vespa/searchlib/attribute/attributevector.hpp> -#include <vespa/searchlib/attribute/attrvector.h> #include <vespa/searchlib/attribute/multienumattribute.hpp> -#include <vespa/searchlib/attribute/multinumericattribute.h> #include <vespa/searchlib/attribute/multistringattribute.h> #include <vespa/searchlib/attribute/multivalueattribute.hpp> #include <vespa/searchlib/attribute/predicate_attribute.h> @@ -25,6 +22,7 @@ #include <vespa/searchlib/util/randomgenerator.h> #include <vespa/vespalib/io/fileutil.h> #include <vespa/vespalib/testkit/testapp.h> +#include <vespa/fastos/file.h> #include <cmath> #include <iostream> @@ -41,6 +39,8 @@ using search::attribute::IAttributeVector; using vespalib::stringref; using vespalib::string; +namespace search { + namespace { string empty; @@ -157,8 +157,6 @@ bool contains_value(const Container& c, size_t elems, const V& value) { } -namespace search { - using attribute::CollectionType; using attribute::Config; diff --git a/searchlib/src/tests/diskindex/bitvector/bitvector_test.cpp b/searchlib/src/tests/diskindex/bitvector/bitvector_test.cpp index c5ea12a7568..570f1a6ea03 100644 --- a/searchlib/src/tests/diskindex/bitvector/bitvector_test.cpp +++ 
b/searchlib/src/tests/diskindex/bitvector/bitvector_test.cpp @@ -1,13 +1,16 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/log/log.h> -LOG_SETUP("bitvector_test"); + #include <vespa/vespalib/testkit/testapp.h> #include <vespa/searchlib/index/field_length_info.h> #include <vespa/searchlib/diskindex/bitvectordictionary.h> #include <vespa/searchlib/diskindex/fieldwriter.h> #include <vespa/searchlib/index/dummyfileheadercontext.h> +#include <vespa/searchcommon/common/schema.h> #include <vespa/vespalib/io/fileutil.h> +#include <vespa/log/log.h> +LOG_SETUP("bitvector_test"); + using namespace search::index; using search::index::schema::DataType; diff --git a/searchlib/src/tests/diskindex/fieldwriter/fieldwriter_test.cpp b/searchlib/src/tests/diskindex/fieldwriter/fieldwriter_test.cpp index 0fc42ae3fad..35b42223cfc 100644 --- a/searchlib/src/tests/diskindex/fieldwriter/fieldwriter_test.cpp +++ b/searchlib/src/tests/diskindex/fieldwriter/fieldwriter_test.cpp @@ -19,6 +19,7 @@ #include <vespa/vespalib/util/size_literals.h> #include <vespa/vespalib/util/time.h> #include <openssl/evp.h> +#include <vespa/fastos/file.h> #include <vespa/fastos/app.h> #include <vespa/log/log.h> LOG_SETUP("fieldwriter_test"); diff --git a/searchlib/src/tests/memoryindex/datastore/feature_store_test.cpp b/searchlib/src/tests/memoryindex/datastore/feature_store_test.cpp index 9a326af4c3b..34f9f7d27a9 100644 --- a/searchlib/src/tests/memoryindex/datastore/feature_store_test.cpp +++ b/searchlib/src/tests/memoryindex/datastore/feature_store_test.cpp @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#include <vespa/searchlib/memoryindex/feature_store.h> +#include <vespa/searchcommon/common/schema.h> #include <vespa/vespalib/gtest/gtest.h> #include <vespa/log/log.h> diff --git a/searchlib/src/tests/stringenum/stringenum_test.cpp b/searchlib/src/tests/stringenum/stringenum_test.cpp index 15ec1862338..f5915db6df0 100644 --- a/searchlib/src/tests/stringenum/stringenum_test.cpp +++ b/searchlib/src/tests/stringenum/stringenum_test.cpp @@ -1,43 +1,28 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include <vespa/log/log.h> -LOG_SETUP("stringenum"); -#include <vespa/fastlib/io/bufferedfile.h> #include <vespa/searchlib/util/stringenum.h> - +LOG_SETUP("stringenum"); #include <vespa/vespalib/testkit/testapp.h> using namespace vespalib; -class MyApp : public vespalib::TestApp -{ -public: - void CheckLookup( search::util::StringEnum *strEnum, const char *str, int value); - int Main() override; - MyApp() {} -}; - - void -MyApp::CheckLookup( search::util::StringEnum *strEnum, const char *str, int value) +CheckLookup( search::util::StringEnum *strEnum, const char *str, int value) { EXPECT_EQUAL(0, strcmp(str, strEnum->Lookup(value))); EXPECT_EQUAL(value, strEnum->Lookup(str)); } -int -MyApp::Main() +TEST("test StringEnum Add and Lookup") { - TEST_INIT("stringenum_test"); search::util::StringEnum enum1; - search::util::StringEnum enum2; // check number of entries EXPECT_EQUAL(enum1.GetNumEntries(), 0u); - EXPECT_EQUAL(enum2.GetNumEntries(), 0u); // check add non-duplicates EXPECT_EQUAL(enum1.Add("zero"), 0); @@ -80,63 +65,11 @@ MyApp::Main() TEST_DO(CheckLookup(&enum1, "nine", 9)); TEST_DO(CheckLookup(&enum1, "ten", 10)); - TEST_FLUSH(); - - // save/load - EXPECT_TRUE(enum1.Save("tmp.enum")); - EXPECT_TRUE(enum2.Load("tmp.enum")); - - // check mapping and reverse mapping - EXPECT_EQUAL(enum2.GetNumEntries(), 11u); - TEST_DO(CheckLookup(&enum2, "zero", 0)); - TEST_DO(CheckLookup(&enum2, "one", 1)); - 
TEST_DO(CheckLookup(&enum2, "two", 2)); - TEST_DO(CheckLookup(&enum2, "three", 3)); - TEST_DO(CheckLookup(&enum2, "four", 4)); - TEST_DO(CheckLookup(&enum2, "five", 5)); - TEST_DO(CheckLookup(&enum2, "six", 6)); - TEST_DO(CheckLookup(&enum2, "seven", 7)); - TEST_DO(CheckLookup(&enum2, "eight", 8)); - TEST_DO(CheckLookup(&enum2, "nine", 9)); - TEST_DO(CheckLookup(&enum2, "ten", 10)); - - // add garbage - enum2.Add("sfsdffgdfh"); - enum2.Add("sf24dfsgg3"); - enum2.Add("sfwertfgdh"); - enum2.Add("sfewrgtsfh"); - enum2.Add("sfgdsdgdfh"); - - TEST_FLUSH(); - - // reload - EXPECT_TRUE(enum2.Load("tmp.enum")); - - // check garbage lost - EXPECT_EQUAL(enum2.GetNumEntries(), 11u); - EXPECT_EQUAL(-1, enum2.Lookup("sfewrgtsfh")); - // check mapping and reverse mapping - TEST_DO(CheckLookup(&enum2, "zero", 0)); - TEST_DO(CheckLookup(&enum2, "one", 1)); - TEST_DO(CheckLookup(&enum2, "two", 2)); - TEST_DO(CheckLookup(&enum2, "three", 3)); - TEST_DO(CheckLookup(&enum2, "four", 4)); - TEST_DO(CheckLookup(&enum2, "five", 5)); - TEST_DO(CheckLookup(&enum2, "six", 6)); - TEST_DO(CheckLookup(&enum2, "seven", 7)); - TEST_DO(CheckLookup(&enum2, "eight", 8)); - TEST_DO(CheckLookup(&enum2, "nine", 9)); - TEST_DO(CheckLookup(&enum2, "ten", 10)); - // clear enum1.Clear(); - enum2.Clear(); // check number of entries EXPECT_EQUAL(enum1.GetNumEntries(), 0u); - EXPECT_EQUAL(enum2.GetNumEntries(), 0u); - - TEST_DONE(); } -TEST_APPHOOK(MyApp); +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.h b/searchlib/src/vespa/searchlib/attribute/attributevector.h index 90d08fa681c..acd00413568 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributevector.h +++ b/searchlib/src/vespa/searchlib/attribute/attributevector.h @@ -26,9 +26,6 @@ #include <mutex> #include <shared_mutex> -class Fast_BufferedFile; -class FastOS_FileInterface; - namespace document { class ArithmeticValueUpdate; class AssignValueUpdate; diff --git 
a/searchlib/src/vespa/searchlib/attribute/attrvector.hpp b/searchlib/src/vespa/searchlib/attribute/attrvector.hpp index 87f1985d7c0..f398fe0b46b 100644 --- a/searchlib/src/vespa/searchlib/attribute/attrvector.hpp +++ b/searchlib/src/vespa/searchlib/attribute/attrvector.hpp @@ -4,7 +4,6 @@ #include "attrvector.h" #include "load_utils.h" #include <vespa/vespalib/util/hdr_abort.h> -#include <vespa/fastlib/io/bufferedfile.h> #include <vespa/searchlib/util/filekit.h> namespace search { diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp index 10f837ec1ab..73d248a21fa 100644 --- a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp @@ -8,7 +8,6 @@ #include "multinumericattributesaver.h" #include "load_utils.h" #include "primitivereader.h" -#include <vespa/fastlib/io/bufferedfile.h> #include <vespa/searchlib/query/query_term_simple.h> #include <vespa/searchlib/queryeval/emptysearch.h> #include <vespa/searchlib/util/fileutil.h> diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp index 5449f85bf68..4322faefe67 100644 --- a/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp @@ -6,7 +6,6 @@ #include "load_utils.h" #include "loadednumericvalue.h" #include "multinumericenumattribute.h" -#include <vespa/fastlib/io/bufferedfile.h> #include <vespa/searchlib/query/query_term_simple.h> #include <vespa/searchlib/queryeval/emptysearch.h> #include <vespa/searchlib/util/fileutil.hpp> diff --git a/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp index 89cd8ca310a..c8330225fb9 100644 --- 
a/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp @@ -6,7 +6,6 @@ #include "multistringattribute.h" #include "enumattribute.hpp" #include "multienumattribute.hpp" -#include <vespa/fastlib/io/bufferedfile.h> #include <vespa/vespalib/text/utf8.h> #include <vespa/vespalib/text/lowercase.h> #include <vespa/searchlib/util/bufferwriter.h> diff --git a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp index 2bb4d2ada60..a1f8b42df2f 100644 --- a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp @@ -5,7 +5,6 @@ #include "stringattribute.h" #include "multistringpostattribute.h" #include "multistringattribute.hpp" -#include <vespa/fastlib/io/bufferedfile.h> #include <vespa/searchlib/query/query_term_simple.h> namespace search { diff --git a/searchlib/src/vespa/searchlib/attribute/readerbase.cpp b/searchlib/src/vespa/searchlib/attribute/readerbase.cpp index f08130b9fbe..d023d9b56b1 100644 --- a/searchlib/src/vespa/searchlib/attribute/readerbase.cpp +++ b/searchlib/src/vespa/searchlib/attribute/readerbase.cpp @@ -5,7 +5,6 @@ #include "readerbase.h" #include <vespa/fastlib/io/bufferedfile.h> #include <vespa/searchlib/util/filesizecalculator.h> -#include <vespa/vespalib/util/exceptions.h> #include <vespa/vespalib/util/size_literals.h> #include <vespa/log/log.h> diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp index 606c99161be..730ad1107a7 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp @@ -6,7 +6,6 @@ #include "stringattribute.h" #include "singleenumattribute.hpp" #include "attributevector.hpp" -#include 
<vespa/fastlib/io/bufferedfile.h> #include <vespa/vespalib/text/utf8.h> #include <vespa/vespalib/text/lowercase.h> #include <vespa/searchlib/util/bufferwriter.h> diff --git a/searchlib/src/vespa/searchlib/bitcompression/compression.cpp b/searchlib/src/vespa/searchlib/bitcompression/compression.cpp index 0d5835b4fa9..5ac506e4fc2 100644 --- a/searchlib/src/vespa/searchlib/bitcompression/compression.cpp +++ b/searchlib/src/vespa/searchlib/bitcompression/compression.cpp @@ -3,6 +3,7 @@ #include "compression.h" #include <vespa/searchlib/fef/termfieldmatchdata.h> #include <vespa/searchlib/fef/termfieldmatchdataarray.h> +#include <vespa/searchlib/index/postinglistparams.h> #include <vespa/vespalib/data/fileheader.h> #include <vespa/vespalib/data/databuffer.h> #include <vespa/vespalib/util/arrayref.h> diff --git a/searchlib/src/vespa/searchlib/bitcompression/compression.h b/searchlib/src/vespa/searchlib/bitcompression/compression.h index 973d622461a..45005d499fb 100644 --- a/searchlib/src/vespa/searchlib/bitcompression/compression.h +++ b/searchlib/src/vespa/searchlib/bitcompression/compression.h @@ -3,7 +3,6 @@ #pragma once #include <vespa/searchlib/util/comprfile.h> -#include <vespa/searchlib/index/postinglistparams.h> #include <vespa/vespalib/stllike/string.h> #include <cassert> @@ -14,7 +13,10 @@ template <typename T> class ConstArrayRef; } -namespace search::index { class DocIdAndFeatures; } +namespace search::index { + class DocIdAndFeatures; + class PostingListParams; +} namespace search::fef { class TermFieldMatchDataArray; } diff --git a/searchlib/src/vespa/searchlib/bitcompression/countcompression.h b/searchlib/src/vespa/searchlib/bitcompression/countcompression.h index 664a1245c2f..6eb37e1d1ad 100644 --- a/searchlib/src/vespa/searchlib/bitcompression/countcompression.h +++ b/searchlib/src/vespa/searchlib/bitcompression/countcompression.h @@ -19,15 +19,13 @@ public: uint32_t _minChunkDocs; // Minimum number of documents for chunking uint32_t _docIdLimit; // Limit 
for document ids (docId < docIdLimit) uint64_t _numWordIds; // Number of words in dictionary - uint64_t _minWordNum; // Minimum word number PostingListCountFileDecodeContext() : ParentClass(), _avgBitsPerDoc(10), _minChunkDocs(262144), _docIdLimit(10000000), - _numWordIds(0), - _minWordNum(0u) + _numWordIds(0) { } @@ -50,15 +48,13 @@ public: uint32_t _minChunkDocs; // Minimum number of documents for chunking uint32_t _docIdLimit; // Limit for document ids (docId < docIdLimit) uint64_t _numWordIds; // Number of words in dictionary - uint64_t _minWordNum; // Mininum word number PostingListCountFileEncodeContext() : ParentClass(), _avgBitsPerDoc(10), _minChunkDocs(262144), _docIdLimit(10000000), - _numWordIds(0), - _minWordNum(0u) + _numWordIds(0) { } diff --git a/searchlib/src/vespa/searchlib/bitcompression/posocccompression.cpp b/searchlib/src/vespa/searchlib/bitcompression/posocccompression.cpp index 76a65a7244a..fd6c723e901 100644 --- a/searchlib/src/vespa/searchlib/bitcompression/posocccompression.cpp +++ b/searchlib/src/vespa/searchlib/bitcompression/posocccompression.cpp @@ -1,10 +1,10 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-#include "compression.h" #include "posocccompression.h" #include "posocc_fields_params.h" #include <vespa/searchlib/fef/termfieldmatchdata.h> #include <vespa/searchlib/fef/termfieldmatchdataarray.h> +#include <vespa/searchlib/index/postinglistparams.h> #include <vespa/vespalib/stllike/asciistream.h> #include <vespa/vespalib/data/fileheader.h> @@ -36,8 +36,7 @@ EG2PosOccDecodeContext<bigEndian>:: readHeader(const vespalib::GenericHeader &header, const vespalib::string &prefix) { - const_cast<PosOccFieldsParams *>(_fieldsParams)->readHeader(header, - prefix); + const_cast<PosOccFieldsParams *>(_fieldsParams)->readHeader(header, prefix); } diff --git a/searchlib/src/vespa/searchlib/bitcompression/posocccompression.h b/searchlib/src/vespa/searchlib/bitcompression/posocccompression.h index 184e2414638..aadd58f9152 100644 --- a/searchlib/src/vespa/searchlib/bitcompression/posocccompression.h +++ b/searchlib/src/vespa/searchlib/bitcompression/posocccompression.h @@ -1,8 +1,8 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#pragma once +#include "compression.h" #include <vespa/searchlib/index/docidandfeatures.h> -#include <vespa/searchcommon/common/schema.h> #define K_VALUE_POSOCC_FIRST_WORDPOS 8 @@ -16,35 +16,6 @@ #define K_VALUE_POSOCC_ELEMENTID 0 #define K_VALUE_POSOCC_ELEMENTWEIGHT 9 -namespace search::index { - -class DocIdAndPosOccFeatures : public DocIdAndFeatures -{ -public: - - void - addNextOcc(uint32_t elementId, - uint32_t wordPos, - int32_t elementWeight, - uint32_t elementLen) - { - assert(wordPos < elementLen); - if (_elements.empty() || elementId > _elements.back().getElementId()) { - _elements.emplace_back(elementId, elementWeight, elementLen); - } else { - assert(elementId == _elements.back().getElementId()); - assert(elementWeight == _elements.back().getWeight()); - assert(elementLen == _elements.back().getElementLen()); - } - assert(_elements.back().getNumOccs() == 0 || - wordPos > _word_positions.back().getWordPos()); - _elements.back().incNumOccs(); - _word_positions.emplace_back(wordPos); - } -}; - -} - namespace search::bitcompression { class PosOccFieldsParams; diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp b/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp index d2b02b02a87..70309645ee2 100644 --- a/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp @@ -7,6 +7,7 @@ #include <vespa/searchlib/util/file_settings.h> #include <vespa/vespalib/data/fileheader.h> #include <vespa/vespalib/util/size_literals.h> +#include <vespa/fastlib/io/bufferedfile.h> #include <cassert> namespace search::diskindex { @@ -29,17 +30,13 @@ readHeader(vespalib::FileHeader &h, BitVectorFileWrite::BitVectorFileWrite(BitVectorKeyScope scope) : BitVectorIdxFileWrite(scope), - _datFile(nullptr), + _datFile(), _datHeaderLen(0) { } -BitVectorFileWrite::~BitVectorFileWrite() -{ - // No implicit close() call, but cleanup memory allocations. 
- delete _datFile; -} +BitVectorFileWrite::~BitVectorFileWrite() = default; void @@ -50,12 +47,11 @@ BitVectorFileWrite::open(const vespalib::string &name, { vespalib::string datname = name + ".bdat"; - assert(_datFile == nullptr); + assert( ! _datFile); Parent::open(name, docIdLimit, tuneFileWrite, fileHeaderContext); - FastOS_FileInterface *datfile = new FastOS_File; - _datFile = new Fast_BufferedFile(datfile); + _datFile = std::make_unique<Fast_BufferedFile>(new FastOS_File); if (tuneFileWrite.getWantSyncWrites()) { _datFile->EnableSyncWrites(); } @@ -115,13 +111,13 @@ BitVectorFileWrite::updateDatHeader(uint64_t fileBitSize) h.putTag(Tag("numKeys", _numKeys)); h.putTag(Tag("frozen", 1)); h.putTag(Tag("fileBitSize", fileBitSize)); - _datFile->Flush(); - _datFile->Sync(); + bool sync_ok = _datFile->Sync(); + assert(sync_ok); assert(h.getSize() == _datHeaderLen); _datFile->SetPosition(0); h.writeFile(*_datFile); - _datFile->Flush(); - _datFile->Sync(); + sync_ok = _datFile->Sync(); + assert(sync_ok); } @@ -150,7 +146,8 @@ BitVectorFileWrite::sync() { flush(); Parent::syncCommon(); - _datFile->Sync(); + bool sync_ok = _datFile->Sync(); + assert(sync_ok); } @@ -167,10 +164,10 @@ BitVectorFileWrite::close() (void) bitmapbytes; _datFile->alignEndForDirectIO(); updateDatHeader(pos * 8); - _datFile->Close(); + bool close_ok = _datFile->Close(); + assert(close_ok); } - delete _datFile; - _datFile = nullptr; + _datFile.reset(); } Parent::close(); } diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.h b/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.h index 37ca72d3ec0..007368babdd 100644 --- a/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.h +++ b/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.h @@ -2,12 +2,13 @@ #pragma once -#include <vespa/fastlib/io/bufferedfile.h> +#include "bitvectoridxfile.h" #include <vespa/searchlib/common/bitvector.h> #include <vespa/searchlib/common/tunefileinfo.h> #include <vespa/vespalib/stllike/string.h> 
#include <vespa/vespalib/stllike/allocator.h> -#include "bitvectoridxfile.h" + +class Fast_BufferedFile; namespace search::diskindex { @@ -16,7 +17,7 @@ class BitVectorFileWrite : public BitVectorIdxFileWrite private: using Parent = BitVectorIdxFileWrite; - Fast_BufferedFile *_datFile; + std::unique_ptr<Fast_BufferedFile> _datFile; public: private: @@ -32,13 +33,13 @@ public: void open(const vespalib::string &name, uint32_t docIdLimit, const TuneFileSeqWrite &tuneFileWrite, - const common::FileHeaderContext &fileHeaderContext); + const common::FileHeaderContext &fileHeaderContext) override; void addWordSingle(uint64_t wordNum, const BitVector &bitVector); - void flush(); - void sync(); - void close(); + void flush() override; + void sync() override; + void close() override; void makeDatHeader(const common::FileHeaderContext &fileHeaderContext); void updateDatHeader(uint64_t fileBitSize); }; diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp b/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp index 17ad4c5b846..e87238bef2d 100644 --- a/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp @@ -1,12 +1,12 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#include "bitvectoridxfile.h" -#include <vespa/searchlib/common/bitvector.h> #include <vespa/searchlib/common/fileheadercontext.h> #include <vespa/searchlib/index/bitvectorkeys.h> #include <vespa/searchlib/util/file_settings.h> #include <vespa/vespalib/data/fileheader.h> #include <vespa/vespalib/util/size_literals.h> +#include <vespa/fastlib/io/bufferedfile.h> #include <cassert> namespace search::diskindex { @@ -115,13 +115,13 @@ BitVectorIdxFileWrite::updateIdxHeader(uint64_t fileBitSize) if (_scope != BitVectorKeyScope::SHARED_WORDS) { h.putTag(Tag("fileBitSize", fileBitSize)); } - _idxFile->Flush(); - _idxFile->Sync(); + bool sync_ok = _idxFile->Sync(); + assert(sync_ok); assert(h.getSize() == _idxHeaderLen); _idxFile->SetPosition(0); h.writeFile(*_idxFile); - _idxFile->Flush(); - _idxFile->Sync(); + sync_ok = _idxFile->Sync(); + assert(sync_ok); } void @@ -147,7 +147,8 @@ BitVectorIdxFileWrite::flush() void BitVectorIdxFileWrite::syncCommon() { - _idxFile->Sync(); + bool sync_ok = _idxFile->Sync(); + assert(sync_ok); } void @@ -166,7 +167,8 @@ BitVectorIdxFileWrite::close() assert(pos == idxSize()); _idxFile->alignEndForDirectIO(); updateIdxHeader(pos * 8); - _idxFile->Close(); + bool close_ok = _idxFile->Close(); + assert(close_ok); } _idxFile.reset(); } diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.h b/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.h index a5b6226fd43..f814cd20f5a 100644 --- a/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.h +++ b/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.h @@ -2,12 +2,13 @@ #pragma once -#include <vespa/fastlib/io/bufferedfile.h> #include <vespa/searchlib/common/bitvector.h> #include <vespa/searchlib/common/tunefileinfo.h> #include <vespa/vespalib/stllike/string.h> #include "bitvectorkeyscope.h" +class Fast_BufferedFile; + namespace search::common { class FileHeaderContext; } namespace search::diskindex { @@ -35,18 +36,18 @@ public: BitVectorIdxFileWrite& 
operator=(const BitVectorIdxFileWrite &&) = delete; BitVectorIdxFileWrite(BitVectorKeyScope scope); - ~BitVectorIdxFileWrite(); + virtual ~BitVectorIdxFileWrite(); - void open(const vespalib::string &name, uint32_t docIdLimit, - const TuneFileSeqWrite &tuneFileWrite, - const common::FileHeaderContext &fileHeaderContext); + virtual void open(const vespalib::string &name, uint32_t docIdLimit, + const TuneFileSeqWrite &tuneFileWrite, + const common::FileHeaderContext &fileHeaderContext); void addWordSingle(uint64_t wordNum, uint32_t numDocs); - void flush(); - void sync(); - void close(); + virtual void flush(); + virtual void sync(); + virtual void close(); static uint32_t getBitVectorLimit(uint32_t docIdLimit) { // Must match FastS_BinSizeParams::CalcMaxBinSize() diff --git a/searchlib/src/vespa/searchlib/diskindex/dictionarywordreader.cpp b/searchlib/src/vespa/searchlib/diskindex/dictionarywordreader.cpp index a3b9259a278..7bc1bdbfb9b 100644 --- a/searchlib/src/vespa/searchlib/diskindex/dictionarywordreader.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/dictionarywordreader.cpp @@ -3,6 +3,7 @@ #include "dictionarywordreader.h" #include <vespa/searchlib/index/schemautil.h> #include <vespa/vespalib/util/error.h> +#include <vespa/fastlib/io/bufferedfile.h> #include <vespa/log/log.h> LOG_SETUP(".diskindex.dictionarywordreader"); @@ -43,21 +44,27 @@ DictionaryWordReader::open(const vespalib::string & dictionaryName, _old2newwordfile->EnableDirectIO(); } // no checking possible - _old2newwordfile->WriteOpen(wordMapName.c_str()); + _old2newwordfile->OpenWriteOnly(wordMapName.c_str()); _old2newwordfile->SetSize(0); return true; } void +DictionaryWordReader::writeNewWordNum(uint64_t newWordNum) { + _old2newwordfile->WriteBuf(&newWordNum, sizeof(newWordNum)); +} + +void DictionaryWordReader::close() { if (!_dictFile->close()) { LOG(error, "Error closing input dictionary"); } - _old2newwordfile->Flush(); - _old2newwordfile->Sync(); - _old2newwordfile->Close(); + bool 
sync_ok = _old2newwordfile->Sync(); + assert(sync_ok); + bool close_ok = _old2newwordfile->Close(); + assert(close_ok); } } diff --git a/searchlib/src/vespa/searchlib/diskindex/dictionarywordreader.h b/searchlib/src/vespa/searchlib/diskindex/dictionarywordreader.h index 18e886ca22e..5c5dc60f4e7 100644 --- a/searchlib/src/vespa/searchlib/diskindex/dictionarywordreader.h +++ b/searchlib/src/vespa/searchlib/diskindex/dictionarywordreader.h @@ -2,7 +2,7 @@ #pragma once #include "pagedict4file.h" -#include <vespa/fastlib/io/bufferedfile.h> + namespace search::diskindex { @@ -47,7 +47,7 @@ public: private: // "owners" of file handles. - std::unique_ptr<Fast_BufferedFile> _old2newwordfile; + std::unique_ptr<FastOS_FileInterface> _old2newwordfile; using DictionaryFileSeqRead = index::DictionaryFileSeqRead; std::unique_ptr<DictionaryFileSeqRead> _dictFile; @@ -82,9 +82,7 @@ public: void close(); - void writeNewWordNum(uint64_t newWordNum) { - _old2newwordfile->WriteBuf(&newWordNum, sizeof(newWordNum)); - } + void writeNewWordNum(uint64_t newWordNum); void write(WordAggregator &writer) { writer.tryWriteWord(_word); diff --git a/searchlib/src/vespa/searchlib/diskindex/diskindex.h b/searchlib/src/vespa/searchlib/diskindex/diskindex.h index 05492a59ee3..12be8979cc3 100644 --- a/searchlib/src/vespa/searchlib/diskindex/diskindex.h +++ b/searchlib/src/vespa/searchlib/diskindex/diskindex.h @@ -7,6 +7,7 @@ #include <vespa/searchlib/index/dictionaryfile.h> #include <vespa/searchlib/index/field_length_info.h> #include <vespa/searchlib/queryeval/searchable.h> +#include <vespa/searchcommon/common/schema.h> #include <vespa/vespalib/stllike/string.h> #include <vespa/vespalib/stllike/cache.h> diff --git a/searchlib/src/vespa/searchlib/diskindex/extposocc.cpp b/searchlib/src/vespa/searchlib/diskindex/extposocc.cpp index a4e9e4d06f7..dcf897df955 100644 --- a/searchlib/src/vespa/searchlib/diskindex/extposocc.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/extposocc.cpp @@ -5,8 +5,8 @@ 
#include "fileheader.h" #include <vespa/searchlib/index/postinglistcounts.h> #include <vespa/searchlib/index/docidandfeatures.h> -#include <vespa/searchlib/index/postinglistcounts.h> #include <vespa/searchlib/index/postinglistcountfile.h> +#include <vespa/searchlib/index/postinglistparams.h> #include <vespa/log/log.h> LOG_SETUP(".diskindex.extposocc"); diff --git a/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h b/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h index f1b5c487e40..bf62965719d 100644 --- a/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h +++ b/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h @@ -4,9 +4,10 @@ #include "bitvectorfile.h" #include <vespa/searchlib/index/dictionaryfile.h> #include <vespa/searchlib/index/postinglistfile.h> -#include <vespa/searchlib/bitcompression/compression.h> -#include <vespa/searchlib/bitcompression/countcompression.h> #include <vespa/searchlib/bitcompression/posocccompression.h> +#include <vespa/searchlib/bitcompression/countcompression.h> + +namespace search::index { class Schema; } namespace search::diskindex { diff --git a/searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp b/searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp index 32d0105b7c1..4462c90f4c5 100644 --- a/searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp @@ -6,6 +6,7 @@ #include <vespa/vespalib/data/fileheader.h> #include <vespa/vespalib/io/fileutil.h> #include <vespa/vespalib/util/size_literals.h> +#include <vespa/fastos/file.h> #include <vespa/log/log.h> LOG_SETUP(".diskindex.pagedict4file"); @@ -123,7 +124,6 @@ PageDict4FileSeqRead::PageDict4FileSeqRead() _wordNum(0u) { } - PageDict4FileSeqRead::~PageDict4FileSeqRead() = default; void @@ -192,7 +192,6 @@ PageDict4FileSeqRead::open(const vespalib::string &name, return true; } - bool PageDict4FileSeqRead::close() { @@ -204,7 +203,6 @@ PageDict4FileSeqRead::close() return true; } - void 
PageDict4FileSeqRead::getParams(PostingListParams &params) { @@ -219,32 +217,83 @@ PageDict4FileSeqRead::getParams(PostingListParams &params) } } +struct PageDict4FileSeqWrite::DictFileContext { + DictFileContext(bool extended, vespalib::stringref id, vespalib::stringref desc, + const vespalib::string &name, const TuneFileSeqWrite &tune); + ~DictFileContext(); + void makeHeader(const FileHeaderContext &fileHeaderContext); + bool updateHeader(uint64_t fileBitSize, uint64_t wordNum); + void writeExtendedHeader(vespalib::GenericHeader &header); + bool close(); + const vespalib::string _id; + const vespalib::string _desc; + const bool _extended; + uint32_t _headerLen; + bool _valid; + EC _ec; + ComprFileWriteContext _writeContext; + FastOS_File _file; +}; -PageDict4FileSeqWrite::PageDict4FileSeqWrite() - : _pWriter(), - _spWriter(), - _ssWriter(), - _pe(), - _pWriteContext(_pe), - _pfile(), - _spe(), - _spWriteContext(_spe), - _spfile(), - _sse(), - _ssWriteContext(_sse), - _ssfile(), - _pHeaderLen(0), - _spHeaderLen(0), - _ssHeaderLen(0) +PageDict4FileSeqWrite::DictFileContext::DictFileContext(bool extended, vespalib::stringref id, vespalib::stringref desc, + const vespalib::string & name, const TuneFileSeqWrite &tune) + : _id(id), + _desc(desc), + _extended(extended), + _headerLen(0u), + _valid(false), + _ec(), + _writeContext(_ec), + _file() { - _pe.setWriteContext(&_pWriteContext); - _spe.setWriteContext(&_spWriteContext); - _sse.setWriteContext(&_ssWriteContext); + _ec.setWriteContext(&_writeContext); + if (tune.getWantSyncWrites()) { + _file.EnableSyncWrites(); + } + if (tune.getWantDirectIO()) { + _file.EnableDirectIO(); + } + bool ok = _file.OpenWriteOnly(name.c_str()); + assertOpenWriteOnly(ok, name); + _writeContext.setFile(&_file); + _writeContext.allocComprBuf(64_Ki, 32_Ki); + uint64_t fileSize = _file.GetSize(); + uint64_t bufferStartFilePos = _writeContext.getBufferStartFilePos(); + assert(fileSize >= bufferStartFilePos); + _file.SetSize(bufferStartFilePos); + 
assert(bufferStartFilePos == static_cast<uint64_t>(_file.GetPosition())); + + _ec.setupWrite(_writeContext); + assert(_ec.getWriteOffset() == 0); + _valid = true; } +bool +PageDict4FileSeqWrite::DictFileContext::DictFileContext::close() { + //uint64_t usedPBits = _ec.getWriteOffset(); + _ec.flush(); + _writeContext.writeComprBuffer(true); + + _writeContext.dropComprBuf(); + bool success = _file.Sync(); + success &= _file.Close(); + _writeContext.setFile(nullptr); + return success; +} -PageDict4FileSeqWrite::~PageDict4FileSeqWrite() = default; +PageDict4FileSeqWrite::DictFileContext::~DictFileContext() = default; +PageDict4FileSeqWrite::PageDict4FileSeqWrite() + : _params(), + _pWriter(), + _spWriter(), + _ssWriter(), + _ss(), + _sp(), + _p() +{ } + +PageDict4FileSeqWrite::~PageDict4FileSeqWrite() = default; void PageDict4FileSeqWrite::writeWord(vespalib::stringref word, const PostingListCounts &counts) @@ -252,122 +301,48 @@ PageDict4FileSeqWrite::writeWord(vespalib::stringref word, const PostingListCoun _pWriter->addCounts(word, counts); } - bool PageDict4FileSeqWrite::open(const vespalib::string &name, - const TuneFileSeqWrite &tuneFileWrite, + const TuneFileSeqWrite &tune, const FileHeaderContext &fileHeaderContext) { assert( ! _pWriter); assert( ! _spWriter); assert( ! 
_ssWriter); - - vespalib::string pname = name + ".pdat"; - vespalib::string spname = name + ".spdat"; - vespalib::string ssname = name + ".ssdat"; - - if (tuneFileWrite.getWantSyncWrites()) { - _pfile.EnableSyncWrites(); - _spfile.EnableSyncWrites(); - _ssfile.EnableSyncWrites(); - } - if (tuneFileWrite.getWantDirectIO()) { - _pfile.EnableDirectIO(); - _spfile.EnableDirectIO(); - _ssfile.EnableDirectIO(); - } - bool ok = _pfile.OpenWriteOnly(pname.c_str()); - assertOpenWriteOnly(ok, pname); - _pWriteContext.setFile(&_pfile); - - ok = _spfile.OpenWriteOnly(spname.c_str()); - assertOpenWriteOnly(ok, spname); - _spWriteContext.setFile(&_spfile); - - ok = _ssfile.OpenWriteOnly(ssname.c_str()); - assertOpenWriteOnly(ok, ssname); - _ssWriteContext.setFile(&_ssfile); - - _pWriteContext.allocComprBuf(64_Ki, 32_Ki); - _spWriteContext.allocComprBuf(64_Ki, 32_Ki); - _ssWriteContext.allocComprBuf(64_Ki, 32_Ki); - - uint64_t pFileSize = _pfile.GetSize(); - uint64_t spFileSize = _spfile.GetSize(); - uint64_t ssFileSize = _ssfile.GetSize(); - uint64_t pBufferStartFilePos = _pWriteContext.getBufferStartFilePos(); - uint64_t spBufferStartFilePos = _spWriteContext.getBufferStartFilePos(); - uint64_t ssBufferStartFilePos = _ssWriteContext.getBufferStartFilePos(); - assert(pFileSize >= pBufferStartFilePos); - assert(spFileSize >= spBufferStartFilePos); - assert(ssFileSize >= ssBufferStartFilePos); - (void) pFileSize; - (void) spFileSize; - (void) ssFileSize; - _pfile.SetSize(pBufferStartFilePos); - _spfile.SetSize(spBufferStartFilePos); - _ssfile.SetSize(ssBufferStartFilePos); - assert(pBufferStartFilePos == static_cast<uint64_t>(_pfile.GetPosition())); - assert(spBufferStartFilePos == - static_cast<uint64_t>(_spfile.GetPosition())); - assert(ssBufferStartFilePos == - static_cast<uint64_t>(_ssfile.GetPosition())); - - _pe.setupWrite(_pWriteContext); - _spe.setupWrite(_spWriteContext); - _sse.setupWrite(_ssWriteContext); - assert(_pe.getWriteOffset() == 0); - 
assert(_spe.getWriteOffset() == 0); - assert(_sse.getWriteOffset() == 0); - _spe.copyParams(_sse); - _pe.copyParams(_sse); + _ss = std::make_unique<DictFileContext>(true, mySSId, "Dictionary sparse sparse file", name + ".ssdat", tune); + _sp = std::make_unique<DictFileContext>(false, mySPId, "Dictionary sparse page file", name + ".spdat", tune); + _p = std::make_unique<DictFileContext>(false, myPId, "Dictionary page file", name + ".pdat", tune); + activateParams(_params); // Write initial file headers - makePHeader(fileHeaderContext); - makeSPHeader(fileHeaderContext); - makeSSHeader(fileHeaderContext); + _p->makeHeader(fileHeaderContext); + _sp->makeHeader(fileHeaderContext); + _ss->makeHeader(fileHeaderContext); - _ssWriter = std::make_unique<SSWriter>(_sse); - _spWriter = std::make_unique<SPWriter>(*_ssWriter, _spe); - _pWriter = std::make_unique<PWriter>(*_spWriter, _pe); + _ssWriter = std::make_unique<SSWriter>(_ss->_ec); + _spWriter = std::make_unique<SPWriter>(*_ssWriter, _sp->_ec); + _pWriter = std::make_unique<PWriter>(*_spWriter, _p->_ec); _spWriter->setup(); _pWriter->setup(); - return true; } - bool PageDict4FileSeqWrite::close() { bool success = true; _pWriter->flush(); - uint64_t usedPBits = _pe.getWriteOffset(); - uint64_t usedSPBits = _spe.getWriteOffset(); - uint64_t usedSSBits = _sse.getWriteOffset(); - _pe.flush(); - _pWriteContext.writeComprBuffer(true); - _spe.flush(); - _spWriteContext.writeComprBuffer(true); - _sse.flush(); - _ssWriteContext.writeComprBuffer(true); - - _pWriteContext.dropComprBuf(); - success &= _pfile.Sync(); - success &= _pfile.Close(); - _pWriteContext.setFile(nullptr); - _spWriteContext.dropComprBuf(); - success &= _spfile.Sync(); - success &= _spfile.Close(); - _spWriteContext.setFile(nullptr); - _ssWriteContext.dropComprBuf(); - success &= _ssfile.Sync(); - success &= _ssfile.Close(); - _ssWriteContext.setFile(nullptr); + uint64_t usedPBits = _p->_ec.getWriteOffset(); + uint64_t usedSPBits = _sp->_ec.getWriteOffset(); + 
uint64_t usedSSBits = _ss->_ec.getWriteOffset(); + success &= _p->close(); + success &= _sp->close(); + success &= _ss->close(); + uint64_t wordNum = _pWriter->getWordNum(); // Update file headers - success &= updatePHeader(usedPBits); - success &= updateSPHeader(usedSPBits); - success &= updateSSHeader(usedSSBits); + success &= _p->updateHeader(usedPBits, wordNum); + success &= _sp->updateHeader(usedSPBits, wordNum); + success &= _ss->updateHeader(usedSSBits, wordNum); _pWriter.reset(); _spWriter.reset(); @@ -376,192 +351,99 @@ PageDict4FileSeqWrite::close() return success; } - void -PageDict4FileSeqWrite::writeSSSubHeader(vespalib::GenericHeader &header) +PageDict4FileSeqWrite::DictFileContext::writeExtendedHeader(vespalib::GenericHeader &header) { - SSEC &e = _sse; typedef vespalib::GenericHeader::Tag Tag; - header.putTag(Tag("numWordIds", e._numWordIds)); - header.putTag(Tag("avgBitsPerDoc", e._avgBitsPerDoc)); - header.putTag(Tag("minChunkDocs", e._minChunkDocs)); - header.putTag(Tag("docIdLimit", e._docIdLimit)); + header.putTag(Tag("numWordIds", _ec._numWordIds)); + header.putTag(Tag("avgBitsPerDoc", _ec._avgBitsPerDoc)); + header.putTag(Tag("minChunkDocs", _ec._minChunkDocs)); + header.putTag(Tag("docIdLimit", _ec._docIdLimit)); } - void -PageDict4FileSeqWrite::makePHeader(const FileHeaderContext &fileHeaderContext) +PageDict4FileSeqWrite::DictFileContext::makeHeader(const FileHeaderContext &fileHeaderContext) { - PEC &e = _pe; - ComprFileWriteContext &wc = _pWriteContext; - - // subheader only written to SS file. 
- typedef vespalib::GenericHeader::Tag Tag; vespalib::FileHeader header(FileSettings::DIRECTIO_ALIGNMENT); - fileHeaderContext.addTags(header, _pfile.GetFileName()); + fileHeaderContext.addTags(header, _file.GetFileName()); header.putTag(Tag("frozen", 0)); header.putTag(Tag("fileBitSize", 0)); - header.putTag(Tag("format.0", myPId)); + header.putTag(Tag("format.0", _id)); header.putTag(Tag("endian", "big")); - header.putTag(Tag("desc", "Dictionary page file")); - e.setupWrite(wc); - e.writeHeader(header); - e.smallAlign(64); - e.flush(); - uint32_t headerLen = header.getSize(); - headerLen += (-headerLen & 7); - assert(e.getWriteOffset() == headerLen * 8); - assert((e.getWriteOffset() & 63) == 0); // Header must be word aligned - if (_pHeaderLen != 0) { - assert(_pHeaderLen == headerLen); + header.putTag(Tag("desc", _desc)); + if (_extended) { + writeExtendedHeader(header); } - _pHeaderLen = headerLen; -} - - -void -PageDict4FileSeqWrite::makeSPHeader(const FileHeaderContext &fileHeaderContext) -{ - SPEC &e = _spe; - ComprFileWriteContext &wc = _spWriteContext; - - // subheader only written to SS file. 
- - typedef vespalib::GenericHeader::Tag Tag; - vespalib::FileHeader header(FileSettings::DIRECTIO_ALIGNMENT); - - fileHeaderContext.addTags(header, _spfile.GetFileName()); - header.putTag(Tag("frozen", 0)); - header.putTag(Tag("fileBitSize", 0)); - header.putTag(Tag("format.0", mySPId)); - header.putTag(Tag("endian", "big")); - header.putTag(Tag("desc", "Dictionary sparse page file")); - e.setupWrite(wc); - e.writeHeader(header); - e.smallAlign(64); - e.flush(); - uint32_t headerLen = header.getSize(); - headerLen += (-headerLen & 7); - assert(e.getWriteOffset() == headerLen * 8); - assert((e.getWriteOffset() & 63) == 0); // Header must be word aligned - if (_spHeaderLen != 0) { - assert(_spHeaderLen == headerLen); - } - _spHeaderLen = headerLen; -} - - -void -PageDict4FileSeqWrite::makeSSHeader(const FileHeaderContext &fileHeaderContext) -{ - SSEC &e = _sse; - ComprFileWriteContext &wc = _ssWriteContext; - - typedef vespalib::GenericHeader::Tag Tag; - vespalib::FileHeader header(FileSettings::DIRECTIO_ALIGNMENT); - - fileHeaderContext.addTags(header, _ssfile.GetFileName()); - header.putTag(Tag("frozen", 0)); - header.putTag(Tag("fileBitSize", 0)); - header.putTag(Tag("format.0", mySSId)); - header.putTag(Tag("endian", "big")); - header.putTag(Tag("desc", "Dictionary sparse sparse file")); - writeSSSubHeader(header); - - e.setupWrite(wc); - e.writeHeader(header); - e.smallAlign(64); - e.flush(); + _ec.setupWrite(_writeContext); + _ec.writeHeader(header); + _ec.smallAlign(64); + _ec.flush(); uint32_t headerLen = header.getSize(); headerLen += (-headerLen & 7); - assert(e.getWriteOffset() == headerLen * 8); - assert((e.getWriteOffset() & 63) == 0); // Header must be word aligned - if (_ssHeaderLen != 0) { - assert(_ssHeaderLen == headerLen); + assert(_ec.getWriteOffset() == headerLen * 8); + assert((_ec.getWriteOffset() & 63) == 0); // Header must be word aligned + if (_headerLen != 0) { + assert(_headerLen == headerLen); } - _ssHeaderLen = headerLen; -} - - -bool 
-PageDict4FileSeqWrite::updatePHeader(uint64_t fileBitSize) -{ - vespalib::FileHeader h(FileSettings::DIRECTIO_ALIGNMENT); - FastOS_File f; - f.OpenReadWrite(_pfile.GetFileName()); - h.readFile(f); - FileHeaderContext::setFreezeTime(h); - typedef vespalib::GenericHeader::Tag Tag; - h.putTag(Tag("frozen", 1)); - h.putTag(Tag("fileBitSize", fileBitSize)); - h.rewriteFile(f); - bool success = f.Sync(); - success &= f.Close(); - return success; + _headerLen = headerLen; } - bool -PageDict4FileSeqWrite::updateSPHeader(uint64_t fileBitSize) +PageDict4FileSeqWrite::DictFileContext::updateHeader(uint64_t fileBitSize, uint64_t wordNum) { vespalib::FileHeader h(FileSettings::DIRECTIO_ALIGNMENT); FastOS_File f; - f.OpenReadWrite(_spfile.GetFileName()); + f.OpenReadWrite(_file.GetFileName()); h.readFile(f); FileHeaderContext::setFreezeTime(h); typedef vespalib::GenericHeader::Tag Tag; h.putTag(Tag("frozen", 1)); h.putTag(Tag("fileBitSize", fileBitSize)); + if (_extended) { + assert(wordNum <= _ec._numWordIds); + h.putTag(Tag("numWordIds", wordNum)); + } h.rewriteFile(f); bool success = f.Sync(); success &= f.Close(); return success; } - -bool -PageDict4FileSeqWrite::updateSSHeader(uint64_t fileBitSize) -{ - vespalib::FileHeader h(FileSettings::DIRECTIO_ALIGNMENT); - FastOS_File f; - f.OpenReadWrite(_ssfile.GetFileName()); - h.readFile(f); - FileHeaderContext::setFreezeTime(h); - typedef vespalib::GenericHeader::Tag Tag; - h.putTag(Tag("frozen", 1)); - h.putTag(Tag("fileBitSize", fileBitSize)); - uint64_t wordNum = _pWriter->getWordNum(); - assert(wordNum <= _sse._numWordIds); - h.putTag(Tag("numWordIds", wordNum)); - h.rewriteFile(f); - bool success = f.Sync(); - success &= f.Close(); - return success; +void +PageDict4FileSeqWrite::setParams(const PostingListParams ¶ms) { + _params.add(params); + if (_ss) { + activateParams(_params); + } } - void -PageDict4FileSeqWrite::setParams(const PostingListParams ¶ms) -{ - params.get("avgBitsPerDoc", _sse._avgBitsPerDoc); - 
params.get("minChunkDocs", _sse._minChunkDocs); - params.get("docIdLimit", _sse._docIdLimit); - params.get("numWordIds", _sse._numWordIds); - _spe.copyParams(_sse); - _pe.copyParams(_sse); +PageDict4FileSeqWrite::activateParams(const PostingListParams ¶ms) { + assert(_ss); + EC & ec = _ss->_ec; + params.get("avgBitsPerDoc", ec._avgBitsPerDoc); + params.get("minChunkDocs", ec._minChunkDocs); + params.get("docIdLimit", ec._docIdLimit); + params.get("numWordIds", ec._numWordIds); + _sp->_ec.copyParams(_ss->_ec); + _p->_ec.copyParams(_ss->_ec); } - void PageDict4FileSeqWrite::getParams(PostingListParams ¶ms) { params.clear(); - params.set("avgBitsPerDoc", _sse._avgBitsPerDoc); - params.set("minChunkDocs", _sse._minChunkDocs); - params.set("docIdLimit", _sse._docIdLimit); - params.set("numWordIds", _sse._numWordIds); + if (_ss) { + EC &ec = _ss->_ec; + params.set("avgBitsPerDoc", ec._avgBitsPerDoc); + params.set("minChunkDocs", ec._minChunkDocs); + params.set("docIdLimit", ec._docIdLimit); + params.set("numWordIds", ec._numWordIds); + } else { + params = _params; + } } } diff --git a/searchlib/src/vespa/searchlib/diskindex/pagedict4file.h b/searchlib/src/vespa/searchlib/diskindex/pagedict4file.h index 6e2ab6f9ffa..1c43c20a219 100644 --- a/searchlib/src/vespa/searchlib/diskindex/pagedict4file.h +++ b/searchlib/src/vespa/searchlib/diskindex/pagedict4file.h @@ -2,10 +2,8 @@ #pragma once #include <vespa/searchlib/index/dictionaryfile.h> -#include <vespa/searchlib/bitcompression/compression.h> -#include <vespa/searchlib/bitcompression/countcompression.h> +#include <vespa/searchlib/index/postinglistparams.h> #include <vespa/searchlib/bitcompression/pagedict4.h> -#include <vespa/fastos/file.h> namespace vespalib { class GenericHeader; } @@ -47,44 +45,23 @@ public: */ class PageDict4FileSeqWrite : public index::DictionaryFileSeqWrite { - typedef bitcompression::PostingListCountFileEncodeContext EC; - typedef EC SPEC; - typedef EC PEC; - typedef EC SSEC; - typedef 
bitcompression::PageDict4SSWriter SSWriter; - typedef bitcompression::PageDict4SPWriter SPWriter; - typedef bitcompression::PageDict4PWriter PWriter; - - typedef index::PostingListCounts PostingListCounts; + using EC = bitcompression::PostingListCountFileEncodeContext; + using SSWriter = bitcompression::PageDict4SSWriter; + using SPWriter = bitcompression::PageDict4SPWriter; + using PWriter = bitcompression::PageDict4PWriter; + using PostingListCounts = index::PostingListCounts; using FileHeaderContext = common::FileHeaderContext; + struct DictFileContext; + index::PostingListParams _params; std::unique_ptr<PWriter> _pWriter; std::unique_ptr<SPWriter> _spWriter; std::unique_ptr<SSWriter> _ssWriter; + std::unique_ptr<DictFileContext> _ss; + std::unique_ptr<DictFileContext> _sp; + std::unique_ptr<DictFileContext> _p; - EC _pe; - ComprFileWriteContext _pWriteContext; - FastOS_File _pfile; - - EC _spe; - ComprFileWriteContext _spWriteContext; - FastOS_File _spfile; - - EC _sse; - ComprFileWriteContext _ssWriteContext; - FastOS_File _ssfile; - - uint32_t _pHeaderLen; // Length of header for page file (bytes) - uint32_t _spHeaderLen; // Length of header for sparse page file (bytes) - uint32_t _ssHeaderLen; // Length of header for sparse sparse file (bytes) - - void writeSSSubHeader(vespalib::GenericHeader &header); - void makePHeader(const FileHeaderContext &fileHeaderContext); - void makeSPHeader(const FileHeaderContext &fileHeaderContext); - void makeSSHeader(const FileHeaderContext &fileHeaderContext); - bool updatePHeader(uint64_t fileBitSize); - bool updateSPHeader(uint64_t fileBitSize); - bool updateSSHeader(uint64_t fileBitSize); + void activateParams(const index::PostingListParams ¶ms); public: PageDict4FileSeqWrite(); ~PageDict4FileSeqWrite(); @@ -95,7 +72,7 @@ public: * Open dictionary file for sequential write. The index with most * words should be first for optimal compression. 
*/ - bool open(const vespalib::string &name, const TuneFileSeqWrite &tuneFileWrite, + bool open(const vespalib::string &name, const TuneFileSeqWrite &tune, const FileHeaderContext &fileHeaderContext) override; bool close() override; diff --git a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_writer_base.cpp b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_writer_base.cpp index a14c880a214..3f44b56706a 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_writer_base.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_writer_base.cpp @@ -2,6 +2,7 @@ #include "zc4_posting_writer_base.h" #include <vespa/searchlib/index/postinglistcounts.h> +#include <vespa/searchlib/index/postinglistparams.h> using search::index::PostingListCounts; using search::index::PostingListParams; @@ -225,9 +226,7 @@ Zc4PostingWriterBase::Zc4PostingWriterBase(PostingListCounts &counts) _l4Skip.maybeExpand(); } -Zc4PostingWriterBase::~Zc4PostingWriterBase() -{ -} +Zc4PostingWriterBase::~Zc4PostingWriterBase() = default; #define L1SKIPSTRIDE 16 #define L2SKIPSTRIDE 8 diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposocc.cpp b/searchlib/src/vespa/searchlib/diskindex/zcposocc.cpp index 593d5567266..d0b7fb42692 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zcposocc.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/zcposocc.cpp @@ -3,9 +3,7 @@ #include "zcposocc.h" #include <vespa/searchlib/index/postinglistcounts.h> #include <vespa/searchlib/index/postinglistcountfile.h> -#include <vespa/searchlib/index/postinglistfile.h> -#include <vespa/searchlib/index/docidandfeatures.h> - +#include <vespa/searchlib/index/postinglistparams.h> namespace search::diskindex { diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposting.cpp b/searchlib/src/vespa/searchlib/diskindex/zcposting.cpp index 544e8d9f262..1f399971406 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zcposting.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/zcposting.cpp @@ -5,6 +5,7 @@ 
#include <vespa/searchlib/index/postinglistcountfile.h> #include <vespa/searchlib/index/postinglistfile.h> #include <vespa/searchlib/index/docidandfeatures.h> +#include <vespa/searchlib/index/postinglistparams.h> #include <vespa/searchlib/common/fileheadercontext.h> #include <vespa/vespalib/data/fileheader.h> @@ -46,9 +47,7 @@ Zc4PostingSeqRead::Zc4PostingSeqRead(PostingListCountFileSeqRead *countFile, boo } -Zc4PostingSeqRead::~Zc4PostingSeqRead() -{ -} +Zc4PostingSeqRead::~Zc4PostingSeqRead() = default; void Zc4PostingSeqRead::readDocIdAndFeatures(DocIdAndFeatures &features) @@ -201,9 +200,7 @@ Zc4PostingSeqWrite(PostingListCountFileSeqWrite *countFile) } -Zc4PostingSeqWrite::~Zc4PostingSeqWrite() -{ -} +Zc4PostingSeqWrite::~Zc4PostingSeqWrite() = default; void diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposting.h b/searchlib/src/vespa/searchlib/diskindex/zcposting.h index dc23fe5b37e..3d7dee1a988 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zcposting.h +++ b/searchlib/src/vespa/searchlib/diskindex/zcposting.h @@ -42,8 +42,6 @@ public: bool close() override; void getParams(PostingListParams ¶ms) override; void getFeatureParams(PostingListParams ¶ms) override; - void readWordStartWithSkip(); - void readWordStart(); void readHeader(); static const vespalib::string &getIdentifier(bool dynamic_k); }; diff --git a/searchlib/src/vespa/searchlib/docstore/compacter.cpp b/searchlib/src/vespa/searchlib/docstore/compacter.cpp index 26fb79f8a4e..3639b0a57d2 100644 --- a/searchlib/src/vespa/searchlib/docstore/compacter.cpp +++ b/searchlib/src/vespa/searchlib/docstore/compacter.cpp @@ -2,6 +2,7 @@ #include "compacter.h" #include "logdatastore.h" +#include <vespa/vespalib/util/size_literals.h> #include <vespa/vespalib/util/array.hpp> #include <vespa/log/log.h> @@ -11,6 +12,10 @@ namespace search::docstore { using vespalib::alloc::Alloc; +namespace { + static constexpr size_t INITIAL_BACKING_BUFFER_SIZE = 64_Mi; +} + void Compacter::write(LockGuard guard, 
uint32_t chunkId, uint32_t lid, const void *buffer, size_t sz) { (void) chunkId; @@ -28,7 +33,7 @@ BucketCompacter::BucketCompacter(size_t maxSignificantBucketBits, const Compress _maxBucketGuardDuration(vespalib::duration::zero()), _lastSample(vespalib::steady_clock::now()), _lock(), - _backingMemory(Alloc::alloc(0x40000000), &_lock), + _backingMemory(Alloc::alloc(INITIAL_BACKING_BUFFER_SIZE), &_lock), _tmpStore(), _lidGuard(ds.getLidReadGuard()), _bucketizerGuard(), diff --git a/searchlib/src/vespa/searchlib/index/dictionaryfile.h b/searchlib/src/vespa/searchlib/index/dictionaryfile.h index 5063143d323..6c8535f8563 100644 --- a/searchlib/src/vespa/searchlib/index/dictionaryfile.h +++ b/searchlib/src/vespa/searchlib/index/dictionaryfile.h @@ -1,7 +1,6 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once -#include "postinglistcounts.h" #include "postinglisthandle.h" #include "postinglistcountfile.h" #include <vespa/searchlib/common/tunefileinfo.h> diff --git a/searchlib/src/vespa/searchlib/index/docidandfeatures.cpp b/searchlib/src/vespa/searchlib/index/docidandfeatures.cpp index f62a4bc7997..4341bcb9a46 100644 --- a/searchlib/src/vespa/searchlib/index/docidandfeatures.cpp +++ b/searchlib/src/vespa/searchlib/index/docidandfeatures.cpp @@ -1,8 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#include "docidandfeatures.h" -#include <vespa/log/log.h> -LOG_SETUP(".index.docidandfeatures"); +#include <cassert> namespace search::index { @@ -23,4 +22,21 @@ DocIdAndFeatures::DocIdAndFeatures(const DocIdAndFeatures &) = default; DocIdAndFeatures & DocIdAndFeatures::operator = (const DocIdAndFeatures &) = default; DocIdAndFeatures::~DocIdAndFeatures() = default; +void +DocIdAndPosOccFeatures::addNextOcc(uint32_t elementId, uint32_t wordPos, int32_t elementWeight, uint32_t elementLen) +{ + assert(wordPos < elementLen); + if (_elements.empty() || elementId > _elements.back().getElementId()) { + _elements.emplace_back(elementId, elementWeight, elementLen); + } else { + assert(elementId == _elements.back().getElementId()); + assert(elementWeight == _elements.back().getWeight()); + assert(elementLen == _elements.back().getElementLen()); + } + assert(_elements.back().getNumOccs() == 0 || + wordPos > _word_positions.back().getWordPos()); + _elements.back().incNumOccs(); + _word_positions.emplace_back(wordPos); +} + } diff --git a/searchlib/src/vespa/searchlib/index/docidandfeatures.h b/searchlib/src/vespa/searchlib/index/docidandfeatures.h index 6b1659771fa..e595ec833ef 100644 --- a/searchlib/src/vespa/searchlib/index/docidandfeatures.h +++ b/searchlib/src/vespa/searchlib/index/docidandfeatures.h @@ -163,4 +163,10 @@ public: void set_has_raw_data(bool val) { _has_raw_data = val; } }; +class DocIdAndPosOccFeatures : public DocIdAndFeatures +{ +public: + void addNextOcc(uint32_t elementId, uint32_t wordPos, int32_t elementWeight, uint32_t elementLen); +}; + } diff --git a/searchlib/src/vespa/searchlib/index/postinglistcountfile.cpp b/searchlib/src/vespa/searchlib/index/postinglistcountfile.cpp index f4c38636d01..edf4f8c43b2 100644 --- a/searchlib/src/vespa/searchlib/index/postinglistcountfile.cpp +++ b/searchlib/src/vespa/searchlib/index/postinglistcountfile.cpp @@ -1,16 +1,12 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. #include "postinglistcountfile.h" +#include <vespa/searchlib/index/postinglistparams.h> namespace search::index { -PostingListCountFileSeqRead::PostingListCountFileSeqRead() -{ -} - -PostingListCountFileSeqRead::~PostingListCountFileSeqRead() -{ -} +PostingListCountFileSeqRead::PostingListCountFileSeqRead() = default; +PostingListCountFileSeqRead::~PostingListCountFileSeqRead() = default; void PostingListCountFileSeqRead:: @@ -19,13 +15,8 @@ getParams(PostingListParams ¶ms) params.clear(); } -PostingListCountFileSeqWrite::PostingListCountFileSeqWrite() -{ -} - -PostingListCountFileSeqWrite::~PostingListCountFileSeqWrite() -{ -} +PostingListCountFileSeqWrite::PostingListCountFileSeqWrite() = default; +PostingListCountFileSeqWrite::~PostingListCountFileSeqWrite() = default; void PostingListCountFileSeqWrite:: diff --git a/searchlib/src/vespa/searchlib/index/postinglistcountfile.h b/searchlib/src/vespa/searchlib/index/postinglistcountfile.h index 47ec202dad1..7e17fc5bb9e 100644 --- a/searchlib/src/vespa/searchlib/index/postinglistcountfile.h +++ b/searchlib/src/vespa/searchlib/index/postinglistcountfile.h @@ -1,9 +1,9 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#pragma once -#include "postinglistparams.h" #include "postinglistcounts.h" #include <vespa/searchlib/common/tunefileinfo.h> +#include <vespa/vespalib/stllike/string.h> namespace search::common { class FileHeaderContext; } @@ -11,6 +11,7 @@ namespace search::index { class PostingListCounts; class PostingListHandle; +class PostingListParams; /** * Interface for count files describing where in a posting list file diff --git a/searchlib/src/vespa/searchlib/index/postinglistfile.cpp b/searchlib/src/vespa/searchlib/index/postinglistfile.cpp index 15412fcd5f1..4d53790bd73 100644 --- a/searchlib/src/vespa/searchlib/index/postinglistfile.cpp +++ b/searchlib/src/vespa/searchlib/index/postinglistfile.cpp @@ -1,14 +1,12 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "postinglistfile.h" +#include <vespa/searchlib/index/postinglistparams.h> #include <vespa/fastos/file.h> namespace search::index { -PostingListFileSeqRead::PostingListFileSeqRead() -{ -} - +PostingListFileSeqRead::PostingListFileSeqRead() = default; PostingListFileSeqRead::~PostingListFileSeqRead() = default; void @@ -37,9 +35,7 @@ PostingListFileSeqWrite::PostingListFileSeqWrite() { } -PostingListFileSeqWrite::~PostingListFileSeqWrite() -{ -} +PostingListFileSeqWrite::~PostingListFileSeqWrite() = default; void PostingListFileSeqWrite:: @@ -75,9 +71,7 @@ PostingListFileRandRead() { } -PostingListFileRandRead::~PostingListFileRandRead() -{ -} +PostingListFileRandRead::~PostingListFileRandRead() = default; void PostingListFileRandRead::afterOpen(FastOS_FileInterface &file) @@ -117,8 +111,7 @@ readPostingList(const PostingListCounts &counts, uint32_t numSegments, PostingListHandle &handle) { - _lower->readPostingList(counts, firstSegment, numSegments, - handle); + _lower->readPostingList(counts, firstSegment, numSegments,handle); } bool diff --git a/searchlib/src/vespa/searchlib/index/postinglistfile.h 
b/searchlib/src/vespa/searchlib/index/postinglistfile.h index d731b3f0f67..a33319e1d4f 100644 --- a/searchlib/src/vespa/searchlib/index/postinglistfile.h +++ b/searchlib/src/vespa/searchlib/index/postinglistfile.h @@ -3,8 +3,8 @@ #include "postinglistcounts.h" #include "postinglisthandle.h" -#include "postinglistparams.h" #include <vespa/searchlib/common/tunefileinfo.h> +#include <vespa/vespalib/stllike/string.h> class FastOS_FileInterface; @@ -14,6 +14,7 @@ namespace search::index { class DocIdAndFeatures; class FieldLengthInfo; +class PostingListParams; /** * Interface for posting list files containing document ids and features diff --git a/searchlib/src/vespa/searchlib/index/postinglistparams.cpp b/searchlib/src/vespa/searchlib/index/postinglistparams.cpp index 74e8f731f6f..6275399c498 100644 --- a/searchlib/src/vespa/searchlib/index/postinglistparams.cpp +++ b/searchlib/src/vespa/searchlib/index/postinglistparams.cpp @@ -49,6 +49,12 @@ PostingListParams::clear() } void +PostingListParams::add(const PostingListParams & toAdd) +{ + _map.insert(toAdd._map.begin(), toAdd._map.end()); +} + +void PostingListParams::erase(const vespalib::string &key) { _map.erase(key); @@ -62,8 +68,7 @@ PostingListParams::operator!=(const PostingListParams &rhs) const template <typename TYPE> void -PostingListParams::set(const vespalib::string &key, - const TYPE &val) +PostingListParams::set(const vespalib::string &key, const TYPE &val) { std::ostringstream os; @@ -73,8 +78,7 @@ PostingListParams::set(const vespalib::string &key, template <typename TYPE> void -PostingListParams::get(const vespalib::string &key, - TYPE &val) const +PostingListParams::get(const vespalib::string &key, TYPE &val) const { std::istringstream is; Map::const_iterator it; @@ -87,35 +91,27 @@ PostingListParams::get(const vespalib::string &key, } template void -PostingListParams::set<bool>(const vespalib::string &key, - const bool &val); +PostingListParams::set<bool>(const vespalib::string &key, const bool 
&val); template void -PostingListParams::get<bool>(const vespalib::string &key, - bool &val) const; +PostingListParams::get<bool>(const vespalib::string &key, bool &val) const; template void -PostingListParams::set<int32_t>(const vespalib::string &key, - const int32_t &val); +PostingListParams::set<int32_t>(const vespalib::string &key, const int32_t &val); template void -PostingListParams::get<int32_t>(const vespalib::string &key, - int32_t &val) const; +PostingListParams::get<int32_t>(const vespalib::string &key, int32_t &val) const; template void -PostingListParams::set<uint32_t>(const vespalib::string &key, - const uint32_t &val); +PostingListParams::set<uint32_t>(const vespalib::string &key, const uint32_t &val); template void -PostingListParams::get<uint32_t>(const vespalib::string &key, - uint32_t &val) const; +PostingListParams::get<uint32_t>(const vespalib::string &key, uint32_t &val) const; template void -PostingListParams::set<uint64_t>(const vespalib::string &key, - const uint64_t &val); +PostingListParams::set<uint64_t>(const vespalib::string &key, const uint64_t &val); template void -PostingListParams::get<uint64_t>(const vespalib::string &key, - uint64_t &val) const; +PostingListParams::get<uint64_t>(const vespalib::string &key, uint64_t &val) const; } diff --git a/searchlib/src/vespa/searchlib/index/postinglistparams.h b/searchlib/src/vespa/searchlib/index/postinglistparams.h index 9797eef5278..42da5855c23 100644 --- a/searchlib/src/vespa/searchlib/index/postinglistparams.h +++ b/searchlib/src/vespa/searchlib/index/postinglistparams.h @@ -7,34 +7,22 @@ namespace search::index { class PostingListParams { - typedef std::map<vespalib::string, vespalib::string> Map; + using Map = std::map<vespalib::string, vespalib::string>; Map _map; public: template <typename TYPE> - void - set(const vespalib::string &key, const TYPE &val); + void set(const vespalib::string &key, const TYPE &val); template <typename TYPE> - void - get(const vespalib::string &key, TYPE 
&val) const; - - bool - isSet(const vespalib::string &key) const; - - void - setStr(const vespalib::string &key, const vespalib::string &val); - - const vespalib::string & - getStr(const vespalib::string &key) const; - - void - clear(); - - void - erase(const vespalib::string &key); - - bool - operator!=(const PostingListParams &rhs) const; + void get(const vespalib::string &key, TYPE &val) const; + + bool isSet(const vespalib::string &key) const; + void setStr(const vespalib::string &key, const vespalib::string &val); + const vespalib::string & getStr(const vespalib::string &key) const; + void clear(); + void erase(const vespalib::string &key); + bool operator!=(const PostingListParams &rhs) const; + void add(const PostingListParams & toAdd); }; } diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp index fdb2de8fb59..c55de3890cd 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp @@ -11,6 +11,7 @@ #include <vespa/searchlib/common/schedule_sequenced_task_callback.h> #include <vespa/vespalib/util/isequencedtaskexecutor.h> #include <vespa/vespalib/util/retain_guard.h> +#include <cassert> namespace search::memoryindex { @@ -28,8 +29,7 @@ DocumentInverter::DocumentInverter(DocumentInverterContext& context) { auto& schema = context.get_schema(); auto& field_indexes = context.get_field_indexes(); - for (uint32_t fieldId = 0; fieldId < schema.getNumIndexFields(); - ++fieldId) { + for (uint32_t fieldId = 0; fieldId < schema.getNumIndexFields(); ++fieldId) { auto &remover(field_indexes.get_remover(fieldId)); auto &inserter(field_indexes.get_inserter(fieldId)); auto &calculator(field_indexes.get_calculator(fieldId)); diff --git a/searchlib/src/vespa/searchlib/memoryindex/feature_store.h b/searchlib/src/vespa/searchlib/memoryindex/feature_store.h index 2ad1fd78f07..9f17d369208 100644 --- 
a/searchlib/src/vespa/searchlib/memoryindex/feature_store.h +++ b/searchlib/src/vespa/searchlib/memoryindex/feature_store.h @@ -3,7 +3,6 @@ #pragma once #include <vespa/searchlib/index/docidandfeatures.h> -#include <vespa/searchlib/bitcompression/compression.h> #include <vespa/searchlib/bitcompression/posocccompression.h> #include <vespa/searchlib/bitcompression/posocc_fields_params.h> #include <vespa/vespalib/datastore/datastore.h> diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp index 5f4d02d23db..c606b9b6340 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp @@ -5,6 +5,7 @@ #include "ordered_field_index_inserter.h" #include <vespa/searchlib/bitcompression/posocccompression.h> #include <vespa/searchlib/index/i_field_length_inspector.h> +#include <vespa/searchcommon/common/schema.h> #include <vespa/vespalib/btree/btree.hpp> #include <vespa/vespalib/btree/btreeiterator.hpp> #include <vespa/vespalib/btree/btreenode.hpp> diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp index d96b8491027..a443e994559 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp @@ -13,6 +13,7 @@ #include <vespa/document/fieldvalue/weightedsetfieldvalue.h> #include <vespa/searchlib/bitcompression/compression.h> #include <vespa/searchlib/bitcompression/posocccompression.h> +#include <vespa/searchcommon/common/schema.h> #include <vespa/searchlib/common/sort.h> #include <vespa/searchlib/util/url.h> #include <vespa/vespalib/text/utf8.h> @@ -443,6 +444,17 @@ FieldInverter::invertField(uint32_t docId, const FieldValue::UP &val) } void +FieldInverter::startDoc(uint32_t docId) { + assert(_docId == 0); + assert(docId != 0); 
+ abortPendingDoc(docId); + _removeDocs.push_back(docId); + _docId = docId; + _elem = 0; + _wpos = 0; +} + +void FieldInverter::invertNormalDocTextField(const FieldValue &val) { const vespalib::Identifiable::RuntimeClass & cInfo(val.getClass()); diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h index 56cb1677f67..36dd6339b54 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h +++ b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h @@ -6,13 +6,14 @@ #include <vespa/document/annotation/span.h> #include <vespa/document/datatype/datatypes.h> #include <vespa/document/fieldvalue/document.h> -#include <vespa/searchlib/bitcompression/compression.h> -#include <vespa/searchlib/bitcompression/posocccompression.h> #include <vespa/searchlib/index/docidandfeatures.h> #include <vespa/vespalib/stllike/allocator.h> #include <limits> -namespace search::index { class FieldLengthCalculator; } +namespace search::index { + class FieldLengthCalculator; + class Schema; +} namespace search::memoryindex { @@ -310,15 +311,7 @@ public: _removeDocs.push_back(docId); } - void startDoc(uint32_t docId) { - assert(_docId == 0); - assert(docId != 0); - abortPendingDoc(docId); - _removeDocs.push_back(docId); - _docId = docId; - _elem = 0; - _wpos = 0; - } + void startDoc(uint32_t docId); void endDoc(); diff --git a/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.cpp index 10918a83c50..326b7b0967a 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.cpp @@ -389,7 +389,7 @@ UrlFieldInverter::pushDocuments() _hostname->pushDocuments(); } -UrlFieldInverter::UrlFieldInverter(index::Schema::CollectionType collectionType, +UrlFieldInverter::UrlFieldInverter(index::schema::CollectionType collectionType, FieldInverter *all, 
FieldInverter *scheme, FieldInverter *host, diff --git a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt index 9a35e4a2b05..ae34cdd66c8 100644 --- a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt @@ -3,6 +3,7 @@ vespa_add_library(searchlib_tensor OBJECT SOURCES angular_distance.cpp bitvector_visited_tracker.cpp + blob_sequence_reader.cpp default_nearest_neighbor_index_factory.cpp dense_tensor_attribute.cpp dense_tensor_attribute_saver.cpp diff --git a/searchlib/src/vespa/searchlib/tensor/blob_sequence_reader.cpp b/searchlib/src/vespa/searchlib/tensor/blob_sequence_reader.cpp new file mode 100644 index 00000000000..0d86af2f3a5 --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/blob_sequence_reader.cpp @@ -0,0 +1,13 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "blob_sequence_reader.h" +#include <vespa/fastos/file.h> + +namespace search::tensor { + +void +BlobSequenceReader::readBlob(void *buf, size_t len) { + _datFile.file().ReadBuf(buf, len); +} + +} // namespace diff --git a/searchlib/src/vespa/searchlib/tensor/blob_sequence_reader.h b/searchlib/src/vespa/searchlib/tensor/blob_sequence_reader.h index 5b7efc73a02..45fcf5524d2 100644 --- a/searchlib/src/vespa/searchlib/tensor/blob_sequence_reader.h +++ b/searchlib/src/vespa/searchlib/tensor/blob_sequence_reader.h @@ -1,8 +1,8 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-#include <vespa/fastlib/io/bufferedfile.h> +#pragma once + #include <vespa/searchlib/attribute/readerbase.h> -#include <vespa/searchlib/util/fileutil.h> namespace search::tensor { @@ -20,7 +20,7 @@ public: _sizeReader(_datFile.file()) { } uint32_t getNextSize() { return _sizeReader.readHostOrder(); } - void readBlob(void *buf, size_t len) { _datFile.file().ReadBuf(buf, len); } + void readBlob(void *buf, size_t len); }; } // namespace diff --git a/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.cpp index 0cb23fe2ae9..0ae55a670da 100644 --- a/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.cpp @@ -5,7 +5,6 @@ #include <vespa/eval/eval/fast_value.h> #include <vespa/eval/eval/value.h> -#include <vespa/fastlib/io/bufferedfile.h> #include <vespa/searchlib/attribute/readerbase.h> #include <vespa/searchlib/util/fileutil.h> #include <vespa/vespalib/util/array.h> diff --git a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp index 16d9f2e3bbd..a0ec04b98cb 100644 --- a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp @@ -3,10 +3,7 @@ #include "serialized_fast_value_attribute.h" #include "streamed_value_saver.h" #include <vespa/eval/eval/value.h> -#include <vespa/eval/eval/fast_value.h> #include <vespa/fastlib/io/bufferedfile.h> -#include <vespa/searchlib/attribute/readerbase.h> -#include <vespa/searchlib/util/fileutil.h> #include <vespa/vespalib/util/rcuvector.hpp> #include <vespa/log/log.h> diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.h b/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.h index 088019749af..c6646f2e61f 100644 --- 
a/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.h +++ b/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.h @@ -5,6 +5,8 @@ #include <vespa/searchlib/bitcompression/compression.h> #include <vespa/searchlib/bitcompression/posocccompression.h> #include <vespa/searchlib/bitcompression/posocc_fields_params.h> +#include <vespa/searchcommon/common/schema.h> + namespace vespalib { class Rand48; } diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp index 87efc8132ee..8d5f6d6db4e 100644 --- a/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp +++ b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp @@ -8,6 +8,7 @@ #include <vespa/searchlib/diskindex/zc4_posting_params.h> #include <vespa/searchlib/diskindex/zc4_posting_reader.h> #include <vespa/searchlib/diskindex/zc4_posting_writer.h> +#include <vespa/searchlib/index/postinglistparams.h> using search::fef::TermFieldMatchData; using search::fef::TermFieldMatchDataArray; diff --git a/searchlib/src/vespa/searchlib/util/fileutil.cpp b/searchlib/src/vespa/searchlib/util/fileutil.cpp index 83f3fe3a5fa..e85e792f492 100644 --- a/searchlib/src/vespa/searchlib/util/fileutil.cpp +++ b/searchlib/src/vespa/searchlib/util/fileutil.cpp @@ -4,6 +4,7 @@ #include "filesizecalculator.h" #include <vespa/vespalib/util/exceptions.h> #include <vespa/vespalib/util/guard.h> +#include <vespa/fastlib/io/bufferedfile.h> #include <fcntl.h> #include <sys/mman.h> #include <sys/stat.h> diff --git a/searchlib/src/vespa/searchlib/util/fileutil.h b/searchlib/src/vespa/searchlib/util/fileutil.h index 74096d8b5dd..8271265aa33 100644 --- a/searchlib/src/vespa/searchlib/util/fileutil.h +++ b/searchlib/src/vespa/searchlib/util/fileutil.h @@ -8,7 +8,6 @@ #include <vespa/vespalib/stllike/string.h> using vespalib::GenericHeader; -class Fast_BufferedFile; namespace search { diff --git a/searchlib/src/vespa/searchlib/util/fileutil.hpp 
b/searchlib/src/vespa/searchlib/util/fileutil.hpp index 98e54581ac7..5b5303ef169 100644 --- a/searchlib/src/vespa/searchlib/util/fileutil.hpp +++ b/searchlib/src/vespa/searchlib/util/fileutil.hpp @@ -2,7 +2,6 @@ #pragma once #include "fileutil.h" -#include <vespa/fastlib/io/bufferedfile.h> namespace search { diff --git a/searchlib/src/vespa/searchlib/util/stringenum.cpp b/searchlib/src/vespa/searchlib/util/stringenum.cpp index 9744d39746e..116e400083a 100644 --- a/searchlib/src/vespa/searchlib/util/stringenum.cpp +++ b/searchlib/src/vespa/searchlib/util/stringenum.cpp @@ -1,7 +1,6 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "stringenum.h" -#include <vespa/fastlib/io/bufferedfile.h> #include <vespa/vespalib/stllike/hashtable.hpp> #include <cassert> @@ -10,27 +9,6 @@ LOG_SETUP(".seachlib.util.stringenum"); namespace search::util { -static inline char * -StripString(char *str) -{ - char *last = nullptr; // last non-space char - - if (str == nullptr) - return nullptr; - - for (; *str != '\0' && isspace(*str); str++); - char *first = str; - - for (; *str != '\0'; str++) - if (!isspace(*str)) - last = str; - - if (last != nullptr) - *(last + 1) = '\0'; - - return first; -} - StringEnum::StringEnum() : _numEntries(0), _mapping(), @@ -55,78 +33,6 @@ StringEnum::CreateReverseMapping() const } } - -bool -StringEnum::Save(const char *filename) -{ - char str[1024]; - - Fast_BufferedFile file; - file.WriteOpen(filename); - if (!file.IsOpened()) - return false; - - file.SetSize(0); - sprintf(str, "%d\n", _numEntries); - file.WriteString(str); - - for (uint32_t i = 0; i < _numEntries; i++) { - file.WriteString(Lookup(i)); - file.WriteString("\n"); - } - - return file.Sync(); -} - - -bool -StringEnum::Load(const char *filename) -{ - char line[1024]; - char *pt; - uint32_t entries; // from first line of file - uint32_t lineNumber; // current line in file - uint32_t entryCnt; // # entries obtained from file - - 
Clear(); - - Fast_BufferedFile file; - if (!file.OpenReadOnly(filename)) - return false; - - lineNumber = 0; - entryCnt = 0; - - pt = StripString(file.ReadLine(line, sizeof(line))); - if (pt == nullptr || *pt == '\0') - return false; - lineNumber++; - - entries = atoi(pt); - - while (!file.Eof()) { - pt = StripString(file.ReadLine(line, sizeof(line))); - if (pt == nullptr) // end of input ? - break; - lineNumber++; - if (*pt == '\0') // empty line ? - continue; - - uint32_t tmp = _numEntries; - if (static_cast<uint32_t>(Add(pt)) != tmp) { - LOG(error, "(%s:%d) duplicate enum entry: %s", filename, lineNumber, pt); - } - entryCnt++; - } - - if (entries != _numEntries - || entries != entryCnt) { - Clear(); - return false; - } - return true; -} - void StringEnum::Clear() { diff --git a/searchlib/src/vespa/searchlib/util/stringenum.h b/searchlib/src/vespa/searchlib/util/stringenum.h index 85f97de48ad..0da79db323a 100644 --- a/searchlib/src/vespa/searchlib/util/stringenum.h +++ b/searchlib/src/vespa/searchlib/util/stringenum.h @@ -80,25 +80,6 @@ public: * @return current number of entries. **/ uint32_t GetNumEntries() const { return _numEntries; } - - - /** - * Save the enumeration currently held by this object to file. - * - * @return success(true)/fail(false). - * @param filename name of save file. - **/ - bool Save(const char *filename); - - - /** - * Load an enumeration from file. The loaded enumeration will - * replace the one currently held by this object. - * - * @return success(true)/fail(false). - * @param filename name of file to load. 
- **/ - bool Load(const char *filename); }; } diff --git a/staging_vespalib/src/vespa/vespalib/stllike/cache.h b/staging_vespalib/src/vespa/vespalib/stllike/cache.h index 0f4349eb15a..181bb2ac63a 100644 --- a/staging_vespalib/src/vespa/vespalib/stllike/cache.h +++ b/staging_vespalib/src/vespa/vespalib/stllike/cache.h @@ -3,6 +3,7 @@ #include <vespa/vespalib/stllike/lrucache_map.h> #include <atomic> +#include <mutex> namespace vespalib { |