diff options
27 files changed, 850 insertions, 279 deletions
diff --git a/document/src/main/java/com/yahoo/document/json/JsonFeedReader.java b/document/src/main/java/com/yahoo/document/json/JsonFeedReader.java index 5d329554192..2f892144c66 100644 --- a/document/src/main/java/com/yahoo/document/json/JsonFeedReader.java +++ b/document/src/main/java/com/yahoo/document/json/JsonFeedReader.java @@ -25,7 +25,7 @@ import com.yahoo.vespaxmlparser.VespaXMLFeedReader.Operation; public class JsonFeedReader implements FeedReader { private final JsonReader reader; private InputStream stream; - private static final JsonFactory jsonFactory = new JsonFactory(); + private static final JsonFactory jsonFactory = new JsonFactory().disable(JsonFactory.Feature.CANONICALIZE_FIELD_NAMES); public JsonFeedReader(InputStream stream, DocumentTypeManager docMan) { reader = new JsonReader(docMan, stream, jsonFactory); diff --git a/document/src/main/java/com/yahoo/document/json/SingleDocumentParser.java b/document/src/main/java/com/yahoo/document/json/SingleDocumentParser.java index 0ca5ba744f1..9da04eb6ae4 100644 --- a/document/src/main/java/com/yahoo/document/json/SingleDocumentParser.java +++ b/document/src/main/java/com/yahoo/document/json/SingleDocumentParser.java @@ -18,7 +18,7 @@ import java.io.InputStream; * @author dybis */ public class SingleDocumentParser { - private static final JsonFactory jsonFactory = new JsonFactory(); + private static final JsonFactory jsonFactory = new JsonFactory().disable(JsonFactory.Feature.CANONICALIZE_FIELD_NAMES); private DocumentTypeManager docMan; public SingleDocumentParser(DocumentTypeManager docMan) { diff --git a/jrt/src/com/yahoo/jrt/slobrok/api/Register.java b/jrt/src/com/yahoo/jrt/slobrok/api/Register.java index d1ea7a7f1fa..8dbab27d041 100644 --- a/jrt/src/com/yahoo/jrt/slobrok/api/Register.java +++ b/jrt/src/com/yahoo/jrt/slobrok/api/Register.java @@ -1,12 +1,25 @@ // Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.jrt.slobrok.api; -import com.yahoo.jrt.*; +import com.yahoo.jrt.ErrorCode; +import com.yahoo.jrt.Method; +import com.yahoo.jrt.MethodHandler; +import com.yahoo.jrt.Request; +import com.yahoo.jrt.RequestWaiter; +import com.yahoo.jrt.Spec; +import com.yahoo.jrt.StringArray; +import com.yahoo.jrt.StringValue; +import com.yahoo.jrt.Supervisor; +import com.yahoo.jrt.Target; +import com.yahoo.jrt.Task; +import com.yahoo.jrt.Values; + import java.util.ArrayList; +import java.util.HashMap; import java.util.List; -import java.util.Random; -import java.util.logging.Logger; +import java.util.Map; import java.util.logging.Level; +import java.util.logging.Logger; /** * A Register object is used to register and unregister services with @@ -20,22 +33,29 @@ public class Register { private static Logger log = Logger.getLogger(Register.class.getName()); + private static final String REGISTER_METHOD_NAME = "slobrok.registerRpcServer"; + private static final String UNREGISTER_METHOD_NAME = "slobrok.unregisterRpcServer"; + private Supervisor orb; private SlobrokList slobroks; private String currSlobrok; - private String mySpec; + private final String mySpec; private BackOffPolicy backOff; private boolean reqDone = false; - private List<String> names = new ArrayList<>(); - private List<String> pending = new ArrayList<>(); - private List<String> unreg = new ArrayList<>(); + private List<String> names = new ArrayList<>(); + private List<String> pending = new ArrayList<>(); + private List<String> unreg = new ArrayList<>(); private Task updateTask = null; private RequestWaiter reqWait = null; private Target target = null; private Request req = null; + private String name = null; private Method m_list = null; private Method m_unreg = null; + /** Whether the last registerRpcServer for the name was a success, or null for the first. */ + private final Map<String, Boolean> lastRegisterSucceeded = new HashMap<>(); + /** * Remove all instances of name from list. 
*/ @@ -160,24 +180,47 @@ public class Register { private void handleUpdate() { if (reqDone) { reqDone = false; + + boolean logOnSuccess = false; + synchronized (this) { + if (req.methodName().equals(UNREGISTER_METHOD_NAME)) { + logOnSuccess = true; + // Why is this remove() here and not in unregisterName? Because at that time there may be + // an in-flight request for the registration of name, and in case handleUpdate() would + // anyway have to have special code for handling a removed name, e.g. testing for name + // being in names which is O(N). + lastRegisterSucceeded.remove(name); + } else { + final Boolean lastSucceeded = lastRegisterSucceeded.get(name); + if (lastSucceeded == null || lastSucceeded != !req.isError()) { + logOnSuccess = true; + lastRegisterSucceeded.put(name, !req.isError()); + } + } + } + if (req.isError()) { - if (req.errorCode() != ErrorCode.METHOD_FAILED) { - log.log(Level.FINE, "register failed: " + req.errorMessage() + " (code " + req.errorCode() + ")"); + if (req.errorCode() != ErrorCode.METHOD_FAILED) { + log.log(Level.INFO, logMessagePrefix() + " failed, will disconnect: " + req.errorMessage() + " (code " + req.errorCode() + ")"); target.close(); target = null; } else { - log.log(Level.WARNING, "register failed: " + req.errorMessage() + " (code " + req.errorCode() + ")"); + log.log(Level.WARNING, logMessagePrefix() + " failed: " + req.errorMessage()); } } else { + log.log(logOnSuccess ? Level.INFO : Level.FINE, logMessagePrefix() + " completed successfully"); backOff.reset(); } + req = null; + name = null; } if (req != null) { log.log(Level.FINEST, "req in progress"); return; // current request still in progress } if (target != null && ! 
slobroks.contains(currSlobrok)) { + log.log(Level.INFO, "RPC server " + mySpec + ": Slobrok server " + currSlobrok + " removed, will disconnect"); target.close(); target = null; } @@ -185,48 +228,56 @@ public class Register { currSlobrok = slobroks.nextSlobrokSpec(); if (currSlobrok == null) { double delay = backOff.get(); + Level level = backOff.shouldWarn(delay) ? Level.WARNING : Level.FINE; + log.log(level, "RPC server " + mySpec + ": All Slobrok servers tried, will retry in " + delay + + " seconds: " + slobroks); updateTask.schedule(delay); - if (backOff.shouldWarn(delay)) - log.log(Level.WARNING, "slobrok connection problems (retry in " + delay + " seconds) to: " + slobroks); - else - log.log(Level.FINE, "slobrok retry in " + delay + " seconds"); return; } + lastRegisterSucceeded.clear(); target = orb.connect(new Spec(currSlobrok)); + String namesString = null; + final boolean logFine = log.isLoggable(Level.FINE); synchronized (this) { + if (logFine) { + // 'names' must only be accessed in a synchronized(this) block + namesString = names.toString(); + } pending.clear(); pending.addAll(names); } + + if (logFine) { + log.log(Level.FINE, "RPC server " + mySpec + ": Connect to Slobrok server " + currSlobrok + + " and reregister all Slobrok names: " + namesString); + } } - boolean unregister = false; - String name; + synchronized (this) { if (unreg.size() > 0) { name = unreg.remove(unreg.size() - 1); - unregister = true; + req = new Request(UNREGISTER_METHOD_NAME); } else if (pending.size() > 0) { name = pending.remove(pending.size() - 1); + req = new Request(REGISTER_METHOD_NAME); } else { pending.addAll(names); - log.log(Level.FINE, "done, reschedule in 30s"); + log.log(Level.FINE, "RPC server " + mySpec + ": Reregister all Slobrok names in 30 seconds: " + names); updateTask.schedule(30.0); return; } } - if (unregister) { - req = new Request("slobrok.unregisterRpcServer"); - req.parameters().add(new StringValue(name)); - log.log(Level.FINE, "unregister [" + name 
+ "]"); - req.parameters().add(new StringValue(mySpec)); - target.invokeAsync(req, 35.0, reqWait); - } else { // register - req = new Request("slobrok.registerRpcServer"); - req.parameters().add(new StringValue(name)); - log.log(Level.FINE, "register [" + name + "]"); - req.parameters().add(new StringValue(mySpec)); - target.invokeAsync(req, 35.0, reqWait); - } + req.parameters().add(new StringValue(name)); + req.parameters().add(new StringValue(mySpec)); + log.log(Level.FINE, logMessagePrefix() + " now"); + target.invokeAsync(req, 35.0, reqWait); + } + + private String logMessagePrefix() { + return "RPC server " + mySpec + + (req.methodName().equals(UNREGISTER_METHOD_NAME) ? " unregistering " : " registering ") + + name + " with Slobrok server " + currSlobrok; } private synchronized void handleRpcList(Request req) { diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/Acl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/Acl.java index 850b9bf170d..250b4ee6fb3 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/Acl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/Acl.java @@ -31,8 +31,8 @@ public class Acl { public List<Command> toCommands() { final ImmutableList.Builder<Command> commands = ImmutableList.builder(); commands.add( - // Default policies - new PolicyCommand(Chain.INPUT, Action.REJECT), + // Default policies. Packets that do not match any rules will be processed according to policy. + new PolicyCommand(Chain.INPUT, Action.DROP), new PolicyCommand(Chain.FORWARD, Action.DROP), new PolicyCommand(Chain.OUTPUT, Action.ACCEPT), @@ -57,6 +57,13 @@ public class Acl { .withOption("-s", String.format("%s/128", ipAddress))) .forEach(commands::add); + // Reject all other packets. This means that packets that would otherwise be processed according to policy, are + // matched by the following rule. 
+ // + // Ideally, we want to set the INPUT policy to REJECT and get rid of this rule, but unfortunately REJECT is not + // a valid policy action. + commands.add(new FilterCommand(Chain.INPUT, Action.REJECT)); + return commands.build(); } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java index 14a46ab0b9a..aa6f762543f 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java @@ -134,10 +134,11 @@ public class NodeAgentImpl implements NodeAgent { private void addDebugMessage(String message) { synchronized (monitor) { - while (debugMessages.size() > 100) { + while (debugMessages.size() > 1000) { debugMessages.pop(); } + logger.debug(message); debugMessages.add("[" + sdf.format(new Date()) + "] " + message); } } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/ComponentsProviderImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/ComponentsProviderImpl.java index d5bc0e3826d..f5f22b7eca8 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/ComponentsProviderImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/ComponentsProviderImpl.java @@ -80,11 +80,7 @@ public class ComponentsProviderImpl implements ComponentsProvider { @Inject public ComponentsProviderImpl(final NodeAdminConfig config, final Docker docker, final MetricReceiverWrapper metricReceiver) { - this( - docker, - metricReceiver, - new Environment(), - config.isRunningLocally()); + this(docker, metricReceiver, new Environment(), config.isRunningLocally()); if (! 
config.isRunningLocally()) { setCorePattern(docker); diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/util/Environment.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/util/Environment.java index df7fe151cae..cdf473263cf 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/util/Environment.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/util/Environment.java @@ -1,6 +1,7 @@ // Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.node.admin.util; +import com.google.common.base.Strings; import com.yahoo.net.HostName; import com.yahoo.vespa.hosted.dockerapi.ContainerName; @@ -32,6 +33,7 @@ public class Environment { private static final String ENV_CONFIGSERVERS = "services__addr_configserver"; private static final String ENVIRONMENT = "ENVIRONMENT"; private static final String REGION = "REGION"; + private static final String LOGSTASH_NODES = "LOGSTASH_NODES"; private final Set<String> configServerHosts; private final String environment; @@ -53,7 +55,7 @@ public class Environment { HostName.getLocalhost(), new InetAddressResolver(), new PathResolver(), - Collections.emptyList() + getLogstashNodesFromEnvironment() ); } @@ -107,6 +109,14 @@ public class Environment { return hostNameStrings.stream().collect(Collectors.toSet()); } + private static List<String> getLogstashNodesFromEnvironment() { + String logstashNodes = System.getenv(LOGSTASH_NODES); + if(Strings.isNullOrEmpty(logstashNodes)) { + return Collections.emptyList(); + } + return Arrays.asList(logstashNodes.split("[,\\s]+")); + } + public InetAddress getInetAddressForHost(String hostname) throws UnknownHostException { return inetAddressResolver.getInetAddressForHost(hostname); } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/util/PrefixLogger.java 
b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/util/PrefixLogger.java index aa566c5afcc..438f59d2222 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/util/PrefixLogger.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/util/PrefixLogger.java @@ -36,6 +36,10 @@ public class PrefixLogger { } + public void debug(String message) { + log(LogLevel.DEBUG, message); + } + public void info(String message) { log(LogLevel.INFO, message); } diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/AclMaintainerTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/AclMaintainerTest.java index dc924cc7a8f..860d42fb928 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/AclMaintainerTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/AclMaintainerTest.java @@ -116,7 +116,7 @@ public class AclMaintainerTest { ); verify(dockerOperations, verificationMode).executeCommandInNetworkNamespace( eq(containerName), - aryEq(new String[]{"ip6tables", "-P", "INPUT", "REJECT"}) + aryEq(new String[]{"ip6tables", "-P", "INPUT", "DROP"}) ); verify(dockerOperations, verificationMode).executeCommandInNetworkNamespace( eq(containerName), @@ -143,6 +143,10 @@ public class AclMaintainerTest { eq(containerName), aryEq(new String[]{"ip6tables", "-A", "INPUT", "-s", aclSpec.ipAddress() + "/128", "-j", "ACCEPT"}) )); + verify(dockerOperations, verificationMode).executeCommandInNetworkNamespace( + eq(containerName), + aryEq(new String[]{"ip6tables", "-A", "INPUT", "-j", "REJECT"}) + ); } private Container makeContainer(String hostname) { diff --git a/searchcommon/src/vespa/searchcommon/attribute/search_context_params.h b/searchcommon/src/vespa/searchcommon/attribute/search_context_params.h index d15a97629f0..96c3d7f3470 100644 --- a/searchcommon/src/vespa/searchcommon/attribute/search_context_params.h +++ 
b/searchcommon/src/vespa/searchcommon/attribute/search_context_params.h @@ -2,6 +2,8 @@ #pragma once +#include <stddef.h> + namespace search { namespace attribute { diff --git a/searchcommon/src/vespa/searchcommon/common/range.h b/searchcommon/src/vespa/searchcommon/common/range.h index f33630daaf4..5bcf2355eb9 100644 --- a/searchcommon/src/vespa/searchcommon/common/range.h +++ b/searchcommon/src/vespa/searchcommon/common/range.h @@ -4,6 +4,9 @@ #pragma once +#include <limits> +#include <stdint.h> + namespace search { diff --git a/searchcorespi/src/vespa/searchcorespi/index/fusionrunner.cpp b/searchcorespi/src/vespa/searchcorespi/index/fusionrunner.cpp index 339ceab83c8..84c34714526 100644 --- a/searchcorespi/src/vespa/searchcorespi/index/fusionrunner.cpp +++ b/searchcorespi/src/vespa/searchcorespi/index/fusionrunner.cpp @@ -39,6 +39,9 @@ FusionRunner::FusionRunner(const string &base_dir, _fileHeaderContext(fileHeaderContext) { } +FusionRunner::~FusionRunner() { +} + namespace { void readSelectorArray(const string &selector_name, SelectorArray &selector_array, diff --git a/searchcorespi/src/vespa/searchcorespi/index/fusionrunner.h b/searchcorespi/src/vespa/searchcorespi/index/fusionrunner.h index 097b76bc4cc..ea197378fac 100644 --- a/searchcorespi/src/vespa/searchcorespi/index/fusionrunner.h +++ b/searchcorespi/src/vespa/searchcorespi/index/fusionrunner.h @@ -44,6 +44,7 @@ public: const search::index::Schema &schema, const search::TuneFileAttributes &tuneFileAttributes, const search::common::FileHeaderContext &fileHeaderContext); + ~FusionRunner(); /** * Combine the indexes specified by the ids by running fusion. 
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt index ad62331ac2b..290ac63939b 100644 --- a/searchlib/CMakeLists.txt +++ b/searchlib/CMakeLists.txt @@ -92,6 +92,7 @@ vespa_define_module( src/tests/attribute/stringattribute src/tests/attribute/tensorattribute src/tests/attribute/imported_attribute_vector + src/tests/attribute/imported_search_context src/tests/bitcompression/expgolomb src/tests/bitvector src/tests/btree diff --git a/searchlib/src/tests/attribute/imported_attribute_vector/imported_attribute_vector_test.cpp b/searchlib/src/tests/attribute/imported_attribute_vector/imported_attribute_vector_test.cpp index 2503d3c564a..b46875cf3e6 100644 --- a/searchlib/src/tests/attribute/imported_attribute_vector/imported_attribute_vector_test.cpp +++ b/searchlib/src/tests/attribute/imported_attribute_vector/imported_attribute_vector_test.cpp @@ -1,237 +1,13 @@ // Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/document/base/documentid.h> -#include <vespa/document/base/globalid.h> -#include <vespa/searchlib/attribute/attributefactory.h> -#include <vespa/searchlib/attribute/attributeguard.h> -#include <vespa/searchlib/attribute/floatbase.h> -#include <vespa/searchlib/attribute/imported_attribute_vector.h> -#include <vespa/searchlib/attribute/integerbase.h> -#include <vespa/searchlib/attribute/not_implemented_attribute.h> -#include <vespa/searchlib/attribute/stringbase.h> -#include <vespa/searchlib/test/mock_gid_to_lid_mapping.h> -#include <vespa/searchcommon/attribute/attributecontent.h> -#include <vespa/vespalib/testkit/testapp.h> -#include <algorithm> -#include <future> -#include <map> -#include <memory> -#include <vector> +#include <vespa/searchlib/test/imported_attribute_fixture.h> +#include <vespa/searchlib/fef/termfieldmatchdata.h> +#include <vespa/searchcommon/attribute/search_context_params.h> namespace search { namespace attribute { -using document::DocumentId; 
-using document::GlobalId; -using DocId = IAttributeVector::DocId; -using WeightedInt = IAttributeVector::WeightedInt; -using WeightedFloat = IAttributeVector::WeightedFloat; -using WeightedString = IAttributeVector::WeightedString; -using WeightedConstChar = IAttributeVector::WeightedConstChar; -using WeightedEnum = IAttributeVector::WeightedEnum; -using test::MockGidToLidMapperFactory; - -std::shared_ptr<ReferenceAttribute> create_reference_attribute(vespalib::stringref name = "ref") { - return std::make_shared<ReferenceAttribute>(name, Config(BasicType::REFERENCE)); -} - -template <typename AttrVecType> -std::shared_ptr<AttrVecType> create_typed_attribute(BasicType basic_type, - CollectionType collection_type, - vespalib::stringref name = "parent") { - return std::dynamic_pointer_cast<AttrVecType>( - AttributeFactory::createAttribute(name, Config(basic_type, collection_type))); -} - -template <typename AttrVecType> -std::shared_ptr<AttrVecType> create_single_attribute(BasicType type, vespalib::stringref name = "parent") { - return create_typed_attribute<AttrVecType>(type, CollectionType::SINGLE, name); -} - -template <typename AttrVecType> -std::shared_ptr<AttrVecType> create_array_attribute(BasicType type, vespalib::stringref name = "parent") { - return create_typed_attribute<AttrVecType>(type, CollectionType::ARRAY, name); -} - -template <typename AttrVecType> -std::shared_ptr<AttrVecType> create_wset_attribute(BasicType type, vespalib::stringref name = "parent") { - return create_typed_attribute<AttrVecType>(type, CollectionType::WSET, name); -} - -template <typename VectorType> -void add_n_docs_with_undefined_values(VectorType& vec, size_t n) { - vec.addDocs(n); - vec.commit(); -} - -GlobalId dummy_gid(uint32_t doc_index) { - return DocumentId(vespalib::make_string("id:foo:bar::%u", doc_index)).getGlobalId(); -} - -struct Fixture { - std::shared_ptr<AttributeVector> target_attr; - std::shared_ptr<ReferenceAttribute> reference_attr; - 
std::shared_ptr<ImportedAttributeVector> imported_attr; - std::shared_ptr<MockGidToLidMapperFactory> mapper_factory; - - Fixture(); - ~Fixture(); - - void map_reference(DocId from_lid, GlobalId via_gid, DocId to_lid) { - assert(from_lid < reference_attr->getNumDocs()); - reference_attr->update(from_lid, via_gid); - reference_attr->commit(); - mapper_factory->_map[via_gid] = to_lid; - } - - std::shared_ptr<ImportedAttributeVector> create_attribute_vector_from_members(vespalib::stringref name = "imported") { - return std::make_shared<ImportedAttributeVector>(name, reference_attr, target_attr); - } - - template <typename AttrVecType> - std::shared_ptr<AttrVecType> target_attr_as() { - auto ptr = std::dynamic_pointer_cast<AttrVecType>(target_attr); - assert(ptr.get() != nullptr); - return ptr; - } - - void reset_with_new_target_attr(std::shared_ptr<AttributeVector> new_target) { - target_attr = std::move(new_target); - imported_attr = create_attribute_vector_from_members(); - } - - template <typename ValueType> - struct LidToLidMapping { - DocId _from_lid; - GlobalId _via_gid; - DocId _to_lid; - ValueType _value_in_target_attr; - - LidToLidMapping(DocId from_lid, - GlobalId via_gid, - DocId to_lid, - ValueType value_in_target_attr) - : _from_lid(from_lid), - _via_gid(via_gid), - _to_lid(to_lid), - _value_in_target_attr(std::move(value_in_target_attr)) - {} - }; - - void set_up_attribute_vectors_before_adding_mappings() { - // Make a sneaky assumption that no tests try to use a lid > 9 - add_n_docs_with_undefined_values(*reference_attr, 10); - add_n_docs_with_undefined_values(*target_attr, 10); - } - - template <typename AttrVecType, typename MappingsType, typename ValueAssigner> - void set_up_and_map(const MappingsType& mappings, ValueAssigner assigner) { - set_up_attribute_vectors_before_adding_mappings(); - auto subtyped_target = target_attr_as<AttrVecType>(); - for (auto& m : mappings) { - map_reference(m._from_lid, m._via_gid, m._to_lid); - 
assigner(*subtyped_target, m); - } - subtyped_target->commit(); - } - - template <typename AttrVecType, typename ValueType> - void reset_with_single_value_reference_mappings( - BasicType type, - const std::vector<LidToLidMapping<ValueType>>& mappings) { - reset_with_new_target_attr(create_single_attribute<AttrVecType>(type)); - // Fun experiment: rename `auto& mapping` to `auto& m` and watch GCC howl about - // shadowing a variable... that exists in the set_up_and_map function! - set_up_and_map<AttrVecType>(mappings, [this](auto& target_vec, auto& mapping) { - ASSERT_TRUE(target_vec.update(mapping._to_lid, mapping._value_in_target_attr)); - }); - } - - template <typename AttrVecType, typename ValueType> - void reset_with_array_value_reference_mappings( - BasicType type, - const std::vector<LidToLidMapping<std::vector<ValueType>>> &mappings) { - reset_with_new_target_attr(create_array_attribute<AttrVecType>(type)); - set_up_and_map<AttrVecType>(mappings, [this](auto& target_vec, auto& mapping) { - constexpr uint32_t weight = 1; - for (const auto& v : mapping._value_in_target_attr) { - ASSERT_TRUE(target_vec.append(mapping._to_lid, v, weight)); - } - }); - } - - template <typename AttrVecType, typename WeightedValueType> - void reset_with_wset_value_reference_mappings( - BasicType type, - const std::vector<LidToLidMapping<std::vector<WeightedValueType>>> &mappings) { - reset_with_new_target_attr(create_wset_attribute<AttrVecType>(type)); - set_up_and_map<AttrVecType>(mappings, [this](auto& target_vec, auto& mapping) { - for (const auto& v : mapping._value_in_target_attr) { - ASSERT_TRUE(target_vec.append(mapping._to_lid, v.value(), v.weight())); - } - }); - } -}; - -Fixture::Fixture() - : target_attr(create_single_attribute<IntegerAttribute>(BasicType::INT32)), - reference_attr(create_reference_attribute()), - imported_attr(create_attribute_vector_from_members()), - mapper_factory(std::make_shared<MockGidToLidMapperFactory>()) -{ - 
reference_attr->setGidToLidMapperFactory(mapper_factory); -} - -Fixture::~Fixture() {} - -template <typename AttrValueType, typename PredicateType> -void assert_multi_value_matches(const Fixture& f, - DocId lid, - const std::vector<AttrValueType>& expected, - PredicateType predicate) { - AttributeContent<AttrValueType> content; - content.fill(*f.imported_attr, lid); - EXPECT_EQUAL(expected.size(), content.size()); - std::vector<AttrValueType> actual(content.begin(), content.end()); - EXPECT_TRUE(std::equal(expected.begin(), expected.end(), - actual.begin(), actual.end(), predicate)); -} - -template <typename AttrValueType> -void assert_multi_value_matches(const Fixture& f, - DocId lid, - const std::vector<AttrValueType>& expected) { - assert_multi_value_matches(f, lid, expected, std::equal_to<AttrValueType>()); -} - -// Simple wrappers to avoid ugly "f.template reset..." syntax. -template <typename AttrVecType, typename ValueType> -void reset_with_single_value_reference_mappings( - Fixture& f, - BasicType type, - const std::vector<Fixture::LidToLidMapping<ValueType>>& mappings) { - f.reset_with_single_value_reference_mappings<AttrVecType, ValueType>(type, mappings); -} - -template <typename AttrVecType, typename ValueType> -void reset_with_array_value_reference_mappings( - Fixture& f, - BasicType type, - const std::vector<Fixture::LidToLidMapping<std::vector<ValueType>>> &mappings) { - f.reset_with_array_value_reference_mappings<AttrVecType, ValueType>(type, mappings); -} - -template <typename AttrVecType, typename WeightedValueType> -void reset_with_wset_value_reference_mappings( - Fixture& f, - BasicType type, - const std::vector<Fixture::LidToLidMapping<std::vector<WeightedValueType>>> &mappings) { - f.reset_with_wset_value_reference_mappings<AttrVecType, WeightedValueType>(type, mappings); -} - -bool has_active_enum_guards(AttributeVector &attr) { - return std::async(std::launch::async, [&attr] { return attr.hasActiveEnumGuards(); }).get(); -} +using Fixture = 
ImportedAttributeFixture; TEST_F("Accessors return expected attributes", Fixture) { EXPECT_EQUAL(f.imported_attr->getReferenceAttribute().get(), @@ -343,6 +119,10 @@ TEST_F("getFixedWidth() is inherited from target attribute vector", Fixture) { f.imported_attr->getFixedWidth()); } +TEST_F("asDocumentWeightAttribute() returns nullptr", Fixture) { + EXPECT_TRUE(f.imported_attr->asDocumentWeightAttribute() == nullptr); +} + TEST_F("Multi-valued integer attribute values can be retrieved via reference", Fixture) { const std::vector<int64_t> doc3_values({1234}); const std::vector<int64_t> doc7_values({5678, 9876, 555, 777}); @@ -445,6 +225,19 @@ TEST_F("hasEnum() is true for enum target attribute vector", SingleStringAttrFix EXPECT_TRUE(f.imported_attr->hasEnum()); } +TEST_F("createSearchContext() returns an imported search context", SingleStringAttrFixture) { + auto ctx = f.imported_attr->createSearchContext(word_term("bar"), SearchContextParams()); + ASSERT_TRUE(ctx.get() != nullptr); + fef::TermFieldMatchData match; + // Iterator specifics are tested in imported_search_context_test, so just make sure + // we get the expected iterator functionality. In this case, a non-strict iterator. + auto iter = ctx->createIterator(&match, false); + EXPECT_FALSE(iter->seek(DocId(1))); + EXPECT_FALSE(iter->seek(DocId(2))); + EXPECT_FALSE(iter->seek(DocId(3))); + EXPECT_TRUE(iter->seek(DocId(4))); +} + bool string_eq(const char* lhs, const char* rhs) noexcept { return strcmp(lhs, rhs) == 0; }; diff --git a/searchlib/src/tests/attribute/imported_search_context/CMakeLists.txt b/searchlib/src/tests/attribute/imported_search_context/CMakeLists.txt new file mode 100644 index 00000000000..228dfe20b0b --- /dev/null +++ b/searchlib/src/tests/attribute/imported_search_context/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_imported_search_context_test_app TEST + SOURCES + imported_search_context_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_imported_search_context_test_app COMMAND searchlib_imported_search_context_test_app) diff --git a/searchlib/src/tests/attribute/imported_search_context/imported_search_context_test.cpp b/searchlib/src/tests/attribute/imported_search_context/imported_search_context_test.cpp new file mode 100644 index 00000000000..55675089a41 --- /dev/null +++ b/searchlib/src/tests/attribute/imported_search_context/imported_search_context_test.cpp @@ -0,0 +1,295 @@ +// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/searchcommon/attribute/search_context_params.h> +#include <vespa/searchlib/test/imported_attribute_fixture.h> +#include <vespa/searchlib/attribute/imported_search_context.h> +#include <vespa/searchlib/fef/termfieldmatchdata.h> + +namespace search { +namespace attribute { + +using fef::TermFieldMatchData; +using vespalib::Trinary; + +struct Fixture : ImportedAttributeFixture { + std::unique_ptr<ImportedSearchContext> create_context(std::unique_ptr<QueryTermSimple> term) { + return std::make_unique<ImportedSearchContext>(std::move(term), SearchContextParams(), *imported_attr); + } + + std::unique_ptr<queryeval::SearchIterator> create_iterator( + ImportedSearchContext& ctx, + TermFieldMatchData& match, + bool strict) { + auto iter = ctx.createIterator(&match, strict); + assert(iter.get() != nullptr); + iter->initRange(DocId(1), reference_attr->getNumDocs() + 1); + return iter; + } + + std::unique_ptr<queryeval::SearchIterator> create_non_strict_iterator( + ImportedSearchContext& ctx, + TermFieldMatchData& match) { + return create_iterator(ctx, match, false); + } + + std::unique_ptr<queryeval::SearchIterator> create_strict_iterator( + ImportedSearchContext& ctx, + TermFieldMatchData& match) { + return create_iterator(ctx, 
match, true); + } +}; + +template <typename Iterator> +bool is_hit_with_weight(Iterator& iter, TermFieldMatchData& match, DocId lid, int32_t weight) { + if (!EXPECT_TRUE(iter.seek(lid))) { + return false; + } + iter.unpack(lid); + return (EXPECT_EQUAL(lid, match.getDocId()) && + EXPECT_EQUAL(weight, match.getWeight())); +} + +template <typename Iterator> +bool is_strict_hit_with_weight(Iterator& iter, TermFieldMatchData& match, + DocId seek_lid, DocId expected_lid, int32_t weight) { + iter.seek(seek_lid); + if (!EXPECT_EQUAL(expected_lid, iter.getDocId())) { + return false; + } + iter.unpack(expected_lid); + return (EXPECT_EQUAL(expected_lid, match.getDocId()) && + EXPECT_EQUAL(weight, match.getWeight())); +} + +TEST_F("approximateHits() returns document count of reference attribute", Fixture) { + add_n_docs_with_undefined_values(*f.reference_attr, 101); + + auto ctx = f.create_context(word_term("foo")); + EXPECT_EQUAL(101, ctx->approximateHits()); +} + +TEST_F("attributeName() returns imported attribute name", Fixture) { + auto ctx = f.create_context(word_term("foo")); + EXPECT_EQUAL(f.default_imported_attr_name(), ctx->attributeName()); +} + +TEST_F("valid() forwards to target search context", Fixture) { + auto ctx = f.create_context(word_term("foo")); + EXPECT_EQUAL(ctx->target_search_context().valid(), ctx->valid()); +} + +TEST_F("getAsIntegerTerm() forwards to target search context", Fixture) { + auto ctx = f.create_context(word_term("foo")); + // No operator== or printing for Range, so doing this the hard way + // TODO could add the darn things + auto expected_range = ctx->target_search_context().getAsIntegerTerm(); + auto actual_range = ctx->getAsIntegerTerm(); + EXPECT_EQUAL(expected_range.lower(), actual_range.lower()); + EXPECT_EQUAL(expected_range.upper(), actual_range.upper()); +} + +/* + FIXME this seems to not actually be implemented as expected by the target search context...! SIGSEGVs. 
+TEST_F("queryTerm() returns term context was created with", Fixture) { + auto ctx = f.create_context(word_term("helloworld")); + EXPECT_EQUAL(std::string("helloworld"), std::string(ctx->queryTerm().getTerm())); +} +*/ + +TEST_F("Non-strict iterator not marked as strict", Fixture) { + auto ctx = f.create_context(word_term("5678")); + TermFieldMatchData match; + auto iter = f.create_non_strict_iterator(*ctx, match); + + EXPECT_TRUE(iter->is_strict() == Trinary::False); // No EXPECT_EQUALS printing of Trinary... +} + +TEST_F("Non-strict iterator seek forwards to target attribute", Fixture) { + reset_with_single_value_reference_mappings<IntegerAttribute, int32_t>( + f, BasicType::INT32, + {{DocId(1), dummy_gid(3), DocId(3), 1234}, + {DocId(3), dummy_gid(7), DocId(7), 5678}, + {DocId(5), dummy_gid(8), DocId(8), 7890}}); + + auto ctx = f.create_context(word_term("5678")); + TermFieldMatchData match; + auto iter = f.create_non_strict_iterator(*ctx, match); + + EXPECT_FALSE(iter->isAtEnd()); + EXPECT_EQUAL(iter->beginId(), iter->getDocId()); + + EXPECT_FALSE(iter->seek(DocId(1))); + EXPECT_EQUAL(iter->beginId(), iter->getDocId()); // Non-strict iterator does not change current ID + + EXPECT_TRUE(iter->seek(DocId(3))); + EXPECT_EQUAL(DocId(3), iter->getDocId()); + + EXPECT_FALSE(iter->seek(DocId(5))); + EXPECT_EQUAL(DocId(3), iter->getDocId()); // Still unchanged +} + +TEST_F("Non-strict iterator unpacks target match data for single value hit", Fixture) { + reset_with_single_value_reference_mappings<IntegerAttribute, int32_t>( + f, BasicType::INT32, + {{DocId(1), dummy_gid(3), DocId(3), 1234}, + {DocId(2), dummy_gid(4), DocId(4), 1234}}); + + auto ctx = f.create_context(word_term("1234")); + TermFieldMatchData match; + auto iter = f.create_non_strict_iterator(*ctx, match); + + EXPECT_TRUE(is_hit_with_weight(*iter, match, DocId(1), 1)); + EXPECT_TRUE(is_hit_with_weight(*iter, match, DocId(2), 1)); +} + +struct ArrayValueFixture : Fixture { + ArrayValueFixture() { + const 
std::vector<int64_t> doc3_values({1234}); + const std::vector<int64_t> doc7_values({1234, 1234, 1234, 777}); + const std::vector<int64_t> doc8_values({}); + reset_with_array_value_reference_mappings<IntegerAttribute, int64_t>( + BasicType::INT64, + {{DocId(1), dummy_gid(3), DocId(3), doc3_values}, + {DocId(4), dummy_gid(7), DocId(7), doc7_values}, + {DocId(5), dummy_gid(8), DocId(8), doc8_values}}); + } +}; + +TEST_F("Non-strict iterator unpacks target match data for array hit", ArrayValueFixture) { + auto ctx = f.create_context(word_term("1234")); + TermFieldMatchData match; + auto iter = f.create_non_strict_iterator(*ctx, match); + + EXPECT_TRUE(is_hit_with_weight(*iter, match, DocId(1), 1)); + EXPECT_FALSE(iter->seek(DocId(2))); + EXPECT_FALSE(iter->seek(DocId(3))); + EXPECT_TRUE(is_hit_with_weight(*iter, match, DocId(4), 3)); +} + +struct WsetValueFixture : Fixture { + WsetValueFixture() { + std::vector<WeightedString> doc3_values{{WeightedString("foo", -5)}}; + std::vector<WeightedString> doc4_values{{WeightedString("baz", 10)}}; + std::vector<WeightedString> doc7_values{{WeightedString("bar", 7), WeightedString("foo", 42)}}; + reset_with_wset_value_reference_mappings<StringAttribute, WeightedString>( + BasicType::STRING, + {{DocId(2), dummy_gid(3), DocId(3), doc3_values}, + {DocId(4), dummy_gid(4), DocId(4), doc4_values}, + {DocId(6), dummy_gid(7), DocId(7), doc7_values}}); + } +}; + +TEST_F("Non-strict iterator unpacks target match data for weighted set hit", WsetValueFixture) { + auto ctx = f.create_context(word_term("foo")); + TermFieldMatchData match; + auto iter = f.create_non_strict_iterator(*ctx, match); + + EXPECT_TRUE(is_hit_with_weight(*iter, match, DocId(2), -5)); + EXPECT_TRUE(is_hit_with_weight(*iter, match, DocId(6), 42)); +} + +TEST_F("Strict iterator is marked as strict", Fixture) { + auto ctx = f.create_context(word_term("5678")); + TermFieldMatchData match; + auto iter = f.create_strict_iterator(*ctx, match); + + 
EXPECT_TRUE(iter->is_strict() == Trinary::True); // No EXPECT_EQUALS printing of Trinary... +} + +struct SingleValueFixture : Fixture { + SingleValueFixture() { + reset_with_single_value_reference_mappings<IntegerAttribute, int32_t>( + BasicType::INT32, + {{DocId(3), dummy_gid(5), DocId(5), 5678}, + {DocId(4), dummy_gid(6), DocId(6), 1234}, + {DocId(5), dummy_gid(8), DocId(8), 5678}, + {DocId(7), dummy_gid(9), DocId(9), 4321}}); + } +}; + +TEST_F("Strict iterator seeks to first available hit LID", SingleValueFixture) { + auto ctx = f.create_context(word_term("5678")); + TermFieldMatchData match; + auto iter = f.create_strict_iterator(*ctx, match); + + EXPECT_FALSE(iter->isAtEnd()); + EXPECT_EQUAL(iter->beginId(), iter->getDocId()); + + EXPECT_FALSE(iter->seek(DocId(1))); + EXPECT_FALSE(iter->isAtEnd()); + EXPECT_EQUAL(DocId(3), iter->getDocId()); + + EXPECT_TRUE(iter->seek(DocId(3))); + EXPECT_FALSE(iter->isAtEnd()); + EXPECT_EQUAL(DocId(3), iter->getDocId()); + + EXPECT_FALSE(iter->seek(DocId(4))); + EXPECT_FALSE(iter->isAtEnd()); + EXPECT_EQUAL(DocId(5), iter->getDocId()); + + // Seeking beyond last hit exhausts doc id limit and marks iterator as done + EXPECT_FALSE(iter->seek(DocId(6))); + EXPECT_TRUE(iter->isAtEnd()); +} + +TEST_F("Strict iterator unpacks target match data for single value hit", SingleValueFixture) { + auto ctx = f.create_context(word_term("5678")); + TermFieldMatchData match; + auto iter = f.create_strict_iterator(*ctx, match); + + EXPECT_TRUE(is_strict_hit_with_weight(*iter, match, DocId(1), DocId(3), 1)); + EXPECT_TRUE(is_strict_hit_with_weight(*iter, match, DocId(2), DocId(3), 1)); + EXPECT_TRUE(is_strict_hit_with_weight(*iter, match, DocId(3), DocId(3), 1)); + EXPECT_TRUE(is_strict_hit_with_weight(*iter, match, DocId(4), DocId(5), 1)); +} + +TEST_F("Strict iterator unpacks target match data for array hit", ArrayValueFixture) { + auto ctx = f.create_context(word_term("1234")); + TermFieldMatchData match; + auto iter = 
f.create_strict_iterator(*ctx, match); + + EXPECT_TRUE(is_strict_hit_with_weight(*iter, match, DocId(1), DocId(1), 1)); + EXPECT_TRUE(is_strict_hit_with_weight(*iter, match, DocId(2), DocId(4), 3)); + EXPECT_TRUE(is_strict_hit_with_weight(*iter, match, DocId(3), DocId(4), 3)); + EXPECT_TRUE(is_strict_hit_with_weight(*iter, match, DocId(4), DocId(4), 3)); +} + +TEST_F("Strict iterator unpacks target match data for weighted set hit", WsetValueFixture) { + auto ctx = f.create_context(word_term("foo")); + TermFieldMatchData match; + auto iter = f.create_strict_iterator(*ctx, match); + + EXPECT_TRUE(is_strict_hit_with_weight(*iter, match, DocId(1), DocId(2), -5)); + EXPECT_TRUE(is_strict_hit_with_weight(*iter, match, DocId(2), DocId(2), -5)); + EXPECT_TRUE(is_strict_hit_with_weight(*iter, match, DocId(3), DocId(6), 42)); +} + +TEST_F("cmp() performs GID mapping and forwards to target attribute", SingleValueFixture) { + auto ctx = f.create_context(word_term("5678")); + EXPECT_FALSE(ctx->cmp(DocId(2))); + EXPECT_TRUE(ctx->cmp(DocId(3))); + EXPECT_FALSE(ctx->cmp(DocId(4))); + EXPECT_TRUE(ctx->cmp(DocId(5))); +} + +TEST_F("cmp(weight) performs GID mapping and forwards to target attribute", WsetValueFixture) { + auto ctx = f.create_context(word_term("foo")); + int32_t weight = 0; + EXPECT_FALSE(ctx->cmp(DocId(1), weight)); + EXPECT_EQUAL(0, weight); // Unchanged + + EXPECT_TRUE(ctx->cmp(DocId(2), weight)); + EXPECT_EQUAL(-5, weight); + + EXPECT_TRUE(ctx->cmp(DocId(6), weight)); + EXPECT_EQUAL(42, weight); +} + +// TODO test multiple iterators created from same context +// TODO test non-mapped lid +// TODO test seek outside lid limit + +} // attribute +} // search + +TEST_MAIN() { TEST_RUN_ALL(); }
\ No newline at end of file diff --git a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt index 66a614379a8..e167dc38f22 100644 --- a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt @@ -45,6 +45,7 @@ vespa_add_library(searchlib_attribute OBJECT iattributemanager.cpp iattributesavetarget.cpp imported_attribute_vector.cpp + imported_search_context.cpp integerbase.cpp ipostinglistsearchcontext.cpp iterator_pack.cpp diff --git a/searchlib/src/vespa/searchlib/attribute/attributeiterators.hpp b/searchlib/src/vespa/searchlib/attribute/attributeiterators.hpp index 4c48920406b..219ef2221f2 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributeiterators.hpp +++ b/searchlib/src/vespa/searchlib/attribute/attributeiterators.hpp @@ -208,7 +208,7 @@ FilterAttributeIteratorT<SC>::visitMembers(vespalib::ObjectVisitor &visitor) con template <typename SC> AttributeIteratorT<SC>::AttributeIteratorT(const SC &searchContext, fef::TermFieldMatchData *matchData) - : AttributeIterator(matchData, searchContext._attr.getCommittedDocIdLimit()), + : AttributeIterator(matchData, searchContext.attribute().getCommittedDocIdLimit()), _searchContext(searchContext) { } diff --git a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.cpp b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.cpp index 5e2973790fc..05d925ee7d1 100644 --- a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.cpp +++ b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.cpp @@ -1,7 +1,9 @@ // Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#include "imported_attribute_vector.h" +#include "imported_search_context.h" #include "attributeguard.h" +#include <vespa/searchlib/query/queryterm.h> #include <vespa/vespalib/util/exceptions.h> namespace search { @@ -94,9 +96,7 @@ bool ImportedAttributeVector::findEnum(const char *value, EnumHandle &e) const { std::unique_ptr<ISearchContext> ImportedAttributeVector::createSearchContext(std::unique_ptr<QueryTermSimple> term, const SearchContextParams &params) const { - (void) term; - (void) params; - return std::unique_ptr<ISearchContext>(); + return std::make_unique<ImportedSearchContext>(std::move(term), params, *this); } const IDocumentWeightAttribute *ImportedAttributeVector::asDocumentWeightAttribute() const { diff --git a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.h b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.h index 95a817bac35..556ce7c2722 100644 --- a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.h +++ b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.h @@ -89,7 +89,7 @@ private: vespalib::string _name; std::shared_ptr<ReferenceAttribute> _reference_attribute; - std::shared_ptr<AttributeVector> _target_attribute; + std::shared_ptr<AttributeVector> _target_attribute; }; } // attribute diff --git a/searchlib/src/vespa/searchlib/attribute/imported_search_context.cpp b/searchlib/src/vespa/searchlib/attribute/imported_search_context.cpp new file mode 100644 index 00000000000..51b765f725a --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/imported_search_context.cpp @@ -0,0 +1,71 @@ +// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "imported_search_context.h" +#include "attributeiterators.hpp" +#include "imported_attribute_vector.h" +#include "reference_attribute.h" +#include <vespa/searchcommon/attribute/search_context_params.h> +#include <vespa/searchlib/fef/fef.h> +#include <vespa/searchlib/query/queryterm.h> + +namespace search { +namespace attribute { + +ImportedSearchContext::ImportedSearchContext( + std::unique_ptr<QueryTermSimple> term, + const SearchContextParams& params, + const ImportedAttributeVector& imported_attribute) + : _imported_attribute(imported_attribute), + _reference_attribute(*_imported_attribute.getReferenceAttribute()), + _target_attribute(*_imported_attribute.getTargetAttribute()), + _target_search_context(_target_attribute.getSearch(std::move(term), params)) +{ +} + +ImportedSearchContext::~ImportedSearchContext() { +} + +unsigned int ImportedSearchContext::approximateHits() const { + return _reference_attribute.getNumDocs(); +} + +std::unique_ptr<queryeval::SearchIterator> +ImportedSearchContext::createIterator(fef::TermFieldMatchData* matchData, bool strict) { + if (!strict) { + return std::make_unique<AttributeIteratorT<ImportedSearchContext>>(*this, matchData); + } else { + return std::make_unique<AttributeIteratorStrict<ImportedSearchContext>>(*this, matchData); + } +} + +void ImportedSearchContext::fetchPostings(bool strict) { + (void)strict; + // Imported attributes do not have posting lists (at least not currently), so this is a no-op. 
+} + +bool ImportedSearchContext::valid() const { + return _target_search_context->valid(); +} + +Int64Range ImportedSearchContext::getAsIntegerTerm() const { + return _target_search_context->getAsIntegerTerm(); +} + +const QueryTermBase& ImportedSearchContext::queryTerm() const { + return _target_search_context->queryTerm(); +} + +const vespalib::string& ImportedSearchContext::attributeName() const { + return _imported_attribute.getName(); +} + +bool ImportedSearchContext::cmp(DocId docId, int32_t& weight) const { + return _target_search_context->cmp(_reference_attribute.getReferencedLid(docId), weight); +} + +bool ImportedSearchContext::cmp(DocId docId) const { + return _target_search_context->cmp(_reference_attribute.getReferencedLid(docId)); +} + +} // attribute +} // search
\ No newline at end of file diff --git a/searchlib/src/vespa/searchlib/attribute/imported_search_context.h b/searchlib/src/vespa/searchlib/attribute/imported_search_context.h new file mode 100644 index 00000000000..1a383b2705e --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/imported_search_context.h @@ -0,0 +1,71 @@ +// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "attributevector.h" +#include <vespa/searchcommon/attribute/i_search_context.h> +#include <memory> + +namespace search { + +namespace fef { +class TermFieldMatchData; +} + +namespace attribute { + +class ReferenceAttribute; +class ImportedAttributeVector; +class SearchContextParams; + +/** + * Search context exposing iteration over an imported attribute vector. + * + * Iterator doc id matching is performed via the GID->LID indirection of the + * associated reference attribute. This means that if the _referenced_ document + * matches the search term, the doc id of the _referring_ document will be + * considered a match. 
+ */ +class ImportedSearchContext : public ISearchContext { + const ImportedAttributeVector& _imported_attribute; + const ReferenceAttribute& _reference_attribute; + const AttributeVector& _target_attribute; + std::unique_ptr<AttributeVector::SearchContext> _target_search_context; +public: + ImportedSearchContext(std::unique_ptr<QueryTermSimple> term, + const SearchContextParams& params, + const ImportedAttributeVector& imported_attribute); + ~ImportedSearchContext(); + + unsigned int approximateHits() const override; + + std::unique_ptr<queryeval::SearchIterator> + createIterator(fef::TermFieldMatchData* matchData, bool strict) override; + + void fetchPostings(bool strict) override; + + bool valid() const override; + + Int64Range getAsIntegerTerm() const override; + + const QueryTermBase& queryTerm() const override; + + const vespalib::string& attributeName() const override; + + using DocId = IAttributeVector::DocId; + + bool cmp(DocId docId, int32_t& weight) const; + bool cmp(DocId docId) const; + + const ReferenceAttribute& attribute() const noexcept { return _reference_attribute; } + + const AttributeVector::SearchContext& target_search_context() const noexcept { + return *_target_search_context; + } +}; + +} // attribute +} // search + + + diff --git a/searchlib/src/vespa/searchlib/queryeval/searchiterator.h b/searchlib/src/vespa/searchlib/queryeval/searchiterator.h index 17fa8fd9902..89abe560b25 100644 --- a/searchlib/src/vespa/searchlib/queryeval/searchiterator.h +++ b/searchlib/src/vespa/searchlib/queryeval/searchiterator.h @@ -271,7 +271,7 @@ public: * * @return global posting info or NULL if no info is available. **/ - virtual const PostingInfo *getPostingInfo() const { return NULL; } + virtual const PostingInfo *getPostingInfo() const { return nullptr; } /** * Create a human-readable representation of this object. 
This diff --git a/searchlib/src/vespa/searchlib/test/imported_attribute_fixture.h b/searchlib/src/vespa/searchlib/test/imported_attribute_fixture.h new file mode 100644 index 00000000000..21b3d8e8040 --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/imported_attribute_fixture.h @@ -0,0 +1,249 @@ +// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "mock_gid_to_lid_mapping.h" +#include <vespa/document/base/documentid.h> +#include <vespa/document/base/globalid.h> +#include <vespa/searchlib/attribute/attributefactory.h> +#include <vespa/searchlib/attribute/attributeguard.h> +#include <vespa/searchlib/attribute/floatbase.h> +#include <vespa/searchlib/attribute/imported_attribute_vector.h> +#include <vespa/searchlib/attribute/integerbase.h> +#include <vespa/searchlib/attribute/not_implemented_attribute.h> +#include <vespa/searchlib/attribute/stringbase.h> +#include <vespa/searchlib/query/queryterm.h> +#include <vespa/searchcommon/attribute/attributecontent.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <algorithm> +#include <future> +#include <map> +#include <memory> +#include <vector> + +namespace search { +namespace attribute { + +using document::DocumentId; +using document::GlobalId; +using DocId = IAttributeVector::DocId; +using WeightedInt = IAttributeVector::WeightedInt; +using WeightedFloat = IAttributeVector::WeightedFloat; +using WeightedString = IAttributeVector::WeightedString; +using WeightedConstChar = IAttributeVector::WeightedConstChar; +using WeightedEnum = IAttributeVector::WeightedEnum; +using test::MockGidToLidMapperFactory; + +std::shared_ptr<ReferenceAttribute> create_reference_attribute(vespalib::stringref name = "ref") { + return std::make_shared<ReferenceAttribute>(name, Config(BasicType::REFERENCE)); +} + +template<typename AttrVecType> +std::shared_ptr<AttrVecType> create_typed_attribute(BasicType basic_type, + CollectionType 
collection_type, + vespalib::stringref name = "parent") { + return std::dynamic_pointer_cast<AttrVecType>( + AttributeFactory::createAttribute(name, Config(basic_type, collection_type))); +} + +template<typename AttrVecType> +std::shared_ptr<AttrVecType> create_single_attribute(BasicType type, vespalib::stringref name = "parent") { + return create_typed_attribute<AttrVecType>(type, CollectionType::SINGLE, name); +} + +template<typename AttrVecType> +std::shared_ptr<AttrVecType> create_array_attribute(BasicType type, vespalib::stringref name = "parent") { + return create_typed_attribute<AttrVecType>(type, CollectionType::ARRAY, name); +} + +template<typename AttrVecType> +std::shared_ptr<AttrVecType> create_wset_attribute(BasicType type, vespalib::stringref name = "parent") { + return create_typed_attribute<AttrVecType>(type, CollectionType::WSET, name); +} + +template<typename VectorType> +void add_n_docs_with_undefined_values(VectorType &vec, size_t n) { + vec.addDocs(n); + vec.commit(); +} + +GlobalId dummy_gid(uint32_t doc_index) { + return DocumentId(vespalib::make_string("id:foo:bar::%u", doc_index)).getGlobalId(); +} + +std::unique_ptr<QueryTermSimple> word_term(vespalib::stringref term) { + return std::make_unique<QueryTermSimple>(term, QueryTerm::WORD); +} + +struct ImportedAttributeFixture { + std::shared_ptr<AttributeVector> target_attr; + std::shared_ptr<ReferenceAttribute> reference_attr; + std::shared_ptr<ImportedAttributeVector> imported_attr; + std::shared_ptr<MockGidToLidMapperFactory> mapper_factory; + + ImportedAttributeFixture(); + + ~ImportedAttributeFixture(); + + void map_reference(DocId from_lid, GlobalId via_gid, DocId to_lid) { + assert(from_lid < reference_attr->getNumDocs()); + reference_attr->update(from_lid, via_gid); + reference_attr->commit(); + mapper_factory->_map[via_gid] = to_lid; + } + + static vespalib::stringref default_imported_attr_name() { + return "imported"; + } + + std::shared_ptr<ImportedAttributeVector> + 
create_attribute_vector_from_members(vespalib::stringref name = default_imported_attr_name()) { + return std::make_shared<ImportedAttributeVector>(name, reference_attr, target_attr); + } + + template<typename AttrVecType> + std::shared_ptr<AttrVecType> target_attr_as() { + auto ptr = std::dynamic_pointer_cast<AttrVecType>(target_attr); + assert(ptr.get() != nullptr); + return ptr; + } + + void reset_with_new_target_attr(std::shared_ptr<AttributeVector> new_target) { + target_attr = std::move(new_target); + imported_attr = create_attribute_vector_from_members(); + } + + template<typename ValueType> + struct LidToLidMapping { + DocId _from_lid; + GlobalId _via_gid; + DocId _to_lid; + ValueType _value_in_target_attr; + + LidToLidMapping(DocId from_lid, + GlobalId via_gid, + DocId to_lid, + ValueType value_in_target_attr) + : _from_lid(from_lid), + _via_gid(via_gid), + _to_lid(to_lid), + _value_in_target_attr(std::move(value_in_target_attr)) {} + }; + + void set_up_attribute_vectors_before_adding_mappings() { + // Make a sneaky assumption that no tests try to use a lid > 9 + add_n_docs_with_undefined_values(*reference_attr, 10); + target_attr->addReservedDoc(); + add_n_docs_with_undefined_values(*target_attr, 10); + } + + template<typename AttrVecType, typename MappingsType, typename ValueAssigner> + void set_up_and_map(const MappingsType &mappings, ValueAssigner assigner) { + set_up_attribute_vectors_before_adding_mappings(); + auto subtyped_target = target_attr_as<AttrVecType>(); + for (auto &m : mappings) { + map_reference(m._from_lid, m._via_gid, m._to_lid); + assigner(*subtyped_target, m); + } + subtyped_target->commit(); + } + + template<typename AttrVecType, typename ValueType> + void reset_with_single_value_reference_mappings( + BasicType type, + const std::vector<LidToLidMapping<ValueType>> &mappings) { + reset_with_new_target_attr(create_single_attribute<AttrVecType>(type)); + // Fun experiment: rename `auto& mapping` to `auto& m` and watch GCC howl about + 
// shadowing a variable... that exists in the set_up_and_map function! + set_up_and_map<AttrVecType>(mappings, [this](auto &target_vec, auto &mapping) { + ASSERT_TRUE(target_vec.update(mapping._to_lid, mapping._value_in_target_attr)); + }); + } + + template<typename AttrVecType, typename ValueType> + void reset_with_array_value_reference_mappings( + BasicType type, + const std::vector<LidToLidMapping<std::vector<ValueType>>> &mappings) { + reset_with_new_target_attr(create_array_attribute<AttrVecType>(type)); + set_up_and_map<AttrVecType>(mappings, [this](auto &target_vec, auto &mapping) { + constexpr uint32_t weight = 1; + for (const auto &v : mapping._value_in_target_attr) { + ASSERT_TRUE(target_vec.append(mapping._to_lid, v, weight)); + } + }); + } + + template<typename AttrVecType, typename WeightedValueType> + void reset_with_wset_value_reference_mappings( + BasicType type, + const std::vector<LidToLidMapping<std::vector<WeightedValueType>>> &mappings) { + reset_with_new_target_attr(create_wset_attribute<AttrVecType>(type)); + set_up_and_map<AttrVecType>(mappings, [this](auto &target_vec, auto &mapping) { + for (const auto &v : mapping._value_in_target_attr) { + ASSERT_TRUE(target_vec.append(mapping._to_lid, v.value(), v.weight())); + } + }); + } +}; + +ImportedAttributeFixture::ImportedAttributeFixture() + : target_attr(create_single_attribute<IntegerAttribute>(BasicType::INT32)), + reference_attr(create_reference_attribute()), + imported_attr(create_attribute_vector_from_members()), + mapper_factory(std::make_shared<MockGidToLidMapperFactory>()) { + reference_attr->setGidToLidMapperFactory(mapper_factory); +} + +ImportedAttributeFixture::~ImportedAttributeFixture() {} + +template<typename AttrValueType, typename PredicateType> +void assert_multi_value_matches(const ImportedAttributeFixture &f, + DocId lid, + const std::vector<AttrValueType> &expected, + PredicateType predicate) { + AttributeContent<AttrValueType> content; + content.fill(*f.imported_attr, 
lid); + EXPECT_EQUAL(expected.size(), content.size()); + std::vector<AttrValueType> actual(content.begin(), content.end()); + EXPECT_TRUE(std::equal(expected.begin(), expected.end(), + actual.begin(), actual.end(), predicate)); +} + +template<typename AttrValueType> +void assert_multi_value_matches(const ImportedAttributeFixture &f, + DocId lid, + const std::vector<AttrValueType> &expected) { + assert_multi_value_matches(f, lid, expected, std::equal_to<AttrValueType>()); +} + +// Simple wrappers to avoid ugly "f.template reset..." syntax. +template<typename AttrVecType, typename ValueType> +void reset_with_single_value_reference_mappings( + ImportedAttributeFixture &f, + BasicType type, + const std::vector<ImportedAttributeFixture::LidToLidMapping<ValueType>> &mappings) { + f.reset_with_single_value_reference_mappings<AttrVecType, ValueType>(type, mappings); +} + +template<typename AttrVecType, typename ValueType> +void reset_with_array_value_reference_mappings( + ImportedAttributeFixture &f, + BasicType type, + const std::vector<ImportedAttributeFixture::LidToLidMapping<std::vector<ValueType>>> &mappings) { + f.reset_with_array_value_reference_mappings<AttrVecType, ValueType>(type, mappings); +} + +template<typename AttrVecType, typename WeightedValueType> +void reset_with_wset_value_reference_mappings( + ImportedAttributeFixture &f, + BasicType type, + const std::vector<ImportedAttributeFixture::LidToLidMapping<std::vector<WeightedValueType>>> &mappings) { + f.reset_with_wset_value_reference_mappings<AttrVecType, WeightedValueType>(type, mappings); +} + +bool has_active_enum_guards(AttributeVector &attr) { + return std::async(std::launch::async, [&attr] { return attr.hasActiveEnumGuards(); }).get(); +} + +} // attribute +} // search
\ No newline at end of file diff --git a/vespa-hadoop/src/main/java/com/yahoo/vespa/hadoop/mapreduce/VespaSimpleJsonInputFormat.java b/vespa-hadoop/src/main/java/com/yahoo/vespa/hadoop/mapreduce/VespaSimpleJsonInputFormat.java index d7bdc592fd5..bd013362d67 100644 --- a/vespa-hadoop/src/main/java/com/yahoo/vespa/hadoop/mapreduce/VespaSimpleJsonInputFormat.java +++ b/vespa-hadoop/src/main/java/com/yahoo/vespa/hadoop/mapreduce/VespaSimpleJsonInputFormat.java @@ -54,7 +54,7 @@ public class VespaSimpleJsonInputFormat extends FileInputFormat<Text, NullWritab remaining = fileSplit.getLength(); - JsonFactory factory = new JsonFactory(); + JsonFactory factory = new JsonFactory().disable(JsonFactory.Feature.CANONICALIZE_FIELD_NAMES); parser = factory.createParser(new BufferedInputStream(stream)); parser.setCodec(new ObjectMapper()); parser.nextToken(); diff --git a/vespa-http-client/src/main/java/com/yahoo/vespa/http/client/core/JsonReader.java b/vespa-http-client/src/main/java/com/yahoo/vespa/http/client/core/JsonReader.java index 31fca0a0d3d..acaadd91cf1 100644 --- a/vespa-http-client/src/main/java/com/yahoo/vespa/http/client/core/JsonReader.java +++ b/vespa-http-client/src/main/java/com/yahoo/vespa/http/client/core/JsonReader.java @@ -35,7 +35,7 @@ public class JsonReader { public static void read(InputStream inputStream, FeedClient feedClient, AtomicInteger numSent) { try { final InputStreamJsonElementBuffer jsonElementBuffer = new InputStreamJsonElementBuffer(inputStream); - final JsonFactory jfactory = new JsonFactory(); + final JsonFactory jfactory = new JsonFactory().disable(JsonFactory.Feature.CANONICALIZE_FIELD_NAMES); final JsonParser jParser = jfactory.createParser(jsonElementBuffer); while (true) { String docId = parseOneDocument(jParser); |