aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--application-model/pom.xml4
-rw-r--r--client/js/app/yarn.lock182
-rw-r--r--dependency-versions/pom.xml16
-rw-r--r--document/src/main/java/com/yahoo/document/json/DocumentUpdateJsonSerializer.java79
-rw-r--r--document/src/test/java/com/yahoo/document/json/DocumentUpdateJsonSerializerTest.java31
-rw-r--r--integration/intellij/build.gradle.kts2
-rw-r--r--model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java60
-rw-r--r--model-integration/src/test/java/ai/vespa/embedding/HuggingFaceEmbedderTest.java126
-rw-r--r--model-integration/src/test/models/onnx/transformer/embedding_model.onnxbin0 -> 17409774 bytes
-rw-r--r--parent/pom.xml2
-rw-r--r--searchsummary/src/tests/docsummary/attribute_tokens_dfw/attribute_tokens_dfw_test.cpp6
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/attribute_tokens_dfw.cpp8
12 files changed, 375 insertions, 141 deletions
diff --git a/application-model/pom.xml b/application-model/pom.xml
index 3581a0020ee..425e5a5193a 100644
--- a/application-model/pom.xml
+++ b/application-model/pom.xml
@@ -26,6 +26,10 @@
<artifactId>commons-compress</artifactId>
</dependency>
<dependency>
+ <groupId>commons-io</groupId>
+ <artifactId>commons-io</artifactId>
+ </dependency>
+ <dependency>
<groupId>com.yahoo.vespa</groupId>
<artifactId>vespajlib</artifactId>
<version>${project.version}</version>
diff --git a/client/js/app/yarn.lock b/client/js/app/yarn.lock
index 37385a80486..28313d7fb65 100644
--- a/client/js/app/yarn.lock
+++ b/client/js/app/yarn.lock
@@ -1320,70 +1320,80 @@
resolved "https://registry.yarnpkg.com/@remix-run/router/-/router-1.15.3.tgz#d2509048d69dbb72d5389a14945339f1430b2d3c"
integrity sha512-Oy8rmScVrVxWZVOpEF57ovlnhpZ8CCPlnIIumVcV9nFdiSIrus99+Lw78ekXyGvVDlIsFJbSfmSovJUhCWYV3w==
-"@rollup/rollup-android-arm-eabi@4.13.0":
- version "4.13.0"
- resolved "https://registry.yarnpkg.com/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.13.0.tgz#b98786c1304b4ff8db3a873180b778649b5dff2b"
- integrity sha512-5ZYPOuaAqEH/W3gYsRkxQATBW3Ii1MfaT4EQstTnLKViLi2gLSQmlmtTpGucNP3sXEpOiI5tdGhjdE111ekyEg==
-
-"@rollup/rollup-android-arm64@4.13.0":
- version "4.13.0"
- resolved "https://registry.yarnpkg.com/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.13.0.tgz#8833679af11172b1bf1ab7cb3bad84df4caf0c9e"
- integrity sha512-BSbaCmn8ZadK3UAQdlauSvtaJjhlDEjS5hEVVIN3A4bbl3X+otyf/kOJV08bYiRxfejP3DXFzO2jz3G20107+Q==
-
-"@rollup/rollup-darwin-arm64@4.13.0":
- version "4.13.0"
- resolved "https://registry.yarnpkg.com/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.13.0.tgz#ef02d73e0a95d406e0eb4fd61a53d5d17775659b"
- integrity sha512-Ovf2evVaP6sW5Ut0GHyUSOqA6tVKfrTHddtmxGQc1CTQa1Cw3/KMCDEEICZBbyppcwnhMwcDce9ZRxdWRpVd6g==
-
-"@rollup/rollup-darwin-x64@4.13.0":
- version "4.13.0"
- resolved "https://registry.yarnpkg.com/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.13.0.tgz#3ce5b9bcf92b3341a5c1c58a3e6bcce0ea9e7455"
- integrity sha512-U+Jcxm89UTK592vZ2J9st9ajRv/hrwHdnvyuJpa5A2ngGSVHypigidkQJP+YiGL6JODiUeMzkqQzbCG3At81Gg==
-
-"@rollup/rollup-linux-arm-gnueabihf@4.13.0":
- version "4.13.0"
- resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.13.0.tgz#3d3d2c018bdd8e037c6bfedd52acfff1c97e4be4"
- integrity sha512-8wZidaUJUTIR5T4vRS22VkSMOVooG0F4N+JSwQXWSRiC6yfEsFMLTYRFHvby5mFFuExHa/yAp9juSphQQJAijQ==
-
-"@rollup/rollup-linux-arm64-gnu@4.13.0":
- version "4.13.0"
- resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.13.0.tgz#5fc8cc978ff396eaa136d7bfe05b5b9138064143"
- integrity sha512-Iu0Kno1vrD7zHQDxOmvweqLkAzjxEVqNhUIXBsZ8hu8Oak7/5VTPrxOEZXYC1nmrBVJp0ZcL2E7lSuuOVaE3+w==
-
-"@rollup/rollup-linux-arm64-musl@4.13.0":
- version "4.13.0"
- resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.13.0.tgz#f2ae7d7bed416ffa26d6b948ac5772b520700eef"
- integrity sha512-C31QrW47llgVyrRjIwiOwsHFcaIwmkKi3PCroQY5aVq4H0A5v/vVVAtFsI1nfBngtoRpeREvZOkIhmRwUKkAdw==
-
-"@rollup/rollup-linux-riscv64-gnu@4.13.0":
- version "4.13.0"
- resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.13.0.tgz#303d57a328ee9a50c85385936f31cf62306d30b6"
- integrity sha512-Oq90dtMHvthFOPMl7pt7KmxzX7E71AfyIhh+cPhLY9oko97Zf2C9tt/XJD4RgxhaGeAraAXDtqxvKE1y/j35lA==
-
-"@rollup/rollup-linux-x64-gnu@4.13.0":
- version "4.13.0"
- resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.13.0.tgz#f672f6508f090fc73f08ba40ff76c20b57424778"
- integrity sha512-yUD/8wMffnTKuiIsl6xU+4IA8UNhQ/f1sAnQebmE/lyQ8abjsVyDkyRkWop0kdMhKMprpNIhPmYlCxgHrPoXoA==
-
-"@rollup/rollup-linux-x64-musl@4.13.0":
- version "4.13.0"
- resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.13.0.tgz#d2f34b1b157f3e7f13925bca3288192a66755a89"
- integrity sha512-9RyNqoFNdF0vu/qqX63fKotBh43fJQeYC98hCaf89DYQpv+xu0D8QFSOS0biA7cGuqJFOc1bJ+m2rhhsKcw1hw==
-
-"@rollup/rollup-win32-arm64-msvc@4.13.0":
- version "4.13.0"
- resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.13.0.tgz#8ffecc980ae4d9899eb2f9c4ae471a8d58d2da6b"
- integrity sha512-46ue8ymtm/5PUU6pCvjlic0z82qWkxv54GTJZgHrQUuZnVH+tvvSP0LsozIDsCBFO4VjJ13N68wqrKSeScUKdA==
-
-"@rollup/rollup-win32-ia32-msvc@4.13.0":
- version "4.13.0"
- resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.13.0.tgz#a7505884f415662e088365b9218b2b03a88fc6f2"
- integrity sha512-P5/MqLdLSlqxbeuJ3YDeX37srC8mCflSyTrUsgbU1c/U9j6l2g2GiIdYaGD9QjdMQPMSgYm7hgg0551wHyIluw==
-
-"@rollup/rollup-win32-x64-msvc@4.13.0":
- version "4.13.0"
- resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.13.0.tgz#6abd79db7ff8d01a58865ba20a63cfd23d9e2a10"
- integrity sha512-UKXUQNbO3DOhzLRwHSpa0HnhhCgNODvfoPWv2FCXme8N/ANFfhIPMGuOT+QuKd16+B5yxZ0HdpNlqPvTMS1qfw==
+"@rollup/rollup-android-arm-eabi@4.13.2":
+ version "4.13.2"
+ resolved "https://registry.yarnpkg.com/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.13.2.tgz#fbf098f49d96a8cac9056f22f5fd80906ef3af85"
+ integrity sha512-3XFIDKWMFZrMnao1mJhnOT1h2g0169Os848NhhmGweEcfJ4rCi+3yMCOLG4zA61rbJdkcrM/DjVZm9Hg5p5w7g==
+
+"@rollup/rollup-android-arm64@4.13.2":
+ version "4.13.2"
+ resolved "https://registry.yarnpkg.com/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.13.2.tgz#0d2448251040fce19a98eee505dff5b3c8ec9b98"
+ integrity sha512-GdxxXbAuM7Y/YQM9/TwwP+L0omeE/lJAR1J+olu36c3LqqZEBdsIWeQ91KBe6nxwOnb06Xh7JS2U5ooWU5/LgQ==
+
+"@rollup/rollup-darwin-arm64@4.13.2":
+ version "4.13.2"
+ resolved "https://registry.yarnpkg.com/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.13.2.tgz#78db4d4da5b1b84c22adbe25c8a4961b3f22d3af"
+ integrity sha512-mCMlpzlBgOTdaFs83I4XRr8wNPveJiJX1RLfv4hggyIVhfB5mJfN4P8Z6yKh+oE4Luz+qq1P3kVdWrCKcMYrrA==
+
+"@rollup/rollup-darwin-x64@4.13.2":
+ version "4.13.2"
+ resolved "https://registry.yarnpkg.com/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.13.2.tgz#fcc05af54379f8ee5c7e954987d4514c6fd0fb42"
+ integrity sha512-yUoEvnH0FBef/NbB1u6d3HNGyruAKnN74LrPAfDQL3O32e3k3OSfLrPgSJmgb3PJrBZWfPyt6m4ZhAFa2nZp2A==
+
+"@rollup/rollup-linux-arm-gnueabihf@4.13.2":
+ version "4.13.2"
+ resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.13.2.tgz#2ce200efa1ef4a56ee2af7b453edc74a259d7d31"
+ integrity sha512-GYbLs5ErswU/Xs7aGXqzc3RrdEjKdmoCrgzhJWyFL0r5fL3qd1NPcDKDowDnmcoSiGJeU68/Vy+OMUluRxPiLQ==
+
+"@rollup/rollup-linux-arm64-gnu@4.13.2":
+ version "4.13.2"
+ resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.13.2.tgz#5a24aac882bff9abfda3f45f6f1db2166c342a4a"
+ integrity sha512-L1+D8/wqGnKQIlh4Zre9i4R4b4noxzH5DDciyahX4oOz62CphY7WDWqJoQ66zNR4oScLNOqQJfNSIAe/6TPUmQ==
+
+"@rollup/rollup-linux-arm64-musl@4.13.2":
+ version "4.13.2"
+ resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.13.2.tgz#f1fb4c6f961d3f3397231a99e621d199200e4ea9"
+ integrity sha512-tK5eoKFkXdz6vjfkSTCupUzCo40xueTOiOO6PeEIadlNBkadH1wNOH8ILCPIl8by/Gmb5AGAeQOFeLev7iZDOA==
+
+"@rollup/rollup-linux-powerpc64le-gnu@4.13.2":
+ version "4.13.2"
+ resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-powerpc64le-gnu/-/rollup-linux-powerpc64le-gnu-4.13.2.tgz#46b2463d94ac3af3e0f7a2947b695397bc13b755"
+ integrity sha512-zvXvAUGGEYi6tYhcDmb9wlOckVbuD+7z3mzInCSTACJ4DQrdSLPNUeDIcAQW39M3q6PDquqLWu7pnO39uSMRzQ==
+
+"@rollup/rollup-linux-riscv64-gnu@4.13.2":
+ version "4.13.2"
+ resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.13.2.tgz#47b932ee59a5395a3a341b0493e361d9e6032cf2"
+ integrity sha512-C3GSKvMtdudHCN5HdmAMSRYR2kkhgdOfye4w0xzyii7lebVr4riCgmM6lRiSCnJn2w1Xz7ZZzHKuLrjx5620kw==
+
+"@rollup/rollup-linux-s390x-gnu@4.13.2":
+ version "4.13.2"
+ resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.13.2.tgz#8e14a1b3c3b9a4440c70a9c1ba12d32aa21f9712"
+ integrity sha512-l4U0KDFwzD36j7HdfJ5/TveEQ1fUTjFFQP5qIt9gBqBgu1G8/kCaq5Ok05kd5TG9F8Lltf3MoYsUMw3rNlJ0Yg==
+
+"@rollup/rollup-linux-x64-gnu@4.13.2":
+ version "4.13.2"
+ resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.13.2.tgz#270e939194b66df77bcb33dd9a5ddf7784bd7997"
+ integrity sha512-xXMLUAMzrtsvh3cZ448vbXqlUa7ZL8z0MwHp63K2IIID2+DeP5iWIT6g1SN7hg1VxPzqx0xZdiDM9l4n9LRU1A==
+
+"@rollup/rollup-linux-x64-musl@4.13.2":
+ version "4.13.2"
+ resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.13.2.tgz#e8dd0f3c2046acbda2934490b36552e856a3bc6a"
+ integrity sha512-M/JYAWickafUijWPai4ehrjzVPKRCyDb1SLuO+ZyPfoXgeCEAlgPkNXewFZx0zcnoIe3ay4UjXIMdXQXOZXWqA==
+
+"@rollup/rollup-win32-arm64-msvc@4.13.2":
+ version "4.13.2"
+ resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.13.2.tgz#f8b65a4a7e7a6b383e7b14439129b2f474ff123c"
+ integrity sha512-2YWwoVg9KRkIKaXSh0mz3NmfurpmYoBBTAXA9qt7VXk0Xy12PoOP40EFuau+ajgALbbhi4uTj3tSG3tVseCjuA==
+
+"@rollup/rollup-win32-ia32-msvc@4.13.2":
+ version "4.13.2"
+ resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.13.2.tgz#bc1c5a4fbc4337d6cb15da80a4de95fd53ab3573"
+ integrity sha512-2FSsE9aQ6OWD20E498NYKEQLneShWes0NGMPQwxWOdws35qQXH+FplabOSP5zEe1pVjurSDOGEVCE2agFwSEsw==
+
+"@rollup/rollup-win32-x64-msvc@4.13.2":
+ version "4.13.2"
+ resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.13.2.tgz#851959c4c1c3c6647aba1f388198c8243aed6917"
+ integrity sha512-7h7J2nokcdPePdKykd8wtc8QqqkqxIrUz7MHj6aNr8waBRU//NLDVnNjQnqQO6fqtjrtCdftpbTuOKAyrAQETQ==
"@sinclair/typebox@^0.27.8":
version "0.27.8"
@@ -4610,7 +4620,7 @@ possible-typed-array-names@^1.0.0:
resolved "https://registry.yarnpkg.com/possible-typed-array-names/-/possible-typed-array-names-1.0.0.tgz#89bb63c6fada2c3e90adc4a647beeeb39cc7bf8f"
integrity sha512-d7Uw+eZoloe0EHDIYoe+bQ5WXnGMOpmiZFTuMWCwpjzzkL2nTjcKiAk4hh8TjnGye2TwWOk3UXucZ+3rbmBa8Q==
-postcss@^8.4.36:
+postcss@^8.4.38:
version "8.4.38"
resolved "https://registry.yarnpkg.com/postcss/-/postcss-8.4.38.tgz#b387d533baf2054288e337066d81c6bee9db9e0e"
integrity sha512-Wglpdk03BSfXkHoQa3b/oulrotAkwrlLDRSOb9D0bN86FdRyE9lppSp33aHNPgBa0JKCoB+drFLZkQoRRYae5A==
@@ -4890,25 +4900,27 @@ rimraf@^3.0.2:
glob "^7.1.3"
rollup@^4.13.0:
- version "4.13.0"
- resolved "https://registry.yarnpkg.com/rollup/-/rollup-4.13.0.tgz#dd2ae144b4cdc2ea25420477f68d4937a721237a"
- integrity sha512-3YegKemjoQnYKmsBlOHfMLVPPA5xLkQ8MHLLSw/fBrFaVkEayL51DilPpNNLq1exr98F2B1TzrV0FUlN3gWRPg==
+ version "4.13.2"
+ resolved "https://registry.yarnpkg.com/rollup/-/rollup-4.13.2.tgz#ac57d2dc48e8f5562f5a6daadb9caee590069262"
+ integrity sha512-MIlLgsdMprDBXC+4hsPgzWUasLO9CE4zOkj/u6j+Z6j5A4zRY+CtiXAdJyPtgCsc42g658Aeh1DlrdVEJhsL2g==
dependencies:
"@types/estree" "1.0.5"
optionalDependencies:
- "@rollup/rollup-android-arm-eabi" "4.13.0"
- "@rollup/rollup-android-arm64" "4.13.0"
- "@rollup/rollup-darwin-arm64" "4.13.0"
- "@rollup/rollup-darwin-x64" "4.13.0"
- "@rollup/rollup-linux-arm-gnueabihf" "4.13.0"
- "@rollup/rollup-linux-arm64-gnu" "4.13.0"
- "@rollup/rollup-linux-arm64-musl" "4.13.0"
- "@rollup/rollup-linux-riscv64-gnu" "4.13.0"
- "@rollup/rollup-linux-x64-gnu" "4.13.0"
- "@rollup/rollup-linux-x64-musl" "4.13.0"
- "@rollup/rollup-win32-arm64-msvc" "4.13.0"
- "@rollup/rollup-win32-ia32-msvc" "4.13.0"
- "@rollup/rollup-win32-x64-msvc" "4.13.0"
+ "@rollup/rollup-android-arm-eabi" "4.13.2"
+ "@rollup/rollup-android-arm64" "4.13.2"
+ "@rollup/rollup-darwin-arm64" "4.13.2"
+ "@rollup/rollup-darwin-x64" "4.13.2"
+ "@rollup/rollup-linux-arm-gnueabihf" "4.13.2"
+ "@rollup/rollup-linux-arm64-gnu" "4.13.2"
+ "@rollup/rollup-linux-arm64-musl" "4.13.2"
+ "@rollup/rollup-linux-powerpc64le-gnu" "4.13.2"
+ "@rollup/rollup-linux-riscv64-gnu" "4.13.2"
+ "@rollup/rollup-linux-s390x-gnu" "4.13.2"
+ "@rollup/rollup-linux-x64-gnu" "4.13.2"
+ "@rollup/rollup-linux-x64-musl" "4.13.2"
+ "@rollup/rollup-win32-arm64-msvc" "4.13.2"
+ "@rollup/rollup-win32-ia32-msvc" "4.13.2"
+ "@rollup/rollup-win32-x64-msvc" "4.13.2"
fsevents "~2.3.2"
rsvp@^4.8.4:
@@ -5529,12 +5541,12 @@ v8-to-istanbul@^9.0.1:
convert-source-map "^1.6.0"
vite@^5.0.5:
- version "5.2.3"
- resolved "https://registry.yarnpkg.com/vite/-/vite-5.2.3.tgz#198efc2fd4d80eac813b146a68a4b0dbde884fc2"
- integrity sha512-+i1oagbvkVIhEy9TnEV+fgXsng13nZM90JQbrcPrf6DvW2mXARlz+DK7DLiDP+qeKoD1FCVx/1SpFL1CLq9Mhw==
+ version "5.2.7"
+ resolved "https://registry.yarnpkg.com/vite/-/vite-5.2.7.tgz#e1b8a985eb54fcb9467d7f7f009d87485016df6e"
+ integrity sha512-k14PWOKLI6pMaSzAuGtT+Cf0YmIx12z9YGon39onaJNy8DLBfBJrzg9FQEmkAM5lpHBZs9wksWAsyF/HkpEwJA==
dependencies:
esbuild "^0.20.1"
- postcss "^8.4.36"
+ postcss "^8.4.38"
rollup "^4.13.0"
optionalDependencies:
fsevents "~2.3.3"
diff --git a/dependency-versions/pom.xml b/dependency-versions/pom.xml
index 96a1c7942aa..394912995dd 100644
--- a/dependency-versions/pom.xml
+++ b/dependency-versions/pom.xml
@@ -61,11 +61,11 @@
<apache.httpclient5.vespa.version>5.3.1</apache.httpclient5.vespa.version>
<apache.httpcore5.vespa.version>5.2.4</apache.httpcore5.vespa.version>
<apiguardian.vespa.version>1.1.2</apiguardian.vespa.version>
- <asm.vespa.version>9.6</asm.vespa.version>
+ <asm.vespa.version>9.7</asm.vespa.version>
<assertj.vespa.version>3.25.3</assertj.vespa.version>
<!-- Athenz dependencies. Make sure these dependencies match those in Vespa's internal repositories -->
- <aws-sdk.vespa.version>1.12.686</aws-sdk.vespa.version>
+ <aws-sdk.vespa.version>1.12.691</aws-sdk.vespa.version>
<athenz.vespa.version>1.11.54</athenz.vespa.version>
<!-- Athenz END -->
@@ -76,16 +76,16 @@
xargs perl -pi -e 's/major = [0-9]+, minor = [0-9]+, micro = [0-9]+/major = 5, minor = 3, micro = 0/g'
-->
<bouncycastle.vespa.version>1.76</bouncycastle.vespa.version>
- <byte-buddy.vespa.version>1.14.12</byte-buddy.vespa.version>
+ <byte-buddy.vespa.version>1.14.13</byte-buddy.vespa.version>
<checker-qual.vespa.version>3.38.0</checker-qual.vespa.version>
<commons-beanutils.vespa.version>1.9.4</commons-beanutils.vespa.version>
<commons-codec.vespa.version>1.16.1</commons-codec.vespa.version>
<commons-collections.vespa.version>3.2.2</commons-collections.vespa.version>
<commons-csv.vespa.version>1.10.0</commons-csv.vespa.version>
<commons-digester.vespa.version>3.2</commons-digester.vespa.version>
- <commons-io.vespa.version>2.15.1</commons-io.vespa.version>
+ <commons-io.vespa.version>2.16.0</commons-io.vespa.version>
<commons-lang3.vespa.version>3.14.0</commons-lang3.vespa.version>
- <commons-logging.vespa.version>1.3.0</commons-logging.vespa.version> <!-- Bindings exported by jdisc through jcl-over-slf4j. -->
+ <commons-logging.vespa.version>1.3.1</commons-logging.vespa.version> <!-- Bindings exported by jdisc through jcl-over-slf4j. -->
<commons.math3.vespa.version>3.6.1</commons.math3.vespa.version>
<commons-compress.vespa.version>1.26.1</commons-compress.vespa.version>
<commons-cli.vespa.version>1.6.0</commons-cli.vespa.version>
@@ -100,7 +100,7 @@
<findbugs.vespa.version>3.0.2</findbugs.vespa.version> <!-- Should be kept in sync with guava -->
<hamcrest.vespa.version>2.2</hamcrest.vespa.version>
<hdrhistogram.vespa.version>2.1.12</hdrhistogram.vespa.version>
- <huggingface.vespa.version>0.26.0</huggingface.vespa.version>
+ <huggingface.vespa.version>0.27.0</huggingface.vespa.version>
<icu4j.vespa.version>74.2</icu4j.vespa.version>
<java-jjwt.vespa.version>0.11.5</java-jjwt.vespa.version>
<java-jwt.vespa.version>4.4.0</java-jwt.vespa.version>
@@ -140,7 +140,7 @@
<surefire.vespa.version>3.2.5</surefire.vespa.version>
<velocity.vespa.version>2.3</velocity.vespa.version>
<velocity.tools.vespa.version>3.1</velocity.tools.vespa.version>
- <wiremock.vespa.version>3.4.2</wiremock.vespa.version>
+ <wiremock.vespa.version>3.5.2</wiremock.vespa.version>
<xerces.vespa.version>2.12.2</xerces.vespa.version>
<zero-allocation-hashing.vespa.version>0.16</zero-allocation-hashing.vespa.version>
<zookeeper.client.vespa.version>3.9.2</zookeeper.client.vespa.version>
@@ -166,7 +166,7 @@
<maven-deploy-plugin.vespa.version>3.1.1</maven-deploy-plugin.vespa.version>
<maven-enforcer-plugin.vespa.version>3.4.1</maven-enforcer-plugin.vespa.version>
<maven-failsafe-plugin.vespa.version>3.2.5</maven-failsafe-plugin.vespa.version>
- <maven-gpg-plugin.vespa.version>3.2.1</maven-gpg-plugin.vespa.version>
+ <maven-gpg-plugin.vespa.version>3.2.2</maven-gpg-plugin.vespa.version>
<maven-install-plugin.vespa.version>3.1.1</maven-install-plugin.vespa.version>
<maven-jar-plugin.vespa.version>3.3.0</maven-jar-plugin.vespa.version>
<maven-javadoc-plugin.vespa.version>3.6.3</maven-javadoc-plugin.vespa.version>
diff --git a/document/src/main/java/com/yahoo/document/json/DocumentUpdateJsonSerializer.java b/document/src/main/java/com/yahoo/document/json/DocumentUpdateJsonSerializer.java
index 63998358325..a2864d12dd6 100644
--- a/document/src/main/java/com/yahoo/document/json/DocumentUpdateJsonSerializer.java
+++ b/document/src/main/java/com/yahoo/document/json/DocumentUpdateJsonSerializer.java
@@ -9,6 +9,7 @@ import com.yahoo.document.DataType;
import com.yahoo.document.Document;
import com.yahoo.document.DocumentUpdate;
import com.yahoo.document.FieldPath;
+import com.yahoo.document.WeightedSetDataType;
import com.yahoo.document.annotation.AnnotationReference;
import com.yahoo.document.datatypes.Array;
import com.yahoo.document.datatypes.BoolFieldValue;
@@ -135,8 +136,7 @@ public class DocumentUpdateJsonSerializer {
if (writeArithmeticFieldPathUpdate(update, generator)) continue;
generator.writeFieldName(update.getUpdateType().name().toLowerCase());
- if (update instanceof AssignFieldPathUpdate) {
- AssignFieldPathUpdate assignUp = (AssignFieldPathUpdate) update;
+ if (update instanceof AssignFieldPathUpdate assignUp) {
if (assignUp.getExpression() != null) {
throw new RuntimeException("Unable to parse expression: " + assignUp.getExpression());
} else {
@@ -176,11 +176,11 @@ public class DocumentUpdateJsonSerializer {
wrapIOException(() -> {
generator.writeObjectFieldStart(fieldUpdate.getField().getName());
- ArrayList<ValueUpdate> removeValueUpdates = new ArrayList<>();
- ArrayList<ValueUpdate> addValueUpdates = new ArrayList<>();
+ ArrayList<ValueUpdate<?>> removeValueUpdates = new ArrayList<>();
+ ArrayList<ValueUpdate<?>> addValueUpdates = new ArrayList<>();
- final DataType dataType = fieldUpdate.getField().getDataType();
- for (ValueUpdate valueUpdate : fieldUpdate.getValueUpdates()) {
+ DataType dataType = fieldUpdate.getField().getDataType();
+ for (ValueUpdate<?> valueUpdate : fieldUpdate.getValueUpdates()) {
if (valueUpdate instanceof RemoveValueUpdate) {
removeValueUpdates.add(valueUpdate);
} else if (valueUpdate instanceof AddValueUpdate) {
@@ -197,19 +197,44 @@ public class DocumentUpdateJsonSerializer {
});
}
- private void writeAddOrRemoveValueUpdates(String arrayFieldName, ArrayList<ValueUpdate> valueUpdates, DataType dataType) throws IOException {
- if (!valueUpdates.isEmpty()) {
- generator.writeArrayFieldStart(arrayFieldName);
- for (ValueUpdate valueUpdate : valueUpdates) {
- valueUpdate.serialize(this, dataType);
- }
- generator.writeEndArray();
+ private void writeAddOrRemoveValueUpdates(String operation, ArrayList<ValueUpdate<?>> valueUpdates, DataType dataType) throws IOException {
+ if (valueUpdates.isEmpty()) return;
+
+ if (dataType instanceof WeightedSetDataType)
+ writeAddOrRemoveValueUpdatesForWeightedSet(operation, valueUpdates, dataType);
+ else
+ writeAddOrRemoveValueUpdatesForArray(operation, valueUpdates, dataType);
+ }
+
+ private void writeAddOrRemoveValueUpdatesForArray(String operation, ArrayList<ValueUpdate<?>> valueUpdates, DataType dataType) throws IOException {
+ generator.writeArrayFieldStart(operation);
+ for (ValueUpdate<?> valueUpdate : valueUpdates) {
+ valueUpdate.serialize(this, dataType);
+ }
+ generator.writeEndArray();
+ }
+
+ private void writeAddOrRemoveValueUpdatesForWeightedSet(String operation, ArrayList<ValueUpdate<?>> valueUpdates, DataType dataType) throws IOException {
+ generator.writeObjectFieldStart(operation);
+ for (ValueUpdate<?> valueUpdate : valueUpdates) {
+ valueUpdate.serialize(this, dataType);
}
+ generator.writeEndObject();
}
@Override
public void write(AddValueUpdate update, DataType superType) {
- update.getValue().serialize(this);
+ if (superType instanceof WeightedSetDataType)
+ writeWeightedSet(update);
+ else
+ update.getValue().serialize(this);
+ }
+
+ private void writeWeightedSet(AddValueUpdate update) {
+ wrapIOException(() -> {
+ generator.writeFieldName(update.getValue().toString());
+ generator.writeNumber(update.getWeight());
+ });
}
/* This is the 'match' operation */
@@ -226,25 +251,13 @@ public class DocumentUpdateJsonSerializer {
@Override
public void write(ArithmeticValueUpdate update) {
- final ArithmeticValueUpdate.Operator operator = update.getOperator();
- final String operationKey;
-
- switch (operator) {
- case ADD:
- operationKey = "increment";
- break;
- case DIV:
- operationKey = "divide";
- break;
- case MUL:
- operationKey = "multiply";
- break;
- case SUB:
- operationKey = "decrement";
- break;
- default:
- throw new RuntimeException("Unrecognized arithmetic operator '%s'".formatted(operator.name));
- }
+ ArithmeticValueUpdate.Operator operator = update.getOperator();
+ String operationKey = switch (operator) {
+ case ADD -> "increment";
+ case DIV -> "divide";
+ case MUL -> "multiply";
+ case SUB -> "decrement";
+ };
wrapIOException(() -> generator.writeFieldName(operationKey));
update.getValue().serialize(this);
diff --git a/document/src/test/java/com/yahoo/document/json/DocumentUpdateJsonSerializerTest.java b/document/src/test/java/com/yahoo/document/json/DocumentUpdateJsonSerializerTest.java
index 2981b09f418..abb31891739 100644
--- a/document/src/test/java/com/yahoo/document/json/DocumentUpdateJsonSerializerTest.java
+++ b/document/src/test/java/com/yahoo/document/json/DocumentUpdateJsonSerializerTest.java
@@ -25,10 +25,10 @@ import org.junit.Test;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
-import java.io.UnsupportedEncodingException;
import static com.yahoo.test.json.JsonTestHelper.assertJsonEquals;
import static com.yahoo.test.json.JsonTestHelper.inputJson;
+import static java.nio.charset.StandardCharsets.UTF_8;
/**
* Tests roundtrip serialization (JSON -> DocumentUpdate -> Buffer -> DocumentUpdate -> JSON) of document updates.
@@ -104,11 +104,7 @@ public class DocumentUpdateJsonSerializerTest {
DocumentUpdateJsonSerializer serializer = new DocumentUpdateJsonSerializer(outputStream);
serializer.serialize(update);
- try {
- return new String(outputStream.toByteArray(), "UTF-8");
- } catch (UnsupportedEncodingException e) {
- throw new RuntimeException(e);
- }
+ return outputStream.toString(UTF_8);
}
private static void roundtripSerializeJsonAndMatch(String jsonDoc, String expectedJsonDoc) {
@@ -196,6 +192,29 @@ public class DocumentUpdateJsonSerializerTest {
}
@Test
+ public void testAddWeightedSet() {
+ roundtripSerializeJsonAndMatch(inputJson(
+ "{",
+ " 'update': 'DOCUMENT_ID',",
+ " 'fields': {",
+ " 'int_set': {",
+ " 'add': {",
+ " '123': 2,",
+ " '789': 3",
+ " }",
+ " },",
+ " 'string_set': {",
+ " 'add': {",
+ " 'meow': 4,",
+ " 'slurp': 5",
+ " }",
+ " }",
+ " }",
+ "}"
+ ));
+ }
+
+ @Test
public void testAddUpdate() {
roundtripSerializeJsonAndMatch(inputJson(
"{",
diff --git a/integration/intellij/build.gradle.kts b/integration/intellij/build.gradle.kts
index 6af7a48c0c1..89101020920 100644
--- a/integration/intellij/build.gradle.kts
+++ b/integration/intellij/build.gradle.kts
@@ -4,7 +4,7 @@ import org.jetbrains.grammarkit.tasks.GenerateParserTask
plugins {
id("java-library")
- id("org.jetbrains.intellij") version "1.17.2"
+ id("org.jetbrains.intellij") version "1.17.3"
id("org.jetbrains.grammarkit") version "2022.3.2.2"
id("maven-publish") // to deploy the plugin into a Maven repo
}
diff --git a/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java b/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java
index 35645deffa4..169648967d7 100644
--- a/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java
+++ b/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java
@@ -17,6 +17,7 @@ import com.yahoo.tensor.TensorAddress;
import com.yahoo.tensor.TensorType;
import java.nio.file.Paths;
+import java.util.BitSet;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;
@@ -124,18 +125,44 @@ public class HuggingFaceEmbedder extends AbstractComponent implements Embedder {
}
Map<String, Tensor> outputs = evaluator.evaluate(inputs);
- Tensor tokenEmbeddings = outputs.get(outputName);
- var result = poolingStrategy.toSentenceEmbedding(tensorType, tokenEmbeddings, attentionMask);
- var normalized = normalize ? normalize(result, tensorType) : result;
+ IndexedTensor tokenEmbeddings = (IndexedTensor) outputs.get(outputName);
+ long[] resultShape = tokenEmbeddings.shape();
+ //shape batch, sequence, embedding dimensionality
+ if (resultShape.length != 3) {
+ throw new IllegalArgumentException("" +
+ "Expected 3 output dimensions for output name '" +
+ outputName + "': [batch, sequence, embedding], got " + resultShape.length);
+ }
+ Tensor result;
+ if (tensorType.valueType() == TensorType.Value.INT8) {
+ long outputDimensions = resultShape[2];
+ long targetDim = tensorType.dimensions().get(0).size().get();
+
+ if(targetDim * 8 > outputDimensions) {
+ throw new IllegalArgumentException("Cannot pack " + outputDimensions + " into " + targetDim + " int8s");
+ }
+ //Dimensionality flexibility 🪆 - packing only the first 8*targetDim values from the model output
+ long firstDimensions = 8 * targetDim;
+ String name = tensorType.indexedSubtype().dimensions().get(0).name();
+ //perform pooling and normalizing using floating point embeddings before binarizing
+ //using the firstDimensions as the target dimensionality
+ TensorType poolingType = new TensorType.Builder(TensorType.Value.FLOAT).indexed(name, firstDimensions).build();
+ result = poolingStrategy.toSentenceEmbedding(poolingType, tokenEmbeddings, attentionMask);
+ result = normalize? normalize(result, poolingType) : result;
+ result = binarize((IndexedTensor) result, tensorType);
+
+ } else { // regular floating points embeddings
+ result = poolingStrategy.toSentenceEmbedding(tensorType, tokenEmbeddings, attentionMask);
+ result = normalize ? normalize(result, tensorType) : result;
+ }
runtime.sampleEmbeddingLatency((System.nanoTime() - start)/1_000_000d, context);
- return normalized;
+ return result;
}
Tensor normalize(Tensor embedding, TensorType tensorType) {
double sumOfSquares = 0.0;
Tensor.Builder builder = Tensor.Builder.of(tensorType);
-
for (int i = 0; i < tensorType.dimensions().get(0).size().get(); i++) {
double item = embedding.get(TensorAddress.of(i));
sumOfSquares += item * item;
@@ -151,6 +178,29 @@ public class HuggingFaceEmbedder extends AbstractComponent implements Embedder {
return builder.build();
}
+ static public Tensor binarize(IndexedTensor embedding, TensorType tensorType) {
+ Tensor.Builder builder = Tensor.Builder.of(tensorType);
+ BitSet bitSet = new BitSet(8);
+ int index = 0;
+ for (int d = 0; d < embedding.sizeAsInt(); d++) {
+ var value = embedding.get(d);
+ int bitIndex = 7 - (d % 8);
+ if (value > 0.0) {
+ bitSet.set(bitIndex);
+ } else {
+ bitSet.clear(bitIndex);
+ }
+ if ((d + 1) % 8 == 0) {
+ byte[] bytes = bitSet.toByteArray();
+ byte packed = (bytes.length == 0) ? 0 : bytes[0];
+ builder.cell(TensorAddress.of(index), packed);
+ index++;
+ bitSet = new BitSet(8);
+ }
+ }
+ return builder.build();
+ }
+
private IndexedTensor createTensorRepresentation(List<Long> input, String dimension) {
int size = input.size();
TensorType type = new TensorType.Builder(TensorType.Value.FLOAT).indexed(dimension, size).build();
diff --git a/model-integration/src/test/java/ai/vespa/embedding/HuggingFaceEmbedderTest.java b/model-integration/src/test/java/ai/vespa/embedding/HuggingFaceEmbedderTest.java
new file mode 100644
index 00000000000..1ce1d955b00
--- /dev/null
+++ b/model-integration/src/test/java/ai/vespa/embedding/HuggingFaceEmbedderTest.java
@@ -0,0 +1,126 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package ai.vespa.embedding;
+
+import ai.vespa.embedding.huggingface.HuggingFaceEmbedder;
+import ai.vespa.modelintegration.evaluator.OnnxRuntime;
+import com.yahoo.config.ModelReference;
+import com.yahoo.embedding.huggingface.HuggingFaceEmbedderConfig;
+import com.yahoo.language.process.Embedder;
+import com.yahoo.tensor.IndexedTensor;
+import com.yahoo.tensor.Tensor;
+import com.yahoo.tensor.TensorType;
+import com.yahoo.tensor.TensorAddress;
+import org.junit.Test;
+
+import static org.junit.Assert.assertThrows;
+import static org.junit.Assume.assumeTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import com.yahoo.searchlib.rankingexpression.evaluation.MapContext;
+import com.yahoo.searchlib.rankingexpression.evaluation.TensorValue;
+import com.yahoo.searchlib.rankingexpression.rule.ReferenceNode;
+import com.yahoo.searchlib.rankingexpression.rule.UnpackBitsNode;
+
+/**
+ * Tests for {@link HuggingFaceEmbedder}: bit packing of embeddings into int8 tensors,
+ * and embedding of text into float, bfloat16 and int8 target tensor types,
+ * with and without normalization.
+ */
+public class HuggingFaceEmbedderTest {
+
+    private static final String VOCAB_PATH = "src/test/models/onnx/transformer/real_tokenizer.json";
+    private static final String MODEL_PATH = "src/test/models/onnx/transformer/embedding_model.onnx";
+
+    // The embedders are created on first use, from inside a test method, rather than in static
+    // initializers: creating them calls assumeTrue(), and an assumption failure raised during class
+    // initialization is reported as an initialization error for every test in the class (including
+    // testBinarization, which needs no model) instead of as a skipped test.
+    static HuggingFaceEmbedder embedder;
+    static HuggingFaceEmbedder normalizedEmbedder;
+    static Embedder.Context context = new Embedder.Context("schema.indexing");
+
+    @Test
+    public void testBinarization() {
+        TensorType typeOne = TensorType.fromSpec("tensor<int8>(x[1])");
+        TensorType typeTwo = TensorType.fromSpec("tensor<int8>(x[2])");
+        assertPackRight("tensor(x[8]):[0,0,0,0,0,0,0,0]", "tensor<int8>(x[1]):[0]", typeOne);
+        assertPackRight("tensor(x[8]):[1,1,1,1,1,1,1,1]", "tensor<int8>(x[1]):[-1]", typeOne);
+        assertPackRight("tensor(x[16]):[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]", "tensor<int8>(x[2]):[0, -1]", typeTwo);
+
+        assertPackRight("tensor(x[8]):[0,1,0,1,0,1,0,1]", "tensor<int8>(x[1]):[85]", typeOne);
+        assertPackRight("tensor(x[8]):[1,0,1,0,1,0,1,0]", "tensor<int8>(x[1]):[-86]", typeOne);
+        assertPackRight("tensor(x[16]):[0,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0]", "tensor<int8>(x[2]):[85, -86]", typeTwo);
+
+        assertPackRight("tensor(x[8]):[1,1,1,1,0,0,0,0]", "tensor<int8>(x[1]):[-16]", typeOne);
+        assertPackRight("tensor(x[8]):[0,0,0,0,1,1,1,1]", "tensor<int8>(x[1]):[15]", typeOne);
+        assertPackRight("tensor(x[16]):[1,1,1,1,0,0,0,0,0,0,0,0,1,1,1,1]", "tensor<int8>(x[2]):[-16, 15]", typeTwo);
+    }
+
+    /**
+     * Asserts that binarizing the input gives the expected packed tensor,
+     * and that unpacking the packed tensor gives back the input.
+     */
+    private void assertPackRight(String input, String expected, TensorType type) {
+        Tensor inputTensor = Tensor.from(input);
+        Tensor result = HuggingFaceEmbedder.binarize((IndexedTensor) inputTensor, type);
+        assertEquals(expected, result.toString());
+        //Verify against what is done in ranking with unpack_bits
+        Tensor unpacked = expandBitTensor(result);
+        assertEquals(inputTensor.toString(), unpacked.toString());
+    }
+
+    @Test
+    public void testEmbedder() {
+        HuggingFaceEmbedder hfEmbedder = getEmbedder();
+        String input = "This is a test";
+
+        Tensor expected = Tensor.from("tensor<float>(x[8]):[-0.666, 0.335, 0.227, 0.0919, -0.069, 0.323, 0.422, 0.270]");
+        Tensor result = hfEmbedder.embed(input, context, TensorType.fromSpec(("tensor<float>(x[8])")));
+        for (int i = 0; i < 8; i++) {
+            assertEquals(expected.get(TensorAddress.of(i)), result.get(TensorAddress.of(i)), 1e-2);
+        }
+        // Thresholding on the above gives [0, 1, 1, 1, 0, 1, 1, 1] which is packed into 119 (int8)
+        Tensor binarizedResult = hfEmbedder.embed(input, context, TensorType.fromSpec(("tensor<int8>(x[1])")));
+        assertEquals("tensor<int8>(x[1]):[119]", binarizedResult.toString());
+
+        binarizedResult = hfEmbedder.embed(input, context, TensorType.fromSpec(("tensor<int8>(x[2])")));
+        assertEquals("tensor<int8>(x[2]):[119, 44]", binarizedResult.toAbbreviatedString());
+
+        binarizedResult = hfEmbedder.embed(input, context, TensorType.fromSpec(("tensor<int8>(x[48])")));
+        assertTrue(binarizedResult.toAbbreviatedString().startsWith("tensor<int8>(x[48]):[119, 44"));
+
+        assertThrows(IllegalArgumentException.class, () -> {
+            // throws because the target tensor type is not compatible with the model output
+            //49*8 > 384
+            hfEmbedder.embed(input, context, TensorType.fromSpec(("tensor<int8>(x[49])")));
+        });
+        Tensor float16Result = hfEmbedder.embed(input, context, TensorType.fromSpec(("tensor<bfloat16>(x[1])")));
+        assertEquals(-0.666, float16Result.sum().asDouble(), 1e-3);
+    }
+
+    @Test
+    public void testEmbedderWithNormalization() {
+        HuggingFaceEmbedder hfNormalizedEmbedder = getNormalizedEmbedder();
+        String input = "This is a test";
+
+        Tensor result = hfNormalizedEmbedder.embed(input, context, TensorType.fromSpec(("tensor<float>(x[8])")));
+        assertEquals(1.0, result.multiply(result).sum().asDouble(), 1e-3);
+
+        result = hfNormalizedEmbedder.embed(input, context, TensorType.fromSpec(("tensor<float>(x[16])")));
+        assertEquals(1.0, result.multiply(result).sum().asDouble(), 1e-3);
+
+        // Binarize with the normalizing embedder so the normalize-then-binarize path is exercised.
+        // Normalization divides by a positive norm and so keeps every sign: the packed bytes are
+        // the same as those asserted for the non-normalizing embedder in testEmbedder.
+        Tensor binarizedResult = hfNormalizedEmbedder.embed(input, context, TensorType.fromSpec(("tensor<int8>(x[2])")));
+        assertEquals("tensor<int8>(x[2]):[119, 44]", binarizedResult.toAbbreviatedString());
+    }
+
+    /** Returns the shared embedder without normalization, creating it on first use. */
+    private static HuggingFaceEmbedder getEmbedder() {
+        if (embedder == null) {
+            embedder = createEmbedder(false);
+        }
+        return embedder;
+    }
+
+    /** Returns the shared embedder with normalization enabled, creating it on first use. */
+    private static HuggingFaceEmbedder getNormalizedEmbedder() {
+        if (normalizedEmbedder == null) {
+            normalizedEmbedder = createEmbedder(true);
+        }
+        return normalizedEmbedder;
+    }
+
+    /**
+     * Creates an embedder over the test model, running on CPU.
+     * The calling test is skipped (assumption failure) if the ONNX runtime is not available.
+     *
+     * @param normalize whether to enable normalization; when false the config default is left untouched
+     */
+    private static HuggingFaceEmbedder createEmbedder(boolean normalize) {
+        assumeTrue(OnnxRuntime.isRuntimeAvailable(MODEL_PATH));
+        HuggingFaceEmbedderConfig.Builder builder = new HuggingFaceEmbedderConfig.Builder();
+        builder.tokenizerPath(ModelReference.valueOf(VOCAB_PATH));
+        builder.transformerModel(ModelReference.valueOf(MODEL_PATH));
+        builder.transformerGpuDevice(-1);
+        if (normalize) {
+            builder.normalize(true);
+        }
+        return new HuggingFaceEmbedder(new OnnxRuntime(), Embedder.Runtime.testInstance(), builder.build());
+    }
+
+    /** Unpacks a bit-packed tensor with the unpack_bits ranking expression node, using "big" bit order. */
+    public static Tensor expandBitTensor(Tensor packed) {
+        var unpacker = new UnpackBitsNode(new ReferenceNode("input"), TensorType.Value.DOUBLE, "big");
+        var context = new MapContext();
+        context.put("input", new TensorValue(packed));
+        return unpacker.evaluate(context).asTensor();
+    }
+}
diff --git a/model-integration/src/test/models/onnx/transformer/embedding_model.onnx b/model-integration/src/test/models/onnx/transformer/embedding_model.onnx
new file mode 100644
index 00000000000..266ed567344
--- /dev/null
+++ b/model-integration/src/test/models/onnx/transformer/embedding_model.onnx
Binary files differ
diff --git a/parent/pom.xml b/parent/pom.xml
index d71dd5b1130..ca399019aa6 100644
--- a/parent/pom.xml
+++ b/parent/pom.xml
@@ -317,7 +317,7 @@
-->
<groupId>org.openrewrite.maven</groupId>
<artifactId>rewrite-maven-plugin</artifactId>
- <version>5.26.0</version>
+ <version>5.27.0</version>
<configuration>
<activeRecipes>
<recipe>org.openrewrite.java.testing.junit5.JUnit5BestPractices</recipe>
diff --git a/searchsummary/src/tests/docsummary/attribute_tokens_dfw/attribute_tokens_dfw_test.cpp b/searchsummary/src/tests/docsummary/attribute_tokens_dfw/attribute_tokens_dfw_test.cpp
index bac817077c4..e360c3005b8 100644
--- a/searchsummary/src/tests/docsummary/attribute_tokens_dfw/attribute_tokens_dfw_test.cpp
+++ b/searchsummary/src/tests/docsummary/attribute_tokens_dfw/attribute_tokens_dfw_test.cpp
@@ -94,4 +94,10 @@ TEST_F(AttributeTokensDFWTest, single_string)
expect_field("[ '' ]", 2);
}
+// Expects the field to be rendered as "null" when set up with "delayed_add_attribute_aspect".
+// NOTE(review): presumably the fixture has no attribute backing that field name, so this covers
+// the attribute-lookup-returns-nullptr path of AttributeTokensDFW::insertField - confirm against
+// the fixture setup.
+TEST_F(AttributeTokensDFWTest, missing_attribute)
+{
+    setup("delayed_add_attribute_aspect");
+    expect_field("null", 1);
+}
+
GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/attribute_tokens_dfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/attribute_tokens_dfw.cpp
index 9e0dafc5e91..e47d16b03c0 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/attribute_tokens_dfw.cpp
+++ b/searchsummary/src/vespa/searchsummary/docsummary/attribute_tokens_dfw.cpp
@@ -170,8 +170,12 @@ AttributeTokensDFW::insertField(uint32_t docid, const IDocsumStoreDocument*, Get
{
auto& field_writer_state = state._fieldWriterStates[_state_index];
if (!field_writer_state) {
- const auto& attr = *state.getAttribute(getIndex());
- field_writer_state = make_field_writer_state(attr, state.get_stash());
+ const auto attr = state.getAttribute(getIndex());
+ if (attr != nullptr) {
+ field_writer_state = make_field_writer_state(*attr, state.get_stash());
+ } else {
+ field_writer_state = &state.get_stash().create<EmptyDocsumFieldWriterState>();
+ }
}
field_writer_state->insertField(docid, target);
}