aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src/vespa/searchlib/bitcompression
diff options
context:
space:
mode:
Diffstat (limited to 'searchlib/src/vespa/searchlib/bitcompression')
-rw-r--r--searchlib/src/vespa/searchlib/bitcompression/CMakeLists.txt2
-rw-r--r--searchlib/src/vespa/searchlib/bitcompression/README.md56
-rw-r--r--searchlib/src/vespa/searchlib/bitcompression/compression.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/bitcompression/compression.h2
-rw-r--r--searchlib/src/vespa/searchlib/bitcompression/countcompression.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/bitcompression/countcompression.h2
-rw-r--r--searchlib/src/vespa/searchlib/bitcompression/pagedict4.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/bitcompression/pagedict4.h2
-rw-r--r--searchlib/src/vespa/searchlib/bitcompression/posocc_field_params.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/bitcompression/posocc_field_params.h2
-rw-r--r--searchlib/src/vespa/searchlib/bitcompression/posocc_fields_params.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/bitcompression/posocc_fields_params.h2
-rw-r--r--searchlib/src/vespa/searchlib/bitcompression/posocccompression.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/bitcompression/posocccompression.h2
14 files changed, 69 insertions, 13 deletions
diff --git a/searchlib/src/vespa/searchlib/bitcompression/CMakeLists.txt b/searchlib/src/vespa/searchlib/bitcompression/CMakeLists.txt
index 1572fbced7d..ff25bb2deb7 100644
--- a/searchlib/src/vespa/searchlib/bitcompression/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/bitcompression/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
vespa_add_library(searchlib_bitcompression OBJECT
SOURCES
compression.cpp
diff --git a/searchlib/src/vespa/searchlib/bitcompression/README.md b/searchlib/src/vespa/searchlib/bitcompression/README.md
new file mode 100644
index 00000000000..e387ec099fb
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/bitcompression/README.md
@@ -0,0 +1,56 @@
+<!-- Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->
+
+## About the disk dictionary format
+
+The designs of the disk index dictionary formats were incremental, due
+to changing requirements over the years.
+
+### 1st generation
+
+Patricia tree in memory.
+
+### 2nd generation, 1998-09-04
+
+Problem: Dictionary did not fit in memory (machine had 512 MB RAM)
+when indexing 5 million web documents on a single machine.
+
+Changed format to variable length records on disk, with a sparse
+version of the dictionary in memory (every 256th word) to limit disk
+access for binary search.
+
+### 3rd generation, 2000-03-09
+
+Problem: Too many disk read operations and too many bytes read from disk
+(limited PCI bandwidth).
+
+Changed format to a "paged" dictionary where a dictionary lookup would
+use 1 disk read, reading 4 kiB of data. Data was not compressed. Could
+not memory map whole dictionary. The sparse files were read into
+memory and used to determine the page to use for further lookup.
+Binary search within the pages read from disk.
+
+### 4th generation, 2002-08-16
+
+Problem: Dictionary used too much disk space.
+
+Changed to a compressed format. Decompression could not keep
+much state, thus delta values were compressed using Exp-Golomb coding.
+
+Two levels of skip lists within each page, where the skip list on each
+level contained enough information to skip on all levels below within
+the same page.
+
+Start of word was replaced by a byte telling how many bytes are
+omitted due to the prefix being common with previous words (word
+before in dictionary and word before in the lookup order).
+
+### 5th generation, 2010-08-21
+
+Payload ("value") changed when skip information was added for large
+posting lists. Added overflow handling for long words / huge payloads.
+Added another level of pages ("sparse pages") to improve compression.
+
+### 6th generation, 2015-05-12
+
+Started using a separate dictionary for each index field instead of a
+shared dictionary across all index fields. Minor changes.
diff --git a/searchlib/src/vespa/searchlib/bitcompression/compression.cpp b/searchlib/src/vespa/searchlib/bitcompression/compression.cpp
index 4752ddfb64f..0f089c60e4b 100644
--- a/searchlib/src/vespa/searchlib/bitcompression/compression.cpp
+++ b/searchlib/src/vespa/searchlib/bitcompression/compression.cpp
@@ -1,4 +1,4 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "compression.h"
#include <vespa/searchlib/fef/termfieldmatchdata.h>
diff --git a/searchlib/src/vespa/searchlib/bitcompression/compression.h b/searchlib/src/vespa/searchlib/bitcompression/compression.h
index 2d6b8083d43..232572a6314 100644
--- a/searchlib/src/vespa/searchlib/bitcompression/compression.h
+++ b/searchlib/src/vespa/searchlib/bitcompression/compression.h
@@ -1,4 +1,4 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once
diff --git a/searchlib/src/vespa/searchlib/bitcompression/countcompression.cpp b/searchlib/src/vespa/searchlib/bitcompression/countcompression.cpp
index 7c38931df77..a1f27028874 100644
--- a/searchlib/src/vespa/searchlib/bitcompression/countcompression.cpp
+++ b/searchlib/src/vespa/searchlib/bitcompression/countcompression.cpp
@@ -1,4 +1,4 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "countcompression.h"
#include <vespa/searchlib/index/postinglistcounts.h>
diff --git a/searchlib/src/vespa/searchlib/bitcompression/countcompression.h b/searchlib/src/vespa/searchlib/bitcompression/countcompression.h
index 2f3b0646bdb..6dd8ec9d350 100644
--- a/searchlib/src/vespa/searchlib/bitcompression/countcompression.h
+++ b/searchlib/src/vespa/searchlib/bitcompression/countcompression.h
@@ -1,4 +1,4 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once
diff --git a/searchlib/src/vespa/searchlib/bitcompression/pagedict4.cpp b/searchlib/src/vespa/searchlib/bitcompression/pagedict4.cpp
index 444c935b6f8..335b953e90c 100644
--- a/searchlib/src/vespa/searchlib/bitcompression/pagedict4.cpp
+++ b/searchlib/src/vespa/searchlib/bitcompression/pagedict4.cpp
@@ -1,4 +1,4 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "pagedict4.h"
#include <vespa/searchlib/index/postinglistcounts.h>
diff --git a/searchlib/src/vespa/searchlib/bitcompression/pagedict4.h b/searchlib/src/vespa/searchlib/bitcompression/pagedict4.h
index b162bdc3f2b..ba53b415368 100644
--- a/searchlib/src/vespa/searchlib/bitcompression/pagedict4.h
+++ b/searchlib/src/vespa/searchlib/bitcompression/pagedict4.h
@@ -1,4 +1,4 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once
diff --git a/searchlib/src/vespa/searchlib/bitcompression/posocc_field_params.cpp b/searchlib/src/vespa/searchlib/bitcompression/posocc_field_params.cpp
index 1276ed410d9..e2b2d849f24 100644
--- a/searchlib/src/vespa/searchlib/bitcompression/posocc_field_params.cpp
+++ b/searchlib/src/vespa/searchlib/bitcompression/posocc_field_params.cpp
@@ -1,4 +1,4 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "posocc_field_params.h"
#include <vespa/searchcommon/common/schema.h>
diff --git a/searchlib/src/vespa/searchlib/bitcompression/posocc_field_params.h b/searchlib/src/vespa/searchlib/bitcompression/posocc_field_params.h
index 9894bfb112d..ea8e6bcf73c 100644
--- a/searchlib/src/vespa/searchlib/bitcompression/posocc_field_params.h
+++ b/searchlib/src/vespa/searchlib/bitcompression/posocc_field_params.h
@@ -1,4 +1,4 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once
diff --git a/searchlib/src/vespa/searchlib/bitcompression/posocc_fields_params.cpp b/searchlib/src/vespa/searchlib/bitcompression/posocc_fields_params.cpp
index 9d6258ce26f..92368db8cf6 100644
--- a/searchlib/src/vespa/searchlib/bitcompression/posocc_fields_params.cpp
+++ b/searchlib/src/vespa/searchlib/bitcompression/posocc_fields_params.cpp
@@ -1,4 +1,4 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "posocc_fields_params.h"
#include <vespa/searchlib/index/postinglistparams.h>
diff --git a/searchlib/src/vespa/searchlib/bitcompression/posocc_fields_params.h b/searchlib/src/vespa/searchlib/bitcompression/posocc_fields_params.h
index 8748557e5a7..df5d80b5a29 100644
--- a/searchlib/src/vespa/searchlib/bitcompression/posocc_fields_params.h
+++ b/searchlib/src/vespa/searchlib/bitcompression/posocc_fields_params.h
@@ -1,4 +1,4 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once
diff --git a/searchlib/src/vespa/searchlib/bitcompression/posocccompression.cpp b/searchlib/src/vespa/searchlib/bitcompression/posocccompression.cpp
index 8e1bfd2875c..bfd09337bab 100644
--- a/searchlib/src/vespa/searchlib/bitcompression/posocccompression.cpp
+++ b/searchlib/src/vespa/searchlib/bitcompression/posocccompression.cpp
@@ -1,4 +1,4 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "posocccompression.h"
#include "posocc_fields_params.h"
diff --git a/searchlib/src/vespa/searchlib/bitcompression/posocccompression.h b/searchlib/src/vespa/searchlib/bitcompression/posocccompression.h
index 2dc747f6265..64ed4b5fd37 100644
--- a/searchlib/src/vespa/searchlib/bitcompression/posocccompression.h
+++ b/searchlib/src/vespa/searchlib/bitcompression/posocccompression.h
@@ -1,4 +1,4 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once
#include "compression.h"