summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThiago G. Martins <tmartins@verizonmedia.com>2020-07-07 20:41:02 +0200
committerGitHub <noreply@github.com>2020-07-07 20:41:02 +0200
commit01608842ffabc5e9dd79101f26ec41fa096bb2ab (patch)
tree3950e899203076472e67fb815b65a38ed4d19090
parent9e9431d2a93fede9d35daaee363ca5ca431f366d (diff)
parentb77cd04441b47a479386b5337f1699c2ec98870b (diff)
Merge pull request #13653 from vespa-engine/tgm/application-package
Tgm/application package
-rw-r--r--python/vespa/notebooks/application_package.ipynb176
-rw-r--r--python/vespa/settings.ini2
-rw-r--r--python/vespa/vespa/json_serialization.py77
-rw-r--r--python/vespa/vespa/package.py449
-rw-r--r--python/vespa/vespa/templates/hosts.xml7
-rw-r--r--python/vespa/vespa/templates/schema.txt28
-rw-r--r--python/vespa/vespa/templates/services.xml16
-rw-r--r--python/vespa/vespa/test_package.py243
8 files changed, 997 insertions, 1 deletions
diff --git a/python/vespa/notebooks/application_package.ipynb b/python/vespa/notebooks/application_package.ipynb
new file mode 100644
index 00000000000..5cc1638f7de
--- /dev/null
+++ b/python/vespa/notebooks/application_package.ipynb
@@ -0,0 +1,176 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# hide\n",
+ "%load_ext autoreload\n",
+ "%autoreload 2"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Vespa - Application Package\n",
+ "\n",
+ "> Python API to create, modify and deploy application packages"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Our goal is to create, modify and deploy simple application packages using our Python API. This enables us to run data analysis experiments that are fully integrated with Vespa. As an example, we want to create the application package we used in our [text search tutorial](https://docs.vespa.ai/documentation/tutorials/text-search.html)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Application spec"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Our goal in this section is to create the following `msmarco` schema using our Python API."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "```\n",
+ "schema msmarco {\n",
+ " document msmarco {\n",
+ " field id type string {\n",
+ " indexing: attribute | summary\n",
+ " }\n",
+ " field title type string {\n",
+ " indexing: index | summary\n",
+ " index: enable-bm25\n",
+ " }\n",
+ " field body type string {\n",
+ " indexing: index | summary\n",
+ " index: enable-bm25\n",
+ " }\n",
+ " }\n",
+ "\n",
+ " fieldset default {\n",
+ " fields: title, body\n",
+ " }\n",
+ "\n",
+ " rank-profile default {\n",
+ " first-phase {\n",
+ " expression: nativeRank(title, body)\n",
+ " }\n",
+ " }\n",
+ "\n",
+ " rank-profile bm25 inherits default {\n",
+ " first-phase {\n",
+ " expression: bm25(title) + bm25(body)\n",
+ " }\n",
+ " }\n",
+ "\n",
+ "}\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Schema API"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from vespa.package import Document, Field, Schema, FieldSet, RankProfile, ApplicationPackage\n",
+ "\n",
+ "document = Document(\n",
+ " fields=[\n",
+ " Field(name = \"id\", type = \"string\", indexing = [\"attribute\", \"summary\"]),\n",
+ " Field(name = \"title\", type = \"string\", indexing = [\"index\", \"summary\"], index = \"enable-bm25\"),\n",
+ " Field(name = \"body\", type = \"string\", indexing = [\"index\", \"summary\"], index = \"enable-bm25\") \n",
+ " ]\n",
+ ")\n",
+ "\n",
+ "msmarco_schema = Schema(\n",
+ " name = \"msmarco\", \n",
+ " document = document, \n",
+ " fieldsets = [FieldSet(name = \"default\", fields = [\"title\", \"body\"])],\n",
+ " rank_profiles = [RankProfile(name = \"default\", first_phase = \"nativeRank(title, body)\")]\n",
+ ")\n",
+ "\n",
+ "app_package = ApplicationPackage(name = \"msmarco\", schema=msmarco_schema)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Deploy it locally"
+ ]
+ },
+ {
+ "cell_type": "raw",
+ "metadata": {},
+ "source": [
+ "from vespa.package import VespaDocker\n",
+ "\n",
+ "vespa_docker = VespaDocker(application_package=app_package)\n",
+ "vespa_docker.deploy(disk_folder=\"/Users/username/sample_application\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Change the application package and redeploy"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can add a new rank profile and redeploy our application."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "app_package.schema.add_rank_profile(\n",
+ " RankProfile(name = \"bm25\", inherits = \"default\", first_phase = \"bm25(title) + bm25(body)\")\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "raw",
+ "metadata": {},
+ "source": [
+ "vespa_docker.deploy(disk_folder=\"/Users/username/sample_application\")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "vespa",
+ "language": "python",
+ "name": "vespa"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/python/vespa/settings.ini b/python/vespa/settings.ini
index 60394dcb425..4c1c22e3d22 100644
--- a/python/vespa/settings.ini
+++ b/python/vespa/settings.ini
@@ -21,7 +21,7 @@ license = apache2
status = 2
# Optional. Same format as setuptools requirements
-requirements = requests pandas
+requirements = requests pandas docker jinja2
# Optional. Same format as setuptools console_scripts
# console_scripts =
# Optional. Same format as setuptools dependency-links
diff --git a/python/vespa/vespa/json_serialization.py b/python/vespa/vespa/json_serialization.py
new file mode 100644
index 00000000000..d5f059326e7
--- /dev/null
+++ b/python/vespa/vespa/json_serialization.py
@@ -0,0 +1,77 @@
+import datetime
+import json
+import typing
+
+T = typing.TypeVar("T")
+
+
class ToJson(object):
    """
    Utility mix-in class for serializing an object to JSON. It does not really
    do any conversion on its own, but forces serialization into a standardized
    API.

    The serialized class is put into an envelope with some data to make it easier
    to understand what has happened.

        {
            "version": 1,
            "class": "Field",
            "serialized_at": "2018-10-24T12:55:32.123456+00:00",
            "data": { ... }
        }

    * version: This value is hard-coded to 1.
    * class: The name of the class we serialized. For debugging purposes.
    * serialized_at: The UTC time we serialized the instance of the class,
      as an ISO 8601 string with an explicit offset. For debugging purposes.
    * data: The actual data of the serialized class.

    All serialization is based on converting objects to a `dict` which is then converted
    to JSON using the standard Python json library.
    """

    @property
    def to_dict(self) -> typing.Mapping:
        """
        Return the object's own data as a JSON-compatible mapping.

        Subclasses must override this property.

        :raises NotImplementedError: when the subclass does not override it.
        """
        raise NotImplementedError

    @property
    def to_envelope(self) -> typing.Mapping:
        """
        Wrap `to_dict` in the envelope described in the class docstring.

        :return: Mapping with the keys "version", "class", "serialized_at" and "data".
        """
        # A timezone-aware timestamp keeps the "+00:00" offset in the ISO string;
        # a naive utcnow() value would be indistinguishable from local time.
        serialized_at = datetime.datetime.now(datetime.timezone.utc).isoformat()
        return {
            "version": 1,
            "class": self.__class__.__name__,
            "serialized_at": serialized_at,
            "data": self.to_dict,
        }

    @property
    def to_json(self) -> str:
        """
        Serialize the envelope to a JSON string.

        :return: JSON text produced by `json.dumps` from `to_envelope`.
        """
        return json.dumps(self.to_envelope)
+
+
class FromJson(typing.Generic[T]):
    """
    A mix-in class for deserializing from JSON to an object that implements this class.
    All JSON must have the same envelope as ToJson to be able to properly deserialize the
    contents of the mapping.

    Every subclass is registered under its class name when it is defined, so
    that `map` can look up the right `from_dict` from the envelope's "class" entry.
    """

    # Registry of subclasses (the classes themselves, not instances) by class name.
    deserializers: typing.MutableMapping[str, typing.Type["FromJson"]] = {}

    def __init_subclass__(cls, **kwargs):
        super().__init_subclass__(**kwargs)  # type: ignore
        FromJson.deserializers[cls.__name__] = cls

    @staticmethod
    def from_json(json_string: str) -> T:
        """
        Deserialize an object from the JSON text of an envelope.

        :param json_string: JSON document containing at least "class" and "data".
        :return: The object built by the registered class' `from_dict`.
        """
        return FromJson.map(json.loads(json_string))

    @staticmethod
    def map(mapping: typing.Mapping) -> T:
        """
        Deserialize an object from an already-parsed envelope.

        :param mapping: Envelope containing at least "class" and "data".
        :return: The object built by the registered class' `from_dict`.
        :raises KeyError: if "class" or "data" is missing, or if no subclass
            with the given class name has been registered.
        """
        class_name = mapping["class"]
        try:
            mapping_class = FromJson.deserializers[class_name]
        except KeyError:
            # Same exception type as a plain dict lookup, but with enough
            # context to see which name failed and what is available.
            raise KeyError(
                "No deserializer registered for class {!r}; known classes: {}".format(
                    class_name, sorted(FromJson.deserializers)
                )
            ) from None
        return mapping_class.from_dict(mapping["data"])

    @staticmethod
    def from_dict(mapping: typing.Mapping) -> T:
        """
        Build an instance from the "data" part of an envelope.

        Subclasses must override this method.

        :raises NotImplementedError: when the subclass does not override it.
        """
        raise NotImplementedError
diff --git a/python/vespa/vespa/package.py b/python/vespa/vespa/package.py
new file mode 100644
index 00000000000..7b6b5fae757
--- /dev/null
+++ b/python/vespa/vespa/package.py
@@ -0,0 +1,449 @@
+import os
+from time import sleep
+from typing import List, Mapping, Optional
+from pathlib import Path
+
+from jinja2 import Environment, PackageLoader, select_autoescape
+import docker
+
+from vespa.json_serialization import ToJson, FromJson
+
+
class Field(ToJson, FromJson["Field"]):
    def __init__(
        self,
        name: str,
        type: str,
        indexing: Optional[List[str]] = None,
        index: Optional[str] = None,
    ) -> None:
        """
        A single field of a Vespa document.

        :param name: Field name.
        :param type: Field data type.
        :param indexing: Indexing statements applied to the field; rendered
            joined by " | " in the schema.
        :param index: Index parameters of the field, e.g. "enable-bm25".
        """
        self.name = name
        self.type = type
        self.indexing = indexing
        self.index = index

    @property
    def indexing_to_text(self) -> Optional[str]:
        """Indexing statements joined by " | ", or None when none were given."""
        if self.indexing is None:
            return None
        return " | ".join(self.indexing)

    @staticmethod
    def from_dict(mapping: Mapping) -> "Field":
        """Build a `Field` from a mapping produced by `to_dict`."""
        return Field(
            name=mapping["name"],
            type=mapping["type"],
            indexing=mapping.get("indexing"),
            index=mapping.get("index"),
        )

    @property
    def to_dict(self) -> Mapping:
        """Mapping with name and type, plus indexing/index only when set."""
        serialized = {"name": self.name, "type": self.type}
        if self.indexing is not None:
            serialized["indexing"] = self.indexing
        if self.index is not None:
            serialized["index"] = self.index
        return serialized

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            return (
                self.name == other.name
                and self.type == other.type
                and self.indexing == other.indexing
                and self.index == other.index
            )
        return False

    def __repr__(self):
        attributes = (self.name, self.type, self.indexing, self.index)
        return "{0}({1}, {2}, {3}, {4})".format(
            self.__class__.__name__, *(repr(value) for value in attributes)
        )
+
+
class Document(ToJson, FromJson["Document"]):
    def __init__(self, fields: Optional[List[Field]] = None) -> None:
        """
        Object representing a Vespa document.

        :param fields: Initial fields of the document. The list is copied, so
            later calls to `add_fields` do not modify the caller's list.
        """
        self.fields = list(fields) if fields else []

    def add_fields(self, *fields: Field):
        """
        Add Fields to the document.

        :param fields: fields to be added
        :return:
        """
        self.fields.extend(fields)

    @staticmethod
    def from_dict(mapping: Mapping) -> "Document":
        """
        Build a `Document` from a mapping produced by `to_dict`.

        :param mapping: Mapping whose optional "fields" entry is a list of
            serialized field envelopes.
        :return: The deserialized document; empty when "fields" is missing.
        """
        # A missing (or null) "fields" entry means "no fields", not a crash
        # when iterating over None.
        serialized_fields = mapping.get("fields") or []
        return Document(fields=[FromJson.map(field) for field in serialized_fields])

    @property
    def to_dict(self) -> Mapping:
        """Mapping with a "fields" list holding each field's envelope."""
        return {"fields": [field.to_envelope for field in self.fields]}

    def __eq__(self, other):
        if not isinstance(other, self.__class__):
            return False
        return self.fields == other.fields

    def __repr__(self):
        # An empty document is shown as Document(None).
        return "{0}({1})".format(
            self.__class__.__name__, repr(self.fields) if self.fields else None
        )
+
+
class FieldSet(ToJson, FromJson["FieldSet"]):
    def __init__(self, name: str, fields: List[str]) -> None:
        """
        A named group of fields that are searched together.

        :param name: Name of the fieldset.
        :param fields: Names of the fields that belong to the fieldset.
        """
        self.name = name
        self.fields = fields

    @property
    def fields_to_text(self):
        """Field names joined by ", ", or None when `fields` is None."""
        if self.fields is None:
            return None
        return ", ".join(self.fields)

    @staticmethod
    def from_dict(mapping: Mapping) -> "FieldSet":
        """Build a `FieldSet` from a mapping produced by `to_dict`."""
        return FieldSet(name=mapping["name"], fields=mapping["fields"])

    @property
    def to_dict(self) -> Mapping:
        """Mapping with the fieldset's name and field names."""
        return {"name": self.name, "fields": self.fields}

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            return self.name == other.name and self.fields == other.fields
        return False

    def __repr__(self):
        return "{0}({1}, {2})".format(
            self.__class__.__name__, *(repr(value) for value in (self.name, self.fields))
        )
+
+
class RankProfile(ToJson, FromJson["RankProfile"]):
    def __init__(
        self, name: str, first_phase: str, inherits: Optional[str] = None
    ) -> None:
        """
        A Vespa rank profile.

        :param name: Rank profile name.
        :param first_phase: First phase ranking expression.
        :param inherits: Name of the rank profile this one inherits from, if any.
        """
        self.name = name
        self.first_phase = first_phase
        self.inherits = inherits

    @staticmethod
    def from_dict(mapping: Mapping) -> "RankProfile":
        """Build a `RankProfile` from a mapping produced by `to_dict`."""
        return RankProfile(
            name=mapping["name"],
            first_phase=mapping["first_phase"],
            inherits=mapping.get("inherits"),
        )

    @property
    def to_dict(self) -> Mapping:
        """Mapping with name and first_phase, plus inherits only when set."""
        serialized = {"name": self.name, "first_phase": self.first_phase}
        if self.inherits is not None:
            serialized["inherits"] = self.inherits
        return serialized

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            return (
                self.name == other.name
                and self.first_phase == other.first_phase
                and self.inherits == other.inherits
            )
        return False

    def __repr__(self):
        attributes = (self.name, self.first_phase, self.inherits)
        return "{0}({1}, {2}, {3})".format(
            self.__class__.__name__, *(repr(value) for value in attributes)
        )
+
+
class Schema(ToJson, FromJson["Schema"]):
    def __init__(
        self,
        name: str,
        document: Document,
        fieldsets: Optional[List[FieldSet]] = None,
        rank_profiles: Optional[List[RankProfile]] = None,
    ) -> None:
        """
        A Vespa schema: one document plus its fieldsets and rank profiles.

        Fieldsets and rank profiles are stored in dicts keyed by their name,
        so a later entry with the same name replaces an earlier one.

        :param name: Schema name.
        :param document: Vespa document associated with the Schema.
        :param fieldsets: A list of `FieldSet` associated with the Schema.
        :param rank_profiles: A list of `RankProfile` associated with the Schema.
        """
        self.name = name
        self.document = document
        self.fieldsets = (
            {} if fieldsets is None else {item.name: item for item in fieldsets}
        )
        self.rank_profiles = (
            {}
            if rank_profiles is None
            else {item.name: item for item in rank_profiles}
        )

    def add_rank_profile(self, rank_profile: RankProfile) -> None:
        """
        Register a `RankProfile` under its name, replacing any profile that
        already uses that name.

        :param rank_profile: `RankProfile` to be added.
        :return: None.
        """
        self.rank_profiles[rank_profile.name] = rank_profile

    @staticmethod
    def from_dict(mapping: Mapping) -> "Schema":
        """Build a `Schema` from a mapping produced by `to_dict`."""
        fieldsets = [FromJson.map(item) for item in mapping["fieldsets"]]
        rank_profiles = [FromJson.map(item) for item in mapping["rank_profiles"]]
        return Schema(
            name=mapping["name"],
            document=FromJson.map(mapping["document"]),
            fieldsets=fieldsets,
            rank_profiles=rank_profiles,
        )

    @property
    def to_dict(self) -> Mapping:
        """Mapping with the name and the envelopes of all nested objects."""
        return {
            "name": self.name,
            "document": self.document.to_envelope,
            "fieldsets": [item.to_envelope for item in self.fieldsets.values()],
            "rank_profiles": [
                item.to_envelope for item in self.rank_profiles.values()
            ],
        }

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            return (
                self.name == other.name
                and self.document == other.document
                and self.fieldsets == other.fieldsets
                and self.rank_profiles == other.rank_profiles
            )
        return False

    def __repr__(self):
        # Empty collections are shown as None rather than as an empty list.
        fieldsets = list(self.fieldsets.values()) if self.fieldsets else None
        rank_profiles = (
            list(self.rank_profiles.values()) if self.rank_profiles else None
        )
        return "{0}({1}, {2}, {3}, {4})".format(
            self.__class__.__name__,
            repr(self.name),
            repr(self.document),
            repr(fieldsets),
            repr(rank_profiles),
        )
+
+
class ApplicationPackage(ToJson, FromJson["ApplicationPackage"]):
    def __init__(self, name: str, schema: Schema) -> None:
        """
        Vespa Application Package.

        :param name: Application name.
        :param schema: Schema of the application.
        """
        self.name = name
        self.schema = schema

    @staticmethod
    def _template_environment() -> Environment:
        """Create the Jinja2 environment used to render every package file."""
        return Environment(
            loader=PackageLoader("vespa", "templates"),
            # Autoescaping stays on for everything except the ".txt" schema
            # template.
            autoescape=select_autoescape(
                disabled_extensions=("txt",), default_for_string=True, default=True,
            ),
            trim_blocks=True,
            lstrip_blocks=True,
        )

    def _render_template(self, template_name: str, **context) -> str:
        """Render the named template from the package with the given context."""
        template = self._template_environment().get_template(template_name)
        return template.render(**context)

    @property
    def schema_to_text(self):
        """Text of the schema definition file, rendered from "schema.txt"."""
        return self._render_template(
            "schema.txt",
            schema_name=self.schema.name,
            # The document is rendered under the same name as the schema.
            document_name=self.schema.name,
            fields=self.schema.document.fields,
            fieldsets=self.schema.fieldsets,
            rank_profiles=self.schema.rank_profiles,
        )

    @property
    def hosts_to_text(self):
        """Text of hosts.xml; the template takes no parameters."""
        return self._render_template("hosts.xml")

    @property
    def services_to_text(self):
        """Text of services.xml for this application and its document type."""
        return self._render_template(
            "services.xml",
            application_name=self.name,
            document_name=self.schema.name,
        )

    def create_application_package_files(self, dir_path):
        """
        Write the application package below `dir_path`.

        Creates `application/schemas/<schema name>.sd`, `application/hosts.xml`
        and `application/services.xml`, creating missing directories and
        overwriting existing files.

        :param dir_path: Folder that will contain the `application` directory.
        :return: None.
        """
        application_dir = os.path.join(dir_path, "application")
        schemas_dir = os.path.join(application_dir, "schemas")
        Path(schemas_dir).mkdir(parents=True, exist_ok=True)
        files = {
            os.path.join(
                schemas_dir, "{}.sd".format(self.schema.name)
            ): self.schema_to_text,
            os.path.join(application_dir, "hosts.xml"): self.hosts_to_text,
            os.path.join(application_dir, "services.xml"): self.services_to_text,
        }
        for file_path, content in files.items():
            # hosts.xml declares utf-8 in its XML prolog, so do not depend on
            # the platform's default encoding.
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(content)

    @staticmethod
    def from_dict(mapping: Mapping) -> "ApplicationPackage":
        """
        Build an `ApplicationPackage` from a mapping produced by `to_dict`.

        :param mapping: Mapping with "name" and an optional "schema" envelope.
        :return: The deserialized package; its schema is None when the
            mapping has no "schema" entry.
        """
        schema = mapping.get("schema")
        if schema is not None:
            schema = FromJson.map(schema)
        return ApplicationPackage(name=mapping["name"], schema=schema)

    @property
    def to_dict(self) -> Mapping:
        """Mapping with the name, plus the schema envelope when a schema is set."""
        serialized = {"name": self.name}
        if self.schema is not None:
            serialized["schema"] = self.schema.to_envelope
        return serialized

    def __eq__(self, other):
        if not isinstance(other, self.__class__):
            return False
        return self.name == other.name and self.schema == other.schema

    def __repr__(self):
        return "{0}({1}, {2})".format(
            self.__class__.__name__, repr(self.name), repr(self.schema)
        )
+
+
class VespaDocker(object):
    def __init__(self, application_package: ApplicationPackage) -> None:
        """
        Deploy application to a Vespa container

        :param application_package: ApplicationPackage to be deployed.
        """
        self.application_package = application_package
        self.container = None

    def run_vespa_engine_container(self, disk_folder: str, container_memory: str):
        """
        Attach to the container named after the application, or start a new
        one from the "vespaengine/vespa" image when none exists.

        Does nothing when a container is already attached to this object.

        :param disk_folder: Folder containing the application files; it is
            mounted read-write at /app inside the container.
        :param container_memory: Memory limit of the container
        :return:
        """
        if self.container is not None:
            return
        # Only talk to the Docker daemon when a container is actually needed.
        client = docker.from_env()
        try:
            # NOTE(review): an existing container is reused as-is; if it is
            # stopped, later exec_run calls will presumably fail - confirm
            # whether it should be started here.
            self.container = client.containers.get(self.application_package.name)
        except docker.errors.NotFound:
            self.container = client.containers.run(
                "vespaengine/vespa",
                detach=True,
                mem_limit=container_memory,
                name=self.application_package.name,
                hostname=self.application_package.name,
                privileged=True,
                volumes={disk_folder: {"bind": "/app", "mode": "rw"}},
                ports={8080: 8080, 19112: 19112},
            )

    def check_configuration_server(self) -> bool:
        """
        Check if configuration server is running and ready for deployment

        :return: True if configuration server is running.
        """
        if self.container is None:
            return False
        response = self.container.exec_run(
            "bash -c 'curl -s --head http://localhost:19071/ApplicationStatus'"
        )
        # The first line of the HEAD response is the HTTP status line.
        status_line = response.output.decode("utf-8").split("\r\n")[0]
        return status_line == "HTTP/1.1 200 OK"

    def deploy(
        self,
        disk_folder: str,
        container_memory: str = "4G",
        max_wait: Optional[float] = None,
    ):
        """
        Write the application package to disk, make sure a Vespa container is
        available, wait for its configuration server and deploy the package.

        :param disk_folder: Folder where the application files are written;
            it is mounted into the container.
        :param container_memory: Memory limit of the container.
        :param max_wait: Maximum number of seconds to wait for the
            configuration server. None (the default) waits indefinitely.
        :return: Output of the deployment command, one list entry per line.
        :raises TimeoutError: if `max_wait` is set and the configuration
            server is not ready in time.
        """
        self.application_package.create_application_package_files(dir_path=disk_folder)

        self.run_vespa_engine_container(
            disk_folder=disk_folder, container_memory=container_memory
        )

        poll_interval = 5  # seconds between readiness checks
        waited = 0
        while not self.check_configuration_server():
            if max_wait is not None and waited >= max_wait:
                raise TimeoutError(
                    "Configuration server not ready after {} seconds.".format(waited)
                )
            print("Waiting for configuration server.")
            sleep(poll_interval)
            waited += poll_interval

        deployment = self.container.exec_run(
            "bash -c '/opt/vespa/bin/vespa-deploy prepare /app/application && /opt/vespa/bin/vespa-deploy activate'"
        )
        return deployment.output.decode("utf-8").split("\n")
diff --git a/python/vespa/vespa/templates/hosts.xml b/python/vespa/vespa/templates/hosts.xml
new file mode 100644
index 00000000000..5c88f4c1609
--- /dev/null
+++ b/python/vespa/vespa/templates/hosts.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="utf-8" ?>
+<!-- Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->
+<hosts>
+ <host name="localhost">
+ <alias>node1</alias>
+ </host>
+</hosts> \ No newline at end of file
diff --git a/python/vespa/vespa/templates/schema.txt b/python/vespa/vespa/templates/schema.txt
new file mode 100644
index 00000000000..0849cbbad6f
--- /dev/null
+++ b/python/vespa/vespa/templates/schema.txt
@@ -0,0 +1,28 @@
+schema {{ schema_name }} {
+ document {{ document_name }} {
+ {% for field in fields %}
+ field {{ field.name }} type {{ field.type }} {
+ {% if field.indexing %}
+ indexing: {{ field.indexing_to_text }}
+ {% endif %}
+ {% if field.index %}
+ index: {{ field.index }}
+ {% endif %}
+ }
+ {% endfor %}
+ }
+{% for key, value in fieldsets.items() %}
+ fieldset {{ key }} {
+ fields: {{ value.fields_to_text }}
+ }
+{% endfor %}
+{% for key, value in rank_profiles.items() %}
+ rank-profile {{ key }}{% if value.inherits %} inherits {{ value.inherits }}{% endif %} {
+ {% if value.first_phase %}
+ first-phase {
+ expression: {{ value.first_phase }}
+ }
+ {% endif %}
+ }
+{% endfor %}
+} \ No newline at end of file
diff --git a/python/vespa/vespa/templates/services.xml b/python/vespa/vespa/templates/services.xml
new file mode 100644
index 00000000000..c6bda296be9
--- /dev/null
+++ b/python/vespa/vespa/templates/services.xml
@@ -0,0 +1,16 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<services version="1.0">
+ <container id="{{ application_name }}_container" version="1.0">
+ <search></search>
+ <document-api></document-api>
+ </container>
+ <content id="{{ application_name }}_content" version="1.0">
+ <redundancy reply-after="1">1</redundancy>
+ <documents>
+ <document type="{{ document_name }}" mode="index"></document>
+ </documents>
+ <nodes>
+ <node distribution-key="0" hostalias="node1"></node>
+ </nodes>
+ </content>
+</services> \ No newline at end of file
diff --git a/python/vespa/vespa/test_package.py b/python/vespa/vespa/test_package.py
new file mode 100644
index 00000000000..1dca8bbf014
--- /dev/null
+++ b/python/vespa/vespa/test_package.py
@@ -0,0 +1,243 @@
+import unittest
+
+from vespa.package import (
+ Field,
+ Document,
+ FieldSet,
+ RankProfile,
+ Schema,
+ ApplicationPackage,
+)
+
+
class TestField(unittest.TestCase):
    """Construction, serialization round trip and text rendering of `Field`."""

    def test_field_name_type(self):
        # Only the mandatory arguments: optional keys must not be serialized.
        minimal = Field(name="test_name", type="string")

        self.assertEqual(minimal.name, "test_name")
        self.assertEqual(minimal.type, "string")
        self.assertEqual(minimal.to_dict, {"name": "test_name", "type": "string"})
        self.assertEqual(minimal, Field(name="test_name", type="string"))
        self.assertEqual(minimal, Field.from_dict(minimal.to_dict))
        self.assertIsNone(minimal.indexing_to_text)

    def test_field_name_type_indexing_index(self):
        def make_body_field():
            return Field(
                name="body",
                type="string",
                indexing=["index", "summary"],
                index="enable-bm25",
            )

        body = make_body_field()

        self.assertEqual(body.name, "body")
        self.assertEqual(body.type, "string")
        self.assertEqual(body.indexing, ["index", "summary"])
        self.assertEqual(body.index, "enable-bm25")

        expected_dict = {
            "name": "body",
            "type": "string",
            "indexing": ["index", "summary"],
            "index": "enable-bm25",
        }
        self.assertEqual(body.to_dict, expected_dict)
        self.assertEqual(body, make_body_field())
        self.assertEqual(body, Field.from_dict(body.to_dict))
        self.assertEqual(body.indexing_to_text, "index | summary")
+
+
class TestDocument(unittest.TestCase):
    """`Document` with zero, one and two fields."""

    def test_empty_document(self):
        empty = Document()

        self.assertEqual(empty.fields, [])
        self.assertEqual(empty.to_dict, {"fields": []})
        self.assertEqual(empty, Document.from_dict(empty.to_dict))

    def test_document_one_field(self):
        name_field = Field(name="test_name", type="string")
        document = Document()
        document.add_fields(name_field)

        self.assertEqual(document.fields, [name_field])
        self.assertEqual(document, Document.from_dict(document.to_dict))
        self.assertEqual(document, Document([name_field]))

    def test_document_two_fields(self):
        name_field = Field(name="test_name", type="string")
        body_field = Field(
            name="body",
            type="string",
            indexing=["index", "summary"],
            index="enable-bm25",
        )
        document = Document()
        # Both fields are added in a single call; order must be preserved.
        document.add_fields(name_field, body_field)

        self.assertEqual(document.fields, [name_field, body_field])
        self.assertEqual(document, Document.from_dict(document.to_dict))
        self.assertEqual(document, Document([name_field, body_field]))
+
+
class TestFieldSet(unittest.TestCase):
    """Attributes, round trip and text rendering of `FieldSet`."""

    def test_fieldset(self):
        default_set = FieldSet(name="default", fields=["title", "body"])

        self.assertEqual(default_set.name, "default")
        self.assertEqual(default_set.fields, ["title", "body"])
        self.assertEqual(default_set, FieldSet.from_dict(default_set.to_dict))
        self.assertEqual(default_set.fields_to_text, "title, body")
+
+
class TestRankProfile(unittest.TestCase):
    """Attributes and serialization round trip of `RankProfile`."""

    def test_rank_profile(self):
        rank_profile = RankProfile(name="bm25", first_phase="bm25(title) + bm25(body)")
        self.assertEqual(rank_profile.name, "bm25")
        self.assertEqual(rank_profile.first_phase, "bm25(title) + bm25(body)")
        # Without `inherits`, the attribute is None and the key is not serialized.
        self.assertIsNone(rank_profile.inherits)
        self.assertEqual(
            rank_profile.to_dict,
            {"name": "bm25", "first_phase": "bm25(title) + bm25(body)"},
        )
        self.assertEqual(rank_profile, RankProfile.from_dict(rank_profile.to_dict))

    def test_rank_profile_inherits(self):
        rank_profile = RankProfile(
            name="bm25", first_phase="bm25(title) + bm25(body)", inherits="default"
        )
        self.assertEqual(rank_profile.name, "bm25")
        self.assertEqual(rank_profile.first_phase, "bm25(title) + bm25(body)")
        # The inherited profile name must be stored and serialized.
        self.assertEqual(rank_profile.inherits, "default")
        self.assertEqual(
            rank_profile.to_dict,
            {
                "name": "bm25",
                "first_phase": "bm25(title) + bm25(body)",
                "inherits": "default",
            },
        )
        self.assertEqual(rank_profile, RankProfile.from_dict(rank_profile.to_dict))
+
+
class TestSchema(unittest.TestCase):
    """Round trip of `Schema` and adding rank profiles after construction."""

    def test_schema(self):
        bm25_expression = "bm25(title) + bm25(body)"
        schema = Schema(
            name="test_schema",
            document=Document(fields=[Field(name="test_name", type="string")]),
            fieldsets=[FieldSet(name="default", fields=["title", "body"])],
            rank_profiles=[RankProfile(name="bm25", first_phase=bm25_expression)],
        )

        self.assertEqual(schema, Schema.from_dict(schema.to_dict))

        # Rank profiles are kept in a dict keyed by profile name.
        expected_profiles = {
            "bm25": RankProfile(name="bm25", first_phase=bm25_expression)
        }
        self.assertDictEqual(schema.rank_profiles, expected_profiles)

        schema.add_rank_profile(
            RankProfile(name="default", first_phase="NativeRank(title)")
        )
        expected_profiles = {
            "bm25": RankProfile(name="bm25", first_phase=bm25_expression),
            "default": RankProfile(name="default", first_phase="NativeRank(title)"),
        }
        self.assertDictEqual(schema.rank_profiles, expected_profiles)
+
+
+class TestApplicationPackage(unittest.TestCase):  # round trip and rendered files of ApplicationPackage
+ def setUp(self) -> None:  # builds the "msmarco" schema (3 fields, 1 fieldset, 2 rank profiles) used by every test
+ test_schema = Schema(
+ name="msmarco",
+ document=Document(
+ fields=[
+ Field(name="id", type="string", indexing=["attribute", "summary"]),
+ Field(
+ name="title",
+ type="string",
+ indexing=["index", "summary"],
+ index="enable-bm25",
+ ),
+ Field(
+ name="body",
+ type="string",
+ indexing=["index", "summary"],
+ index="enable-bm25",
+ ),
+ ]
+ ),
+ fieldsets=[FieldSet(name="default", fields=["title", "body"])],
+ rank_profiles=[
+ RankProfile(name="default", first_phase="nativeRank(title, body)"),
+ RankProfile(
+ name="bm25",
+ first_phase="bm25(title) + bm25(body)",
+ inherits="default",
+ ),
+ ],
+ )
+ self.app_package = ApplicationPackage(name="test_app", schema=test_schema)  # application name differs from the schema name
+
+ def test_application_package(self):  # to_dict followed by from_dict must give an equal package
+ self.assertEqual(
+ self.app_package, ApplicationPackage.from_dict(self.app_package.to_dict)
+ )
+
+ def test_schema_to_text(self):  # rendered schema must match the expected text exactly
+ expected_result = (  # NOTE(review): indentation inside these literals looks collapsed in this view - verify against the schema.txt template
+ "schema msmarco {\n"
+ " document msmarco {\n"
+ " field id type string {\n"
+ " indexing: attribute | summary\n"
+ " }\n"
+ " field title type string {\n"
+ " indexing: index | summary\n"
+ " index: enable-bm25\n"
+ " }\n"
+ " field body type string {\n"
+ " indexing: index | summary\n"
+ " index: enable-bm25\n"
+ " }\n"
+ " }\n"
+ " fieldset default {\n"
+ " fields: title, body\n"
+ " }\n"
+ " rank-profile default {\n"
+ " first-phase {\n"
+ " expression: nativeRank(title, body)\n"
+ " }\n"
+ " }\n"
+ " rank-profile bm25 inherits default {\n"
+ " first-phase {\n"
+ " expression: bm25(title) + bm25(body)\n"
+ " }\n"
+ " }\n"
+ "}"
+ )
+ self.assertEqual(self.app_package.schema_to_text, expected_result)
+
+ def test_hosts_to_text(self):  # hosts.xml takes no parameters, so the output is the template text itself
+ expected_result = (
+ '<?xml version="1.0" encoding="utf-8" ?>\n'
+ "<!-- Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->\n"
+ "<hosts>\n"
+ ' <host name="localhost">\n'
+ " <alias>node1</alias>\n"
+ " </host>\n"
+ "</hosts>"
+ )
+ self.assertEqual(self.app_package.hosts_to_text, expected_result)
+
+ def test_services_to_text(self):  # ids come from the application name, the document type from the schema name
+ expected_result = (
+ '<?xml version="1.0" encoding="UTF-8"?>\n'
+ '<services version="1.0">\n'
+ ' <container id="test_app_container" version="1.0">\n'
+ " <search></search>\n"
+ " <document-api></document-api>\n"
+ " </container>\n"
+ ' <content id="test_app_content" version="1.0">\n'
+ ' <redundancy reply-after="1">1</redundancy>\n'
+ " <documents>\n"
+ ' <document type="msmarco" mode="index"></document>\n'
+ " </documents>\n"
+ " <nodes>\n"
+ ' <node distribution-key="0" hostalias="node1"></node>\n'
+ " </nodes>\n"
+ " </content>\n"
+ "</services>"
+ )
+
+ self.assertEqual(self.app_package.services_to_text, expected_result)