aboutsummaryrefslogtreecommitdiffstats
path: root/python/vespa/docs/sphinx/source/query.ipynb
diff options
context:
space:
mode:
Diffstat (limited to 'python/vespa/docs/sphinx/source/query.ipynb')
-rw-r--r--python/vespa/docs/sphinx/source/query.ipynb297
1 files changed, 0 insertions, 297 deletions
diff --git a/python/vespa/docs/sphinx/source/query.ipynb b/python/vespa/docs/sphinx/source/query.ipynb
deleted file mode 100644
index ec1d5e3ec01..00000000000
--- a/python/vespa/docs/sphinx/source/query.ipynb
+++ /dev/null
@@ -1,297 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Query Vespa applications\n",
- "\n",
- "> Python API to query Vespa applications"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "We can connect to the CORD-19 Search app and use it to exemplify the query API"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "from vespa.application import Vespa\n",
- "\n",
- "app = Vespa(url = \"https://api.cord19.vespa.ai\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Specify the request body\n",
- "\n",
- "> Full flexibility by specifying the entire request body"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "body = {\n",
- " 'yql': 'select title, abstract from sources * where userQuery();',\n",
- " 'hits': 5,\n",
- " 'query': 'Is remdesivir an effective treatment for COVID-19?',\n",
- " 'type': 'any',\n",
- " 'ranking': 'bm25'\n",
- "}"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [],
- "source": [
- "results = app.query(body=body)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "202768"
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "results.number_documents_retrieved"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Specify a query model"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Query + term-matching + rank profile"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [],
- "source": [
- "from vespa.query import Query, OR, RankProfile\n",
- "\n",
- "results = app.query(\n",
- " query=\"Is remdesivir an effective treatment for COVID-19?\", \n",
- " query_model = Query(\n",
- " match_phase=OR(), \n",
- " rank_profile=RankProfile(name=\"bm25\")\n",
- " )\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "202768"
- ]
- },
- "execution_count": 6,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "results.number_documents_retrieved"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Query + term-matching + ann operator + rank_profile"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [],
- "source": [
- "from vespa.query import Query, ANN, WeakAnd, Union, RankProfile\n",
- "from random import random\n",
- "\n",
- "match_phase = Union(\n",
- " WeakAnd(hits = 10), \n",
- " ANN(\n",
- " doc_vector=\"title_embedding\", \n",
- " query_vector=\"title_vector\", \n",
- " embedding_model=lambda x: [random() for x in range(768)],\n",
- " hits = 10,\n",
- " label=\"title\"\n",
- " )\n",
- ")\n",
- "rank_profile = RankProfile(name=\"bm25\", list_features=True)\n",
- "query_model = Query(match_phase=match_phase, rank_profile=rank_profile)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [],
- "source": [
- "results = app.query(query=\"Is remdesivir an effective treatment for COVID-19?\", \n",
- " query_model=query_model)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "1049"
- ]
- },
- "execution_count": 9,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "results.number_documents_retrieved"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Recall specific documents"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Let's take a look at the top 3 ids from the last query."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[198698, 120155, 120154]"
- ]
- },
- "execution_count": 10,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "top_ids = [hit[\"fields\"][\"id\"] for hit in results.hits[0:3]]\n",
- "top_ids"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Assume that we now want to retrieve the second and third ids above. We can do so with the `recall` argument."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {},
- "outputs": [],
- "source": [
- "results_with_recall = app.query(query=\"Is remdesivir an effective treatment for COVID-19?\", \n",
- " query_model=query_model,\n",
- " recall = (\"id\", top_ids[1:3]))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "It will only retrieve the documents with Vespa field `id` that is defined on the list that is inside the tuple."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[120155, 120154]"
- ]
- },
- "execution_count": 12,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "id_recalled = [hit[\"fields\"][\"id\"] for hit in results_with_recall.hits]\n",
- "id_recalled"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.8.5"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}