diff options
Diffstat (limited to 'python/vespa/docs/sphinx/source/query.ipynb')
-rw-r--r-- | python/vespa/docs/sphinx/source/query.ipynb | 297 |
1 files changed, 0 insertions, 297 deletions
diff --git a/python/vespa/docs/sphinx/source/query.ipynb b/python/vespa/docs/sphinx/source/query.ipynb deleted file mode 100644 index ec1d5e3ec01..00000000000 --- a/python/vespa/docs/sphinx/source/query.ipynb +++ /dev/null @@ -1,297 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Query Vespa applications\n", - "\n", - "> Python API to query Vespa applications" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We connect to the CORD-19 Search app and use it to demonstrate the query API." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "from vespa.application import Vespa\n", - "\n", - "app = Vespa(url = \"https://api.cord19.vespa.ai\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Specify the request body\n", - "\n", - "> Full flexibility by specifying the entire request body" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "body = {\n", - " 'yql': 'select title, abstract from sources * where userQuery();',\n", - " 'hits': 5,\n", - " 'query': 'Is remdesivir an effective treatment for COVID-19?',\n", - " 'type': 'any',\n", - " 'ranking': 'bm25'\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "results = app.query(body=body)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "202768" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "results.number_documents_retrieved" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Specify a query model" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Query + term-matching + rank profile" - ] - }, - { - "cell_type": "code", - 
"execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "from vespa.query import Query, OR, RankProfile\n", - "\n", - "results = app.query(\n", - " query=\"Is remdesivir an effective treatment for COVID-19?\", \n", - " query_model = Query(\n", - " match_phase=OR(), \n", - " rank_profile=RankProfile(name=\"bm25\")\n", - " )\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "202768" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "results.number_documents_retrieved" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Query + term-matching + ann operator + rank_profile" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "from vespa.query import Query, ANN, WeakAnd, Union, RankProfile\n", - "from random import random\n", - "\n", - "match_phase = Union(\n", - " WeakAnd(hits = 10), \n", - " ANN(\n", - " doc_vector=\"title_embedding\", \n", - " query_vector=\"title_vector\", \n", - " embedding_model=lambda x: [random() for x in range(768)],\n", - " hits = 10,\n", - " label=\"title\"\n", - " )\n", - ")\n", - "rank_profile = RankProfile(name=\"bm25\", list_features=True)\n", - "query_model = Query(match_phase=match_phase, rank_profile=rank_profile)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "results = app.query(query=\"Is remdesivir an effective treatment for COVID-19?\", \n", - " query_model=query_model)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "1049" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "results.number_documents_retrieved" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - 
"## Recall specific documents" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's take a look at the top 3 ids from the last query." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[198698, 120155, 120154]" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "top_ids = [hit[\"fields\"][\"id\"] for hit in results.hits[0:3]]\n", - "top_ids" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Assume that we now want to retrieve the second and third ids above. We can do so with the `recall` argument." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "results_with_recall = app.query(query=\"Is remdesivir an effective treatment for COVID-19?\", \n", - " query_model=query_model,\n", - " recall = (\"id\", top_ids[1:3]))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "It will only retrieve the documents with Vespa field `id` that is defined on the list that is inside the tuple." - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[120155, 120154]" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "id_recalled = [hit[\"fields\"][\"id\"] for hit in results_with_recall.hits]\n", - "id_recalled" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.5" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} |