summary refs log tree commit diff stats
path: root/python
diff options
context:
space:
mode:
author tmartins <thigm85@gmail.com> 2020-06-04 12:35:38 +0200
committer tmartins <thigm85@gmail.com> 2020-06-04 12:35:38 +0200
commit 4f86f03994ed2cfcf0980f5b76e57110d65597d5 (patch)
tree dc89d6aba5bb93755d5a835ad1e663af196323fd /python
parent 3570a88c42890577dab56e401a799e25009cbb02 (diff)
update docs
Diffstat (limited to 'python')
-rw-r--r-- python/vespa/README.md 60
-rw-r--r-- python/vespa/docs/collect_training_data.html 36
-rw-r--r-- python/vespa/docs/index.html 44
-rw-r--r-- python/vespa/docs/query.html 126
-rw-r--r-- python/vespa/vespa/_nbdev.py 4
5 files changed, 195 insertions, 75 deletions
diff --git a/python/vespa/README.md b/python/vespa/README.md
index 23d8541bcfe..c316564f3c1 100644
--- a/python/vespa/README.md
+++ b/python/vespa/README.md
@@ -10,7 +10,7 @@
> Connect to a running Vespa application
-```python
+```
from vespa.application import Vespa
app = Vespa(url = "https://api.cord19.vespa.ai")
@@ -20,7 +20,7 @@ app = Vespa(url = "https://api.cord19.vespa.ai")
> Easily define matching and ranking criteria
-```python
+```
from vespa.query import Query, Union, WeakAnd, ANN, RankProfile
from random import random
@@ -44,21 +44,21 @@ query_model = Query(match_phase=match_phase, rank_profile=rank_profile)
> Send queries via the query API. See the [query page](/vespa/query) for more examples.
-```python
+```
query_result = app.query(
query="Is remdesivir an effective treatment for COVID-19?",
query_model=query_model
)
```
-```python
+```
query_result["root"]["fields"]
```
- {'totalCount': 1078}
+ {'totalCount': 1077}
@@ -66,7 +66,7 @@ query_result["root"]["fields"]
> How to structure labelled data
-```python
+```
labelled_data = [
{
"query_id": 0,
@@ -87,12 +87,12 @@ Non-relevant documents are assigned `"score": 0` by default. Relevant documents
> Collect training data to analyse and/or improve ranking functions. See the [collect training data page](/vespa/collect_training_data) for more examples.
-```python
+```
training_data_batch = app.collect_training_data(
labelled_data = labelled_data,
id_field = "id",
query_model = query_model,
- number_random_docs = 2
+ number_additional_docs = 2
)
training_data_batch
```
@@ -186,7 +186,7 @@ training_data_batch
<td>1.000000</td>
<td>1.000000</td>
<td>1.000000</td>
- <td>47396</td>
+ <td>56212</td>
<td>0</td>
<td>0</td>
</tr>
@@ -210,7 +210,7 @@ training_data_batch
<td>0.617188</td>
<td>0.428571</td>
<td>0.457087</td>
- <td>32484</td>
+ <td>34026</td>
<td>0</td>
<td>0</td>
</tr>
@@ -258,7 +258,7 @@ training_data_batch
<td>1.000000</td>
<td>1.000000</td>
<td>1.000000</td>
- <td>47396</td>
+ <td>56212</td>
<td>0</td>
<td>0</td>
</tr>
@@ -282,7 +282,7 @@ training_data_batch
<td>0.617188</td>
<td>0.428571</td>
<td>0.457087</td>
- <td>32484</td>
+ <td>34026</td>
<td>0</td>
<td>0</td>
</tr>
@@ -330,7 +330,7 @@ training_data_batch
<td>1.000000</td>
<td>1.000000</td>
<td>1.000000</td>
- <td>28609</td>
+ <td>29774</td>
<td>1</td>
<td>0</td>
</tr>
@@ -349,12 +349,12 @@ training_data_batch
<td>...</td>
<td>0.0</td>
<td>0.0</td>
- <td>0.333333</td>
+ <td>0.500000</td>
<td>1.0</td>
<td>1.000000</td>
- <td>0.250000</td>
- <td>0.641667</td>
- <td>19805</td>
+ <td>0.333333</td>
+ <td>0.700000</td>
+ <td>22787</td>
<td>1</td>
<td>0</td>
</tr>
@@ -402,7 +402,7 @@ training_data_batch
<td>1.000000</td>
<td>1.000000</td>
<td>1.000000</td>
- <td>28609</td>
+ <td>29774</td>
<td>1</td>
<td>0</td>
</tr>
@@ -421,12 +421,12 @@ training_data_batch
<td>...</td>
<td>0.0</td>
<td>0.0</td>
- <td>0.333333</td>
+ <td>0.500000</td>
<td>1.0</td>
<td>1.000000</td>
- <td>0.250000</td>
- <td>0.641667</td>
- <td>19805</td>
+ <td>0.333333</td>
+ <td>0.700000</td>
+ <td>22787</td>
<td>1</td>
<td>0</td>
</tr>
@@ -446,7 +446,7 @@ We will define the following evaluation metrics:
* recall @ 10 per query
* MRR @ 10 per query
-```python
+```
from vespa.evaluation import MatchRatio, Recall, ReciprocalRank
eval_metrics = [MatchRatio(), Recall(at=10), ReciprocalRank(at=10)]
@@ -454,7 +454,7 @@ eval_metrics = [MatchRatio(), Recall(at=10), ReciprocalRank(at=10)]
Evaluate:
-```python
+```
evaluation = app.evaluate(
labelled_data = labelled_data,
eval_metrics = eval_metrics,
@@ -497,18 +497,18 @@ evaluation
<tr>
<th>0</th>
<td>0</td>
- <td>1145</td>
- <td>58692</td>
- <td>0.019509</td>
+ <td>1267</td>
+ <td>62529</td>
+ <td>0.020263</td>
<td>0</td>
<td>0</td>
</tr>
<tr>
<th>1</th>
<td>1</td>
- <td>805</td>
- <td>58692</td>
- <td>0.013716</td>
+ <td>887</td>
+ <td>62529</td>
+ <td>0.014185</td>
<td>0</td>
<td>0</td>
</tr>
diff --git a/python/vespa/docs/collect_training_data.html b/python/vespa/docs/collect_training_data.html
index 088e766700c..609b7182482 100644
--- a/python/vespa/docs/collect_training_data.html
+++ b/python/vespa/docs/collect_training_data.html
@@ -115,7 +115,7 @@ description: "Collect training data to analyse and/or improve ranking functions"
<span class="n">labelled_data</span> <span class="o">=</span> <span class="n">labelled_data</span><span class="p">,</span>
<span class="n">id_field</span> <span class="o">=</span> <span class="s2">&quot;id&quot;</span><span class="p">,</span>
<span class="n">query_model</span> <span class="o">=</span> <span class="n">query_model</span><span class="p">,</span>
- <span class="n">number_random_docs</span> <span class="o">=</span> <span class="mi">2</span>
+ <span class="n">number_additional_docs</span> <span class="o">=</span> <span class="mi">2</span>
<span class="p">)</span>
<span class="n">training_data_batch</span>
</pre></div>
@@ -217,7 +217,7 @@ description: "Collect training data to analyse and/or improve ranking functions"
<td>1.000000</td>
<td>1.000000</td>
<td>1.000000</td>
- <td>47396</td>
+ <td>56212</td>
<td>0</td>
<td>0</td>
</tr>
@@ -241,7 +241,7 @@ description: "Collect training data to analyse and/or improve ranking functions"
<td>0.617188</td>
<td>0.428571</td>
<td>0.457087</td>
- <td>32484</td>
+ <td>34026</td>
<td>0</td>
<td>0</td>
</tr>
@@ -289,7 +289,7 @@ description: "Collect training data to analyse and/or improve ranking functions"
<td>1.000000</td>
<td>1.000000</td>
<td>1.000000</td>
- <td>47396</td>
+ <td>56212</td>
<td>0</td>
<td>0</td>
</tr>
@@ -313,7 +313,7 @@ description: "Collect training data to analyse and/or improve ranking functions"
<td>0.617188</td>
<td>0.428571</td>
<td>0.457087</td>
- <td>32484</td>
+ <td>34026</td>
<td>0</td>
<td>0</td>
</tr>
@@ -361,7 +361,7 @@ description: "Collect training data to analyse and/or improve ranking functions"
<td>1.000000</td>
<td>1.000000</td>
<td>1.000000</td>
- <td>28609</td>
+ <td>29774</td>
<td>1</td>
<td>0</td>
</tr>
@@ -385,7 +385,7 @@ description: "Collect training data to analyse and/or improve ranking functions"
<td>1.000000</td>
<td>0.333333</td>
<td>0.700000</td>
- <td>22584</td>
+ <td>22787</td>
<td>1</td>
<td>0</td>
</tr>
@@ -433,7 +433,7 @@ description: "Collect training data to analyse and/or improve ranking functions"
<td>1.000000</td>
<td>1.000000</td>
<td>1.000000</td>
- <td>28609</td>
+ <td>29774</td>
<td>1</td>
<td>0</td>
</tr>
@@ -457,7 +457,7 @@ description: "Collect training data to analyse and/or improve ranking functions"
<td>1.000000</td>
<td>0.333333</td>
<td>0.700000</td>
- <td>22584</td>
+ <td>22787</td>
<td>1</td>
<td>0</td>
</tr>
@@ -502,7 +502,7 @@ description: "Collect training data to analyse and/or improve ranking functions"
<span class="n">relevant_id</span> <span class="o">=</span> <span class="n">doc_data</span><span class="p">[</span><span class="s2">&quot;id&quot;</span><span class="p">],</span>
<span class="n">id_field</span> <span class="o">=</span> <span class="s2">&quot;id&quot;</span><span class="p">,</span>
<span class="n">query_model</span> <span class="o">=</span> <span class="n">query_model</span><span class="p">,</span>
- <span class="n">number_random_docs</span> <span class="o">=</span> <span class="mi">2</span>
+ <span class="n">number_additional_docs</span> <span class="o">=</span> <span class="mi">2</span>
<span class="p">)</span>
<span class="n">training_data</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">training_data_point</span><span class="p">)</span>
<span class="n">training_data</span> <span class="o">=</span> <span class="n">DataFrame</span><span class="o">.</span><span class="n">from_records</span><span class="p">(</span><span class="n">training_data</span><span class="p">)</span>
@@ -606,7 +606,7 @@ description: "Collect training data to analyse and/or improve ranking functions"
<td>1.000000</td>
<td>1.000000</td>
<td>1.000000</td>
- <td>47396</td>
+ <td>56212</td>
<td>0</td>
<td>0</td>
</tr>
@@ -630,7 +630,7 @@ description: "Collect training data to analyse and/or improve ranking functions"
<td>0.617188</td>
<td>0.428571</td>
<td>0.457087</td>
- <td>32484</td>
+ <td>34026</td>
<td>0</td>
<td>0</td>
</tr>
@@ -678,7 +678,7 @@ description: "Collect training data to analyse and/or improve ranking functions"
<td>1.000000</td>
<td>1.000000</td>
<td>1.000000</td>
- <td>47396</td>
+ <td>56212</td>
<td>0</td>
<td>0</td>
</tr>
@@ -702,7 +702,7 @@ description: "Collect training data to analyse and/or improve ranking functions"
<td>0.617188</td>
<td>0.428571</td>
<td>0.457087</td>
- <td>32484</td>
+ <td>34026</td>
<td>0</td>
<td>0</td>
</tr>
@@ -750,7 +750,7 @@ description: "Collect training data to analyse and/or improve ranking functions"
<td>1.000000</td>
<td>1.000000</td>
<td>1.000000</td>
- <td>28609</td>
+ <td>29774</td>
<td>1</td>
<td>0</td>
</tr>
@@ -774,7 +774,7 @@ description: "Collect training data to analyse and/or improve ranking functions"
<td>1.000000</td>
<td>0.333333</td>
<td>0.700000</td>
- <td>22584</td>
+ <td>22787</td>
<td>1</td>
<td>0</td>
</tr>
@@ -822,7 +822,7 @@ description: "Collect training data to analyse and/or improve ranking functions"
<td>1.000000</td>
<td>1.000000</td>
<td>1.000000</td>
- <td>28609</td>
+ <td>29774</td>
<td>1</td>
<td>0</td>
</tr>
@@ -846,7 +846,7 @@ description: "Collect training data to analyse and/or improve ranking functions"
<td>1.000000</td>
<td>0.333333</td>
<td>0.700000</td>
- <td>22584</td>
+ <td>22787</td>
<td>1</td>
<td>0</td>
</tr>
diff --git a/python/vespa/docs/index.html b/python/vespa/docs/index.html
index 1cbcf513826..054c74f3bec 100644
--- a/python/vespa/docs/index.html
+++ b/python/vespa/docs/index.html
@@ -158,7 +158,7 @@ description: "Provide data analysis support for Vespa applications"
<div class="output_text output_subarea output_execute_result">
-<pre>{&#39;totalCount&#39;: 1078}</pre>
+<pre>{&#39;totalCount&#39;: 1077}</pre>
</div>
</div>
@@ -231,7 +231,7 @@ description: "Provide data analysis support for Vespa applications"
<span class="n">labelled_data</span> <span class="o">=</span> <span class="n">labelled_data</span><span class="p">,</span>
<span class="n">id_field</span> <span class="o">=</span> <span class="s2">&quot;id&quot;</span><span class="p">,</span>
<span class="n">query_model</span> <span class="o">=</span> <span class="n">query_model</span><span class="p">,</span>
- <span class="n">number_random_docs</span> <span class="o">=</span> <span class="mi">2</span>
+ <span class="n">number_additional_docs</span> <span class="o">=</span> <span class="mi">2</span>
<span class="p">)</span>
<span class="n">training_data_batch</span>
</pre></div>
@@ -333,7 +333,7 @@ description: "Provide data analysis support for Vespa applications"
<td>1.000000</td>
<td>1.000000</td>
<td>1.000000</td>
- <td>47396</td>
+ <td>56212</td>
<td>0</td>
<td>0</td>
</tr>
@@ -357,7 +357,7 @@ description: "Provide data analysis support for Vespa applications"
<td>0.617188</td>
<td>0.428571</td>
<td>0.457087</td>
- <td>32484</td>
+ <td>34026</td>
<td>0</td>
<td>0</td>
</tr>
@@ -405,7 +405,7 @@ description: "Provide data analysis support for Vespa applications"
<td>1.000000</td>
<td>1.000000</td>
<td>1.000000</td>
- <td>47396</td>
+ <td>56212</td>
<td>0</td>
<td>0</td>
</tr>
@@ -429,7 +429,7 @@ description: "Provide data analysis support for Vespa applications"
<td>0.617188</td>
<td>0.428571</td>
<td>0.457087</td>
- <td>32484</td>
+ <td>34026</td>
<td>0</td>
<td>0</td>
</tr>
@@ -477,7 +477,7 @@ description: "Provide data analysis support for Vespa applications"
<td>1.000000</td>
<td>1.000000</td>
<td>1.000000</td>
- <td>28609</td>
+ <td>29774</td>
<td>1</td>
<td>0</td>
</tr>
@@ -496,12 +496,12 @@ description: "Provide data analysis support for Vespa applications"
<td>...</td>
<td>0.0</td>
<td>0.0</td>
- <td>0.333333</td>
+ <td>0.500000</td>
<td>1.0</td>
<td>1.000000</td>
- <td>0.250000</td>
- <td>0.641667</td>
- <td>19805</td>
+ <td>0.333333</td>
+ <td>0.700000</td>
+ <td>22787</td>
<td>1</td>
<td>0</td>
</tr>
@@ -549,7 +549,7 @@ description: "Provide data analysis support for Vespa applications"
<td>1.000000</td>
<td>1.000000</td>
<td>1.000000</td>
- <td>28609</td>
+ <td>29774</td>
<td>1</td>
<td>0</td>
</tr>
@@ -568,12 +568,12 @@ description: "Provide data analysis support for Vespa applications"
<td>...</td>
<td>0.0</td>
<td>0.0</td>
- <td>0.333333</td>
+ <td>0.500000</td>
<td>1.0</td>
<td>1.000000</td>
- <td>0.250000</td>
- <td>0.641667</td>
- <td>19805</td>
+ <td>0.333333</td>
+ <td>0.700000</td>
+ <td>22787</td>
<td>1</td>
<td>0</td>
</tr>
@@ -694,18 +694,18 @@ description: "Provide data analysis support for Vespa applications"
<tr>
<th>0</th>
<td>0</td>
- <td>1145</td>
- <td>58692</td>
- <td>0.019509</td>
+ <td>1267</td>
+ <td>62529</td>
+ <td>0.020263</td>
<td>0</td>
<td>0</td>
</tr>
<tr>
<th>1</th>
<td>1</td>
- <td>805</td>
- <td>58692</td>
- <td>0.013716</td>
+ <td>887</td>
+ <td>62529</td>
+ <td>0.014185</td>
<td>0</td>
<td>0</td>
</tr>
diff --git a/python/vespa/docs/query.html b/python/vespa/docs/query.html
index efb1caa7070..a92f85ad04b 100644
--- a/python/vespa/docs/query.html
+++ b/python/vespa/docs/query.html
@@ -123,7 +123,7 @@ description: "Python query API"
<div class="output_text output_subarea output_execute_result">
-<pre>{&#39;totalCount&#39;: 49206}</pre>
+<pre>{&#39;totalCount&#39;: 52387}</pre>
</div>
</div>
@@ -136,6 +136,12 @@ description: "Python query API"
<div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
<div class="text_cell_render border-box-sizing rendered_html">
+<h2 id="Specify-a-query-model">Specify a query model<a class="anchor-link" href="#Specify-a-query-model"> </a></h2>
+</div>
+</div>
+</div>
+<div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
+<div class="text_cell_render border-box-sizing rendered_html">
<h3 id="Query-+-term-matching-+-rank-profile">Query + term-matching + rank profile<a class="anchor-link" href="#Query-+-term-matching-+-rank-profile"> </a></h3>
</div>
</div>
@@ -187,7 +193,7 @@ description: "Python query API"
<div class="output_text output_subarea output_execute_result">
-<pre>{&#39;totalCount&#39;: 49206}</pre>
+<pre>{&#39;totalCount&#39;: 52387}</pre>
</div>
</div>
@@ -275,7 +281,121 @@ description: "Python query API"
<div class="output_text output_subarea output_execute_result">
-<pre>{&#39;totalCount&#39;: 1081}</pre>
+<pre>{&#39;totalCount&#39;: 1084}</pre>
+</div>
+
+</div>
+
+</div>
+</div>
+
+</div>
+ {% endraw %}
+
+<div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
+<div class="text_cell_render border-box-sizing rendered_html">
+<h2 id="Recall-specific-documents">Recall specific documents<a class="anchor-link" href="#Recall-specific-documents"> </a></h2>
+</div>
+</div>
+</div>
+<div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
+<div class="text_cell_render border-box-sizing rendered_html">
+<p>Let's take a look at the top 3 ids from the last query.</p>
+
+</div>
+</div>
+</div>
+ {% raw %}
+
+<div class="cell border-box-sizing code_cell rendered">
+<div class="input">
+
+<div class="inner_cell">
+ <div class="input_area">
+<div class=" highlight hl-ipython3"><pre><span></span><span class="n">top_ids</span> <span class="o">=</span> <span class="p">[</span><span class="n">hit</span><span class="p">[</span><span class="s2">&quot;fields&quot;</span><span class="p">][</span><span class="s2">&quot;id&quot;</span><span class="p">]</span> <span class="k">for</span> <span class="n">hit</span> <span class="ow">in</span> <span class="n">results</span><span class="p">[</span><span class="s2">&quot;root&quot;</span><span class="p">][</span><span class="s2">&quot;children&quot;</span><span class="p">][</span><span class="mi">0</span><span class="p">:</span><span class="mi">3</span><span class="p">]]</span>
+<span class="n">top_ids</span>
+</pre></div>
+
+ </div>
+</div>
+</div>
+
+<div class="output_wrapper">
+<div class="output">
+
+<div class="output_area">
+
+
+
+<div class="output_text output_subarea output_execute_result">
+<pre>[40215, 18456, 33692]</pre>
+</div>
+
+</div>
+
+</div>
+</div>
+
+</div>
+ {% endraw %}
+
+<div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
+<div class="text_cell_render border-box-sizing rendered_html">
+<p>Assume that we now want to retrieve the second and third ids above. We can do so with the <code>recall</code> argument.</p>
+
+</div>
+</div>
+</div>
+ {% raw %}
+
+<div class="cell border-box-sizing code_cell rendered">
+<div class="input">
+
+<div class="inner_cell">
+ <div class="input_area">
+<div class=" highlight hl-ipython3"><pre><span></span><span class="n">results_with_recall</span> <span class="o">=</span> <span class="n">app</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">query</span><span class="o">=</span><span class="s2">&quot;Is remdesivir an effective treatment for COVID-19?&quot;</span><span class="p">,</span>
+ <span class="n">query_model</span><span class="o">=</span><span class="n">query_model</span><span class="p">,</span>
+ <span class="n">recall</span> <span class="o">=</span> <span class="p">(</span><span class="s2">&quot;id&quot;</span><span class="p">,</span> <span class="n">top_ids</span><span class="p">[</span><span class="mi">1</span><span class="p">:</span><span class="mi">3</span><span class="p">]))</span>
+</pre></div>
+
+ </div>
+</div>
+</div>
+
+</div>
+ {% endraw %}
+
+<div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
+<div class="text_cell_render border-box-sizing rendered_html">
+<p>It will only retrieve the documents with Vespa field <code>id</code> that is defined on the list that is inside the tuple.</p>
+
+</div>
+</div>
+</div>
+ {% raw %}
+
+<div class="cell border-box-sizing code_cell rendered">
+<div class="input">
+
+<div class="inner_cell">
+ <div class="input_area">
+<div class=" highlight hl-ipython3"><pre><span></span><span class="n">id_recalled</span> <span class="o">=</span> <span class="p">[</span><span class="n">hit</span><span class="p">[</span><span class="s2">&quot;fields&quot;</span><span class="p">][</span><span class="s2">&quot;id&quot;</span><span class="p">]</span> <span class="k">for</span> <span class="n">hit</span> <span class="ow">in</span> <span class="n">results_with_recall</span><span class="p">[</span><span class="s2">&quot;root&quot;</span><span class="p">][</span><span class="s2">&quot;children&quot;</span><span class="p">]]</span>
+<span class="n">id_recalled</span>
+</pre></div>
+
+ </div>
+</div>
+</div>
+
+<div class="output_wrapper">
+<div class="output">
+
+<div class="output_area">
+
+
+
+<div class="output_text output_subarea output_execute_result">
+<pre>[18456, 33692]</pre>
</div>
</div>
diff --git a/python/vespa/vespa/_nbdev.py b/python/vespa/vespa/_nbdev.py
index 47118aa72f9..b68d7b2f4bc 100644
--- a/python/vespa/vespa/_nbdev.py
+++ b/python/vespa/vespa/_nbdev.py
@@ -6,8 +6,8 @@ index = {}
modules = []
-doc_url = "https://thigm85.github.io/vespa/"
+doc_url = "https://vespa-engine.github.io/vespa/"
-git_url = "https://github.com/thigm85/vespa/tree/master/"
+git_url = "https://github.com/vespa-engine/vespa/tree/master/"
def custom_doc_links(name): return None