diff options
author | tmartins <thigm85@gmail.com> | 2020-06-04 12:35:38 +0200 |
---|---|---|
committer | tmartins <thigm85@gmail.com> | 2020-06-04 12:35:38 +0200 |
commit | 4f86f03994ed2cfcf0980f5b76e57110d65597d5 (patch) | |
tree | dc89d6aba5bb93755d5a835ad1e663af196323fd /python | |
parent | 3570a88c42890577dab56e401a799e25009cbb02 (diff) |
update docs
Diffstat (limited to 'python')
-rw-r--r-- | python/vespa/README.md | 60 | ||||
-rw-r--r-- | python/vespa/docs/collect_training_data.html | 36 | ||||
-rw-r--r-- | python/vespa/docs/index.html | 44 | ||||
-rw-r--r-- | python/vespa/docs/query.html | 126 | ||||
-rw-r--r-- | python/vespa/vespa/_nbdev.py | 4 |
5 files changed, 195 insertions, 75 deletions
diff --git a/python/vespa/README.md b/python/vespa/README.md index 23d8541bcfe..c316564f3c1 100644 --- a/python/vespa/README.md +++ b/python/vespa/README.md @@ -10,7 +10,7 @@ > Connect to a running Vespa application -```python +``` from vespa.application import Vespa app = Vespa(url = "https://api.cord19.vespa.ai") @@ -20,7 +20,7 @@ app = Vespa(url = "https://api.cord19.vespa.ai") > Easily define matching and ranking criteria -```python +``` from vespa.query import Query, Union, WeakAnd, ANN, RankProfile from random import random @@ -44,21 +44,21 @@ query_model = Query(match_phase=match_phase, rank_profile=rank_profile) > Send queries via the query API. See the [query page](/vespa/query) for more examples. -```python +``` query_result = app.query( query="Is remdesivir an effective treatment for COVID-19?", query_model=query_model ) ``` -```python +``` query_result["root"]["fields"] ``` - {'totalCount': 1078} + {'totalCount': 1077} @@ -66,7 +66,7 @@ query_result["root"]["fields"] > How to structure labelled data -```python +``` labelled_data = [ { "query_id": 0, @@ -87,12 +87,12 @@ Non-relevant documents are assigned `"score": 0` by default. Relevant documents > Collect training data to analyse and/or improve ranking functions. See the [collect training data page](/vespa/collect_training_data) for more examples. -```python +``` training_data_batch = app.collect_training_data( labelled_data = labelled_data, id_field = "id", query_model = query_model, - number_random_docs = 2 + number_additional_docs = 2 ) training_data_batch ``` @@ -186,7 +186,7 @@ training_data_batch <td>1.000000</td> <td>1.000000</td> <td>1.000000</td> - <td>47396</td> + <td>56212</td> <td>0</td> <td>0</td> </tr> @@ -210,7 +210,7 @@ training_data_batch <td>0.617188</td> <td>0.428571</td> <td>0.457087</td> - <td>32484</td> + <td>34026</td> <td>0</td> <td>0</td> </tr> @@ -258,7 +258,7 @@ training_data_batch <td>1.000000</td> <td>1.000000</td> <td>1.000000</td> - <td>47396</td> + <td>56212</td> <td>0</td> <td>0</td> </tr> @@ -282,7 +282,7 @@ training_data_batch <td>0.617188</td> <td>0.428571</td> <td>0.457087</td> - <td>32484</td> + <td>34026</td> <td>0</td> <td>0</td> </tr> @@ -330,7 +330,7 @@ training_data_batch <td>1.000000</td> <td>1.000000</td> <td>1.000000</td> - <td>28609</td> + <td>29774</td> <td>1</td> <td>0</td> </tr> @@ -349,12 +349,12 @@ training_data_batch <td>...</td> <td>0.0</td> <td>0.0</td> - <td>0.333333</td> + <td>0.500000</td> <td>1.0</td> <td>1.000000</td> - <td>0.250000</td> - <td>0.641667</td> - <td>19805</td> + <td>0.333333</td> + <td>0.700000</td> + <td>22787</td> <td>1</td> <td>0</td> </tr> @@ -402,7 +402,7 @@ training_data_batch <td>1.000000</td> <td>1.000000</td> <td>1.000000</td> - <td>28609</td> + <td>29774</td> <td>1</td> <td>0</td> </tr> @@ -421,12 +421,12 @@ training_data_batch <td>...</td> <td>0.0</td> <td>0.0</td> - <td>0.333333</td> + <td>0.500000</td> <td>1.0</td> <td>1.000000</td> - <td>0.250000</td> - <td>0.641667</td> - <td>19805</td> + <td>0.333333</td> + <td>0.700000</td> + <td>22787</td> <td>1</td> <td>0</td> </tr> @@ -446,7 +446,7 @@ We will define the following evaluation metrics: * recall @ 10 per query * MRR @ 10 per query -```python +``` from vespa.evaluation import MatchRatio, Recall, ReciprocalRank eval_metrics = [MatchRatio(), Recall(at=10), ReciprocalRank(at=10)] @@ -454,7 +454,7 @@ eval_metrics = [MatchRatio(), Recall(at=10), ReciprocalRank(at=10)] Evaluate: -```python +``` evaluation = app.evaluate( labelled_data = labelled_data, eval_metrics = eval_metrics, @@ -497,18 +497,18 @@ evaluation <tr> <th>0</th> <td>0</td> - <td>1145</td> - <td>58692</td> - <td>0.019509</td> + <td>1267</td> + <td>62529</td> + <td>0.020263</td> <td>0</td> <td>0</td> </tr> <tr> <th>1</th> <td>1</td> - <td>805</td> - <td>58692</td> - <td>0.013716</td> + <td>887</td> + <td>62529</td> + <td>0.014185</td> <td>0</td> <td>0</td> </tr> diff --git a/python/vespa/docs/collect_training_data.html b/python/vespa/docs/collect_training_data.html index 088e766700c..609b7182482 100644 --- a/python/vespa/docs/collect_training_data.html +++ b/python/vespa/docs/collect_training_data.html @@ -115,7 +115,7 @@ description: "Collect training data to analyse and/or improve ranking functions" <span class="n">labelled_data</span> <span class="o">=</span> <span class="n">labelled_data</span><span class="p">,</span> <span class="n">id_field</span> <span class="o">=</span> <span class="s2">"id"</span><span class="p">,</span> <span class="n">query_model</span> <span class="o">=</span> <span class="n">query_model</span><span class="p">,</span> - <span class="n">number_random_docs</span> <span class="o">=</span> <span class="mi">2</span> + <span class="n">number_additional_docs</span> <span class="o">=</span> <span class="mi">2</span> <span class="p">)</span> <span class="n">training_data_batch</span> </pre></div> @@ -217,7 +217,7 @@ description: "Collect training data to analyse and/or improve ranking functions" <td>1.000000</td> <td>1.000000</td> <td>1.000000</td> - <td>47396</td> + <td>56212</td> <td>0</td> <td>0</td> </tr> @@ -241,7 +241,7 @@ description: "Collect training data to analyse and/or improve ranking functions" <td>0.617188</td> <td>0.428571</td> <td>0.457087</td> - <td>32484</td> + <td>34026</td> <td>0</td> <td>0</td> </tr> @@ -289,7 +289,7 @@ description: "Collect training data to analyse and/or improve ranking functions" <td>1.000000</td> <td>1.000000</td> <td>1.000000</td> - <td>47396</td> + <td>56212</td> <td>0</td> <td>0</td> </tr> @@ -313,7 +313,7 @@ description: "Collect training data to analyse and/or improve ranking functions" <td>0.617188</td> <td>0.428571</td> <td>0.457087</td> - <td>32484</td> + <td>34026</td> <td>0</td> <td>0</td> </tr> @@ -361,7 +361,7 @@ description: "Collect training data to analyse and/or improve ranking functions" <td>1.000000</td> <td>1.000000</td> <td>1.000000</td> - <td>28609</td> + <td>29774</td> <td>1</td> <td>0</td> </tr> @@ -385,7 +385,7 @@ description: "Collect training data to analyse and/or improve ranking functions" <td>1.000000</td> <td>0.333333</td> <td>0.700000</td> - <td>22584</td> + <td>22787</td> <td>1</td> <td>0</td> </tr> @@ -433,7 +433,7 @@ description: "Collect training data to analyse and/or improve ranking functions" <td>1.000000</td> <td>1.000000</td> <td>1.000000</td> - <td>28609</td> + <td>29774</td> <td>1</td> <td>0</td> </tr> @@ -457,7 +457,7 @@ description: "Collect training data to analyse and/or improve ranking functions" <td>1.000000</td> <td>0.333333</td> <td>0.700000</td> - <td>22584</td> + <td>22787</td> <td>1</td> <td>0</td> </tr> @@ -502,7 +502,7 @@ description: "Collect training data to analyse and/or improve ranking functions" <span class="n">relevant_id</span> <span class="o">=</span> <span class="n">doc_data</span><span class="p">[</span><span class="s2">"id"</span><span class="p">],</span> <span class="n">id_field</span> <span class="o">=</span> <span class="s2">"id"</span><span class="p">,</span> <span class="n">query_model</span> <span class="o">=</span> <span class="n">query_model</span><span class="p">,</span> - <span class="n">number_random_docs</span> <span class="o">=</span> <span class="mi">2</span> + <span class="n">number_additional_docs</span> <span class="o">=</span> <span class="mi">2</span> <span class="p">)</span> <span class="n">training_data</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">training_data_point</span><span class="p">)</span> <span class="n">training_data</span> <span class="o">=</span> <span class="n">DataFrame</span><span class="o">.</span><span class="n">from_records</span><span class="p">(</span><span class="n">training_data</span><span class="p">)</span> @@ -606,7 +606,7 @@ description: "Collect training data to analyse and/or improve ranking functions" <td>1.000000</td> <td>1.000000</td> <td>1.000000</td> - <td>47396</td> + <td>56212</td> <td>0</td> <td>0</td> </tr> @@ -630,7 +630,7 @@ description: "Collect training data to analyse and/or improve ranking functions" <td>0.617188</td> <td>0.428571</td> <td>0.457087</td> - <td>32484</td> + <td>34026</td> <td>0</td> <td>0</td> </tr> @@ -678,7 +678,7 @@ description: "Collect training data to analyse and/or improve ranking functions" <td>1.000000</td> <td>1.000000</td> <td>1.000000</td> - <td>47396</td> + <td>56212</td> <td>0</td> <td>0</td> </tr> @@ -702,7 +702,7 @@ description: "Collect training data to analyse and/or improve ranking functions" <td>0.617188</td> <td>0.428571</td> <td>0.457087</td> - <td>32484</td> + <td>34026</td> <td>0</td> <td>0</td> </tr> @@ -750,7 +750,7 @@ description: "Collect training data to analyse and/or improve ranking functions" <td>1.000000</td> <td>1.000000</td> <td>1.000000</td> - <td>28609</td> + <td>29774</td> <td>1</td> <td>0</td> </tr> @@ -774,7 +774,7 @@ description: "Collect training data to analyse and/or improve ranking functions" <td>1.000000</td> <td>0.333333</td> <td>0.700000</td> - <td>22584</td> + <td>22787</td> <td>1</td> <td>0</td> </tr> @@ -822,7 +822,7 @@ description: "Collect training data to analyse and/or improve ranking functions" <td>1.000000</td> <td>1.000000</td> <td>1.000000</td> - <td>28609</td> + <td>29774</td> <td>1</td> <td>0</td> </tr> @@ -846,7 +846,7 @@ description: "Collect training data to analyse and/or improve ranking functions" <td>1.000000</td> <td>0.333333</td> <td>0.700000</td> - <td>22584</td> + <td>22787</td> <td>1</td> <td>0</td> </tr> diff --git a/python/vespa/docs/index.html b/python/vespa/docs/index.html index 1cbcf513826..054c74f3bec 100644 --- a/python/vespa/docs/index.html +++ b/python/vespa/docs/index.html @@ -158,7 +158,7 @@ description: "Provide data analysis support for Vespa applications" <div class="output_text output_subarea output_execute_result"> -<pre>{'totalCount': 1078}</pre> +<pre>{'totalCount': 1077}</pre> </div> </div> @@ -231,7 +231,7 @@ description: "Provide data analysis support for Vespa applications" <span class="n">labelled_data</span> <span class="o">=</span> <span class="n">labelled_data</span><span class="p">,</span> <span class="n">id_field</span> <span class="o">=</span> <span class="s2">"id"</span><span class="p">,</span> <span class="n">query_model</span> <span class="o">=</span> <span class="n">query_model</span><span class="p">,</span> - <span class="n">number_random_docs</span> <span class="o">=</span> <span class="mi">2</span> + <span class="n">number_additional_docs</span> <span class="o">=</span> <span class="mi">2</span> <span class="p">)</span> <span class="n">training_data_batch</span> </pre></div> @@ -333,7 +333,7 @@ description: "Provide data analysis support for Vespa applications" <td>1.000000</td> <td>1.000000</td> <td>1.000000</td> - <td>47396</td> + <td>56212</td> <td>0</td> <td>0</td> </tr> @@ -357,7 +357,7 @@ description: "Provide data analysis support for Vespa applications" <td>0.617188</td> <td>0.428571</td> <td>0.457087</td> - <td>32484</td> + <td>34026</td> <td>0</td> <td>0</td> </tr> @@ -405,7 +405,7 @@ description: "Provide data analysis support for Vespa applications" <td>1.000000</td> <td>1.000000</td> <td>1.000000</td> - <td>47396</td> + <td>56212</td> <td>0</td> <td>0</td> </tr> @@ -429,7 +429,7 @@ description: "Provide data analysis support for Vespa applications" <td>0.617188</td> <td>0.428571</td> <td>0.457087</td> - <td>32484</td> + <td>34026</td> <td>0</td> <td>0</td> </tr> @@ -477,7 +477,7 @@ description: "Provide data analysis support for Vespa applications" <td>1.000000</td> <td>1.000000</td> <td>1.000000</td> - <td>28609</td> + <td>29774</td> <td>1</td> <td>0</td> </tr> @@ -496,12 +496,12 @@ description: "Provide data analysis support for Vespa applications" <td>...</td> <td>0.0</td> <td>0.0</td> - <td>0.333333</td> + <td>0.500000</td> <td>1.0</td> <td>1.000000</td> - <td>0.250000</td> - <td>0.641667</td> - <td>19805</td> + <td>0.333333</td> + <td>0.700000</td> + <td>22787</td> <td>1</td> <td>0</td> </tr> @@ -549,7 +549,7 @@ description: "Provide data analysis support for Vespa applications" <td>1.000000</td> <td>1.000000</td> <td>1.000000</td> - <td>28609</td> + <td>29774</td> <td>1</td> <td>0</td> </tr> @@ -568,12 +568,12 @@ description: "Provide data analysis support for Vespa applications" <td>...</td> <td>0.0</td> <td>0.0</td> - <td>0.333333</td> + <td>0.500000</td> <td>1.0</td> <td>1.000000</td> - <td>0.250000</td> - <td>0.641667</td> - <td>19805</td> + <td>0.333333</td> + <td>0.700000</td> + <td>22787</td> <td>1</td> <td>0</td> </tr> @@ -694,18 +694,18 @@ description: "Provide data analysis support for Vespa applications" <tr> <th>0</th> <td>0</td> - <td>1145</td> - <td>58692</td> - <td>0.019509</td> + <td>1267</td> + <td>62529</td> + <td>0.020263</td> <td>0</td> <td>0</td> </tr> <tr> <th>1</th> <td>1</td> - <td>805</td> - <td>58692</td> - <td>0.013716</td> + <td>887</td> + <td>62529</td> + <td>0.014185</td> <td>0</td> <td>0</td> </tr> diff --git a/python/vespa/docs/query.html b/python/vespa/docs/query.html index efb1caa7070..a92f85ad04b 100644 --- a/python/vespa/docs/query.html +++ b/python/vespa/docs/query.html @@ -123,7 +123,7 @@ description: "Python query API" <div class="output_text output_subarea output_execute_result"> -<pre>{'totalCount': 49206}</pre> +<pre>{'totalCount': 52387}</pre> </div> </div> @@ -136,6 +136,12 @@ description: "Python query API" <div class="cell border-box-sizing text_cell rendered"><div class="inner_cell"> <div class="text_cell_render border-box-sizing rendered_html"> +<h2 id="Specify-a-query-model">Specify a query model<a class="anchor-link" href="#Specify-a-query-model"> </a></h2> +</div> +</div> +</div> +<div class="cell border-box-sizing text_cell rendered"><div class="inner_cell"> +<div class="text_cell_render border-box-sizing rendered_html"> <h3 id="Query-+-term-matching-+-rank-profile">Query + term-matching + rank profile<a class="anchor-link" href="#Query-+-term-matching-+-rank-profile"> </a></h3> </div> </div> @@ -187,7 +193,7 @@ description: "Python query API" <div class="output_text output_subarea output_execute_result"> -<pre>{'totalCount': 49206}</pre> +<pre>{'totalCount': 52387}</pre> </div> </div> @@ -275,7 +281,121 @@ description: "Python query API" <div class="output_text output_subarea output_execute_result"> -<pre>{'totalCount': 1081}</pre> +<pre>{'totalCount': 1084}</pre> +</div> + +</div> + +</div> +</div> + +</div> + {% endraw %} + +<div class="cell border-box-sizing text_cell rendered"><div class="inner_cell"> +<div class="text_cell_render border-box-sizing rendered_html"> +<h2 id="Recall-specific-documents">Recall specific documents<a class="anchor-link" href="#Recall-specific-documents"> </a></h2> +</div> +</div> +</div> +<div class="cell border-box-sizing text_cell rendered"><div class="inner_cell"> +<div class="text_cell_render border-box-sizing rendered_html"> +<p>Let's take a look at the top 3 ids from the last query.</p> + +</div> +</div> +</div> + {% raw %} + +<div class="cell border-box-sizing code_cell rendered"> +<div class="input"> + +<div class="inner_cell"> + <div class="input_area"> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">top_ids</span> <span class="o">=</span> <span class="p">[</span><span class="n">hit</span><span class="p">[</span><span class="s2">"fields"</span><span class="p">][</span><span class="s2">"id"</span><span class="p">]</span> <span class="k">for</span> <span class="n">hit</span> <span class="ow">in</span> <span class="n">results</span><span class="p">[</span><span class="s2">"root"</span><span class="p">][</span><span class="s2">"children"</span><span class="p">][</span><span class="mi">0</span><span class="p">:</span><span class="mi">3</span><span class="p">]]</span> +<span class="n">top_ids</span> +</pre></div> + + </div> +</div> +</div> + +<div class="output_wrapper"> +<div class="output"> + +<div class="output_area"> + + + +<div class="output_text output_subarea output_execute_result"> +<pre>[40215, 18456, 33692]</pre> +</div> + +</div> + +</div> +</div> + +</div> + {% endraw %} + +<div class="cell border-box-sizing text_cell rendered"><div class="inner_cell"> +<div class="text_cell_render border-box-sizing rendered_html"> +<p>Assume that we now want to retrieve the second and third ids above. We can do so with the <code>recall</code> argument.</p> + +</div> +</div> +</div> + {% raw %} + +<div class="cell border-box-sizing code_cell rendered"> +<div class="input"> + +<div class="inner_cell"> + <div class="input_area"> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">results_with_recall</span> <span class="o">=</span> <span class="n">app</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">query</span><span class="o">=</span><span class="s2">"Is remdesivir an effective treatment for COVID-19?"</span><span class="p">,</span> + <span class="n">query_model</span><span class="o">=</span><span class="n">query_model</span><span class="p">,</span> + <span class="n">recall</span> <span class="o">=</span> <span class="p">(</span><span class="s2">"id"</span><span class="p">,</span> <span class="n">top_ids</span><span class="p">[</span><span class="mi">1</span><span class="p">:</span><span class="mi">3</span><span class="p">]))</span> +</pre></div> + + </div> +</div> +</div> + +</div> + {% endraw %} + +<div class="cell border-box-sizing text_cell rendered"><div class="inner_cell"> +<div class="text_cell_render border-box-sizing rendered_html"> +<p>It will only retrieve the documents with Vespa field <code>id</code> that is defined on the list that is inside the tuple.</p> + +</div> +</div> +</div> + {% raw %} + +<div class="cell border-box-sizing code_cell rendered"> +<div class="input"> + +<div class="inner_cell"> + <div class="input_area"> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">id_recalled</span> <span class="o">=</span> <span class="p">[</span><span class="n">hit</span><span class="p">[</span><span class="s2">"fields"</span><span class="p">][</span><span class="s2">"id"</span><span class="p">]</span> <span class="k">for</span> <span class="n">hit</span> <span class="ow">in</span> <span class="n">results_with_recall</span><span class="p">[</span><span class="s2">"root"</span><span class="p">][</span><span class="s2">"children"</span><span class="p">]]</span> +<span class="n">id_recalled</span> +</pre></div> + + </div> +</div> +</div> + +<div class="output_wrapper"> +<div class="output"> + +<div class="output_area"> + + + +<div class="output_text output_subarea output_execute_result"> +<pre>[18456, 33692]</pre> </div> </div> diff --git a/python/vespa/vespa/_nbdev.py b/python/vespa/vespa/_nbdev.py index 47118aa72f9..b68d7b2f4bc 100644 --- a/python/vespa/vespa/_nbdev.py +++ b/python/vespa/vespa/_nbdev.py @@ -6,8 +6,8 @@ index = {} modules = [] -doc_url = "https://thigm85.github.io/vespa/" +doc_url = "https://vespa-engine.github.io/vespa/" -git_url = "https://github.com/thigm85/vespa/tree/master/" +git_url = "https://github.com/vespa-engine/vespa/tree/master/" def custom_doc_links(name): return None |