summaryrefslogtreecommitdiffstats
path: root/python
diff options
context:
space:
mode:
authortmartins <thigm85@gmail.com>2020-06-11 12:35:42 +0200
committertmartins <thigm85@gmail.com>2020-06-11 12:35:42 +0200
commita33169859cb5d3809db4fd592916c77941499e55 (patch)
tree29b143753cbd15e4aab21f06067f23efdeb52c60 /python
parenta63581080b8a49790b4af27542b6ce8910cc472f (diff)
clean notebook
Diffstat (limited to 'python')
-rw-r--r--python/vespa/notebooks/index.ipynb657
1 files changed, 12 insertions, 645 deletions
diff --git a/python/vespa/notebooks/index.ipynb b/python/vespa/notebooks/index.ipynb
index 20b87c46083..e6ffdc538e2 100644
--- a/python/vespa/notebooks/index.ipynb
+++ b/python/vespa/notebooks/index.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -45,7 +45,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -65,7 +65,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -99,7 +99,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -111,20 +111,9 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "969"
- ]
- },
- "execution_count": 5,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"query_result.number_documents_retrieved"
]
@@ -140,7 +129,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -176,552 +165,9 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>attributeMatch(authors.first)</th>\n",
- " <th>attributeMatch(authors.first).averageWeight</th>\n",
- " <th>attributeMatch(authors.first).completeness</th>\n",
- " <th>attributeMatch(authors.first).fieldCompleteness</th>\n",
- " <th>attributeMatch(authors.first).importance</th>\n",
- " <th>attributeMatch(authors.first).matches</th>\n",
- " <th>attributeMatch(authors.first).maxWeight</th>\n",
- " <th>attributeMatch(authors.first).normalizedWeight</th>\n",
- " <th>attributeMatch(authors.first).normalizedWeightedWeight</th>\n",
- " <th>attributeMatch(authors.first).queryCompleteness</th>\n",
- " <th>...</th>\n",
- " <th>textSimilarity(results).queryCoverage</th>\n",
- " <th>textSimilarity(results).score</th>\n",
- " <th>textSimilarity(title).fieldCoverage</th>\n",
- " <th>textSimilarity(title).order</th>\n",
- " <th>textSimilarity(title).proximity</th>\n",
- " <th>textSimilarity(title).queryCoverage</th>\n",
- " <th>textSimilarity(title).score</th>\n",
- " <th>document_id</th>\n",
- " <th>query_id</th>\n",
- " <th>relevant</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>...</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.062500</td>\n",
- " <td>0.0</td>\n",
- " <td>0.000000</td>\n",
- " <td>0.142857</td>\n",
- " <td>0.055357</td>\n",
- " <td>0</td>\n",
- " <td>0</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>...</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>1.000000</td>\n",
- " <td>1.0</td>\n",
- " <td>1.000000</td>\n",
- " <td>1.000000</td>\n",
- " <td>1.000000</td>\n",
- " <td>97200</td>\n",
- " <td>0</td>\n",
- " <td>0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>...</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.266667</td>\n",
- " <td>1.0</td>\n",
- " <td>0.869792</td>\n",
- " <td>0.571429</td>\n",
- " <td>0.679189</td>\n",
- " <td>69447</td>\n",
- " <td>0</td>\n",
- " <td>0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>...</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.142857</td>\n",
- " <td>0.0</td>\n",
- " <td>0.437500</td>\n",
- " <td>0.142857</td>\n",
- " <td>0.224554</td>\n",
- " <td>3</td>\n",
- " <td>0</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>...</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>1.000000</td>\n",
- " <td>1.0</td>\n",
- " <td>1.000000</td>\n",
- " <td>1.000000</td>\n",
- " <td>1.000000</td>\n",
- " <td>97200</td>\n",
- " <td>0</td>\n",
- " <td>0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>5</th>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>...</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.266667</td>\n",
- " <td>1.0</td>\n",
- " <td>0.869792</td>\n",
- " <td>0.571429</td>\n",
- " <td>0.679189</td>\n",
- " <td>69447</td>\n",
- " <td>0</td>\n",
- " <td>0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>6</th>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>...</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.111111</td>\n",
- " <td>0.0</td>\n",
- " <td>0.000000</td>\n",
- " <td>0.083333</td>\n",
- " <td>0.047222</td>\n",
- " <td>1</td>\n",
- " <td>1</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>7</th>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>...</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>1.000000</td>\n",
- " <td>1.0</td>\n",
- " <td>1.000000</td>\n",
- " <td>1.000000</td>\n",
- " <td>1.000000</td>\n",
- " <td>116256</td>\n",
- " <td>1</td>\n",
- " <td>0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>8</th>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>...</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.187500</td>\n",
- " <td>1.0</td>\n",
- " <td>1.000000</td>\n",
- " <td>0.250000</td>\n",
- " <td>0.612500</td>\n",
- " <td>14888</td>\n",
- " <td>1</td>\n",
- " <td>0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>9</th>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>...</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.083333</td>\n",
- " <td>0.0</td>\n",
- " <td>0.000000</td>\n",
- " <td>0.083333</td>\n",
- " <td>0.041667</td>\n",
- " <td>5</td>\n",
- " <td>1</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>10</th>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>...</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>1.000000</td>\n",
- " <td>1.0</td>\n",
- " <td>1.000000</td>\n",
- " <td>1.000000</td>\n",
- " <td>1.000000</td>\n",
- " <td>116256</td>\n",
- " <td>1</td>\n",
- " <td>0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>11</th>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>...</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.187500</td>\n",
- " <td>1.0</td>\n",
- " <td>1.000000</td>\n",
- " <td>0.250000</td>\n",
- " <td>0.612500</td>\n",
- " <td>14888</td>\n",
- " <td>1</td>\n",
- " <td>0</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "<p>12 rows × 984 columns</p>\n",
- "</div>"
- ],
- "text/plain": [
- " attributeMatch(authors.first) \\\n",
- "0 0.0 \n",
- "1 0.0 \n",
- "2 0.0 \n",
- "3 0.0 \n",
- "4 0.0 \n",
- "5 0.0 \n",
- "6 0.0 \n",
- "7 0.0 \n",
- "8 0.0 \n",
- "9 0.0 \n",
- "10 0.0 \n",
- "11 0.0 \n",
- "\n",
- " attributeMatch(authors.first).averageWeight \\\n",
- "0 0.0 \n",
- "1 0.0 \n",
- "2 0.0 \n",
- "3 0.0 \n",
- "4 0.0 \n",
- "5 0.0 \n",
- "6 0.0 \n",
- "7 0.0 \n",
- "8 0.0 \n",
- "9 0.0 \n",
- "10 0.0 \n",
- "11 0.0 \n",
- "\n",
- " attributeMatch(authors.first).completeness \\\n",
- "0 0.0 \n",
- "1 0.0 \n",
- "2 0.0 \n",
- "3 0.0 \n",
- "4 0.0 \n",
- "5 0.0 \n",
- "6 0.0 \n",
- "7 0.0 \n",
- "8 0.0 \n",
- "9 0.0 \n",
- "10 0.0 \n",
- "11 0.0 \n",
- "\n",
- " attributeMatch(authors.first).fieldCompleteness \\\n",
- "0 0.0 \n",
- "1 0.0 \n",
- "2 0.0 \n",
- "3 0.0 \n",
- "4 0.0 \n",
- "5 0.0 \n",
- "6 0.0 \n",
- "7 0.0 \n",
- "8 0.0 \n",
- "9 0.0 \n",
- "10 0.0 \n",
- "11 0.0 \n",
- "\n",
- " attributeMatch(authors.first).importance \\\n",
- "0 0.0 \n",
- "1 0.0 \n",
- "2 0.0 \n",
- "3 0.0 \n",
- "4 0.0 \n",
- "5 0.0 \n",
- "6 0.0 \n",
- "7 0.0 \n",
- "8 0.0 \n",
- "9 0.0 \n",
- "10 0.0 \n",
- "11 0.0 \n",
- "\n",
- " attributeMatch(authors.first).matches \\\n",
- "0 0.0 \n",
- "1 0.0 \n",
- "2 0.0 \n",
- "3 0.0 \n",
- "4 0.0 \n",
- "5 0.0 \n",
- "6 0.0 \n",
- "7 0.0 \n",
- "8 0.0 \n",
- "9 0.0 \n",
- "10 0.0 \n",
- "11 0.0 \n",
- "\n",
- " attributeMatch(authors.first).maxWeight \\\n",
- "0 0.0 \n",
- "1 0.0 \n",
- "2 0.0 \n",
- "3 0.0 \n",
- "4 0.0 \n",
- "5 0.0 \n",
- "6 0.0 \n",
- "7 0.0 \n",
- "8 0.0 \n",
- "9 0.0 \n",
- "10 0.0 \n",
- "11 0.0 \n",
- "\n",
- " attributeMatch(authors.first).normalizedWeight \\\n",
- "0 0.0 \n",
- "1 0.0 \n",
- "2 0.0 \n",
- "3 0.0 \n",
- "4 0.0 \n",
- "5 0.0 \n",
- "6 0.0 \n",
- "7 0.0 \n",
- "8 0.0 \n",
- "9 0.0 \n",
- "10 0.0 \n",
- "11 0.0 \n",
- "\n",
- " attributeMatch(authors.first).normalizedWeightedWeight \\\n",
- "0 0.0 \n",
- "1 0.0 \n",
- "2 0.0 \n",
- "3 0.0 \n",
- "4 0.0 \n",
- "5 0.0 \n",
- "6 0.0 \n",
- "7 0.0 \n",
- "8 0.0 \n",
- "9 0.0 \n",
- "10 0.0 \n",
- "11 0.0 \n",
- "\n",
- " attributeMatch(authors.first).queryCompleteness ... \\\n",
- "0 0.0 ... \n",
- "1 0.0 ... \n",
- "2 0.0 ... \n",
- "3 0.0 ... \n",
- "4 0.0 ... \n",
- "5 0.0 ... \n",
- "6 0.0 ... \n",
- "7 0.0 ... \n",
- "8 0.0 ... \n",
- "9 0.0 ... \n",
- "10 0.0 ... \n",
- "11 0.0 ... \n",
- "\n",
- " textSimilarity(results).queryCoverage textSimilarity(results).score \\\n",
- "0 0.0 0.0 \n",
- "1 0.0 0.0 \n",
- "2 0.0 0.0 \n",
- "3 0.0 0.0 \n",
- "4 0.0 0.0 \n",
- "5 0.0 0.0 \n",
- "6 0.0 0.0 \n",
- "7 0.0 0.0 \n",
- "8 0.0 0.0 \n",
- "9 0.0 0.0 \n",
- "10 0.0 0.0 \n",
- "11 0.0 0.0 \n",
- "\n",
- " textSimilarity(title).fieldCoverage textSimilarity(title).order \\\n",
- "0 0.062500 0.0 \n",
- "1 1.000000 1.0 \n",
- "2 0.266667 1.0 \n",
- "3 0.142857 0.0 \n",
- "4 1.000000 1.0 \n",
- "5 0.266667 1.0 \n",
- "6 0.111111 0.0 \n",
- "7 1.000000 1.0 \n",
- "8 0.187500 1.0 \n",
- "9 0.083333 0.0 \n",
- "10 1.000000 1.0 \n",
- "11 0.187500 1.0 \n",
- "\n",
- " textSimilarity(title).proximity textSimilarity(title).queryCoverage \\\n",
- "0 0.000000 0.142857 \n",
- "1 1.000000 1.000000 \n",
- "2 0.869792 0.571429 \n",
- "3 0.437500 0.142857 \n",
- "4 1.000000 1.000000 \n",
- "5 0.869792 0.571429 \n",
- "6 0.000000 0.083333 \n",
- "7 1.000000 1.000000 \n",
- "8 1.000000 0.250000 \n",
- "9 0.000000 0.083333 \n",
- "10 1.000000 1.000000 \n",
- "11 1.000000 0.250000 \n",
- "\n",
- " textSimilarity(title).score document_id query_id relevant \n",
- "0 0.055357 0 0 1 \n",
- "1 1.000000 97200 0 0 \n",
- "2 0.679189 69447 0 0 \n",
- "3 0.224554 3 0 1 \n",
- "4 1.000000 97200 0 0 \n",
- "5 0.679189 69447 0 0 \n",
- "6 0.047222 1 1 1 \n",
- "7 1.000000 116256 1 0 \n",
- "8 0.612500 14888 1 0 \n",
- "9 0.041667 5 1 1 \n",
- "10 1.000000 116256 1 0 \n",
- "11 0.612500 14888 1 0 \n",
- "\n",
- "[12 rows x 984 columns]"
- ]
- },
- "execution_count": 7,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"training_data_batch = app.collect_training_data(\n",
" labelled_data = labelled_data,\n",
@@ -753,7 +199,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -771,76 +217,9 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>query_id</th>\n",
- " <th>match_ratio_retrieved_docs</th>\n",
- " <th>match_ratio_docs_available</th>\n",
- " <th>match_ratio_value</th>\n",
- " <th>recall_10_value</th>\n",
- " <th>reciprocal_rank_10_value</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>0</td>\n",
- " <td>1040</td>\n",
- " <td>127518</td>\n",
- " <td>0.008156</td>\n",
- " <td>0.0</td>\n",
- " <td>0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>1</td>\n",
- " <td>922</td>\n",
- " <td>127518</td>\n",
- " <td>0.007230</td>\n",
- " <td>0.0</td>\n",
- " <td>0</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " query_id match_ratio_retrieved_docs match_ratio_docs_available \\\n",
- "0 0 1040 127518 \n",
- "1 1 922 127518 \n",
- "\n",
- " match_ratio_value recall_10_value reciprocal_rank_10_value \n",
- "0 0.008156 0.0 0 \n",
- "1 0.007230 0.0 0 "
- ]
- },
- "execution_count": 9,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"evaluation = app.evaluate(\n",
" labelled_data = labelled_data,\n",
@@ -857,18 +236,6 @@
"display_name": "vespa",
"language": "python",
"name": "vespa"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.7.7"
}
},
"nbformat": 4,