diff options
author | tmartins <thigm85@gmail.com> | 2020-06-11 12:35:42 +0200 |
---|---|---|
committer | tmartins <thigm85@gmail.com> | 2020-06-11 12:35:42 +0200 |
commit | a33169859cb5d3809db4fd592916c77941499e55 (patch) | |
tree | 29b143753cbd15e4aab21f06067f23efdeb52c60 /python | |
parent | a63581080b8a49790b4af27542b6ce8910cc472f (diff) |
clean notebook
Diffstat (limited to 'python')
-rw-r--r-- | python/vespa/notebooks/index.ipynb | 657 |
1 files changed, 12 insertions, 645 deletions
diff --git a/python/vespa/notebooks/index.ipynb b/python/vespa/notebooks/index.ipynb index 20b87c46083..e6ffdc538e2 100644 --- a/python/vespa/notebooks/index.ipynb +++ b/python/vespa/notebooks/index.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -45,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -65,7 +65,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -99,7 +99,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -111,20 +111,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "969" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "query_result.number_documents_retrieved" ] @@ -140,7 +129,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -176,552 +165,9 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>attributeMatch(authors.first)</th>\n", - " <th>attributeMatch(authors.first).averageWeight</th>\n", - " <th>attributeMatch(authors.first).completeness</th>\n", - " <th>attributeMatch(authors.first).fieldCompleteness</th>\n", - " <th>attributeMatch(authors.first).importance</th>\n", - " <th>attributeMatch(authors.first).matches</th>\n", - " <th>attributeMatch(authors.first).maxWeight</th>\n", - " <th>attributeMatch(authors.first).normalizedWeight</th>\n", - " <th>attributeMatch(authors.first).normalizedWeightedWeight</th>\n", - " <th>attributeMatch(authors.first).queryCompleteness</th>\n", - " <th>...</th>\n", - " <th>textSimilarity(results).queryCoverage</th>\n", - " <th>textSimilarity(results).score</th>\n", - " <th>textSimilarity(title).fieldCoverage</th>\n", - " <th>textSimilarity(title).order</th>\n", - " <th>textSimilarity(title).proximity</th>\n", - " <th>textSimilarity(title).queryCoverage</th>\n", - " <th>textSimilarity(title).score</th>\n", - " <th>document_id</th>\n", - " <th>query_id</th>\n", - " <th>relevant</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>...</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.062500</td>\n", - " <td>0.0</td>\n", - " <td>0.000000</td>\n", - " <td>0.142857</td>\n", - " <td>0.055357</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>...</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>1.000000</td>\n", - " <td>1.0</td>\n", - " <td>1.000000</td>\n", - " <td>1.000000</td>\n", - " <td>1.000000</td>\n", - " <td>97200</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>...</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.266667</td>\n", - " <td>1.0</td>\n", - " <td>0.869792</td>\n", - " <td>0.571429</td>\n", - " <td>0.679189</td>\n", - " <td>69447</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>...</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.142857</td>\n", - " <td>0.0</td>\n", - " <td>0.437500</td>\n", - " <td>0.142857</td>\n", - " <td>0.224554</td>\n", - " <td>3</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>...</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>1.000000</td>\n", - " <td>1.0</td>\n", - " <td>1.000000</td>\n", - " <td>1.000000</td>\n", - " <td>1.000000</td>\n", - " <td>97200</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>5</th>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>...</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.266667</td>\n", - " <td>1.0</td>\n", - " <td>0.869792</td>\n", - " <td>0.571429</td>\n", - " <td>0.679189</td>\n", - " <td>69447</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>6</th>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>...</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.111111</td>\n", - " <td>0.0</td>\n", - " <td>0.000000</td>\n", - " <td>0.083333</td>\n", - " <td>0.047222</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>7</th>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>...</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>1.000000</td>\n", - " <td>1.0</td>\n", - " <td>1.000000</td>\n", - " <td>1.000000</td>\n", - " <td>1.000000</td>\n", - " <td>116256</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>8</th>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>...</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.187500</td>\n", - " <td>1.0</td>\n", - " <td>1.000000</td>\n", - " <td>0.250000</td>\n", - " <td>0.612500</td>\n", - " <td>14888</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>9</th>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>...</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.083333</td>\n", - " <td>0.0</td>\n", - " <td>0.000000</td>\n", - " <td>0.083333</td>\n", - " <td>0.041667</td>\n", - " <td>5</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>10</th>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>...</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>1.000000</td>\n", - " <td>1.0</td>\n", - " <td>1.000000</td>\n", - " <td>1.000000</td>\n", - " <td>1.000000</td>\n", - " <td>116256</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>11</th>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>...</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.187500</td>\n", - " <td>1.0</td>\n", - " <td>1.000000</td>\n", - " <td>0.250000</td>\n", - " <td>0.612500</td>\n", - " <td>14888</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>12 rows × 984 columns</p>\n", - "</div>" - ], - "text/plain": [ - " attributeMatch(authors.first) \\\n", - "0 0.0 \n", - "1 0.0 \n", - "2 0.0 \n", - "3 0.0 \n", - "4 0.0 \n", - "5 0.0 \n", - "6 0.0 \n", - "7 0.0 \n", - "8 0.0 \n", - "9 0.0 \n", - "10 0.0 \n", - "11 0.0 \n", - "\n", - " attributeMatch(authors.first).averageWeight \\\n", - "0 0.0 \n", - "1 0.0 \n", - "2 0.0 \n", - "3 0.0 \n", - "4 0.0 \n", - "5 0.0 \n", - "6 0.0 \n", - "7 0.0 \n", - "8 0.0 \n", - "9 0.0 \n", - "10 0.0 \n", - "11 0.0 \n", - "\n", - " attributeMatch(authors.first).completeness \\\n", - "0 0.0 \n", - "1 0.0 \n", - "2 0.0 \n", - "3 0.0 \n", - "4 0.0 \n", - "5 0.0 \n", - "6 0.0 \n", - "7 0.0 \n", - "8 0.0 \n", - "9 0.0 \n", - "10 0.0 \n", - "11 0.0 \n", - "\n", - " attributeMatch(authors.first).fieldCompleteness \\\n", - "0 0.0 \n", - "1 0.0 \n", - "2 0.0 \n", - "3 0.0 \n", - "4 0.0 \n", - "5 0.0 \n", - "6 0.0 \n", - "7 0.0 \n", - "8 0.0 \n", - "9 0.0 \n", - "10 0.0 \n", - "11 0.0 \n", - "\n", - " attributeMatch(authors.first).importance \\\n", - "0 0.0 \n", - "1 0.0 \n", - "2 0.0 \n", - "3 0.0 \n", - "4 0.0 \n", - "5 0.0 \n", - "6 0.0 \n", - "7 0.0 \n", - "8 0.0 \n", - "9 0.0 \n", - "10 0.0 \n", - "11 0.0 \n", - "\n", - " attributeMatch(authors.first).matches \\\n", - "0 0.0 \n", - "1 0.0 \n", - "2 0.0 \n", - "3 0.0 \n", - "4 0.0 \n", - "5 0.0 \n", - "6 0.0 \n", - "7 0.0 \n", - "8 0.0 \n", - "9 0.0 \n", - "10 0.0 \n", - "11 0.0 \n", - "\n", - " attributeMatch(authors.first).maxWeight \\\n", - "0 0.0 \n", - "1 0.0 \n", - "2 0.0 \n", - "3 0.0 \n", - "4 0.0 \n", - "5 0.0 \n", - "6 0.0 \n", - "7 0.0 \n", - "8 0.0 \n", - "9 0.0 \n", - "10 0.0 \n", - "11 0.0 \n", - "\n", - " attributeMatch(authors.first).normalizedWeight \\\n", - "0 0.0 \n", - "1 0.0 \n", - "2 0.0 \n", - "3 0.0 \n", - "4 0.0 \n", - "5 0.0 \n", - "6 0.0 \n", - "7 0.0 \n", - "8 0.0 \n", - "9 0.0 \n", - "10 0.0 \n", - "11 0.0 \n", - "\n", - " attributeMatch(authors.first).normalizedWeightedWeight \\\n", - "0 0.0 \n", - "1 0.0 \n", - "2 0.0 \n", - "3 0.0 \n", - "4 0.0 \n", - "5 0.0 \n", - "6 0.0 \n", - "7 0.0 \n", - "8 0.0 \n", - "9 0.0 \n", - "10 0.0 \n", - "11 0.0 \n", - "\n", - " attributeMatch(authors.first).queryCompleteness ... \\\n", - "0 0.0 ... \n", - "1 0.0 ... \n", - "2 0.0 ... \n", - "3 0.0 ... \n", - "4 0.0 ... \n", - "5 0.0 ... \n", - "6 0.0 ... \n", - "7 0.0 ... \n", - "8 0.0 ... \n", - "9 0.0 ... \n", - "10 0.0 ... \n", - "11 0.0 ... \n", - "\n", - " textSimilarity(results).queryCoverage textSimilarity(results).score \\\n", - "0 0.0 0.0 \n", - "1 0.0 0.0 \n", - "2 0.0 0.0 \n", - "3 0.0 0.0 \n", - "4 0.0 0.0 \n", - "5 0.0 0.0 \n", - "6 0.0 0.0 \n", - "7 0.0 0.0 \n", - "8 0.0 0.0 \n", - "9 0.0 0.0 \n", - "10 0.0 0.0 \n", - "11 0.0 0.0 \n", - "\n", - " textSimilarity(title).fieldCoverage textSimilarity(title).order \\\n", - "0 0.062500 0.0 \n", - "1 1.000000 1.0 \n", - "2 0.266667 1.0 \n", - "3 0.142857 0.0 \n", - "4 1.000000 1.0 \n", - "5 0.266667 1.0 \n", - "6 0.111111 0.0 \n", - "7 1.000000 1.0 \n", - "8 0.187500 1.0 \n", - "9 0.083333 0.0 \n", - "10 1.000000 1.0 \n", - "11 0.187500 1.0 \n", - "\n", - " textSimilarity(title).proximity textSimilarity(title).queryCoverage \\\n", - "0 0.000000 0.142857 \n", - "1 1.000000 1.000000 \n", - "2 0.869792 0.571429 \n", - "3 0.437500 0.142857 \n", - "4 1.000000 1.000000 \n", - "5 0.869792 0.571429 \n", - "6 0.000000 0.083333 \n", - "7 1.000000 1.000000 \n", - "8 1.000000 0.250000 \n", - "9 0.000000 0.083333 \n", - "10 1.000000 1.000000 \n", - "11 1.000000 0.250000 \n", - "\n", - " textSimilarity(title).score document_id query_id relevant \n", - "0 0.055357 0 0 1 \n", - "1 1.000000 97200 0 0 \n", - "2 0.679189 69447 0 0 \n", - "3 0.224554 3 0 1 \n", - "4 1.000000 97200 0 0 \n", - "5 0.679189 69447 0 0 \n", - "6 0.047222 1 1 1 \n", - "7 1.000000 116256 1 0 \n", - "8 0.612500 14888 1 0 \n", - "9 0.041667 5 1 1 \n", - "10 1.000000 116256 1 0 \n", - "11 0.612500 14888 1 0 \n", - "\n", - "[12 rows x 984 columns]" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "training_data_batch = app.collect_training_data(\n", " labelled_data = labelled_data,\n", @@ -753,7 +199,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -771,76 +217,9 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>query_id</th>\n", - " <th>match_ratio_retrieved_docs</th>\n", - " <th>match_ratio_docs_available</th>\n", - " <th>match_ratio_value</th>\n", - " <th>recall_10_value</th>\n", - " <th>reciprocal_rank_10_value</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>0</td>\n", - " <td>1040</td>\n", - " <td>127518</td>\n", - " <td>0.008156</td>\n", - " <td>0.0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>1</td>\n", - " <td>922</td>\n", - " <td>127518</td>\n", - " <td>0.007230</td>\n", - " <td>0.0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " query_id match_ratio_retrieved_docs match_ratio_docs_available \\\n", - "0 0 1040 127518 \n", - "1 1 922 127518 \n", - "\n", - " match_ratio_value recall_10_value reciprocal_rank_10_value \n", - "0 0.008156 0.0 0 \n", - "1 0.007230 0.0 0 " - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "evaluation = app.evaluate(\n", " labelled_data = labelled_data,\n", @@ -857,18 +236,6 @@ "display_name": "vespa", "language": "python", "name": "vespa" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.7" } }, "nbformat": 4, |