author     Henrik <henrik.hoiness@online.no>  2018-07-26 15:20:28 +0200
committer  Henrik <henrik.hoiness@online.no>  2018-07-26 15:20:28 +0200
commit     4a33b491a005ba6ed6f66a5c0a361bd7e67e3c72 (patch)
tree       eaa59c0af02fdf3e669167d8290c31227107c93a
parent     f9407be3f00c58d5034465fe8305ba92811630ba (diff)
Added arg --application_name, now creating folder for searchdefs ++
-rw-r--r--  config-model/src/main/python/ES_Vespa_parser.py | 118
1 file changed, 55 insertions(+), 63 deletions(-)
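
For context, the script's own usage comment plus the new flag give an invocation like this (file names and application name are placeholders, not from the commit):

    python ES_Vespa_parser.py my_index.json my_index_mapping.json --application_name my_app

If --application_name is omitted, it defaults to the literal string "application_name" (see the argparse change below).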
diff --git a/config-model/src/main/python/ES_Vespa_parser.py b/config-model/src/main/python/ES_Vespa_parser.py
index 91b576d5788..477b0db4744 100644
--- a/config-model/src/main/python/ES_Vespa_parser.py
+++ b/config-model/src/main/python/ES_Vespa_parser.py
@@ -3,16 +3,15 @@ import json
import argparse
import os, sys
-
# Parsing Elastic Search documents to Vespa documents
# Example of usage: python ES_Vespa_parser.py my_index.json my_index_mapping.json
__author__ = 'henrhoi'
class ElasticSearchParser:
-
document_file = None
mapping_file = None
+ application_name = None
search_definitions = {}
path = ""
_all = True
@@ -22,50 +21,49 @@ class ElasticSearchParser:
def __init__(self):
parser = argparse.ArgumentParser()
- parser.add_argument("documents", help="location of file with documents to be parsed")
- parser.add_argument("mappings", help="location of file with mappings")
+ parser.add_argument("documents_path", help="location of file with documents to be parsed", type=str)
+ parser.add_argument("mappings_path", help="location of file with mappings", type=str)
+ parser.add_argument("--application_name", help="name of application", default="application_name", type=str)
args = parser.parse_args()
- self.document_file = args.documents
- self.mapping_file = args.mappings
+ self.document_file = args.documents_path
+ self.mapping_file = args.mappings_path
+ self.application_name = args.application_name
def main(self):
- self.path = os.getcwd() + "/documents_and_document_definitions/"
+ self.path = os.getcwd() + "/application/"
try:
- os.mkdir(self.path, 0755);
+ os.mkdir(self.path, 0o777)
print(" > Created folder '" + self.path + "'")
except OSError:
- print(" > Folder '"+ self.path +"' already existed")
-
+ print(" > Folder '" + self.path + "' already existed")
try:
- os.mkdir(self.path + "searchdefinitions/", 0755);
+ os.makedirs(self.path + "searchdefinitions/", 0o777)
print(" > Created folder '" + self.path + "searchdefinitions/" + "'")
except OSError:
- print(" > Folder '"+ self.path + "searchdefinitions/" +"' already existed")
+ print(" > Folder '" + self.path + "searchdefinitions/" + "' already existed")
self.parse()
self.createServices_xml()
self.createHosts_xml()
-
-
def getMapping(self, type):
unparsed_mapping_file = open(self.mapping_file, "r")
type_mapping = {}
for line in unparsed_mapping_file:
data = json.loads(line)
- index = data.keys()[0]
+ index = list(data.keys())[0]
mappings = data[index]["mappings"][type]["properties"]
- #Checking if some fields could be no-index
+ # Checking if some fields could be no-index
try:
_all_enabled = data[index]["mappings"][type]["_all"]["enabled"]
if not _all_enabled:
self._all = False
- print(" > All fields in the document type '" + type + "' is not searchable. Go inside "+self.path + type + ".sd to add which fields that should be searchable")
+ print(" > All fields in the document type '" + type + "' is not searchable. Go inside " + self.path + type + ".sd to add which fields that should be searchable")
except KeyError:
- print(" > All fields in the document type '"+type+"' is searchable")
+ print(" > All fields in the document type '" + type + "' is searchable")
self.walk(mappings, type_mapping, "properties")
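
Note on the directory creation above: on Python 3.2+ the try/except OSError pair could be collapsed with exist_ok; a sketch under that assumption, not what this commit does:

    import os

    path = os.getcwd() + "/application/"
    # exist_ok=True makes makedirs a no-op when the directory already exists,
    # replacing the try/except OSError pattern used in main()
    os.makedirs(path + "searchdefinitions/", exist_ok=True)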
@@ -89,7 +87,7 @@ class ElasticSearchParser:
type = data["_type"]
parsed_data = {
- "put": "id:application_name:" + type + "::" + data["_id"],
+ "put": "id:"+self.application_name+":" + type + "::" + data["_id"],
"fields": {}
}
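
With a hypothetical --application_name my_app and a source document of _type "tweet" and _id "1", the hunk above now produces:

    {"put": "id:my_app:tweet::1", "fields": {}}

where previously the application name in the document id was hardcoded to "application_name".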
@@ -115,8 +113,8 @@ class ElasticSearchParser:
def createSearchDefinition(self, type, type_mapping):
file_path = self.path + "searchdefinitions/" + type + ".sd"
new_sd = open(file_path, "w")
- new_sd.write("search "+ type + " {\n")
- new_sd.write(" document "+ type + " {\n")
+ new_sd.write("search " + type + " {\n")
+ new_sd.write(" document " + type + " {\n")
for key, item in type_mapping.items():
new_sd.write(" field " + key + " type " + self.get_type(item) + " {\n")
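
For a hypothetical document type "tweet" with a single mapped field message of ES type text (which get_type maps to string), the writes above start the search definition like so; the indexing statements and closing braces are written further down in the function, outside this hunk:

    search tweet {
        document tweet {
            field message type string {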
@@ -132,58 +130,54 @@ class ElasticSearchParser:
file_path = self.path + "services.xml"
new_services = open(file_path, "w")
template = ("<?xml version='1.0' encoding='UTF-8'?>"
- "<services version='1.0'>\n\n"
- " <container id='default' version='1.0'>\n"
- " <search/>\n"
- " <document-api/>\n"
- " <nodes>\n"
- " <node hostalias='node1'/>\n"
- " </nodes>\n"
- " </container>\n\n"
- " <content id='content' version='1.0'>\n"
- " <redundancy>1</redundancy>\n"
- " <search>\n"
- " <visibility-delay>1.0</visibility-delay>\n"
- " </search>\n"
- " <documents>\n")
-
- for i in range(0,len(self.types)):
- template += " <document mode='index' type='"+self.types[i]+"'/>\n"
-
+ "<services version='1.0'>\n\n"
+ " <container id='default' version='1.0'>\n"
+ " <search/>\n"
+ " <document-api/>\n"
+ " <nodes>\n"
+ " <node hostalias='node1'/>\n"
+ " </nodes>\n"
+ " </container>\n\n"
+ " <content id='content' version='1.0'>\n"
+ " <redundancy>1</redundancy>\n"
+ " <search>\n"
+ " <visibility-delay>1.0</visibility-delay>\n"
+ " </search>\n"
+ " <documents>\n")
+
+ for i in range(0, len(self.types)):
+ template += " <document mode='index' type='" + self.types[i] + "'/>\n"
template += (" </documents>\n"
- " <nodes>\n"
- " <node hostalias='node1' distribution-key=\"0\"/>\n"
- " </nodes>\n"
- " <engine>\n"
- " <proton>\n"
- " <searchable-copies>1</searchable-copies>\n"
- " </proton>\n"
- " </engine>\n"
- " </content>\n\n"
- "</services>")
+ " <nodes>\n"
+ " <node hostalias='node1' distribution-key=\"0\"/>\n"
+ " </nodes>\n"
+ " <engine>\n"
+ " <proton>\n"
+ " <searchable-copies>1</searchable-copies>\n"
+ " </proton>\n"
+ " </engine>\n"
+ " </content>\n\n"
+ "</services>")
new_services.write(template)
new_services.close()
print(" > Created services.xml at '" + file_path + "'")
-
-
def createHosts_xml(self):
file_path = self.path + "hosts.xml"
new_hosts = open(file_path, "w")
- template = ("<?xml version=\"1.0\" encoding=\"utf-8\" ?>"
- "<hosts>"
- " <host name=\"localhost\">"
- " <alias>node1</alias>"
- " </host>"
+ template = ("<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n"
+ "<hosts>\n"
+ " <host name=\"localhost\">\n"
+ " <alias>node1</alias>\n"
+ " </host>\n"
"</hosts>")
new_hosts.write(template)
new_hosts.close()
print(" > Created hosts.xml at '" + file_path + "'")
-
def get_type(self, type):
return {
"text": "string",
@@ -193,12 +187,11 @@ class ElasticSearchParser:
"double": "double",
"boolean": "string",
"ip": "text",
- "byte" : "byte",
- "float" : "float",
+ "byte": "byte",
+ "float": "float",
}[type]
-
def get_indexing(self, key, key_type):
if not self._all:
return "summary"
@@ -211,7 +204,6 @@ class ElasticSearchParser:
return "summary"
-
def walk(self, node, mapping, parent):
for key, item in node.items():
if isinstance(item, dict):
@@ -219,10 +211,10 @@ class ElasticSearchParser:
elif key == "type":
mapping[parent] = item
elif key == "include_in_all":
- if not item: # Field should not be searchable
+ if not item: # Field should not be searchable
self.no_index.append(parent)
elif key == "index" and parent != "properties":
- if item == "no": # Field should not be searchable
+ if item == "no": # Field should not be searchable
self.no_index.append(parent)
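
To illustrate what walk does with a nested mapping (hypothetical input, and assuming the dict branch elided between the two hunks above recurses with the field name as parent):

    node = {"message": {"type": "text"}, "user": {"type": "keyword"}}
    mapping = {}
    # Each field's dict is walked; hitting the "type" key records
    # mapping[parent] = item, flattening the ES mapping to field -> type.
    # Result: mapping == {"message": "text", "user": "keyword"}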