diff options
author | Henrik <henrik.hoiness@online.no> | 2018-07-26 15:20:28 +0200 |
---|---|---|
committer | Henrik <henrik.hoiness@online.no> | 2018-07-26 15:20:28 +0200 |
commit | 4a33b491a005ba6ed6f66a5c0a361bd7e67e3c72 (patch) | |
tree | eaa59c0af02fdf3e669167d8290c31227107c93a /config-model | |
parent | f9407be3f00c58d5034465fe8305ba92811630ba (diff) |
Added arg --application_name, now creating folder for searchdefs ++
Diffstat (limited to 'config-model')
-rw-r--r-- | config-model/src/main/python/ES_Vespa_parser.py | 118 |
1 file changed, 55 insertions, 63 deletions
diff --git a/config-model/src/main/python/ES_Vespa_parser.py b/config-model/src/main/python/ES_Vespa_parser.py index 91b576d5788..477b0db4744 100644 --- a/config-model/src/main/python/ES_Vespa_parser.py +++ b/config-model/src/main/python/ES_Vespa_parser.py @@ -3,16 +3,15 @@ import json import argparse import os, sys - # Parsing Elastic Search documents to Vespa documents # Example of usage: python ES_Vespa_parser.py my_index.json my_index_mapping.json __author__ = 'henrhoi' class ElasticSearchParser: - document_file = None mapping_file = None + application_name = None search_definitions = {} path = "" _all = True @@ -22,50 +21,49 @@ class ElasticSearchParser: def __init__(self): parser = argparse.ArgumentParser() - parser.add_argument("documents", help="location of file with documents to be parsed") - parser.add_argument("mappings", help="location of file with mappings") + parser.add_argument("documents_path", help="location of file with documents to be parsed", type=str) + parser.add_argument("mappings_path", help="location of file with mappings", type=str) + parser.add_argument("--application_name", help="name of application", default="application_name", type=str) args = parser.parse_args() - self.document_file = args.documents - self.mapping_file = args.mappings + self.document_file = args.documents_path + self.mapping_file = args.mappings_path + self.application_name = args.application_name def main(self): - self.path = os.getcwd() + "/documents_and_document_definitions/" + self.path = os.getcwd() + "/application/" try: - os.mkdir(self.path, 0755); + os.mkdir(self.path, 0o777) print(" > Created folder '" + self.path + "'") except OSError: - print(" > Folder '"+ self.path +"' already existed") - + print(" > Folder '" + self.path + "' already existed") try: - os.mkdir(self.path + "searchdefinitions/", 0755); + os.makedirs(self.path + "searchdefinitions/", 0o777) print(" > Created folder '" + self.path + "searchdefinitions/" + "'") except OSError: - print(" > 
Folder '"+ self.path + "searchdefinitions/" +"' already existed") + print(" > Folder '" + self.path + "searchdefinitions/" + "' already existed") self.parse() self.createServices_xml() self.createHosts_xml() - - def getMapping(self, type): unparsed_mapping_file = open(self.mapping_file, "r") type_mapping = {} for line in unparsed_mapping_file: data = json.loads(line) - index = data.keys()[0] + index = list(data.keys())[0] mappings = data[index]["mappings"][type]["properties"] - #Checking if some fields could be no-index + # Checking if some fields could be no-index try: _all_enabled = data[index]["mappings"][type]["_all"]["enabled"] if not _all_enabled: self._all = False - print(" > All fields in the document type '" + type + "' is not searchable. Go inside "+self.path + type + ".sd to add which fields that should be searchable") + print(" > All fields in the document type '" + type + "' is not searchable. Go inside " + self.path + type + ".sd to add which fields that should be searchable") except KeyError: - print(" > All fields in the document type '"+type+"' is searchable") + print(" > All fields in the document type '" + type + "' is searchable") self.walk(mappings, type_mapping, "properties") @@ -89,7 +87,7 @@ class ElasticSearchParser: type = data["_type"] parsed_data = { - "put": "id:application_name:" + type + "::" + data["_id"], + "put": "id:"+self.application_name+":" + type + "::" + data["_id"], "fields": {} } @@ -115,8 +113,8 @@ class ElasticSearchParser: def createSearchDefinition(self, type, type_mapping): file_path = self.path + "searchdefinitions/" + type + ".sd" new_sd = open(file_path, "w") - new_sd.write("search "+ type + " {\n") - new_sd.write(" document "+ type + " {\n") + new_sd.write("search " + type + " {\n") + new_sd.write(" document " + type + " {\n") for key, item in type_mapping.items(): new_sd.write(" field " + key + " type " + self.get_type(item) + " {\n") @@ -132,58 +130,54 @@ class ElasticSearchParser: file_path = self.path + 
"services.xml" new_services = open(file_path, "w") template = ("<?xml version='1.0' encoding='UTF-8'?>" - "<services version='1.0'>\n\n" - " <container id='default' version='1.0'>\n" - " <search/>\n" - " <document-api/>\n" - " <nodes>\n" - " <node hostalias='node1'/>\n" - " </nodes>\n" - " </container>\n\n" - " <content id='content' version='1.0'>\n" - " <redundancy>1</redundancy>\n" - " <search>\n" - " <visibility-delay>1.0</visibility-delay>\n" - " </search>\n" - " <documents>\n") - - for i in range(0,len(self.types)): - template += " <document mode='index' type='"+self.types[i]+"'/>\n" - + "<services version='1.0'>\n\n" + " <container id='default' version='1.0'>\n" + " <search/>\n" + " <document-api/>\n" + " <nodes>\n" + " <node hostalias='node1'/>\n" + " </nodes>\n" + " </container>\n\n" + " <content id='content' version='1.0'>\n" + " <redundancy>1</redundancy>\n" + " <search>\n" + " <visibility-delay>1.0</visibility-delay>\n" + " </search>\n" + " <documents>\n") + + for i in range(0, len(self.types)): + template += " <document mode='index' type='" + self.types[i] + "'/>\n" template += (" </documents>\n" - " <nodes>\n" - " <node hostalias='node1' distribution-key=\"0\"/>\n" - " </nodes>\n" - " <engine>\n" - " <proton>\n" - " <searchable-copies>1</searchable-copies>\n" - " </proton>\n" - " </engine>\n" - " </content>\n\n" - "</services>") + " <nodes>\n" + " <node hostalias='node1' distribution-key=\"0\"/>\n" + " </nodes>\n" + " <engine>\n" + " <proton>\n" + " <searchable-copies>1</searchable-copies>\n" + " </proton>\n" + " </engine>\n" + " </content>\n\n" + "</services>") new_services.write(template) new_services.close() print(" > Created services.xml at '" + file_path + "'") - - def createHosts_xml(self): file_path = self.path + "hosts.xml" new_hosts = open(file_path, "w") - template = ("<?xml version=\"1.0\" encoding=\"utf-8\" ?>" - "<hosts>" - " <host name=\"localhost\">" - " <alias>node1</alias>" - " </host>" + template = ("<?xml version=\"1.0\" 
encoding=\"utf-8\" ?>\n" + "<hosts>\n" + " <host name=\"localhost\">\n" + " <alias>node1</alias>\n" + " </host>\n" "</hosts>") new_hosts.write(template) new_hosts.close() print(" > Created hosts.xml at '" + file_path + "'") - def get_type(self, type): return { "text": "string", @@ -193,12 +187,11 @@ class ElasticSearchParser: "double": "double", "boolean": "string", "ip": "text", - "byte" : "byte", - "float" : "float", + "byte": "byte", + "float": "float", }[type] - def get_indexing(self, key, key_type): if not self._all: return "summary" @@ -211,7 +204,6 @@ class ElasticSearchParser: return "summary" - def walk(self, node, mapping, parent): for key, item in node.items(): if isinstance(item, dict): @@ -219,10 +211,10 @@ class ElasticSearchParser: elif key == "type": mapping[parent] = item elif key == "include_in_all": - if not item: # Field should not be searchable + if not item: # Field should not be searchable self.no_index.append(parent) elif key == "index" and parent != "properties": - if item == "no": # Field should not be searchable + if item == "no": # Field should not be searchable self.no_index.append(parent) |