author     Henrik <henrik.hoiness@online.no>  2018-07-26 15:20:28 +0200
committer  Henrik <henrik.hoiness@online.no>  2018-07-26 15:20:28 +0200
commit     4a33b491a005ba6ed6f66a5c0a361bd7e67e3c72 (patch)
tree       eaa59c0af02fdf3e669167d8290c31227107c93a
parent     f9407be3f00c58d5034465fe8305ba92811630ba (diff)
Added arg --application_name, now creating folder for searchdefs ++
-rw-r--r--  config-model/src/main/python/ES_Vespa_parser.py | 118
1 file changed, 55 insertions(+), 63 deletions(-)
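
For context, the script's own usage comment plus the new flag give an invocation like this (file names and application name are placeholders, not from the commit):

    python ES_Vespa_parser.py my_index.json my_index_mapping.json --application_name my_app

If --application_name is omitted, it defaults to the literal string "application_name" (see the argparse change below).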
diff --git a/config-model/src/main/python/ES_Vespa_parser.py b/config-model/src/main/python/ES_Vespa_parser.py
index 91b576d5788..477b0db4744 100644
--- a/config-model/src/main/python/ES_Vespa_parser.py
+++ b/config-model/src/main/python/ES_Vespa_parser.py
@@ -3,16 +3,15 @@ import json
import argparse
import os, sys
-
# Parsing Elastic Search documents to Vespa documents
# Example of usage: python ES_Vespa_parser.py my_index.json my_index_mapping.json
__author__ = 'henrhoi'
class ElasticSearchParser:
-
document_file = None
mapping_file = None
+ application_name = None
search_definitions = {}
path = ""
_all = True
@@ -22,50 +21,49 @@ class ElasticSearchParser:
def __init__(self):
parser = argparse.ArgumentParser()
- parser.add_argument("documents", help="location of file with documents to be parsed")
- parser.add_argument("mappings", help="location of file with mappings")
+ parser.add_argument("documents_path", help="location of file with documents to be parsed", type=str)
+ parser.add_argument("mappings_path", help="location of file with mappings", type=str)
+ parser.add_argument("--application_name", help="name of application", default="application_name", type=str)
args = parser.parse_args()
- self.document_file = args.documents
- self.mapping_file = args.mappings
+ self.document_file = args.documents_path
+ self.mapping_file = args.mappings_path
+ self.application_name = args.application_name
def main(self):
- self.path = os.getcwd() + "/documents_and_document_definitions/"
+ self.path = os.getcwd() + "/application/"
try:
- os.mkdir(self.path, 0755);
+ os.mkdir(self.path, 0o777)
print(" > Created folder '" + self.path + "'")
except OSError:
- print(" > Folder '"+ self.path +"' already existed")
-
+ print(" > Folder '" + self.path + "' already existed")
try:
- os.mkdir(self.path + "searchdefinitions/", 0755);
+ os.makedirs(self.path + "searchdefinitions/", 0o777)
print(" > Created folder '" + self.path + "searchdefinitions/" + "'")
except OSError:
- print(" > Folder '"+ self.path + "searchdefinitions/" +"' already existed")
+ print(" > Folder '" + self.path + "searchdefinitions/" + "' already existed")
self.parse()
self.createServices_xml()
self.createHosts_xml()
-
-
def getMapping(self, type):
unparsed_mapping_file = open(self.mapping_file, "r")
type_mapping = {}
for line in unparsed_mapping_file:
data = json.loads(line)
- index = data.keys()[0]
+ index = list(data.keys())[0]
mappings = data[index]["mappings"][type]["properties"]
- #Checking if some fields could be no-index
+ # Checking if some fields could be no-index
try:
_all_enabled = data[index]["mappings"][type]["_all"]["enabled"]
if not _all_enabled:
self._all = False
- print(" > All fields in the document type '" + type + "' is not searchable. Go inside "+self.path + type + ".sd to add which fields that should be searchable")
+ print(" > All fields in the document type '" + type + "' is not searchable. Go inside " + self.path + type + ".sd to add which fields that should be searchable")
except KeyError:
- print(" > All fields in the document type '"+type+"' is searchable")
+ print(" > All fields in the document type '" + type + "' is searchable")
self.walk(mappings, type_mapping, "properties")
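
Note on the directory creation above: on Python 3.2+ the try/except OSError pair could be collapsed with exist_ok; a sketch under that assumption, not what this commit does:

    import os

    path = os.getcwd() + "/application/"
    # exist_ok=True makes makedirs a no-op when the directory already exists,
    # replacing the try/except OSError pattern used in main()
    os.makedirs(path + "searchdefinitions/", exist_ok=True)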
@@ -89,7 +87,7 @@ class ElasticSearchParser:
type = data["_type"]
parsed_data = {
- "put": "id:application_name:" + type + "::" + data["_id"],
+ "put": "id:"+self.application_name+":" + type + "::" + data["_id"],
"fields": {}
}
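
With a hypothetical --application_name my_app and a source document of _type "tweet" and _id "1", the hunk above now produces:

    {"put": "id:my_app:tweet::1", "fields": {}}

where previously the application name in the document id was hardcoded to "application_name".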
@@ -115,8 +113,8 @@ class ElasticSearchParser:
def createSearchDefinition(self, type, type_mapping):
file_path = self.path + "searchdefinitions/" + type + ".sd"
new_sd = open(file_path, "w")
- new_sd.write("search "+ type + " {\n")
- new_sd.write(" document "+ type + " {\n")
+ new_sd.write("search " + type + " {\n")
+ new_sd.write(" document " + type + " {\n")
for key, item in type_mapping.items():
new_sd.write(" field " + key + " type " + self.get_type(item) + " {\n")
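
For a hypothetical document type "tweet" with a single mapped field message of ES type text (which get_type maps to string), the writes above start the search definition like so; the indexing statements and closing braces are written further down in the function, outside this hunk:

    search tweet {
        document tweet {
            field message type string {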
@@ -132,58 +130,54 @@ class ElasticSearchParser:
file_path = self.path + "services.xml"
new_services = open(file_path, "w")
template = ("<?xml version='1.0' encoding='UTF-8'?>"
- "<services version='1.0'>\n\n"
- " <container id='default' version='1.0'>\n"
- " <search/>\n"
- " <document-api/>\n"
- " <nodes>\n"
- " <node hostalias='node1'/>\n"
- " </nodes>\n"
- " </container>\n\n"
- " <content id='content' version='1.0'>\n"
- " <redundancy>1</redundancy>\n"
- " <search>\n"
- " <visibility-delay>1.0</visibility-delay>\n"
- " </search>\n"
- " <documents>\n")
-
- for i in range(0,len(self.types)):
- template += " <document mode='index' type='"+self.types[i]+"'/>\n"
-
+ "<services version='1.0'>\n\n"
+ " <container id='default' version='1.0'>\n"
+ " <search/>\n"
+ " <document-api/>\n"
+ " <nodes>\n"
+ " <node hostalias='node1'/>\n"
+ " </nodes>\n"
+ " </container>\n\n"
+ " <content id='content' version='1.0'>\n"
+ " <redundancy>1</redundancy>\n"
+ " <search>\n"
+ " <visibility-delay>1.0</visibility-delay>\n"
+ " </search>\n"
+ " <documents>\n")
+
+ for i in range(0, len(self.types)):
+ template += " <document mode='index' type='" + self.types[i] + "'/>\n"
template += (" </documents>\n"
- " <nodes>\n"
- " <node hostalias='node1' distribution-key=\"0\"/>\n"
- " </nodes>\n"
- " <engine>\n"
- " <proton>\n"
- " <searchable-copies>1</searchable-copies>\n"
- " </proton>\n"
- " </engine>\n"
- " </content>\n\n"
- "</services>")
+ " <nodes>\n"
+ " <node hostalias='node1' distribution-key=\"0\"/>\n"
+ " </nodes>\n"
+ " <engine>\n"
+ " <proton>\n"
+ " <searchable-copies>1</searchable-copies>\n"
+ " </proton>\n"
+ " </engine>\n"
+ " </content>\n\n"
+ "</services>")
new_services.write(template)
new_services.close()
print(" > Created services.xml at '" + file_path + "'")
-
-
def createHosts_xml(self):
file_path = self.path + "hosts.xml"
new_hosts = open(file_path, "w")
- template = ("<?xml version=\"1.0\" encoding=\"utf-8\" ?>"
- "<hosts>"
- " <host name=\"localhost\">"
- " <alias>node1</alias>"
- " </host>"
+ template = ("<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n"
+ "<hosts>\n"
+ " <host name=\"localhost\">\n"
+ " <alias>node1</alias>\n"
+ " </host>\n"
"</hosts>")
new_hosts.write(template)
new_hosts.close()
print(" > Created hosts.xml at '" + file_path + "'")
-
def get_type(self, type):
return {
"text": "string",
@@ -193,12 +187,11 @@ class ElasticSearchParser:
"double": "double",
"boolean": "string",
"ip": "text",
- "byte" : "byte",
- "float" : "float",
+ "byte": "byte",
+ "float": "float",
}[type]
-
def get_indexing(self, key, key_type):
if not self._all:
return "summary"
@@ -211,7 +204,6 @@ class ElasticSearchParser:
return "summary"
-
def walk(self, node, mapping, parent):
for key, item in node.items():
if isinstance(item, dict):
@@ -219,10 +211,10 @@ class ElasticSearchParser:
elif key == "type":
mapping[parent] = item
elif key == "include_in_all":
- if not item: # Field should not be searchable
+ if not item: # Field should not be searchable
self.no_index.append(parent)
elif key == "index" and parent != "properties":
- if item == "no": # Field should not be searchable
+ if item == "no": # Field should not be searchable
self.no_index.append(parent)
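
To illustrate what walk does with a nested mapping (hypothetical input, and assuming the dict branch elided between the two hunks above recurses with the field name as parent):

    node = {"message": {"type": "text"}, "user": {"type": "keyword"}}
    mapping = {}
    # Each field's dict is walked; hitting the "type" key records
    # mapping[parent] = item, flattening the ES mapping to field -> type.
    # Result: mapping == {"message": "text", "user": "keyword"}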