aboutsummaryrefslogtreecommitdiffstats
path: root/configgen/src/main/java/com/yahoo/config/codegen/NormalizedDefinition.java
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@yahoo-inc.com>2016-06-15 23:09:44 +0200
committerJon Bratseth <bratseth@yahoo-inc.com>2016-06-15 23:09:44 +0200
commit72231250ed81e10d66bfe70701e64fa5fe50f712 (patch)
tree2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /configgen/src/main/java/com/yahoo/config/codegen/NormalizedDefinition.java
Publish
Diffstat (limited to 'configgen/src/main/java/com/yahoo/config/codegen/NormalizedDefinition.java')
-rw-r--r--configgen/src/main/java/com/yahoo/config/codegen/NormalizedDefinition.java200
1 files changed, 200 insertions, 0 deletions
diff --git a/configgen/src/main/java/com/yahoo/config/codegen/NormalizedDefinition.java b/configgen/src/main/java/com/yahoo/config/codegen/NormalizedDefinition.java
new file mode 100644
index 00000000000..1847150d86d
--- /dev/null
+++ b/configgen/src/main/java/com/yahoo/config/codegen/NormalizedDefinition.java
@@ -0,0 +1,200 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.config.codegen;
+
+import java.io.*;
+import java.util.List;
+import java.util.ArrayList;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.text.DecimalFormat;
+import java.security.MessageDigest;
+import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
+
+/**
+ *
+ * Does normalizing (removing comments, trimming whitespace etc.) and calculation of md5sum
+ * of config definitions
+ *
+ * @author <a href="musum@yahoo-inc.com">Harald Musum</a>
+ */
+public class NormalizedDefinition {
+ /* Patterns used for finding ranges in config definitions */
+ private static final Pattern intPattern = Pattern.compile(".*int.*range.*");
+ private static final Pattern doublePattern = Pattern.compile(".*double.*range.*");
+ private MessageDigest md5;
+
+ String defMd5 = null;
+ List<String> normalizedContent = null;
+
+ public NormalizedDefinition() {
+ try {
+ md5 = MessageDigest.getInstance("MD5");
+ } catch (java.security.NoSuchAlgorithmException e) {
+ throw new RuntimeException("Unable to create MD5 digest", e);
+ }
+ normalizedContent = new ArrayList<>();
+ }
+
+ public NormalizedDefinition normalize(BufferedReader reader) throws IOException {
+ String s;
+ List<String> input = new ArrayList<>();
+ while ((s = reader.readLine()) != null) {
+ String normalized = normalize(s);
+ if (normalized.length() > 0) {
+ input.add(normalized);
+ }
+ }
+ normalizedContent = input;
+ return this;
+ }
+
+ /**
+ * Normalizes a config definition line. Each string is normalized according to the
+ * rules of config and definition files before they are used:
+ * <ul>
+ * <li>Remove trailing space.<li>
+ * <li>Remove trailing comments, and spaces before trailing comments.</li>
+ * <li>Remove empty lines</li>
+ * <li>Keep comment lines</li>
+ * </ul>
+ * The supplied list is changed in-place
+ *
+ * @param line A config definition line
+ * @return a normalized config definition line
+ */
+ public static String normalize(String line) {
+ //System.out.println("before line=" + line + ";");
+ // Normalize line
+ line = line.trim();
+ Matcher m = intPattern.matcher(line);
+ if (m.matches()) {
+ String formattedMax = new DecimalFormat("#.#").format(0x7fffffff);
+ String formattedMin = new DecimalFormat("#.#").format(-0x80000000);
+ line = line.replaceFirst("\\[,", "["+formattedMin+",");
+ line = line.replaceFirst(",\\]", ","+formattedMax+"]");
+ }
+ m = doublePattern.matcher(line);
+ if (m.matches()) {
+ String formattedMax = new DecimalFormat("#.#").format(1e308);
+ String formattedMin = new DecimalFormat("#.#").format(-1e308);
+ line = line.replaceFirst("\\[,", "["+formattedMin+",");
+ line = line.replaceFirst(",\\]", ","+formattedMax+"]");
+ }
+ line = removeComment(line);
+ if (!line.isEmpty()) {
+ line = stripSpaces(line);
+ line = line.replaceAll("\\s,", ","); // Remove space before comma (for enums)
+ line += "\n";
+ }
+ //System.out.println("after line=" + line + ";");
+ return line;
+ }
+
+ // Removes comment char and text after it, unless comment char is inside a string
+ // Keeps comment lines (lines that start with #)
+ private static String removeComment(String line) {
+ int index = line.indexOf("#");
+ if (!line.contains("#") || index == 0) return line;
+
+ int firstQuote = line.indexOf("\"");
+ if (firstQuote > 0) {
+ int secondQuote = line.indexOf("\"", firstQuote + 1);
+ if (index > secondQuote) {
+ line = line.substring(0, index);
+ line = line.trim();
+ }
+ } else {
+ line = line.substring(0, index);
+ line = line.trim();
+ }
+
+ return line;
+ }
+
+ public void addNormalizedLine(String line) {
+ normalizedContent.add(line);
+ }
+
+ public String generateMd5Sum() {
+ for (String line : normalizedContent) {
+ String s = normalize(line);
+ if (!s.isEmpty()) {
+ md5.update(toBytes(s));
+ }
+ }
+ defMd5 = toHexString(md5.digest()).toLowerCase();
+ //System.out.println("md5=" + defMd5) ;
+ return defMd5;
+ }
+
+
+ // The two methods below are copied from vespajlib (com.yahoo.text.Utf8 and com.yahoo.io.HexDump)
+ // since configgen cannot depend on any other modules (at least not as it is done now)
+ public static byte[] toBytes(String str) {
+ Charset charset = Charset.forName("utf-8");
+
+ ByteBuffer b = charset.encode(str);
+ byte[] result = new byte[b.remaining()];
+ b.get(result);
+ return result;
+ }
+
+ private String toHexString(byte[] bytes) {
+ StringBuilder sb = new StringBuilder(bytes.length * 2);
+ for (byte aByte : bytes) {
+ sb.append(String.format("%02x", aByte));
+ }
+ return sb.toString();
+ }
+
+ /**
+ * Replaces sequences of spaces with 1 space, unless inside quotes. Public for testing;
+ * @param str String to strip spaces from
+ * @return String with spaces stripped
+ */
+ public static String stripSpaces(String str) {
+ StringBuilder ret = new StringBuilder("");
+ boolean inQuotes = false;
+ boolean inSpaceSequence = false;
+ for (char c : str.toCharArray()) {
+ if (Character.isWhitespace(c)) {
+ if (inQuotes) {
+ ret.append(c);
+ continue;
+ }
+ if (!inSpaceSequence) {
+ // start of space sequence
+ inSpaceSequence=true;
+ ret.append(" ");
+ }
+ } else {
+ if (inSpaceSequence) {
+ inSpaceSequence=false;
+ }
+ if (c=='\"') {
+ inQuotes=!inQuotes;
+ }
+ ret.append(c);
+ }
+ }
+ return ret.toString();
+ }
+
+ public List<String> getNormalizedContent() {
+ return normalizedContent;
+ }
+
+ public String toString() {
+ StringBuilder builder = new StringBuilder();
+ for (String line : normalizedContent) {
+ builder.append(line.replace("\"", "\\\""));
+ builder.append("\\n\\\n");
+ }
+ return builder.toString();
+ }
+
+ public String getDefMd5() {
+ return defMd5;
+ }
+}