aboutsummaryrefslogtreecommitdiffstats
path: root/configgen/src/main/java/com/yahoo/config/codegen/NormalizedDefinition.java
blob: 5d36b3cb77daf41f7cb682462b4acce18bb26c0b (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.config.codegen;

import java.io.*;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Normalizes config definitions (removes trailing comments, trims and collapses whitespace,
 * fills in open-ended ranges) and calculates the md5sum of the normalized content.
 *
 * @author hmusum
 */
public class NormalizedDefinition {

    // Patterns used for finding ranges in config definitions
    private static final Pattern intPattern = Pattern.compile(".*int.*range.*");
    private static final Pattern doublePattern = Pattern.compile(".*double.*range.*");

    // Bounds substituted for a missing lower/upper limit in a range. They are formatted once,
    // with locale-independent symbols, so that the normalized text (and thereby the md5sum)
    // does not vary with the default locale of the JVM.
    private static final String INT_RANGE_MIN = newBoundFormat().format((long) Integer.MIN_VALUE);
    private static final String INT_RANGE_MAX = newBoundFormat().format((long) Integer.MAX_VALUE);
    private static final String DOUBLE_RANGE_MIN = newBoundFormat().format(-1e308);
    private static final String DOUBLE_RANGE_MAX = newBoundFormat().format(1e308);

    private final MessageDigest md5;

    /** The md5sum produced by the latest call to {@link #generateMd5Sum()}, or null if never called. */
    String defMd5 = null;

    /** The lines that make up the definition; each non-empty normalized line ends with a newline. */
    List<String> normalizedContent = null;

    /**
     * Creates an instance with empty content.
     *
     * @throws RuntimeException if no MD5 digest implementation is available
     */
    public NormalizedDefinition() {
        try {
            md5 = MessageDigest.getInstance("MD5");
        } catch (java.security.NoSuchAlgorithmException e) {
            throw new RuntimeException("Unable to create MD5 digest", e);
        }
        normalizedContent = new ArrayList<>();
    }

    /**
     * Reads all lines from the given reader, normalizes each of them and replaces the current
     * content with the lines that are non-empty after normalization.
     *
     * @param reader the reader to read the config definition from; it is not closed by this method
     * @return this, for chaining
     * @throws IOException if reading fails
     */
    public NormalizedDefinition normalize(BufferedReader reader) throws IOException {
        List<String> lines = new ArrayList<>();
        String raw;
        while ((raw = reader.readLine()) != null) {
            String normalized = normalize(raw);
            if (!normalized.isEmpty()) {
                lines.add(normalized);
            }
        }
        normalizedContent = lines;
        return this;
    }

    /**
     * Normalizes a config definition line according to the rules of config and definition files:
     * <ul>
     * <li>Remove leading and trailing whitespace.</li>
     * <li>Fill in the missing bound of open-ended int and double ranges.</li>
     * <li>Remove trailing comments, and spaces before trailing comments.</li>
     * <li>Collapse sequences of whitespace outside quotes to one space.</li>
     * <li>Remove whitespace before a comma (for enums).</li>
     * <li>Keep comment lines.</li>
     * </ul>
     * A line that is empty after normalization is returned as the empty string, all other
     * lines are returned with a single trailing newline.
     *
     * @param line a config definition line
     * @return a normalized config definition line
     */
    public static String normalize(String line) {
        line = line.trim();
        // The int substitution must run first: a line matching both patterns gets int bounds.
        if (intPattern.matcher(line).matches()) {
            line = fillOpenRange(line, INT_RANGE_MIN, INT_RANGE_MAX);
        }
        if (doublePattern.matcher(line).matches()) {
            line = fillOpenRange(line, DOUBLE_RANGE_MIN, DOUBLE_RANGE_MAX);
        }
        line = removeComment(line);
        if (!line.isEmpty()) {
            line = stripSpaces(line);
            line = line.replaceAll("\\s,", ",");  // Remove space before comma (for enums)
            line += "\n";
        }
        return line;
    }

    // Creates the formatter used for range bounds: plain digits without grouping or exponent,
    // using locale-independent symbols. A new instance is returned on each call since
    // DecimalFormat is not thread safe.
    private static DecimalFormat newBoundFormat() {
        return new DecimalFormat("#.#", DecimalFormatSymbols.getInstance(Locale.ROOT));
    }

    // Inserts min after the first "[," and max before the first ",]" in the line
    private static String fillOpenRange(String line, String min, String max) {
        return line.replaceFirst("\\[,", "[" + min + ",")
                   .replaceFirst(",\\]", "," + max + "]");
    }

    // Removes comment char and text after it, unless comment char is inside a string
    // Keeps comment lines (lines that start with #)
    //
    // NOTE: Only the first '#' and the first quoted string on the line are considered. The line
    // is therefore also kept unchanged when the first '#' comes before the first quoted string,
    // or when a trailing comment follows a string that itself contains '#'. This is deliberately
    // left as is, since changing it would change the normalized output and hence the md5sum.
    private static String removeComment(String line) {
        int hash = line.indexOf('#');
        if (hash <= 0) return line;  // no comment char, or a comment line

        int firstQuote = line.indexOf('"');
        if (firstQuote > 0) {
            int secondQuote = line.indexOf('"', firstQuote + 1);
            if (hash <= secondQuote) return line;
        }
        return line.substring(0, hash).trim();
    }

    /**
     * Appends a line to the content as is, without normalizing it.
     *
     * @param line the line to add
     */
    public void addNormalizedLine(String line) {
        normalizedContent.add(line);
    }

    /**
     * Calculates the md5sum of the content. Each line is normalized (again) before it is added
     * to the digest, and lines that are empty after normalization are skipped.
     *
     * @return the md5sum as a lower case hex string, also available from {@link #getDefMd5()}
     */
    public String generateMd5Sum() {
        for (String line : normalizedContent) {
            String normalized = normalize(line);
            if (!normalized.isEmpty()) {
                md5.update(toBytes(normalized));
            }
        }
        // digest() also resets the digest, so this method can be called repeatedly
        defMd5 = toHexString(md5.digest());
        return defMd5;
    }

    // The two methods below replace com.yahoo.text.Utf8 and com.yahoo.io.HexDump in vespajlib,
    // since configgen cannot depend on any other modules (at least not as it is done now)

    /**
     * Encodes a string as UTF-8.
     *
     * @param str the string to encode
     * @return the UTF-8 encoded bytes of the string
     */
    public static byte[] toBytes(String str) {
        return str.getBytes(StandardCharsets.UTF_8);
    }

    // Returns the bytes as a lower case hex string, two characters per byte
    private static String toHexString(byte[] bytes) {
        StringBuilder hex = new StringBuilder(bytes.length * 2);
        for (byte b : bytes) {
            hex.append(Character.forDigit((b >> 4) & 0xf, 16));
            hex.append(Character.forDigit(b & 0xf, 16));
        }
        return hex.toString();
    }

    /**
     * Replaces sequences of whitespace with 1 space, unless inside quotes. Public for testing.
     *
     * @param str String to strip spaces from
     * @return String with spaces stripped
     */
    public static String stripSpaces(String str) {
        StringBuilder ret = new StringBuilder(str.length());
        boolean inQuotes = false;
        boolean inSpaceSequence = false;
        for (char c : str.toCharArray()) {
            if (Character.isWhitespace(c)) {
                if (inQuotes) {
                    // whitespace inside quotes is kept as is
                    ret.append(c);
                } else if (!inSpaceSequence) {
                    // start of space sequence
                    inSpaceSequence = true;
                    ret.append(' ');
                }
            } else {
                inSpaceSequence = false;
                if (c == '"') {
                    inQuotes = !inQuotes;
                }
                ret.append(c);
            }
        }
        return ret.toString();
    }

    /**
     * Returns the content of this definition. This is the list used internally, not a copy.
     *
     * @return the lines of this definition
     */
    public List<String> getNormalizedContent() {
        return normalizedContent;
    }

    /**
     * Returns the content as one string, where each double quote is escaped with a backslash
     * and each line is followed by a literal backslash-n, a backslash and a line break.
     */
    @Override
    public String toString() {
        StringBuilder builder = new StringBuilder();
        for (String line : normalizedContent) {
            builder.append(line.replace("\"", "\\\""));
            builder.append("\\n\\\n");
        }
        return builder.toString();
    }

    /**
     * Returns the md5sum calculated by the latest call to {@link #generateMd5Sum()}.
     *
     * @return the md5sum, or null if it has not been generated
     */
    public String getDefMd5() {
        return defMd5;
    }

}