aboutsummaryrefslogtreecommitdiffstats
path: root/linguistics/src/main/java/com/yahoo/language/simple/kstem/CharArraySet.java
blob: 36bc898507abd40f265d5bd5c99d6404ba2f600e (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
/*
 * This is adapted from the Lucene code base which is Copyright 2008 Apache Software Foundation and Licensed
 * under the terms of the Apache License, Version 2.0.
 */
package com.yahoo.language.simple.kstem;


import java.util.AbstractSet;
import java.util.Collection;
import java.util.Iterator;
import java.util.Set;

/**
 * A set of strings kept internally as {@code char[]} keys of a {@link CharArrayMap}.
 * The point of the class is to allow membership tests directly on a {@code char[]}
 * (or a slice of one) without first building a {@code String}.
 *
 * <p>
 * This is deliberately not a general purpose collection: individual elements
 * cannot be removed, and the backing hash table is never shrunk.
 *
 * <p>
 * <em>Please note:</em> although this class implements {@link java.util.Set Set},
 * it does not honour the full contract. It is typed as {@code Set<Object>} because
 * any object with a string representation may be added; the add and
 * {@code contains()} methods work on the {@link Object#toString} form of their
 * argument, stored in a {@code char[]} buffer. The {@link #iterator()} hands out
 * {@code char[]} instances.
 */
public class CharArraySet extends AbstractSet<Object> {

  /** Shared empty set, backed by {@link CharArrayMap#emptyMap()}. */
  public static final CharArraySet EMPTY_SET = new CharArraySet(CharArrayMap.<Object>emptyMap());

  /** Dummy value stored against every key; only the keys carry information. */
  private static final Object PLACEHOLDER = new Object();

  /** Backing map; its keys are the members of this set. */
  private final CharArrayMap<Object> map;

  /**
   * Creates an empty set sized for {@code startSize} terms.
   *
   * @param startSize
   *          the initial capacity
   * @param ignoreCase
   *          <code>true</code> for a case insensitive set,
   *          <code>false</code> for a case sensitive one
   */
  public CharArraySet(int startSize, boolean ignoreCase) {
    this(new CharArrayMap<>(startSize, ignoreCase));
  }

  /**
   * Creates a set holding the elements of the given collection. The set is
   * initially sized from {@code c.size()} and then filled via {@link #addAll}.
   *
   * @param c
   *          the collection whose elements are added to the new set
   * @param ignoreCase
   *          <code>true</code> for a case insensitive set,
   *          <code>false</code> for a case sensitive one
   */
  public CharArraySet(Collection<?> c, boolean ignoreCase) {
    this(c.size(), ignoreCase);
    addAll(c);
  }

  /**
   * Wraps the given map as a set (internal only). Also used by
   * {@link CharArrayMap#keySet()}.
   */
  CharArraySet(final CharArrayMap<Object> map) {
    this.map = map;
  }

  /**
   * Removes every entry by clearing the backing map. Supported so that a set
   * can be reused; {@link Set#remove} is not supported.
   */
  @Override
  public void clear() {
    map.clear();
  }

  /**
   * Tests whether the <code>len</code> chars of <code>text</code> beginning at
   * index <code>off</code> form a member of this set.
   */
  public boolean contains(char[] text, int off, int len) {
    return map.containsKey(text, off, len);
  }

  /** Tests whether the given <code>CharSequence</code> is a member of this set. */
  public boolean contains(CharSequence cs) {
    return map.containsKey(cs);
  }

  /** Tests membership by delegating to the backing map's {@code containsKey(Object)}. */
  @Override
  public boolean contains(Object o) {
    return map.containsKey(o);
  }

  /**
   * Adds the given object as a key of the backing map.
   *
   * @return <code>true</code> if the map reported no previous value for the key
   */
  @Override
  public boolean add(Object o) {
    final Object previous = map.put(o, PLACEHOLDER);
    return previous == null;
  }

  /** Adds the given CharSequence to the set; same return convention as {@link #add(Object)}. */
  public boolean add(CharSequence text) {
    final Object previous = map.put(text, PLACEHOLDER);
    return previous == null;
  }

  /** Adds the given String to the set; same return convention as {@link #add(Object)}. */
  public boolean add(String text) {
    final Object previous = map.put(text, PLACEHOLDER);
    return previous == null;
  }

  /**
   * Adds the given char[] itself to the set.
   * When ignoreCase is true for this set, the array is modified in place.
   * Callers must never change the array after handing it to this method.
   */
  public boolean add(char[] text) {
    final Object previous = map.put(text, PLACEHOLDER);
    return previous == null;
  }

  /** Number of entries, as reported by the backing map. */
  @Override
  public int size() {
    return map.size();
  }

  /**
   * Provides an unmodifiable {@link CharArraySet} for "read-only" use of an
   * internal set.
   *
   * @param set
   *          the set to expose as unmodifiable
   * @return the given set itself when it is {@link #EMPTY_SET} or is already
   *         backed by an unmodifiable map; otherwise a new set wrapping
   *         {@link CharArrayMap#unmodifiableMap} of the given set's map
   * @throws NullPointerException
   *           if the given set is <code>null</code>.
   */
  public static CharArraySet unmodifiableSet(CharArraySet set) {
    if (set == null) {
      throw new NullPointerException("Given set is null");
    }
    // Both the shared empty set and an already-unmodifiable set are returned untouched.
    if (set == EMPTY_SET || set.map instanceof CharArrayMap.UnmodifiableCharArrayMap) {
      return set;
    }
    return new CharArraySet(CharArrayMap.unmodifiableMap(set.map));
  }

  /**
   * Copies the given set into a {@link CharArraySet}.
   *
   * @param set
   *          the set to copy
   * @return {@link #EMPTY_SET} when that very instance is passed in; for any other
   *         {@link CharArraySet} a new set built from {@link CharArrayMap#copy}
   *         of its backing map, which preserves the ignoreCase property; for
   *         every other kind of set a new case sensitive set with the same
   *         elements
   */
  public static CharArraySet copy(final Set<?> set) {
    if (set == EMPTY_SET) {
      return EMPTY_SET;
    }
    if (!(set instanceof CharArraySet)) {
      return new CharArraySet(set, false);
    }
    final CharArraySet original = (CharArraySet) set;
    return new CharArraySet(CharArrayMap.copy(original.map));
  }

  /**
   * Returns an {@link Iterator} over the {@code char[]} instances held in this set.
   */
  @Override @SuppressWarnings("unchecked")
  public Iterator<Object> iterator() {
    // Iterate the map's original key set; going through this class again
    // would produce endless recursion.
    return map.originalKeySet().iterator();
  }

  /**
   * Renders the members as a bracketed, comma separated list. {@code char[]}
   * members are appended as their characters; anything else is appended via
   * {@link StringBuilder#append(Object)}.
   */
  @Override
  public String toString() {
    final StringBuilder sb = new StringBuilder("[");
    final Iterator<Object> members = iterator();
    while (members.hasNext()) {
      final Object item = members.next();
      // A separator is written once anything follows the opening bracket.
      if (sb.length() > 1) {
        sb.append(", ");
      }
      if (!(item instanceof char[])) {
        sb.append(item);
      } else {
        sb.append((char[]) item);
      }
    }
    sb.append(']');
    return sb.toString();
  }

}