aboutsummaryrefslogtreecommitdiffstats
path: root/fsa/src/main/java/com/yahoo/fsa/MetaData.java
blob: 26a7bdcaa9dc919ed5a5d4ca76bf8aa6c7ac6291 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.fsa;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.ByteOrder;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.FileChannel.MapMode;
import java.nio.charset.Charset;

import com.yahoo.fsa.FSA;


/**
 * Class for accessing meta-data (dat-files) used by FSA applications.
 *
 * <p>The file is read as a 256 byte little-endian header followed by a data
 * section. The header words used by this class are, by byte offset:
 * magic (0), version (4), checksum (8), data size in bytes (12),
 * 10 reserved ints (16..55) and 50 user ints (56..255).</p>
 *
 * <p>Loading never throws on a missing, unreadable or malformed file; instead a
 * message is printed to standard output and {@link #isOk()} returns
 * {@code false}. All accessors return {@code 0} or {@code null} in that state.</p>
 *
 * @author  <a href="mailto:boros@yahoo-inc.com">Peter Boros</a>
 **/
public class MetaData {

  /** Expected value of the first header word (0x873EA98B read as a signed int). */
  private static final int MAGIC = -2025936501;

  /** Number of bytes mapped as the header; the data section starts right after it. */
  private static final int HEADER_SIZE = 256;

  private boolean          _ok = false;
  private MappedByteBuffer _header;
  private MappedByteBuffer _data;
  private Charset          _charset;


  /**
   * Loads a meta-data file, decoding strings as utf-8.
   *
   * @param filename path of the dat-file to map
   */
  public MetaData(String filename){
    init(filename, "utf-8");
  }

  /**
   * Loads a meta-data file, decoding strings with the given charset.
   *
   * @param filename    path of the dat-file to map
   * @param charsetname name of the charset used to decode string entries
   */
  public MetaData(String filename, String charsetname){
    init(filename, charsetname);
  }

  /**
   * @return {@code true} if the file was found, had the expected magic number
   *         and its data section was mapped successfully
   */
  public boolean isOk(){
    return _ok;
  }

  /**
   * Maps the header and the data section of the file.
   * On any failure a message is printed and {@code _ok} stays {@code false}.
   */
  private void init(String filename, String charsetname){

    _charset = Charset.forName(charsetname);

    // try-with-resources: the stream and channel are closed on every path.
    // A mapping stays valid after the channel it was created from is closed.
    try (FileInputStream file = new FileInputStream(filename);
         FileChannel channel = file.getChannel()) {
      _header = channel.map(MapMode.READ_ONLY, 0, HEADER_SIZE);
      _header.order(ByteOrder.LITTLE_ENDIAN);
      if(h_magic()!=MAGIC){
        System.out.print("MetaData bad magic " + h_magic() +"\n");
        return;
      }
      int size = h_size();
      if(size<0){
        // map() rejects a negative size with an IllegalArgumentException;
        // report it like the other load failures instead of throwing.
        System.out.print("MetaData bad size " + size +"\n");
        return;
      }
      _data = channel.map(MapMode.READ_ONLY, HEADER_SIZE, size);
      _data.order(ByteOrder.LITTLE_ENDIAN);
      _ok=true;
    }
    catch (FileNotFoundException e) {
      System.out.print("MetaData file " + filename + " not found.\n");
    }
    catch (IOException e) {
      System.out.print("MetaData IO exception.\n");
    }
  }

  private int h_magic(){
    return _header.getInt(0);
  }
  private int h_version(){
    return _header.getInt(4);
  }
  private int h_checksum(){
    return _header.getInt(8);
  }
  private int h_size(){
    return _header.getInt(12);
  }
  private int h_reserved(int i){
    if(i<0||i>9){
      return 0;
    }
    return _header.getInt(16+4*i);
  }
  private int h_user(int i){
    if(i<0||i>49){
      return 0;
    }
    return _header.getInt(56+4*i);
  }


  private ByteBuffer encode(CharBuffer chrbuf){
    return _charset.encode(chrbuf);
  }

  private String decode(ByteBuffer buf){
    return _charset.decode(buf).toString();
  }

  /**
   * Copies {@code size} bytes of the data section, starting at {@code offset},
   * into a new little-endian heap buffer positioned at 0.
   * The read goes through a duplicate view so the position of the shared
   * mapped buffer is never modified.
   */
  private ByteBuffer copyRecord(int offset, int size){
    ByteBuffer meta = ByteBuffer.allocate(size);
    meta.order(ByteOrder.LITTLE_ENDIAN);
    ByteBuffer view = _data.duplicate();
    view.position(offset);
    view.get(meta.array(),0,size);
    return meta;
  }

  /**
   * Counts the bytes from {@code offset} up to (not including) the next zero byte.
   * Throws {@code BufferUnderflowException} if the data section ends first.
   */
  private int terminatedLength(int offset){
    ByteBuffer view = _data.duplicate();
    view.position(offset);
    int length = 0;
    while(view.get()!=0){
      length++;
    }
    return length;
  }


  /**
   * Returns one of the 50 user-defined header ints.
   *
   * @param i index of the user field, 0..49
   * @return the value, or 0 if the file is not loaded or {@code i} is out of range
   */
  public int user(int i){
    if(!_ok){
      return 0;
    }
    return h_user(i);
  }

  /**
   * Reads the int stored at byte offset {@code idx*4} of the data section.
   *
   * @param idx index into the data section viewed as an int array
   * @return the value, or 0 if the file is not loaded
   */
  public int getIntEntry(int idx)
  {
    if(!_ok){
      return 0;
    }
    return _data.getInt(idx*4);
  }

  /**
   * Copies the fixed-size record stored at byte offset {@code idx*size}.
   *
   * @param idx  record index
   * @param size record size in bytes
   * @return a little-endian buffer holding the record, or {@code null} if the
   *         file is not loaded
   */
  public ByteBuffer getDirectRecordEntry(int idx, int size)
  {
    if(!_ok){
      return null;
    }
    return copyRecord(idx*size, size);
  }

  /**
   * Copies a record of {@code size} bytes whose byte offset is stored as an int
   * at byte offset {@code idx*4} of the data section.
   *
   * @param idx  index into the offset table at the start of the data section
   * @param size record size in bytes
   * @return a little-endian buffer holding the record, or {@code null} if the
   *         file is not loaded
   */
  public ByteBuffer getIndirectRecordEntry(int idx, int size)
  {
    if(!_ok){
      return null;
    }
    int offset = _data.getInt(idx*4);
    return copyRecord(offset, size);
  }

  /**
   * Copies a length-prefixed record: the int at byte offset {@code idx*4} gives
   * the record offset, the int at that offset gives the payload size, and the
   * payload follows immediately after the size.
   *
   * @param idx index into the offset table at the start of the data section
   * @return a little-endian buffer holding the payload (without the size
   *         prefix), or {@code null} if the file is not loaded
   */
  public ByteBuffer getIndirectRecordEntry(int idx)
  {
    if(!_ok){
      return null;
    }
    int offset = _data.getInt(idx*4);
    int size = _data.getInt(offset);
    return copyRecord(offset+4, size);
  }

  /**
   * Decodes the zero-terminated string starting at the given byte offset.
   *
   * @param stringOffset byte offset of the first byte of the string
   * @return the decoded string, or {@code null} if the file is not loaded
   */
  public String getStringEntry(int stringOffset){
    if(!_ok){
      return null;
    }
    int length = terminatedLength(stringOffset);
    return decode(copyRecord(stringOffset, length));
  }

  /**
   * Decodes {@code numStrings} consecutive zero-terminated strings.
   *
   * @param stringOffset byte offset of the first byte of the first string
   * @param numStrings   number of strings to read
   * @return the decoded strings, or {@code null} if the file is not loaded or
   *         {@code numStrings} is not positive
   */
  public String[] getStringArrayEntry(int stringOffset, int numStrings){
    if(!_ok || numStrings<=0){
      return null;
    }
    String[] stringArray = new String[numStrings];
    int pos = stringOffset;
    for(int i=0;i<numStrings;i++){
      int length = terminatedLength(pos);
      stringArray[i] = decode(copyRecord(pos, length));
      pos += length+1;   // skip the string and its terminating zero byte
    }
    return stringArray;
  }

  //// test ////
  public static void main(String[] args) {
    String file = "dmozPred_2.dat";

    MetaData metaData = new MetaData(file);

    System.out.println("Loading MetaData "+file+": "+metaData.isOk());
  }

}