Java Source Code: org.apache.lucene.index.FieldsWriter


package org.apache.lucene.index;

/**
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Iterator;
import java.util.zip.Deflater;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.IndexInput;

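/**
 * Writes stored document fields for a segment: field data is written to the
 * ".fdt" file via fieldsStream, and a per-document pointer into that file is
 * written to the ".fdx" file via indexStream.
 */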
final class FieldsWriter {
  static final byte FIELD_IS_TOKENIZED = 0x1;
  static final byte FIELD_IS_BINARY = 0x2;
  static final byte FIELD_IS_COMPRESSED = 0x4;

  private FieldInfos fieldInfos;

  private IndexOutput fieldsStream;

  private IndexOutput indexStream;

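  // true when this writer created fieldsStream/indexStream itself
  // and is therefore responsible for closing them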
  private boolean doClose;

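  // Creates new "<segment>.fdt" and "<segment>.fdx" outputs in the given
  // Directory; this writer owns the streams and will close them.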
  FieldsWriter(Directory d, String segment, FieldInfos fn) throws IOException {
    fieldInfos = fn;
    fieldsStream = d.createOutput(segment + ".fdt");
    indexStream = d.createOutput(segment + ".fdx");
    doClose = true;
  }

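  // Writes to streams supplied and owned by the caller; close() will not
  // close them.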
  FieldsWriter(IndexOutput fdx, IndexOutput fdt, FieldInfos fn) throws IOException {
    fieldInfos = fn;
    fieldsStream = fdt;
    indexStream = fdx;
    doClose = false;
  }

  // Writes the contents of buffer into the fields stream
  // and adds a new entry for this document into the index
  // stream.  This assumes the buffer was already written
  // in the correct fields format.
  void flushDocument(int numStoredFields, RAMOutputStream buffer) throws IOException {
    indexStream.writeLong(fieldsStream.getFilePointer());
    fieldsStream.writeVInt(numStoredFields);
    buffer.writeTo(fieldsStream);
  }

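  // Flushes any buffered bytes in both streams to the underlying Directory.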
  void flush() throws IOException {
    indexStream.flush();
    fieldsStream.flush();
  }

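  // Closes the underlying streams, but only if this writer opened them.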
  final void close() throws IOException {
    if (doClose) {
      fieldsStream.close();
      indexStream.close();
    }
  }

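  // Writes a single stored field: the field number, a bits byte recording the
  // tokenized/binary/compressed flags, and then the value, either as a
  // length-prefixed byte array (binary or compressed fields) or as a string.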
  final void writeField(FieldInfo fi, Fieldable field) throws IOException {
    // If the field is an instance of FieldsReader.FieldForMerge, we're in merge
    // mode and field.binaryValue() already returns the compressed value for a
    // field with isCompressed()==true, so we disable compression in that case.
    boolean disableCompression = (field instanceof FieldsReader.FieldForMerge);
    fieldsStream.writeVInt(fi.number);
    byte bits = 0;
    if (field.isTokenized())
      bits |= FieldsWriter.FIELD_IS_TOKENIZED;
    if (field.isBinary())
      bits |= FieldsWriter.FIELD_IS_BINARY;
    if (field.isCompressed())
      bits |= FieldsWriter.FIELD_IS_COMPRESSED;

    fieldsStream.writeByte(bits);

    if (field.isCompressed()) {
      // compression is enabled for the current field
      byte[] data = null;

      if (disableCompression) {
        // optimized case for merging: the data is already compressed
        data = field.binaryValue();
      } else {
        // check if it is a binary field
        if (field.isBinary()) {
          data = compress(field.binaryValue());
        } else {
          data = compress(field.stringValue().getBytes("UTF-8"));
        }
      }
      final int len = data.length;
      fieldsStream.writeVInt(len);
      fieldsStream.writeBytes(data, len);
    } else {
      // compression is disabled for the current field
      if (field.isBinary()) {
        byte[] data = field.binaryValue();
        final int len = data.length;
        fieldsStream.writeVInt(len);
        fieldsStream.writeBytes(data, len);
      } else {
        fieldsStream.writeString(field.stringValue());
      }
    }
  }

  /** Bulk write a contiguous series of documents.  The
   *  lengths array is the length (in bytes) of each raw
   *  document.  The stream IndexInput is the
   *  fieldsStream from which we should bulk-copy all
   *  bytes. */
  final void addRawDocuments(IndexInput stream, int[] lengths, int numDocs) throws IOException {
    long position = fieldsStream.getFilePointer();
    long start = position;
    for (int i = 0; i < numDocs; i++) {
      indexStream.writeLong(position);
      position += lengths[i];
    }
    fieldsStream.copyBytes(stream, position - start);
    assert fieldsStream.getFilePointer() == position;
  }

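  // Writes all stored fields of a document: records the document's start
  // position in indexStream, then writes the stored-field count followed by
  // each stored field to fieldsStream.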
  final void addDocument(Document doc) throws IOException {
    indexStream.writeLong(fieldsStream.getFilePointer());

    int storedCount = 0;
    Iterator fieldIterator = doc.getFields().iterator();
    while (fieldIterator.hasNext()) {
      Fieldable field = (Fieldable) fieldIterator.next();
      if (field.isStored())
        storedCount++;
    }
    fieldsStream.writeVInt(storedCount);

    fieldIterator = doc.getFields().iterator();
    while (fieldIterator.hasNext()) {
      Fieldable field = (Fieldable) fieldIterator.next();
      if (field.isStored())
        writeField(fieldInfos.fieldInfo(field.name()), field);
    }
  }

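  // Deflate-compresses the given bytes at the best-compression level.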
  private final byte[] compress(byte[] input) {

    // Create the compressor with the highest level of compression
    Deflater compressor = new Deflater();
    compressor.setLevel(Deflater.BEST_COMPRESSION);

    // Give the compressor the data to compress
    compressor.setInput(input);
    compressor.finish();

    /*
     * Create an expandable byte array to hold the compressed data.
     * You cannot use an array that's the same size as the original because
     * there is no guarantee that the compressed data will be smaller than
     * the uncompressed data.
     */
    ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length);

    // Compress the data
    byte[] buf = new byte[1024];
    while (!compressor.finished()) {
      int count = compressor.deflate(buf);
      bos.write(buf, 0, count);
    }

    compressor.end();

    // Get the compressed data
    return bos.toByteArray();
  }
}