apache · clintropolis · Aug 2, 2022 · imply-cheddar · Sep 8, 2022 · imply-cheddar
diff --git a/processing/src/main/java/org/apache/druid/segment/data/FixedIndexedDoubleWriter.java b/processing/src/main/java/org/apache/druid/segment/data/FixedIndexedDoubleWriter.java
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.data;
+
+import it.unimi.dsi.fastutil.doubles.DoubleIterator;
+import org.apache.druid.io.Channels;
+import org.apache.druid.java.util.common.io.smoosh.FileSmoosher;
+import org.apache.druid.segment.serde.Serializer;
+import org.apache.druid.segment.writeout.SegmentWriteOutMedium;
+import org.apache.druid.segment.writeout.WriteOutBytes;
+
+import javax.annotation.Nullable;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.WritableByteChannel;
+
+/**
+ * Specialized version of {@link FixedIndexedWriter} for writing double value types, with no support for null values,
+ * and no verification that data is actually sorted. The resulting data can be read into either
+ * {@link FixedIndexedDoubles} or a {@link FixedIndexed} of {@code Double}, since the format is identical.
+ *
+ * Callers should be certain that the data written is in fact sorted if specifying it as such. If null values need
+ * to be stored then the generic {@link FixedIndexedWriter} should be used instead.
+ */
+public class FixedIndexedDoubleWriter implements Serializer
+{
+  /**
+   * Number of double values buffered per read by the iterator returned from {@link #getIterator()}.
+   */
+  private static final int PAGE_SIZE = 4096;
+  /**
+   * Size in bytes of the serialized header: version byte, flags byte, and the int count of values.
+   */
+  private static final int HEADER_BYTES = Byte.BYTES + Byte.BYTES + Integer.BYTES;
+
+  private final SegmentWriteOutMedium segmentWriteOutMedium;
+  /**
+   * Reusable native byte order buffer, large enough for a single double value or for the header.
+   */
+  private final ByteBuffer scratch;
+  private final boolean isSorted;
+  private int numWritten;
+  /**
+   * Holds the written values; created by {@link #open()} and null until then.
+   */
+  @Nullable
+  private WriteOutBytes valuesOut = null;
+
+  /**
+   * @param segmentWriteOutMedium medium used by {@link #open()} to create the buffer that holds the written values
+   * @param sorted                whether the sorted flag should be set in the serialized header; the values passed to
+   *                              {@link #write(double)} are never checked against this claim
+   */
+  public FixedIndexedDoubleWriter(SegmentWriteOutMedium segmentWriteOutMedium, boolean sorted)
+  {
+    this.segmentWriteOutMedium = segmentWriteOutMedium;
+    // this is a matter of faith, nothing checks
+    this.isSorted = sorted;
+    this.scratch = ByteBuffer.allocate(Double.BYTES).order(ByteOrder.nativeOrder());
+  }
+
+  /**
+   * Creates the buffer that values are written to. The other methods of this class dereference that buffer, so this
+   * needs to be called before any of them.
+   */
+  public void open() throws IOException
+  {
+    this.valuesOut = segmentWriteOutMedium.makeWriteOutBytes();
+  }
+
+  /**
+   * @return the number of bytes {@link #writeTo} emits: the header plus everything written to the value buffer so far
+   */
+  @Override
+  public long getSerializedSize()
+  {
+    return HEADER_BYTES + valuesOut.size();
+  }
+
+  /**
+   * Appends a single value, encoded in native byte order, and increments the count of written values.
+   */
+  public void write(double objectToWrite) throws IOException
+  {
+    scratch.clear();
+    scratch.putDouble(objectToWrite);
+    scratch.flip();
+    Channels.writeFully(valuesOut, scratch);
+    numWritten++;
+  }
+
+  /**
+   * Writes the header (version 0, flags, number of values) followed by all of the written values to the channel.
+   * The only flag that is ever set is {@link FixedIndexed#IS_SORTED_MASK}, and only if this writer was constructed
+   * as sorted. The smoosher argument is not used.
+   */
+  @Override
+  public void writeTo(
+      WritableByteChannel channel,
+      FileSmoosher smoosher
+  ) throws IOException
+  {
+    byte flags = 0x00;
+    if (isSorted) {
+      flags = (byte) (flags | FixedIndexed.IS_SORTED_MASK);
+    }
+    // the 6 byte header fits within the 8 byte scratch buffer, so it is emitted with a single write
+    scratch.clear();
+    // version 0
+    scratch.put((byte) 0);
+    scratch.put(flags);
+    scratch.putInt(numWritten);
+    scratch.flip();
+    Channels.writeFully(channel, scratch);
+    valuesOut.writeTo(channel);
+  }
+
+  /**
+   * Returns an iterator over the values written so far, in the order they were written. Values are read back from
+   * the value buffer in pages of up to {@link #PAGE_SIZE} values. An {@link IOException} encountered while reading a
+   * page is rethrown wrapped in a {@link RuntimeException}.
+   */
+  public DoubleIterator getIterator()
+  {
+    final ByteBuffer iteratorBuffer = ByteBuffer.allocate(Double.BYTES * PAGE_SIZE).order(ByteOrder.nativeOrder());
+
+    return new DoubleIterator()
+    {
+      // number of values handed out so far, which is also the index of the next value to return
+      int pos = 0;
+
+      @Override
+      public boolean hasNext()
+      {
+        return pos < numWritten;
+      }
+
+      @Override
+      public double nextDouble()
+      {
+        if (!hasNext()) {
+          // fail per the Iterator contract instead of attempting an empty page read and underflowing the buffer
+          throw new java.util.NoSuchElementException();
+        }
+        // pos == 0 covers the very first call, when the freshly allocated buffer holds no page yet
+        if (pos == 0 || !iteratorBuffer.hasRemaining()) {
+          readPage();
+        }
+        final double value = iteratorBuffer.getDouble();
+        pos++;
+        return value;
+      }
+
+      /**
+       * Fills the page buffer with the next values starting at 'pos', then flips it for reading. The final page is
+       * limited to the values that remain.
+       */
+      private void readPage()
+      {
+        final int valuesToRead = Math.min(PAGE_SIZE, numWritten - pos);
+        iteratorBuffer.clear();
+        iteratorBuffer.limit(valuesToRead * Double.BYTES);
+        try {
+          valuesOut.readFully((long) pos * Double.BYTES, iteratorBuffer);
+        }
+        catch (IOException e) {
+          throw new RuntimeException(e);
+        }
+        iteratorBuffer.flip();
+      }
+    };
+  }
+}
diff --git a/processing/src/main/java/org/apache/druid/segment/data/FixedIndexedDoubles.java b/processing/src/main/java/org/apache/druid/segment/data/FixedIndexedDoubles.java
@@ -0,0 +1,199 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.data;
+
+import com.google.common.base.Preconditions;
+import it.unimi.dsi.fastutil.doubles.DoubleComparator;
+import it.unimi.dsi.fastutil.doubles.DoubleComparators;
+import it.unimi.dsi.fastutil.doubles.DoubleIterator;
+import org.apache.druid.common.config.NullHandling;
+import org.apache.druid.query.monomorphicprocessing.HotLoopCallee;
+import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
+
+import javax.annotation.Nullable;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.util.Iterator;
+
+/**
+ * Specialized implementation of a {@link FixedIndexed} of {@code Double} which does not contain any null values,
+ * allowing it to deal in java double value types instead of {@link Double} objects, and utilize specialized
+ * {@link ByteBuffer} methods to more efficiently read data.
+ */
+public final class FixedIndexedDoubles implements Indexed<Double>, HotLoopCallee
+{
+  /**
+   * Reads a {@link FixedIndexedDoubles} starting at the current position of the supplied buffer. The expected layout
+   * is a version byte (which must be 0), a flags byte, an int count of values, and then that many double values.
+   *
+   * The supplied buffer is not read through directly; a read-only view with the requested byte order is used
+   * instead. On return, the position of the supplied buffer has been advanced to just past the last value.
+   *
+   * @param bb        buffer positioned at the version byte
+   * @param byteOrder byte order used to decode the count and the values
+   *
+   * @throws IllegalStateException if the version is not 0, or if the flags indicate that a null value is present
+   */
+  public static FixedIndexedDoubles read(ByteBuffer bb, ByteOrder byteOrder)
+  {
+    final ByteBuffer buffer = bb.asReadOnlyBuffer().order(byteOrder);
+    final byte version = buffer.get();
+    Preconditions.checkState(version == 0, "Unknown version [%s]", version);
+    final byte flags = buffer.get();
+    final boolean hasNull = (flags & NullHandling.IS_NULL_BYTE) == NullHandling.IS_NULL_BYTE;
+    final boolean isSorted = (flags & FixedIndexed.IS_SORTED_MASK) == FixedIndexed.IS_SORTED_MASK;
+    // nulls are rejected outright, so nothing past this point needs to account for a null entry
+    Preconditions.checkState(!hasNull, "Cannot use FixedIndexedDoubles for FixedIndexed with null values");
+    final int size = buffer.getInt();
+    final int valuesOffset = buffer.position();
+    final FixedIndexedDoubles fixedIndexed = new FixedIndexedDoubles(
+        buffer,
+        isSorted,
+        size,
+        valuesOffset
+    );
+    bb.position(valuesOffset + (Double.BYTES * size));
+    return fixedIndexed;
+  }
+
+  private final ByteBuffer buffer;
+  private final int size;
+  /**
+   * Absolute position in {@link #buffer} of the first value.
+   */
+  private final int valuesOffset;
+  private final boolean isSorted;
+  private final DoubleComparator comparator;
+
+  private FixedIndexedDoubles(
+      ByteBuffer buffer,
+      boolean isSorted,
+      int size,
+      int valuesOffset
+  )
+  {
+    this.buffer = buffer;
+    this.size = size;
+    this.valuesOffset = valuesOffset;
+    this.isSorted = isSorted;
+    this.comparator = DoubleComparators.NATURAL_COMPARATOR;
+  }
+
+  @Override
+  public int size()
+  {
+    return size;
+  }
+
+  /**
+   * Boxed variant of {@link #getDouble(int)}.
+   */
+  @Nullable
+  @Override
+  public Double get(int index)
+  {
+    return getDouble(index);
+  }
+
+  /**
+   * Boxed variant of {@link #indexOf(double)}. A null argument is reported as -1 without consulting the values, even
+   * when the values are not sorted.
+   */
+  @Override
+  public int indexOf(@Nullable Double value)
+  {
+    if (value == null) {
+      return -1;
+    }
+    return indexOf(value.doubleValue());
+  }
+
+  /**
+   * Reads the value stored at the given index directly from the buffer. The index is not validated against
+   * {@link #size()}.
+   */
+  public double getDouble(int index)
+  {
+    return buffer.getDouble(valuesOffset + (index * Double.BYTES));
+  }
+
+  /**
+   * Binary search for the value, which is only possible if the values were flagged as sorted.
+   *
+   * @return the index of the value if it is present, otherwise {@code -(insertionPoint + 1)}, where the insertion
+   * point is the index at which the search ended
+   *
+   * @throws UnsupportedOperationException if the values were not flagged as sorted
+   */
+  public int indexOf(double value)
+  {
+    if (!isSorted) {
+      throw new UnsupportedOperationException("Reverse lookup not allowed.");
+    }
+    int minIndex = 0;
+    int maxIndex = size - 1;
+    while (minIndex <= maxIndex) {
+      // unsigned shift keeps the midpoint correct even if the sum overflows
+      final int currIndex = (minIndex + maxIndex) >>> 1;
+      final int comparison = comparator.compare(getDouble(currIndex), value);
+      if (comparison == 0) {
+        return currIndex;
+      }
+
+      if (comparison < 0) {
+        minIndex = currIndex + 1;
+      } else {
+        maxIndex = currIndex - 1;
+      }
+    }
+
+    return -(minIndex + 1);
+  }
+
+  /**
+   * Returns an iterator over all of the values as primitive doubles. Each iterator reads from its own read-only view
+   * of the buffer, with its own position and limit.
+   */
+  public DoubleIterator doubleIterator()
+  {
+    final ByteBuffer copy = buffer.asReadOnlyBuffer().order(buffer.order());
+    copy.position(valuesOffset);
+    copy.limit(valuesOffset + (size * Double.BYTES));
+    return new DoubleIterator()
+    {
+      @Override
+      public double nextDouble()
+      {
+        if (!copy.hasRemaining()) {
+          // fail per the Iterator contract instead of leaking a BufferUnderflowException
+          throw new java.util.NoSuchElementException();
+        }
+        return copy.getDouble();
+      }
+
+      @Override
+      public boolean hasNext()
+      {
+        return copy.hasRemaining();
+      }
+    };
+  }
+
+  /**
+   * Boxed variant of {@link #doubleIterator()}.
+   */
+  @Override
+  public Iterator<Double> iterator()
+  {
+    final DoubleIterator doubleIterator = doubleIterator();
+    return new Iterator<Double>()
+    {
+      @Override
+      public boolean hasNext()
+      {
+        return doubleIterator.hasNext();
+      }
+
+      @Override
+      public Double next()
+      {
+        return doubleIterator.nextDouble();
+      }
+    };
+  }
+
+  @Override
+  public void inspectRuntimeShape(RuntimeShapeInspector inspector)
+  {
+    inspector.visit("buffer", buffer);
+    inspector.visit("comparator", comparator);
+  }
+
+  /**
+   * Renders every value, separated by ", ", in the form {@code FixedIndexedDoubles[v0, v1, ...]}.
+   */
+  @Override
+  public String toString()
+  {
+    final StringBuilder sb = new StringBuilder("FixedIndexedDoubles[");
+    for (int i = 0; i < size; i++) {
+      if (i > 0) {
+        sb.append(',').append(' ');
+      }
+      sb.append(getDouble(i));
+    }
+    sb.append(']');
+    return sb.toString();
+  }
+}
diff --git a/processing/src/main/java/org/apache/druid/segment/data/FixedIndexedIntWriter.java b/processing/src/main/java/org/apache/druid/segment/data/FixedIndexedIntWriter.java
@@ -33,8 +33,12 @@
 import java.nio.channels.WritableByteChannel;
 
 /**
- * Specialized version of {@link FixedIndexedWriter} for writing ints, with no support for null values, and no
- * verification that data is actually sorted, it just trusts you and takes your word for it
+ * Specialized version of {@link FixedIndexedWriter} for writing int value types, with no support for null values,
+ * and no verification that data is actually sorted. The resulting data can be read into either
+ * {@link FixedIndexedInts} or a {@link FixedIndexed} of {@code Integer}, since the format is identical.
+ *
+ * Callers should be certain that the data written is in fact sorted if specifying it as such. If null values need
+ * to be stored then the generic {@link FixedIndexedWriter} should be used instead.
  */
 public final class FixedIndexedIntWriter implements Serializer
 {