Skip to content

Commit

Permalink
Core: Improve bit density in object storage layout (#7128)
Browse files Browse the repository at this point in the history
Co-authored-by: Prashant Singh <[email protected]>
Co-authored-by: Ryan Blue <[email protected]>
  • Loading branch information
3 people authored Apr 3, 2023
1 parent 2371323 commit a4a07ba
Showing 1 changed file with 18 additions and 8 deletions.
26 changes: 18 additions & 8 deletions core/src/main/java/org/apache/iceberg/LocationProviders.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,16 @@
*/
package org.apache.iceberg;

import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.function.Function;
import org.apache.hadoop.fs.Path;
import org.apache.iceberg.common.DynConstructors;
import org.apache.iceberg.io.LocationProvider;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.transforms.Transforms;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.relocated.com.google.common.hash.HashCode;
import org.apache.iceberg.relocated.com.google.common.hash.HashFunction;
import org.apache.iceberg.relocated.com.google.common.hash.Hashing;
import org.apache.iceberg.relocated.com.google.common.io.BaseEncoding;
import org.apache.iceberg.util.LocationUtil;
import org.apache.iceberg.util.PropertyUtil;

Expand Down Expand Up @@ -104,9 +106,10 @@ public String newDataLocation(String filename) {
}

static class ObjectStoreLocationProvider implements LocationProvider {
// NOTE(review): this is a diff rendering without +/- markers; the two HASH_FUNC declarations
// below look like the removed and the added version of the same constant - confirm against
// the commit before treating this span as compilable code.
// Integer-valued form: a bucket transform with Integer.MAX_VALUE buckets, bound to the string type.
private static final Function<Object, Integer> HASH_FUNC =
Transforms.bucket(Integer.MAX_VALUE).bind(Types.StringType.get());

// HashFunction form: obtained from Hashing.murmur3_32_fixed() in the relocated Guava package.
private static final HashFunction HASH_FUNC = Hashing.murmur3_32_fixed();
// URL-safe Base64 encoder configured to omit trailing padding characters.
private static final BaseEncoding BASE64_ENCODER = BaseEncoding.base64Url().omitPadding();
// Per-thread 4-byte buffer; computeHash writes the hash bytes into it before encoding them.
private final ThreadLocal<byte[]> temp = ThreadLocal.withInitial(() -> new byte[4]);
// Base location used as the first path segment by newDataLocation.
// NOTE(review): assigned outside this view - presumably in the constructor.
private final String storageLocation;
// Optional extra path segment; newDataLocation omits it when this is null.
private final String context;

Expand Down Expand Up @@ -143,11 +146,11 @@ public String newDataLocation(PartitionSpec spec, StructLike partitionData, Stri

// Builds a data file location for the given file name by placing a hash-derived path segment
// directly after storageLocation, followed by the optional context and the file name.
// NOTE(review): this is a diff rendering without +/- markers; each pair of near-identical
// statements below looks like a removed line followed by its replacement - confirm against
// the commit.
@Override
public String newDataLocation(String filename) {
// Presumably the removed form: the hash is held as an int.
int hash = HASH_FUNC.apply(filename);
// Presumably the added form: the hash is held as the text returned by computeHash.
String hash = computeHash(filename);
if (context != null) {
// %08x renders an int as eight zero-padded hexadecimal digits (pairs with the int hash).
return String.format("%s/%08x/%s/%s", storageLocation, hash, context, filename);
// %s inserts the hash text unchanged (pairs with the String hash).
return String.format("%s/%s/%s/%s", storageLocation, hash, context, filename);
} else {
// Same two variants as above, without the context segment.
return String.format("%s/%08x/%s", storageLocation, hash, filename);
return String.format("%s/%s/%s", storageLocation, hash, filename);
}
}

Expand All @@ -167,5 +170,12 @@ private static String pathContext(String tableLocation) {

return resolvedContext;
}

/**
 * Produces the textual hash segment for a file name.
 *
 * <p>The name is hashed as UTF-8 with {@code HASH_FUNC}, the first four bytes of the hash are
 * copied into the calling thread's scratch buffer, and that buffer is encoded with
 * {@code BASE64_ENCODER}.
 *
 * @param fileName file name to hash
 * @return encoded form of the four hash bytes
 */
private String computeHash(String fileName) {
  // Reuse the per-thread buffer instead of allocating a new array on every call.
  byte[] scratch = temp.get();
  HASH_FUNC.hashString(fileName, StandardCharsets.UTF_8).writeBytesTo(scratch, 0, 4);
  return BASE64_ENCODER.encode(scratch);
}
}
}

0 comments on commit a4a07ba

Please sign in to comment.