Skip to content

Commit

Permalink
[HUDI-7373] revert config hoodie.write.handle.missing.cols.with.lossless.type.promotion (apache#10611)
Browse files Browse the repository at this point in the history


---------

Co-authored-by: Jonathan Vexler <=>
  • Loading branch information
jonvex authored Feb 8, 2024
1 parent 734015f commit 635d84a
Show file tree
Hide file tree
Showing 5 changed files with 10 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -81,17 +81,14 @@ public class HoodieCommonConfig extends HoodieConfig {
+ " operation will fail schema compatibility check. Set this option to true will make the newly added "
+ " column nullable to successfully complete the write operation.");

public static final ConfigProperty<String> HANDLE_MISSING_COLUMNS_WITH_LOSSLESS_TYPE_PROMOTIONS = ConfigProperty
.key("hoodie.write.handle.missing.cols.with.lossless.type.promotion")
public static final ConfigProperty<String> SET_NULL_FOR_MISSING_COLUMNS = ConfigProperty
.key("hoodie.write.set.null.for.missing.columns")
.defaultValue("false")
.markAdvanced()
.withAlternatives("hoodie.write.set.null.for.missing.columns")
.sinceVersion("0.14.1")
.withDocumentation("When a nullable column is missing from incoming batch during a write operation, the write "
+ " operation will fail schema compatibility check. Set this option to true will make the missing "
+ " column be filled with null values to successfully complete the write operation. Similarly lossless promotion"
+ " are type promotions that are not back compatible like long to int, double to float etc can be handled "
+ " by setting this config to true, in which case incoming data will be promoted to the table schema type"
+ " and written to the table.");
+ " column be filled with null values to successfully complete the write operation.");

public static final ConfigProperty<ExternalSpillableMap.DiskMapType> SPILLABLE_DISK_MAP_TYPE = ConfigProperty
.key("hoodie.common.spillable.diskmap.type")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -561,7 +561,7 @@ object DataSourceWriteOptions {
@Deprecated
val RECONCILE_SCHEMA: ConfigProperty[java.lang.Boolean] = HoodieCommonConfig.RECONCILE_SCHEMA

val HANDLE_MISSING_COLUMNS_WITH_LOSSLESS_TYPE_PROMOTIONS: ConfigProperty[String] = HoodieCommonConfig.HANDLE_MISSING_COLUMNS_WITH_LOSSLESS_TYPE_PROMOTIONS
val SET_NULL_FOR_MISSING_COLUMNS: ConfigProperty[String] = HoodieCommonConfig.SET_NULL_FOR_MISSING_COLUMNS

val MAKE_NEW_COLUMNS_NULLABLE: ConfigProperty[java.lang.Boolean] = HoodieCommonConfig.MAKE_NEW_COLUMNS_NULLABLE

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ object HoodieSchemaUtils {
latestTableSchemaOpt: Option[Schema],
internalSchemaOpt: Option[InternalSchema],
opts: Map[String, String]): Schema = {
val setNullForMissingColumns = opts.getOrDefault(DataSourceWriteOptions.HANDLE_MISSING_COLUMNS_WITH_LOSSLESS_TYPE_PROMOTIONS.key(),
DataSourceWriteOptions.HANDLE_MISSING_COLUMNS_WITH_LOSSLESS_TYPE_PROMOTIONS.defaultValue).toBoolean
val setNullForMissingColumns = opts.getOrDefault(DataSourceWriteOptions.SET_NULL_FOR_MISSING_COLUMNS.key(),
DataSourceWriteOptions.SET_NULL_FOR_MISSING_COLUMNS.defaultValue).toBoolean
val shouldReconcileSchema = opts(DataSourceWriteOptions.RECONCILE_SCHEMA.key()).toBoolean
val shouldValidateSchemasCompatibility = opts.getOrDefault(HoodieWriteConfig.AVRO_SCHEMA_VALIDATE_ENABLE.key,
HoodieWriteConfig.AVRO_SCHEMA_VALIDATE_ENABLE.defaultValue).toBoolean
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@
import java.util.concurrent.TimeUnit;
import java.util.function.Function;

import static org.apache.hudi.common.config.HoodieCommonConfig.HANDLE_MISSING_COLUMNS_WITH_LOSSLESS_TYPE_PROMOTIONS;
import static org.apache.hudi.common.config.HoodieCommonConfig.SET_NULL_FOR_MISSING_COLUMNS;
import static org.apache.hudi.common.table.timeline.TimelineMetadataUtils.serializeCommitMetadata;
import static org.apache.hudi.common.util.StringUtils.nonEmpty;
import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_URL;
Expand Down Expand Up @@ -610,7 +610,7 @@ static HoodieDeltaStreamer.Config makeConfigForHudiIncrSrc(String srcBasePath, S
cfg.schemaProviderClassName = schemaProviderClassName;
}
List<String> cfgs = new ArrayList<>();
cfgs.add(HANDLE_MISSING_COLUMNS_WITH_LOSSLESS_TYPE_PROMOTIONS.key() + "=true");
cfgs.add(SET_NULL_FOR_MISSING_COLUMNS.key() + "=true");
cfgs.add("hoodie.deltastreamer.source.hoodieincr.read_latest_on_missing_ckpt=" + addReadLatestOnMissingCkpt);
cfgs.add("hoodie.deltastreamer.source.hoodieincr.path=" + srcBasePath);
// No partition
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ protected HoodieDeltaStreamer.Config getDeltaStreamerConfig(String[] transformer
extraProps.setProperty(HoodieReaderConfig.FILE_GROUP_READER_ENABLED.key(), "false");
extraProps.setProperty("hoodie.datasource.write.table.type", tableType);
extraProps.setProperty("hoodie.datasource.write.row.writer.enable", rowWriterEnable.toString());
extraProps.setProperty(DataSourceWriteOptions.HANDLE_MISSING_COLUMNS_WITH_LOSSLESS_TYPE_PROMOTIONS().key(), Boolean.toString(nullForDeletedCols));
extraProps.setProperty(DataSourceWriteOptions.SET_NULL_FOR_MISSING_COLUMNS().key(), Boolean.toString(nullForDeletedCols));

//we set to 0 so that we create new base files on insert instead of adding inserts to existing filegroups via small file handling
extraProps.setProperty("hoodie.parquet.small.file.limit", "0");
Expand Down

0 comments on commit 635d84a

Please sign in to comment.