diff --git a/.changelog/39138.txt b/.changelog/39138.txt new file mode 100644 index 00000000000..332753ad962 --- /dev/null +++ b/.changelog/39138.txt @@ -0,0 +1,3 @@ +```release-note:enhancement +resource/aws_bedrockagent_data_source: Add `semantic_chunking_configuration` and `hierarchical_chunking_configuration` arguments +``` diff --git a/internal/service/bedrockagent/bedrockagent_test.go b/internal/service/bedrockagent/bedrockagent_test.go index 1eb4061f2ca..1c312caf18d 100644 --- a/internal/service/bedrockagent/bedrockagent_test.go +++ b/internal/service/bedrockagent/bedrockagent_test.go @@ -25,6 +25,8 @@ func TestAccBedrockAgent_serial(t *testing.T) { acctest.CtDisappears: testAccDataSource_disappears, "full": testAccDataSource_full, "update": testAccDataSource_update, + "semantic": testAccDataSource_fullSemantic, + "hierarchical": testAccDataSource_fullHierarchical, }, } diff --git a/internal/service/bedrockagent/data_source.go b/internal/service/bedrockagent/data_source.go index 0bb419f1ba3..0011e94a012 100644 --- a/internal/service/bedrockagent/data_source.go +++ b/internal/service/bedrockagent/data_source.go @@ -14,10 +14,12 @@ import ( "github.com/aws/aws-sdk-go-v2/service/bedrockagent" awstypes "github.com/aws/aws-sdk-go-v2/service/bedrockagent/types" "github.com/hashicorp/terraform-plugin-framework-timeouts/resource/timeouts" + "github.com/hashicorp/terraform-plugin-framework-validators/int32validator" "github.com/hashicorp/terraform-plugin-framework-validators/int64validator" "github.com/hashicorp/terraform-plugin-framework-validators/listvalidator" "github.com/hashicorp/terraform-plugin-framework-validators/setvalidator" "github.com/hashicorp/terraform-plugin-framework-validators/stringvalidator" + "github.com/hashicorp/terraform-plugin-framework/path" "github.com/hashicorp/terraform-plugin-framework/resource" "github.com/hashicorp/terraform-plugin-framework/resource/schema" 
"github.com/hashicorp/terraform-plugin-framework/resource/schema/int64planmodifier" @@ -41,6 +43,13 @@ import ( "github.com/hashicorp/terraform-provider-aws/names" ) +const ( + hierarchicalLevelConfigurations = 2 + hierarchicalMaxTokens = 8192 + semanticBreakpointPercentileThresholdMin = 50 + semanticBreakpointPercentileThresholdMax = 99 +) + // @FrameworkResource(name="Data Source") func newDataSourceResource(_ context.Context) (resource.ResourceWithConfigure, error) { r := &dataSourceResource{} @@ -200,6 +209,7 @@ func (r *dataSourceResource) Schema(ctx context.Context, request resource.Schema }, Validators: []validator.List{ listvalidator.SizeAtMost(1), + listvalidator.ConflictsWith(path.MatchRelative().AtParent().AtName("hierarchical_chunking_configuration"), path.MatchRelative().AtParent().AtName("semantic_chunking_configuration")), }, NestedObject: schema.NestedBlockObject{ Attributes: map[string]schema.Attribute{ @@ -224,6 +234,136 @@ func (r *dataSourceResource) Schema(ctx context.Context, request resource.Schema }, }, }, + "hierarchical_chunking_configuration": schema.ListNestedBlock{ + CustomType: fwtypes.NewListNestedObjectTypeOf[hierarchicalChunkingConfigurationModel](ctx), + PlanModifiers: []planmodifier.List{ + listplanmodifier.RequiresReplace(), + }, + Validators: []validator.List{ + listvalidator.SizeAtMost(1), + listvalidator.ConflictsWith(path.MatchRelative().AtParent().AtName("fixed_size_chunking_configuration"), path.MatchRelative().AtParent().AtName("semantic_chunking_configuration")), + }, + NestedObject: schema.NestedBlockObject{ + Attributes: map[string]schema.Attribute{ + "overlap_tokens": schema.Int32Attribute{ + Required: true, + }, + }, + Blocks: map[string]schema.Block{ + "level_configuration": schema.ListNestedBlock{ + CustomType: fwtypes.NewListNestedObjectTypeOf[hierarchicalChunkingLevelConfigurationModel](ctx), + PlanModifiers: []planmodifier.List{ + listplanmodifier.RequiresReplace(), + }, + Validators: []validator.List{ + 
listvalidator.SizeBetween(hierarchicalLevelConfigurations, hierarchicalLevelConfigurations), + }, + NestedObject: schema.NestedBlockObject{ + Attributes: map[string]schema.Attribute{ + "max_tokens": schema.Int32Attribute{ + Required: true, + Validators: []validator.Int32{ + int32validator.Between(1, hierarchicalMaxTokens), + }, + }, + }, + }, + }, + }, + }, + }, + "semantic_chunking_configuration": schema.ListNestedBlock{ + CustomType: fwtypes.NewListNestedObjectTypeOf[semanticChunkingConfigurationModel](ctx), + PlanModifiers: []planmodifier.List{ + listplanmodifier.RequiresReplace(), + }, + Validators: []validator.List{ + listvalidator.SizeAtMost(1), + listvalidator.ConflictsWith(path.MatchRelative().AtParent().AtName("fixed_size_chunking_configuration"), path.MatchRelative().AtParent().AtName("hierarchical_chunking_configuration")), + }, + NestedObject: schema.NestedBlockObject{ + Attributes: map[string]schema.Attribute{ + "breakpoint_percentile_threshold": schema.Int32Attribute{ + Required: true, + Validators: []validator.Int32{ + int32validator.Between(semanticBreakpointPercentileThresholdMin, semanticBreakpointPercentileThresholdMax), + }, + }, + "buffer_size": schema.Int32Attribute{ + Required: true, + Validators: []validator.Int32{ + int32validator.Between(0, 1), + }, + }, + "max_token": schema.Int32Attribute{ + Required: true, + Validators: []validator.Int32{ + int32validator.AtLeast(1), + }, + }, + }, + }, + }, + }, + }, + }, + "parsing_configuration": schema.ListNestedBlock{ + CustomType: fwtypes.NewListNestedObjectTypeOf[parsingConfigurationModel](ctx), + PlanModifiers: []planmodifier.List{ + listplanmodifier.RequiresReplace(), + }, + Validators: []validator.List{ + listvalidator.SizeAtMost(1), + }, + NestedObject: schema.NestedBlockObject{ + Attributes: map[string]schema.Attribute{ + "parsing_strategy": schema.StringAttribute{ + CustomType: fwtypes.StringEnumType[awstypes.ParsingStrategy](), + Required: true, + PlanModifiers: []planmodifier.String{ + 
stringplanmodifier.RequiresReplace(), + }, + }, + }, + Blocks: map[string]schema.Block{ + "bedrock_foundation_model_configuration": schema.ListNestedBlock{ + CustomType: fwtypes.NewListNestedObjectTypeOf[bedrockFoundationModelConfigurationModel](ctx), + PlanModifiers: []planmodifier.List{ + listplanmodifier.RequiresReplace(), + }, + Validators: []validator.List{ + listvalidator.SizeAtMost(1), + }, + NestedObject: schema.NestedBlockObject{ + Attributes: map[string]schema.Attribute{ + "model_arn": schema.StringAttribute{ + CustomType: fwtypes.ARNType, + Required: true, + PlanModifiers: []planmodifier.String{ + stringplanmodifier.RequiresReplace(), + }, + }, + }, + Blocks: map[string]schema.Block{ + "parsing_prompt": schema.ListNestedBlock{ + CustomType: fwtypes.NewListNestedObjectTypeOf[parsingPromptModel](ctx), + PlanModifiers: []planmodifier.List{ + listplanmodifier.RequiresReplace(), + }, + Validators: []validator.List{ + listvalidator.SizeAtMost(1), + }, + NestedObject: schema.NestedBlockObject{ + Attributes: map[string]schema.Attribute{ + "parsing_prompt_string": schema.StringAttribute{ + Required: true, + }, + }, + }, + }, + }, + }, + }, }, }, }, @@ -508,14 +648,46 @@ type serverSideEncryptionConfigurationModel struct { type vectorIngestionConfigurationModel struct { ChunkingConfiguration fwtypes.ListNestedObjectValueOf[chunkingConfigurationModel] `tfsdk:"chunking_configuration"` + ParsingConfiguration fwtypes.ListNestedObjectValueOf[parsingConfigurationModel] `tfsdk:"parsing_configuration"` +} + +type parsingConfigurationModel struct { + ParsingStrategy fwtypes.StringEnum[awstypes.ParsingStrategy] `tfsdk:"parsing_strategy"` + BedrockFoundationModelConfiguration fwtypes.ListNestedObjectValueOf[bedrockFoundationModelConfigurationModel] `tfsdk:"bedrock_foundation_model_configuration"` +} + +type bedrockFoundationModelConfigurationModel struct { + ModelArn fwtypes.ARN `tfsdk:"model_arn"` + ParsingPrompt fwtypes.ListNestedObjectValueOf[parsingPromptModel] 
`tfsdk:"parsing_prompt"` +} + +type parsingPromptModel struct { + ParsingPromptText types.String `tfsdk:"parsing_prompt_string"` } type chunkingConfigurationModel struct { - ChunkingStrategy fwtypes.StringEnum[awstypes.ChunkingStrategy] `tfsdk:"chunking_strategy"` - FixedSizeChunkingConfiguration fwtypes.ListNestedObjectValueOf[fixedSizeChunkingConfigurationModel] `tfsdk:"fixed_size_chunking_configuration"` + ChunkingStrategy fwtypes.StringEnum[awstypes.ChunkingStrategy] `tfsdk:"chunking_strategy"` + FixedSizeChunkingConfiguration fwtypes.ListNestedObjectValueOf[fixedSizeChunkingConfigurationModel] `tfsdk:"fixed_size_chunking_configuration"` + HierarchicalChunkingConfiguration fwtypes.ListNestedObjectValueOf[hierarchicalChunkingConfigurationModel] `tfsdk:"hierarchical_chunking_configuration"` + SemanticChunkingConfiguration fwtypes.ListNestedObjectValueOf[semanticChunkingConfigurationModel] `tfsdk:"semantic_chunking_configuration"` } type fixedSizeChunkingConfigurationModel struct { MaxTokens types.Int64 `tfsdk:"max_tokens"` OverlapPercentage types.Int64 `tfsdk:"overlap_percentage"` } + +type semanticChunkingConfigurationModel struct { + BreakpointPercentileThreshold types.Int32 `tfsdk:"breakpoint_percentile_threshold"` + BufferSize types.Int32 `tfsdk:"buffer_size"` + MaxTokens types.Int32 `tfsdk:"max_token"` +} + +type hierarchicalChunkingConfigurationModel struct { + LevelConfigurations fwtypes.ListNestedObjectValueOf[hierarchicalChunkingLevelConfigurationModel] `tfsdk:"level_configuration"` + OverlapTokens types.Int32 `tfsdk:"overlap_tokens"` +} + +type hierarchicalChunkingLevelConfigurationModel struct { + MaxTokens types.Int32 `tfsdk:"max_tokens"` +} diff --git a/internal/service/bedrockagent/data_source_test.go b/internal/service/bedrockagent/data_source_test.go index 3ad82a9556f..a58cf794e74 100644 --- a/internal/service/bedrockagent/data_source_test.go +++ b/internal/service/bedrockagent/data_source_test.go @@ -131,6 +131,133 @@ func 
testAccDataSource_full(t *testing.T) { }) } +func testAccDataSource_fullSemantic(t *testing.T) { + acctest.SkipIfExeNotOnPath(t, "psql") + acctest.SkipIfExeNotOnPath(t, "jq") + acctest.SkipIfExeNotOnPath(t, "aws") + + ctx := acctest.Context(t) + if testing.Short() { + t.Skip("skipping long-running test in short mode") + } + + var dataSource types.DataSource + rName := sdkacctest.RandomWithPrefix(acctest.ResourcePrefix) + resourceName := "aws_bedrockagent_data_source.test" + foundationModel := "amazon.titan-embed-text-v1" + + resource.Test(t, resource.TestCase{ + PreCheck: func() { + acctest.PreCheck(ctx, t) + }, + ErrorCheck: acctest.ErrorCheck(t, names.BedrockAgentServiceID), + ProtoV5ProviderFactories: acctest.ProtoV5ProviderFactories, + ExternalProviders: map[string]resource.ExternalProvider{ + "null": { + Source: "hashicorp/null", + VersionConstraint: "3.2.2", + }, + }, + CheckDestroy: testAccCheckDataSourceDestroy(ctx), + Steps: []resource.TestStep{ + { + Config: testAccDataSourceConfig_fullSemantic(rName, foundationModel), + Check: resource.ComposeAggregateTestCheckFunc( + testAccCheckDataSourceExists(ctx, resourceName, &dataSource), + resource.TestCheckResourceAttr(resourceName, "data_deletion_policy", "RETAIN"), + resource.TestCheckResourceAttr(resourceName, "data_source_configuration.#", acctest.Ct1), + resource.TestCheckResourceAttr(resourceName, "data_source_configuration.0.s3_configuration.#", acctest.Ct1), + resource.TestCheckResourceAttrSet(resourceName, "data_source_configuration.0.s3_configuration.0.bucket_arn"), + resource.TestCheckNoResourceAttr(resourceName, "data_source_configuration.0.s3_configuration.0.bucket_owner_account_id"), + resource.TestCheckResourceAttr(resourceName, "data_source_configuration.0.s3_configuration.0.inclusion_prefixes.#", acctest.Ct1), + resource.TestCheckTypeSetElemAttr(resourceName, "data_source_configuration.0.s3_configuration.0.inclusion_prefixes.*", "Europe/France/Nouvelle-Aquitaine/Bordeaux"), + 
resource.TestCheckResourceAttr(resourceName, "data_source_configuration.0.type", "S3"), + resource.TestCheckResourceAttrSet(resourceName, "data_source_id"), + resource.TestCheckResourceAttr(resourceName, names.AttrDescription, "testing"), + resource.TestCheckResourceAttr(resourceName, names.AttrName, rName), + resource.TestCheckResourceAttr(resourceName, "server_side_encryption_configuration.#", acctest.Ct0), + resource.TestCheckResourceAttr(resourceName, "vector_ingestion_configuration.#", acctest.Ct1), + resource.TestCheckResourceAttr(resourceName, "vector_ingestion_configuration.0.chunking_configuration.#", acctest.Ct1), + resource.TestCheckResourceAttr(resourceName, "vector_ingestion_configuration.0.chunking_configuration.0.chunking_strategy", "SEMANTIC"), + resource.TestCheckResourceAttr(resourceName, "vector_ingestion_configuration.0.chunking_configuration.0.semantic_chunking_configuration.#", acctest.Ct1), + resource.TestCheckResourceAttr(resourceName, "vector_ingestion_configuration.0.chunking_configuration.0.semantic_chunking_configuration.0.breakpoint_percentile_threshold", "80"), + resource.TestCheckResourceAttr(resourceName, "vector_ingestion_configuration.0.chunking_configuration.0.semantic_chunking_configuration.0.buffer_size", acctest.Ct1), + resource.TestCheckResourceAttr(resourceName, "vector_ingestion_configuration.0.chunking_configuration.0.semantic_chunking_configuration.0.max_token", acctest.Ct10), + ), + }, + { + ResourceName: resourceName, + ImportState: true, + ImportStateVerify: true, + }, + }, + }) +} + +func testAccDataSource_fullHierarchical(t *testing.T) { + acctest.SkipIfExeNotOnPath(t, "psql") + acctest.SkipIfExeNotOnPath(t, "jq") + acctest.SkipIfExeNotOnPath(t, "aws") + + ctx := acctest.Context(t) + if testing.Short() { + t.Skip("skipping long-running test in short mode") + } + + var dataSource types.DataSource + rName := sdkacctest.RandomWithPrefix(acctest.ResourcePrefix) + resourceName := "aws_bedrockagent_data_source.test" + 
foundationModel := "amazon.titan-embed-text-v1" + + resource.Test(t, resource.TestCase{ + PreCheck: func() { + acctest.PreCheck(ctx, t) + }, + ErrorCheck: acctest.ErrorCheck(t, names.BedrockAgentServiceID), + ProtoV5ProviderFactories: acctest.ProtoV5ProviderFactories, + ExternalProviders: map[string]resource.ExternalProvider{ + "null": { + Source: "hashicorp/null", + VersionConstraint: "3.2.2", + }, + }, + CheckDestroy: testAccCheckDataSourceDestroy(ctx), + Steps: []resource.TestStep{ + { + Config: testAccDataSourceConfig_fullHierarchical(rName, foundationModel), + Check: resource.ComposeAggregateTestCheckFunc( + testAccCheckDataSourceExists(ctx, resourceName, &dataSource), + resource.TestCheckResourceAttr(resourceName, "data_deletion_policy", "RETAIN"), + resource.TestCheckResourceAttr(resourceName, "data_source_configuration.#", acctest.Ct1), + resource.TestCheckResourceAttr(resourceName, "data_source_configuration.0.s3_configuration.#", acctest.Ct1), + resource.TestCheckResourceAttrSet(resourceName, "data_source_configuration.0.s3_configuration.0.bucket_arn"), + resource.TestCheckNoResourceAttr(resourceName, "data_source_configuration.0.s3_configuration.0.bucket_owner_account_id"), + resource.TestCheckResourceAttr(resourceName, "data_source_configuration.0.s3_configuration.0.inclusion_prefixes.#", acctest.Ct1), + resource.TestCheckTypeSetElemAttr(resourceName, "data_source_configuration.0.s3_configuration.0.inclusion_prefixes.*", "Europe/France/Nouvelle-Aquitaine/Bordeaux"), + resource.TestCheckResourceAttr(resourceName, "data_source_configuration.0.type", "S3"), + resource.TestCheckResourceAttrSet(resourceName, "data_source_id"), + resource.TestCheckResourceAttr(resourceName, names.AttrDescription, "testing"), + resource.TestCheckResourceAttr(resourceName, names.AttrName, rName), + resource.TestCheckResourceAttr(resourceName, "server_side_encryption_configuration.#", acctest.Ct0), + resource.TestCheckResourceAttr(resourceName, 
"vector_ingestion_configuration.#", acctest.Ct1), + resource.TestCheckResourceAttr(resourceName, "vector_ingestion_configuration.0.chunking_configuration.#", acctest.Ct1), + resource.TestCheckResourceAttr(resourceName, "vector_ingestion_configuration.0.chunking_configuration.0.chunking_strategy", "HIERARCHICAL"), + resource.TestCheckResourceAttr(resourceName, "vector_ingestion_configuration.0.chunking_configuration.0.hierarchical_chunking_configuration.#", acctest.Ct1), + resource.TestCheckResourceAttr(resourceName, "vector_ingestion_configuration.0.chunking_configuration.0.hierarchical_chunking_configuration.0.overlap_tokens", acctest.Ct2), + resource.TestCheckResourceAttr(resourceName, "vector_ingestion_configuration.0.chunking_configuration.0.hierarchical_chunking_configuration.0.level_configuration.#", acctest.Ct2), + resource.TestCheckResourceAttr(resourceName, "vector_ingestion_configuration.0.chunking_configuration.0.hierarchical_chunking_configuration.0.level_configuration.0.max_tokens", "15"), + resource.TestCheckResourceAttr(resourceName, "vector_ingestion_configuration.0.chunking_configuration.0.hierarchical_chunking_configuration.0.level_configuration.1.max_tokens", acctest.Ct10), + ), + }, + { + ResourceName: resourceName, + ImportState: true, + ImportStateVerify: true, + }, + }, + }) +} + // Prerequisites: // * psql run via null_resource/provisioner "local-exec" // * jq for parsing output from aws cli to retrieve postgres password @@ -356,6 +483,74 @@ resource "aws_bedrockagent_data_source" "test" { `, rName)) } +func testAccDataSourceConfig_fullSemantic(rName, embeddingModel string) string { + return acctest.ConfigCompose(testAccDataSourceConfig_base(rName, embeddingModel), fmt.Sprintf(` +resource "aws_bedrockagent_data_source" "test" { + name = %[1]q + knowledge_base_id = aws_bedrockagent_knowledge_base.test.id + data_deletion_policy = "RETAIN" + description = "testing" + + data_source_configuration { + type = "S3" + + s3_configuration { + 
bucket_arn = aws_s3_bucket.test.arn + inclusion_prefixes = ["Europe/France/Nouvelle-Aquitaine/Bordeaux"] + } + } + + vector_ingestion_configuration { + chunking_configuration { + chunking_strategy = "SEMANTIC" + + semantic_chunking_configuration { + breakpoint_percentile_threshold = 80 + buffer_size = 1 + max_token = 10 + } + } + } +} +`, rName)) +} + +func testAccDataSourceConfig_fullHierarchical(rName, embeddingModel string) string { + return acctest.ConfigCompose(testAccDataSourceConfig_base(rName, embeddingModel), fmt.Sprintf(` +resource "aws_bedrockagent_data_source" "test" { + name = %[1]q + knowledge_base_id = aws_bedrockagent_knowledge_base.test.id + data_deletion_policy = "RETAIN" + description = "testing" + + data_source_configuration { + type = "S3" + + s3_configuration { + bucket_arn = aws_s3_bucket.test.arn + inclusion_prefixes = ["Europe/France/Nouvelle-Aquitaine/Bordeaux"] + } + } + + vector_ingestion_configuration { + chunking_configuration { + chunking_strategy = "HIERARCHICAL" + + hierarchical_chunking_configuration { + overlap_tokens = 2 + level_configuration { + max_tokens = 15 + } + level_configuration { + max_tokens = 10 + } + } + } + } +} +`, rName)) +} + func testAccDataSourceConfig_updated(rName, embeddingModel string) string { return acctest.ConfigCompose(testAccDataSourceConfig_base(rName, embeddingModel), fmt.Sprintf(` resource "aws_bedrockagent_data_source" "test" { diff --git a/website/docs/r/bedrockagent_data_source.html.markdown b/website/docs/r/bedrockagent_data_source.html.markdown index e153730831b..37c26ea47f8 100644 --- a/website/docs/r/bedrockagent_data_source.html.markdown +++ b/website/docs/r/bedrockagent_data_source.html.markdown @@ -73,8 +73,10 @@ The `vector_ingestion_configuration` configuration block supports the following The `chunking_configuration` configuration block supports the following arguments: -* `chunking_strategy` - (Required, Forces new resource) Option for chunking your source data, either in fixed-sized 
chunks or as one chunk. Valid values: `FIXED_SIZE`, `NONE`. -* `fixed_size_chunking_configuration` - (Optional, Forces new resource) Configurations for when you choose fixed-size chunking. If you set the chunking_strategy as `NONE`, exclude this field. See [`fixed_size_chunking_configuration`](#fixed_size_chunking_configuration-block) for details. +* `chunking_strategy` - (Required, Forces new resource) Option for chunking your source data: in fixed-size, hierarchical, or semantic chunks, or as one chunk. Valid values: `FIXED_SIZE`, `HIERARCHICAL`, `SEMANTIC`, `NONE`. +* `fixed_size_chunking_configuration` - (Optional, Forces new resource) Configurations for when you choose fixed-size chunking. Requires `chunking_strategy` to be `FIXED_SIZE`. See [`fixed_size_chunking_configuration`](#fixed_size_chunking_configuration-block) for details. +* `hierarchical_chunking_configuration` - (Optional, Forces new resource) Configurations for when you choose hierarchical chunking. Requires `chunking_strategy` to be `HIERARCHICAL`. See [`hierarchical_chunking_configuration`](#hierarchical_chunking_configuration-block) for details. +* `semantic_chunking_configuration` - (Optional, Forces new resource) Configurations for when you choose semantic chunking. Requires `chunking_strategy` to be `SEMANTIC`. See [`semantic_chunking_configuration`](#semantic_chunking_configuration-block) for details. ### `fixed_size_chunking_configuration` block @@ -83,6 +85,27 @@ The `fixed_size_chunking_configuration` block supports the following arguments: * `max_tokens` - (Required, Forces new resource) Maximum number of tokens to include in a chunk. * `overlap_percentage` - (Optional, Forces new resource) Percentage of overlap between adjacent chunks of a data source. +### `hierarchical_chunking_configuration` block + +The `hierarchical_chunking_configuration` block supports the following arguments: + +* `level_configuration` - (Required, Forces new resource) Token limits for each layer of the hierarchy.
Must contain exactly two `level_configuration` blocks. See [`level_configuration`](#level_configuration-block) for details. +* `overlap_tokens` - (Required, Forces new resource) The number of tokens to repeat across chunks in the same layer. + +### `level_configuration` block + +The `level_configuration` block supports the following arguments: + +* `max_tokens` - (Required, Forces new resource) The maximum number of tokens that a chunk can contain in this layer. Must be between `1` and `8192`. + +### `semantic_chunking_configuration` block + +The `semantic_chunking_configuration` block supports the following arguments: + +* `breakpoint_percentile_threshold` - (Required, Forces new resource) The dissimilarity threshold for splitting chunks. Must be between `50` and `99`. +* `buffer_size` - (Required, Forces new resource) The buffer size. Must be `0` or `1`. +* `max_token` - (Required, Forces new resource) The maximum number of tokens a chunk can contain. Must be at least `1`. + ## Attribute Reference This resource exports the following attributes in addition to the arguments above: