Skip to content

Commit

Permalink
Merge pull request #39138 from drewtul/f-add-advanced-rag
Browse files Browse the repository at this point in the history
Support for alternative semantic and hierarchical chunking configurations
  • Loading branch information
jar-b authored Sep 4, 2024
2 parents 2d1f707 + 430e1f6 commit 0e7f8a7
Show file tree
Hide file tree
Showing 5 changed files with 399 additions and 4 deletions.
3 changes: 3 additions & 0 deletions .changelog/39138.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:enhancement
resource/aws_bedrockagent_data_source: Add `semantic_chunking_configuration` and `hierarchical_chunking_configuration` arguments
```
2 changes: 2 additions & 0 deletions internal/service/bedrockagent/bedrockagent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ func TestAccBedrockAgent_serial(t *testing.T) {
acctest.CtDisappears: testAccDataSource_disappears,
"full": testAccDataSource_full,
"update": testAccDataSource_update,
"semantic": testAccDataSource_fullSemantic,
"hierarchical": testAccDataSource_fullHierarchical,
},
}

Expand Down
176 changes: 174 additions & 2 deletions internal/service/bedrockagent/data_source.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,12 @@ import (
"github.com/aws/aws-sdk-go-v2/service/bedrockagent"
awstypes "github.com/aws/aws-sdk-go-v2/service/bedrockagent/types"
"github.com/hashicorp/terraform-plugin-framework-timeouts/resource/timeouts"
"github.com/hashicorp/terraform-plugin-framework-validators/int32validator"
"github.com/hashicorp/terraform-plugin-framework-validators/int64validator"
"github.com/hashicorp/terraform-plugin-framework-validators/listvalidator"
"github.com/hashicorp/terraform-plugin-framework-validators/setvalidator"
"github.com/hashicorp/terraform-plugin-framework-validators/stringvalidator"
"github.com/hashicorp/terraform-plugin-framework/path"
"github.com/hashicorp/terraform-plugin-framework/resource"
"github.com/hashicorp/terraform-plugin-framework/resource/schema"
"github.com/hashicorp/terraform-plugin-framework/resource/schema/int64planmodifier"
Expand All @@ -41,6 +43,13 @@ import (
"github.com/hashicorp/terraform-provider-aws/names"
)

const (
hierarchicalLevelConfigurations = 2
hierarchicalMaxTokens = 8192
semanticBreakpointPercentileThresholdMin = 50
semanticBreakpointPercentileThresholdMax = 99
)

// @FrameworkResource(name="Data Source")
func newDataSourceResource(_ context.Context) (resource.ResourceWithConfigure, error) {
r := &dataSourceResource{}
Expand Down Expand Up @@ -200,6 +209,7 @@ func (r *dataSourceResource) Schema(ctx context.Context, request resource.Schema
},
Validators: []validator.List{
listvalidator.SizeAtMost(1),
listvalidator.ConflictsWith(path.MatchRelative().AtParent().AtName("hierarchical_chunking_configuration"), path.MatchRelative().AtParent().AtName("semantic_chunking_configuration")),
},
NestedObject: schema.NestedBlockObject{
Attributes: map[string]schema.Attribute{
Expand All @@ -224,6 +234,136 @@ func (r *dataSourceResource) Schema(ctx context.Context, request resource.Schema
},
},
},
"hierarchical_chunking_configuration": schema.ListNestedBlock{
CustomType: fwtypes.NewListNestedObjectTypeOf[hierarchicalChunkingConfigurationModel](ctx),
PlanModifiers: []planmodifier.List{
listplanmodifier.RequiresReplace(),
},
Validators: []validator.List{
listvalidator.SizeAtMost(1),
listvalidator.ConflictsWith(path.MatchRelative().AtParent().AtName("fixed_size_chunking_configuration"), path.MatchRelative().AtParent().AtName("semantic_chunking_configuration")),
},
NestedObject: schema.NestedBlockObject{
Attributes: map[string]schema.Attribute{
"overlap_tokens": schema.Int32Attribute{
Required: true,
},
},
Blocks: map[string]schema.Block{
"level_configuration": schema.ListNestedBlock{
CustomType: fwtypes.NewListNestedObjectTypeOf[hierarchicalChunkingLevelConfigurationModel](ctx),
PlanModifiers: []planmodifier.List{
listplanmodifier.RequiresReplace(),
},
Validators: []validator.List{
listvalidator.SizeBetween(hierarchicalLevelConfigurations, hierarchicalLevelConfigurations),
},
NestedObject: schema.NestedBlockObject{
Attributes: map[string]schema.Attribute{
"max_tokens": schema.Int32Attribute{
Required: true,
Validators: []validator.Int32{
int32validator.Between(1, hierarchicalMaxTokens),
},
},
},
},
},
},
},
},
"semantic_chunking_configuration": schema.ListNestedBlock{
CustomType: fwtypes.NewListNestedObjectTypeOf[semanticChunkingConfigurationModel](ctx),
PlanModifiers: []planmodifier.List{
listplanmodifier.RequiresReplace(),
},
Validators: []validator.List{
listvalidator.SizeAtMost(1),
listvalidator.ConflictsWith(path.MatchRelative().AtParent().AtName("fixed_size_chunking_configuration"), path.MatchRelative().AtParent().AtName("hierarchical_chunking_configuration")),
},
NestedObject: schema.NestedBlockObject{
Attributes: map[string]schema.Attribute{
"breakpoint_percentile_threshold": schema.Int32Attribute{
Required: true,
Validators: []validator.Int32{
int32validator.Between(semanticBreakpointPercentileThresholdMin, semanticBreakpointPercentileThresholdMax),
},
},
"buffer_size": schema.Int32Attribute{
Required: true,
Validators: []validator.Int32{
int32validator.Between(0, 1),
},
},
"max_token": schema.Int32Attribute{
Required: true,
Validators: []validator.Int32{
int32validator.AtLeast(1),
},
},
},
},
},
},
},
},
"parsing_configuration": schema.ListNestedBlock{
CustomType: fwtypes.NewListNestedObjectTypeOf[parsingConfigurationModel](ctx),
PlanModifiers: []planmodifier.List{
listplanmodifier.RequiresReplace(),
},
Validators: []validator.List{
listvalidator.SizeAtMost(1),
},
NestedObject: schema.NestedBlockObject{
Attributes: map[string]schema.Attribute{
"parsing_strategy": schema.StringAttribute{
CustomType: fwtypes.StringEnumType[awstypes.ParsingStrategy](),
Required: true,
PlanModifiers: []planmodifier.String{
stringplanmodifier.RequiresReplace(),
},
},
},
Blocks: map[string]schema.Block{
"bedrock_foundation_model_configuration": schema.ListNestedBlock{
CustomType: fwtypes.NewListNestedObjectTypeOf[bedrockFoundationModelConfigurationModel](ctx),
PlanModifiers: []planmodifier.List{
listplanmodifier.RequiresReplace(),
},
Validators: []validator.List{
listvalidator.SizeAtMost(1),
},
NestedObject: schema.NestedBlockObject{
Attributes: map[string]schema.Attribute{
"model_arn": schema.StringAttribute{
CustomType: fwtypes.ARNType,
Required: true,
PlanModifiers: []planmodifier.String{
stringplanmodifier.RequiresReplace(),
},
},
},
Blocks: map[string]schema.Block{
"parsing_prompt": schema.ListNestedBlock{
CustomType: fwtypes.NewListNestedObjectTypeOf[parsingPromptModel](ctx),
PlanModifiers: []planmodifier.List{
listplanmodifier.RequiresReplace(),
},
Validators: []validator.List{
listvalidator.SizeAtMost(1),
},
NestedObject: schema.NestedBlockObject{
Attributes: map[string]schema.Attribute{
"parsing_prompt_string": schema.StringAttribute{
Required: true,
},
},
},
},
},
},
},
},
},
},
Expand Down Expand Up @@ -508,14 +648,46 @@ type serverSideEncryptionConfigurationModel struct {

type vectorIngestionConfigurationModel struct {
ChunkingConfiguration fwtypes.ListNestedObjectValueOf[chunkingConfigurationModel] `tfsdk:"chunking_configuration"`
ParsingConfiguration fwtypes.ListNestedObjectValueOf[parsingConfigurationModel] `tfsdk:"parsing_configuration"`
}

type parsingConfigurationModel struct {
ParsingStrategy fwtypes.StringEnum[awstypes.ParsingStrategy] `tfsdk:"parsing_strategy"`
BedrockFoundationModelConfiguration fwtypes.ListNestedObjectValueOf[bedrockFoundationModelConfigurationModel] `tfsdk:"bedrock_foundation_model_configuration"`
}

type bedrockFoundationModelConfigurationModel struct {
ModelArn fwtypes.ARN `tfsdk:"model_arn"`
ParsingPrompt fwtypes.ListNestedObjectValueOf[parsingPromptModel] `tfsdk:"parsing_prompt"`
}

type parsingPromptModel struct {
ParsingPromptText types.String `tfsdk:"parsing_prompt_string"`
}

type chunkingConfigurationModel struct {
ChunkingStrategy fwtypes.StringEnum[awstypes.ChunkingStrategy] `tfsdk:"chunking_strategy"`
FixedSizeChunkingConfiguration fwtypes.ListNestedObjectValueOf[fixedSizeChunkingConfigurationModel] `tfsdk:"fixed_size_chunking_configuration"`
ChunkingStrategy fwtypes.StringEnum[awstypes.ChunkingStrategy] `tfsdk:"chunking_strategy"`
FixedSizeChunkingConfiguration fwtypes.ListNestedObjectValueOf[fixedSizeChunkingConfigurationModel] `tfsdk:"fixed_size_chunking_configuration"`
HierarchicalChunkingConfiguration fwtypes.ListNestedObjectValueOf[hierarchicalChunkingConfigurationModel] `tfsdk:"hierarchical_chunking_configuration"`
SemanticChunkingConfiguration fwtypes.ListNestedObjectValueOf[semanticChunkingConfigurationModel] `tfsdk:"semantic_chunking_configuration"`
}

type fixedSizeChunkingConfigurationModel struct {
MaxTokens types.Int64 `tfsdk:"max_tokens"`
OverlapPercentage types.Int64 `tfsdk:"overlap_percentage"`
}

type semanticChunkingConfigurationModel struct {
BreakpointPercentileThreshold types.Int32 `tfsdk:"breakpoint_percentile_threshold"`
BufferSize types.Int32 `tfsdk:"buffer_size"`
MaxTokens types.Int32 `tfsdk:"max_token"`
}

type hierarchicalChunkingConfigurationModel struct {
LevelConfigurations fwtypes.ListNestedObjectValueOf[hierarchicalChunkingLevelConfigurationModel] `tfsdk:"level_configuration"`
OverlapTokens types.Int32 `tfsdk:"overlap_tokens"`
}

type hierarchicalChunkingLevelConfigurationModel struct {
MaxTokens types.Int32 `tfsdk:"max_tokens"`
}
Loading

0 comments on commit 0e7f8a7

Please sign in to comment.