Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[@azure/cosmos] Add check for TOP/LIMIT in nonStreamingOrderBy queries #30503

Merged
merged 9 commits into from
Aug 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion sdk/cosmosdb/cosmos/review/cosmos.api.md
Original file line number Diff line number Diff line change
Expand Up @@ -1044,6 +1044,7 @@ export interface FeedOptions extends SharedOptions {
type: string;
condition: string;
};
allowUnboundedNonStreamingQueries?: boolean;
bufferItems?: boolean;
// @deprecated
continuation?: string;
Expand Down Expand Up @@ -2542,7 +2543,6 @@ export interface VectorEmbedding {

// @public
export enum VectorEmbeddingDataType {
Float16 = "float16",
Float32 = "float32",
Int8 = "int8",
UInt8 = "uint8"
Expand Down
4 changes: 0 additions & 4 deletions sdk/cosmosdb/cosmos/src/documents/VectorEmbeddingPolicy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,6 @@ export interface VectorEmbedding {
* Represents the data type of the vector.
*/
export enum VectorEmbeddingDataType {
/**
* 16-bit floating point number.
*/
Float16 = "float16",
/**
* 32-bit floating point number.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// Licensed under the MIT license.
import { ClientContext } from "../ClientContext";
import { Response, FeedOptions } from "../request";
import { PartitionedQueryExecutionInfo, QueryInfo } from "../request/ErrorResponse";
import { ErrorResponse, PartitionedQueryExecutionInfo, QueryInfo } from "../request/ErrorResponse";
import { CosmosHeaders } from "./CosmosHeaders";
import { OffsetLimitEndpointComponent } from "./EndpointComponent/OffsetLimitEndpointComponent";
import { OrderByEndpointComponent } from "./EndpointComponent/OrderByEndpointComponent";
Expand All @@ -27,7 +27,7 @@ export class PipelinedQueryExecutionContext implements ExecutionContext {
private pageSize: number;
private vectorSearchBufferSize: number = 0;
private static DEFAULT_PAGE_SIZE = 10;
private static DEFAULT_VECTOR_SEARCH_BUFFER_SIZE = 2000;
private static DEFAULT_MAX_VECTOR_SEARCH_BUFFER_SIZE = 50000;
private nonStreamingOrderBy = false;

constructor(
Expand All @@ -50,10 +50,24 @@ export class PipelinedQueryExecutionContext implements ExecutionContext {
const sortOrders = partitionedQueryExecutionInfo.queryInfo.orderBy;
// TODO: Currently we don't get any field from backend to determine streaming queries
if (this.nonStreamingOrderBy) {
if (!options.allowUnboundedNonStreamingQueries) {
this.checkQueryConstraints(partitionedQueryExecutionInfo.queryInfo);
}

this.vectorSearchBufferSize = this.calculateVectorSearchBufferSize(
partitionedQueryExecutionInfo.queryInfo,
options,
);
const maxBufferSize = options["vectorSearchBufferSize"]
? options["vectorSearchBufferSize"]
: PipelinedQueryExecutionContext.DEFAULT_MAX_VECTOR_SEARCH_BUFFER_SIZE;

if (this.vectorSearchBufferSize > maxBufferSize) {
throw new ErrorResponse(
`Executing a vector search query with TOP or OFFSET + LIMIT value ${this.vectorSearchBufferSize} larger than the vectorSearchBufferSize ${maxBufferSize} ` +
`is not allowed`,
);
}

const distinctType = partitionedQueryExecutionInfo.queryInfo.distinctType;
const context: ExecutionContext = new ParallelQueryExecutionContext(
Expand Down Expand Up @@ -261,6 +275,19 @@ export class PipelinedQueryExecutionContext implements ExecutionContext {
? queryInfo.offset + queryInfo.limit
: options["vectorSearchBufferSize"] && options["vectorSearchBufferSize"] > 0
? options["vectorSearchBufferSize"]
: PipelinedQueryExecutionContext.DEFAULT_VECTOR_SEARCH_BUFFER_SIZE;
: PipelinedQueryExecutionContext.DEFAULT_MAX_VECTOR_SEARCH_BUFFER_SIZE;
}

/**
 * Rejects a query that carries neither a TOP nor a LIMIT value.
 *
 * A value counts as present when it is truthy or exactly `0`, so an explicit
 * `TOP 0` / `LIMIT 0` is accepted.
 *
 * @param queryInfo - Query info whose `top` and `limit` fields are inspected.
 * @throws ErrorResponse when neither `top` nor `limit` is present.
 */
private checkQueryConstraints(queryInfo: QueryInfo): void {
  // Truthy values and a literal 0 both mean "the clause was specified".
  const isSpecified = (value: unknown): boolean => Boolean(value) || value === 0;

  if (isSpecified(queryInfo.top) || isSpecified(queryInfo.limit)) {
    return;
  }

  throw new ErrorResponse(
    "Executing a vector search query without TOP or LIMIT can consume a large number of RUs " +
      "very fast and have long runtimes. Please ensure you are using one of the above two filters " +
      "with your vector search query.",
  );
}
}
5 changes: 5 additions & 0 deletions sdk/cosmosdb/cosmos/src/request/FeedOptions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -123,4 +123,9 @@ export interface FeedOptions extends SharedOptions {
* Default: false. Set to true to avoid error from an old gateway that doesn't support this feature.
*/
disableNonStreamingOrderByQuery?: boolean;
/**
* Valid only for non-streaming ORDER BY queries.
* Default: false. When set to true, queries without a TOP or LIMIT clause bypass the default check that blocks unbounded non-streaming queries.
*/
allowUnboundedNonStreamingQueries?: boolean;
}
Original file line number Diff line number Diff line change
Expand Up @@ -259,14 +259,14 @@ describe("Vector search feature", async () => {
it("should execute vector search query", async function () {
// create a queryiterator to run vector search query
const query =
"SELECT c.id AS Id, VectorDistance([0.056419, -0.021141], c.vector1, true, {distanceFunction:'euclidean'}) AS similarityScore from c ORDER BY VectorDistance([0.056419, -0.021141], c.vector1, true, {distanceFunction:'euclidean'})";
"SELECT TOP 10 c.id AS Id, VectorDistance([0.056419, -0.021141], c.vector1, true, {distanceFunction:'euclidean'}) AS similarityScore from c ORDER BY VectorDistance([0.056419, -0.021141], c.vector1, true, {distanceFunction:'euclidean'})";
await executeQueryAndVerifyOrder(container, query, 3, false);
});

it("should execute distinct vector search query", async function () {
// create a queryiterator to run vector search query
const query =
"SELECT distinct c.id AS Id, VectorDistance([0.056419, -0.021141], c.vector1, true, {distanceFunction:'euclidean'}) AS similarityScore from c ORDER BY VectorDistance([0.056419, -0.021141], c.vector1, true, {distanceFunction:'euclidean'})";
"SELECT distinct TOP 10 c.id AS Id, VectorDistance([0.056419, -0.021141], c.vector1, true, {distanceFunction:'euclidean'}) AS similarityScore from c ORDER BY VectorDistance([0.056419, -0.021141], c.vector1, true, {distanceFunction:'euclidean'})";
await executeQueryAndVerifyOrder(container, query, 3, false);
});

Expand Down
Loading