Skip to content

Commit

Permalink
feat: trgm index for search
Browse files Browse the repository at this point in the history
  • Loading branch information
tea-artist committed Jan 13, 2025
1 parent 0d7f64f commit a1a666d
Show file tree
Hide file tree
Showing 15 changed files with 437 additions and 149 deletions.
1 change: 1 addition & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
"teableio",
"testid",
"topo",
"trgm",
"umami",
"univer",
"zustand"
Expand Down
3 changes: 3 additions & 0 deletions apps/nestjs-backend/src/db-provider/db.provider.interface.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import type { IAggregationQueryInterface } from './aggregation-query/aggregation
import type { BaseQueryAbstract } from './base-query/abstract';
import type { IFilterQueryInterface } from './filter-query/filter-query.interface';
import type { IGroupQueryExtra, IGroupQueryInterface } from './group-query/group-query.interface';
import type { IndexBuilderAbstract } from './search-query/index-builder.abstract';
import type { ISortQueryInterface } from './sort-query/sort-query.interface';

export type IFilterQueryExtra = {
Expand Down Expand Up @@ -169,6 +170,8 @@ export interface IDbProvider {

/**
 * Returns SQL for checking whether a full-text-search index exists on `dbTableName`, or `null`.
 * NOTE(review): `null` presumably means the provider has no such check — confirm against implementations.
 */
getExistFtsIndexSql(originQueryBuilder: Knex.QueryBuilder, dbTableName: string): string | null;

/** Returns the provider's builder for search-index SQL (create, drop and existence check). */
trgmIndex(): IndexBuilderAbstract;

shareFilterCollaboratorsQuery(
originQueryBuilder: Knex.QueryBuilder,
dbFieldName: string,
Expand Down
5 changes: 5 additions & 0 deletions apps/nestjs-backend/src/db-provider/postgres.provider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import { FilterQueryPostgres } from './filter-query/postgres/filter-query.postgr
import type { IGroupQueryExtra, IGroupQueryInterface } from './group-query/group-query.interface';
import { GroupQueryPostgres } from './group-query/group-query.postgres';
import { SearchQueryAbstract } from './search-query/abstract';
import { IndexBuilderPostgres } from './search-query/index-builder.postgres';
import { FullTextSearchQueryPostgresBuilder } from './search-query/search-fts-query.postgres';
import {
SearchQueryPostgresBuilder,
Expand Down Expand Up @@ -402,6 +403,10 @@ export class PostgresProvider implements IDbProvider {
return FullTextSearchQueryPostgresBuilder.getExistFtsIndexSql(originQueryBuilder, dbTableName);
}

/** Creates a new PostgreSQL index builder, which emits SQL for trigram (`gin_trgm_ops`) search indexes. */
trgmIndex() {
return new IndexBuilderPostgres();
}

shareFilterCollaboratorsQuery(
originQueryBuilder: Knex.QueryBuilder,
dbFieldName: string,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import type { IFieldInstance } from '../../features/field/model/factory';

/**
 * Contract for database-specific builders of the SQL that manages a table's search indexes.
 * Implementations only produce SQL text; executing it is left to the caller.
 */
export abstract class IndexBuilderAbstract {
/** Returns a SQL statement that targets the search indexes of `dbTableName` for removal. */
abstract getDropIndexSql(dbTableName: string): string;

/**
 * Returns the SQL statements that create search indexes on `dbTableName` for `searchFields`.
 * The number of statements per field is implementation-specific.
 */
abstract getCreateIndexSql(dbTableName: string, searchFields: IFieldInstance[]): string[];

/** Returns a SQL statement that checks whether a search index exists for `dbTableName`. */
abstract getExistFtsIndexSql(dbTableName: string): string;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
/* eslint-disable sonarjs/no-duplicate-string */
import { CellValueType } from '@teable/core';
import type { IFieldInstance } from '../../features/field/model/factory';
import { IndexBuilderAbstract } from './index-builder.abstract';
import type { ISearchCellValueType } from './types';

/**
 * Builds the PostgreSQL text expressions that search indexes are created on.
 */
export class FieldFormatter {
  /**
   * Quotes a column name as a PostgreSQL identifier, doubling any embedded double quote.
   */
  private static quoteColumn(dbFieldName: string): string {
    return `"${dbFieldName.replace(/"/g, '""')}"`;
  }

  /**
   * Returns the SQL expression that renders a field's column as searchable text.
   *
   * The column reference is interpolated directly instead of being substituted into a
   * template via `String.prototype.replace`, so characters such as `$` in the column name
   * can never be interpreted as replacement patterns.
   *
   * @param field - field whose column should be rendered as text
   * @param isArray - when true, the whole column is cast to text in one go
   * @returns the expression, or `null` for date-time fields, which are not indexed
   */
  static getSearchableExpression(field: IFieldInstance, isArray = false): string | null {
    const { cellValueType, dbFieldName, options, isStructuredCellValue } = field;

    // date type not support full text search (applies to single and multiple values alike)
    if ((cellValueType as ISearchCellValueType) === CellValueType.DateTime) {
      return null;
    }

    const column = FieldFormatter.quoteColumn(dbFieldName);

    // handle array type
    // index expressions cannot contain sub-queries, so the column is cast to text as a whole
    // rather than being formatted element by element
    if (isArray) {
      return `${column}::text`;
    }

    // handle single value type
    switch (cellValueType as ISearchCellValueType) {
      case CellValueType.Number: {
        const precision =
          (options as { formatting?: { precision?: number } })?.formatting?.precision ?? 0;
        return `ROUND(${column}::numeric, ${precision})::text`;
      }
      case CellValueType.String: {
        // structured values are searched by their "title" key
        return isStructuredCellValue ? `${column}->>'title'` : `${column}::text`;
      }
      default:
        return `${column}::text`;
    }
  }

  /**
   * Expression used when generating an index for `field`; multi-value fields take the
   * array branch of {@link FieldFormatter.getSearchableExpression}.
   */
  static getIndexExpression(field: IFieldInstance): string | null {
    return this.getSearchableExpression(field, field.isMultipleCellValue);
  }
}

/**
 * PostgreSQL implementation of the search-index builder. It emits SQL that creates, drops
 * and detects per-field GIN indexes using the `gin_trgm_ops` operator class.
 *
 * Index names follow `idx_trgm_<table>_<dbFieldName>`.
 * NOTE(review): PostgreSQL truncates identifiers longer than 63 bytes, so very long
 * table/field names may produce colliding index names — confirm whether that can occur here.
 */
export class IndexBuilderPostgres extends IndexBuilderAbstract {
  /** Common name prefix of every index this builder creates for `table`. */
  private getIndexPrefix(table: string): string {
    return `idx_trgm_${table}_`;
  }

  private getIndexName(table: string, field: IFieldInstance): string {
    return `${this.getIndexPrefix(table)}${field.dbFieldName}`;
  }

  /**
   * Splits a `schema.table` name into its two parts.
   *
   * @throws Error when either part is missing; otherwise the generated SQL would silently
   * reference an identifier literally named "undefined".
   */
  private parseTableName(dbTableName: string): { schema: string; table: string } {
    const [schema, table] = dbTableName.split('.');
    if (!schema || !table) {
      throw new Error(
        `Expected a schema-qualified table name ("schema.table"), got "${dbTableName}"`
      );
    }
    return { schema, table };
  }

  /** Quotes a value as a SQL identifier, doubling embedded double quotes. */
  private quoteIdentifier(name: string): string {
    return `"${name.replace(/"/g, '""')}"`;
  }

  /** Quotes a value as a SQL string literal, doubling embedded single quotes. */
  private quoteLiteral(value: string): string {
    return `'${value.replace(/'/g, "''")}'`;
  }

  /**
   * Builds the `CREATE INDEX IF NOT EXISTS` statement for one field.
   *
   * @returns the statement, or `null` when the field has no index expression
   */
  createOneIndexSql(dbTableName: string, field: IFieldInstance): string | null {
    const { schema, table } = this.parseTableName(dbTableName);
    const expression = FieldFormatter.getIndexExpression(field);
    if (expression === null) {
      return null;
    }

    const indexName = this.getIndexName(table, field);
    return `
      CREATE INDEX IF NOT EXISTS ${this.quoteIdentifier(indexName)}
      ON ${this.quoteIdentifier(schema)}.${this.quoteIdentifier(table)}
      USING gin ((${expression}) gin_trgm_ops)
    `;
  }

  /**
   * Builds an anonymous `DO` block that drops every index on the table whose name starts
   * with this builder's prefix.
   */
  getDropIndexSql(dbTableName: string): string {
    const { schema, table } = this.parseTableName(dbTableName);
    const schemaLiteral = this.quoteLiteral(schema);
    const tableLiteral = this.quoteLiteral(table);
    const prefixLiteral = this.quoteLiteral(this.getIndexPrefix(table));

    // strpos(...) = 1 is a literal prefix match; LIKE would treat every "_" as a wildcard.
    // format('%I.%I') lets PostgreSQL quote the schema and index identifiers itself.
    return `
      DO $$
      DECLARE
        _index record;
      BEGIN
        FOR _index IN
          SELECT indexname
          FROM pg_indexes
          WHERE schemaname = ${schemaLiteral}
            AND tablename = ${tableLiteral}
            AND strpos(indexname::text, ${prefixLiteral}) = 1
        LOOP
          EXECUTE format('DROP INDEX IF EXISTS %I.%I', ${schemaLiteral}, _index.indexname);
        END LOOP;
      END $$;
    `;
  }

  /**
   * Builds one `CREATE INDEX` statement per indexable field; fields without an index
   * expression are skipped.
   */
  getCreateIndexSql(dbTableName: string, searchFields: IFieldInstance[]): string[] {
    return searchFields
      .map((field) => this.createOneIndexSql(dbTableName, field))
      .filter((sql): sql is string => sql !== null);
  }

  /**
   * Builds a `SELECT EXISTS (...)` query reporting whether the table has at least one index
   * created by this builder. Uses the same literal prefix match as the drop statement.
   */
  getExistFtsIndexSql(dbTableName: string): string {
    const { schema, table } = this.parseTableName(dbTableName);
    return `
      SELECT EXISTS (
        SELECT 1
        FROM pg_indexes
        WHERE schemaname = ${this.quoteLiteral(schema)}
          AND tablename = ${this.quoteLiteral(table)}
          AND strpos(indexname::text, ${this.quoteLiteral(this.getIndexPrefix(table))}) = 1
      )`;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import { CellValueType } from '@teable/core';
import type { IFieldInstance } from '../../features/field/model/factory';
import { IndexBuilderAbstract } from './index-builder.abstract';
import type { ISearchCellValueType } from './types';

/**
 * Builds the SQLite text expressions that search indexes are populated from.
 */
export class FieldFormatter {
  /**
   * Returns the SQL expression that renders a field's column as searchable text.
   *
   * The operand is interpolated directly into the expression rather than substituted with
   * `String.prototype.replace`, so a `$` in the column name can never be interpreted as a
   * replacement pattern.
   *
   * @param field - field whose column should be rendered as text
   * @param isArray - when true, the column is split on commas, each piece is formatted, and
   * the pieces are joined back with `', '`
   */
  static getSearchableExpression(field: IFieldInstance, isArray = false): string {
    const { cellValueType, dbFieldName, options, isStructuredCellValue } = field;

    // renders the per-value expression around an arbitrary SQL operand
    const render = (operand: string): string => {
      switch (cellValueType as ISearchCellValueType) {
        case CellValueType.Number: {
          const precision =
            (options as { formatting?: { precision?: number } })?.formatting?.precision ?? 0;
          return `ROUND(CAST(${operand} AS REAL), ${precision})`;
        }
        case CellValueType.DateTime: {
          // SQLite doesn't support timezone conversion directly
          // We'll format the date in a basic format
          return `strftime('%Y-%m-%d %H:%M', ${operand})`;
        }
        case CellValueType.String: {
          // structured values are searched by their "title" key
          return isStructuredCellValue
            ? `json_extract(${operand}, '$.title')`
            : `CAST(${operand} AS TEXT)`;
        }
        default:
          return `CAST(${operand} AS TEXT)`;
      }
    };

    // handle array type: a recursive CTE peels one comma-separated piece ("word") per step
    if (isArray) {
      return `(
        WITH RECURSIVE split(word, str) AS (
          SELECT '', json_extract(${dbFieldName}, '$') || ','
          UNION ALL
          SELECT
            substr(str, 0, instr(str, ',')),
            substr(str, instr(str, ',') + 1)
          FROM split WHERE str != ''
        )
        SELECT group_concat(${render('word')}, ', ')
        FROM split WHERE word != ''
      )`;
    }

    // handle single value type
    return render(dbFieldName);
  }

  /**
   * Expression used when generating an index for `field`; multi-value fields take the
   * array branch of {@link FieldFormatter.getSearchableExpression}.
   */
  static getIndexExpression(field: IFieldInstance): string {
    return this.getSearchableExpression(field, field.isMultipleCellValue);
  }
}

/**
 * SQLite implementation of the search-index builder. Each field gets its own FTS5 virtual
 * table named `idx_fts_<dbTableName>_<dbFieldName>`.
 */
export class IndexBuilderSqlite extends IndexBuilderAbstract {
  /**
   * Returns the right-hand side of a `LIKE` predicate that matches only names starting with
   * `idx_fts_<dbTableName>_`.
   *
   * `_` and `%` are LIKE wildcards, so without escaping the pattern for table `tbl1` would
   * also match `idx_fts_tbl10_x`. SQLite has no default escape character, hence the explicit
   * `ESCAPE` clause.
   */
  private getIndexNameLikeClause(dbTableName: string): string {
    const prefix = `idx_fts_${dbTableName}_`;
    const escapedPrefix = prefix
      .replace(/[\\%_]/g, (wildcard) => `\\${wildcard}`)
      .replace(/'/g, "''");
    return `'${escapedPrefix}%' ESCAPE '\\'`;
  }

  /**
   * Builds the two statements for one field: create the FTS5 virtual table, then fill it
   * from the field's index expression.
   */
  private createOneIndexSql(dbTableName: string, field: IFieldInstance): string[] {
    const indexName = `idx_fts_${dbTableName}_${field.dbFieldName}`;
    const expression = FieldFormatter.getIndexExpression(field);

    return [
      `CREATE VIRTUAL TABLE IF NOT EXISTS "${indexName}" USING fts5(
        content,
        content=${dbTableName},
        content_rowid=__id,
        tokenize='porter unicode61'
      )`,
      `INSERT INTO "${indexName}"(content) SELECT ${expression} FROM "${dbTableName}"`,
    ];
  }

  /**
   * Builds a query that *generates* one `DROP TABLE IF EXISTS` statement per matching table.
   * The query itself drops nothing.
   * NOTE(review): the caller presumably has to execute the returned rows — confirm.
   */
  getDropIndexSql(dbTableName: string): string {
    return `SELECT 'DROP TABLE IF EXISTS "' || name || '";'
      FROM sqlite_master
      WHERE type='table'
      AND name LIKE ${this.getIndexNameLikeClause(dbTableName)}`;
  }

  /** Builds the create and populate statements for every search field, in field order. */
  getCreateIndexSql(dbTableName: string, searchFields: IFieldInstance[]): string[] {
    return searchFields.flatMap((field) => this.createOneIndexSql(dbTableName, field));
  }

  /**
   * Builds a `SELECT EXISTS (...)` query reporting whether any table belonging to this
   * table's search index exists.
   */
  getExistFtsIndexSql(dbTableName: string): string {
    return `SELECT EXISTS (
      SELECT 1
      FROM sqlite_master
      WHERE type='table'
      AND name LIKE ${this.getIndexNameLikeClause(dbTableName)}
    )`;
  }
}
Loading

0 comments on commit a1a666d

Please sign in to comment.