Skip to content

Commit

Permalink
feat(catalog): more file type, qa api, and file catalog api (#412)
Browse files Browse the repository at this point in the history
Because

1. catalog needs to support more file types for upload
2. question answering api
3. file catalog api

This commit

add related proto

---------

Co-authored-by: droplet-bot <[email protected]>
  • Loading branch information
Yougigun and droplet-bot authored Aug 5, 2024
1 parent 0f4071f commit 055bbd0
Show file tree
Hide file tree
Showing 5 changed files with 424 additions and 43 deletions.
22 changes: 16 additions & 6 deletions artifact/artifact/v1alpha/artifact.proto
Original file line number Diff line number Diff line change
Expand Up @@ -261,17 +261,17 @@ enum FileProcessStatus {
FILE_PROCESS_STATUS_UNSPECIFIED = 0;
// NOTSTARTED
FILE_PROCESS_STATUS_NOTSTARTED = 1;
// waiting
// file is waiting for embedding process
FILE_PROCESS_STATUS_WAITING = 2;
// embedding process is running
// file is converting
FILE_PROCESS_STATUS_CONVERTING = 3;
// embedding process is done
// file is chunking
FILE_PROCESS_STATUS_CHUNKING = 4;
// embedding process is failed
// file is embedding
FILE_PROCESS_STATUS_EMBEDDING = 5;
// COMPLETED
// completed
FILE_PROCESS_STATUS_COMPLETED = 6;
// Failed
// failed
FILE_PROCESS_STATUS_FAILED = 7;
}

Expand All @@ -291,6 +291,16 @@ enum FileType {
FILE_TYPE_JPEG = 5;
// JPG
FILE_TYPE_JPG = 6;
// HTML
FILE_TYPE_HTML = 7;
// DOCX
FILE_TYPE_DOCX = 8;
// DOC
FILE_TYPE_DOC = 9;
// PPT
FILE_TYPE_PPT = 10;
// PPTX
FILE_TYPE_PPTX = 11;
}

// file metadata
Expand Down
19 changes: 19 additions & 0 deletions artifact/artifact/v1alpha/artifact_public_service.proto
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ package artifact.artifact.v1alpha;
// Artifact definitions
import "artifact/artifact/v1alpha/artifact.proto";
import "artifact/artifact/v1alpha/chunk.proto";
import "artifact/artifact/v1alpha/file_catalog.proto";
import "artifact/artifact/v1alpha/qa.proto";
// Google API
import "google/api/annotations.proto";
import "google/api/visibility.proto";
Expand Down Expand Up @@ -128,4 +130,21 @@ service ArtifactPublicService {
};
option (grpc.gateway.protoc_gen_openapiv2.options.openapiv2_operation) = {tags: "Catalog"};
}

// Ask a question against a catalog
//
// Accepts a QuestionAnsweringRequest as the HTTP request body and responds
// with a QuestionAnsweringResponse. The target catalog is addressed by the
// `namespace_id` and `catalog_id` path parameters.
rpc QuestionAnswering(QuestionAnsweringRequest) returns (QuestionAnsweringResponse) {
  option (google.api.http) = {
    post: "/v1alpha/namespaces/{namespace_id}/catalogs/{catalog_id}/qa"
    body: "*"
  };
  option (grpc.gateway.protoc_gen_openapiv2.options.openapiv2_operation) = {
    tags: "Catalog"
  };
}

// Get the catalog of a file
//
// Accepts a GetFileCatalogRequest and responds with a GetFileCatalogResponse.
// The catalog is addressed by the `namespace_id` and `catalog_id` path
// parameters; this binding declares no request body.
rpc GetFileCatalog(GetFileCatalogRequest) returns (GetFileCatalogResponse) {
  option (google.api.http) = {
    get: "/v1alpha/namespaces/{namespace_id}/catalogs/{catalog_id}/file-catalog"
  };
  option (grpc.gateway.protoc_gen_openapiv2.options.openapiv2_operation) = {
    tags: "Catalog"
  };
}
}
94 changes: 94 additions & 0 deletions artifact/artifact/v1alpha/file_catalog.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
syntax = "proto3";

package artifact.artifact.v1alpha;
// Artifact definitions
import "artifact/artifact/v1alpha/artifact.proto";

// Protocol Buffers Well-Known Types
import "google/protobuf/timestamp.proto";

// GetFileCatalogRequest represents a request to fetch the catalog of a file.
//
// NOTE(review): the schema does not state whether `file_id` and `file_uid`
// are alternatives or must both be provided - confirm with the server
// implementation.
message GetFileCatalogRequest {
  // ID of the namespace that owns the catalog.
  string namespace_id = 1;
  // ID of the catalog.
  string catalog_id = 2;
  // ID of the file (i.e. the file name).
  string file_id = 3;
  // UID of the file.
  string file_uid = 4;
}

// GetFileCatalogResponse carries the catalog of a file: the file metadata,
// its transformed text and the chunks derived from it.
message GetFileCatalogResponse {
  // Metadata describes the file itself.
  message Metadata {
    // UID of the file.
    string file_uid = 1;
    // ID of the file.
    string file_id = 2;
    // Type of the file.
    FileType file_type = 3;
    // Size of the file, in bytes.
    int64 file_size = 4;
    // Time at which the file was uploaded.
    google.protobuf.Timestamp file_upload_time = 5;
    // Processing status of the file.
    FileProcessStatus file_process_status = 6;
  }

  // Text describes the transformed content of the file.
  message Text {
    // IDs of the pipelines.
    // NOTE(review): presumably the pipelines that produced the transformed
    // content - confirm.
    repeated string pipeline_ids = 1;
    // The transformed content.
    string transformed_content = 2;
    // UID of the transformed content.
    string transformed_content_uid = 3;
    // Number of chunks of the transformed content.
    int32 transformed_content_chunk_num = 4;
    // Number of tokens of the transformed content.
    int32 transformed_content_token_num = 5;
    // Time at which the transformed content was last updated.
    google.protobuf.Timestamp transformed_content_update_time = 6;
  }

  // ChunkType enumerates the kinds of content a chunk can hold.
  enum ChunkType {
    // Unspecified.
    CHUNK_TYPE_UNSPECIFIED = 0;
    // Text.
    CHUNK_TYPE_TEXT = 1;
    // Image.
    CHUNK_TYPE_IMAGE = 2;
    // Audio.
    CHUNK_TYPE_AUDIO = 3;
    // Video.
    CHUNK_TYPE_VIDEO = 4;
  }

  // Chunk describes one chunk of the file.
  message Chunk {
    // UID of the chunk.
    string uid = 1;
    // Type of the chunk, i.e. text, image, audio or video.
    ChunkType type = 2;
    // Start position of the chunk.
    // NOTE(review): the unit (bytes, characters, ...) and the content the
    // position refers to are not stated in the schema - confirm.
    int32 start_pos = 3;
    // End position of the chunk. See the review note on `start_pos`.
    int32 end_pos = 4;
    // Content of the chunk.
    string content = 5;
    // Number of tokens in the chunk.
    int32 tokens_num = 6;
    // Embedding of the chunk, as an array of float32 values.
    repeated float embedding = 7;
    // Time at which the chunk was created.
    google.protobuf.Timestamp create_time = 8;
    // Whether the chunk is retrievable.
    bool retrievable = 9;
  }

  // Metadata of the file.
  Metadata metadata = 1;
  // Transformed text of the file.
  Text text = 2;
  // Chunks of the file.
  repeated Chunk chunks = 3;
}
28 changes: 28 additions & 0 deletions artifact/artifact/v1alpha/qa.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
syntax = "proto3";

package artifact.artifact.v1alpha;

// Artifact definitions
import "artifact/artifact/v1alpha/chunk.proto";


// QuestionAnsweringRequest represents a request to answer a question against
// a catalog.
message QuestionAnsweringRequest {
  // ID of the namespace that owns the catalog.
  string namespace_id = 1;
  // ID of the catalog.
  string catalog_id = 2;
  // The question to be answered.
  string question = 3;
  // Top-k value; defaults to 5.
  // NOTE(review): presumably the number of similar chunks to retrieve, with
  // the default applied by the server when the field is left unset (0) -
  // confirm.
  int32 topk = 4;
}

// QuestionAnsweringResponse contains the answer to a question together with
// a list of similar chunks.
message QuestionAnsweringResponse {
  // The answer to the question.
  string answer = 1;
  // The similar chunks returned alongside the answer.
  repeated SimilarityChunk similar_chunks = 2;
}

Loading

0 comments on commit 055bbd0

Please sign in to comment.