Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

H-3922: Add Google Vertex AI calls to shared AI request logic #6290

Merged
merged 8 commits into from
Jan 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions apps/hash-ai-worker-ts/eslint.config.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
import { createBase } from "@local/eslint/deprecated";
import { createBase, defineConfig } from "@local/eslint/deprecated";

export default createBase(import.meta.dirname);
/**
 * ESLint flat config for this package: the shared base config for this
 * directory, followed by package-local rule overrides.
 */
const sharedBaseConfig = createBase(import.meta.dirname);

const localOverrides = defineConfig([
  {
    rules: {
      /**
       * @todo we should have separate browser/node configs
       */
      "react-hooks/rules-of-hooks": "off",
    },
  },
]);

export default [...sharedBaseConfig, ...localOverrides];
1 change: 1 addition & 0 deletions apps/hash-ai-worker-ts/scripts/compare-llm-response.ts
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ export const compareLlmResponses = async () => {
const llmResponses = await Promise.all(
models.map((model) => {
return getLlmResponse(
// @ts-expect-error -- inference stumbling on Google AI model, @todo figure out why
{
...llmParams,
model,
Expand Down
6 changes: 5 additions & 1 deletion apps/hash-ai-worker-ts/scripts/compare-llm-response/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,16 @@ import type { AccountId } from "@local/hash-graph-types/account";

import type {
AnthropicLlmParams,
GoogleAiParams,
LlmParams,
OpenAiLlmParams,
} from "../../src/activities/shared/get-llm-response/types.js";

export type CompareLlmResponseConfig = {
models: LlmParams["model"][];
llmParams: Omit<AnthropicLlmParams, "model"> & Omit<OpenAiLlmParams, "model">;
llmParams:
| Omit<AnthropicLlmParams, "model">
| Omit<OpenAiLlmParams, "model">
| Omit<GoogleAiParams, "model">;
accountId?: AccountId;
};
Original file line number Diff line number Diff line change
@@ -1,13 +1,3 @@
import { createWriteStream } from "node:fs";
import { mkdir, unlink } from "node:fs/promises";
import path from "node:path";
import { Readable } from "node:stream";
import { finished } from "node:stream/promises";
import type { ReadableStream } from "node:stream/web";
import { fileURLToPath } from "node:url";

import { getAwsS3Config } from "@local/hash-backend-utils/aws-config";
import { AwsS3StorageProvider } from "@local/hash-backend-utils/file-storage/aws-s3-storage-provider";
import type {
OriginProvenance,
PropertyProvenance,
Expand All @@ -22,14 +12,13 @@ import {
type OutputNameForAction,
} from "@local/hash-isomorphic-utils/flows/action-definitions";
import type { PersistedEntity } from "@local/hash-isomorphic-utils/flows/types";
import { generateUuid } from "@local/hash-isomorphic-utils/generate-uuid";
import {
blockProtocolPropertyTypes,
systemPropertyTypes,
} from "@local/hash-isomorphic-utils/ontology-type-ids";
import type {
DocProperties,
File,
TitlePropertyValue,
} from "@local/hash-isomorphic-utils/system-types/shared";
import { extractEntityUuidFromEntityId } from "@local/hash-subgraph";
import { StatusCode } from "@local/status";
Expand All @@ -43,15 +32,15 @@ import { getEntityByFilter } from "../shared/get-entity-by-filter.js";
import { getFlowContext } from "../shared/get-flow-context.js";
import { graphApiClient } from "../shared/graph-api-client.js";
import { logProgress } from "../shared/log-progress.js";
import { useFileSystemPathFromEntity } from "../shared/use-file-system-file-from-url.js";
import { generateDocumentPropertyPatches } from "./infer-metadata-from-document-action/generate-property-patches.js";
import { generateDocumentProposedEntitiesAndCreateClaims } from "./infer-metadata-from-document-action/generate-proposed-entities-and-claims.js";
import { getLlmAnalysisOfDoc } from "./infer-metadata-from-document-action/get-llm-analysis-of-doc.js";
import type { FlowActionActivity } from "./types.js";

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

const baseFilePath = path.join(__dirname, "/var/tmp_files");
/**
 * Type guard that narrows an entity to `Entity<File>`.
 *
 * An entity passes when its properties contain BOTH the file storage key
 * property and the file URL property. Only key presence is checked (via `in`);
 * the property values themselves are not inspected.
 */
const isFileEntity = (entity: Entity): entity is Entity<File> => {
  // Without a storage key the entity cannot be treated as a file; bail out early.
  if (
    !(
      systemPropertyTypes.fileStorageKey.propertyTypeBaseUrl in
      entity.properties
    )
  ) {
    return false;
  }

  return (
    blockProtocolPropertyTypes.fileUrl.propertyTypeBaseUrl in entity.properties
  );
};

export const inferMetadataFromDocumentAction: FlowActionActivity = async ({
inputs,
Expand Down Expand Up @@ -109,118 +98,62 @@ export const inferMetadataFromDocumentAction: FlowActionActivity = async ({
};
}

const fileUrl =
documentEntity.properties[
blockProtocolPropertyTypes.fileUrl.propertyTypeBaseUrl
];

if (!fileUrl) {
return {
code: StatusCode.InvalidArgument,
contents: [],
message: `Document entity with entityId ${documentEntityId} does not have a fileUrl property`,
};
}

if (typeof fileUrl !== "string") {
if (!isFileEntity(documentEntity)) {
return {
code: StatusCode.InvalidArgument,
contents: [],
message: `Document entity with entityId ${documentEntityId} has a fileUrl property of type '${typeof fileUrl}', expected 'string'`,
message: `Document entity with entityId ${documentEntityId} is not a file entity`,
};
}

const storageKey =
const fileUrl =
documentEntity.properties[
systemPropertyTypes.fileStorageKey.propertyTypeBaseUrl
"https://blockprotocol.org/@blockprotocol/types/property-type/file-url/"
];

if (!storageKey) {
return {
code: StatusCode.InvalidArgument,
contents: [],
message: `Document entity with entityId ${documentEntityId} does not have a fileStorageKey property`,
};
}

if (typeof storageKey !== "string") {
return {
code: StatusCode.InvalidArgument,
contents: [],
message: `Document entity with entityId ${documentEntityId} has a fileStorageKey property of type '${typeof storageKey}', expected 'string'`,
};
}

await mkdir(baseFilePath, { recursive: true });

const filePath = `${baseFilePath}/${generateUuid()}.pdf`;

const s3Config = getAwsS3Config();

const downloadProvider = new AwsS3StorageProvider(s3Config);

const urlForDownload = await downloadProvider.presignDownload({
entity: documentEntity as Entity<File>,
expiresInSeconds: 60 * 60,
key: storageKey,
});

const fetchFileResponse = await fetch(urlForDownload);

if (!fetchFileResponse.ok || !fetchFileResponse.body) {
if (!fileUrl) {
return {
code: StatusCode.NotFound,
contents: [],
message: `Document entity with entityId ${documentEntityId} has a fileUrl ${fileUrl} that could not be fetched: ${fetchFileResponse.statusText}`,
};
}

try {
const fileStream = createWriteStream(filePath);
await finished(
Readable.fromWeb(
fetchFileResponse.body as ReadableStream<Uint8Array>,
).pipe(fileStream),
);
} catch (error) {
await unlink(filePath);
return {
code: StatusCode.Internal,
contents: [],
message: `Failed to write file to file system: ${(error as Error).message}`,
message: `Document entity with entityId ${documentEntityId} does not have a fileUrl property`,
};
}

const pdfParser = new PDFParser();

const documentJson = await new Promise<Output>((resolve, reject) => {
pdfParser.on("pdfParser_dataError", (errData) =>
reject(errData.parserError),
);

pdfParser.on("pdfParser_dataReady", (pdfData) => {
resolve(pdfData);
});

// @todo: https://linear.app/hash/issue/H-3769/investigate-new-eslint-errors
// eslint-disable-next-line @typescript-eslint/prefer-promise-reject-errors
pdfParser.loadPDF(filePath).catch((err) => reject(err));
});

const numberOfPages = documentJson.Pages.length;

/**
* @todo H-3620: handle documents exceeding Vertex AI limit of 30MB
*/

const documentMetadata = await getLlmAnalysisOfDoc({
fileSystemPath: filePath,
hashFileStorageKey: storageKey,
entityId: documentEntityId,
fileUrl,
});

await unlink(filePath);
const { documentMetadata, numberOfPages } = await useFileSystemPathFromEntity(
documentEntity,
async ({ fileSystemPath }) => {
const documentJson = await new Promise<Output>((resolve, reject) => {
pdfParser.on("pdfParser_dataError", (errData) =>
reject(errData.parserError),
);

pdfParser.on("pdfParser_dataReady", (pdfData) => {
resolve(pdfData);
});

// @todo: https://linear.app/hash/issue/H-3769/investigate-new-eslint-errors
// eslint-disable-next-line @typescript-eslint/prefer-promise-reject-errors
pdfParser.loadPDF(fileSystemPath).catch((err) => reject(err));
});

const numPages = documentJson.Pages.length;

/**
* @todo H-3620: handle documents exceeding Vertex AI limit of 30MB
*/

const metadata = await getLlmAnalysisOfDoc({
fileEntity: documentEntity,
});

return {
documentMetadata: metadata,
numberOfPages: numPages,
};
},
);

const {
authors,
Expand Down Expand Up @@ -291,16 +224,20 @@ export const inferMetadataFromDocumentAction: FlowActionActivity = async ({
},
]);

const title = updatedEntity.properties[
systemPropertyTypes.title.propertyTypeBaseUrl
] as TitlePropertyValue;
const title =
"https://hash.ai/@hash/types/property-type/title/" in
updatedEntity.properties
? (updatedEntity.properties as DocProperties)[
"https://hash.ai/@hash/types/property-type/title/"
]
: undefined;

const proposedEntities =
await generateDocumentProposedEntitiesAndCreateClaims({
aiAssistantAccountId,
documentEntityId,
documentMetadata: { authors },
documentTitle: title,
documentTitle: title ?? "[Untitled]",
provenance,
propertyProvenance,
});
Expand Down
Loading
Loading