Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

blob deduping for odsp driver #639

Merged
merged 7 commits into from
Nov 23, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import {
buildHierarchy,
fromBase64ToUtf8,
fromUtf8ToBase64,
gitHashFile,
PerformanceEvent,
} from "@microsoft/fluid-core-utils";
import * as resources from "@microsoft/fluid-gitresources";
Expand All @@ -34,12 +35,15 @@ import { OdspCache } from "./odspCache";
import { getWithRetryForTokenRefresh, throwOdspNetworkError } from "./OdspUtils";

export class OdspDocumentStorageManager implements IDocumentStorageManager {
private readonly blobsShaToPathMap: Map<string, string> = new Map<string, string>();
jatgarg marked this conversation as resolved.
Show resolved Hide resolved
private readonly blobsShaCache: Map<string, string> = new Map();
private readonly blobCache: Map<string, resources.IBlob> = new Map();
private readonly treesCache: Map<string, resources.ITree> = new Map();

private readonly attributesBlobHandles: Set<string> = new Set();

private readonly queryString: string;
private lastSummaryHandle: string | undefined;
private readonly appId: string;

private _ops: ISequencedDeltaOpMessage[] | undefined;
Expand Down Expand Up @@ -103,6 +107,11 @@ export class OdspDocumentStorageManager implements IDocumentStorageManager {
blob.content = fromUtf8ToBase64(JSON.stringify(documentAttributes));
}
jatgarg marked this conversation as resolved.
Show resolved Hide resolved

// Populate the cache with paths from sha-to-path mapping.
const hash = gitHashFile(Buffer.from(blob.content, blob.encoding));
if (this.blobsShaToPathMap.has(blob.sha)) {
this.blobsShaCache.set(hash, this.blobsShaToPathMap.get(blob.sha)!);
}
return blob;
jatgarg marked this conversation as resolved.
Show resolved Hide resolved
}

Expand Down Expand Up @@ -154,7 +163,7 @@ export class OdspDocumentStorageManager implements IDocumentStorageManager {
return null;
}

const hierarchicalTree = buildHierarchy(tree);
const hierarchicalTree = buildHierarchy(tree, this.blobsShaToPathMap);

// decode commit paths
const commits = {};
Expand Down Expand Up @@ -326,6 +335,7 @@ export class OdspDocumentStorageManager implements IDocumentStorageManager {
throw new Error(`Failed to write summary tree`);
}

this.lastSummaryHandle = result.sha;
return {
handle: result.sha,
handleType: api.SummaryType.Tree,
Expand Down Expand Up @@ -406,6 +416,9 @@ export class OdspDocumentStorageManager implements IDocumentStorageManager {
let hierarchicalProtocolTree: api.ISnapshotTree;
let appTree: resources.ITree | null;

// This appTreeId is the unacked handle of the last summary generated.
this.lastSummaryHandle = appTreeId;
jatgarg marked this conversation as resolved.
Show resolved Hide resolved

if (typeof (protocolTreeOrId) === "string") {
// backwards compat for older summaries
const trees = await Promise.all([
Expand All @@ -420,7 +433,7 @@ export class OdspDocumentStorageManager implements IDocumentStorageManager {

appTree = trees[1];

hierarchicalProtocolTree = buildHierarchy(protocolTree);
hierarchicalProtocolTree = buildHierarchy(protocolTree, this.blobsShaToPathMap);

} else {
appTree = await this.readTree(appTreeId);
Expand All @@ -432,7 +445,7 @@ export class OdspDocumentStorageManager implements IDocumentStorageManager {
throw new Error("Invalid app tree");
}

const hierarchicalAppTree = buildHierarchy(appTree);
const hierarchicalAppTree = buildHierarchy(appTree, this.blobsShaToPathMap);

if (hierarchicalProtocolTree.blobs) {
const attributesBlob = hierarchicalProtocolTree.blobs.attributes;
Expand Down Expand Up @@ -465,7 +478,7 @@ export class OdspDocumentStorageManager implements IDocumentStorageManager {
};
}

const snapshotTree = this.convertSummaryToSnapshotTree(tree);
const snapshotTree = this.convertSummaryToSnapshotTree(tree, 0, "");
jatgarg marked this conversation as resolved.
Show resolved Hide resolved

const snapshot: ISnapshotRequest = {
entries: snapshotTree.entries!,
Expand All @@ -491,7 +504,7 @@ export class OdspDocumentStorageManager implements IDocumentStorageManager {
/**
* Converts a summary tree to an ODSP snapshot tree
*/
private convertSummaryToSnapshotTree(tree: api.ISummaryTree, depth: number = 0): ISnapshotTree {
private convertSummaryToSnapshotTree(tree: api.ISummaryTree, depth: number = 0, path: string): ISnapshotTree {
const snapshotTree: ISnapshotTree = {
entries: [],
}!;
Expand All @@ -505,18 +518,25 @@ export class OdspDocumentStorageManager implements IDocumentStorageManager {

switch (summaryObject.type) {
case api.SummaryType.Tree:
value = this.convertSummaryToSnapshotTree(summaryObject, depth + 1);
value = this.convertSummaryToSnapshotTree(summaryObject, depth + 1, `${path}/${key}`);
break;

case api.SummaryType.Blob:
const content = typeof summaryObject.content === "string" ? summaryObject.content : summaryObject.content.toString("base64");
const encoding = typeof summaryObject.content === "string" ? "utf-8" : "base64";

value = {
content,
encoding,
};

const hash = gitHashFile(Buffer.from(content, encoding));
// If the cache already has the hash of this blob and the handle of the last summary is also known, use them to build the
// complete path for the given blob.
if (!this.blobsShaCache.has(hash) || !this.lastSummaryHandle) {
value = {
content,
encoding,
};
this.blobsShaCache.set(hash, `${path}/${key}`);
} else {
id = `${this.lastSummaryHandle}${this.blobsShaCache.get(hash)}`;
}
break;

case api.SummaryType.Handle:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import * as assert from "assert";
*/
export class DocumentStorageService implements IDocumentStorageService {

private readonly blobsShaCache = new Set<string>();
private readonly blobsShaCache = new Map<string, string>();
jatgarg marked this conversation as resolved.
Show resolved Hide resolved
public get repositoryUrl(): string {
return "";
}
Expand Down Expand Up @@ -59,7 +59,7 @@ export class DocumentStorageService implements IDocumentStorageService {

public async read(blobId: string): Promise<string> {
const value = await this.manager.getBlob(blobId);
this.blobsShaCache.add(value.sha);
this.blobsShaCache.set(value.sha, value.sha);
return value.content;
}

Expand Down Expand Up @@ -110,7 +110,7 @@ export class DocumentStorageService implements IDocumentStorageService {
if (!this.blobsShaCache.has(hash)) {
const blob = await this.manager.createBlob(content, encoding);
assert.strictEqual(hash, blob.sha, "Blob.sha and hash do not match!!");
this.blobsShaCache.add(blob.sha);
this.blobsShaCache.set(blob.sha, blob.sha);
}
return hash;
case SummaryType.Commit:
Expand Down
7 changes: 5 additions & 2 deletions packages/loader/utils/src/blobs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,10 @@ function flattenCore(path: string, treeEntries: ITreeEntry[], blobMap: Map<strin
* @param flatTree - a flat tree
* @returns the hierarchical tree
*/
export function buildHierarchy(flatTree: git.ITree, blobsShaCache: Set<string> = new Set<string>()): ISnapshotTree {
export function buildHierarchy(
flatTree: git.ITree,
blobsShaCache: Map<string, string> = new Map<string, string>()): ISnapshotTree {
jatgarg marked this conversation as resolved.
Show resolved Hide resolved

const lookup: { [path: string]: ISnapshotTree } = {};
const root: ISnapshotTree = { id: flatTree.sha, blobs: {}, commits: {}, trees: {} };
lookup[""] = root;
Expand All @@ -141,7 +144,7 @@ export function buildHierarchy(flatTree: git.ITree, blobsShaCache: Set<string> =
lookup[entry.path] = newTree;
} else if (entry.type === "blob") {
node.blobs[decodeURIComponent(entryPathBase)] = entry.sha;
blobsShaCache.add(entry.sha);
blobsShaCache.set(entry.sha, `/${entry.path}`);
} else if (entry.type === "commit") {
node.commits[decodeURIComponent(entryPathBase)] = entry.sha;
}
Expand Down
2 changes: 1 addition & 1 deletion packages/utils/odsp-utils/src/odsp-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ async function getDriveItem(
return Promise.reject(createRequestError("Unable to get drive/item id from path", getDriveItemResult));
}
// try creating the file
const contentUri = `${getDriveItemUrl}:/content`;
const contentUri = `${getDriveItemUrl}/content`;
const createResult = await putAsync(server, clientConfig, tokens, contentUri);
if (createResult.status !== 201) {
return Promise.reject(createRequestError("Failed to create file", createResult));
Expand Down