Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

blob deduping for odsp driver #639

Merged
merged 7 commits into from
Nov 23, 2019
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import {
buildHierarchy,
fromBase64ToUtf8,
fromUtf8ToBase64,
gitHashFile,
PerformanceEvent,
} from "@microsoft/fluid-core-utils";
import * as resources from "@microsoft/fluid-gitresources";
Expand All @@ -34,12 +35,16 @@ import { OdspCache } from "./odspCache";
import { getWithRetryForTokenRefresh, throwOdspNetworkError } from "./OdspUtils";

export class OdspDocumentStorageManager implements IDocumentStorageManager {
private readonly blobsIdToPathMap: Map<string, string> = new Map();
private readonly blobsShaToPathCache: Map<string, string> = new Map();
private readonly blobsPathToShaMap: Map<string, string> = new Map();
private readonly blobCache: Map<string, resources.IBlob> = new Map();
private readonly treesCache: Map<string, resources.ITree> = new Map();

private readonly attributesBlobHandles: Set<string> = new Set();

private readonly queryString: string;
private lastSummaryHandle: string | undefined;
private readonly appId: string;

private _ops: ISequencedDeltaOpMessage[] | undefined;
Expand Down Expand Up @@ -103,6 +108,12 @@ export class OdspDocumentStorageManager implements IDocumentStorageManager {
blob.content = fromUtf8ToBase64(JSON.stringify(documentAttributes));
}
jatgarg marked this conversation as resolved.
Show resolved Hide resolved

// Populate the cache with paths from sha-to-path mapping.
const path = this.blobsIdToPathMap.get(blob.sha);
if (path) {
const hash = gitHashFile(Buffer.from(blob.content, blob.encoding));
this.blobsShaToPathCache.set(hash, path);
}
return blob;
jatgarg marked this conversation as resolved.
Show resolved Hide resolved
}

Expand Down Expand Up @@ -268,6 +279,17 @@ export class OdspDocumentStorageManager implements IDocumentStorageManager {
const { trees, blobs, ops, sha } = odspSnapshot;
if (trees) {
this.initTreesCache(trees);
this.blobsIdToPathMap.clear();
for (const tree of this.treesCache.values()) {
for (const entry of tree.tree) {
if (entry.type === "blob") {
this.blobsIdToPathMap.set(entry.sha, `/${entry.path}`);
} else if (entry.type === "commit" && entry.path === ".app") {
// This is the unacked handle of the latest summary generated.
this.lastSummaryHandle = entry.sha;
}
}
}
}

if (blobs) {
Expand Down Expand Up @@ -326,6 +348,7 @@ export class OdspDocumentStorageManager implements IDocumentStorageManager {
throw new Error(`Failed to write summary tree`);
}

this.lastSummaryHandle = result.sha;
return {
handle: result.sha,
handleType: api.SummaryType.Tree,
Expand Down Expand Up @@ -491,7 +514,7 @@ export class OdspDocumentStorageManager implements IDocumentStorageManager {
/**
* Converts a summary tree to ODSP tree
*/
private convertSummaryToSnapshotTree(tree: api.ISummaryTree, depth: number = 0): ISnapshotTree {
private convertSummaryToSnapshotTree(tree: api.ISummaryTree, depth: number = 0, path: string = ""): ISnapshotTree {
const snapshotTree: ISnapshotTree = {
entries: [],
}!;
Expand All @@ -505,18 +528,34 @@ export class OdspDocumentStorageManager implements IDocumentStorageManager {

switch (summaryObject.type) {
case api.SummaryType.Tree:
value = this.convertSummaryToSnapshotTree(summaryObject, depth + 1);
value = this.convertSummaryToSnapshotTree(summaryObject, depth + 1, `${path}/${key}`);
break;

case api.SummaryType.Blob:
const content = typeof summaryObject.content === "string" ? summaryObject.content : summaryObject.content.toString("base64");
const encoding = typeof summaryObject.content === "string" ? "utf-8" : "base64";

value = {
content,
encoding,
};

const hash = gitHashFile(Buffer.from(content, encoding));
let completePath = this.blobsShaToPathCache.get(hash);
// If the cache has the hash of the blob and handle of last summary is also present, then use that to generate complete path for
// the given blob.
if (!completePath || !this.lastSummaryHandle) {
value = {
content,
encoding,
};
completePath = `${path}/${key}`;
const prevSha = this.blobsPathToShaMap.get(completePath);
// This is done to delete entry which has different sha at same complete path. We just delete previous entry even if it is same
// so as to prevent string matching.
if (prevSha) {
this.blobsShaToPathCache.delete(prevSha);
}
this.blobsShaToPathCache.set(hash, completePath);
jatgarg marked this conversation as resolved.
Show resolved Hide resolved
this.blobsPathToShaMap.set(completePath, hash);
} else {
id = `${this.lastSummaryHandle}${completePath}`;
}
break;

case api.SummaryType.Handle:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ import * as assert from "assert";
*/
export class DocumentStorageService implements IDocumentStorageService {

private readonly blobsShaCache = new Set<string>();
// The values of this cache is useless. We only need the keys. So we are always putting
// empty strings as values.
private readonly blobsShaCache = new Map<string, string>();
jatgarg marked this conversation as resolved.
Show resolved Hide resolved
public get repositoryUrl(): string {
return "";
}
Expand Down Expand Up @@ -59,7 +61,7 @@ export class DocumentStorageService implements IDocumentStorageService {

public async read(blobId: string): Promise<string> {
const value = await this.manager.getBlob(blobId);
this.blobsShaCache.add(value.sha);
this.blobsShaCache.set(value.sha, "");
return value.content;
}

Expand Down Expand Up @@ -110,7 +112,7 @@ export class DocumentStorageService implements IDocumentStorageService {
if (!this.blobsShaCache.has(hash)) {
const blob = await this.manager.createBlob(content, encoding);
assert.strictEqual(hash, blob.sha, "Blob.sha and hash do not match!!");
this.blobsShaCache.add(blob.sha);
this.blobsShaCache.set(blob.sha, "");
}
return hash;
case SummaryType.Commit:
Expand Down
8 changes: 6 additions & 2 deletions packages/loader/utils/src/blobs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,13 @@ function flattenCore(path: string, treeEntries: ITreeEntry[], blobMap: Map<strin
* Build a tree hierarchy base on a flat tree
*
* @param flatTree - a flat tree
* @param blobsShaToPathCache - Map with blobs sha as keys and values as path of the blob.
* @returns the hierarchical tree
*/
export function buildHierarchy(flatTree: git.ITree, blobsShaCache: Set<string> = new Set<string>()): ISnapshotTree {
export function buildHierarchy(
flatTree: git.ITree,
blobsShaToPathCache: Map<string, string> = new Map<string, string>()): ISnapshotTree {

const lookup: { [path: string]: ISnapshotTree } = {};
const root: ISnapshotTree = { id: flatTree.sha, blobs: {}, commits: {}, trees: {} };
lookup[""] = root;
Expand All @@ -141,7 +145,7 @@ export function buildHierarchy(flatTree: git.ITree, blobsShaCache: Set<string> =
lookup[entry.path] = newTree;
} else if (entry.type === "blob") {
node.blobs[decodeURIComponent(entryPathBase)] = entry.sha;
blobsShaCache.add(entry.sha);
blobsShaToPathCache.set(entry.sha, `/${entry.path}`);
} else if (entry.type === "commit") {
node.commits[decodeURIComponent(entryPathBase)] = entry.sha;
}
Expand Down
2 changes: 1 addition & 1 deletion packages/utils/odsp-utils/src/odsp-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ async function getDriveItem(
return Promise.reject(createRequestError("Unable to get drive/item id from path", getDriveItemResult));
}
// try createing the file
const contentUri = `${getDriveItemUrl}:/content`;
const contentUri = `${getDriveItemUrl}/content`;
const createResult = await putAsync(server, clientConfig, tokens, contentUri);
if (createResult.status !== 201) {
return Promise.reject(createRequestError("Failed to create file", createResult));
Expand Down