Skip to content

Commit

Permalink
blob deduping for odsp driver (#639)
Browse files Browse the repository at this point in the history
* blob deduping for odsp driver

* pr suggestions

* update map in get latest

* have 2 caches with latest and prev caching

* populate cache in blob read

* change comment

* make map local
  • Loading branch information
jatgarg authored Nov 23, 2019
1 parent 9be786c commit 5f68a0e
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import {
buildHierarchy,
fromBase64ToUtf8,
fromUtf8ToBase64,
gitHashFile,
PerformanceEvent,
} from "@microsoft/fluid-core-utils";
import * as resources from "@microsoft/fluid-gitresources";
Expand All @@ -34,12 +35,17 @@ import { OdspCache } from "./odspCache";
import { getWithRetryForTokenRefresh, throwOdspNetworkError } from "./OdspUtils";

export class OdspDocumentStorageManager implements IDocumentStorageManager {
// This cache is associated with mapping sha to path for previous summary which belongs to last summary handle.
private readonly blobsShaToPathCache: Map<string, string> = new Map();
// This cache is associated with mapping sha to path for currently generated summary.
private readonly blobsShaToPathCacheLatest: Map<string, string> = new Map();
private readonly blobCache: Map<string, resources.IBlob> = new Map();
private readonly treesCache: Map<string, resources.ITree> = new Map();

private readonly attributesBlobHandles: Set<string> = new Set();

private readonly queryString: string;
private lastSummaryHandle: string | undefined;
private readonly appId: string;

private _ops: ISequencedDeltaOpMessage[] | undefined;
Expand Down Expand Up @@ -266,12 +272,31 @@ export class OdspDocumentStorageManager implements IDocumentStorageManager {
this.odspCache.put(odspCacheKey, odspSnapshot, 10000);
}
const { trees, blobs, ops, sha } = odspSnapshot;
const blobsIdToPathMap: Map<string, string> = new Map();
if (trees) {
this.initTreesCache(trees);
for (const tree of this.treesCache.values()) {
for (const entry of tree.tree) {
if (entry.type === "blob") {
blobsIdToPathMap.set(entry.sha, `/${entry.path}`);
} else if (entry.type === "commit" && entry.path === ".app") {
// This is the unacked handle of the latest summary generated.
this.lastSummaryHandle = entry.sha;
}
}
}
}

if (blobs) {
this.initBlobsCache(blobs);
// Populate the cache with paths from id-to-path mapping.
for (const blob of this.blobCache.values()) {
const path = blobsIdToPathMap.get(blob.sha);
if (path) {
const hash = gitHashFile(Buffer.from(blob.content, blob.encoding));
this.blobsShaToPathCache.set(hash, path);
}
}
}

this.ops = ops;
Expand Down Expand Up @@ -321,11 +346,16 @@ export class OdspDocumentStorageManager implements IDocumentStorageManager {
public async uploadSummary(tree: api.ISummaryTree): Promise<api.ISummaryHandle> {
this.checkSnapshotUrl();

this.blobsShaToPathCacheLatest.clear();
const result = await this.writeSummaryTree(tree);
if (!result || !result.sha) {
throw new Error(`Failed to write summary tree`);
}

this.blobsShaToPathCache.clear();
for (const [key, value] of this.blobsShaToPathCacheLatest) {
this.blobsShaToPathCache.set(key, value);
}
this.lastSummaryHandle = result.sha;
return {
handle: result.sha,
handleType: api.SummaryType.Tree,
Expand Down Expand Up @@ -491,7 +521,7 @@ export class OdspDocumentStorageManager implements IDocumentStorageManager {
/**
* Converts a summary tree to ODSP tree
*/
private convertSummaryToSnapshotTree(tree: api.ISummaryTree, depth: number = 0): ISnapshotTree {
private convertSummaryToSnapshotTree(tree: api.ISummaryTree, depth: number = 0, path: string = ""): ISnapshotTree {
const snapshotTree: ISnapshotTree = {
entries: [],
}!;
Expand All @@ -505,18 +535,27 @@ export class OdspDocumentStorageManager implements IDocumentStorageManager {

switch (summaryObject.type) {
case api.SummaryType.Tree:
value = this.convertSummaryToSnapshotTree(summaryObject, depth + 1);
value = this.convertSummaryToSnapshotTree(summaryObject, depth + 1, `${path}/${key}`);
break;

case api.SummaryType.Blob:
const content = typeof summaryObject.content === "string" ? summaryObject.content : summaryObject.content.toString("base64");
const encoding = typeof summaryObject.content === "string" ? "utf-8" : "base64";

value = {
content,
encoding,
};

const hash = gitHashFile(Buffer.from(content, encoding));
let completePath = this.blobsShaToPathCache.get(hash);
// If the cache has the hash of the blob and handle of last summary is also present, then use that to generate complete path for
// the given blob.
if (!completePath || !this.lastSummaryHandle) {
value = {
content,
encoding,
};
completePath = `${path}/${key}`;
this.blobsShaToPathCacheLatest.set(hash, completePath);
} else {
id = `${this.lastSummaryHandle}${completePath}`;
}
break;

case api.SummaryType.Handle:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ import * as assert from "assert";
*/
export class DocumentStorageService implements IDocumentStorageService {

private readonly blobsShaCache = new Set<string>();
// The values of this cache is useless. We only need the keys. So we are always putting
// empty strings as values.
private readonly blobsShaCache = new Map<string, string>();
public get repositoryUrl(): string {
return "";
}
Expand Down Expand Up @@ -59,7 +61,7 @@ export class DocumentStorageService implements IDocumentStorageService {

public async read(blobId: string): Promise<string> {
const value = await this.manager.getBlob(blobId);
this.blobsShaCache.add(value.sha);
this.blobsShaCache.set(value.sha, "");
return value.content;
}

Expand Down Expand Up @@ -110,7 +112,7 @@ export class DocumentStorageService implements IDocumentStorageService {
if (!this.blobsShaCache.has(hash)) {
const blob = await this.manager.createBlob(content, encoding);
assert.strictEqual(hash, blob.sha, "Blob.sha and hash do not match!!");
this.blobsShaCache.add(blob.sha);
this.blobsShaCache.set(blob.sha, "");
}
return hash;
case SummaryType.Commit:
Expand Down
8 changes: 6 additions & 2 deletions packages/loader/utils/src/blobs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,13 @@ function flattenCore(path: string, treeEntries: ITreeEntry[], blobMap: Map<strin
* Build a tree hierarchy base on a flat tree
*
* @param flatTree - a flat tree
* @param blobsShaToPathCache - Map with blobs sha as keys and values as path of the blob.
* @returns the hierarchical tree
*/
export function buildHierarchy(flatTree: git.ITree, blobsShaCache: Set<string> = new Set<string>()): ISnapshotTree {
export function buildHierarchy(
flatTree: git.ITree,
blobsShaToPathCache: Map<string, string> = new Map<string, string>()): ISnapshotTree {

const lookup: { [path: string]: ISnapshotTree } = {};
const root: ISnapshotTree = { id: flatTree.sha, blobs: {}, commits: {}, trees: {} };
lookup[""] = root;
Expand All @@ -141,7 +145,7 @@ export function buildHierarchy(flatTree: git.ITree, blobsShaCache: Set<string> =
lookup[entry.path] = newTree;
} else if (entry.type === "blob") {
node.blobs[decodeURIComponent(entryPathBase)] = entry.sha;
blobsShaCache.add(entry.sha);
blobsShaToPathCache.set(entry.sha, `/${entry.path}`);
} else if (entry.type === "commit") {
node.commits[decodeURIComponent(entryPathBase)] = entry.sha;
}
Expand Down
2 changes: 1 addition & 1 deletion packages/utils/odsp-utils/src/odsp-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ async function getDriveItem(
return Promise.reject(createRequestError("Unable to get drive/item id from path", getDriveItemResult));
}
// try createing the file
const contentUri = `${getDriveItemUrl}:/content`;
const contentUri = `${getDriveItemUrl}/content`;
const createResult = await putAsync(server, clientConfig, tokens, contentUri);
if (createResult.status !== 201) {
return Promise.reject(createRequestError("Failed to create file", createResult));
Expand Down

0 comments on commit 5f68a0e

Please sign in to comment.