Skip to content

Commit

Permalink
Adding logic for blobs deduping in 0.11 from master. (#648)
Browse files Browse the repository at this point in the history
* Stop writing duplicated blobs to storage for routerlicious driver. (#592)

* stop writing duplicated blobs to storage

* change hashing logic

* add assert

* populate cache with the summary blobs (#626)

* populate cache with the summary blobs

* add in cache while reading tree

* rename

* blob deduping for odsp driver (#639)

* blob deduping for odsp driver

* pr suggestions

* update map in get latest

* have 2 caches with latest and prev caching

* populate cache in blob read

* change comment

* make map local

* make local

* local
  • Loading branch information
jatgarg authored Nov 23, 2019
1 parent 84700b7 commit 662e904
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import {
buildHierarchy,
fromBase64ToUtf8,
fromUtf8ToBase64,
gitHashFile,
PerformanceEvent,
} from "@microsoft/fluid-core-utils";
import * as resources from "@microsoft/fluid-gitresources";
Expand All @@ -34,12 +35,15 @@ import { OdspCache } from "./odspCache";
import { getWithRetryForTokenRefresh, throwOdspNetworkError } from "./OdspUtils";

export class OdspDocumentStorageManager implements IDocumentStorageManager {
// This cache is associated with mapping sha to path for previous summary which belongs to last summary handle.
private blobsShaToPathCache: Map<string, string> = new Map();
private readonly blobCache: Map<string, resources.IBlob> = new Map();
private readonly treesCache: Map<string, resources.ITree> = new Map();

private readonly attributesBlobHandles: Set<string> = new Set();

private readonly queryString: string;
private lastSummaryHandle: string | undefined;
private readonly appId: string;

private _ops: ISequencedDeltaOpMessage[] | undefined;
Expand Down Expand Up @@ -266,12 +270,31 @@ export class OdspDocumentStorageManager implements IDocumentStorageManager {
this.odspCache.put(odspCacheKey, odspSnapshot, 10000);
}
const { trees, blobs, ops, sha } = odspSnapshot;
const blobsIdToPathMap: Map<string, string> = new Map();
if (trees) {
this.initTreesCache(trees);
for (const tree of this.treesCache.values()) {
for (const entry of tree.tree) {
if (entry.type === "blob") {
blobsIdToPathMap.set(entry.sha, `/${entry.path}`);
} else if (entry.type === "commit" && entry.path === ".app") {
// This is the unacked handle of the latest summary generated.
this.lastSummaryHandle = entry.sha;
}
}
}
}

if (blobs) {
this.initBlobsCache(blobs);
// Populate the cache with paths from id-to-path mapping.
for (const blob of this.blobCache.values()) {
const path = blobsIdToPathMap.get(blob.sha);
if (path) {
const hash = gitHashFile(Buffer.from(blob.content, blob.encoding));
this.blobsShaToPathCache.set(hash, path);
}
}
}

this.ops = ops;
Expand Down Expand Up @@ -321,11 +344,15 @@ export class OdspDocumentStorageManager implements IDocumentStorageManager {
public async uploadSummary(tree: api.ISummaryTree): Promise<api.ISummaryHandle> {
this.checkSnapshotUrl();

const result = await this.writeSummaryTree(tree);
const { result, blobsShaToPathCacheLatest } = await this.writeSummaryTree(tree);
if (!result || !result.sha) {
throw new Error(`Failed to write summary tree`);
}
if (blobsShaToPathCacheLatest) {
this.blobsShaToPathCache = blobsShaToPathCacheLatest;
}

this.lastSummaryHandle = result.sha;
return {
handle: result.sha,
handleType: api.SummaryType.Tree,
Expand Down Expand Up @@ -458,14 +485,15 @@ export class OdspDocumentStorageManager implements IDocumentStorageManager {
return summarySnapshotTree;
}

private async writeSummaryTree(tree: api.SummaryTree, depth: number = 0): Promise<ISnapshotResponse> {
private async writeSummaryTree(tree: api.SummaryTree, depth: number = 0): Promise<{ result: ISnapshotResponse, blobsShaToPathCacheLatest?: Map<string, string>}> {
if (tree.type === api.SummaryType.Handle) {
return {
sha: tree.handle,
result: { sha: tree.handle },
};
}

const snapshotTree = this.convertSummaryToSnapshotTree(tree);
// This cache is associated with mapping sha to path for currently generated summary.
const blobsShaToPathCacheLatest: Map<string, string> = new Map();
const snapshotTree = this.convertSummaryToSnapshotTree(tree, blobsShaToPathCacheLatest);

const snapshot: ISnapshotRequest = {
entries: snapshotTree.entries!,
Expand All @@ -484,14 +512,14 @@ export class OdspDocumentStorageManager implements IDocumentStorageManager {
const postBody = JSON.stringify(snapshot);

const response = await this.fetchWrapper.post<ISnapshotResponse>(url, postBody, headers);
return response.content;
return { result: response.content, blobsShaToPathCacheLatest };
});
}

/**
* Converts a summary tree to ODSP tree
*/
private convertSummaryToSnapshotTree(tree: api.ISummaryTree, depth: number = 0): ISnapshotTree {
private convertSummaryToSnapshotTree(tree: api.ISummaryTree, blobsShaToPathCacheLatest: Map<string, string>, depth: number = 0, path: string = ""): ISnapshotTree {
const snapshotTree: ISnapshotTree = {
entries: [],
}!;
Expand All @@ -505,18 +533,27 @@ export class OdspDocumentStorageManager implements IDocumentStorageManager {

switch (summaryObject.type) {
case api.SummaryType.Tree:
value = this.convertSummaryToSnapshotTree(summaryObject, depth + 1);
value = this.convertSummaryToSnapshotTree(summaryObject, blobsShaToPathCacheLatest, depth + 1, `${path}/${key}`);
break;

case api.SummaryType.Blob:
const content = typeof summaryObject.content === "string" ? summaryObject.content : summaryObject.content.toString("base64");
const encoding = typeof summaryObject.content === "string" ? "utf-8" : "base64";

value = {
content,
encoding,
};

const hash = gitHashFile(Buffer.from(content, encoding));
let completePath = this.blobsShaToPathCache.get(hash);
// If the cache has the hash of the blob and handle of last summary is also present, then use that to generate complete path for
// the given blob.
if (!completePath || !this.lastSummaryHandle) {
value = {
content,
encoding,
};
completePath = `${path}/${key}`;
blobsShaToPathCacheLatest.set(hash, completePath);
} else {
id = `${this.lastSummaryHandle}${completePath}`;
}
break;

case api.SummaryType.Handle:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Licensed under the MIT License.
*/

import { buildHierarchy } from "@microsoft/fluid-core-utils";
import { buildHierarchy, gitHashFile } from "@microsoft/fluid-core-utils";
import * as resources from "@microsoft/fluid-gitresources";
import {
FileMode,
Expand All @@ -18,11 +18,16 @@ import {
SummaryType,
} from "@microsoft/fluid-protocol-definitions";
import * as gitStorage from "@microsoft/fluid-server-services-client";
import * as assert from "assert";

/**
* Document access to underlying storage for routerlicious driver.
*/
export class DocumentStorageService implements IDocumentStorageService {

// The values in this cache are unused; only the keys matter. So we always store
// empty strings as values.
private readonly blobsShaCache = new Map<string, string>();
/**
 * Repository URL for this storage service.
 *
 * @returns Always an empty string.
 */
public get repositoryUrl(): string {
return "";
}
Expand All @@ -42,7 +47,7 @@ export class DocumentStorageService implements IDocumentStorageService {
}

const tree = await this.manager.getTree(requestVersion.treeId);
return buildHierarchy(tree);
return buildHierarchy(tree, this.blobsShaCache);
}

public async getVersions(versionId: string, count: number): Promise<IVersion[]> {
Expand All @@ -56,6 +61,7 @@ export class DocumentStorageService implements IDocumentStorageService {

/**
 * Fetches a blob through the storage manager and remembers its sha in
 * `blobsShaCache`.
 *
 * @param blobId - Id of the blob to fetch.
 * @returns The `content` field of the fetched blob.
 */
public async read(blobId: string): Promise<string> {
const { sha, content } = await this.manager.getBlob(blobId);
// Only the key is meaningful in this cache; the value is always an empty string.
this.blobsShaCache.set(sha, "");
return content;
}

Expand Down Expand Up @@ -101,9 +107,14 @@ export class DocumentStorageService implements IDocumentStorageService {
case SummaryType.Blob:
const content = typeof value.content === "string" ? value.content : value.content.toString("base64");
const encoding = typeof value.content === "string" ? "utf-8" : "base64";
const blob = await this.manager.createBlob(content, encoding);
return blob.sha;

// The gitHashFile would return the same hash as returned by the server as blob.sha
const hash = gitHashFile(Buffer.from(content, encoding));
if (!this.blobsShaCache.has(hash)) {
const blob = await this.manager.createBlob(content, encoding);
assert.strictEqual(hash, blob.sha, "Blob.sha and hash do not match!!");
this.blobsShaCache.set(blob.sha, "");
}
return hash;
case SummaryType.Commit:
const commitTreeHandle = await this.writeSummaryObject(
value.tree,
Expand Down
7 changes: 6 additions & 1 deletion packages/loader/utils/src/blobs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,13 @@ function flattenCore(path: string, treeEntries: ITreeEntry[], blobMap: Map<strin
* Build a tree hierarchy based on a flat tree
*
* @param flatTree - a flat tree
* @param blobsShaToPathCache - Map with blobs sha as keys and values as path of the blob.
* @returns the hierarchical tree
*/
export function buildHierarchy(flatTree: git.ITree): ISnapshotTree {
export function buildHierarchy(
flatTree: git.ITree,
blobsShaToPathCache: Map<string, string> = new Map<string, string>()): ISnapshotTree {

const lookup: { [path: string]: ISnapshotTree } = {};
const root: ISnapshotTree = { id: flatTree.sha, blobs: {}, commits: {}, trees: {} };
lookup[""] = root;
Expand All @@ -141,6 +145,7 @@ export function buildHierarchy(flatTree: git.ITree): ISnapshotTree {
lookup[entry.path] = newTree;
} else if (entry.type === "blob") {
node.blobs[decodeURIComponent(entryPathBase)] = entry.sha;
blobsShaToPathCache.set(entry.sha, `/${entry.path}`);
} else if (entry.type === "commit") {
node.commits[decodeURIComponent(entryPathBase)] = entry.sha;
}
Expand Down
2 changes: 1 addition & 1 deletion packages/utils/odsp-utils/src/odsp-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ async function getDriveItem(
return Promise.reject(createRequestError("Unable to get drive/item id from path", getDriveItemResult));
}
// try creating the file
const contentUri = `${getDriveItemUrl}:/content`;
const contentUri = `${getDriveItemUrl}/content`;
const createResult = await putAsync(server, clientConfig, tokens, contentUri);
if (createResult.status !== 201) {
return Promise.reject(createRequestError("Failed to create file", createResult));
Expand Down

0 comments on commit 662e904

Please sign in to comment.