From 32722f07691d654d19f21bf5bc54f4a6d75b5d88 Mon Sep 17 00:00:00 2001 From: rburgst Date: Tue, 10 Aug 2021 00:26:12 +0200 Subject: [PATCH] fix(wordpress): ensure all file links are rewritten (#32679) * fix(wordpress): ensure all file links are rewritten - in the case of either having a small `schema.perPage` setting or many many referenced resources on a single page only the first page of links were properly rewritten - now we ensure that all pages are fully processed until we resolve the promise - this is an improved recommit of #31652 fixes #31646 Co-authored-by: gatsbybot Co-authored-by: Tyler Barnes --- .../fetch-referenced-media-items.test.js | 289 ++++++++++++++++++ .../fetch-referenced-media-items.js | 248 ++++++++------- 2 files changed, 412 insertions(+), 125 deletions(-) create mode 100644 packages/gatsby-source-wordpress/__tests__/fetch-referenced-media-items.test.js diff --git a/packages/gatsby-source-wordpress/__tests__/fetch-referenced-media-items.test.js b/packages/gatsby-source-wordpress/__tests__/fetch-referenced-media-items.test.js new file mode 100644 index 0000000000000..f03fbcfe9950b --- /dev/null +++ b/packages/gatsby-source-wordpress/__tests__/fetch-referenced-media-items.test.js @@ -0,0 +1,289 @@ +jest.mock(`../dist/utils/fetch-graphql`, () => jest.fn()) + +import fetchGraphql from "../dist/utils/fetch-graphql" +import { fetchMediaItemsBySourceUrl, fetchMediaItemsById } from "../dist/steps/source-nodes/fetch-nodes/fetch-referenced-media-items" +import { createContentDigest } from "gatsby-core-utils" +import store from "../dist/store" + +const fakeReporter = { + panic: msg => { + console.error(msg) + }, + info: msg => { + console.log(msg) + }, +} + +const getNodeMock = jest.fn() + + +describe(`fetch-referenced-media-items`, () => { + beforeAll(() => { + store.dispatch.gatsbyApi.setState({ + pluginOptions: { + schema: { + perPage: 2, + }, + }, + }) + }) + + afterEach(() => { + jest.resetAllMocks() + }) + + describe(`fetchMediaItemsBySourceUrl`, () => { + +const createApi = () => { + return { + actions: { + createTypes: jest.fn(), + createNode: jest.fn(), + deleteNode: jest.fn(), + }, + reporter: fakeReporter, + createNodeId: jest.fn(), + async getNode(id) { + return { + localFile: { + id: id, + }, + } + }, + } +} + + it(`should properly download multiple pages`, async () => { + fetchGraphql + .mockResolvedValueOnce({ + data: { + mediaItem__index_0: null, + mediaItem__index_1: null, + }, + }) + .mockResolvedValueOnce({ + data: { + mediaItem__index_2: { + id: 2, + mediaItemUrl: `https://wordpress.host/wp-content/uploads/2018/05/file1.mp3`, + }, + mediaItem__index_3: { + id: 3, + mediaItemUrl: `https://wordpress.host/wp-content/uploads/2018/05/file1.mp3`, + }, + }, + }) + .mockResolvedValueOnce({ + data: { + mediaItem__index_4: null, + mediaItem__index_5: null, + }, + }) + .mockResolvedValueOnce({ + data: { + mediaItem__index_6: null, + mediaItem__index_7: null, + }, + }) + const result = await fetchMediaItemsBySourceUrl({ + mediaItemUrls: [ + `https://wordpress.host/wp-content/uploads/2018/05/file1.mp3?_=7`, + `https://wordpress.host/wp-content/uploads/2018/05/file2.mp3?_=7`, + `https://wordpress.host/wp-content/uploads/2018/05/file1.mp3`, + `https://wordpress.host/wp-content/uploads/2018/05/file2.mp3`, + ], + createContentDigest, + helpers: createApi(), + }) + expect(result).toHaveLength(2) + }) + + + it(`should properly download a single page if there is only 1`, async () => { + store.dispatch.gatsbyApi.setState({ + pluginOptions: { + schema: { + perPage: 5, + }, + }, + }) + + fetchGraphql + .mockResolvedValueOnce({ + data: { + mediaItem__index_0: { + id: 0, + mediaItemUrl: `https://wordpress.host/wp-content/uploads/2018/05/file1.mp3`, + }, + mediaItem__index_1: { + id: 1, + mediaItemUrl: `https://wordpress.host/wp-content/uploads/2018/05/file1.mp3`, + }, + }, + }) + + const result = await fetchMediaItemsBySourceUrl({ + mediaItemUrls: [ + `https://wordpress.host/wp-content/uploads/2018/05/file1.mp3`, + `https://wordpress.host/wp-content/uploads/2018/05/file2.mp3`, + ], + selectionSet: `id\nmediaItemUrl`, + createContentDigest, + helpers: createApi(), + }) + expect(result).toHaveLength(2) + }) + }) + + + describe(`fetchMediaItemsById`, () => { + + const createApi = () => { + return { + actions: { + createTypes: jest.fn(), + createNode: jest.fn(), + deleteNode: jest.fn(), + }, + reporter: fakeReporter, + createNodeId: jest.fn(), + getNode: getNodeMock + } + } + + it(`should properly download multiple pages of ids`, async () => { + getNodeMock + .mockReturnValueOnce(undefined) + .mockReturnValueOnce(undefined) + .mockReturnValueOnce(undefined) + .mockReturnValueOnce(undefined) + .mockReturnValueOnce({ + localFile: { + id: 0, + }}) + .mockReturnValueOnce({ + localFile: { + id: 1, + }}) + .mockReturnValueOnce({ + localFile: { + id: 2, + }}) + .mockReturnValueOnce({ + localFile: { + id: 3, + }}) + store.dispatch.gatsbyApi.setState({ + pluginOptions: { + schema: { + perPage: 2, + }, + }, + }) + + + fetchGraphql + .mockResolvedValueOnce({ + data: { + mediaItems: { + nodes: [{ + id: 0, + mediaItemUrl: `https://wordpress.host/wp-content/uploads/2018/05/file1.mp3`, + },{ + id: 1, + mediaItemUrl: `https://wordpress.host/wp-content/uploads/2018/05/file1.mp3`, + },] + } + }, + }) + .mockResolvedValueOnce({ + data: { + mediaItems: { + nodes: [{ + id: 2, + mediaItemUrl: `https://wordpress.host/wp-content/uploads/2018/05/file2.mp3`, + },{ + id: 3, + mediaItemUrl: `https://wordpress.host/wp-content/uploads/2018/05/file3.mp3`, + },] + } + }, + }) + const result = await fetchMediaItemsById({ + mediaItemIds: [ + btoa(`attachment:1`), + btoa(`attachment:2`), + btoa(`attachment:3`), + btoa(`attachment:4`), + ], + settings: { + limit: 5 + }, + typeInfo: { + pluralName: `mediaItems`, + nodesTypeName: `MediaItem` + }, + + createContentDigest, + helpers: createApi(), + }) + expect(result).toHaveLength(4) + }) + + + xit(`should properly download a single page of ids if there is only 1`, async () => { + getNodeMock + .mockReturnValueOnce(undefined) + .mockReturnValueOnce(undefined) + .mockReturnValueOnce({ + localFile: { + id: 0, + }}) + .mockReturnValueOnce({ + localFile: { + id: 1, + }}) + + store.dispatch.gatsbyApi.setState({ + pluginOptions: { + schema: { + perPage: 5, + }, + }, + }) + + fetchGraphql + .mockResolvedValueOnce({ + data: { + mediaItems: { + nodes: [{ + id: 0, + mediaItemUrl: `https://wordpress.host/wp-content/uploads/2018/05/file1.mp3`, + },{ + id: 1, + mediaItemUrl: `https://wordpress.host/wp-content/uploads/2018/05/file1.mp3`, + },] + } + }, + }) + + const result = await fetchMediaItemsById({ + mediaItemIds: [ + btoa(`attachment:1`), btoa(`attachment:2`) + ], + settings: { + limit: 5 + }, + typeInfo: { + pluralName: `mediaItems`, + nodesTypeName: `MediaItem` + }, + + selectionSet: `id\nmediaItemUrl`, + createContentDigest, + helpers: createApi(), + }) + expect(result).toHaveLength(2) + }) + }) +}) diff --git a/packages/gatsby-source-wordpress/src/steps/source-nodes/fetch-nodes/fetch-referenced-media-items.js b/packages/gatsby-source-wordpress/src/steps/source-nodes/fetch-nodes/fetch-referenced-media-items.js index 5d4135d0aec66..0ce128956ba61 100644 --- a/packages/gatsby-source-wordpress/src/steps/source-nodes/fetch-nodes/fetch-referenced-media-items.js +++ b/packages/gatsby-source-wordpress/src/steps/source-nodes/fetch-nodes/fetch-referenced-media-items.js @@ -271,7 +271,7 @@ const processAndDedupeImageUrls = urls => }, urls) ) -const fetchMediaItemsBySourceUrl = async ({ +export const fetchMediaItemsBySourceUrl = async ({ mediaItemUrls, selectionSet, builtFragments, @@ -316,106 +316,105 @@ const fetchMediaItemsBySourceUrl = async ({ // since we're using an async queue, we need a way to know when it's finished // we pass this resolve function into the queue function so it can let us // know when it's finished - let resolveFutureNodes - const futureNodes = new Promise(resolve => { - resolveFutureNodes = (nodes = []) => - // combine our resolved nodes we fetched with our cached nodes - resolve([...nodes, ...previouslyCachedMediaItemNodes]) - }) // we have no media items to fetch, // so we need to resolve this promise // otherwise it will never resolve below. if (!mediaItemUrlsPages.length) { - resolveFutureNodes() + return Promise.resolve([]) } + const allPromises = [] // for all the images we don't have cached, loop through and get them all for (const [index, sourceUrls] of mediaItemUrlsPages.entries()) { - pushPromiseOntoRetryQueue({ - helpers, - createContentDigest, - actions, - queue: mediaNodeFetchQueue, - retryKey: `Media Item by sourceUrl query #${index}, digest: ${createContentDigest( - sourceUrls.join() - )}`, - retryPromise: async () => { - const query = /* GraphQL */ ` - query MEDIA_ITEMS { - ${sourceUrls - .map( - (sourceUrl, index) => /* GraphQL */ ` - mediaItem__index_${index}: mediaItem(id: "${sourceUrl}", idType: SOURCE_URL) { - ...MediaItemFragment - } - ` - ) - .join(` `)} - } + const curPromise = new Promise(resolve => { + pushPromiseOntoRetryQueue({ + helpers, + createContentDigest, + actions, + queue: mediaNodeFetchQueue, + retryKey: `Media Item by sourceUrl query #${index}, digest: ${createContentDigest( + sourceUrls.join() + )}`, + retryPromise: async () => { + const query = /* GraphQL */ ` + query MEDIA_ITEMS { + ${sourceUrls + .map( + (sourceUrl, index) => /* GraphQL */ ` + mediaItem__index_${index}: mediaItem(id: "${sourceUrl}", idType: SOURCE_URL) { + ...MediaItemFragment + } + ` + ) + .join(` `)} + } - fragment MediaItemFragment on MediaItem { - ${selectionSet} - } + fragment MediaItemFragment on MediaItem { + ${selectionSet} + } - ${builtFragments || ``} - ` + ${builtFragments || ``} + ` - const { data } = await fetchGraphql({ - query, - variables: { - first: perPage, - after: null, - }, - errorContext: `Error occurred while fetching "MediaItem" nodes in inline html.`, - }) + const { data } = await fetchGraphql({ + query, + variables: { + first: perPage, + after: null, + }, + errorContext: `Error occurred while fetching "MediaItem" nodes in inline html.`, + }) - // since we're getting each media item on it's single node root field - // we just needs the values of each property in the response - // anything that returns null is because we tried to get the source url - // plus the source url minus resize patterns. So there will be nulls - // since only the full source url will return data - const thisPagesNodes = Object.values(data).filter(Boolean) - - // take the WPGraphQL nodes we received and create Gatsby nodes out of them - const nodes = await Promise.all( - thisPagesNodes.map(node => - createMediaItemNode({ - node, - helpers, - createContentDigest, - actions, - allMediaItemNodes, - parentName: `Fetching referenced MediaItem nodes by sourceUrl`, - }) + // since we're getting each media item on it's single node root field + // we just needs the values of each property in the response + // anything that returns null is because we tried to get the source url + // plus the source url minus resize patterns. So there will be nulls + // since only the full source url will return data + const thisPagesNodes = Object.values(data).filter(Boolean) + + // take the WPGraphQL nodes we received and create Gatsby nodes out of them + const nodes = await Promise.all( + thisPagesNodes.map(node => + createMediaItemNode({ + node, + helpers, + createContentDigest, + actions, + allMediaItemNodes, + parentName: `Fetching referenced MediaItem nodes by sourceUrl`, + }) + ) ) - ) - nodes.forEach((node, index) => { - if (!node) { - return - } + nodes.forEach((node, index) => { + if (!node) { + return + } - // this is how we're caching nodes we've previously fetched. - store.dispatch.imageNodes.pushNodeMeta({ - id: node.localFile.id, - sourceUrl: sourceUrls[index], - modifiedGmt: node.modifiedGmt, + // this is how we're caching nodes we've previously fetched. + store.dispatch.imageNodes.pushNodeMeta({ + id: node.localFile.id, + sourceUrl: sourceUrls[index], + modifiedGmt: node.modifiedGmt, + }) }) - }) - resolveFutureNodes(nodes) - }, + resolve(nodes) + }, + }) }) + allPromises.push(curPromise) } await mediaNodeFetchQueue.onIdle() await mediaFileFetchQueue.onIdle() - return futureNodes + const allResults = await Promise.all(allPromises) + return allResults.flat() } -const fetchMediaItemsById = async ({ +export const fetchMediaItemsById = async ({ mediaItemIds, settings, url, @@ -434,34 +433,31 @@ const fetchMediaItemsById = async ({ const chunkedIds = chunk(newMediaItemIds, perPage) - let resolveFutureNodes - const futureNodes = new Promise(resolve => { - resolveFutureNodes = resolve - }) - if (!newMediaItemIds.length) { - resolveFutureNodes([]) + return Promise.resolve([]) } const allMediaItemNodes = [] + const allPromises = [] for (const [index, relayIds] of chunkedIds.entries()) { - pushPromiseOntoRetryQueue({ - helpers, - createContentDigest, - actions, - queue: mediaNodeFetchQueue, - retryKey: `Media Item query #${index}, digest: ${createContentDigest( - relayIds.join() - )}`, - retryPromise: async () => { - // relay id's are base64 encoded from strings like attachment:89381 - // where 89381 is the id we want for our query - // so we split on the : and get the last item in the array, which is the id - // once we can get a list of media items by relay id's, we can remove atob - const ids = relayIds.map(id => atob(id).split(`:`).slice(-1)[0]) - - const query = ` + const curPromise = new Promise(resolve => { + pushPromiseOntoRetryQueue({ + helpers, + createContentDigest, + actions, + queue: mediaNodeFetchQueue, + retryKey: `Media Item query #${index}, digest: ${createContentDigest( + relayIds.join() + )}`, + retryPromise: async () => { + // relay id's are base64 encoded from strings like attachment:89381 + // where 89381 is the id we want for our query + // so we split on the : and get the last item in the array, which is the id + // once we can get a list of media items by relay id's, we can remove atob + const ids = relayIds.map(id => atob(id).split(`:`).slice(-1)[0]) + + const query = ` query MEDIA_ITEMS($in: [ID]) { mediaItems(first: ${perPage}, where:{ in: $in }) { nodes { @@ -472,43 +468,45 @@ const fetchMediaItemsById = async ({ ${builtFragments || ``} ` + const allNodesOfContentType = await paginatedWpNodeFetch({ + first: perPage, + contentTypePlural: typeInfo.pluralName, + nodeTypeName: typeInfo.nodesTypeName, + query, + url, + helpers, + settings, + in: ids, + // this allows us to retry-on-end-of-queue + throwFetchErrors: true, + }) - const allNodesOfContentType = await paginatedWpNodeFetch({ - first: perPage, - contentTypePlural: typeInfo.pluralName, - nodeTypeName: typeInfo.nodesTypeName, - query, - url, - helpers, - settings, - in: ids, - // this allows us to retry-on-end-of-queue - throwFetchErrors: true, - }) - - const nodes = await Promise.all( - allNodesOfContentType.map(node => - createMediaItemNode({ - node, - helpers, - createContentDigest, - actions, - allMediaItemNodes, - referencedMediaItemNodeIds: mediaItemIds, - parentName: `Fetching referenced MediaItem nodes by id`, - }) + const nodes = await Promise.all( + allNodesOfContentType.map(node => + createMediaItemNode({ + node, + helpers, + createContentDigest, + actions, + allMediaItemNodes, + referencedMediaItemNodeIds: mediaItemIds, + parentName: `Fetching referenced MediaItem nodes by id`, + }) + ) ) - ) - resolveFutureNodes(nodes) - }, + resolve(nodes) + }, + }) }) + allPromises.push(curPromise) } await mediaNodeFetchQueue.onIdle() await mediaFileFetchQueue.onIdle() - return futureNodes + const allResults = await Promise.all(allPromises) + return allResults.flat() } export default async function fetchReferencedMediaItemsAndCreateNodes({