Skip to content

Commit 311b202

Browse files
TylerBarnesgatsbybotrenovate[bot]Talaxy009pieh
authored
fix(gatsby-source-contentful): reduce memory usage (#37910)
* add touch nodes optout action * cleanup * start adding backreference cache * revert formatting * remove unused variable * remove unused var * switch from opting out types from stale nodes to opting out entire plugin * add no nodes warning * fix bug where double bound actions are ignored when there are no args * update progress mock * get owner from plugin * rename type * rename types * use redux types instead of pulling from lmdb * remove unused line * Update source-nodes.ts * use CREATE_NODE action instead of adding a new type owner action * add typeowners test * test touchNodes and enableStatefulSourceNodes() * fix contentful tests * snapshot updates * chore(changelogs): update changelogs (#37808) * fix(deps): update starters and examples - gatsby to ^5.8.1 (#37806) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> * fix(gatsby): Validate sub plugins options (#37804) * fix(gatsby): validate sub plugins options of gatsby-plugin-mdx * revert hardcoded path to subplugins * validate subplugins if they are not under 'options.plugins' field --------- Co-authored-by: Michal Piechowiak <[email protected]> * fix(create-gatsby): Use correct name in summary message (#37809) * actual fix * other misc changes * chore(release): Publish next - [email protected] - [email protected] - [email protected] - [email protected] - [email protected] * remove testing timeout * minimal docs * reword * add comment * reportOnce instead of throwing an error * consolidate typeOwners * use new typesToPlugins Map keys instead of pluginsToTypes Map values * consolidate remaining typeOwners object checks into new typeOwners reducer * fix missing owner error * fix type errors and incorrect plugin object reference * maybe fix unit tests * make SitePage nodes owned by internal-data-bridge * add missing fields on some TOUCH_NODE actions * skip owner checks when deleting child nodes * sp * Update yarn.lock * reduce memory usage by storing minimal backreference info and 
not blocking the event loop * remove coment * remove extra logs * add helper to explain what the setImmediate promise is for * remove unneeded entryNode condition * refactor createAssetNodes to use async/await * add cached node counts by assets vs content * refactor to fix linting failure * fix tests * ensure mutated existing node data is applied during updates * restore previous logging behaviour * fix test state * always reset memory node count when existingNodes is empty * revert more logging * update test to use new createAssetNodes changes * store count instead of huge set of id's to use less memory * remove debugger * make sure sys.type exists before checking * move enableStatefulSourceNodes call to main sourceNodes fn * use util * fix e2e test - ContentfulTag node doesn't have sys.type * always exclude ContentfulTag nodes from memory Map cache * fix linting (consistent return) --------- Co-authored-by: GatsbyJS Bot <[email protected]> Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> Co-authored-by: Taozc <[email protected]> Co-authored-by: Michal Piechowiak <[email protected]> Co-authored-by: Lennart <[email protected]>
1 parent c1f0bbf commit 311b202

File tree

6 files changed

+667
-408
lines changed

6 files changed

+667
-408
lines changed

Diff for: packages/gatsby-source-contentful/src/__tests__/download-contentful-assets.js

+9-11
Original file line numberDiff line numberDiff line change
@@ -66,17 +66,15 @@ describe(`downloadContentfulAssets`, () => {
6666
const assetNodes = []
6767
for (const assetItem of fixtures) {
6868
assetNodes.push(
69-
...(await Promise.all(
70-
createAssetNodes({
71-
assetItem,
72-
createNode,
73-
createNodeId,
74-
defaultLocale,
75-
locales,
76-
space,
77-
pluginConfig,
78-
})
79-
))
69+
...(await createAssetNodes({
70+
assetItem,
71+
createNode,
72+
createNodeId,
73+
defaultLocale,
74+
locales,
75+
space,
76+
pluginConfig,
77+
}))
8078
)
8179
}
8280

Diff for: packages/gatsby-source-contentful/src/__tests__/gatsby-node.js

+24-3
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import {
66
sourceNodes,
77
onPreInit,
88
} from "../gatsby-node"
9+
import { existingNodes, is, memoryNodeCounts } from "../backreferences"
910
import { fetchContent, fetchContentTypes } from "../fetch"
1011
import { makeId } from "../normalize"
1112

@@ -59,7 +60,12 @@ describe(`gatsby-node`, () => {
5960

6061
const actions = {
6162
createTypes: jest.fn(),
62-
setPluginStatus: jest.fn(),
63+
setPluginStatus: jest.fn(pluginStatusObject => {
64+
pluginStatus = {
65+
...pluginStatus,
66+
...pluginStatusObject,
67+
}
68+
}),
6369
createNode: jest.fn(async node => {
6470
// similar checks as gatsby does
6571
if (!_.isPlainObject(node)) {
@@ -99,9 +105,20 @@ describe(`gatsby-node`, () => {
99105
}),
100106
buildInterfaceType: jest.fn(),
101107
}
108+
let pluginStatus = {}
109+
const resetPluginStatus = () => {
110+
pluginStatus = {}
111+
}
102112
const store = {
103113
getState: jest.fn(() => {
104-
return { program: { directory: process.cwd() }, status: {} }
114+
return {
115+
program: { directory: process.cwd() },
116+
status: {
117+
plugins: {
118+
[`gatsby-source-contentful`]: pluginStatus,
119+
},
120+
},
121+
}
105122
}),
106123
}
107124
const cache = createMockCache()
@@ -404,7 +421,11 @@ describe(`gatsby-node`, () => {
404421
})
405422
}
406423

407-
beforeEach(() => {
424+
beforeEach(async () => {
425+
existingNodes.clear()
426+
is.firstSourceNodesCallOfCurrentNodeProcess = true
427+
resetPluginStatus()
428+
408429
// @ts-ignore
409430
fetchContent.mockClear()
410431
// @ts-ignore
+139
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
// @ts-check
2+
import { hasFeature } from "gatsby-plugin-utils/index"
3+
import { getDataStore } from "gatsby/dist/datastore"
4+
import { untilNextEventLoopTick } from "./utils"
5+
6+
// Array of all existing Contentful nodes. Make it global and incrementally update it because it's hella slow to recreate this on every data update for large sites.
7+
export const existingNodes = new Map()
8+
9+
let allNodesLoopCount = 0
10+
11+
// "is" === object so it can be overridden by tests
12+
export const is = {
13+
firstSourceNodesCallOfCurrentNodeProcess: true,
14+
}
15+
16+
// Returns the module-level `existingNodes` cache together with the per-sys-type
// node counts, filling the cache from the datastore first when it is empty.
//
// Arguments (single destructured object):
//   actions      - only `actions.touchNode` is used
//   getNode      - looks up a full node by id
//   pluginConfig - accepted for interface compatibility; not read here
//
// Resolves to `{ existingNodes, memoryNodeCountsBySysType }`.
//
// Side effects: resets the Asset/Entry counters before a refill, may touch
// nodes (see `needToTouchNodes`), and always clears
// `is.firstSourceNodesCallOfCurrentNodeProcess` before returning.
export async function getExistingCachedNodes({
  actions,
  getNode,
  pluginConfig,
}) {
  const { touchNode } = actions

  // Nodes only need touching on the first call of this process, and only when
  // the `stateful-source-nodes` feature is not available.
  const needToTouchNodes =
    !hasFeature(`stateful-source-nodes`) &&
    is.firstSourceNodesCallOfCurrentNodeProcess

  if (existingNodes.size === 0) {
    // The cache is being rebuilt from scratch, so the counters restart too.
    memoryNodeCountsBySysType.Asset = 0
    memoryNodeCountsBySysType.Entry = 0

    const dataStore = getDataStore()
    const allNodeTypeNames = Array.from(dataStore.getTypes())

    for (const typeName of allNodeTypeNames) {
      const typeNodes = dataStore.iterateNodesByType(typeName)

      // Peek at a single node to decide whether this type belongs to us.
      const firstNodeOfType = Array.from(typeNodes.slice(0, 1))[0]
      const isOwnedByThisPlugin =
        Boolean(firstNodeOfType) &&
        firstNodeOfType.internal.owner === `gatsby-source-contentful`

      if (!isOwnedByThisPlugin) {
        continue
      }

      for (const node of typeNodes) {
        if (needToTouchNodes) {
          touchNode(node)

          // `fields` is read as an object (`fields.localFile`) just below, so
          // calling `.includes` on it unconditionally would throw. Support a
          // list of field names as well as an object of field values.
          const fields = node?.fields
          const hasLocalFile = Array.isArray(fields)
            ? fields.includes(`localFile`)
            : Boolean(fields?.localFile)

          if (hasLocalFile) {
            // Prevent GraphQL type inference from crashing on this property
            const fullNode = getNode(node.id)
            const localFileNode = getNode(fullNode?.fields?.localFile)

            // Skip silently when the referenced file node cannot be resolved.
            if (localFileNode) {
              touchNode(localFileNode)
            }
          }
        }

        if (++allNodesLoopCount % 5000 === 0) {
          // dont block the event loop
          await untilNextEventLoopTick()
        }

        addNodeToExistingNodesCache(node)
      }

      // dont block the event loop
      await untilNextEventLoopTick()
    }
  }

  is.firstSourceNodesCallOfCurrentNodeProcess = false

  return {
    existingNodes,
    memoryNodeCountsBySysType,
  }
}
78+
79+
// Number of nodes currently held in `existingNodes`, counted per `sys.type`.
// Only the keys listed here are counted. The counters are incremented in
// addNodeToExistingNodesCache, decremented in removeNodeFromExistingNodesCache,
// and reset to 0 by getExistingCachedNodes when the cache is empty.
const memoryNodeCountsBySysType = {
80+
Asset: 0,
81+
Entry: 0,
82+
}
83+
84+
// store only the fields we need to compare to reduce memory usage. if a node is updated we'll use getNode to grab the whole node before updating it
85+
export function addNodeToExistingNodesCache(node) {
86+
if (node.internal.type === `ContentfulTag`) {
87+
return
88+
}
89+
90+
if (
91+
node.sys.type in memoryNodeCountsBySysType &&
92+
!existingNodes.has(node.id)
93+
) {
94+
memoryNodeCountsBySysType[node.sys.type] ||= 0
95+
memoryNodeCountsBySysType[node.sys.type]++
96+
}
97+
98+
const cacheNode = {
99+
id: node.id,
100+
contentful_id: node.contentful_id,
101+
sys: {
102+
type: node.sys.type,
103+
},
104+
node_locale: node.node_locale,
105+
children: node.children,
106+
internal: {
107+
owner: node.internal.owner,
108+
},
109+
__memcache: true,
110+
}
111+
112+
for (const key of Object.keys(node)) {
113+
if (key.endsWith(`___NODE`)) {
114+
cacheNode[key] = node[key]
115+
}
116+
}
117+
118+
existingNodes.set(node.id, cacheNode)
119+
}
120+
121+
// Removes a node from the `existingNodes` cache and keeps the per-sys-type
// counters in step.
//
// - `ContentfulTag` nodes are ignored entirely (they are never cached).
// - The counter for the node's `sys.type` is decremented only when that type
//   is tracked and the node id is currently cached; it never drops below 0.
// - Nodes without a `sys` object are removed without throwing.
export function removeNodeFromExistingNodesCache(node) {
  if (node.internal.type === `ContentfulTag`) {
    return
  }

  const sysType = node.sys?.type

  // Own-property check: `in` would also match inherited keys such as
  // `constructor` and corrupt the counter object with NaN entries.
  const isCountedType = Object.prototype.hasOwnProperty.call(
    memoryNodeCountsBySysType,
    sysType
  )

  if (isCountedType && existingNodes.has(node.id)) {
    memoryNodeCountsBySysType[sysType] = Math.max(
      0,
      memoryNodeCountsBySysType[sysType] - 1
    )
  }

  existingNodes.delete(node.id)
}

0 commit comments

Comments
 (0)