forked from github/docs
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfind-unused-assets.js
123 lines (100 loc) · 3.97 KB
/
find-unused-assets.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/usr/bin/env node
import { flatten } from 'lodash-es'
import path from 'path'
import walk from 'walk-sync'
import { execSync } from 'child_process'
import assert from 'assert'
import loadSiteData from '../../lib/site-data.js'
import { loadPages } from '../../lib/page-data.js'
import patterns from '../../lib/patterns.js'
import getDataReferences from '../../lib/get-liquid-data-references.js'
// asset types this script knows how to check
const validArgs = ['reusables', 'variables', 'images']

// location of image assets; joined onto process.cwd() when walking the repo
const imagesPath = '/assets/images'

// assets whose path or name starts with one of these prefixes are never
// reported as unused, even when nothing references them directly
const ignoreList = ['/assets/images/help/site-policy', 'site.data.reusables.policies']

// non-content locations that are grepped for image or data references
// (content pages are scanned separately, see assetsReferencedInContent)
const dirsToGrep = [
  'includes',
  'layouts',
  'javascripts',
  'stylesheets',
  'README.md',
  'data/reusables',
  'data/variables',
]
/**
 * Find assets of the given type that exist in the repo but are not referenced
 * by any English page or by any of the non-content directories.
 *
 * @param {string} assetType - one of the values in `validArgs`
 * @returns {Promise<string[]>} image paths (for 'images') or
 *   `site.data.<type>.<file>.<key>` names (for 'reusables' / 'variables')
 * @throws {AssertionError} (as a rejected promise) when `assetType` is not valid
 */
export default async function findUnusedAssets(assetType) {
  assert(validArgs.includes(assetType), `arg must be one of: ${validArgs.join(', ')}`)

  const englishPages = await getEnglishPages()
  const siteData = await loadSiteData()

  // 1. inventory of everything that exists in the repo
  const reusablesInRepo = siteData.en.site.data.reusables
  const variablesInRepo = siteData.en.site.data.variables
  const imagesInRepo = getAllImagesInRepo()

  // 2. references found in page content (front matter fields plus markdown)
  const extractReferences = assetType === 'images' ? getImageReferences : getDataReferences
  const referencesPerPage = []
  for (const page of englishPages) {
    const pageText = [page.intro, page.title, page.product, page.markdown].join(',')
    referencesPerPage.push(extractReferences(pageText))
  }
  const contentReferences = flatten(referencesPerPage)

  // 3. references found outside of content (includes, layouts, data, ...)
  const nonContentReferences = getAssetsReferencedInNonContentDirs(assetType)

  // 4. merge both sources and drop duplicates
  const referencedAssets = Array.from(new Set(contentReferences.concat(nonContentReferences)))

  // 5. report assets that exist but are never referenced
  if (assetType === 'images') {
    return getUnusedImages(imagesInRepo, referencedAssets)
  }
  if (assetType === 'reusables') {
    return getUnusedData(reusablesInRepo, assetType, referencedAssets)
  }
  if (assetType === 'variables') {
    return getUnusedData(variablesInRepo, assetType, referencedAssets)
  }
}
/**
 * Load every page and keep only those whose `languageCode` is 'en'.
 *
 * @returns {Promise<object[]>} the English pages, in the order `loadPages` returned them
 */
async function getEnglishPages() {
  const allPages = await loadPages()
  const englishOnly = []
  for (const page of allPages) {
    if (page.languageCode === 'en') {
      englishOnly.push(page)
    }
  }
  return englishOnly
}
/**
 * List every image file under `imagesPath` (resolved against the current
 * working directory), skipping markdown files and anything inside the
 * top-level `octicons/` and `site/` folders.
 *
 * @returns {string[]} image paths prefixed with `imagesPath`
 */
function getAllImagesInRepo() {
  const imagesRoot = path.join(process.cwd(), imagesPath)
  const excludedTopLevelDirs = /^(octicons|site)\//

  const images = []
  for (const relPath of walk(imagesRoot, { directories: false })) {
    if (relPath.endsWith('.md')) continue
    if (excludedTopLevelDirs.test(relPath)) continue
    images.push(path.join(imagesPath, relPath))
  }
  return images
}
/**
 * Grep the non-content directories listed in `dirsToGrep` for references to
 * the given asset type and return the references found.
 *
 * @param {string} assetType - 'images' searches with `patterns.imagePath`;
 *   any other value searches with `patterns.dataReference`
 * @returns {string[]} whatever `getImageReferences` / `getDataReferences`
 *   extract from the matching lines
 * @throws {Error} when grep fails for any reason other than "no matches"
 */
function getAssetsReferencedInNonContentDirs(assetType) {
  const isImages = assetType === 'images'
  const regex = isImages ? patterns.imagePath.source : patterns.dataReference.source

  // `grep -E` is the supported spelling of the deprecated `egrep` command
  const grepCmd = `grep -E -rh '${regex}' ${dirsToGrep.join(' ')}`

  let grepResults
  try {
    grepResults = execSync(grepCmd).toString()
  } catch (error) {
    // grep exits with status 1 when no line matched; that is a valid (empty)
    // result, not a failure. Anything else is a real error and is rethrown.
    if (error.status !== 1) throw error
    grepResults = ''
  }

  return isImages ? getImageReferences(grepResults) : getDataReferences(grepResults)
}
/**
 * Extract image references from a blob of text.
 *
 * @param {string} text - text to search with `patterns.imagePath`
 * @returns {string[]} each match with every `../` segment removed and
 *   surrounding whitespace trimmed; empty when nothing matches
 */
function getImageReferences(text) {
  const matches = text.match(patterns.imagePath)
  if (!matches) {
    return []
  }

  const parentDirSegments = /\.\.\//g
  return matches.map((ref) => {
    const withoutParentDirs = ref.replace(parentDirSegments, '')
    return withoutParentDirs.trim()
  })
}
/**
 * List data entries (reusables or variables) that are neither referenced nor
 * covered by `ignoreList`.
 *
 * @param {Object<string, Object>} allDataInRepo - data keyed by file name, each
 *   value being an object keyed by entry name
 * @param {string} assetType - used to build the `site.data.<assetType>...` name
 * @param {string[]} allReferencedAssets - names that are referenced somewhere
 * @returns {string[]} `site.data.<assetType>.<filename>.<key>` names that are unused
 */
function getUnusedData(allDataInRepo, assetType, allReferencedAssets) {
  // Build the lookup once so each membership check is O(1) instead of
  // rescanning the whole reference list for every data entry.
  const referenced = new Set(allReferencedAssets)
  const isIgnored = (name) => ignoreList.some((ignored) => name.startsWith(ignored))

  const unusedData = []
  for (const [filename, entries] of Object.entries(allDataInRepo)) {
    for (const key of Object.keys(entries)) {
      const name = `site.data.${assetType}.${filename}.${key}`
      if (!referenced.has(name) && !isIgnored(name)) {
        unusedData.push(name)
      }
    }
  }
  return unusedData
}
/**
 * List images that are neither referenced nor covered by `ignoreList`.
 *
 * @param {string[]} allImagesInRepo - image paths that exist in the repo
 * @param {string[]} allReferencedAssets - asset references found in the repo
 * @returns {string[]} the image paths that are unused, in their original order
 */
function getUnusedImages(allImagesInRepo, allReferencedAssets) {
  // Build the lookup once so each membership check is O(1) instead of
  // rescanning the whole reference list for every image.
  const referenced = new Set(allReferencedAssets)
  const isIgnored = (image) => ignoreList.some((ignored) => image.startsWith(ignored))

  return allImagesInRepo.filter((image) => !referenced.has(image) && !isIgnored(image))
}