-
Notifications
You must be signed in to change notification settings - Fork 1
/
filterSourceGitignore.nix
360 lines (337 loc) · 13 KB
/
filterSourceGitignore.nix
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
{ lib, match }:
# This module implements a partial .gitignore parser
# for use in the nix evaluation phase.
# It is useful to filter out the same source files
# that your git implementation ignores, to get a clean
# build source when importing from a local folder
# with an unclean work tree (e.g. while developing).
# DIFFERENCES to `man 5 gitignore`:
# - Trailing spaces are not ignored.
# - Negations are not implemented (but recognized).
# - ** is not implemented.
# - ? is not implemented.
# - Bracketing with [] is not implemented.
# - The character \ is forbidden altogether because we
# did not want to implement escaping. Rename your files.
let
# `builtins.split` interleaves the non-matching substrings with lists of
# regex capture groups; keep the former and discard the latter.
onlySplitElems = parts:
  builtins.filter (part: !(builtins.isList part)) parts;
# Break a string into its lines (separator: `\n`).
splitLines = str:
  let pieces = builtins.split "\n" str;
  in onlySplitElems pieces;
# Break a path string into its elements (separator: `/`).
# Leading, trailing and doubled slashes produce empty-string elements.
splitPathElems = str:
  let pieces = builtins.split "/" str;
  in onlySplitElems pieces;
# The nix evaluator only has a builtin for matching on (POSIX extended)
# regexes, no support for real parsing. So we make do with matching
# against the lines of our .gitignore file in two steps:
#
# `matchLine` uses `lineMatchers` to filter out comments and empty lines,
# and to fail on lines starting with `!`, which we don’t support.
#
# `toPathSpec` uses `pathElemMatchers` to convert each path element
# of the resulting pre-filtered expressions (split on `/`) into a
# structured glob representation of that element.
# It fails on unsupported characters, like `\`.
# A single regex made of alternatives joined with `|`.
# The numbers below are the capture-group indices that `matchLine`
# looks up in the result of `builtins.match`.
lineMatchers =
  let
    # 0: empty string (is ignored)
    emptyLine = ''(^$)'';
    # 1: comment (is ignored) (no escaping with \ implemented)
    comment = ''^(#).*'';
    # 2: possible inversion and 3: rest of line
    negation = ''^(!)(.*)'';
    # 4: escaped # or !
    # (will not enable a file starting with \#, but who cares …)
    escapedMarker = ''^\\([!#].*)'';
    # 5: anything else
    anythingElse = ''(.+)'';
  in builtins.concatStringsSep "|" [
    emptyLine
    comment
    negation
    escapedMarker
    anythingElse
  ];
# Pre-filters one line of a .gitignore file.
# Returns `""` for lines that carry no pattern (empty lines, comments),
# the pattern text otherwise (with the leading `\` removed from an
# escaped `\#…` / `\!…` line).
# Aborts evaluation on negated lines (`!…`), which are not implemented.
matchLine = l:
  let
    groups = builtins.match lineMatchers l;
    # capture group by index; see the numbering in `lineMatchers`
    group = builtins.elemAt groups;
  in
    if groups == null then
      abort "matchLine: should not happen (nothing matched)"
    # empty line or comment: nothing to match on
    else if group 0 == "" || group 1 == "#" then ""
    else if group 2 == "!" then
      abort ".gitignore negation not implemented (for line: ${l})"
    # escaped `#` / `!` at the start of the line
    else if group 4 != null then group 4
    # any other non-empty line is passed through unchanged
    else if group 5 != null then group 5
    else abort "matchLine: should not happen (${toString groups})";
# Test cases for `matchLine`, in the format expected by `lib.runTests`.
matchLineTests =
  let
    # feed `input` to matchLine and expect `output`
    expect = input: output: {
      expr = matchLine input;
      expected = output;
    };
  in lib.runTests {
    testEmpty = expect "" "";
    testComment1 = expect "#" "";
    testComment2 = expect "# comment" "";
    testComment3 = expect ''# comment *.with \special/characters'' "";
    # testInversion = expect "!abc" "???";
    testNormal1 = expect "abc" "abc";
    testNormal2 = expect "/fo*/bar/" "/fo*/bar/";
  };
# A single regex made of alternatives joined with `|`, applied to one
# path element. The numbers below are the capture-group indices that
# `toPathSpec` looks up in the result of `builtins.match`.
pathElemMatchers =
  let
    # 0: check for unsupported metacharacters
    unsupportedMeta = ''.*([[?\]).*'';
    # 1: check for unsupported double glob
    doubleStar = ''.*(\*\*).*'';
    # 2: a string containing a simple glob is supported
    simpleGlob = ''(.*\*.*)'';
    # 3: anything else
    anythingElse = ''(.*)'';
  in builtins.concatStringsSep "|" [
    unsupportedMeta
    doubleStar
    simpleGlob
    anythingElse
  ];
# GlobSpec:
# sum
# { ignored : Unit
# , glob : Glob
# }
# Glob:
# { isDir : Bool
# , isRooted : Bool
# , pathSpec : List PathSpec
# }
# PathSpec:
# sum
# { starGlob : String
# , literal : String
# }
# Convert a path element (one piece of a line split on `/`) to a PathSpec:
#   - `{ starGlob = elem; }` if the element contains a `*`
#   - `{ literal = elem; }` otherwise
# Aborts evaluation if the element contains one of the unsupported
# metacharacters `?`, `\`, `[`, or a double glob `**`.
toPathSpec = elem:
  let
    res = builtins.match pathElemMatchers elem;
    # capture group by index; see the numbering in `pathElemMatchers`
    at = builtins.elemAt res;
  in
    if res == null then
      abort "toPathSpec: should not happen (nothing matched)"
    else if at 0 != null then
      abort ''
        .gitignore: We don’t support these globbing metacharacters: ?\[
        The problematic line is ${elem}
      ''
    else if at 1 != null then
      abort ''
        .gitignore: We don’t support ** globbing.
        The problematic line is ${elem}
      ''
    else if at 2 != null then { starGlob = at 2; }
    else if at 3 != null then { literal = at 3; }
    # closing parenthesis added so the message is well-formed,
    # like the corresponding message in `matchLine`
    else abort "toPathSpec: should not happen (${toString res})";
# Turn a pre-filtered .gitignore line (the output of `matchLine`)
# into a GlobSpec.
toGlobSpec = line:
  # `matchLine` signals "nothing to match" with the empty string
  if line == "" then { ignored = {}; }
  else
    let
      segments = splitPathElems line;
      # a leading `/` shows up as an empty first element
      rooted = builtins.head segments == "";
      # a trailing `/` shows up as an empty last element
      directory = lib.last segments == "";
      # strip those marker elements again; only real path elements remain
      withoutRoot = if rooted then builtins.tail segments else segments;
      core = if directory then lib.init withoutRoot else withoutRoot;
    in {
      glob = {
        isDir = directory;
        isRooted = rooted;
        pathSpec = map toPathSpec core;
      };
    };
# Test cases for `toGlobSpec`, in the format expected by `lib.runTests`.
globSpecTests =
  let
    # parse `line` and expect the GlobSpec `expected`
    check = line: expected: {
      expr = toGlobSpec line;
      inherit expected;
    };
    ignoredSpec = { ignored = {}; };
    # a glob with default fields (non-dir, non-rooted, empty pathSpec),
    # overridden by `overrides`
    globWith = overrides: {
      glob = {
        isDir = false;
        isRooted = false;
        pathSpec = [];
      } // overrides;
    };
    lit = text: { literal = text; };
    star = pattern: { starGlob = pattern; };
  in lib.runTests {
    testIgnore = check "" ignoredSpec;
    testRoot = check "/" (globWith {
      isDir = true;
      isRooted = true;
      pathSpec = [ ];
    });
    # testDoubleGlob = check "foo**bar" "???";
    testDir = check "foo/" (globWith {
      isDir = true;
      pathSpec = [ (lit "foo") ];
    });
    testMultiPath = check "foo/bar/baz" (globWith {
      pathSpec = [ (lit "foo") (lit "bar") (lit "baz") ];
    });
    testGlobPath = check "/*/*ab*c/bar/*" (globWith {
      isRooted = true;
      pathSpec = [ (star "*") (star "*ab*c") (lit "bar") (star "*") ];
    });
    testGlobEmptyPath = check "*//bar/*/" (globWith {
      isDir = true;
      pathSpec = [ (star "*") (lit "") (lit "bar") (star "*") ];
    });
  };
# Predicate for whether `path` is matched by a Glob.
#   pathIsDir: whether `path` is a directory (file otherwise)
#   path:      the path to check, as a string with `/`-separated elements
#   glob:      a Glob as produced by `toGlobSpec`
# ‘I have never been this boolean-blind.’
pathMatchesGlob = pathIsDir: path: glob:
  let
    # split
    pathElems = splitPathElems path;
    pathElemsLen = builtins.length pathElems;
    globPathSpecLen = builtins.length glob.pathSpec;
    # Translate a star glob into a regex for `builtins.match`.
    # Only `*` is a wildcard; everything between the stars is literal
    # text and has to be regex-escaped, otherwise e.g. `*.o` would turn
    # into `.*.o` and wrongly match a file called `foo`.
    # Since we forbid \ altogether, we don’t have to worry about \*
    starGlobToRegex = s:
      builtins.concatStringsSep ".*"
        (map lib.escapeRegex (lib.splitString "*" s));
    matchSpec = specElem: pathElem: match {
      literal = l: l == pathElem;
      # we translate to a regex and check
      starGlob = s:
        # based on tests, foo* matches fooabc as well as foo
        # (* expands to the empty string as well)
        builtins.match (starGlobToRegex s) pathElem != null;
    } specElem;
    # all path elements have to match the glob from the left
    matchPermutation = subPathElems:
      # a file glob only matches a subpath of exactly its own length
      (!glob.isDir -> globPathSpecLen == builtins.length subPathElems)
      && (builtins.all lib.id
        # the zip ensures that the longer list is cut to the
        # length of the shorter list; together with the length
        # check for file globs on the last line, this leads to
        # directories matching subpaths as well, e.g.
        # foo/bar/ matches /a/b/foo/bar/, but also /a/b/foo/bar/baz
        (lib.zipListsWith matchSpec glob.pathSpec subPathElems));
    # the glob is tried against every suffix of the path that is
    # at least as long as the glob
    matchAllPermutations =
      let noOfPerms = 1 + pathElemsLen - globPathSpecLen;
      # if any matches, the whole path matches
      in lib.any matchPermutation
        # drop is also defined via genList, maybe there
        # is a better (more efficient) implementation
        (builtins.genList (i: lib.drop i pathElems) noOfPerms);
  in
    # a dir glob only matches a directory
    (glob.isDir -> pathIsDir)
    # if the glob has more elements than the path, we can return right away
    && (globPathSpecLen <= pathElemsLen)
    # if the glob is rooted, we only match from the left
    && (if glob.isRooted
        then matchPermutation pathElems
        # else we have to match the glob over all subpaths
        else matchAllPermutations);
# Test cases for `pathMatchesGlob`, in the format expected by `lib.runTests`.
pathMatchesGlobTest =
  let
    # parse `globString`, match it against `path` (a directory if
    # `isDir`, a file otherwise) and expect the outcome `shouldMatch`
    check = shouldMatch: isDir: globString: path: {
      expr = pathMatchesGlob isDir path (toGlobSpec globString).glob;
      expected = shouldMatch;
    };
    file = false;
    dir = true;
    y = check true; # matches
    n = check false; # does not match
  in lib.runTests {
    testRootFileGood = y file "/hi" "hi";
    testRootFileBad = n file "/hi" "hi-im-too-long";
    testRootFileIsDir = y dir "/hi" "hi";
    testRootDirGood = y dir "/hi/" "hi";
    testRootDirBad = n file "/hi/" "hi";

    # folder specs match all subfiles/folders
    testRootParentDirGood = y dir "/hi/" "hi/parent/matched";
    testRootParentDirBad = n dir "/hi/" "no/parent/matched";

    # a glob that is longer than the folder will never match
    testGlobTooLongFile = n file "/hi/im/too/*/long" "only/short";
    testGlobTooLongDir = n dir "/hi/im/too/*/long/" "only/short/dir";

    # one star glob matches one subpath
    testGlobSimple1 = y file "/hi/*/foo" "hi/im/foo";
    testGlobSimple2 = y file "/hi/*/foo" "hi/your/foo";
    testGlobSimple3 = n file "/hi/*/foo" "hi/your/notfoo";

    # and multiple stars also work
    testGlobMultiple1 = y file "/hi/*u*/foo" "hi/your/foo";
    testGlobMultiple2 = y file "/hi/*u*bc/foo" "hi/yourabc/foo";
    testGlobMultiple3 = n file "/hi/*u*z*bc*/foo" "hi/yourabc/foo";

    # * expands to the empty string as well
    testGlobEmpty1 = y file "/*foo*" "foo";

    # tests for non-rooted files
    # we have to match those on every possible subpath
    testNonRootedGood1 = y file "hi" "hi";
    testNonRootedGood2 = y file "hi" "foo/bar/hi";
    testNonRootedBad = n file "hi" "foo/bar/nothi";
    testNonRootedDirGood = y dir "bar/*/hi/" "foo/bar/baz/hi/quux";
    testNonRootedDirBad = n dir "hi/*" "baz/nope/foo";
  };
# Reads a gitignore file and returns its contents as a list of lines.
# Make sure you reference the path of the surrounding src as a string
# (toString), otherwise it will be copied to the store.
# Example: "${toString src}/.gitignore"
# TODO: Maybe also immediately parse it into globspecs?
readGitignoreFile = path:
  let contents = builtins.readFile path;
  in splitLines contents;
# Takes a source directory and uses the .gitignore file directly inside
# that directory as the predicate for which files to copy to the nix store.
# If you need control over which .gitignore files/lines
# to use, use filterSourceGitignoreWith.
filterSourceGitignore = src:
  let
    gitignoreLines = readGitignoreFile "${toString src}/.gitignore";
  in filterSourceGitignoreWith { inherit gitignoreLines; } src;
# More generic function that takes an attrset of options and a source directory.
# See filterSourceGitignoreWithFilter for the available args;
# `prefix` is filled in here from `src`.
filterSourceGitignoreWith = args: src:
  let
    sourceFilter = filterSourceGitignoreWithFilter (args // {
      prefix = "${toString src}/";
    });
  in builtins.filterSource sourceFilter src;
# Filter form of `filterSourceGitignoreWith`
# that can be passed to `builtins.path` or composed with other source filters.
# After the options attrset it takes the `path` and `type` arguments of a
# source filter and returns `true` for paths that should be kept.
filterSourceGitignoreWithFilter = {
  # list of lines in the .gitignore file
  gitignoreLines,
  # receives the parsed, structured gitignore Globs
  # (see the GlobSpec/Glob type sketch above `toPathSpec`) and can map them.
  # It is passed to `mapMaybe`,
  # so entries that map to `null` are filtered out.
  globMap ? lib.id,
  # The prefix in the nix store to be stripped,
  # i.e. toString src + "/"
  prefix
}:
  let
    # map, but removes elements for which f returns null
    mapMaybe = f: xs: builtins.filter (x: x != null) (map f xs);
    # Parse every .gitignore line into a Glob, dropping lines that carry
    # no pattern (and globs that `globMap` maps to null).
    # This is bound outside of the `path: type:` lambda on purpose, so
    # the lines are parsed once per partially applied filter instead of
    # once per visited file.
    globs = mapMaybe (line: match {
        ignored = _: null;
        glob = globMap;
      } (toGlobSpec (matchLine line)))
      gitignoreLines;
    # the actual predicate that returns whether a file should be ignored
    shouldIgnore = p: t:
      assert lib.assertMsg (t != "unknown")
        (''filterSourceGitignore: file ${p} is of type "unknown"''
          + ", which we don’t support");
      # remove the absolute path prefix
      # of the parent dir of our gitignore, the src
      # (the globs are relative to that directory)
      let relPath = lib.removePrefix
            prefix
            (builtins.toString p);
      in
        # .git is always ignored by default
        (relPath == ".git")
        # if any glob matches, the file is ignored
        || builtins.any
             (pathMatchesGlob (t == "directory") relPath)
             globs;
  in
    # filter arguments passed by `builtins.filterSource`
    path: type: ! shouldIgnore path type;
# TODO: test suite
# in matchLineTests ++ globSpecTests ++ pathMatchesGlobTest
in {
  inherit
    filterSourceGitignore
    filterSourceGitignoreWith
    # the let-binding is called `…WithFilter`; inheriting the
    # non-existent `filterSourceGitignoreFilter` is an
    # "undefined variable" error that breaks the whole file
    filterSourceGitignoreWithFilter
    readGitignoreFile
    ;
  # Alias under the name the export list originally advertised.
  filterSourceGitignoreFilter = filterSourceGitignoreWithFilter;
}