-
Notifications
You must be signed in to change notification settings - Fork 87
/
Copy pathdiffer.rb
666 lines (595 loc) · 30.7 KB
/
differ.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
# frozen_string_literal: true
require 'diffy'
require 'digest'
require 'hashdiff'
require 'json'
require 'set'
require 'stringio'
require_relative '../catalog'
require_relative '../errors'
require_relative '../util/util'
require_relative 'filter'
HashDiff = Hashdiff unless defined? HashDiff
module OctocatalogDiff
module CatalogDiff
# Calculate the difference between two Puppet catalogs.
# -----------------------------------------------------
# It was necessary to write our own code for this, and not just use some existing gem,
# for two main reasons:
#
# 1. There are things that we want to ignore when doing a Puppet catalog diff. For example
# we want to ignore 'before' and 'require' parameters (because those affect the order of
# operations only, not the end result) and we probably want to ignore 'tags' attributes
# and all classes. No existing code (that I could find at least) was capable of allowing
# you to skip stuff via arguments, without your own custom pre-processing.
#
# 2. When using the 'hashdiff' gem, there is no distinguishing between an addition of an entire
# new key-value pair, or the addition of an element in a deeply nested array. By way of
# further explanation, consider these two data structures:
#
# a = { 'foo' => 'bar', 'my_array' => [ 1, 2, 3 ] }
# b = { 'foo' => 'bar', 'my_array' => [ 1, 2, 3, 4 ], 'another_key' => 'another_value' }
#
# The hashdiff gem would report the differences between a and b to be:
# + 4
# + another_key => another_value
#
# We want to distinguish (without a whole bunch of convoluted code) between these two situations.
# One was a true addition (adding a key) while one was a change (adding element to array). This
# distinction becomes even more important when considering top-level changes vs. changes to arrays
# or hashes nested within the catalog.
#
# Therefore, the algorithm implemented here is as follows:
#
# 1. Pre-process the catalog JSON files to:
# - Sort the 'tags' array, since the order of tags does not matter to Puppet
# - Pull out additions of entire key-value pairs (above, 'another_key' => 'another_value')
#
# 2. Everything left consists of key-value pairs where the key exists in both old and new. Pass this
# to the 'hashdiff' gem.
#
# 3. Filter any differences to remove attributes, types, or resources that have been explicitly ignored.
#
# 4. Reformat any '+' or '-' reported by hashdiff to be changes to the keys, rather than outright
# additions.
#
# The heavy lifting is still handled by 'hashdiff' but we're pre-simplifying the input and post-processing
# the output to make it easier to deal with later.
class Differ
# Constructor. Validates both catalogs, keeps the options hash, and registers any
# ignore rules passed in opts[:ignore].
# @param opts [Hash] Options; :logger and :ignore are read here, and the hash is retained as @opts
# @param catalog1_in [OctocatalogDiff::Catalog] First catalog to compare
# @param catalog2_in [OctocatalogDiff::Catalog] Second catalog to compare
def initialize(opts, catalog1_in, catalog2_in)
  # Keep the catalog objects themselves; other methods query them directly.
  @catalog1_raw = catalog1_in
  @catalog2_raw = catalog2_in

  # catalog_resources raises if either input is not a catalog object.
  @catalog1 = catalog_resources(@catalog1_raw, 'First catalog')
  @catalog2 = catalog_resources(@catalog2_raw, 'Second catalog')

  # Without a caller-supplied logger, messages go to an in-memory buffer.
  fallback_logger = Logger.new(StringIO.new)
  @logger = opts.fetch(:logger, fallback_logger)

  @diff_result = nil
  @ignore = Set.new
  initial_rules = opts.fetch(:ignore, [])
  ignore(initial_rules)
  @opts = opts
end
# Difference - calculates (once) and then returns the diff of the two catalogs.
# The result of catdiff is memoized in @diff_result, so later calls reuse it.
# Each diff result is an array like this:
# [ <String> '+|-|~|!', <String> Key name, <Object> Old object, <Object> New object ]
# @return [Array<Diff results>] Results of the diff
def diff
  return @diff_result if @diff_result
  @diff_result = catdiff
end
# Ignore - ignored items can be set by Type, Title, or Attribute; setting multiple in
# a hash is interpreted as AND. The collection of all ignored items is interpreted as OR.
#
# Each rule is normalized on a copy before being stored in @ignore, so the hashes passed
# in by the caller are never modified (and may be frozen).
# @param ignores [Hash<type: xxx, title: yyy, attr: zzz>] Ignore type/title/attr (can pass array also)
# @return [OctocatalogDiff::CatalogDiff::Differ] This object, modified
# @raise [ArgumentError] If an item is not a Hash, or has none of :type, :title, :attr
def ignore(ignores = [])
  rules = ignores.is_a?(Array) ? ignores : [ignores]
  rules.each do |item|
    raise ArgumentError, "Argument #{item.inspect} to ignore is not a hash" unless item.is_a?(Hash)
    unless item.key?(:type) || item.key?(:title) || item.key?(:attr)
      raise ArgumentError, "Argument #{item.inspect} does not contain :type, :title, or :attr"
    end

    # Work on a copy: filling in defaults must not leak back into the caller's hash.
    rule = item.dup
    rule[:type] ||= '*'
    rule[:title] ||= '*'
    rule[:attr] ||= '*'

    # Support wildcards in title: 'foo*bar' becomes a case-insensitive anchored regexp.
    # A bare '*' is left as a string because it means "any title".
    title = rule[:title]
    if title.is_a?(String) && title != '*' && title.include?('*')
      rule[:title] = Regexp.new("\\A#{Regexp.escape(title).gsub('\*', '.*')}\\Z", Regexp::IGNORECASE)
    end

    @ignore.add(rule)
  end
  self
end
# Handle --ignore-tags option, the ability to tag resources within modules/manifests and
# have catalog-diff ignore them. Does nothing unless @opts[:ignore_tags] is a non-empty array.
# Every tagged resource found is registered through #ignore by type and title.
def ignore_tags
  configured_tags = @opts[:ignore_tags]
  return unless configured_tags.is_a?(Array) && configured_tags.any?

  # Pass 1: the "to" catalog is the authoritative list of dynamic ignores, so every
  # resource in it that carries an ignore tag is ignored.
  @catalog2_raw.resources.each do |res|
    next unless tagged_for_ignore?(res)
    type = res['type']
    title = res['title']
    ignore(type: type, title: title)
    @logger.debug "Ignoring type='#{type}', title='#{title}' based on tag in to-catalog"
  end

  # Pass 2: the "from" catalog only contributes resources that are absent from the
  # "to" catalog. This keeps deleted resources that were previously ignored from
  # showing up as removals.
  @catalog1_raw.resources.each do |res|
    type = res['type']
    title = res['title']
    next if @catalog2_raw.resource(type: type, title: title)
    next unless tagged_for_ignore?(res)
    ignore(type: type, title: title)
    @logger.debug "Ignoring type='#{type}', title='#{title}' based on tag in from-catalog"
  end
end
# Return the first catalog's resources after filter_and_cleanup has been applied.
# Public because spec tests call it, in addition to internal callers.
# @return [Array<Resource Hashes>] Filtered resources in catalog
def catalog1
  raw_resources = @catalog1
  filter_and_cleanup(raw_resources)
end
# Return the second catalog's resources after filter_and_cleanup has been applied.
# Public because spec tests call it, in addition to internal callers.
# @return [Array<Resource Hashes>] Filtered resources in catalog
def catalog2
  raw_resources = @catalog2
  filter_and_cleanup(raw_resources)
end
private
# Determine if a resource is tagged with any ignore-tag. A resource counts as tagged when
# its 'tags' array contains either the bare ignore tag or the tag suffixed with the
# resource type (lower-cased, non-word characters replaced by '_').
# @param resource [Hash] The resource
# @return [Boolean] true if tagged for ignore, false if not
def tagged_for_ignore?(resource)
  ignore_tags = @opts[:ignore_tags]
  return false unless ignore_tags.is_a?(Array)
  return false unless resource.key?('tags')

  resource_tags = resource['tags']
  return false unless resource_tags.is_a?(Array)

  ignore_tags.any? do |tag|
    # typed_tag will be like: 'ignored_catalog_diff__mymodule__mytype'
    typed_tag = [tag, resource['type'].downcase.gsub(/\W/, '_')].join('__')
    resource_tags.include?(tag) || resource_tags.include?(typed_tag)
  end
end
# Actually perform the catalog diff. This implements the algorithm described in the
# comment block at the top of this file: pre-process, run hashdiff, report nested
# additions/removals as changes, then drop ignored items and apply filters.
# Note: @opts[:filters] is created if missing and has the legacy filters appended.
# @return [Array<Diff results>] The computed differences
def catdiff
  @logger.debug "Entering catdiff; catalog sizes: #{@catalog1.size}, #{@catalog2.size}"

  # Whole resources present in only one catalog become '+' / '-' entries here.
  # remaining1 / remaining2 hold the resources of each catalog keyed by serialized
  # type+title; after this step both contain the same keys.
  changes, remaining1, remaining2 = preprocess_diff

  # hashdiff yields '~' changes plus a list of serialized tokens for nested
  # structures that had elements added or removed.
  attribute_changes, nested_tokens = hashdiff_initial(remaining1, remaining2)
  changes.concat attribute_changes

  # Nested additions/removals are reported as '!' (see point #2 in the main comment
  # block at the top of this file).
  changes.concat hashdiff_nested_changes(nested_tokens, remaining1, remaining2)

  # Remove resources that have been explicitly ignored
  filter_diffs_for_ignored_items(changes)

  # Legacy options which are now filters
  @opts[:filters] ||= []
  filters = @opts[:filters]
  add_element_to_array(filters, 'CompilationDir')
  add_element_to_array(filters, 'AbsentFile') if @opts[:suppress_absent_file_details]

  # Apply any additional pluggable filters.
  filter_opts = {
    logger: @logger,
    from_compilation_dir: @catalog1_raw.compilation_dir,
    to_compilation_dir: @catalog2_raw.compilation_dir
  }
  OctocatalogDiff::CatalogDiff::Filter.apply_filters(changes, filters, filter_opts) if filters.any?

  @logger.debug "Exiting catdiff; change count: #{changes.size}"
  changes
end
# Append an element to an array unless the array already contains it.
# @param array_in [Array] Array to have element added (**mutated** by this method)
# @param element [?] Element to add
# @return [Array, nil] The array if the element was appended, nil if it was already present
def add_element_to_array(array_in, element)
  return if array_in.include?(element)
  array_in << element
end
# Filter the differences for any items that were ignored, by some combination of type, title, and
# attribute. The array is modified in place: only items for which ignored? is false are kept.
# @param result [Array<Diff results>] Differences (**mutated** by this method)
# @return [Array, nil] The array if anything was removed, nil if nothing changed
def filter_diffs_for_ignored_items(result)
  result.select! { |diff_entry| !ignored?(diff_entry) }
end
# Pre-processing of a catalog.
# - Skip exported resources
# - Re-title File resources by their 'path' parameter
# - Clean parameters via cleanse_parameters_hash (drops 'before' and 'require', among others)
# - Sort the 'tags' array, omitting it when empty
#
# The incoming resources are not modified, so calling this repeatedly on the same
# catalog yields the same result every time.
# @param catalog_resources [Array<Hash>] Catalog resources
# @return [Array<Hash>] Array of cleaned resources
def filter_and_cleanup(catalog_resources)
  catalog_resources.each_with_object([]) do |resource, result|
    # Exported resources are skipped (this is specifically testing that the value is
    # equal to the boolean true, not just that the value exists or something similar)
    next if resource['exported'] == true

    # This will be the modified hash added to result
    hsh = {
      'type' => resource.fetch('type', ''),
      'title' => resource.fetch('title', '')
    }

    # Special case for something like:
    #   file { 'my-own-resource-name':
    #     path => '/var/lib/puppet/my-file.txt'
    #   }
    #
    # The catalog-diff will treat the file above as "File\f/var/lib/puppet/my-file.txt" since the
    # name that was given to the resource has no effect on how the file is deployed.
    #
    # Note that if the file was specified like this:
    #   file { '/var/lib/puppet/my-file.txt': }
    #
    # That also is "File\f/var/lib/puppet/my-file.txt" and that's what we want.
    parameters = resource['parameters']
    if hsh['type'] == 'File' && parameters.is_a?(Hash) && parameters.key?('path')
      hsh['title'] = parameters['path']
      # Drop 'path' from a copy. Deleting it from the resource itself would make a
      # second pass over the same catalog lose the path-based title.
      parameters = parameters.reject { |name, _value| name == 'path' }
    end

    # Process each attribute in the resource
    resource.each do |k, v|
      case k
      when 'title', 'type'
        # Already handled above
        next
      when 'parameters'
        cleansed_param = cleanse_parameters_hash(parameters, resource.fetch('sensitive_parameters', []))
        hsh[k] = cleansed_param unless cleansed_param.nil? || cleansed_param.empty?
      when 'tags'
        # The order of tags is unimportant. Sort this array to avoid false diffs if order changes.
        # Also if tags is empty, don't add.
        hsh[k] = v.sort if v.is_a?(Array) && v.any?
      else
        # Everything else is copied as-is. That includes 'file' and 'line', which do not
        # matter for the diff but may be reported to the user.
        hsh[k] = v
      end
    end

    # hsh always carries 'type' and 'title', so it is never empty here.
    result << hsh
  end
end
# Logic to match attribute regular expressions. Called by lambda function in attr_match_rule?.
# The old and new values are diffed with Diffy (context: 0) and the regexp is tested
# against each stripped line of that diff.
# @param operator [String] Either =~> (any regexp match) or =&> (all diffs must match regexp)
# @param regex [Regexp] Regex object
# @param old_val [String] Value from first catalog
# @param new_val [String] Value from second catalog
# @return [Boolean] True if condition is satisfied, false otherwise
def regexp_operator_match?(operator, regex, old_val, new_val)
  no_newline_marker = '\\ No newline at end of file'
  any_line_suffices = operator == '=~>'
  every_line_required = operator == '=&>'

  changed_lines = Diffy::Diff.new(old_val, new_val, context: 0)
  marker_count = changed_lines.count { |line| line.strip == no_newline_marker }

  changed_lines.each do |line|
    text = line.strip
    if regex.match(text)
      # For '=~>' a single matching line settles it.
      return true if any_line_suffices
      next
    end
    next unless every_line_required
    # For '=&>' a non-matching line is a failure, except for the "no newline" marker
    # when it shows up exactly twice in the diff.
    next if text == no_newline_marker && marker_count == 2
    return false
  end

  # No early exit: '=~>' never saw a match (false); '=&>' never saw a failure (true).
  !any_line_suffices
end
# Determine whether a particular attribute matches a rule.
# @param rule [Hash] Rule; rule[:attr] must be a String here (rule[:type] and rule[:title] are only used in error text)
# @param attrib [String] String representation of attribute (components separated by \f)
# @param old_val [?] Old value
# @param new_val [?] New value
# @return [Boolean] Truthy if attribute matches rule, falsy otherwise
# @raise [RegexpError] If a =~> or =&> rule carries an invalid regular expression
# @raise [ArgumentError] If a =s> rule is given a value other than '='
def attr_match_rule?(rule, attrib, old_val, new_val)
  value_matcher = ->(_before, _after) { true }
  rule_attr = rule[:attr].dup

  # A leading '+' or '-' restricts the rule to attributes that were added or removed.
  if rule_attr.start_with?('+')
    return false unless old_val.nil?
    rule_attr = rule_attr[1..-1]
  elsif rule_attr.start_with?('-')
    return false unless new_val.nil?
    rule_attr = rule_attr[1..-1]
  end

  # Conditions that match the attribute value or regular expression
  # Operators supported include:
  #  =>  String equality
  #  =+> Attribute must have been added and equal this
  #  =-> Attribute must have been removed and equal this
  #  =~> Change must match regexp (one line of change matching is sufficient)
  #  =&> Change must match regexp (all lines of change MUST match regexp)
  #  =s> Change must be array and contain identical elements, ignoring order
  parsed = /\A(.+?)(=[\-\+~&s]?>)(.+)/m.match(rule_attr)
  if parsed
    rule_attr, operator, value = parsed.captures
    value_matcher =
      case operator
      when '=>'
        # Either side equal to the value
        ->(before, after) { before == value || after == value }
      when '=+>'
        # Only the new value is compared
        ->(_before, after) { after == value }
      when '=->'
        # Only the old value is compared
        ->(before, _after) { before == value }
      when '=~>', '=&>'
        begin
          pattern = Regexp.new(value, Regexp::IGNORECASE)
        rescue RegexpError => exc
          key = "#{rule[:type]}[#{rule[:title]}] #{rule_attr.gsub(/\f/, '::')} =~ #{value}"
          raise RegexpError, "Invalid ignore regexp for #{key}: #{exc.message}"
        end
        ->(before, after) { regexp_operator_match?(operator, pattern, before, after) }
      when '=s>'
        raise ArgumentError, "Invalid ignore option for =s>, must be '='" unless value == '='
        ->(before, after) { before.is_a?(Array) && after.is_a?(Array) && Set.new(before) == Set.new(after) }
      else
        value_matcher
      end
  end

  if rule_attr.include?("\f")
    # Multi-component rule: match it as a run of consecutive components. A leading \f
    # anchors the rule to the start of the attribute.
    prefix = rule_attr.start_with?("\f") ? '\A' : '(\A|\f)'
    components = rule_attr.gsub(/^\f+/, '')
    path_regexp = Regexp.new(prefix + Regexp.escape(components) + '(\f|\Z)', Regexp::IGNORECASE)
    attrib.match(path_regexp) && value_matcher.call(old_val, new_val)
  else
    # Single-component rule: it may equal any one component, case-insensitively.
    attrib.downcase.split(/\f/).include?(rule_attr.downcase) && value_matcher.call(old_val, new_val)
  end
end
# Determine if a particular item matches a particular ignore pattern.
# The rule must match on type AND title AND attribute to count as a match.
# @param rule_in [Hash] Ignore rule (:type, :title, :attr); not modified
# @param diff_type [String] One of +, -, ~, !
# @param hsh [Hash] { type: title: attr: } parsed resource name
# @param old_val [?] Old value
# @param new_val [?] New value
# @return [Boolean] True if the item matched the rule
def ignore_match?(rule_in, diff_type, hsh, old_val, new_val)
  rule = rule_in.dup

  # Type matches? (Regexp, or case-insensitive string where '*' means any)
  if rule[:type].is_a?(Regexp)
    return false unless hsh[:type].match(rule[:type])
  elsif rule[:type].is_a?(String)
    return false unless rule[:type] == '*' || rule[:type].casecmp(hsh[:type]).zero?
  end

  # Title matches? (Support regexp and string)
  if rule[:title].is_a?(Regexp)
    return false unless hsh[:title].match(rule[:title])
  elsif rule[:title] != '*'
    return false unless rule[:title].casecmp(hsh[:title]).zero?
  end

  # If rule[:attr] is a regular expression, handle that case here.
  if rule[:attr].is_a?(Regexp)
    return false unless hsh[:attr].is_a?(String)
    return false unless rule[:attr].match(hsh[:attr])
    return ignore_match_true(hsh, rule)
  end

  # Special 'attributes': Ignore specific diff types (+ add, - remove, ~ and ! change).
  # rule[:attr] may also be an Array (handled further down). Arrays do not respond to
  # =~ on Ruby 3.2+, so the pattern test is only attempted on objects that support it.
  if rule[:attr].respond_to?(:=~) && rule[:attr] =~ /\A[\-\+~!]+\Z/
    return ignore_match_true(hsh, rule) if rule[:attr].include?(diff_type)
    return false
  end

  # Attribute matches?
  return ignore_match_true(hsh, rule) if hsh[:attr].nil? && rule[:attr].nil?
  return ignore_match_true(hsh, rule) if rule[:attr] == '*'
  return false if hsh[:attr].nil?

  # Attributes that match values. For an array of attribute rules, all must match.
  if rule[:attr].is_a?(Array)
    all_match = rule[:attr].all? do |attrib|
      attr_match_rule?(rule.merge(attr: attrib), hsh[:attr], old_val, new_val)
    end
    return false unless all_match
  else
    return false unless attr_match_rule?(rule, hsh[:attr], old_val, new_val)
  end

  # Still here? Must be true.
  ignore_match_true(hsh, rule)
end
# Debugging for ignore_match: logs a debug message naming the ignored diff and the rule
# that matched it, then returns true so callers can `return ignore_match_true(...)`.
# @param hsh [Hash] Item that is being checked
# @param rule [Hash] Ignore rule
# @return [Boolean] Always returns true
def ignore_match_true(hsh, rule)
  message = "Ignoring #{hsh.inspect}, matches #{rule.inspect}"
  @logger.debug(message)
  true
end
# Determine if a given item is ignored, i.e. whether any rule in @ignore matches it.
# The diff key is parsed into type / title (/ attr) on \f before the rules are consulted.
# @param diff [Array] Diff ([type of change, key, old value, new value, ...])
# @return [Boolean] True to ignore resource, false not to ignore
def ignored?(diff)
  diff_type = diff[0]
  key = diff[1]
  old_val = diff[2]
  new_val = diff[3]

  resource_only = /\A([^\f]+)\f([^\f]+)\Z/.match(key)
  hsh =
    if resource_only
      # Key names a whole resource: just "Type\fTitle"
      { type: resource_only[1], title: resource_only[2] }
    else
      # Key names something inside a resource: "Type\fTitle\fattribute path"
      parts = key.split(/\f/, 3)
      { type: parts[0], title: parts[1], attr: parts[2] }
    end

  @ignore.any? { |rule| ignore_match?(rule, diff_type, hsh, old_val, new_val) }
end
# Cleanse parameters of filtered attributes.
# - Each parameter named in sensitive_parameters is replaced by a string holding an MD5
#   of its marshalled value, so the value is hidden but a change is still detectable.
# - 'before' and 'require' are removed.
# - 'notify', 'subscribe' and 'tag' arrays are returned sorted.
# @param parameters_hash [Hash] Hash of parameters
# @param sensitive_parameters [Array] Array of sensitive parameters
# @return [Hash] Cleaned parameters hash (original input hash and its arrays are not altered)
def cleanse_parameters_hash(parameters_hash, sensitive_parameters)
  result = parameters_hash.dup

  # hides sensitive params. We still need to know if there's a going to
  # be a diff, so we hash the value.
  sensitive_parameters.each do |p|
    md5 = Digest::MD5.hexdigest Marshal.dump(result[p])
    result[p] = "Sensitive [md5sum #{md5}]"
  end

  # 'before' and 'require' handle internal Puppet ordering but do not affect what
  # happens on the target machine. Don't consider these for the purpose of catalog diff.
  result.delete('before')
  result.delete('require')

  # Sort arrays for parameters where the order is unimportant. `result` is only a
  # shallow copy, so a sorted copy is stored rather than sorting the caller's array in place.
  %w(notify subscribe tag).each do |key|
    result[key] = result[key].sort if result[key].is_a?(Array)
  end

  # Return the result
  result
end
# Pre-process catalog resources by looking for additions and removals. This is required to distinguish between
# top-level addition/removal of resources, and addition/removal of elements from arrays and hashes nested within
# resources (those too will be reported as +/- by hashdiff, but we want to see them as changes).
#
# Resources found in only one catalog are reported and removed from that catalog's
# serialized hash, so both hashes end up with the same keys.
# @return [Array] Three elements: Array<['+|-', Key, Resource Hash, { 'file', 'line' }]>, then the
#   { catalog:, catalog_map: } hash for catalog1, then the same for catalog2
def preprocess_diff
  @logger.debug "Entering preprocess_diff; catalog sizes: #{@catalog1.size}, #{@catalog2.size}"

  # 'catalog1' and 'catalog2' are the public methods that apply filter_and_cleanup;
  # the results are then keyed by serialized type+title for comparison.
  from = resources_as_hashes_with_serialized_keys(catalog1)
  to = resources_as_hashes_with_serialized_keys(catalog2)
  from_resources = from[:catalog]
  to_resources = to[:catalog]

  added_keys = to_resources.keys - from_resources.keys
  removed_keys = from_resources.keys - to_resources.keys

  # The catalog map is keyed by the first two \f-separated components of the key.
  map_key_for = ->(key) { key.split(/\f/, 3)[0..1].join("\f") }

  # Call out all added and removed keys, and delete these from further consideration.
  # (That way, 'hashdiff' will only be used to compare keys existing in both old and new.)
  additions = added_keys.map do |key|
    ['+', key, to_resources.delete(key), to[:catalog_map][map_key_for.call(key)]]
  end
  removals = removed_keys.map do |key|
    ['-', key, from_resources.delete(key), from[:catalog_map][map_key_for.call(key)]]
  end

  @logger.debug "Exiting preprocess_diff; added #{added_keys.size}, removed #{removed_keys.size}"
  [additions + removals, from, to]
end
# This runs the remaining resources in the catalogs through hashdiff.
# '~' entries are returned as changes, each extended with the file/line map entries from
# both catalogs. '+' / '-' entries are not returned as-is; instead the token of the nested
# structure they belong to is collected for hashdiff_nested_changes.
# @param catalog1_in [Hash<catalog:, catalog_map:>] Catalog1's resources, tokenized, plus its map
# @param catalog2_in [Hash<catalog:, catalog_map:>] Catalog2's resources, tokenized, plus its map
# @return [Array<Differences>, Array<Serialized Tokens>] Input to next step
def hashdiff_initial(catalog1_in, catalog2_in)
  from_resources = catalog1_in[:catalog]
  to_resources = catalog2_in[:catalog]
  @logger.debug "Entering hashdiff_initial; catalog sizes: #{from_resources.size}, #{to_resources.size}"

  use_lcs = @opts.fetch(:use_lcs, true)
  @logger.debug "HashDiff configuration: (use_lcs: #{use_lcs})"

  # Value types accepted for an added or removed key.
  known_classes = [String, Integer, Float, TrueClass, FalseClass, Array, Hash, NilClass]
  changes = []
  nested_tokens = Set.new

  HashDiff.diff(from_resources, to_resources, delimiter: "\f", use_lcs: use_lcs).each do |obj|
    if obj[0] == '~'
      # Regular change: attach file/line info from each catalog.
      map_key = obj[1].split(/\f/, 3)[0..1].join("\f") # Keep first two values separated by \f
      changes << obj.push(catalog1_in[:catalog_map][map_key], catalog2_in[:catalog_map][map_key])
    elsif (array_element = /^(.+)\[\d+\]/.match(obj[1]))
      # Added/removed element to/from array: remember the token that precedes the index.
      nested_tokens.add(array_element[1])
    elsif obj[1] =~ /^(.+)\f([^\f]+)$/ && OctocatalogDiff::Util::Util.object_is_any_of?(obj[2], known_classes)
      # Added/removed a key that points to some kind of data structure that we know how
      # to handle.
      nested_tokens.add(obj[1])
    else
      # Any other weird edge cases need to be added and handled here. For now just error out.
      # :nocov:
      raise "Bug (please report): Unexpected data structure in hashdiff_result: #{obj.inspect}"
      # :nocov:
    end
  end

  @logger.debug "Exiting hashdiff_initial; changes: #{changes.size}, nested changes: #{nested_tokens.size}"
  [changes, nested_tokens.to_a]
end
# This diffs nested changes deep in the data structure. Each item in hashdiff_add_remove
# has been previously identified as being an addition or removal from a deeply nested element
# that exists in both old and new. This code compares that deeply nested element in both the
# old and new, and uses status '!' (rather than '+', '-', or '~') to indicate that the change
# occurred in a deeply nested element.
# @param hashdiff_add_remove [Array<Serialized Tokens>] Adds/removes from hashdiff
# @param remaining1 [Hash<catalog:, catalog_map:>] Serialized key / value pairs and map for catalog1 resources
# @param remaining2 [Hash<catalog:, catalog_map:>] Serialized key / value pairs and map for catalog2 resources
# @return [Array<'!', key, old, new, old file/line, new file/line>] Change set
def hashdiff_nested_changes(hashdiff_add_remove, remaining1, remaining2)
  return [] if hashdiff_add_remove.empty?

  from_resources = remaining1[:catalog]
  to_resources = remaining2[:catalog]
  from_map = remaining1[:catalog_map]
  to_map = remaining2[:catalog_map]

  hashdiff_add_remove.map do |key|
    # The first two components (type, title) identify the resource; whatever
    # follows is the path inside that resource.
    type, title, *inner_path = key.split(/\f/)
    resource_key = [type, title].join("\f")

    unless from_resources[resource_key].is_a?(Hash) && to_resources[resource_key].is_a?(Hash)
      # Bug condition
      # :nocov:
      raise "BUG (Please report): Unexpected resource: #{resource_key.inspect} not a catalog resource"
      # :nocov:
    end

    # Suppose from_resources[resource_key] looks like this:
    #   {
    #     "type"=>"Class",
    #     "title"=>"Openssl::Package",
    #     "exported"=>false,
    #     "parameters"=>{"openssl_version"=>"1.0.1-4", "common-array"=>[1, 3, 5]}
    #   }
    # and lookup_path looks like this:
    #   [ "Class\fOpenssl::Package", 'parameters', 'common-array' ]
    #
    # Then the values compared are
    # from_resources["Class\fOpenssl::Package"]['parameters']['common-array'] and its
    # counterpart in to_resources. dig_out_key gets a fresh copy of the path each time.
    lookup_path = [resource_key, *inner_path]
    old_value = dig_out_key(from_resources, lookup_path.dup)
    new_value = dig_out_key(to_resources, lookup_path.dup)
    ['!', key, old_value, new_value, from_map[resource_key], to_map[resource_key]]
  end
end
# From an array of keys [key1, key2, key3, ...] dig out the value of hash[key1][key2][key3]...
# Descent stops early, returning the value reached so far, as soon as that value is not a Hash.
# @param hash_in [Hash] Starting hash
# @param key_array [Array<String>] Names of keys in order (**mutated**: consumed keys are shifted off)
# @return [?] Value of hash_in[key1][key2][key3]..., or nil if any keys along the way don't exist
def dig_out_key(hash_in, key_array)
  current = hash_in
  until key_array.empty?
    return current unless current.is_a?(Hash)
    return nil unless current.key?(key_array.first)
    current = current[key_array.shift]
  end
  current
end
# This is a helper for the constructor, verifying that the incoming catalog is an expected
# object and handing back its resources.
# @param catalog_in [OctocatalogDiff::Catalog] Incoming catalog
# @param name [String] Label for the catalog, used in the error message
# @return [?] Whatever catalog_in.resources returns
# @raise [OctocatalogDiff::Errors::DifferError] If catalog_in is not an OctocatalogDiff::Catalog
def catalog_resources(catalog_in, name = 'Passed catalog')
  unless catalog_in.is_a?(OctocatalogDiff::Catalog)
    raise OctocatalogDiff::Errors::DifferError, "#{name} is not a valid catalog (input datatype: #{catalog_in.class})"
  end
  catalog_in.resources
end
# Turn array of resources into a hash by serialized keys. The serialized key is the
# resource type and the resource title, joined with \f.
#
# Two hashes are built under the same keys: :catalog holds a copy of each resource without
# its 'file' and 'line' entries, and :catalog_map holds just those 'file' and 'line' values.
# @param catalog Array<Hash> Resource array from catalog
# @return [Hash] { catalog: Hash<key, resource>, catalog_map: Hash<key, { 'file', 'line' }> }
def resources_as_hashes_with_serialized_keys(catalog)
  catalog.each_with_object({ catalog: {}, catalog_map: {} }) do |item, result|
    serialized_key = "#{item['type']}\f#{item['title']}"
    result[:catalog_map][serialized_key] = { 'file' => item['file'], 'line' => item['line'] }

    # Source location goes in the map only; strip it from the compared copy.
    resource = item.dup
    resource.delete('file')
    resource.delete('line')
    result[:catalog][serialized_key] = resource
  end
end
end
end
end