-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathrecipe_parser_convert.py
937 lines (802 loc) · 49.2 KB
/
recipe_parser_convert.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
"""
:Description: Provides a subclass of RecipeParser that performs the conversion of a v0 recipe to the new v1 recipe
format. This tooling was originally part of the base class, but was broken-out for easier/cleaner code
maintenance.
"""
from __future__ import annotations
from typing import Final, Optional, cast
from conda.models.match_spec import MatchSpec
from conda_recipe_manager.licenses.spdx_utils import SpdxUtils
from conda_recipe_manager.parser._types import ROOT_NODE_VALUE, CanonicalSortOrder, Regex
from conda_recipe_manager.parser._utils import search_any_regex, set_key_conditionally, stack_path_to_str
from conda_recipe_manager.parser.dependency import Dependency, DependencyConflictMode
from conda_recipe_manager.parser.enums import SchemaVersion, SelectorConflictMode
from conda_recipe_manager.parser.recipe_parser import RecipeParser
from conda_recipe_manager.parser.recipe_parser_deps import RecipeParserDeps
from conda_recipe_manager.parser.types import CURRENT_RECIPE_SCHEMA_FORMAT
from conda_recipe_manager.types import JsonPatchType, JsonType, MessageCategory, MessageTable, Primitives, SentinelType
class RecipeParserConvert(RecipeParserDeps):
"""
Extension of the base RecipeParserDeps class that enables upgrading recipes from the old to V1 format.
This was originally part of the RecipeParserDeps class but was broken-out for easier maintenance.
"""
def __init__(self, content: str):
    """
    Builds a recipe object that can be converted to the V1 format. On top of the state set up by the parent
    class, this keeps a second parser instance that acts as the working copy for the conversion and a few objects
    used for reporting/debugging.

    :param content: conda-build formatted recipe file, as a single text string.
    """
    super().__init__(content)
    # The working copy is built by rendering this recipe and parsing the result again. `copy.deepcopy()` was
    # tried and produced bizarre artifacts (single-line comments were incorrectly rendered as list members).
    # Round-tripping is less efficient, but it is already validated by tests and costs nothing extra to maintain.
    rendered_recipe: Final[str] = self.render()
    self._v1_recipe: RecipeParserDeps = RecipeParserDeps(rendered_recipe)
    # SPDX helper used when correcting license names.
    self._spdx_utils = SpdxUtils()
    # Collects the warnings and errors raised while converting.
    self._msg_tbl = MessageTable()
## Patch utility functions ##
def _patch_and_log(self, patch: JsonPatchType) -> bool:
    """
    Applies a patch to the V1 working copy of the recipe. When the patch cannot be applied, an error is recorded
    in the message table.

    :param patch: Patch operation to perform
    :returns: The flag reported by the patch operation, forwarded so callers can do further logging/error handling
    """
    was_applied: Final[bool] = self._v1_recipe.patch(patch)
    if was_applied:
        return was_applied
    self._msg_tbl.add_message(MessageCategory.ERROR, f"Failed to patch: {patch}")
    return was_applied
def _patch_add_missing_path(self, base_path: str, ext: str, value: JsonType = None) -> None:
    """
    Adds `base_path` + `ext` to the V1 working copy, but only when that path is not already present. This helps
    when more than one path level has to be built, as the JSON patch standard only allows the creation of 1 new
    level at a time.

    :param base_path: Base path, to be extended
    :param ext: Extension to create the full path to check for
    :param value: `value` field for the patch-add operation
    """
    full_path: Final[str] = RecipeParser.append_to_path(base_path, ext)
    if not self._v1_recipe.contains_value(full_path):
        self._patch_and_log({"op": "add", "path": full_path, "value": value})
def _patch_move_base_path(self, base_path: str, old_ext: str, new_ext: str) -> None:
    """
    Moves the value stored at `base_path` + `old_ext` to `base_path` + `new_ext`. Nothing happens when the old
    path does not exist.

    :param base_path: Shared base path from old and new locations
    :param old_ext: Old extension to the base path containing the data to move
    :param new_ext: New extension to the base path of where the data should go
    """
    source_path: Final[str] = RecipeParser.append_to_path(base_path, old_ext)
    if self._v1_recipe.contains_value(source_path):
        destination_path: Final[str] = RecipeParser.append_to_path(base_path, new_ext)
        self._patch_and_log({"op": "move", "from": source_path, "path": destination_path})
def _patch_move_new_path(self, base_path: str, old_ext: str, new_path: str, new_ext: Optional[str] = None) -> None:
    """
    Moves a value to a location nested under an intermediate path, creating that intermediate path first when the
    value to move exists and the intermediate path is missing.

    Examples:
      - `/build/entry_points` -> `/build/python/entry_points`
      - `/build/missing_dso_whitelist` -> `/build/dynamic_linking/missing_dso_allowlist`

    :param base_path: Shared base path from old and new locations
    :param old_ext: Old extension to the base path containing the data to move
    :param new_path: New path to extend to the base path, if the path does not currently exist
    :param new_ext: (Optional) New extension to the base path of where the data should go. Use this when the target
        value has been renamed. Defaults to the value of `old_ext`.
    """
    target_ext: Final[str] = old_ext if new_ext is None else new_ext
    old_full_path: Final[str] = RecipeParser.append_to_path(base_path, old_ext)
    # Only build the intermediate path when there is something to move into it.
    if self._v1_recipe.contains_value(old_full_path):
        self._patch_add_missing_path(base_path, new_path)
    # The move helper performs its own existence check, so this does nothing when the old path is absent.
    self._patch_move_base_path(base_path, old_ext, RecipeParser.append_to_path(new_path, target_ext))
def _patch_deprecated_fields(self, base_path: str, fields: list[str]) -> None:
    """
    Removes every listed field that is present under a common path, warning the user about each successful
    removal.

    :param base_path: Shared base path where fields can be found
    :param fields: List of deprecated fields
    """
    for field in fields:
        field_path = RecipeParser.append_to_path(base_path, field)
        # Missing fields are skipped silently. A failed removal is reported by `_patch_and_log()` itself.
        if self._v1_recipe.contains_value(field_path) and self._patch_and_log({"op": "remove", "path": field_path}):
            self._msg_tbl.add_message(MessageCategory.WARNING, f"Field at `{field_path}` is no longer supported.")
## Upgrade functions ##
def _upgrade_jinja_to_context_obj(self) -> None:
    """
    Converts the old proprietary JINJA templating to the V1 equivalents: the variable table becomes a
    YAML-parsable `context` object and substitutions are rewritten to the `$`-escaped form.
    """
    # Build the `context` section from the JINJA variable table.
    context_obj: dict[str, Primitives] = {}
    for var_name, var_value in self._v1_recipe._vars_tbl.items():  # pylint: disable=protected-access
        # Drop any value whose type is not covered by the V1 format
        if not isinstance(var_value, (str, int, float, bool)):
            self._msg_tbl.add_message(MessageCategory.WARNING, f"The variable `{var_name}` is an unsupported type.")
            continue
        # Function calls must keep their JINJA escaping, otherwise they become unevaluated strings.
        # See issue #271 for details about the `env.get(` check.
        needs_escaping = isinstance(var_value, str) and (
            search_any_regex(Regex.JINJA_FUNCTIONS_SET, var_value) or var_value.startswith("env.get(")
        )
        context_obj[var_name] = "{{" + var_value + "}}" if needs_escaping else var_value

    # An empty `context` object is forbidden by the schema, so the section is only added when it has members.
    if context_obj:
        # Flag JINJA that is too complex to convert
        unsupported_keys: list[str] = []
        for key, value in context_obj.items():
            if not isinstance(value, str):
                continue
            if any(pattern.search(value) for pattern in Regex.V0_UNSUPPORTED_JINJA):
                unsupported_keys.append(key)
        if unsupported_keys:
            self._msg_tbl.add_message(
                MessageCategory.WARNING,
                f"The following key(s) contain unsupported syntax: {', '.join(unsupported_keys)}",
            )
        self._patch_and_log({"op": "add", "path": "/context", "value": cast(JsonType, context_obj)})

    # Patch-in the new `schema_version` value as well
    self._patch_and_log({"op": "add", "path": "/schema_version", "value": CURRENT_RECIPE_SCHEMA_FORMAT})

    # Rewrite all JINJA substitutions to the new `${{ }}` format. The search results are de-duplicated with a `set`
    # because a value containing several variables could otherwise be visited more than once, which would wrap it
    # in `${{}}` multiple times.
    for path in set(self._v1_recipe.search(Regex.JINJA_V0_SUB)):
        current = self._v1_recipe.get_value(path)
        if isinstance(current, str):
            # Replaces `{{` while leaving any existing `${{` instances alone
            upgraded = Regex.JINJA_REPLACE_V0_STARTING_MARKER.sub("${{", current)
            self._patch_and_log({"op": "replace", "path": path, "value": upgraded})
            continue
        # Only strings should match the regex. This guard prevents crashes that should not occur.
        self._msg_tbl.add_message(
            MessageCategory.WARNING, f"A non-string value was found as a JINJA substitution: {current}"
        )
def _upgrade_ambiguous_deps(self) -> None:
    """
    Rewrites dependency version constraints that would be ambiguous in the V1 format. The dependency tooling is
    re-used to avoid duplicating logic. See Issue #276 and PR prefix-dev/rattler-build#1271 for more details.

    This must be run before selectors are upgraded to the V1 format, as V1 support for dependency management is not
    yet available.
    """
    try:
        all_deps = self._v1_recipe.get_all_dependencies()
    except (KeyError, ValueError):
        self._msg_tbl.add_message(
            MessageCategory.ERROR,
            "Could not parse dependencies when attempting to upgrade ambiguous version numbers.",
        )
        return

    # Regular-expression corrections, applied in order:
    #   1-2) Fairly common typos of the `>=` and `<=` operators.
    #   3)   Two operators used at once (i.e. `foo >=1.2.*`). MatchSpec can't be relied on to detect multiple
    #        operators, hence the regular expression. The trailing `.*` is dropped to align with
    #        `rattler-build`'s preferences:
    #        https://github.com/conda/rattler/blob/main/crates/rattler_conda_types/src/version_spec/parse.rs#L224
    spec_corrections = (
        (Regex.AMBIGUOUS_DEP_VERSION_GE_TYPO, r"\1>=\2"),
        (Regex.AMBIGUOUS_DEP_VERSION_LE_TYPO, r"\1<=\2"),
        (Regex.AMBIGUOUS_DEP_MULTI_OPERATOR, r"\1\2\3"),
    )

    for dep_list in all_deps.values():
        for dep in dep_list:
            dep_data = dep.data
            # Warn and move on if there is a potential for an ambiguous version variable.
            if not isinstance(dep_data, MatchSpec):  # type: ignore[misc]
                # TODO: Reduce spammy-ness by looking at the variables table
                self._msg_tbl.add_message(
                    MessageCategory.WARNING,
                    (
                        "Recipe upgrades cannot currently upgrade ambiguous version constraints on dependencies"
                        f" that use variables: {dep_data.name}"
                    ),
                )
                continue

            if dep_data.version is None:  # type: ignore[misc]
                continue
            original_spec = dep_data.original_spec_str  # type: ignore[misc]
            if not isinstance(original_spec, str):
                continue

            updated_spec = original_spec
            for pattern, replacement in spec_corrections:
                updated_spec = pattern.sub(replacement, updated_spec)

            # Add a trailing `.*` to ambiguous dependencies that lack an operator. This is not that easy as
            # `VersionSpec` does not make a distinction between a version that contains a `==` operator and a
            # version with no operator (which is ambiguous per the V1 specification).
            if cast(bool, dep_data.version.is_exact()) and "=" not in original_spec:  # type: ignore[misc]
                updated_spec = f"{updated_spec}.*"

            # Untouched dependencies are not committed.
            if updated_spec == original_spec:
                continue

            # TODO add IGNORE conflict mode for selectors???
            replacement_dep = Dependency(
                required_by=dep.required_by,
                path=dep.path,
                type=dep.type,
                data=MatchSpec(updated_spec),
                selector=dep.selector,
            )
            self._v1_recipe.add_dependency(
                replacement_dep,
                dep_mode=DependencyConflictMode.EXACT_POSITION,
                sel_mode=SelectorConflictMode.OR,
            )
            self._msg_tbl.add_message(MessageCategory.WARNING, f"Version on dependency changed to: {updated_spec}")
def _upgrade_selectors_to_conditionals(self) -> None:
    """
    Replaces the proprietary comment-based selector syntax with equivalent conditional logic statements.

    TODO warn if selector is unrecognized? See list:
      https://prefix-dev.github.io/rattler-build/latest/selectors/#available-variables
    conda docs for common selectors:
      https://docs.conda.io/projects/conda-build/en/latest/resources/define-metadata.html#preprocessing-selectors
    """
    # Python-version selector upgrades, applied in order. Without these, expressions like `py<36` would be
    # interpreted as strings. See this CEP PR for more details: https://github.com/conda/ceps/pull/71
    python_selector_upgrades = (
        # Commonly used comparisons (like `py<36`)
        (Regex.SELECTOR_PYTHON_VERSION_REPLACEMENT, r'match(python, "\1\2.\3")'),
        # Less common `py36` and `not py27` selectors
        (Regex.SELECTOR_PYTHON_VERSION_EQ_REPLACEMENT, r'match(python, "==\1.\2")'),
        (Regex.SELECTOR_PYTHON_VERSION_NE_REPLACEMENT, r'match(python, "!=\1.\2")'),
        # Less common `py2k` and `py3k` selectors
        (Regex.SELECTOR_PYTHON_VERSION_PY2K_REPLACEMENT, r'match(python, ">=2,<3")'),
        (Regex.SELECTOR_PYTHON_VERSION_PY3K_REPLACEMENT, r'match(python, ">=3,<4")'),
        # TODO other common selectors to support:
        # - GPU variants (see pytorch and llama.cpp feedstocks)
    )

    for selector, instances in self._v1_recipe._selector_tbl.items():  # pylint: disable=protected-access
        # The expression only depends on the selector text: strip the surrounding []'s, then upgrade it.
        condition = selector[1:-1]
        for regex, replacement in python_selector_upgrades:
            condition = regex.sub(replacement, condition)

        for info in instances:
            # Selectors can be applied to the parent node if they appear on the same line. Those are ignored
            # when building replacements.
            if not info.node.is_leaf():
                continue

            # Convert to a public-facing path representation
            selector_path = stack_path_to_str(info.path)
            node_value = info.node.value

            new_value: JsonType
            if isinstance(node_value, bool):
                # For now, a selector that lands on a boolean value becomes a ternary statement. `skip` is
                # special and can be a single boolean expression or a list of boolean expressions.
                if selector_path.endswith("/build/skip"):
                    new_value = condition
                else:
                    new_value = "${{ true if " + condition + " }}"
            elif not info.node.list_member_flag:
                # CEP-13 states that ONLY list members may use the `if/then/else` blocks
                self._msg_tbl.add_message(
                    MessageCategory.WARNING, f"A non-list item had a selector at: {selector_path}"
                )
                continue
            else:
                new_value = cast(
                    JsonType,
                    {
                        "if": condition,
                        "then": None if isinstance(node_value, SentinelType) else node_value,
                    },
                )

            # Apply the patch, then drop the old selector
            self._patch_and_log({"op": "replace", "path": selector_path, "value": new_value})
            self._v1_recipe.remove_selector(selector_path)
def _correct_common_misspellings(self, base_package_paths: list[str]) -> None:
    """
    Renames commonly misspelled field names to their correct spelling.

    :param base_package_paths: Set of base paths to process that could contain this section.
    """
    # "If I had a nickel for every time `skip` was misspelled, I would have several nickels. Which isn't a lot,
    # but it is weird that it has happened multiple times."
    #   - Dr. Doofenshmirtz, probably
    build_corrections: Final[tuple[tuple[str, str], ...]] = (
        ("skipt", "skip"),
        ("skips", "skip"),
        ("Skip", "skip"),
    )
    about_corrections: Final[tuple[tuple[str, str], ...]] = (
        # Various misspellings of "license_file" and "license_family". Note that `license_family` is deprecated,
        # but the spelling is fixed so it can be removed at a later phase.
        ("licence_file", "license_file"),
        ("licensse_file", "license_file"),
        ("license_filte", "license_file"),
        ("licsense_file", "license_file"),
        ("icense_file", "license_file"),
        ("licence_family", "license_family"),
        ("license_familiy", "license_family"),
        ("license_familly", "license_family"),
        # Other about fields
        ("Description", "description"),
    )

    for base_path in base_package_paths:
        build_path = RecipeParser.append_to_path(base_path, "/build")
        about_path = RecipeParser.append_to_path(base_path, "/about")

        for misspelled, corrected in build_corrections:
            self._patch_move_base_path(build_path, misspelled, corrected)
        for misspelled, corrected in about_corrections:
            self._patch_move_base_path(about_path, misspelled, corrected)

        # `/extras` -> `/extra`
        self._patch_move_base_path(base_path, "extras", "extra")
def _upgrade_source_section(self, base_package_paths: list[str]) -> None:
    """
    Converts the `source` section(s) of a recipe file to the V1 layout.

    :param base_package_paths: Set of base paths to process that could contain this section.
    """
    # SVN and HG source options are no longer supported. This seems to have been deprecated a long time ago and
    # there are unlikely any recipes that fall into this camp. Still, they get flagged.
    unsupported_sources: Final[tuple[tuple[str, str], ...]] = (
        ("svn_url", "SVN packages are no longer supported in the V1 format"),
        ("hg_url", "HG (Mercurial) packages are no longer supported in the V1 format"),
    )
    source_renames: Final[tuple[tuple[str, str], ...]] = (
        # Basic renaming transformations
        ("/fn", "/file_name"),
        ("/folder", "/target_directory"),
        # `git` source transformations (`conda` does not appear to support all of the new features)
        ("/git_url", "/git"),
        ("/git_tag", "/tag"),
        ("/git_rev", "/rev"),
        ("/git_depth", "/depth"),
    )

    for base_path in base_package_paths:
        source_path = RecipeParser.append_to_path(base_path, "/source")
        if not self._v1_recipe.contains_value(source_path):
            continue

        # `source` is either a list of elements or a single element that is not wrapped in a list. Both cases
        # are normalized into a list of paths to visit.
        source_data = self._v1_recipe.get_value(source_path)
        if isinstance(source_data, list):
            source_paths = [RecipeParser.append_to_path(source_path, f"/{idx}") for idx in range(len(source_data))]
        else:
            source_paths = [source_path]

        for src_path in source_paths:
            for unsupported_key, warning in unsupported_sources:
                if self._v1_recipe.contains_value(RecipeParser.append_to_path(src_path, unsupported_key)):
                    self._msg_tbl.add_message(MessageCategory.WARNING, warning)

            for old_key, new_key in source_renames:
                self._patch_move_base_path(src_path, old_key, new_key)

            # Canonically sort this section
            self._v1_recipe._sort_subtree_keys(  # pylint: disable=protected-access
                src_path, CanonicalSortOrder.V1_SOURCE_SECTION_KEY_SORT_ORDER
            )
def _upgrade_build_script_section(self, build_path: str) -> None:
    """
    Upgrades the `/build/script` section if needed. Some fields like `script_env` will need to be wrapped into a new
    `Script` object. Simple `script` sections can be left unchanged.

    Entries of `script_env` are handled as follows:
      - `NAME=VALUE` strings are added to the new `env` dictionary. Only the first `=` separates the name from the
        value, so values that themselves contain `=` (i.e. `CFLAGS=-DFOO=1`) are preserved.
      - `NAME` strings (no `=`) are added to the new `secrets` list.
      - Conditional (dictionary) entries are kept as-is in `secrets` when they do not set a value. Conditional
        entries that set a value can't be expressed in the V1 format and are reported as errors.
      - Anything else is reported as an error in the message table.

    :param build_path: Build section path to upgrade
    """
    script_env_path: Final[str] = RecipeParser.append_to_path(build_path, "/script_env")
    # The environment list could contain dictionaries if the variables are conditionally included.
    script_env_lst: Final[list[str | dict[str, str]]] = cast(
        list[str | dict[str, str]], self._v1_recipe.get_value(script_env_path, [])
    )
    # Nothing to do if `script_env` is missing or empty.
    if not script_env_lst:
        return

    script_path: Final[str] = RecipeParser.append_to_path(build_path, "/script")
    new_script_obj: JsonType = {}
    # Set environment variables need to be parsed and then re-added as a dictionary. Unset variables are listed
    # in the `secrets` section.
    new_env: dict[str, str] = {}
    new_secrets: list[str | dict[str, str]] = []
    for item in script_env_lst:
        # Attempt to edit conditional variables
        if isinstance(item, dict):
            then_value = item.get("then")
            # A missing or non-string `then` clause can't be interpreted as a single variable.
            if not isinstance(then_value, str):
                self._msg_tbl.add_message(
                    MessageCategory.ERROR, f"Could not parse dictionary `{item}` found in {script_env_path}"
                )
                continue
            if "=" not in then_value:
                new_secrets.append(item)
            else:
                # The spec does not support conditional statements in a dictionary. As per discussions with the
                # community, the best course of action is manual intervention.
                self._msg_tbl.add_message(
                    MessageCategory.ERROR,
                    f"Converting `{item}` found in {script_env_path} is not supported."
                    " Manually replace the selector with a `cmp()` function.",
                )
            continue

        # Guard against list members that are neither a dictionary nor a string.
        if not isinstance(item, str):
            self._msg_tbl.add_message(MessageCategory.ERROR, f"Could not parse `{item}` found in {script_env_path}")
            continue

        # Split on the first `=` only, so that the value may contain additional `=` characters.
        name, separator, value = item.partition("=")
        if separator:
            new_env[name.strip()] = value.strip()
        else:
            new_secrets.append(name.strip())

    set_key_conditionally(cast(dict[str, JsonType], new_script_obj), "env", cast(JsonType, new_env))
    set_key_conditionally(cast(dict[str, JsonType], new_script_obj), "secrets", cast(JsonType, new_secrets))

    # Wrap any existing script into the new object and drop the old `script_env` field.
    script_value = self._v1_recipe.get_value(script_path, "")
    patch_op: Final[str] = "replace" if script_value else "add"
    # TODO: Simple script files should be set as `file` not `content`
    set_key_conditionally(cast(dict[str, JsonType], new_script_obj), "content", script_value)

    self._patch_and_log({"op": patch_op, "path": script_path, "value": new_script_obj})
    self._patch_and_log({"op": "remove", "path": script_env_path})
def _upgrade_build_section(self, base_package_paths: list[str]) -> None:
    """
    Upgrades/converts the `build` section(s) of a recipe file. This relocates the run-export fields to the
    `requirements` section, renames/moves fields that have a V1 equivalent, wraps `script_env` into the new script
    object, removes deprecated fields, and finally sorts the section's keys canonically.

    :param base_package_paths: Set of base paths to process that could contain this section.
    """
    build_deprecated: Final[list[str]] = [
        "pre-link",
        "noarch_python",
        "features",
        "msvc_compiler",
        "requires_features",
        "provides_features",
        "preferred_env",
        "preferred_env_executable_paths",
        "disable_pip",
        "pin_depends",
        "overlinking_ignore_patterns",
        "rpaths_patcher",
        "post-link",
        "pre-unlink",
    ]
    # Fields that move from `build` to `requirements`, keeping their name.
    moved_to_requirements: Final[tuple[str, ...]] = ("run_exports", "ignore_run_exports")

    for base_path in base_package_paths:
        # Move `run_exports` and `ignore_run_exports` from `build` to `requirements`. This happens before the
        # `/build` existence check below, matching the order of operations this conversion has always used.
        for field in moved_to_requirements:
            old_field_path = RecipeParser.append_to_path(base_path, f"/build/{field}")
            if not self._v1_recipe.contains_value(old_field_path):
                continue
            # The `requirements` section must exist before a value can be moved underneath it.
            self._patch_add_missing_path(base_path, "/requirements")
            self._patch_and_log(
                {
                    "op": "move",
                    "from": old_field_path,
                    "path": RecipeParser.append_to_path(base_path, f"/requirements/{field}"),
                }
            )

        # Perform internal section changes per `build/` section
        build_path = RecipeParser.append_to_path(base_path, "/build")
        if not self._v1_recipe.contains_value(build_path):
            continue

        # Simple transformations
        self._patch_move_base_path(build_path, "merge_build_host", "merge_build_and_host_envs")
        self._patch_move_base_path(build_path, "no_link", "always_copy_files")

        # `build/entry_points` -> `build/python/entry_points`
        self._patch_move_new_path(build_path, "/entry_points", "/python")

        # New `prefix_detection` section changes
        # NOTE: There is a new `force_file_type` field that may map to an unknown field that conda supports.
        self._patch_move_new_path(build_path, "/ignore_prefix_files", "/prefix_detection", "/ignore")
        self._patch_move_new_path(
            build_path, "/detect_binary_files_with_prefix", "/prefix_detection", "/ignore_binary_files"
        )

        # New `dynamic_linking` section changes
        # NOTE: `overdepending_behavior` and `overlinking_behavior` are new fields that don't have a direct path
        #       to conversion.
        self._patch_move_new_path(build_path, "/rpaths", "/dynamic_linking", "/rpaths")
        self._patch_move_new_path(build_path, "/binary_relocation", "/dynamic_linking", "/binary_relocation")
        self._patch_move_new_path(
            build_path, "/missing_dso_whitelist", "/dynamic_linking", "/missing_dso_allowlist"
        )
        self._patch_move_new_path(build_path, "/runpath_whitelist", "/dynamic_linking", "/rpath_allowlist")

        self._upgrade_build_script_section(build_path)

        self._patch_deprecated_fields(build_path, build_deprecated)

        # Canonically sort this section
        self._v1_recipe._sort_subtree_keys(  # pylint: disable=protected-access
            build_path, CanonicalSortOrder.V1_BUILD_SECTION_KEY_SORT_ORDER
        )
def _upgrade_requirements_section(self, base_package_paths: list[str]) -> None:
    """
    Converts the `requirements` section(s) of a recipe file to the V1 layout.

    :param base_package_paths: Set of base paths to process that could contain this section.
    """
    for base_path in base_package_paths:
        requirements_path = RecipeParser.append_to_path(base_path, "/requirements")
        if self._v1_recipe.contains_value(requirements_path):
            # `run_constrained` was renamed to `run_constraints`
            self._patch_move_base_path(requirements_path, "/run_constrained", "/run_constraints")
def _fix_bad_licenses(self, about_path: str) -> None:
    """
    Tries to replace the recipe's license with the closest SPDX-recognized name, as determined by the SPDX
    utility object held by this class. A warning is logged when there is no license, when no match is found, and
    when a correction is applied.

    :param about_path: Path to the `about` section, where the `license` field is located.
    """
    license_path: Final[str] = RecipeParser.append_to_path(about_path, "/license")
    old_license: Final[Optional[str]] = cast(Optional[str], self._v1_recipe.get_value(license_path, default=None))
    if old_license is None:
        self._msg_tbl.add_message(MessageCategory.WARNING, f"No `license` provided in `{about_path}`")
        return

    corrected_license: Final[Optional[str]] = self._spdx_utils.find_closest_license_match(old_license)
    if corrected_license is None:
        self._msg_tbl.add_message(MessageCategory.WARNING, f"Could not patch unrecognized license: `{old_license}`")
        return

    # Licenses that are already correct are left alone. Otherwise, the user is alerted that a patch was made, in
    # case it needs manual verification. No warning is emitted if the patch failed (the failure generates an
    # arguably more important message).
    if corrected_license != old_license and self._patch_and_log(
        {"op": "replace", "path": license_path, "value": corrected_license}
    ):
        self._msg_tbl.add_message(
            MessageCategory.WARNING, f"Changed {license_path} from `{old_license}` to `{corrected_license}`"
        )
def _upgrade_about_section(self, base_package_paths: list[str]) -> None:
    """
    Converts the `about` section of a recipe file to the V1 layout: fields are renamed, the license is corrected,
    list-based summaries are collapsed into one string, and deprecated fields are removed.

    :param base_package_paths: Set of base paths to process that could contain this section.
    """
    # (old name, new name) pairs
    renamed_fields: Final[list[tuple[str, str]]] = [
        ("home", "homepage"),
        ("dev_url", "repository"),
        ("doc_url", "documentation"),
    ]
    deprecated_fields: Final[list[str]] = [
        "prelink_message",
        "license_family",
        "identifiers",
        "tags",
        "keywords",
        "doc_source_url",
    ]

    for base_path in base_package_paths:
        about_path = RecipeParser.append_to_path(base_path, "/about")
        # Nothing to transform without an `/about` section
        if not self._v1_recipe.contains_value(about_path):
            continue

        for old_name, new_name in renamed_fields:
            self._patch_move_base_path(about_path, old_name, new_name)

        self._fix_bad_licenses(about_path)

        # R packages like to use multiline strings without multiline markers, which get interpreted as list members
        # TODO address this at parse-time, adding a new multiline mode
        summary_path = RecipeParser.append_to_path(about_path, "/summary")
        summary = self._v1_recipe.get_value(summary_path, "")
        if isinstance(summary, list):
            joined_summary = "\n".join(cast(list[str], summary))
            self._patch_and_log({"op": "replace", "path": summary_path, "value": joined_summary})

        self._patch_deprecated_fields(about_path, deprecated_fields)
def _upgrade_test_pip_check(self, test_path: str) -> None:
    """
    Swaps the commonly used `pip check` test command for the new `python/pip_check` attribute, if applicable.

    :param test_path: Test path for the build target to upgrade
    """
    # DO NOT ADD THE FLAG IF THE RECIPE IS NOT A "PYTHON RECIPE".
    if not self._v1_recipe.is_python_recipe():
        return

    pip_check_variants: Final[set[str]] = {
        "pip check",
        "python -m pip check",
        "python3 -m pip check",
    }

    commands_path: Final[str] = RecipeParser.append_to_path(test_path, "/commands")
    commands = cast(Optional[list[str]], self._v1_recipe.get_value(commands_path, []))
    # Normalize the rare edge case where the list may be null (usually caused by commented-out code)
    if commands is None:
        commands = []

    # Locate the first `pip check` command that has no selector attached.
    # TODO Future: handle selector cases (pip check will be in the `then` section of a dictionary object)
    pip_check_idx = next(
        (
            idx
            for idx, command in enumerate(commands)
            if isinstance(command, str) and command in pip_check_variants
        ),
        None,
    )
    # The new flag defaults to `True`, so it is explicitly set to `False` when no `pip check` was found.
    pip_check = pip_check_idx is not None

    if pip_check:
        # For now, only the first instance is patched-out
        self._patch_and_log({"op": "remove", "path": RecipeParser.append_to_path(commands_path, f"/{pip_check_idx}")})
        # Edge-case: Remove `commands` (which will soon become `script`) and `requirements` if `pip check` was the
        # only command present. Otherwise, an empty test object would effectively be created.
        if len(commands) == 1:
            # `/commands` must exist in order to get a single command in the list checked above
            self._patch_and_log({"op": "remove", "path": commands_path})
            # `/requirements` should exist AND should be requiring `pip`. In the event it doesn't, be resilient.
            requirements_path: Final[str] = RecipeParser.append_to_path(test_path, "/requirements")
            if self._v1_recipe.contains_value(requirements_path):
                self._patch_and_log({"op": "remove", "path": requirements_path})

    self._patch_add_missing_path(test_path, "/python")
    pip_check_path: Final[str] = RecipeParser.append_to_path(test_path, "/python/pip_check")
    self._patch_and_log({"op": "add", "path": pip_check_path, "value": pip_check})
def _upgrade_test_section(self, base_package_paths: list[str]) -> None:
    # pylint: disable=too-complex
    # TODO Refactor and simplify ^
    """
    Upgrades/converts the `test` section(s) of a recipe file.

    For every base path that contains a `/test` section, the legacy keys are relocated in-place
    (`files` -> `files/recipe`, `source_files` -> `files/source`, `requires` -> `requirements/run`,
    `commands` -> `script`, `imports` -> `python/imports`, `downstreams` -> `downstream`). The resulting object is
    then split into a list of test elements that is stored under `/tests`, and the original `/test` key is removed.

    :param base_package_paths: Set of base paths to process that could contain this section.
    """
    # NOTE: For now, we assume that the existing test section comprises of a single test entity. Developers will
    # have to use their best judgement to manually break-up the test into multiple tests as they see fit.
    for base_path in base_package_paths:
        test_path = RecipeParser.append_to_path(base_path, "/test")
        if not self._v1_recipe.contains_value(test_path):
            continue

        # Moving `files` to `files/recipe` is not possible in a single `move` operation as a new path has to be
        # created in the path being moved.
        test_files_path = RecipeParser.append_to_path(test_path, "/files")
        if self._v1_recipe.contains_value(test_files_path):
            # Stash the current value so it can be re-inserted one level deeper.
            test_files_value = self._v1_recipe.get_value(test_files_path)
            # TODO: Fix, replace does not work here, produces `- null`, Issue #20
            # self._patch_and_log({"op": "replace", "path": test_files_path, "value": None})
            # Work-around for the above: emulate `replace` with a `remove` followed by an `add`.
            self._patch_and_log({"op": "remove", "path": test_files_path})
            self._patch_and_log({"op": "add", "path": test_files_path, "value": None})
            self._patch_and_log(
                {
                    "op": "add",
                    "path": RecipeParser.append_to_path(test_files_path, "/recipe"),
                    "value": test_files_value,
                }
            )
        # Edge case: `/source_files` exists but `/files` does not
        elif self._v1_recipe.contains_value(RecipeParser.append_to_path(test_path, "/source_files")):
            self._patch_add_missing_path(test_path, "/files")
        # Attempted for both branches above so `source_files` is also relocated when `files` already existed.
        # NOTE(review): `_patch_move_base_path()` is presumably a no-op when the source path is absent (it is also
        # called unconditionally for `/commands` and `/downstreams` below) -- confirm against the helper.
        self._patch_move_base_path(test_path, "/source_files", "/files/source")

        # `requires` becomes `requirements/run`; the parent path must exist before the move.
        if self._v1_recipe.contains_value(RecipeParser.append_to_path(test_path, "/requires")):
            self._patch_add_missing_path(test_path, "/requirements")
        self._patch_move_base_path(test_path, "/requires", "/requirements/run")

        # Upgrade `pip-check`, if applicable
        # This runs BEFORE `commands` is renamed, as the pip-check upgrade is handed the test path and is expected
        # to look at the legacy key. NOTE(review): confirm against `_upgrade_test_pip_check()`.
        self._upgrade_test_pip_check(test_path)

        self._patch_move_base_path(test_path, "/commands", "/script")
        if self._v1_recipe.contains_value(RecipeParser.append_to_path(test_path, "/imports")):
            self._patch_add_missing_path(test_path, "/python")
            self._patch_move_base_path(test_path, "/imports", "/python/imports")
        self._patch_move_base_path(test_path, "/downstreams", "/downstream")

        # Canonically sort the python section, if it exists
        self._v1_recipe._sort_subtree_keys(  # pylint: disable=protected-access
            RecipeParser.append_to_path(test_path, "/python"), CanonicalSortOrder.V1_PYTHON_TEST_KEY_SORT_ORDER
        )

        # Move `test` to `tests` and encapsulate the pre-existing object into a list
        new_test_path = f"{test_path}s"
        test_element = cast(Optional[dict[str, JsonType]], self._v1_recipe.get_value(test_path, default=None))
        # Handle empty test sections (commonly seen in bioconda and R recipes)
        # NOTE(review): the empty `test` key is left untouched in this case (no `tests` key is created and `test` is
        # not removed) -- confirm that this is the intended output.
        if test_element is None:
            continue

        test_array: list[JsonType] = []
        # There are 3 types of test elements. We break them out of the original object, if they exist.
        # `Python` Test Element
        if "python" in test_element:
            test_array.append({"python": test_element["python"]})
            del test_element["python"]
        # `Downstream` Test Element
        if "downstream" in test_element:
            test_array.append({"downstream": test_element["downstream"]})
            del test_element["downstream"]
        # What remains should be the `Command` Test Element type
        # An object that has been fully drained by the two extractions above is skipped.
        if test_element:
            test_array.append(test_element)

        # Publish the new list under `tests` first, then drop the legacy `test` key.
        self._patch_and_log({"op": "add", "path": new_test_path, "value": test_array})
        self._patch_and_log({"op": "remove", "path": test_path})
def _upgrade_multi_output(self, base_package_paths: list[str]) -> None:
    """
    Converts the parts of a recipe that only apply to multi-output recipes.

    Recipes without an `/outputs` section are left untouched. Otherwise the top-level `package` key is renamed to
    `recipe`, and each non-root package path has its `name`/`version` keys relocated under a `package` key before
    its keys are canonically sorted.

    :param base_package_paths: Set of base paths to process that could contain this section.
    """
    # Single-output recipes have nothing to convert here.
    if not self._v1_recipe.contains_value("/outputs"):
        return

    # TODO Complete
    # On the top-level, `package` -> `recipe`
    self._patch_move_base_path(ROOT_NODE_VALUE, "/package", "/recipe")

    # Keys that identify an output, paired with their new location under `package`.
    relocations = (
        ("/name", "/package/name"),
        ("/version", "/package/version"),
    )

    # The root node is not an output; only the remaining paths are processed.
    for pkg_path in (path for path in base_package_paths if path != ROOT_NODE_VALUE):
        # Only create the `package` parent when at least one of the keys to relocate is present.
        needs_package_key = any(
            self._v1_recipe.contains_value(RecipeParser.append_to_path(pkg_path, old_key))
            for old_key, _ in relocations
        )
        if needs_package_key:
            self._patch_add_missing_path(pkg_path, "/package")

        # Move `name` and `version` under `package`
        for old_key, new_key in relocations:
            self._patch_move_base_path(pkg_path, old_key, new_key)

        # Output sections use a subset of the top-level keys, so the top-level ordering is reused to keep the
        # rendered outputs consistent with the rest of the file.
        self._v1_recipe._sort_subtree_keys(  # pylint: disable=protected-access
            pkg_path, CanonicalSortOrder.TOP_LEVEL_KEY_SORT_ORDER
        )
@staticmethod
def pre_process_recipe_text(content: str) -> str:
    """
    Applies text-level fix-ups to the raw contents of a recipe file before the parsing stage. This should be
    used sparingly for solving conversion issues.

    Ideally the pre-processor phase is only used when:
      - There is no other feasible way to solve a conversion issue.
      - There is a proof-of-concept fix that would be easier to develop as a pre-processor step that could be
        refactored into the parser later.
      - The number of recipes afflicted by an issue does not justify the engineering effort required to handle
        the issue in the parsing phase.

    :param content: Recipe file contents to pre-process
    :returns: Pre-processed recipe file contents
    """
    # Regular-expression rewrites, applied strictly in the order listed:
    #   1-2) Some recipes use `foo.<function()>` instead of `{{ foo | <function()> }}` in JINJA statements, which
    #        causes rattler-build to fail with `invalid operation: object has no method named <function()>`.
    #        NOTE: This is currently done BEFORE converting to use `env.get()` to wipe-out those changes.
    #   3)   Strip any problematic parenthesis that may be left over from the previous two rewrites.
    #   4)   Attempt to normalize quoted multiline strings into the common `|` syntax.
    #        TODO: Handle multiple escaped newlines (very uncommon)
    #   5-6) rattler-build introduced checks for the deprecated `max_pin` and `min_pin` fields. These rewrites
    #        address the change in the numerous JINJA functions that use this nomenclature.
    substitution_passes = (
        (Regex.PRE_PROCESS_JINJA_DOT_FUNCTION_IN_ASSIGNMENT, r"\1 | \2"),
        (Regex.PRE_PROCESS_JINJA_DOT_FUNCTION_IN_SUBSTITUTION, r"\1 | \2"),
        (Regex.PRE_PROCESS_JINJA_DOT_FUNCTION_STRIP_EMPTY_PARENTHESIS, r"\1"),
        (Regex.PRE_PROCESS_QUOTED_MULTILINE_STRINGS, r"\1\2: |\1 \3\1 \4"),
        (Regex.PRE_PROCESS_MIN_PIN_REPLACEMENT, "lower_bound="),
        (Regex.PRE_PROCESS_MAX_PIN_REPLACEMENT, "upper_bound="),
    )
    for pattern, substitute in substitution_passes:
        content = pattern.sub(substitute, content)

    # Convert the old JINJA `environ[""]` variable usage to the new `env.get("")` syntax.
    # NOTE:
    #   - This is mostly used by Bioconda recipes and R-based-packages in the `license_file` field.
    #   - From our search, it looks like we never deal with more than one set of outer quotes within the brackets
    # Both searches run against the same text, before any of the swaps below are applied.
    index_matches = cast(list[tuple[str, ...]], Regex.PRE_PROCESS_ENVIRON.findall(content))
    getter_matches = cast(list[tuple[str, ...]], Regex.PRE_PROCESS_ENVIRON_GET.findall(content))

    # Each index-style match should look like ("<quote char>", "<key>", "<quote char>"); the opening quote
    # character is reused on both sides of the key.
    swaps: list[tuple[str, str]] = [
        (
            f"environ[{match[0]}{match[1]}{match[0]}]",
            f"env.get({match[0]}{match[1]}{match[0]})",
        )
        for match in index_matches
    ]
    # Getter-style matches carry six groups: a quoted key followed by a quoted default value.
    swaps += [
        (
            f"environ | get({match[0]}{match[1]}{match[2]}, {match[3]}{match[4]}{match[5]})",
            f"env.get({match[0]}{match[1]}{match[2]}, default={match[3]}{match[4]}{match[5]})",
        )
        for match in getter_matches
    ]
    # One textual replacement per recorded match.
    for stale_text, fresh_text in swaps:
        content = content.replace(stale_text, fresh_text, 1)

    # Replace `{{ hash_type }}:` with the value of `hash_type`, which is likely `sha256`. This is an uncommon
    # practice that is not part of the V1 specification. Currently, about 70 AnacondaRecipes and conda-forge files
    # do this in our integration testing sample.
    return RecipeParser.pre_process_remove_hash_type(content)
def render_to_v1_recipe_format(self) -> tuple[str, MessageTable, str]:
    """
    Takes the current recipe representation and renders it to the V1 format WITHOUT modifying the current recipe
    state.

    This "new" format is defined in the following CEPs:
      - https://github.com/conda-incubator/ceps/blob/main/cep-13.md
      - https://github.com/conda-incubator/ceps/blob/main/cep-14.md

    :returns: Returns a tuple containing:
        - The converted recipe, as a string
        - A `MessageTable` instance that contains error logging
        - Converted recipe file debug string. USE FOR DEBUGGING PURPOSES ONLY!
    """
    # Approach: In the event that we want to expand support later, this function should be implemented in terms
    # of a `RecipeParser` tree. This will make it easier to build an upgrade-path, if we so choose to pursue one.
    # All changes below are applied to `self._v1_recipe`.
    # NOTE(review): presumably `self._v1_recipe` is a separate working copy of the original recipe, which is what
    # makes the "WITHOUT modifying" guarantee in the docstring hold -- confirm against the constructor.

    # Log the original comments
    old_comments: Final[dict[str, str]] = self._v1_recipe.get_comments_table()

    # Attempts to update ambiguous dependency constraints. See function comments for more details.
    self._upgrade_ambiguous_deps()

    # Convert selectors into ternary statements or `if` blocks. We process selectors first so that there is no
    # chance of selector comments getting accidentally wiped by patch or other operations.
    self._upgrade_selectors_to_conditionals()

    # JINJA templates -> `context` object
    self._upgrade_jinja_to_context_obj()

    # Cached copy of all of the "outputs" in a recipe. This is useful for easily handling multi and single output
    # recipes in 1 loop construct.
    base_package_paths: Final[list[str]] = self._v1_recipe.get_package_paths()

    # TODO Fix: comments are not preserved with patch operations (add a flag to `patch()`?)

    # There are a number of recipe files that contain the same misspellings. This is an attempt to
    # solve the more common issues.
    self._correct_common_misspellings(base_package_paths)

    # Upgrade common sections found in a recipe
    # All section upgrades work from the same list of package paths that was cached above. The multi-output
    # upgrade runs last.
    self._upgrade_source_section(base_package_paths)
    self._upgrade_build_section(base_package_paths)
    self._upgrade_requirements_section(base_package_paths)
    self._upgrade_about_section(base_package_paths)
    self._upgrade_test_section(base_package_paths)
    self._upgrade_multi_output(base_package_paths)

    ## Final clean-up ##

    # TODO: Comment tracking may need improvement. The "correct way" of tracking comments with patch changes is a
    # fairly big engineering effort and refactor.
    # Alert the user which comments have been dropped.
    # A comment is reported when its path key is missing from the post-conversion comments table AND that path no
    # longer resolves to a value in the converted recipe.
    new_comments: Final[dict[str, str]] = self._v1_recipe.get_comments_table()
    diff_comments: Final[dict[str, str]] = {k: v for k, v in old_comments.items() if k not in new_comments}
    for path, comment in diff_comments.items():
        if not self._v1_recipe.contains_value(path):
            self._msg_tbl.add_message(MessageCategory.WARNING, f"Could not relocate comment: {comment}")

    # TODO Complete: move operations may result in empty fields we can eliminate. This may require changes to
    # `contains_value()`
    # TODO Complete: Attempt to combine consecutive If/Then blocks after other modifications. This should reduce the
    # risk of screwing up critical list indices and ordering.

    # Hack: Wipe the existing table so the JINJA `set` statements don't render the final form
    self._v1_recipe._vars_tbl = {}  # pylint: disable=protected-access

    # Sort the top-level keys to a "canonical" ordering. This should make previous patch operations look more
    # "sensible" to a human reader.
    self._v1_recipe._sort_subtree_keys(  # pylint: disable=protected-access
        "/", CanonicalSortOrder.TOP_LEVEL_KEY_SORT_ORDER
    )

    # Override the schema value as the recipe conversion is now complete.
    self._v1_recipe._schema_version = SchemaVersion.V1  # pylint: disable=protected-access
    # Update the variable table
    # This is done after the schema version is switched to V1.
    self._v1_recipe._init_vars_tbl()  # pylint: disable=protected-access
    # TODO update selector table when V1 selectors are supported!

    # Rendered recipe text, the accumulated message table, and the debug string form of the recipe.
    return self._v1_recipe.render(), self._msg_tbl, str(self._v1_recipe)