Make the MD5 hashing helper FIPS compliant (changelog issue "6900").

Summary of what this patch does:
  * core/dbt/utils.py: md5() gains an optional charset argument (default
    "utf-8") and, on Python 3.9 or newer, passes usedforsecurity=False to
    hashlib.md5(). get_hash() and get_hashed_contents() now call md5().
  * config/project.py, contracts/connection.py, deps/git.py,
    parser/generic_test_builders.py and parser/schemas.py stop calling hashlib
    directly and call dbt.utils.md5 instead. deps/git.py keeps its "latin-1"
    encoding by passing it as the charset.
  * Adds the matching .changes/unreleased entry.

NOTE(review): line breaks, blank context lines and Python indentation were
reconstructed from the hunk headers' line counts and from Python syntax.
Confirm against the upstream files before applying.

diff --git a/.changes/unreleased/Fixes-20230215-104536.yaml b/.changes/unreleased/Fixes-20230215-104536.yaml
new file mode 100644
index 00000000000..643c9e77110
--- /dev/null
+++ b/.changes/unreleased/Fixes-20230215-104536.yaml
@@ -0,0 +1,6 @@
+kind: Fixes
+body: Make use of hashlib.md5() FIPS compliant
+time: 2023-02-15T10:45:36.755797+01:00
+custom:
+  Author: nielspardon
+  Issue: "6900"
diff --git a/core/dbt/config/project.py b/core/dbt/config/project.py
index 696d3249d3a..be86fbde267 100644
--- a/core/dbt/config/project.py
+++ b/core/dbt/config/project.py
@@ -12,7 +12,6 @@
 )
 from typing_extensions import Protocol, runtime_checkable
 
-import hashlib
 import os
 
 from dbt.flags import get_flags
@@ -31,7 +30,7 @@
 from dbt.helper_types import NoValue
 from dbt.semver import VersionSpecifier, versions_compatible
 from dbt.version import get_installed_version
-from dbt.utils import MultiDict
+from dbt.utils import MultiDict, md5
 from dbt.node_types import NodeType
 from dbt.config.selectors import SelectorDict
 from dbt.contracts.project import (
@@ -678,7 +677,7 @@ def from_project_root(
         return partial.render(renderer)
 
     def hashed_name(self):
-        return hashlib.md5(self.project_name.encode("utf-8")).hexdigest()
+        return md5(self.project_name)
 
     def get_selector(self, name: str) -> Union[SelectionSpec, bool]:
         if name not in self.selectors:
diff --git a/core/dbt/contracts/connection.py b/core/dbt/contracts/connection.py
index 3f12a603363..41eb0bccb8d 100644
--- a/core/dbt/contracts/connection.py
+++ b/core/dbt/contracts/connection.py
@@ -1,6 +1,5 @@
 import abc
 import itertools
-import hashlib
 from dataclasses import dataclass, field
 from typing import (
     Any,
@@ -13,7 +12,7 @@
     Callable,
 )
 from dbt.exceptions import DbtInternalError
-from dbt.utils import translate_aliases
+from dbt.utils import translate_aliases, md5
 from dbt.events.functions import fire_event
 from dbt.events.types import NewConnectionOpening
 from dbt.events.contextvars import get_node_info
@@ -142,7 +141,7 @@ def unique_field(self) -> str:
         raise NotImplementedError("unique_field not implemented for base credentials class")
 
     def hashed_unique_field(self) -> str:
-        return hashlib.md5(self.unique_field.encode("utf-8")).hexdigest()
+        return md5(self.unique_field)
 
     def connection_info(self, *, with_aliases: bool = False) -> Iterable[Tuple[str, Any]]:
         """Return an ordered iterator of key/value pairs for pretty-printing."""
diff --git a/core/dbt/deps/git.py b/core/dbt/deps/git.py
index a46ab91e7d2..31d83fa6cd4 100644
--- a/core/dbt/deps/git.py
+++ b/core/dbt/deps/git.py
@@ -1,5 +1,4 @@
 import os
-import hashlib
 from typing import List, Optional
 
 from dbt.clients import git, system
@@ -13,10 +12,11 @@
 from dbt.exceptions import ExecutableError, MultipleVersionGitDepsError
 from dbt.events.functions import fire_event, warn_or_error
 from dbt.events.types import EnsureGitInstalled, DepsUnpinned
+from dbt.utils import md5
 
 
 def md5sum(s: str):
-    return hashlib.md5(s.encode("latin-1")).hexdigest()
+    return md5(s, "latin-1")
 
 
 class GitPackageMixin:
diff --git a/core/dbt/parser/generic_test_builders.py b/core/dbt/parser/generic_test_builders.py
index 678f7de9df3..7d42f0a9002 100644
--- a/core/dbt/parser/generic_test_builders.py
+++ b/core/dbt/parser/generic_test_builders.py
@@ -1,4 +1,3 @@
-import hashlib
 import re
 from copy import deepcopy
 from dataclasses import dataclass
@@ -35,6 +34,7 @@
     UndefinedMacroError,
 )
 from dbt.parser.search import FileBlock
+from dbt.utils import md5
 
 
 def synthesize_generic_test_names(
@@ -72,7 +72,7 @@ def synthesize_generic_test_names(
 
     if len(full_name) >= 64:
         test_trunc_identifier = test_identifier[:30]
-        label = hashlib.md5(full_name.encode("utf-8")).hexdigest()
+        label = md5(full_name)
         short_name = "{}_{}".format(test_trunc_identifier, label)
     else:
         short_name = full_name
diff --git a/core/dbt/parser/schemas.py b/core/dbt/parser/schemas.py
index 482eb5b6e35..4a5d3247c60 100644
--- a/core/dbt/parser/schemas.py
+++ b/core/dbt/parser/schemas.py
@@ -3,7 +3,6 @@
 import pathlib
 
 from abc import ABCMeta, abstractmethod
-from hashlib import md5
 from typing import Iterable, Dict, Any, Union, List, Optional, Generic, TypeVar, Type
 
 from dbt.dataclass_schema import ValidationError, dbtClassMixin
@@ -80,7 +79,7 @@
     TestBlock,
     Testable,
 )
-from dbt.utils import get_pseudo_test_path, coerce_dict_str
+from dbt.utils import get_pseudo_test_path, coerce_dict_str, md5
 
 
 TestDef = Union[str, Dict[str, Any]]
@@ -222,8 +221,8 @@ def get_hashable_md(data: Union[str, int, float, List, Dict]) -> Union[str, List
                 return str(data)
 
         hashable_metadata = repr(get_hashable_md(test_metadata))
-        hash_string = "".join([name, hashable_metadata]).encode("utf-8")
-        test_hash = md5(hash_string).hexdigest()[-HASH_LENGTH:]
+        hash_string = "".join([name, hashable_metadata])
+        test_hash = md5(hash_string)[-HASH_LENGTH:]
 
         dct = {
             "alias": name,
diff --git a/core/dbt/utils.py b/core/dbt/utils.py
index 27309c4b373..a26a30ac62b 100644
--- a/core/dbt/utils.py
+++ b/core/dbt/utils.py
@@ -10,6 +10,7 @@
 import json
 import os
 import requests
+import sys
 from tarfile import ReadError
 import time
 from pathlib import PosixPath, WindowsPath
@@ -252,16 +253,19 @@ def get_pseudo_hook_path(hook_name):
     return os.path.join(*path_parts)
 
 
-def md5(string):
-    return hashlib.md5(string.encode("utf-8")).hexdigest()
+def md5(string, charset="utf-8"):
+    if sys.version_info >= (3, 9):
+        return hashlib.md5(string.encode(charset), usedforsecurity=False).hexdigest()
+    else:
+        return hashlib.md5(string.encode(charset)).hexdigest()
 
 
 def get_hash(model):
-    return hashlib.md5(model.unique_id.encode("utf-8")).hexdigest()
+    return md5(model.unique_id)
 
 
 def get_hashed_contents(model):
-    return hashlib.md5(model.raw_code.encode("utf-8")).hexdigest()
+    return md5(model.raw_code)
 
 
 def flatten_nodes(dep_list):