diff --git a/sql/engines/goinception.py b/sql/engines/goinception.py index d1cd21604b..0e61941dfd 100644 --- a/sql/engines/goinception.py +++ b/sql/engines/goinception.py @@ -3,6 +3,7 @@ import re import traceback import MySQLdb +import simplejson as json from common.config import SysConfig from sql.models import AliyunRdsConfig @@ -149,6 +150,27 @@ def query_print(self, instance, db_name=None, sql=''): raise RuntimeError(print_info.get('errmsg')) return print_info + def query_datamasking(self, instance, db_name=None, sql=''): + """ + 将sql交给goInception打印语法树。 + 使用 masking 参数,可参考 https://github.com/hanchuanchuan/goInception/pull/355 + """ + # 判断如果配置了隧道则连接隧道 + host, port, user, password = self.remote_instance_conn(instance) + sql = f"""/*--user={user};--password={password};--host={host};--port={port};--masking=1;*/ + inception_magic_start; + use `{db_name}`; + {sql} + inception_magic_commit;""" + print_info = self.query(db_name=db_name, sql=sql).to_dict()[0] + # 兼容语法错误时errlevel=0的场景 + if print_info['errlevel'] == 0 and print_info['errmsg'] is None : + return json.loads(_repair_json_str(print_info['query_tree'])) + elif print_info['errlevel'] == 0 and print_info['errmsg'] == 'Global environment': + raise SyntaxError(f"Inception Error: {print_info['query_tree']}") + else: + raise RuntimeError(f"Inception Error: {print_info['errmsg']}") + def get_variables(self, variables=None): """获取实例参数""" if variables: @@ -247,3 +269,15 @@ def get_session_variables(instance): for k, v in variables.items(): set_session_sql += f"inception set session {k} = '{v}';\n" return variables, set_session_sql + +def _repair_json_str(json_str): + """ + 处理JSONDecodeError: Expecting property name enclosed in double quotes + inception语法树出现{"a":1,}、["a":1,]、{'a':1}、[, { }] + """ + json_str = re.sub(r"{\s*'(.+)':", r'{"\1":', json_str) + json_str = re.sub(r",\s*?]", "]", json_str) + json_str = re.sub(r",\s*?}", "}", json_str) + json_str = re.sub(r"\[,\s*?{", "[{", json_str) + json_str = json_str.replace("'", "\"") + return json_str diff --git a/sql/engines/inception.py b/sql/engines/inception.py index 2f260ab45a..ff77783a2d 100644 --- a/sql/engines/inception.py +++ b/sql/engines/inception.py @@ -206,8 +206,8 @@ def get_rollback(self, workflow): sql = row.get('sql') # 获取备份表名 opid_time = sequence.replace("'", "") - sql_table = f"""select tablename - from {backup_db_name}.$_$Inception_backup_information$_$ + sql_table = f"""select tablename + from {backup_db_name}.$_$Inception_backup_information$_$ where opid_time='{opid_time}';""" cur.execute(sql_table) @@ -215,8 +215,8 @@ def get_rollback(self, workflow): if list_tables: # 获取备份语句 table_name = list_tables[0][0] - sql_back = f"""select rollback_statement - from {backup_db_name}.{table_name} + sql_back = f"""select rollback_statement + from {backup_db_name}.{table_name} where opid_time='{opid_time}'""" cur.execute(sql_back) list_backup = cur.fetchall() @@ -271,4 +271,4 @@ def _repair_json_str(json_str): json_str = re.sub(r",\s*?}", "}", json_str) json_str = re.sub(r"\[,\s*?{", "[{", json_str) json_str = json_str.replace("'", "\"") - return json_str + return json_str \ No newline at end of file diff --git a/sql/engines/mysql.py b/sql/engines/mysql.py index f0a6fb2bc0..065d774951 100644 --- a/sql/engines/mysql.py +++ b/sql/engines/mysql.py @@ -15,6 +15,7 @@ from .models import ResultSet, ReviewResult, ReviewSet from .inception import InceptionEngine from sql.utils.data_masking import data_masking +from sql.utils.go_data_masking import go_data_masking from common.config import SysConfig logger = logging.getLogger('default') @@ -112,7 +113,7 @@ def get_all_tables(self, db_name, **kwargs): def get_all_columns_by_tb(self, db_name, tb_name, **kwargs): """获取所有字段, 返回一个ResultSet""" - sql = f"""SELECT + sql = f"""SELECT COLUMN_NAME, COLUMN_TYPE, CHARACTER_SET_NAME, @@ -234,7 +235,13 @@ def query_masking(self, db_name=None, sql='', resultset=None): 返回一个脱敏后的结果集""" # 仅对select语句脱敏 if re.match(r"^select", sql, re.I): - mask_result = data_masking(self.instance, db_name, sql, resultset) + ##判断是否设置了inception脱敏,如果未配置inception地址,则使用goinception脱敏 + if (self.config.get('inception_host') is None): + mask_result = go_data_masking(self.instance, db_name, sql, resultset) + #print("use goinception") + else: + mask_result = data_masking(self.instance, db_name, sql, resultset) + #print("use inception") else: mask_result = resultset return mask_result diff --git a/sql/engines/oracle.py b/sql/engines/oracle.py index 4c18c2832d..28639a96d2 100644 --- a/sql/engines/oracle.py +++ b/sql/engines/oracle.py @@ -14,6 +14,7 @@ from . import EngineBase import cx_Oracle from .models import ResultSet, ReviewSet, ReviewResult +#from sql.utils.data_masking import brute_mask from sql.utils.data_masking import simple_column_mask logger = logging.getLogger('default') @@ -138,13 +139,17 @@ def describe_table(self, db_name, tb_name, **kwargs): """return ResultSet""" # https://www.thepolyglotdeveloper.com/2015/01/find-tables-oracle-database-column-name/ sql = f"""SELECT - column_name, + a.column_name, data_type, data_length, nullable, - data_default - FROM all_tab_cols - WHERE table_name = '{tb_name}' and owner = '{db_name}' order by column_id + data_default, + b.comments + FROM all_tab_cols a, all_col_comments b + WHERE a.table_name = b.table_name + and a.owner = b.OWNER + and a.COLUMN_NAME = b.COLUMN_NAME + and a.table_name = '{tb_name}' and a.owner = '{db_name}' order by column_id """ result = self.query(db_name=db_name, sql=sql) return result @@ -365,6 +370,7 @@ def query(self, db_name=None, sql='', limit_num=0, close_conn=True, **kwargs): def query_masking(self, db_name=None, sql='', resultset=None): """简单字段脱敏规则, 仅对select有效""" if re.match(r"^select", sql, re.I): + #filtered_result = brute_mask(self.instance, resultset) filtered_result = simple_column_mask(self.instance, resultset) filtered_result.is_masked = True else: diff --git a/sql/utils/data_masking.py b/sql/utils/data_masking.py index 70268ddc1f..4c74b7f4fa 100644 --- a/sql/utils/data_masking.py +++ b/sql/utils/data_masking.py @@ -257,7 +257,7 @@ def regex(masking_rules, rule_type, value): def brute_mask(instance, sql_result): - """输入的是一个resultset + """输入的是一个resultset sql_result.full_sql sql_result.rows 查询结果列表 List , list内的item为tuple @@ -348,4 +348,3 @@ def simple_column_mask(instance, sql_result): sql_result.error = str(e) return sql_result - diff --git a/sql/utils/go_data_masking.py b/sql/utils/go_data_masking.py new file mode 100644 index 0000000000..5b4c7d2d72 --- /dev/null +++ b/sql/utils/go_data_masking.py @@ -0,0 +1,301 @@ +# -*- coding:utf-8 -*- +import logging +import traceback + +import sqlparse +from sqlparse.tokens import Keyword + +from common.config import SysConfig +from sql.engines.inception import InceptionEngine +from sql.engines.goinception import GoInceptionEngine +from sql.models import DataMaskingRules, DataMaskingColumns +import re + +logger = logging.getLogger('default') + + +# TODO 待优化,没想好 + +#Inception转为goInception,将archery中数据脱敏的IP和端口指向goInception的 +#不修改整体逻辑,主要修改由goInception返回的结果中关键字,比如db修改为schema +def go_data_masking(instance, db_name, sql, sql_result): + """脱敏数据""" + try: + if SysConfig().get('query_check'): + # 解析查询语句,禁用部分Inception无法解析关键词 + p = sqlparse.parse(sql)[0] + for token in p.tokens: + if token.ttype is Keyword and token.value.upper() in ['UNION', 'UNION ALL']: + logger.warning(f'数据脱敏异常,错误信息:不支持该查询语句脱敏!请联系管理员') + sql_result.error = '不支持该查询语句脱敏!请联系管理员' + sql_result.status = 1 + return sql_result + # 通过Inception获取语法树,并进行解析 + inception_engine = GoInceptionEngine() + query_tree = inception_engine.query_datamasking(instance=instance, db_name=db_name, sql=sql) + # 分析语法树获取命中脱敏规则的列数据 + table_hit_columns, hit_columns = analyze_query_tree(query_tree, instance) + + sql_result.mask_rule_hit = True if table_hit_columns or hit_columns else False + except Exception as msg: + logger.warning(f'数据脱敏异常,错误信息:{traceback.format_exc()}') + sql_result.error = str(msg) + sql_result.status = 1 + else: + # 存在select * 的查询,遍历column_list,获取命中列的index,添加到hit_columns + if table_hit_columns and sql_result.rows: + column_list = sql_result.column_list + table_hit_column = dict() + + + for index, item in enumerate(column_list): + if item in table_hit_column.keys(): + hit_columns.append({ + "column_name": item, + "index": index, + "rule_type": table_hit_column.get(item) + }) + + # 对命中规则列hit_columns的数据进行脱敏 + # 获取全部脱敏规则信息,减少循环查询,提升效率 + masking_rules = DataMaskingRules.objects.all() + + if hit_columns and sql_result.rows: + rows = list(sql_result.rows) + for column in hit_columns: + index = column['index'] + for idx, item in enumerate(rows): + + rows[idx] = list(item) + rows[idx][index] = regex(masking_rules, column['rule_type'], rows[idx][index]) + + sql_result.rows = rows + + # 脱敏结果 + sql_result.is_masked = True + return sql_result + + +def analyze_query_tree(query_tree, instance): + """解析query_tree,获取语句信息,并返回命中脱敏规则的列信息""" + + # old_select_list = query_tree.get('select_list', []) + # table_ref = query_tree.get('table_ref', []) + old_select_list =[] + table_ref=[] + #old_select_list=[{ 'field' : query_tree[0].get('field', []), 'alias' : query_tree[0].get('alias', [])}] + #table_ref= [{'schema' : query_tree[0].get('schema', []),'table' : query_tree[0].get('table', [])}] + for list_i in query_tree: + + old_select_list.append({'field': list_i['field'], 'alias': list_i['alias'],'schema': list_i['schema'], 'table': list_i['table']}) + table_ref.append({'schema': list_i['schema'], 'table': list_i['table']}) + + # 获取全部激活的脱敏字段信息,减少循环查询,提升效率 + masking_columns = DataMaskingColumns.objects.filter(active=True) + + # 判断语句涉及的表是否存在脱敏字段配置 + hit = False + for table in table_ref: + if masking_columns.filter(instance=instance, table_schema=table['schema'], table_name=table['table']).exists(): + hit = True + + # 不存在脱敏字段则直接跳过规则解析 + if not hit: + table_hit_columns = [] + hit_columns = [] + else: + # 遍历select_list + columns = [] + hit_columns = [] # 命中列 + table_hit_columns = [] # 涉及表命中的列,仅select *需要 + + select_index = [] + select_list = [] + + for select_item in old_select_list: + select_index.append(select_item['field']) + select_list.append(select_item) + if select_index: + + for table in table_ref: + hit_columns_info = hit_table(masking_columns, instance, table['schema'], table['table']) + table_hit_columns.extend(hit_columns_info) + + for index, item in enumerate(select_list): + item['index'] = index + if item.get('field') != '*': + columns.append(item) + + # 格式化命中的列信息 + for column in columns: + hit_info = hit_column(masking_columns, instance, column.get('schema'), column.get('table'), + column.get('field')) + + if hit_info['is_hit']: + hit_info['index'] = column['index'] + hit_columns.append(hit_info) + + + return table_hit_columns, hit_columns + + +def hit_column(masking_columns, instance, table_schema, table_name, column_name): + """判断字段是否命中脱敏规则,如果命中则返回脱敏的规则id和规则类型""" + + column_info = masking_columns.filter(instance=instance, table_schema=table_schema, + table_name=table_name, column_name=column_name) + + hit_column_info = { + "instance_name": instance.instance_name, + "table_schema": table_schema, + "table_name": table_name, + "column_name": column_name, + "rule_type": 0, + "is_hit": False + } + + # 命中规则 + if column_info: + hit_column_info['rule_type'] = column_info[0].rule_type + hit_column_info['is_hit'] = True + + return hit_column_info + + +def hit_table(masking_columns, instance, table_schema, table_name): + """获取表中所有命中脱敏规则的字段信息,用于select *的查询""" + columns_info = masking_columns.filter(instance=instance, table_schema=table_schema, table_name=table_name) + + # 命中规则列 + hit_columns_info = [] + for column in columns_info: + hit_columns_info.append({ + "instance_name": instance.instance_name, + "table_schema": table_schema, + "table_name": table_name, + "is_hit": True, + "column_name": column.column_name, + "rule_type": column.rule_type + }) + return hit_columns_info + + +def regex(masking_rules, rule_type, value): + """利用正则表达式脱敏数据""" + rules_info = masking_rules.get(rule_type=rule_type) + + if rules_info: + rule_regex = rules_info.rule_regex + hide_group = rules_info.hide_group + # 正则匹配必须分组,隐藏的组会使用****代替 + try: + p = re.compile(rule_regex, re.I) + m = p.search(str(value)) + + masking_str = '' + for i in range(m.lastindex): + if i == hide_group - 1: + group = '****' + else: + group = m.group(i + 1) + masking_str = masking_str + group + return masking_str + except AttributeError: + return value + else: + return value + + +def brute_mask(instance, sql_result): + """输入的是一个resultset + sql_result.full_sql + sql_result.rows 查询结果列表 List , list内的item为tuple + + 返回同样结构的sql_result , error 中写入脱敏时产生的错误. + """ + # 读取所有关联实例的脱敏规则,去重后应用到结果集,不会按照具体配置的字段匹配 + rule_types = DataMaskingColumns.objects.filter(instance=instance).values_list('rule_type', flat=True).distinct() + masking_rules = DataMaskingRules.objects.filter(rule_type__in=rule_types) + for reg in masking_rules: + compiled_r = re.compile(reg.rule_regex, re.I) + replace_pattern = r"" + rows = list(sql_result.rows) + for i in range(1, compiled_r.groups + 1): + if i == int(reg.hide_group): + replace_pattern += r"****" + else: + replace_pattern += r"\{}".format(i) + for i in range(len(sql_result.rows)): + temp_value_list = [] + for j in range(len(sql_result.rows[i])): + # 进行正则替换 + temp_value_list += [compiled_r.sub(replace_pattern, str(sql_result.rows[i][j]))] + rows[i] = tuple(temp_value_list) + sql_result.rows = rows + return sql_result + + +def simple_column_mask(instance, sql_result): + """输入的是一个resultset + sql_result.full_sql + sql_result.rows 查询结果列表 List , list内的item为tuple + sql_result.column_list 查询结果字段列表 List + 返回同样结构的sql_result , error 中写入脱敏时产生的错误. + """ + # 获取当前实例脱敏字段信息,减少循环查询,提升效率 + masking_columns = DataMaskingColumns.objects.filter(instance=instance, active=True) + # 转换sql输出字段名为小写, 适配oracle脱敏 + sql_result_column_list = [c.lower() for c in sql_result.column_list] + if masking_columns: + try: + for mc in masking_columns: + # 脱敏规则字段名 + column_name = mc.column_name.lower() + # 脱敏规则字段索引信息 + _masking_column_index = [] + if column_name in sql_result_column_list: + _masking_column_index.append(sql_result_column_list.index(column_name)) + # 别名字段脱敏处理 + try: + for _c in sql_result_column_list: + alias_column_regex = r'"?([^\s"]+)"?\s+(as\s+)?"?({})[",\s+]?'.format(re.escape(_c)) + alias_column_r = re.compile(alias_column_regex, re.I) + # 解析原SQL查询别名字段 + search_data = re.search(alias_column_r, sql_result.full_sql) + # 字段名 + _column_name = search_data.group(1).lower() + s_column_name = re.sub(r'^"?\w+"?\."?|\.|"$', '', _column_name) + # 别名 + alias_name = search_data.group(3).lower() + # 如果字段名匹配脱敏配置字段,对此字段进行脱敏处理 + if s_column_name == column_name: + _masking_column_index.append(sql_result_column_list.index(alias_name)) + except: + pass + + for masking_column_index in _masking_column_index: + # 脱敏规则 + masking_rule = DataMaskingRules.objects.get(rule_type=mc.rule_type) + # 脱敏后替换字符串 + compiled_r = re.compile(masking_rule.rule_regex, re.I | re.S) + replace_pattern = r"" + for i in range(1, compiled_r.groups + 1): + if i == int(masking_rule.hide_group): + replace_pattern += r"****" + else: + replace_pattern += r"\{}".format(i) + + rows = list(sql_result.rows) + for i in range(len(sql_result.rows)): + temp_value_list = [] + for j in range(len(sql_result.rows[i])): + column_data = sql_result.rows[i][j] + if j == masking_column_index: + column_data = compiled_r.sub(replace_pattern, str(sql_result.rows[i][j])) + temp_value_list += [column_data] + rows[i] = tuple(temp_value_list) + sql_result.rows = rows + except Exception as e: + sql_result.error = str(e) + + return sql_result diff --git a/sql/utils/tests.py b/sql/utils/tests.py index 552e2a9e52..08794be9f0 100644 --- a/sql/utils/tests.py +++ b/sql/utils/tests.py @@ -1,5 +1,5 @@ # -*- coding: UTF-8 -*- -""" +""" @author: hhyo @license: Apache Licence @file: tests.py @@ -27,6 +27,7 @@ from sql.utils.tasks import add_sql_schedule, del_schedule, task_info from sql.utils.workflow_audit import Audit from sql.utils.data_masking import data_masking, brute_mask, simple_column_mask +from sql.utils.go_data_masking import go_data_masking, brute_mask, simple_column_mask User = Users __author__ = 'hhyo' @@ -1297,6 +1298,177 @@ def test_data_masking_does_not_support_keyword(self, ): self.assertEqual(r.status, 1) self.assertEqual(r.error, '不支持该查询语句脱敏!请联系管理员') + @patch('sql.utils.go_data_masking.GoInceptionEngine') + def test_go_data_masking_not_hit_rules(self, _inception): + DataMaskingColumns.objects.all().delete() + DataMaskingRules.objects.all().delete() + _inception.return_value.query_print.return_value = {'command': 'select', + 'select_list': [{'type': 'FIELD_ITEM', 'field': '*'}], + 'table_ref': [{'db': 'archer_test', 'table': 'users'}], + 'limit': {'limit': [{'type': 'INT_ITEM', 'value': '100'}]}} + sql = """select phone from users;""" + rows = (('18888888888',), ('18888888889',), ('18888888810',)) + query_result = ReviewSet(column_list=['phone'], rows=rows, full_sql=sql) + r = go_data_masking(self.ins, 'archery', sql, query_result) + self.assertEqual(r, query_result) + + @patch('sql.utils.go_data_masking.GoInceptionEngine') + def test_go_data_masking_hit_rules_not_exists_star(self, _inception): + _inception.return_value.query_print.return_value = { + 'command': 'select', + 'select_list': [{'type': 'FIELD_ITEM', 'db': 'archer_test', 'table': 'users', 'field': 'phone'}, + {'type': 'FIELD_ITEM', 'db': 'archer_test', 'table': 'users', 'field': 'email'}, + {'type': 'FIELD_ITEM', 'db': 'archer_test', 'table': 'users', 'field': 'id_number'}], + 'table_ref': [{'db': 'archer_test', 'table': 'users'}], + 'limit': {'limit': [{'type': 'INT_ITEM', 'value': '100'}]}} + sql = """select phone from users;""" + rows = (('18888888888',), ('18888888889',), ('18888888810',)) + query_result = ReviewSet(column_list=['phone'], rows=rows, full_sql=sql) + r = go_data_masking(self.ins, 'archery', sql, query_result) + mask_result_rows = [['188****8888', ], ['188****8889', ], ['188****8810', ]] + self.assertEqual(r.rows, mask_result_rows) + + @patch('sql.utils.go_data_masking.GoInceptionEngine') + def test_go_data_masking_hit_rules_exists_star(self, _inception): + """[*]""" + _inception.return_value.query_print.return_value = { + 'command': 'select', + 'select_list': [{'type': 'FIELD_ITEM', 'field': '*'}], + 'table_ref': [{'db': 'archer_test', 'table': 'users'}], + 'limit': {'limit': [{'type': 'INT_ITEM', 'value': '100'}]}} + sql = """select * from users;""" + rows = (('18888888888',), ('18888888889',), ('18888888810',)) + query_result = ReviewSet(column_list=['phone'], rows=rows, full_sql=sql) + r = go_data_masking(self.ins, 'archery', sql, query_result) + mask_result_rows = [['188****8888', ], ['188****8889', ], ['188****8810', ]] + self.assertEqual(r.rows, mask_result_rows) + + @patch('sql.utils.go_data_masking.GoInceptionEngine') + def test_go_data_masking_hit_rules_star_and_column(self, _inception): + """[*,column_a]""" + _inception.return_value.query_print.return_value = { + 'command': 'select', + 'select_list': [{'type': 'FIELD_ITEM', 'field': '*'}, + {'type': 'FIELD_ITEM', 'db': 'archer_test', 'table': 'users', 'field': 'phone'}], + 'table_ref': [{'db': 'archer_test', 'table': 'users'}], + 'limit': {'limit': [{'type': 'INT_ITEM', 'value': '100'}]}} + sql = """select *,phone from users;""" + rows = (('18888888888', '18888888888',), + ('18888888889', '18888888889',),) + query_result = ReviewSet(column_list=['phone', 'phone'], rows=rows, full_sql=sql) + r = go_data_masking(self.ins, 'archery', sql, query_result) + mask_result_rows = [['188****8888', '188****8888', ], + ['188****8889', '188****8889', ]] + self.assertEqual(r.rows, mask_result_rows) + + @patch('sql.utils.go_data_masking.GoInceptionEngine') + def test_go_data_masking_hit_rules_column_and_star(self, _inception): + """[column_a, *]""" + _inception.return_value.query_print.return_value = { + 'command': 'select', + 'select_list': [{'type': 'FIELD_ITEM', 'db': 'archer_test', 'table': 'users', 'field': 'phone'}, + {'type': 'FIELD_ITEM', 'field': '*'}, ], + 'table_ref': [{'db': 'archer_test', 'table': 'users'}], + 'limit': {'limit': [{'type': 'INT_ITEM', 'value': '100'}]}} + sql = """select phone,* from users;""" + rows = (('18888888888', '18888888888',), + ('18888888889', '18888888889',)) + query_result = ReviewSet(column_list=['phone', 'phone'], rows=rows, full_sql=sql) + r = go_data_masking(self.ins, 'archery', sql, query_result) + mask_result_rows = [['188****8888', '188****8888', ], + ['188****8889', '188****8889', ]] + self.assertEqual(r.rows, mask_result_rows) + + @patch('sql.utils.go_data_masking.GoInceptionEngine') + def test_go_data_masking_hit_rules_column_and_star_and_column(self, _inception): + """[column_a,a.*,column_b]""" + _inception.return_value.query_print.return_value = { + 'command': 'select', + 'select_list': [{'type': 'FIELD_ITEM', 'db': 'archer_test', 'table': 'users', 'field': 'phone'}, + {'type': 'FIELD_ITEM', 'field': '*'}, + {'type': 'FIELD_ITEM', 'db': 'archer_test', 'table': 'users', 'field': 'phone'}, ], + 'table_ref': [{'db': 'archer_test', 'table': 'users'}], + 'limit': {'limit': [{'type': 'INT_ITEM', 'value': '100'}]}} + sql = """select phone,*,phone from users;""" + rows = (('18888888888', '18888888888', '18888888888',), + ('18888888889', '18888888889', '18888888889',)) + query_result = ReviewSet(column_list=['phone', 'phone', 'phone'], rows=rows, full_sql=sql) + r = go_data_masking(self.ins, 'archery', sql, query_result) + mask_result_rows = [['188****8888', '188****8888', '188****8888', ], + ['188****8889', '188****8889', '188****8889', ]] + self.assertEqual(r.rows, mask_result_rows) + + @patch('sql.utils.go_data_masking.GoInceptionEngine') + def test_go_data_masking_hit_rules_star_and_column_and_star(self, _inception): + """[a.*, column_a, b.*]""" + _inception.return_value.query_print.return_value = { + 'command': 'select', + 'select_list': [{'type': 'FIELD_ITEM', 'field': '*'}, + {'type': 'FIELD_ITEM', 'db': 'archer_test', 'table': 'users', 'field': 'phone'}, + {'type': 'FIELD_ITEM', 'field': '*'}, ], + 'table_ref': [{'db': 'archer_test', 'table': 'users'}], + 'limit': {'limit': [{'type': 'INT_ITEM', 'value': '100'}]}} + sql = """select *,phone,* from users;""" + rows = (('18888888888', '18888888888', '18888888888',), + ('18888888889', '18888888889', '18888888889',)) + query_result = ReviewSet(column_list=['phone', 'phone', 'phone'], rows=rows, full_sql=sql) + r = go_data_masking(self.ins, 'archery', sql, query_result) + self.assertEqual(r.rows, rows) + self.assertEqual(r.status, 1) + self.assertEqual(r.error, '不支持select信息为[a.*, column_a, b.*]格式的查询脱敏!') + + @patch('sql.utils.go_data_masking.GoInceptionEngine') + def test_go_data_masking_does_not_support_aggregate(self, _inception): + """不支持的语法""" + _inception.return_value.query_print.return_value = { + 'command': 'select', 'select_list': [{ + 'type': 'FUNC_ITEM', 'func': 'OTHERS', 'name': 'concat', + 'args': [{'type': 'FIELD_ITEM', 'db': 'archer_test', 'table': 'users', 'field': 'phone'}, + {'type': 'INT_ITEM', 'value': '1'}]}], + 'table_ref': [{'db': 'archer_test', 'table': 'users'}], + 'limit': {'limit': [{'type': 'INT_ITEM', 'value': '100'}]}} + sql = """select concat(phone,1) from users;""" + rows = [] + query_result = ReviewSet(column_list=['concat(phone,1)'], rows=rows, full_sql=sql) + r = go_data_masking(self.ins, 'archery', sql, query_result) + self.assertEqual(r.rows, rows) + self.assertEqual(r.status, 1) + self.assertEqual(r.error, '不支持该查询语句脱敏!请联系管理员') + + @patch('sql.utils.go_data_masking.GoInceptionEngine') + def test_go_data_masking_does_not_support_fuc(self, _inception): + """不支持的语法""" + _inception.return_value.query_print.return_value = { + 'command': 'select', 'select_list': [{ + 'type': 'aggregate', 'agg_type': 'max', + 'aggregate': {'type': 'FUNC_ITEM', 'func': 'OTHERS', 'name': '+', + 'args': [{'type': 'FIELD_ITEM', 'db': 'archer_test', + 'table': 'users', 'field': 'phone'}, + {'type': 'INT_ITEM', 'value': '1'}]}}], + 'table_ref': [{'db': 'archer_test', 'table': 'users'}], + 'limit': {'limit': [{'type': 'INT_ITEM', 'value': '100'}]}} + sql = """select max(phone+1) from users;""" + rows = [] + query_result = ReviewSet(column_list=['max(phone+1)'], rows=rows, full_sql=sql) + r = go_data_masking(self.ins, 'archery', sql, query_result) + self.assertEqual(r.rows, rows) + self.assertEqual(r.status, 1) + self.assertEqual(r.error, '不支持该查询语句脱敏!请联系管理员') + + def test_go_data_masking_does_not_support_keyword(self, ): + """不支持的关键字""" + self.sys_config.set('query_check', 'true') + self.sys_config.get_all_config() + + sqls = ["select id from test union select email from activity_email_all_in_one;", + "select id from test union all select email from activity_email_all_in_one;"] + for sql in sqls: + query_result = ReviewSet(full_sql=sql) + r = go_data_masking(self.ins, 'archery', sql, query_result) + self.assertEqual(r.status, 1) + self.assertEqual(r.error, '不支持该查询语句脱敏!请联系管理员') + + def test_brute_mask(self): sql = """select * from users;""" rows = (('18888888888',), ('18888888889',), ('18888888810',)) diff --git a/src/charts/charts/goinception/values.yaml b/src/charts/charts/goinception/values.yaml index 460a3ae9b4..5396fc404d 100644 --- a/src/charts/charts/goinception/values.yaml +++ b/src/charts/charts/goinception/values.yaml @@ -5,9 +5,10 @@ replicaCount: 1 image: - repository: hanchuanchuan/goinception + repository: hanchuanchuan/goinception:latest tag: latest - pullPolicy: IfNotPresent + version: latest + pullPolicy: Always nameOverride: "" fullnameOverride: "" diff --git a/src/docker-compose/docker-compose.yml b/src/docker-compose/docker-compose.yml index 605ef44df1..9157bd5520 100644 --- a/src/docker-compose/docker-compose.yml +++ b/src/docker-compose/docker-compose.yml @@ -32,7 +32,7 @@ services: - "./inception/inc.cnf:/etc/inc.cnf" goinception: - image: hanchuanchuan/goinception + image: hanchuanchuan/goinception:latest container_name: goinception restart: always ports: