-
Notifications
You must be signed in to change notification settings - Fork 1.7k
/
Copy pathclickhouse.py
484 lines (455 loc) · 19.7 KB
/
clickhouse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
# -*- coding: UTF-8 -*-
from clickhouse_driver import connect
from sql.utils.sql_utils import get_syntax_type
from .models import ResultSet, ReviewResult, ReviewSet
from common.utils.timer import FuncTimer
from common.config import SysConfig
from . import EngineBase
import sqlparse
import logging
import re
logger = logging.getLogger("default")
class ClickHouseEngine(EngineBase):
test_query = "SELECT 1"
def __init__(self, instance=None):
    """Set up the engine for ``instance`` and load the system configuration.

    :param instance: instance object, forwarded unchanged to the base
        class initialiser.
    """
    # Base-class initialisation runs first; the configuration object is
    # read later by execute_check() for the critical-DDL pattern.
    super().__init__(instance=instance)
    self.config = SysConfig()
def get_connection(self, db_name=None):
    """Return the cached connection, opening a new one on first use.

    :param db_name: database to select when a new connection is opened.
        It is ignored when a connection is already cached on ``self.conn``.
    :return: the connection object stored on ``self.conn``.
    """
    if not self.conn:
        options = {
            "host": self.host,
            "port": self.port,
            "user": self.user,
            "password": self.password,
        }
        # ``database`` is only passed when the caller named one, so the
        # driver default applies otherwise.
        if db_name:
            options["database"] = db_name
        options["connect_timeout"] = 10
        self.conn = connect(**options)
    return self.conn
@property
def name(self):
    """Short engine name."""
    return "ClickHouse"
@property
def info(self):
    """Human-readable engine description."""
    return "ClickHouse engine"
@property
def auto_backup(self):
    """Whether backup is supported by this engine (it is not)."""
    return False
@property
def server_version(self):
    """Return the server version as a tuple of integers.

    The value is read from ``system.build_options`` (``VERSION_FULL``)
    through ``self.query`` on every access; at most the first three
    dot-separated components are kept.

    :return: tuple such as ``(21, 8, 3)``.
    """
    build_info = self.query(
        sql="select value from system.build_options where name = 'VERSION_FULL';"
    )
    # The dotted version is taken from the second space-separated token of
    # the VERSION_FULL value.
    dotted = build_info.rows[0][0].split(" ")[1]
    return tuple(int(part) for part in dotted.split(".")[:3])
def get_table_engine(self, tb_name):
    """Look up the engine type of a table in ``system.tables``.

    :param tb_name: table name, normally ``database.table``. Backticks or
        double quotes around either part are removed before the lookup.
        When no database part is given, the connection's current database
        is used instead of raising ``IndexError``.
    :return: ``{"status": 1, "engine": <engine>}`` when the table exists,
        otherwise ``{"status": 0, "engine": "None"}``.
    """

    def as_literal(identifier):
        # Drop identifier quoting (system.tables stores bare names), then
        # escape backslashes and single quotes so the value cannot break
        # out of the single-quoted SQL literal.
        bare = identifier.strip().strip('`"')
        return bare.replace("\\", "\\\\").replace("'", "\\'")

    parts = tb_name.split(".")
    if len(parts) > 1:
        database_filter = f"database='{as_literal(parts[0])}'"
        table = parts[1]
    else:
        database_filter = "database=currentDatabase()"
        table = parts[0]

    sql = (
        "select engine from system.tables "
        f"where {database_filter} and name='{as_literal(table)}'"
    )
    query_result = self.query(sql=sql)
    if query_result.rows:
        return {"status": 1, "engine": query_result.rows[0][0]}
    return {"status": 0, "engine": "None"}
def get_all_databases(self):
    """List the databases, excluding built-in ones.

    :return: the ResultSet from ``show databases`` with ``rows`` replaced
        by a flat list of database names.
    """
    hidden = ("system", "INFORMATION_SCHEMA", "information_schema", "datasets")
    result = self.query(sql="show databases")
    names = (row[0] for row in result.rows)
    result.rows = [db for db in names if db not in hidden]
    return result
def get_all_tables(self, db_name, **kwargs):
    """List the tables of ``db_name``.

    :param db_name: database to run ``show tables`` against.
    :param kwargs: accepted and ignored.
    :return: the ResultSet with ``rows`` replaced by a flat list of table
        names.
    """
    listing = self.query(db_name=db_name, sql="show tables")
    listing.rows = [entry[0] for entry in listing.rows]
    return listing
def get_all_columns_by_tb(self, db_name, tb_name, **kwargs):
    """List the column names of a table.

    :param db_name: database that owns the table.
    :param tb_name: table whose columns are listed.
    :param kwargs: accepted and ignored.
    :return: the ResultSet of the ``system.columns`` lookup with ``rows``
        replaced by a flat list of column names.
    """

    def as_literal(value):
        # Escape backslashes and single quotes so the value stays inside
        # the single-quoted SQL literal it is interpolated into.
        return str(value).replace("\\", "\\\\").replace("'", "\\'")

    sql = f"""select
    name,
    type,
    comment
from
    system.columns
where
    database = '{as_literal(db_name)}'
    and table = '{as_literal(tb_name)}';"""
    result = self.query(db_name=db_name, sql=sql)
    # Only the first selected column (the name) is returned to the caller.
    result.rows = [row[0] for row in result.rows]
    return result
def describe_table(self, db_name, tb_name, **kwargs):
    """Return the ``show create table`` output, shaped like a query result.

    :param db_name: database that owns the table.
    :param tb_name: table to describe.
    :param kwargs: accepted and ignored.
    :return: the ResultSet; on success its first row becomes
        ``(tb_name, <create statement with line breaks inserted>)``. When
        the query returned no rows (for example because it failed) the
        ResultSet is returned untouched so its ``error`` reaches the caller
        instead of an ``IndexError``.
    """
    sql = f"show create table `{tb_name}`;"
    result = self.query(db_name=db_name, sql=sql)
    if result.rows:
        # Break the one-line statement after "(" and "," for readability.
        pretty = result.rows[0][0].replace("(", "(\n ").replace(",", ",\n ")
        result.rows[0] = (tb_name, pretty)
    return result
def query(self, db_name=None, sql="", limit_num=0, close_conn=True, **kwargs):
    """Run a statement and return its rows in a ResultSet.

    :param db_name: database used when a new connection has to be opened.
    :param sql: statement to execute.
    :param limit_num: when greater than 0, fetch at most this many rows;
        otherwise fetch all rows.
    :param close_conn: close the engine connection once the call is done.
    :param kwargs: accepted and ignored.
    :return: ResultSet with ``column_list``, ``rows`` and ``affected_rows``
        filled in on success, or ``error`` set when anything raised.
    """
    result_set = ResultSet(full_sql=sql)
    cursor = None
    try:
        conn = self.get_connection(db_name=db_name)
        cursor = conn.cursor()
        cursor.execute(sql)
        max_rows = int(limit_num)
        if max_rows > 0:
            rows = cursor.fetchmany(size=max_rows)
        else:
            rows = cursor.fetchall()
        fields = cursor.description
        result_set.column_list = [i[0] for i in fields] if fields else []
        result_set.rows = rows
        result_set.affected_rows = len(rows)
    except Exception as e:
        logger.warning(f"ClickHouse语句执行报错,语句:{sql},错误信息{e}")
        # Keep only the message part in front of the server stack trace.
        result_set.error = str(e).split("Stack trace")[0]
    finally:
        # Release the cursor explicitly; previously it was left open
        # whenever the caller asked to keep the connection.
        if cursor is not None:
            cursor.close()
        if close_conn:
            self.close()
    return result_set
def query_check(self, db_name=None, sql=""):
    """Validate a query before it is run: strip comments, keep the first
    statement and check that it is an allowed, syntactically valid query.

    :param db_name: database used for the EXPLAIN syntax check.
    :param sql: raw text submitted by the user.
    :return: dict with ``msg``, ``bad_query``, ``filtered_sql`` (the first
        statement, stripped) and ``has_star``.
    """
    result = {"msg": "", "bad_query": False, "filtered_sql": sql, "has_star": False}
    # Remove comments and keep only the first valid statement.
    try:
        sql = sqlparse.format(sql, strip_comments=True)
        sql = sqlparse.split(sql)[0]
        result["filtered_sql"] = sql.strip()
    except IndexError:
        result["bad_query"] = True
        result["msg"] = "没有有效的SQL语句"
        # Nothing left to inspect; returning here keeps this message from
        # being replaced by the "unsupported syntax" one below.
        return result

    if re.match(r"^select|^show|^explain", sql, re.I) is None:
        result["bad_query"] = True
        result["msg"] = "不支持的查询语法类型!"

    if "*" in sql:
        result["has_star"] = True
        # The star notice is informational and must not hide a rejection.
        if not result["bad_query"]:
            result["msg"] = "SQL语句中含有 * "

    # ClickHouse officially supports EXPLAIN from version 20.6.3.
    if re.match(r"^explain", sql, re.I) and self.server_version < (20, 6, 3):
        result["bad_query"] = True
        result["msg"] = "当前ClickHouse实例版本低于20.6.3,不支持explain!"

    # For SELECT, run EXPLAIN first to find out whether the syntax is valid.
    if re.match(r"^select", sql, re.I) and self.server_version >= (20, 6, 3):
        explain_result = self.query(db_name=db_name, sql=f"explain {sql}")
        if explain_result.error:
            result["bad_query"] = True
            result["msg"] = explain_result.error

    return result
def filter_sql(self, sql="", limit_num=0):
    """Cap the row limit of a SELECT statement.

    A trailing ``limit n``, ``limit n offset m`` or ``limit m,n`` is
    rewritten so that the row count is at most ``limit_num``; a SELECT
    without a trailing limit gets ``limit <limit_num>`` appended. Any other
    statement is returned unchanged apart from a normalised trailing ``;``.

    :param sql: statement to rewrite.
    :param limit_num: maximum number of rows allowed.
    :return: the rewritten statement, terminated by ``;``.
    """
    query = sql.rstrip(";").strip()
    if not re.match(r"^select", query, re.I):
        return f"{query};"

    # LIMIT N
    limit_only = re.compile(r"limit\s+(\d+)\s*$", re.I)
    # LIMIT M OFFSET N
    limit_then_offset = re.compile(r"limit\s+(\d+)\s+offset\s+(\d+)\s*$", re.I)
    # LIMIT M,N
    offset_then_limit = re.compile(r"limit\s+(\d+)\s*,\s*(\d+)\s*$", re.I)

    found = limit_only.search(query)
    if found:
        capped = min(int(limit_num), int(found.group(1)))
        return limit_only.sub(f"limit {capped};", query)

    found = limit_then_offset.search(query)
    if found:
        requested, skip = found.group(1), found.group(2)
        capped = min(int(limit_num), int(requested))
        return limit_then_offset.sub(f"limit {capped} offset {skip};", query)

    found = offset_then_limit.search(query)
    if found:
        skip, requested = found.group(1), found.group(2)
        capped = min(int(limit_num), int(requested))
        return offset_then_limit.sub(f"limit {skip},{capped};", query)

    # No trailing LIMIT clause: append one.
    return f"{query} limit {limit_num};"
def explain_check(self, check_result, db_name=None, line=0, statement=""):
    """Check a statement's syntax with ``explain ast``.

    :param check_result: accepted for signature compatibility; not used.
    :param db_name: database the check runs against.
    :param line: ordinal of the statement, stored as the result id.
    :param statement: statement to check.
    :return: a ReviewResult, either passing (``errlevel=0``) or rejected
        (``errlevel=2``) with the EXPLAIN error message.
    """
    # EXPLAIN AST only handles non-SELECT statements from ClickHouse
    # 21.1.2 onwards; on older servers the statement passes unchecked.
    if self.server_version >= (21, 1, 2):
        explain_result = self.query(db_name=db_name, sql=f"explain ast {statement}")
        if explain_result.error:
            return ReviewResult(
                id=line,
                errlevel=2,
                stagestatus="驳回未通过检查SQL",
                errormessage=f"explain语法检查错误:{explain_result.error}",
                sql=statement,
            )
    return ReviewResult(
        id=line,
        errlevel=0,
        stagestatus="Audit completed",
        errormessage="None",
        sql=statement,
        affected_rows=0,
        execute_time=0,
    )
def execute_check(self, db_name=None, sql=""):
    """Review every statement of a change ticket before it is executed.

    :param db_name: database the ticket targets; also used to qualify
        table names that carry no database prefix.
    :param sql: full ticket text, possibly holding several statements.
    :return: ReviewSet with one ReviewResult per statement, the warning
        and error counters filled in, and ``syntax_type`` set to 1 when a
        DDL statement was found (2 otherwise).
    """
    sql = sqlparse.format(sql, strip_comments=True)
    sql_list = sqlparse.split(sql)
    # Forbidden / high-risk statement checks.
    check_result = ReviewSet(full_sql=sql)
    critical_ddl_regex = self.config.get("critical_ddl_regex", "")
    critical_pattern = re.compile(critical_ddl_regex)
    check_result.syntax_type = 2  # TODO ticket type: 0 other, 1 DDL, 2 DML

    for line, statement in enumerate(sql_list, start=1):
        statement = statement.rstrip(";")
        lowered = statement.lower()
        if re.match(r"^select|^show", lowered):
            # Queries do not belong in a change ticket.
            result = self._reject_statement(
                line,
                statement,
                "驳回不支持语句",
                "仅支持DML和DDL语句,查询语句请使用SQL查询功能!",
            )
        elif critical_ddl_regex and critical_pattern.match(statement.strip().lower()):
            # High-risk statement as configured by the administrator.
            result = self._reject_statement(
                line,
                statement,
                "驳回高危SQL",
                "禁止提交匹配" + critical_ddl_regex + "条件的语句!",
            )
        elif re.match(r"^alter", lowered):
            result = self._check_alter_statement(check_result, db_name, line, statement)
        elif re.match(r"^truncate\s+table\s+(.+?)(\s|$)", lowered):
            result = self._check_truncate_statement(
                check_result, db_name, line, statement
            )
        elif re.match(r"^insert", lowered):
            result = self._check_insert_statement(db_name, line, statement)
        else:
            # Everything else only gets the simple EXPLAIN AST check.
            result = self.explain_check(check_result, db_name, line, statement)

        # Keep looking for DDL only while none has been found yet.
        if check_result.syntax_type == 2:
            if get_syntax_type(statement, parser=False, db_type="mysql") == "DDL":
                check_result.syntax_type = 1
        check_result.rows.append(result)

    # Count warnings and errors.
    for r in check_result.rows:
        if r.errlevel == 1:
            check_result.warning_count += 1
        if r.errlevel == 2:
            check_result.error_count += 1
    return check_result

@staticmethod
def _reject_statement(line, statement, stagestatus, errormessage):
    """Build the rejected (``errlevel=2``) ReviewResult for one statement."""
    return ReviewResult(
        id=line,
        errlevel=2,
        stagestatus=stagestatus,
        errormessage=errormessage,
        sql=statement,
    )

def _check_alter_statement(self, check_result, db_name, line, statement):
    """Review an ALTER statement: table existence, engine support, EXPLAIN."""
    lowered = statement.lower()
    matched = re.match(r"^alter\s+table\s+(.+?)\s+", lowered)
    if not matched:
        # ALTER statements other than ALTER TABLE.
        return self.explain_check(check_result, db_name, line, statement)

    table_name = matched.group(1)
    if "." not in table_name:
        table_name = f"{db_name}.{table_name}"
    # One lookup yields both the existence flag and the engine.
    table_info = self.get_table_engine(table_name)
    if table_info["status"] != 1:
        return self._reject_statement(
            line, statement, "表不存在", f"表 {table_name} 不存在!"
        )

    table_engine = table_info["engine"]
    is_merge_tree = table_engine.endswith("MergeTree")
    if not is_merge_tree and table_engine not in ("Merge", "Distributed"):
        return self._reject_statement(
            line,
            statement,
            "驳回不支持SQL",
            "ALTER TABLE仅支持*MergeTree,Merge以及Distributed等引擎表!",
        )

    # DELETE and UPDATE are variants of ALTER TABLE in ClickHouse.
    is_mutation = re.match(r"^alter\s+table\s+(.+?)\s+(delete|update)\s+", lowered)
    if is_mutation and not is_merge_tree:
        return self._reject_statement(
            line,
            statement,
            "驳回不支持SQL",
            "DELETE与UPDATE仅支持*MergeTree引擎表!",
        )
    return self.explain_check(check_result, db_name, line, statement)

def _check_truncate_statement(self, check_result, db_name, line, statement):
    """Review a TRUNCATE TABLE statement: table existence, engine, EXPLAIN."""
    table_name = re.match(
        r"^truncate\s+table\s+(.+?)(\s|$)", statement.lower()
    ).group(1)
    if "." not in table_name:
        table_name = f"{db_name}.{table_name}"
    table_info = self.get_table_engine(table_name)
    if table_info["status"] != 1:
        return self._reject_statement(
            line, statement, "表不存在", f"表 {table_name} 不存在!"
        )
    # "File" used to be spelled "File," here, so File tables slipped through.
    if table_info["engine"] in ("View", "File", "URL", "Buffer", "Null"):
        return self._reject_statement(
            line,
            statement,
            "驳回不支持SQL",
            "TRUNCATE不支持View,File,URL,Buffer和Null表引擎!",
        )
    return self.explain_check(check_result, db_name, line, statement)

def _check_insert_statement(self, db_name, line, statement):
    """Review an INSERT statement: keyword shape and table existence only."""
    # EXPLAIN cannot judge INSERT reliably, so only a simple keyword match
    # and a table-existence check are done.
    matched = re.match(
        r"^insert\s+into\s+(.+?)(\s+|\s*\(.+?)(values|format|select)(\s+|\()",
        statement.lower(),
    )
    if not matched:
        return self._reject_statement(
            line, statement, "驳回不支持SQL", "INSERT语法不正确!"
        )
    table_name = matched.group(1)
    if "." not in table_name:
        table_name = f"{db_name}.{table_name}"
    if self.get_table_engine(table_name)["status"] != 1:
        return self._reject_statement(
            line, statement, "表不存在", f"表 {table_name} 不存在!"
        )
    return ReviewResult(
        id=line,
        errlevel=0,
        stagestatus="Audit completed",
        errormessage="None",
        sql=statement,
        affected_rows=0,
        execute_time=0,
    )
def execute_workflow(self, workflow):
    """Execute a change ticket statement by statement.

    Execution stops at the first failing statement; the statements after
    it are reported as reviewed but not executed.

    :param workflow: ticket object providing
        ``sqlworkflowcontent.sql_content`` and ``db_name``.
    :return: ReviewSet with one ReviewResult per statement; ``error`` holds
        the message of the failing statement, if any.
    """
    sql = workflow.sqlworkflowcontent.sql_content
    execute_result = ReviewSet(full_sql=sql)
    sql_list = sqlparse.split(sqlparse.format(sql, strip_comments=True))

    for index, statement in enumerate(sql_list):
        line = index + 1
        with FuncTimer() as t:
            result = self.execute(
                db_name=workflow.db_name, sql=statement, close_conn=True
            )

        if not result.error:
            execute_result.rows.append(
                ReviewResult(
                    id=line,
                    errlevel=0,
                    stagestatus="Execute Successfully",
                    errormessage="None",
                    sql=statement,
                    affected_rows=0,
                    execute_time=t.cost,
                )
            )
            continue

        # Record the failing statement in the execution result.
        execute_result.error = result.error
        execute_result.rows.append(
            ReviewResult(
                id=line,
                errlevel=2,
                stagestatus="Execute Failed",
                errormessage=f"异常信息:{result.error}",
                sql=statement,
                affected_rows=0,
                execute_time=0,
            )
        )
        # Statements after the failure are marked as reviewed but not run.
        for skipped_line, skipped in enumerate(sql_list[index + 1 :], start=line + 1):
            execute_result.rows.append(
                ReviewResult(
                    id=skipped_line,
                    errlevel=0,
                    stagestatus="Audit completed",
                    errormessage="前序语句失败, 未执行",
                    sql=skipped,
                    affected_rows=0,
                    execute_time=0,
                )
            )
        break

    return execute_result
def execute(self, db_name=None, sql="", close_conn=True):
    """Execute statements as given, without fetching rows.

    :param db_name: database used when a new connection has to be opened.
    :param sql: one or more statements; they are split and run in order,
        stopping at the first one that raises.
    :param close_conn: close the engine connection once the call is done.
    :return: ResultSet whose ``error`` is set when a statement failed.
        Errors raised while obtaining the connection are not caught here
        and propagate to the caller, as before.
    """
    result = ResultSet(full_sql=sql)
    conn = self.get_connection(db_name=db_name)
    cursor = None
    try:
        cursor = conn.cursor()
        for statement in sqlparse.split(sql):
            cursor.execute(statement)
    except Exception as e:
        logger.warning(f"ClickHouse语句执行报错,语句:{sql},错误信息{e}")
        # Keep only the message part in front of the server stack trace.
        result.error = str(e).split("Stack trace")[0]
    finally:
        # The cursor is released on the failure path too; previously it was
        # only closed when every statement succeeded.
        if cursor is not None:
            cursor.close()
        if close_conn:
            self.close()
    return result
def close(self):
    """Close the cached connection, if there is one, and forget it."""
    active = self.conn
    if not active:
        return
    active.close()
    self.conn = None