-
Notifications
You must be signed in to change notification settings - Fork 281
/
Copy pathcheck_docs_version_diff.py
140 lines (106 loc) · 5.91 KB
/
check_docs_version_diff.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import json
import os
import filecmp
# Sidebar items whose id contains any of these substrings are left out of the
# version comparison.
exclude_keywords = [
    "sql-manual", "releasenotes", "ecosystem", "admin-manual", "faq",
    "data-operate", "query-data", "table-design", "gettingStarted",
    "query-acceleration", "lakehouse", "compute-storage-decoupled",
    "benchmark", "db-connect", "deploy-on-kubernetes",
]

# Root directories of each documentation tree, relative to the working
# directory. A sidebar item id plus ".md" is appended to these prefixes.
# Chinese (zh-CN) trees:
version_21_prefix_cn = "./i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/"
version_30_prefix_cn = "./i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/"
version_dev_prefix_cn = "./i18n/zh-CN/docusaurus-plugin-content-docs/current/"
# English trees:
version_21_prefix_en = "./versioned_docs/version-2.1/"
version_30_prefix_en = "./versioned_docs/version-3.0/"
version_dev_prefix_en = "./docs/"
def extract_items_from_file(file_path, exclude=None):
    """Collect the string entries of every 'items' list in a JSON file.

    The parsed document is walked recursively. Wherever a dict has an
    'items' key holding a list, the string members of that list are
    collected in traversal order; strings that contain any excluded keyword
    as a substring are dropped. Non-string members (nested dicts/lists) are
    not collected themselves but are still descended into.

    Args:
        file_path: Path of the JSON file to read (decoded as UTF-8).
        exclude: Optional iterable of keyword strings used for filtering.
            When omitted, the module-level ``exclude_keywords`` is used.

    Returns:
        The list of collected strings. If the file cannot be opened, decoded
        or parsed, the error is printed and an empty list is returned.
    """
    # Materialise once so a one-shot iterable is not exhausted after the
    # first candidate item.
    keywords = tuple(exclude_keywords if exclude is None else exclude)

    # Only reading and parsing are expected to fail; keeping the traversal
    # outside the try block stops programming errors from being reported as
    # "no items found". The file is also closed before the walk starts.
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
    except (FileNotFoundError, json.JSONDecodeError) as e:
        print(f"Error: {e}")
        return []
    except (OSError, ValueError, RecursionError) as e:
        print(f"An unexpected error occurred: {e}")
        return []

    result = []

    def extract_items(node):
        """Append the filtered 'items' strings found under *node* to result."""
        if isinstance(node, list):
            for child in node:
                extract_items(child)
        elif isinstance(node, dict):
            items = node.get("items")
            # Only a real list is harvested: iterating a string would yield
            # single characters and iterating a dict would yield its keys.
            if isinstance(items, list):
                result.extend(
                    item for item in items
                    if isinstance(item, str)
                    and not any(keyword in item for keyword in keywords)
                )
            # Descend into every value, including 'items', to reach nested
            # categories.
            for value in node.values():
                extract_items(value)

    extract_items(data)
    return result
def _diff_doc(directories, prefix_v21, prefix_v30, prefix_dev):
    """Report missing or differing ``.md`` files across three doc trees.

    For every entry in *directories*, ``<entry>.md`` is looked up under each
    of the three prefixes. A line is printed for each tree where the file is
    absent. When the dev copy exists, each existing versioned copy is
    compared with it by content and a mismatch report is printed if they
    differ.

    Args:
        directories: Iterable of document ids (paths without the ``.md``
            suffix, relative to a docs root).
        prefix_v21: Root directory of the version 2.1 tree.
        prefix_v30: Root directory of the version 3.0 tree.
        prefix_dev: Root directory of the current (dev) tree.
    """
    for directory in directories:
        file_name = directory + ".md"
        path_v21 = os.path.join(prefix_v21, file_name)
        path_v30 = os.path.join(prefix_v30, file_name)
        path_dev = os.path.join(prefix_dev, file_name)

        # Stat each path once and reuse the answer for both the "missing"
        # report and the decision whether to compare.
        has_v21 = os.path.exists(path_v21)
        has_v30 = os.path.exists(path_v30)
        has_dev = os.path.exists(path_dev)

        if not has_v21:
            print(f"Missing in version 2.1: {path_v21}")
        if not has_v30:
            print(f"Missing in version 3.0: {path_v30}")
        if not has_dev:
            print(f"Missing in current (dev) version: {path_dev}")
            # Nothing to compare against without the dev copy.
            continue

        for tag, present, path in (("v21", has_v21, path_v21),
                                   ("v30", has_v30, path_v30)):
            # shallow=False forces a content comparison rather than trusting
            # matching os.stat() signatures.
            if present and not filecmp.cmp(path, path_dev, shallow=False):
                print(f"File mismatch between {tag} and dev for: {directory}")
                print("path_dev: " + path_dev)
                print(f"path_{tag}: " + path)
                print("-" * 50)


def diff_doc_cn(directories):
    """Check the Chinese (zh-CN) docs: compare versions 2.1 and 3.0 against dev.

    Prints a line for every file missing from a tree and a short report for
    every versioned file whose content differs from the dev copy.

    Args:
        directories: Iterable of document ids (paths without ``.md``).
    """
    _diff_doc(directories, version_21_prefix_cn, version_30_prefix_cn,
              version_dev_prefix_cn)


def diff_doc_en(directories):
    """Check the English docs: compare versions 2.1 and 3.0 against dev.

    Prints a line for every file missing from a tree and a short report for
    every versioned file whose content differs from the dev copy.

    Args:
        directories: Iterable of document ids (paths without ``.md``).
    """
    _diff_doc(directories, version_21_prefix_en, version_30_prefix_en,
              version_dev_prefix_en)
if __name__ == "__main__":
    # The sidebar definition is always read from the working directory.
    sidebar_path = "./sidebars.json"
    doc_ids = extract_items_from_file(sidebar_path)

    # Run the Chinese check first, then the English one, each preceded by
    # its banner line.
    checks = (
        ("Checking CN Doc >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>", diff_doc_cn),
        ("Checking EN Doc >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>", diff_doc_en),
    )
    for banner, run_check in checks:
        print(banner)
        run_check(doc_ids)