-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
90 lines (72 loc) · 3.54 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import json
import os
import re
from load_write import load_file, write_list_of_dicts_to_file
#--------------------------------------------------#
def cast_list(data):
    """Wrap the value stored under "question" in a one-element list.

    The entry is modified in place and nothing is returned. An entry
    without a "question" key is left untouched. Each call adds one more
    layer of list, so calling it twice nests the value twice.
    """
    if "question" not in data:
        return
    wrapped = [data["question"]]
    data["question"] = wrapped
def remove_list(data):
    """Strip one layer of list from the value stored under "question".

    The entry is modified in place and nothing is returned. The value is
    only unwrapped when it is a list holding exactly one item; a missing
    key, a non-list value, or a list of any other length is left as is.
    """
    if "question" not in data:
        return
    question = data["question"]
    if isinstance(question, list) and len(question) == 1:
        data["question"] = question[0]
def sort_json(input_file_path, output_file_name, output_loc=""):
    """Sort a file's entries by the number that ends each "id" and write them out.

    Args:
        input_file_path: Path handed to ``load_file``.
        output_file_name: Name handed to ``write_list_of_dicts_to_file``.
        output_loc: Forwarded to the writer as ``subdir``.

    The loaded object is sorted in place with ``.sort``, so ``load_file``
    is presumably expected to return a list of dicts -- TODO confirm.

    NOTE: an "id" that does not end in digits makes ``re.search`` return
    ``None``, so the ``.group()`` call raises ``AttributeError``.
    """
    def trailing_number(entry):
        # Sort key: the run of digits at the very end of the id, as an int.
        return int(re.search(r'\d+$', entry["id"]).group())

    entries = load_file(input_file_path)
    entries.sort(key=trailing_number)
    write_list_of_dicts_to_file(output_file_name, entries, subdir=output_loc)
def reindex_json(input_file_path, output_file_name, output_loc=""):
    """Renumber the entries of a file by position and write them out.

    The number that ends each entry's "id" is replaced by the entry's
    zero-based position in the loaded sequence.

    Args:
        input_file_path: Path handed to ``load_file``.
        output_file_name: Name handed to ``write_list_of_dicts_to_file``.
        output_loc: Forwarded to the writer as ``subdir``.

    Only the trailing run of digits is rewritten, which is the same part
    of the id that ``sort_json`` uses as the index. Digits elsewhere in
    the id (for example a version tag such as ``v2``) are preserved, and
    an id that does not end in digits is left unchanged.
    """
    data = load_file(input_file_path)
    for index, entry in enumerate(data):
        # Anchor on the end of the string: an unanchored r'\d+' would
        # overwrite every digit run in the id, not just the index.
        entry["id"] = re.sub(r'\d+$', str(index), entry["id"])
    write_list_of_dicts_to_file(output_file_name, data, subdir=output_loc)
#--------------------------------------------------#
# verify enum format: entries and type must be string
def recursive_find_enum(entry, results, preceding_type=None, entry_id=None):
    """Recursively search nested dicts and lists for invalid 'enum' values.

    An 'enum' is considered valid only when the governing 'type' is the
    string "string" and the enum value is a list (or tuple) whose items
    are all ``str``. For every invalid enum found, the id of the enclosing
    entry is appended to ``results``; an id is therefore repeated when an
    entry holds several invalid enums.

    Args:
        entry: Object to inspect. Dicts and lists are walked; any other
            value is ignored.
        results: List that offending ids are appended to (mutated in place).
        preceding_type: 'type' inherited from the enclosing dict, used
            when the dict holding the 'enum' declares no 'type' itself.
        entry_id: Id to report for offenders. Callers normally omit it;
            it is picked up from the outermost dict that has a scalar
            "id" and then carried down the recursion.

    Returns:
        None. Findings are reported through ``results``.
    """
    if isinstance(entry, list):
        # Lists are transparent containers: they pass the inherited
        # context straight through to their items.
        for item in entry:
            recursive_find_enum(item, results, preceding_type, entry_id)
        return
    if not isinstance(entry, dict):
        return

    if entry_id is None:
        # Take the id from the outermost dict that has one, so enums nested
        # deep inside an entry are still reported under that entry. A
        # dict/list value under "id" is a nested structure that happens to
        # be keyed "id", not an identifier, so it is skipped.
        candidate = entry.get("id")
        if candidate is not None and not isinstance(candidate, (dict, list)):
            entry_id = candidate

    # Read this dict's own 'type' up front so the verdict does not depend
    # on whether "type" happens to come before "enum" in key order.
    if "type" in entry:
        preceding_type = entry["type"]

    for key, value in entry.items():
        if key == "enum":
            is_valid = (
                preceding_type == "string"
                and isinstance(value, (list, tuple))
                and all(isinstance(item, str) for item in value)
            )
            if not is_valid and entry_id is not None:
                results.append(entry_id)
        # Descend into nested containers, whatever key they sit under.
        if isinstance(value, (dict, list)):
            recursive_find_enum(value, results, preceding_type, entry_id)
def find_invalid_enums(data_entries, output_filename):
    """Collect the ids of entries with invalid enums and save them to a text file.

    Args:
        data_entries: Iterable of entries; each one is passed to
            ``recursive_find_enum``.
        output_filename: Name of the report file, created inside the
            "output" directory relative to the current working directory.

    The report holds one id per line and is written even when nothing
    invalid was found (it is then empty). The final path is printed.
    """
    results = []

    # exist_ok avoids the check-then-create race of testing for the
    # directory first and creating it afterwards.
    output_dir = "output"
    os.makedirs(output_dir, exist_ok=True)
    output_file_path = os.path.join(output_dir, output_filename)

    # Each entry is scanned independently; findings accumulate in results.
    for entry in data_entries:
        recursive_find_enum(entry, results)

    # An explicit encoding keeps the report identical across platforms.
    with open(output_file_path, 'w', encoding="utf-8") as file:
        file.writelines(f"{result}\n" for result in results)

    print(f"Invalid enum results have been saved to {output_file_path}")
# Run the enum check over every JSON file found in the data directory.
# NOTE(review): no `__main__` guard is visible around these statements, so
# they appear to execute whenever the module is loaded -- confirm intended.
directory_path = "./berkeley-function-call-leaderboard/data"
count = 0
for file_name in os.listdir(directory_path):
    # Files without a .json suffix are neither counted nor processed.
    if not file_name.endswith(".json"):
        continue
    count += 1
    print(f"Processing {count}: {file_name}")
    file_path = os.path.join(directory_path, file_name)
    data = load_file(file_path)
    # A falsy result from load_file is skipped; the file still counts
    # towards the total printed at the end.
    if not data:
        continue
    # The report name keeps the full source file name, ".json" included.
    output_filename = f"{file_name}_output.txt"
    find_invalid_enums(data, output_filename)
print(f"Total processed files: {count}")