-
Notifications
You must be signed in to change notification settings - Fork 15
/
main.py
128 lines (105 loc) · 5.6 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# %%
# Import necessary libraries
import numpy as np
import os
import string
import mediapipe as mp
import cv2
from my_functions import *
import keyboard
from tensorflow.keras.models import load_model
import language_tool_python
# Set the path to the data directory
PATH = os.path.join('data')

# Action labels are the sub-directory names under data/.
# NOTE(review): os.listdir order is OS-dependent; it must match the label
# ordering used when the model was trained — confirm the training script
# builds `actions` the same way.
actions = np.array(os.listdir(PATH))

# Load the trained Keras model saved under 'my_model'
model = load_model('my_model')

# Grammar-correction client backed by the public LanguageTool API.
# 'en-GB' is the LanguageTool code for British English — 'en-UK' is not a
# supported language code and is rejected by the API.
tool = language_tool_python.LanguageToolPublicAPI('en-GB')

# Rolling state: words shown on screen, buffered per-frame keypoints,
# last accepted sign (to suppress repeats), and grammar-correction results
sentence, keypoints, last_prediction, grammar, grammar_result = [], [], [], [], []

# Access the camera and check if the camera is opened successfully
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("Cannot access camera.")
    exit()
# Create a holistic object for sign prediction
# Create a holistic object for sign prediction
with mp.solutions.holistic.Holistic(min_detection_confidence=0.75, min_tracking_confidence=0.75) as holistic:
    # Run the loop while the camera is open
    while cap.isOpened():
        # Read a frame from the camera. A failed grab returns (False, None);
        # the original discarded the flag, so `image` could be None and every
        # cv2/MediaPipe call below would crash. Skip such frames instead.
        success, image = cap.read()
        if not success:
            continue

        # Detect landmarks and draw them on the frame
        # (image_process / draw_landmarks / keypoint_extraction come from my_functions.py)
        results = image_process(image, holistic)
        draw_landmarks(image, results)

        # Accumulate per-frame keypoints until a full 10-frame window is available
        keypoints.append(keypoint_extraction(results))
        if len(keypoints) == 10:
            # The model expects a batch dimension: (1, frames, features)
            prediction = model.predict(np.array(keypoints)[np.newaxis, :, :])
            # Start collecting the next 10-frame window
            keypoints = []
            # Only accept confident predictions (max probability above 0.9)
            if np.amax(prediction) > 0.9:
                predicted_action = actions[np.argmax(prediction)]
                # Suppress the same sign being appended on consecutive windows
                if last_prediction != predicted_action:
                    sentence.append(predicted_action)
                    last_prediction = predicted_action

        # Keep at most 7 words so the sentence fits on screen
        if len(sentence) > 7:
            sentence = sentence[-7:]

        # Reset all state when Spacebar is pressed
        if keyboard.is_pressed(' '):
            sentence, keypoints, last_prediction, grammar, grammar_result = [], [], [], [], []

        if sentence:
            # Capitalize the first word of the sentence
            sentence[0] = sentence[0].capitalize()

        # Merge consecutive fingerspelled letters into a single word
        if len(sentence) >= 2:
            if sentence[-1] in string.ascii_lowercase or sentence[-1] in string.ascii_uppercase:
                # Merge when the previous element is also a letter, or is a
                # partial word not present in the known action labels
                if sentence[-2] in string.ascii_lowercase or sentence[-2] in string.ascii_uppercase or (sentence[-2] not in actions and sentence[-2] not in list(x.capitalize() for x in actions)):
                    sentence[-1] = sentence[-2] + sentence[-1]
                    sentence.pop(-2)  # idiomatic form of pop(len(sentence) - 2)
                    sentence[-1] = sentence[-1].capitalize()

        # Run grammar correction when Enter is pressed. Guard on a non-empty
        # sentence so an empty string is never sent to the public LanguageTool
        # API (the key fires every frame while Enter is held down).
        if keyboard.is_pressed('enter') and sentence:
            grammar_result = tool.correct(' '.join(sentence))

        # Render the corrected text if available, otherwise the raw sentence,
        # centred horizontally near the bottom of the frame
        display_text = grammar_result if grammar_result else ' '.join(sentence)
        textsize = cv2.getTextSize(display_text, cv2.FONT_HERSHEY_SIMPLEX, 1, 2)[0]
        text_X_coord = (image.shape[1] - textsize[0]) // 2
        cv2.putText(image, display_text, (text_X_coord, 470),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

        # Show the frame and stop when the user closes the 'Camera' window
        cv2.imshow('Camera', image)
        cv2.waitKey(1)
        if cv2.getWindowProperty('Camera', cv2.WND_PROP_VISIBLE) < 1:
            break

# Release the camera and close all windows
cap.release()
cv2.destroyAllWindows()
# Shut down the LanguageTool client
tool.close()