# File-viewer metadata captured with this listing: 363 lines, 15 KiB, Python.
#!pip install customtkinter (https://pypi.org/project/customtkinter/0.3/#files)
|
|
# tkinter (the Tk interface) ships with Python and cannot be installed with pip; see https://medium.com/illumination/error-could-not-find-a-version-that-satisfies-the-requirement-tkinter-from-versions-none-753a512dd4ab
|
|
#pip install customtkinter tkVideoPlayer
|
|
#pip install opencv-python Pillow
|
|
#!pip install moviepy
|
|
|
|
import tkinter as tk
|
|
import customtkinter
|
|
import cv2
|
|
from PIL import Image, ImageTk
|
|
import threading
|
|
from glob import glob
|
|
from tkinter import filedialog
|
|
import sounddevice as sd
|
|
import wavio
|
|
import pygame
|
|
from moviepy.editor import *
|
|
import librosa
|
|
import librosa.display
|
|
import IPython.display as ipd
|
|
import speech_recognition as sr
|
|
import numpy as np
|
|
import time
|
|
import os
|
|
from cv2 import dnn
|
|
from math import ceil
|
|
import subprocess
|
|
|
|
# --- Face-detector configuration -------------------------------------------
# NOTE(review): image_mean, image_std, iou_threshold, center_variance,
# size_variance and threshold are not read by name anywhere in the visible
# part of this file (the camera loop passes literals instead) - confirm
# whether they are still needed.
image_mean = np.array([127, 127, 127])
image_std = 128.0

iou_threshold = 0.3
threshold = 0.5

center_variance = 0.1
size_variance = 0.2

# One stride per feature-map level, and the square box sizes generated at
# that level; both lists are read by define_img_size().
strides = [8.0, 16.0, 32.0, 64.0]
min_boxes = [
    [10.0, 16.0, 24.0],
    [32.0, 48.0],
    [64.0, 96.0],
    [128.0, 192.0, 256.0],
]
|
|
|
|
def define_img_size(image_size):
    """Build the prior boxes for a network input of the given size.

    Args:
        image_size: Sequence of input dimensions; the caller in this file
            passes ``[width, height]``.

    Returns:
        The array produced by ``generate_priors`` for the feature-map sizes
        derived from ``image_size`` and the module-level ``strides``.
    """
    # Feature-map extent per dimension and per stride level, rounded up.
    feature_map_w_h_list = [
        [int(ceil(dimension / stride)) for stride in strides]
        for dimension in image_size
    ]
    # Every dimension shares the same stride list object.
    shrinkage_list = [strides for _ in image_size]
    return generate_priors(
        feature_map_w_h_list, shrinkage_list, image_size, min_boxes
    )
|
|
|
|
def generate_priors(feature_map_list, shrinkage_list, image_size, min_boxes):
    """Enumerate centre-form prior boxes for every feature-map cell.

    Args:
        feature_map_list: ``[widths, heights]`` - cell counts per level.
        shrinkage_list: ``[x_strides, y_strides]`` - stride per level.
        image_size: ``[width, height]`` of the network input.
        min_boxes: Per level, the box side lengths (in input pixels).

    Returns:
        Array of ``[x_center, y_center, w, h]`` rows, every value clipped to
        ``[0, 1]``. Rows are ordered by level, then row, then column, then
        box size. The number of priors is printed as a side effect.
    """
    anchors = []
    for level in range(len(feature_map_list[0])):
        cells_across = image_size[0] / shrinkage_list[0][level]
        cells_down = image_size[1] / shrinkage_list[1][level]
        for row in range(feature_map_list[1][level]):
            center_y = (row + 0.5) / cells_down
            for col in range(feature_map_list[0][level]):
                center_x = (col + 0.5) / cells_across
                anchors.extend(
                    [center_x, center_y, side / image_size[0], side / image_size[1]]
                    for side in min_boxes[level]
                )
    print("priors nums:{}".format(len(anchors)))
    return np.clip(anchors, 0.0, 1.0)
|
|
|
|
def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200):
    """Greedy non-maximum suppression.

    Args:
        box_scores: 2-D array; the last column is the score, the preceding
            columns are the box passed to ``iou_of``.
        iou_threshold: Candidates whose IoU with a kept box exceeds this
            value are discarded.
        top_k: Stop after keeping this many boxes; values <= 0 keep all.
        candidate_size: Only the highest-scoring ``candidate_size`` rows are
            considered.

    Returns:
        The kept rows of ``box_scores``, highest score first.
    """
    boxes = box_scores[:, :-1]
    # argsort is ascending, so the best remaining candidate is always last.
    remaining = np.argsort(box_scores[:, -1])[-candidate_size:]
    kept = []
    while len(remaining) > 0:
        best = remaining[-1]
        kept.append(best)
        reached_top_k = top_k > 0 and top_k == len(kept)
        if reached_top_k or len(remaining) == 1:
            break
        remaining = remaining[:-1]
        overlaps = iou_of(
            boxes[remaining, :],
            np.expand_dims(boxes[best, :], axis=0),
        )
        remaining = remaining[overlaps <= iou_threshold]
    return box_scores[kept, :]
|
|
|
|
def area_of(left_top, right_bottom):
    """Return the area of boxes given by their two opposite corners.

    Args:
        left_top: Array ``(..., 2)`` with the lower-valued corner.
        right_bottom: Array ``(..., 2)`` with the higher-valued corner.

    Returns:
        Array ``(...)`` of areas; a box whose corners are inverted on either
        axis has area 0 because negative side lengths are clamped to zero.
    """
    sides = np.clip(right_bottom - left_top, 0.0, None)
    first_side = sides[..., 0]
    second_side = sides[..., 1]
    return first_side * second_side
|
|
|
|
def iou_of(boxes0, boxes1, eps=1e-5):
    """Compute intersection-over-union between corner-form boxes.

    Args:
        boxes0: Array ``(..., 4)``; the first two values are one corner and
            the last two the opposite corner.
        boxes1: Array broadcastable against ``boxes0``, same layout.
        eps: Added to the denominator so two zero-area boxes do not divide
            by zero.

    Returns:
        Array of IoU values with the broadcast leading shape.
    """
    intersection_min = np.maximum(boxes0[..., :2], boxes1[..., :2])
    intersection_max = np.minimum(boxes0[..., 2:], boxes1[..., 2:])
    intersection = area_of(intersection_min, intersection_max)

    first_area = area_of(boxes0[..., :2], boxes0[..., 2:])
    second_area = area_of(boxes1[..., :2], boxes1[..., 2:])
    union = first_area + second_area - intersection
    return intersection / (union + eps)
|
|
|
|
def predict(width, height, confidences, boxes, prob_threshold, iou_threshold=0.3, top_k=-1):
    """Filter raw detector output into pixel-space boxes.

    Only the first batch element of ``confidences`` and ``boxes`` is used.
    Class index 0 is skipped. For every other class the boxes scoring above
    ``prob_threshold`` go through ``hard_nms``.

    Args:
        width: Factor applied to box columns 0 and 2.
        height: Factor applied to box columns 1 and 3.
        confidences: Array ``(batch, n, classes)`` of scores.
        boxes: Array ``(batch, n, 4)`` of boxes.
        prob_threshold: Minimum score for a box to be considered.
        iou_threshold: Forwarded to ``hard_nms``.
        top_k: Forwarded to ``hard_nms``.

    Returns:
        ``(boxes, labels, probs)``: int32 boxes scaled by width/height, the
        class index of each box and its score. Three empty arrays when
        nothing passes the threshold.
    """
    frame_boxes = boxes[0]
    frame_scores = confidences[0]
    kept_per_class = []
    kept_labels = []
    for class_index in range(1, frame_scores.shape[1]):
        class_scores = frame_scores[:, class_index]
        confident = class_scores > prob_threshold
        selected_scores = class_scores[confident]
        if selected_scores.shape[0] == 0:
            continue
        candidates = np.concatenate(
            [frame_boxes[confident, :], selected_scores.reshape(-1, 1)],
            axis=1,
        )
        survivors = hard_nms(candidates, iou_threshold=iou_threshold, top_k=top_k)
        kept_per_class.append(survivors)
        kept_labels.extend([class_index] * survivors.shape[0])

    if not kept_per_class:
        return np.array([]), np.array([]), np.array([])

    merged = np.concatenate(kept_per_class)
    # Scale the four box columns in place, one column at a time.
    for column, factor in enumerate((width, height, width, height)):
        merged[:, column] *= factor
    pixel_boxes = merged[:, :4].astype(np.int32)
    return pixel_boxes, np.array(kept_labels), merged[:, 4]
|
|
|
|
def convert_locations_to_boxes(locations, priors, center_variance, size_variance):
    """Decode regression offsets into centre-form boxes.

    Args:
        locations: Array ``(..., 4)`` of predicted offsets.
        priors: Array ``(..., 4)`` of ``[cx, cy, w, h]`` priors; a leading
            axis is added when it has one dimension fewer than ``locations``.
        center_variance: Scale applied to the centre offsets.
        size_variance: Scale applied to the size offsets before ``exp``.

    Returns:
        Array shaped like ``locations`` holding ``[cx, cy, w, h]`` boxes.
    """
    if priors.ndim + 1 == locations.ndim:
        priors = priors[np.newaxis, ...]
    prior_centers = priors[..., :2]
    prior_sizes = priors[..., 2:]

    centers = locations[..., :2] * center_variance * prior_sizes + prior_centers
    sizes = np.exp(locations[..., 2:] * size_variance) * prior_sizes
    return np.concatenate([centers, sizes], axis=locations.ndim - 1)
|
|
|
|
def center_form_to_corner_form(locations):
    """Convert ``[cx, cy, w, h]`` boxes to ``[x1, y1, x2, y2]`` boxes.

    Args:
        locations: Array ``(..., 4)`` in centre form.

    Returns:
        Array of the same shape in corner form.
    """
    centers = locations[..., :2]
    half_sizes = locations[..., 2:] / 2
    last_axis = len(locations.shape) - 1
    return np.concatenate([centers - half_sizes, centers + half_sizes], last_axis)
|
|
|
|
def FER_live_cam():
    """Build the index-to-emotion lookup table.

    NOTE(review): as written in this file the table is a local that is
    neither used nor returned, so calling this function has no visible
    effect; the camera loop lives in ``modelemotionface``.
    """
    emotion_dict = dict(enumerate((
        'neutral', 'happiness', 'surprise', 'sadness',
        'anger', 'disgust', 'fear',
    )))
|
|
|
|
def modelemotionface():
    """Detect faces on the camera feed and label each with an emotion.

    Frames are read from capture device 1, annotated, shown in an OpenCV
    window named 'Camera' and appended to 'camera-test.avi'. The loop ends
    when a frame cannot be read or the user presses 'q'. Afterwards
    'camera.avi' is re-encoded to 'teste.avi' with moviepy.

    Changes from the previous version:
      * the '#RRGGBB' colour strings are converted to numeric (B, G, R)
        tuples before being handed to OpenCV, which rejects strings;
      * detected boxes are clamped to the frame and empty crops skipped,
        so a face partly outside the frame no longer breaks the resize;
      * camera, writer and window are released even if a frame fails;
      * unused locals (fps timing, w/h, labels, probs) are gone.
    """

    def hex_to_bgr(hex_color):
        """Turn '#RRGGBB' into the (blue, green, red) tuple OpenCV draws with."""
        red, green, blue = (int(hex_color[pos:pos + 2], 16) for pos in (1, 3, 5))
        return (blue, green, red)

    emotion_dict = {0: 'neutral', 1: 'happiness', 2: 'surprise', 3: 'sadness', 4: 'anger', 5: 'disgust', 6: 'fear'}
    emotion_colors = {'neutral': '#FFFFFF', 'happiness': '#00FF00', 'surprise': '#FFFF00', 'sadness': '#0000FF', 'anger': '#800000', 'disgust': '#800080', 'fear': '#FF0000'}

    # Load both networks before touching the camera, so a missing model file
    # does not leave the capture device open.
    model = cv2.dnn.readNetFromONNX('C:/Users/garci/Downloads/emotion-ferplus-8.onnx')
    model_path = 'C:/Users/garci/Downloads/RFB-320.caffemodel'
    proto_path = 'C:/Users/garci/Downloads/RFB-320.prototxt'
    net = cv2.dnn.readNetFromCaffe(proto_path, model_path)

    input_size = [320, 240]
    width, height = input_size
    priors = define_img_size(input_size)

    cap = cv2.VideoCapture(1)
    frame_width = int(cap.get(3))
    frame_height = int(cap.get(4))
    size = (frame_width, frame_height)
    result = cv2.VideoWriter('camera-test.avi', cv2.VideoWriter_fourcc(*'MJPG'), 10, size)

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frame_h, frame_w = frame.shape[0], frame.shape[1]

            # Face detection on a resized RGB copy of the frame.
            rect = cv2.resize(frame, (width, height))
            rect = cv2.cvtColor(rect, cv2.COLOR_BGR2RGB)
            net.setInput(cv2.dnn.blobFromImage(rect, 1 / 127.5, (width, height), 127))
            boxes, scores = net.forward(["boxes", "scores"])
            boxes = np.expand_dims(np.reshape(boxes, (-1, 4)), axis=0)
            scores = np.expand_dims(np.reshape(scores, (-1, 2)), axis=0)
            boxes = convert_locations_to_boxes(boxes, priors, 0.1, 0.2)
            boxes = center_form_to_corner_form(boxes)
            boxes, _, _ = predict(frame_w, frame_h, scores, boxes, 0.5)

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            for box in boxes:
                x1, y1, x2, y2 = (int(value) for value in box)
                # Clamp to the frame: negative indices would wrap around and
                # produce an empty (or wrong) crop.
                x1, y1 = max(x1, 0), max(y1, 0)
                x2, y2 = min(x2, frame_w), min(y2, frame_h)
                if x2 <= x1 or y2 <= y1:
                    continue

                face = cv2.resize(gray[y1:y2, x1:x2], (64, 64)).reshape(1, 1, 64, 64)
                model.setInput(face)
                output = model.forward()
                pred_emotion = emotion_dict[int(np.argmax(output[0]))]
                color = hex_to_bgr(emotion_colors[pred_emotion])

                cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2, lineType=cv2.LINE_AA)
                cv2.putText(frame, pred_emotion, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2, lineType=cv2.LINE_AA)

            result.write(frame)
            cv2.imshow('Camera', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        cap.release()
        result.release()
        cv2.destroyAllWindows()

    # NOTE(review): the recording above is written to 'camera-test.avi' but
    # this step reads 'camera.avi' - confirm which file is intended.
    video_clip = VideoFileClip("camera.avi")
    video_clip.write_videofile("teste.avi", codec="libx264")
|
|
|
|
# Script entry point.
# NOTE(review): FER_live_cam() as written in this file only builds a local
# table, so this call has no visible effect.
if __name__ == "__main__":
    FER_live_cam()
|
|
|
|
def Login():
    """Show the login window and block until it is closed.

    ``validar_login`` destroys the window once the credentials are accepted,
    which ends ``mainloop`` and lets the caller continue. Closing the window
    through the window manager used to have the same effect, so the main
    application opened without any login; that path now ends the program.

    Raises:
        SystemExit: If the user closes the window without logging in.
    """
    janela = customtkinter.CTk()
    janela.title("Login")
    janela.geometry("400x250")
    janela.resizable(False, False)

    closed_without_login = False

    def abort_login():
        """Remember that the window was dismissed, then close it."""
        nonlocal closed_without_login
        closed_without_login = True
        janela.destroy()

    janela.protocol("WM_DELETE_WINDOW", abort_login)

    texto = customtkinter.CTkLabel(janela, text="Login")
    texto.pack(padx=10, pady=10)
    user = customtkinter.CTkEntry(janela, placeholder_text="User")
    user.pack(padx=10, pady=10)
    password = customtkinter.CTkEntry(janela, placeholder_text="Password", show='*')
    password.pack(padx=10, pady=10)
    # NOTE(review): the state of this checkbox is never read in the visible code.
    checkbox = customtkinter.CTkCheckBox(janela, text="1ª Utilização")
    checkbox.pack(padx=10, pady=10)
    botao = customtkinter.CTkButton(janela, text="Login", command=lambda: validar_login(user, password, janela))
    botao.pack(padx=10, pady=10)

    janela.mainloop()

    if closed_without_login:
        raise SystemExit(0)
|
|
|
|
def validar_login(user, password, janela):
    """Check the entered credentials and close the login window on success.

    Args:
        user: Widget whose ``get()`` returns the typed user name.
        password: Widget whose ``get()`` returns the typed password.
        janela: Login window; destroyed when the credentials match.

    On a mismatch an error dialog is shown and the window stays open.
    """
    typed = (user.get(), password.get())
    if typed != ("kl3z", "12345"):
        show_messagebox("Erro", "User ou password incorretos!", box_type="Erro")
        return
    janela.destroy()
|
|
|
|
def transcrever_audio(arquivo_audio):
    """Transcribe an audio file to text (pt-BR) with Google's recogniser.

    Args:
        arquivo_audio: Path of the audio file, opened with ``sr.AudioFile``.

    Returns:
        The transcript on success. On failure, a message that starts with
        ``"Erro"``: the caller in this file separates failures from
        transcripts with ``startswith("Erro")``, which the previous messages
        did not satisfy, so error text was being saved as a transcript.
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(arquivo_audio) as source:
        audio_data = recognizer.record(source)

    try:
        return recognizer.recognize_google(audio_data, language="pt-BR")
    except sr.UnknownValueError:
        # The recogniser could not make out any speech.
        return "Erro: não foi possível entender o áudio."
    except sr.RequestError as e:
        # The request to the recognition service itself failed.
        return f"Erro: pedido ao serviço de reconhecimento falhou: {e}"
|
|
|
|
def salvar_como_pdf(texto, caminho_pdf):
    """Write ``texto`` to a PDF, one cell per line, and report it on stdout.

    Args:
        texto: Text to save; split on ``'\\n'``.
        caminho_pdf: Destination path handed to ``FPDF.output``.

    NOTE(review): ``FPDF`` is not imported anywhere in the visible part of
    this file - confirm an import exists, otherwise this raises NameError.
    """
    documento = FPDF()
    documento.add_page()
    documento.set_font("Arial", size=12)

    for linha in texto.split('\n'):
        documento.cell(200, 10, txt=linha, ln=True)

    documento.output(caminho_pdf)
    print(f"Texto salvo com sucesso em {caminho_pdf}")
|
|
|
|
def transcrever_para_pdf(audio_path, caminho_pdf='transcricao_audio.pdf'):
    """Transcribe ``audio_path`` and save the transcript as a PDF.

    These statements used to run at import time and read ``audio_path``,
    which is not defined at module level, so loading the file raised
    NameError before the login window appeared. They now run only when this
    function is called with an explicit path.

    Args:
        audio_path: Audio file to transcribe.
        caminho_pdf: Where to write the PDF.

    Returns:
        The text returned by ``transcrever_audio``. It is saved when it is
        non-empty and does not start with "Erro"; otherwise it is printed.
    """
    # TODO(review): wire this to the "Audio Transcript" button once the
    # selected audio path is available to the UI.
    texto = transcrever_audio(audio_path)
    if texto and not texto.startswith("Erro"):
        salvar_como_pdf(texto, caminho_pdf)
    else:
        print(texto)
    return texto
|
|
|
|
def show_messagebox(title, message, box_type="Erro"):
    """Open a small top-level window with a title, a message and an OK button.

    Args:
        title: Used as window title and as the bold heading.
        message: Body text.
        box_type: Accepted but not used by this implementation.
    """
    dialog = customtkinter.CTkToplevel()
    dialog.title(title)
    dialog.geometry("300x150+500+300")

    # Stacked top to bottom in this order, each with the same padding.
    widgets = (
        customtkinter.CTkLabel(dialog, text=title, font=('Arial', 16, 'bold')),
        customtkinter.CTkLabel(dialog, text=message),
        customtkinter.CTkButton(dialog, text="OK", command=dialog.destroy),
    )
    for widget in widgets:
        widget.pack(pady=10)
|
|
|
|
def play_video(video_path):
    """Play a video in VLC, falling back to an OpenCV preview window.

    The previous fallback decoded every frame into a ``PhotoImage`` that was
    never placed in any widget, so nothing was shown, and it never closed
    the clip. The fallback now shows the frames in an OpenCV window at
    24 fps ('q' stops playback) and always releases the clip.

    Args:
        video_path: Path of the video file to play.
    """
    vlc_path = "C:\\Program Files (x86)\\VideoLAN\\VLC\\vlc.exe"
    try:
        subprocess.Popen([vlc_path, video_path, '--play-and-exit'])
        return
    except OSError:
        # VLC is not installed at the expected location or cannot be started.
        pass

    frame_delay_ms = max(1, int(1000 / 24))
    video_clip = VideoFileClip(video_path)
    try:
        for frame in video_clip.iter_frames(fps=24, dtype='uint8'):
            # moviepy yields RGB frames; OpenCV displays BGR.
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            cv2.imshow('Video', cv2.resize(frame, (600, 400)))
            if cv2.waitKey(frame_delay_ms) & 0xFF == ord('q'):
                break
    finally:
        video_clip.close()
        cv2.destroyAllWindows()
|
|
|
|
def carregar_video():
    """Ask the user for a video file and start playback on a worker thread.

    The patterns are passed as a tuple: a single ``"*.mp4;*.avi;*.mov"``
    string is one pattern to Tk and only filters as intended in the native
    Windows dialog.
    """
    video_path = filedialog.askopenfilename(
        filetypes=[("Carregar Vídeo", ("*.mp4", "*.avi", "*.mov"))]
    )
    if not video_path:
        # Dialog cancelled.
        return
    video_path = os.path.normpath(video_path)
    threading.Thread(target=play_video, args=(video_path,)).start()
|
|
|
|
def carregar_audio():
    """Ask the user for an audio file and start playback on a worker thread.

    The patterns are passed as a tuple: a single ``"*.mp3;*.wav;*.ogg"``
    string is one pattern to Tk and only filters as intended in the native
    Windows dialog.

    NOTE(review): ``tocar_audio`` is not defined in the visible part of this
    file - confirm it exists before this menu entry is used.
    """
    audio_path = filedialog.askopenfilename(
        filetypes=[("Carregar Áudio", ("*.mp3", "*.wav", "*.ogg"))]
    )
    if not audio_path:
        # Dialog cancelled.
        return
    audio_path = os.path.normpath(audio_path)
    threading.Thread(target=tocar_audio, args=(audio_path,)).start()
|
|
|
|
# --- Application start-up ---------------------------------------------------
# Initialise audio playback, then block on the login window before the main
# window is built.
pygame.mixer.init()
Login()

# --- Main window: fixed 1200x800, 2x2 grid that does not stretch ------------
janela_principal = customtkinter.CTk()
janela_principal.title("PJM IA")
janela_principal.geometry("1200x800")
janela_principal.resizable(False, False)

janela_principal.grid_rowconfigure(0, weight=0)
janela_principal.grid_rowconfigure(1, weight=0)
janela_principal.grid_columnconfigure(0, weight=0)
janela_principal.grid_columnconfigure(1, weight=0)

customtkinter.set_appearance_mode("Dark")

# --- Four 600x400 panels -----------------------------------------------------
lado_esquerdo1 = customtkinter.CTkFrame(janela_principal, width=600, height=400)
lado_direito1 = customtkinter.CTkFrame(janela_principal, width=600, height=400)
lado_esquerdo2 = customtkinter.CTkFrame(janela_principal, width=600, height=400)
lado_direito2 = customtkinter.CTkFrame(janela_principal, width=600, height=400)

lado_esquerdo1.grid(row=0, column=0, padx=10, pady=10, sticky="nsew")
lado_direito1.grid(row=0, column=1, padx=10, pady=10, sticky="nsew")
# NOTE(review): on the second row the "esquerdo" frame sits in column 1 and
# the "direito" frame in column 0 - confirm this is intended.
lado_esquerdo2.grid(row=1, column=1, padx=10, pady=10, sticky="nsew")
lado_direito2.grid(row=1, column=0, padx=10, pady=10, sticky="nsew")
|
|
# --- Menu bar ----------------------------------------------------------------
barradetarefas = tk.Menu(janela_principal)
Carregar = tk.Menu(barradetarefas, tearoff=0)
Recursos = tk.Menu(barradetarefas, tearoff=0)
Opcoes = tk.Menu(barradetarefas, tearoff=0)

# "Ficheiros": load media, start the camera, or quit.
Carregar.add_command(label='Carregar Video', command=carregar_video)
Carregar.add_command(label='Carregar gravação som', command=carregar_audio)
Carregar.add_command(label='Usar a Camera', command=modelemotionface)
Carregar.add_separator()
Carregar.add_command(label="Sair", command=janela_principal.destroy)

# "Recursos" and "Opções": entries without a command attached yet.
Recursos.add_command(label='Análise dos videos')
Recursos.add_command(label='Análise de audio')
Recursos.add_command(label='Outras2')

Opcoes.add_command(label='User')
Opcoes.add_command(label='Settings')
Opcoes.add_command(label='Outras')

barradetarefas.add_cascade(label="Ficheiros", menu=Carregar)
barradetarefas.add_cascade(label="Recursos", menu=Recursos)
barradetarefas.add_cascade(label="Opções", menu=Opcoes)
|
|
# --- PDF export buttons (top-right panel) ------------------------------------
# Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same filter
# under its current name and also exists in older Pillow releases.
pdfimage = Image.open("pdf.png").resize((50, 50), Image.LANCZOS)
# Keep a module-level reference: Tk does not keep the image alive on its own.
pdficon = ImageTk.PhotoImage(pdfimage)

# One icon button per export variant (row 0) with its caption below (row 1).
# No command is attached to the buttons in this part of the file.
pdfspeach = tk.Button(lado_direito1, image=pdficon, borderwidth=0, highlightthickness=0, bg="#2B2B2B")
pdfspeach.grid(row=0, column=0, padx=10)
labelpdfspeach = tk.Label(lado_direito1, text="Audio Transcript", fg="white", bg="#2B2B2B")
labelpdfspeach.grid(row=1, column=0, pady=(5, 10))

pdfspeachFER = tk.Button(lado_direito1, image=pdficon, borderwidth=0, highlightthickness=0, bg="#2B2B2B")
pdfspeachFER.grid(row=0, column=1, padx=10)
labelpdfspeachFER = tk.Label(lado_direito1, text="Audio Transcript\n with FER", fg="white", bg="#2B2B2B")
labelpdfspeachFER.grid(row=1, column=1, pady=(5, 10))

pdfspeachFERandaudio = tk.Button(lado_direito1, image=pdficon, borderwidth=0, highlightthickness=0, bg="#2B2B2B")
pdfspeachFERandaudio.grid(row=0, column=2, padx=10)
labelpdfspeachFERandaudio = tk.Label(lado_direito1, text="Audio Transcript \n with FER & Audio", fg="white", bg="#2B2B2B")
labelpdfspeachFERandaudio.grid(row=1, column=2, pady=(5, 10))

pdfspeachFERandaudioandsentimentanalises = tk.Button(lado_direito1, image=pdficon, borderwidth=0, highlightthickness=0, bg="#2B2B2B")
pdfspeachFERandaudioandsentimentanalises.grid(row=0, column=3, padx=10)
labelpdfspeachFERandaudioandsentimentanalises = tk.Label(lado_direito1, text="Audio Transcript \n with FER, Audio \n & Sentiment Analyses", fg="white", bg="#2B2B2B")
labelpdfspeachFERandaudioandsentimentanalises.grid(row=1, column=3, pady=(5, 10))
|