# Listing metadata carried over from the web code viewer:
# 363 lines, 15 KiB, Python; revision dated 2026-03-15 13:27:50 +00:00.
# Dependencies (install before running):
#   pip install customtkinter   (https://pypi.org/project/customtkinter/0.3/#files)
#   tkinter (the Tk interface) cannot be installed with pip; see
#   https://medium.com/illumination/error-could-not-find-a-version-that-satisfies-the-requirement-tkinter-from-versions-none-753a512dd4ab
#   pip install customtkinter tkVideoPlayer
#   pip install opencv-python Pillow
#   pip install moviepy
import tkinter as tk
import customtkinter
import cv2
from PIL import Image, ImageTk
import threading
from glob import glob
from tkinter import filedialog
import sounddevice as sd
import wavio
import pygame
from moviepy.editor import *
import librosa
import librosa.display
import IPython.display as ipd
import speech_recognition as sr
import numpy as np
import time
import os
from cv2 import dnn
from math import ceil
import subprocess
# --- Face-detector configuration -------------------------------------------
# Mean / scale constants (presumably for normalising the detector input —
# TODO confirm; the capture code passes its own literals to blobFromImage).
image_mean = np.array([127, 127, 127])
image_std = 128.0
# Same value as the default ``iou_threshold`` of predict().
iou_threshold = 0.3
# Names match the ``center_variance`` / ``size_variance`` parameters of
# convert_locations_to_boxes().
center_variance = 0.1
size_variance = 0.2
# Prior-box edge lengths, one inner list per feature-map level
# (generate_priors indexes this list by level).
min_boxes = [[10.0, 16.0, 24.0], [32.0, 48.0], [64.0, 96.0], [128.0, 192.0, 256.0]]
# One stride per feature-map level; define_img_size divides the input size by
# these to get the feature-map extents and also uses them as shrinkage values.
strides = [8.0, 16.0, 32.0, 64.0]
# Presumably the detection score threshold — TODO confirm against callers.
threshold = 0.5
def define_img_size(image_size):
    """Build the prior boxes for a detector input of the given size.

    For every entry of ``image_size`` the feature-map extent at each level is
    ``ceil(size / stride)``, using the module-level ``strides``. The same
    ``strides`` list is also used as the shrinkage for each input dimension.

    Args:
        image_size: Sequence of input dimensions; ``generate_priors`` reads
            index 0 as the width and index 1 as the height.

    Returns:
        The prior boxes produced by ``generate_priors``.
    """
    feature_map_w_h_list = [
        [int(ceil(extent / stride)) for stride in strides]
        for extent in image_size
    ]
    # One reference to the shared stride list per input dimension.
    shrinkage_list = [strides for _ in image_size]
    return generate_priors(
        feature_map_w_h_list, shrinkage_list, image_size, min_boxes
    )
def generate_priors(feature_map_list, shrinkage_list, image_size, min_boxes):
    """Generate centre-form prior boxes for every feature-map cell.

    Args:
        feature_map_list: ``[widths, heights]`` — per-level feature-map
            extents along x (index 0) and y (index 1).
        shrinkage_list: ``[x_shrinkage, y_shrinkage]`` — per-level divisors
            applied to the image size to obtain the cell scale.
        image_size: ``[width, height]`` of the network input.
        min_boxes: Per-level list of box edge lengths (same units as
            ``image_size``).

    Returns:
        Array with one row ``[x_center, y_center, w, h]`` per prior, all
        values relative to the image size and clipped to ``[0, 1]``.
        The number of priors is also printed.
    """
    anchors = []
    for level, columns in enumerate(feature_map_list[0]):
        scale_w = image_size[0] / shrinkage_list[0][level]
        scale_h = image_size[1] / shrinkage_list[1][level]
        for row in range(feature_map_list[1][level]):
            y_center = (row + 0.5) / scale_h
            for column in range(columns):
                x_center = (column + 0.5) / scale_w
                # One prior per configured edge length, all sharing this centre.
                anchors.extend(
                    [x_center, y_center, edge / image_size[0], edge / image_size[1]]
                    for edge in min_boxes[level]
                )
    print("priors nums:{}".format(len(anchors)))
    return np.clip(anchors, 0.0, 1.0)
def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200):
    """Greedy non-maximum suppression.

    Args:
        box_scores: 2-D array whose last column is the score and whose
            remaining columns are the box coordinates handed to ``iou_of``.
        iou_threshold: Remaining boxes whose IoU with the box just kept is
            above this value are discarded.
        top_k: When positive, stop as soon as this many boxes are kept.
        candidate_size: Only the ``candidate_size`` highest-scoring rows are
            considered.

    Returns:
        The kept rows of ``box_scores``, highest score first.
    """
    boxes = box_scores[:, :-1]
    # Ascending by score, so the best remaining candidate is always last.
    order = np.argsort(box_scores[:, -1])[-candidate_size:]
    keep = []
    while len(order) > 0:
        best = order[-1]
        keep.append(best)
        reached_top_k = top_k > 0 and len(keep) == top_k
        if reached_top_k or len(order) == 1:
            break
        order = order[:-1]
        overlaps = iou_of(
            boxes[order, :],
            np.expand_dims(boxes[best, :], axis=0),
        )
        order = order[overlaps <= iou_threshold]
    return box_scores[keep, :]
def area_of(left_top, right_bottom):
    """Return the area of boxes given by their two opposite corners.

    Args:
        left_top: Array ``(..., 2)`` with the minimum corner of each box.
        right_bottom: Array ``(..., 2)`` with the maximum corner of each box.

    Returns:
        Array ``(...)`` of areas; a box whose extent is negative along either
        axis has area 0.
    """
    extent = np.clip(right_bottom - left_top, 0.0, None)
    width, height = extent[..., 0], extent[..., 1]
    return width * height
def iou_of(boxes0, boxes1, eps=1e-5):
    """Intersection-over-union of corner-form boxes.

    Args:
        boxes0: Array ``(..., 4)`` of boxes; the first two values are the
            minimum corner and the last two the maximum corner.
        boxes1: Array of boxes in the same layout, broadcastable against
            ``boxes0``.
        eps: Small constant added to the denominator to avoid division by
            zero.

    Returns:
        Array of IoU values, one per broadcast pair of boxes.
    """
    inter_min = np.maximum(boxes0[..., :2], boxes1[..., :2])
    inter_max = np.minimum(boxes0[..., 2:], boxes1[..., 2:])
    intersection = area_of(inter_min, inter_max)
    union = (
        area_of(boxes0[..., :2], boxes0[..., 2:])
        + area_of(boxes1[..., :2], boxes1[..., 2:])
        - intersection
    )
    return intersection / (union + eps)
def predict(width, height, confidences, boxes, prob_threshold, iou_threshold=0.3, top_k=-1):
    """Turn raw detector output into pixel-space boxes, labels and scores.

    Only the first element of the batch is processed. Class index 0 is
    skipped; for every other class the boxes scoring above
    ``prob_threshold`` are passed through ``hard_nms``.

    Args:
        width: Factor applied to the x coordinates (columns 0 and 2).
        height: Factor applied to the y coordinates (columns 1 and 3).
        confidences: Array ``(batch, num_boxes, num_classes)`` of scores.
        boxes: Array ``(batch, num_boxes, 4)`` of boxes.
        prob_threshold: Minimum score for a box to be considered.
        iou_threshold: Overlap threshold forwarded to ``hard_nms``.
        top_k: Per-class cap forwarded to ``hard_nms``.

    Returns:
        Tuple ``(boxes, labels, probs)``: int32 boxes scaled by
        ``width``/``height``, the class index of each box, and its score.
        Three empty arrays when nothing passes the threshold.
    """
    frame_boxes = boxes[0]
    frame_scores = confidences[0]
    kept_per_class = []
    kept_labels = []
    for class_index in range(1, frame_scores.shape[1]):
        class_probs = frame_scores[:, class_index]
        above = class_probs > prob_threshold
        if not above.any():
            continue
        candidates = np.concatenate(
            [frame_boxes[above, :], class_probs[above].reshape(-1, 1)],
            axis=1,
        )
        survivors = hard_nms(candidates, iou_threshold=iou_threshold, top_k=top_k)
        kept_per_class.append(survivors)
        kept_labels.extend([class_index] * survivors.shape[0])

    if not kept_per_class:
        return np.array([]), np.array([]), np.array([])

    detections = np.concatenate(kept_per_class)
    # Scale the coordinate columns one at a time: x by width, y by height.
    for column, factor in enumerate((width, height, width, height)):
        detections[:, column] *= factor
    return (
        detections[:, :4].astype(np.int32),
        np.array(kept_labels),
        detections[:, 4],
    )
def convert_locations_to_boxes(locations, priors, center_variance, size_variance):
    """Decode regression output into centre-form boxes using the priors.

    Args:
        locations: Array ``(..., 4)`` of regressed offsets: two centre
            offsets followed by two log-size offsets.
        priors: Array of centre-form priors ``[cx, cy, w, h]``. When it has
            exactly one dimension fewer than ``locations`` a leading axis is
            added so it broadcasts over the batch.
        center_variance: Multiplier applied to the centre offsets.
        size_variance: Multiplier applied to the log-size offsets.

    Returns:
        Array shaped like ``locations`` holding ``[cx, cy, w, h]`` boxes.
    """
    if priors.ndim + 1 == locations.ndim:
        priors = np.expand_dims(priors, 0)
    prior_centers = priors[..., :2]
    prior_sizes = priors[..., 2:]
    centers = locations[..., :2] * center_variance * prior_sizes + prior_centers
    sizes = np.exp(locations[..., 2:] * size_variance) * prior_sizes
    return np.concatenate([centers, sizes], axis=locations.ndim - 1)
def center_form_to_corner_form(locations):
    """Convert ``[cx, cy, w, h]`` boxes into ``[x1, y1, x2, y2]`` boxes.

    Args:
        locations: Array ``(..., 4)`` of centre-form boxes.

    Returns:
        Array of the same shape with the minimum corner followed by the
        maximum corner of every box.
    """
    centers = locations[..., :2]
    half_sizes = locations[..., 2:] / 2
    return np.concatenate(
        [centers - half_sizes, centers + half_sizes],
        locations.ndim - 1,
    )
def FER_live_cam():
    """Build the emotion index -> label table; currently does nothing else.

    The table is local and the function returns ``None``.

    NOTE(review): the file's indentation was lost. ``modelemotionface`` is
    treated as a separate module-level function because the menu code refers
    to it by that name at module scope — confirm against the original file.
    """
    emotion_dict = dict(enumerate(
        ['neutral', 'happiness', 'surprise', 'sadness', 'anger', 'disgust', 'fear']
    ))
def modelemotionface():
    """Run live face detection and emotion recognition on a camera feed.

    Frames are read from camera index 1. Faces are located with the RFB-320
    Caffe detector and every face crop is classified by the FER+ ONNX model.
    Annotated frames are shown in a window named 'Camera' (press ``q`` to
    stop) and recorded to ``camera-test.avi``; when the loop ends the
    recording is re-encoded to ``teste.avi`` with libx264.

    The camera, the writer and the preview window are always released, even
    when an error interrupts the loop. Nothing is re-encoded when no frame
    was recorded.

    NOTE(review): the model paths are absolute paths on one machine —
    consider making them configurable.
    """
    recording_path = 'camera-test.avi'
    emotion_dict = {0: 'neutral', 1: 'happiness', 2: 'surprise', 3: 'sadness', 4: 'anger', 5: 'disgust', 6: 'fear'}
    emotion_colors = {'neutral': '#FFFFFF', 'happiness': '#00FF00', 'surprise': '#FFFF00','sadness': '#0000FF', 'anger': '#800000', 'disgust': '#800080', 'fear': '#FF0000'}
    # OpenCV drawing calls need numeric BGR tuples, not '#RRGGBB' strings,
    # so convert the palette once (string offsets 5, 3, 1 -> B, G, R).
    bgr_by_emotion = {
        name: tuple(int(code[pos:pos + 2], 16) for pos in (5, 3, 1))
        for name, code in emotion_colors.items()
    }

    # Load both networks before opening the camera so that a missing model
    # file does not leave the capture device open.
    model = cv2.dnn.readNetFromONNX('C:/Users/garci/Downloads/emotion-ferplus-8.onnx')
    model_path = 'C:/Users/garci/Downloads/RFB-320.caffemodel'
    proto_path = 'C:/Users/garci/Downloads/RFB-320.prototxt'
    net = cv2.dnn.readNetFromCaffe(proto_path, model_path)
    input_size = [320, 240]
    width, height = input_size
    priors = define_img_size(input_size)

    cap = cv2.VideoCapture(1)
    frame_width = int(cap.get(3))
    frame_height = int(cap.get(4))
    result = cv2.VideoWriter(
        recording_path,
        cv2.VideoWriter_fourcc(*'MJPG'),
        10,
        (frame_width, frame_height),
    )
    frames_written = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # --- face detection on a resized RGB copy of the frame ---
            rect = cv2.resize(frame, (width, height))
            rect = cv2.cvtColor(rect, cv2.COLOR_BGR2RGB)
            net.setInput(cv2.dnn.blobFromImage(rect, 1 / 127.5, (width, height), 127))
            boxes, scores = net.forward(["boxes", "scores"])
            boxes = np.expand_dims(np.reshape(boxes, (-1, 4)), axis=0)
            scores = np.expand_dims(np.reshape(scores, (-1, 2)), axis=0)
            # 0.1 / 0.2 are the centre and size variances; 0.5 is the
            # minimum detection score.
            boxes = convert_locations_to_boxes(boxes, priors, 0.1, 0.2)
            boxes = center_form_to_corner_form(boxes)
            boxes, _, _ = predict(frame.shape[1], frame.shape[0], scores, boxes, 0.5)

            # --- emotion classification for every detected face ---
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            frame_h, frame_w = gray.shape[:2]
            for (x1, y1, x2, y2) in boxes:
                # Clamp to the frame: a negative start index would wrap
                # around and an empty crop makes cv2.resize fail.
                x1 = min(max(int(x1), 0), frame_w)
                x2 = min(max(int(x2), 0), frame_w)
                y1 = min(max(int(y1), 0), frame_h)
                y2 = min(max(int(y2), 0), frame_h)
                if x2 <= x1 or y2 <= y1:
                    continue
                face = cv2.resize(gray[y1:y2, x1:x2], (64, 64)).reshape(1, 1, 64, 64)
                model.setInput(face)
                output = model.forward()
                pred_emotion = emotion_dict[int(np.argmax(output[0]))]
                color = bgr_by_emotion[pred_emotion]
                cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2, lineType=cv2.LINE_AA)
                cv2.putText(frame, pred_emotion, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2, lineType=cv2.LINE_AA)

            result.write(frame)
            frames_written += 1
            cv2.imshow('Camera', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        cap.release()
        result.release()
        cv2.destroyAllWindows()

    if not frames_written:
        # Nothing was captured (e.g. the camera could not be opened), so
        # there is no recording to re-encode.
        return

    # Re-encode the file that was actually recorded above.
    video_clip = VideoFileClip(recording_path)
    try:
        video_clip.write_videofile("teste.avi", codec="libx264")
    finally:
        video_clip.close()
# Script entry point: calls FER_live_cam() when the file is executed directly.
# NOTE(review): this runs before the GUI code further down the file.
if __name__ == "__main__":
    FER_live_cam()
def Login():
    """Show the login window and run its event loop.

    The window holds a user entry, a masked password entry, a
    "1ª Utilização" checkbox and a "Login" button that hands both entries
    and the window to ``validar_login``.

    NOTE(review): nothing is returned, so the caller cannot tell a
    successful login from the window simply being closed.
    """
    janela = customtkinter.CTk()
    janela.title("Login")
    janela.geometry("400x250")
    janela.resizable(False, False)

    espacamento = {"padx": 10, "pady": 10}
    customtkinter.CTkLabel(janela, text="Login").pack(**espacamento)

    user = customtkinter.CTkEntry(janela, placeholder_text="User")
    user.pack(**espacamento)
    password = customtkinter.CTkEntry(janela, placeholder_text="Password", show='*')
    password.pack(**espacamento)

    customtkinter.CTkCheckBox(janela, text="1ª Utilização").pack(**espacamento)

    def ao_confirmar():
        validar_login(user, password, janela)

    customtkinter.CTkButton(janela, text="Login", command=ao_confirmar).pack(**espacamento)
    janela.mainloop()
def validar_login(user, password, janela):
    """Check the entered credentials; close the window or show an error.

    Args:
        user: Widget whose ``get()`` returns the typed user name.
        password: Widget whose ``get()`` returns the typed password.
        janela: Login window; destroyed when the credentials match.

    NOTE(review): the accepted credentials are hard-coded in the source.
    """
    credenciais = (user.get(), password.get())
    if credenciais != ("kl3z", "12345"):
        show_messagebox("Erro", "User ou password incorretos!", box_type="Erro")
        return
    janela.destroy()
def transcrever_audio(arquivo_audio):
    """Transcribe an audio file to text with Google's recogniser (pt-BR).

    Args:
        arquivo_audio: Audio source, passed straight to ``sr.AudioFile``.

    Returns:
        The transcript on success. On failure, a message that starts with
        ``"Erro"`` so callers that test ``texto.startswith("Erro")`` can
        tell a failure from a real transcript.
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(arquivo_audio) as source:
        audio_data = recognizer.record(source)
    try:
        return recognizer.recognize_google(audio_data, language="pt-BR")
    except sr.UnknownValueError:
        return "Erro: Não foi possível entender o áudio."
    except sr.RequestError as e:
        # RequestError is raised for a failed request to the recognition
        # service, so the message reports that instead of a language issue.
        return f"Erro: falha no pedido ao serviço de reconhecimento: {e}"
def salvar_como_pdf(texto, caminho_pdf):
    """Write ``texto`` to a one-page-flow PDF, one cell per input line.

    Args:
        texto: Text to save; split on ``'\\n'``.
        caminho_pdf: Destination path handed to ``FPDF.output``.

    NOTE(review): no import of ``FPDF`` is visible in this part of the
    file — verify that it is imported somewhere before this is called.
    """
    documento = FPDF()
    documento.add_page()
    documento.set_font("Arial", size=12)
    linhas = texto.split('\n')
    for conteudo in linhas:
        documento.cell(200, 10, txt=conteudo, ln=True)
    documento.output(caminho_pdf)
    print(f"Texto salvo com sucesso em {caminho_pdf}")
def transcrever_para_pdf(audio_path, caminho_pdf='transcricao_audio.pdf'):
    """Transcribe ``audio_path`` and save the transcript as a PDF.

    These statements used to run at module level, where ``audio_path`` is
    not defined, so they raised ``NameError`` while the file was loading.
    They are now a function to be called once an audio file is chosen.

    Args:
        audio_path: Audio file to transcribe.
        caminho_pdf: Where to write the PDF.

    Returns:
        ``caminho_pdf`` when a PDF was written; ``None`` when the
        transcription was empty or reported an error (the message is
        printed instead).
    """
    texto = transcrever_audio(audio_path)
    if texto and not texto.startswith("Erro"):
        salvar_como_pdf(texto, caminho_pdf)
        return caminho_pdf
    print(texto)
    return None
def show_messagebox(title, message, box_type="Erro"):
    """Open a small top-level window with a title, a message and an OK button.

    Args:
        title: Used as both the window title and the bold heading.
        message: Body text.
        box_type: Accepted but not used by this implementation.
    """
    dialogo = customtkinter.CTkToplevel()
    dialogo.title(title)
    dialogo.geometry("300x150+500+300")

    conteudo = (
        customtkinter.CTkLabel(dialogo, text=title, font=('Arial', 16, 'bold')),
        customtkinter.CTkLabel(dialogo, text=message),
        customtkinter.CTkButton(dialogo, text="OK", command=dialogo.destroy),
    )
    # Pack top to bottom in declaration order.
    for widget in conteudo:
        widget.pack(pady=10)
def play_video(video_path):
    """Play a video in VLC, falling back to an OpenCV preview window.

    VLC is launched from its default 32-bit Windows install path with
    ``--play-and-exit``. If it cannot be started, the clip is decoded with
    moviepy and shown at 24 fps in a 600x400 window named 'Video'; press
    ``q`` to stop early.

    The previous fallback decoded every frame into a ``PhotoImage`` that was
    never attached to a widget, so nothing was displayed.

    Args:
        video_path: Path of the video file to play.
    """
    vlc_path = "C:\\Program Files (x86)\\VideoLAN\\VLC\\vlc.exe"
    try:
        subprocess.Popen([vlc_path, video_path, '--play-and-exit'])
    except (OSError, ValueError):
        # VLC is missing or could not be launched: use the fallback below.
        pass
    else:
        return

    fps = 24
    window_name = 'Video'
    window_open = False
    video_clip = VideoFileClip(video_path)
    try:
        for frame in video_clip.iter_frames(fps=fps, dtype='uint8'):
            # moviepy yields RGB frames; cv2.imshow expects BGR.
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            cv2.imshow(window_name, cv2.resize(frame, (600, 400)))
            window_open = True
            # waitKey paces playback and lets the window process events.
            if cv2.waitKey(1000 // fps) & 0xFF == ord('q'):
                break
    finally:
        video_clip.close()
        if window_open:
            cv2.destroyWindow(window_name)
def carregar_video():
    """Ask the user for a video file and play it on a background thread.

    Does nothing when the dialog is cancelled.
    """
    # Tk expects the patterns of one file type as a sequence; a single
    # "a;b;c" string is one pattern and is not portable across platforms.
    video_path = filedialog.askopenfilename(
        filetypes=[("Carregar Vídeo", ("*.mp4", "*.avi", "*.mov"))]
    )
    if not video_path:
        return
    video_path = os.path.normpath(video_path)
    threading.Thread(target=play_video, args=(video_path,)).start()
def carregar_audio():
    """Ask the user for an audio file and play it on a background thread.

    Does nothing when the dialog is cancelled. Playback is delegated to
    ``tocar_audio``, which is defined outside this part of the file.
    """
    # Tk expects the patterns of one file type as a sequence; a single
    # "a;b;c" string is one pattern and is not portable across platforms.
    audio_path = filedialog.askopenfilename(
        filetypes=[("Carregar Áudio", ("*.mp3", "*.wav", "*.ogg"))]
    )
    if not audio_path:
        return
    audio_path = os.path.normpath(audio_path)
    threading.Thread(target=tocar_audio, args=(audio_path,)).start()
# --- Application start-up ---------------------------------------------------
# Initialise pygame's mixer module.
pygame.mixer.init()
# Login() runs the login window's own event loop and only returns once that
# loop ends. NOTE(review): its result is not checked, so the main window
# below is built whether or not the credentials were accepted.
Login()
# Main window: fixed 1200x800, not resizable.
janela_principal = customtkinter.CTk()
janela_principal.geometry("1200x800")
janela_principal.title("PJM IA")
janela_principal.resizable(False, False)
# weight=0 on both columns and both rows of the 2x2 grid.
janela_principal.grid_columnconfigure(0, weight=0)
janela_principal.grid_columnconfigure(1, weight=0)
janela_principal.grid_rowconfigure(0, weight=0)
janela_principal.grid_rowconfigure(1, weight=0)
customtkinter.set_appearance_mode("Dark")
# Four 600x400 panels, one per grid cell.
lado_esquerdo1 = customtkinter.CTkFrame(janela_principal, width=600, height=400)
lado_esquerdo1.grid(row=0, column=0, padx=10, pady=10, sticky="nsew")
lado_direito1 = customtkinter.CTkFrame(janela_principal, width=600, height=400)
lado_direito1.grid(row=0, column=1, padx=10, pady=10, sticky="nsew")
# NOTE(review): in the second row the names and columns are crossed —
# "esquerdo" (left) goes to column 1 and "direito" (right) to column 0.
# Confirm whether this is intended.
lado_esquerdo2 = customtkinter.CTkFrame(janela_principal, width=600, height=400)
lado_esquerdo2.grid(row=1, column=1, padx=10, pady=10, sticky="nsew")
lado_direito2 = customtkinter.CTkFrame(janela_principal, width=600, height=400)
lado_direito2.grid(row=1, column=0, padx=10, pady=10, sticky="nsew")
# --- Menu bar ---------------------------------------------------------------
# NOTE(review): attaching the bar to the window (config(menu=...)) does not
# happen in this section — confirm it is done further down the file.
barradetarefas=tk.Menu(janela_principal)
# "Ficheiros" menu: load a video, load an audio recording, use the camera,
# or quit.
Carregar=tk.Menu(barradetarefas, tearoff=0)
Carregar.add_command(label='Carregar Video', command=carregar_video)
Carregar.add_command(label='Carregar gravação som',command=carregar_audio)
# Unlike the two loaders above, which start a worker thread, this command
# calls modelemotionface directly.
Carregar.add_command(label='Usar a Camera', command=modelemotionface)
Carregar.add_separator()
Carregar.add_command(label="Sair", command=janela_principal.destroy)
barradetarefas.add_cascade(label="Ficheiros", menu=Carregar)
# "Recursos" menu: entries have no command attached yet.
Recursos=tk.Menu(barradetarefas, tearoff=0)
Recursos.add_command(label='Análise dos videos')
Recursos.add_command(label='Análise de audio')
Recursos.add_command(label='Outras2')
barradetarefas.add_cascade(label="Recursos", menu=Recursos)
# "Opções" menu: entries have no command attached yet.
Opcoes=tk.Menu(barradetarefas, tearoff=0)
Opcoes.add_command(label='User')
Opcoes.add_command(label='Settings')
Opcoes.add_command(label='Outras')
barradetarefas.add_cascade(label="Opções", menu=Opcoes)
# --- PDF export buttons (top-right panel) -----------------------------------
# Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same filter
# under its current name.
pdfimage = Image.open("pdf.png").resize((50,50), Image.LANCZOS)
# Module-level reference, shared by all four buttons below.
pdficon=ImageTk.PhotoImage(pdfimage)
# Each export option is an icon button in grid row 0 with its caption in
# row 1 of the same column. No command is attached to the buttons here.
pdfspeach = tk.Button(lado_direito1, image=pdficon, borderwidth=0, highlightthickness=0, bg="#2B2B2B")
pdfspeach.grid(row=0, column=0, padx=10)
labelpdfspeach = tk.Label(lado_direito1, text="Audio Transcript", fg="white", bg="#2B2B2B")
labelpdfspeach.grid(row=1, column=0, pady=(5, 10))
pdfspeachFER = tk.Button(lado_direito1, image=pdficon, borderwidth=0, highlightthickness=0, bg="#2B2B2B")
pdfspeachFER.grid(row=0, column=1, padx=10)
labelpdfspeachFER = tk.Label(lado_direito1, text="Audio Transcript\n with FER", fg="white", bg="#2B2B2B")
labelpdfspeachFER.grid(row=1, column=1, pady=(5, 10))
pdfspeachFERandaudio = tk.Button(lado_direito1, image=pdficon, borderwidth=0, highlightthickness=0, bg="#2B2B2B")
pdfspeachFERandaudio.grid(row=0, column=2, padx=10)
labelpdfspeachFERandaudio = tk.Label(lado_direito1, text="Audio Transcript \n with FER & Audio", fg="white", bg="#2B2B2B")
labelpdfspeachFERandaudio.grid(row=1, column=2, pady=(5, 10))
pdfspeachFERandaudioandsentimentanalises = tk.Button(lado_direito1, image=pdficon, borderwidth=0, highlightthickness=0, bg="#2B2B2B")
pdfspeachFERandaudioandsentimentanalises.grid(row=0, column=3, padx=10)
labelpdfspeachFERandaudioandsentimentanalises = tk.Label(lado_direito1, text="Audio Transcript \n with FER, Audio \n & Sentiment Analyses", fg="white", bg="#2B2B2B")
labelpdfspeachFERandaudioandsentimentanalises.grid(row=1, column=3, pady=(5, 10))