#!pip install customtkinter (https://pypi.org/project/customtkinter/0.3/#files)
#!pip3 install Tk interface (https://medium.com/illumination/error-could-not-find-a-version-that-satisfies-the-requirement-tkinter-from-versions-none-753a512dd4ab)
#pip install customtkinter tkVideoPlayer
#pip install opencv-python Pillow
#!pip install moviepy

# NOTE: the import order is kept exactly as in the original. The star import
# from moviepy.editor may shadow names, and the plain imports that follow it
# re-bind them, so reordering could change which objects the names refer to.
import tkinter as tk
import customtkinter
import cv2
from PIL import Image, ImageTk
import threading
from glob import glob
from tkinter import filedialog
import sounddevice as sd
import wavio
import pygame
from moviepy.editor import *
import librosa
import librosa.display
import IPython.display as ipd
import speech_recognition as sr
import numpy as np
import time
import os
from cv2 import dnn
from math import ceil
import subprocess

# Face-detector configuration shared by the prior-box helpers below.
image_mean = np.array([127, 127, 127])
image_std = 128.0
iou_threshold = 0.3
center_variance = 0.1
size_variance = 0.2
min_boxes = [[10.0, 16.0, 24.0], [32.0, 48.0], [64.0, 96.0], [128.0, 192.0, 256.0]]
strides = [8.0, 16.0, 32.0, 64.0]
threshold = 0.5


def define_img_size(image_size):
    """Build the prior boxes for a detector input of ``image_size``.

    ``image_size`` is indexed as ``[width, height]``. For every dimension the
    feature-map size per stride is ``ceil(size / stride)``; the result is
    handed to :func:`generate_priors` together with the module-level
    ``strides`` and ``min_boxes``.
    """
    feature_map_w_h_list = [
        [int(ceil(size / stride)) for stride in strides] for size in image_size
    ]
    # One (shared) stride list per image dimension.
    shrinkage_list = [strides for _ in image_size]
    return generate_priors(feature_map_w_h_list, shrinkage_list, image_size, min_boxes)


def generate_priors(feature_map_list, shrinkage_list, image_size, min_boxes):
    """Return an array of ``[x_center, y_center, w, h]`` priors clipped to [0, 1].

    Prints the number of generated priors as a side effect.
    """
    priors = []
    for index in range(len(feature_map_list[0])):
        scale_w = image_size[0] / shrinkage_list[0][index]
        scale_h = image_size[1] / shrinkage_list[1][index]
        for j in range(feature_map_list[1][index]):
            for i in range(feature_map_list[0][index]):
                x_center = (i + 0.5) / scale_w
                y_center = (j + 0.5) / scale_h
                for min_box in min_boxes[index]:
                    w = min_box / image_size[0]
                    h = min_box / image_size[1]
                    priors.append([x_center, y_center, w, h])
    print("priors nums:{}".format(len(priors)))
    return np.clip(priors, 0.0, 1.0)


def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200):
    """Greedy non-maximum suppression.

    ``box_scores`` is a 2-D array whose last column is the score and whose
    remaining columns are the box. Only the ``candidate_size`` best-scoring
    rows are considered; at most ``top_k`` rows are kept when ``top_k > 0``.
    Returns the kept rows of ``box_scores``.
    """
    scores = box_scores[:, -1]
    boxes = box_scores[:, :-1]
    picked = []
    indexes = np.argsort(scores)[-candidate_size:]
    while len(indexes) > 0:
        current = indexes[-1]  # highest remaining score
        picked.append(current)
        if 0 < top_k == len(picked) or len(indexes) == 1:
            break
        current_box = boxes[current, :]
        indexes = indexes[:-1]
        rest_boxes = boxes[indexes, :]
        iou = iou_of(rest_boxes, np.expand_dims(current_box, axis=0))
        # Drop everything that overlaps the picked box too much.
        indexes = indexes[iou <= iou_threshold]
    return box_scores[picked, :]


def area_of(left_top, right_bottom):
    """Return the area spanned by the two corners (0 when they are inverted)."""
    hw = np.clip(right_bottom - left_top, 0.0, None)
    return hw[..., 0] * hw[..., 1]


def iou_of(boxes0, boxes1, eps=1e-5):
    """Return the intersection-over-union of two sets of corner-form boxes.

    The first two values of the last axis are used as the left-top corner and
    the rest as the right-bottom corner. ``eps`` avoids division by zero.
    """
    overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2])
    overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:])
    overlap_area = area_of(overlap_left_top, overlap_right_bottom)
    area0 = area_of(boxes0[..., :2], boxes0[..., 2:])
    area1 = area_of(boxes1[..., :2], boxes1[..., 2:])
    return overlap_area / (area0 + area1 - overlap_area + eps)


def predict(width, height, confidences, boxes, prob_threshold, iou_threshold=0.3, top_k=-1):
    """Filter raw detections and scale them to pixel coordinates.

    Only the first element of ``boxes`` / ``confidences`` is used. For every
    class index starting at 1 (index 0 is skipped, presumably background),
    detections above ``prob_threshold`` are passed through :func:`hard_nms`.

    Returns ``(boxes_int32, labels, probabilities)``; three empty arrays when
    nothing passes the threshold.
    """
    boxes = boxes[0]
    confidences = confidences[0]
    picked_box_probs = []
    picked_labels = []
    for class_index in range(1, confidences.shape[1]):
        probs = confidences[:, class_index]
        mask = probs > prob_threshold
        probs = probs[mask]
        if probs.shape[0] == 0:
            continue
        subset_boxes = boxes[mask, :]
        box_probs = np.concatenate([subset_boxes, probs.reshape(-1, 1)], axis=1)
        box_probs = hard_nms(box_probs, iou_threshold=iou_threshold, top_k=top_k)
        picked_box_probs.append(box_probs)
        picked_labels.extend([class_index] * box_probs.shape[0])
    if not picked_box_probs:
        return np.array([]), np.array([]), np.array([])
    picked_box_probs = np.concatenate(picked_box_probs)
    # Columns 0/2 are x coordinates, columns 1/3 are y coordinates.
    picked_box_probs[:, 0] *= width
    picked_box_probs[:, 1] *= height
    picked_box_probs[:, 2] *= width
    picked_box_probs[:, 3] *= height
    return (
        picked_box_probs[:, :4].astype(np.int32),
        np.array(picked_labels),
        picked_box_probs[:, 4],
    )


def convert_locations_to_boxes(locations, priors, center_variance, size_variance):
    """Decode regression outputs into center-form boxes using ``priors``.

    ``priors`` gains a leading axis when ``locations`` has one more dimension
    than it does.
    """
    if len(priors.shape) + 1 == len(locations.shape):
        priors = np.expand_dims(priors, 0)
    centers = locations[..., :2] * center_variance * priors[..., 2:] + priors[..., :2]
    sizes = np.exp(locations[..., 2:] * size_variance) * priors[..., 2:]
    return np.concatenate([centers, sizes], axis=len(locations.shape) - 1)


def center_form_to_corner_form(locations):
    """Convert ``[cx, cy, w, h]`` boxes to ``[x1, y1, x2, y2]`` along the last axis."""
    half_size = locations[..., 2:] / 2
    return np.concatenate(
        [locations[..., :2] - half_size, locations[..., :2] + half_size],
        len(locations.shape) - 1,
    )


def FER_live_cam():
    """Define the emotion-label table.

    As written this only binds a local dictionary and returns ``None``; the
    camera pipeline lives in :func:`modelemotionface`.
    """
    emotion_dict = {0: 'neutral', 1: 'happiness', 2: 'surprise', 3: 'sadness',
                    4: 'anger', 5: 'disgust', 6: 'fear'}


def _hex_to_bgr(hex_color):
    """Convert a ``'#RRGGBB'`` string to the ``(B, G, R)`` tuple OpenCV draws with."""
    digits = hex_color.lstrip('#')
    red, green, blue = (int(digits[pos:pos + 2], 16) for pos in (0, 2, 4))
    return blue, green, red


def modelemotionface():
    """Run live face + emotion detection on camera 1 and record the result.

    Annotated frames are shown in a window named ``'Camera'`` (press ``q`` to
    stop) and written to ``camera-test.avi``; afterwards that recording is
    re-encoded to ``teste.avi`` with moviepy.
    """
    # Single source of truth for the recording: the original wrote to
    # 'camera-test.avi' but then tried to re-encode "camera.avi".
    recording_path = 'camera-test.avi'

    cap = cv2.VideoCapture(1)
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    size = (frame_width, frame_height)
    result = cv2.VideoWriter(recording_path, cv2.VideoWriter_fourcc(*'MJPG'), 10, size)

    emotion_dict = {0: 'neutral', 1: 'happiness', 2: 'surprise', 3: 'sadness',
                    4: 'anger', 5: 'disgust', 6: 'fear'}
    emotion_colors = {'neutral': '#FFFFFF', 'happiness': '#00FF00', 'surprise': '#FFFF00',
                      'sadness': '#0000FF', 'anger': '#800000', 'disgust': '#800080',
                      'fear': '#FF0000'}

    try:
        model = cv2.dnn.readNetFromONNX('C:/Users/garci/Downloads/emotion-ferplus-8.onnx')
        model_path = 'C:/Users/garci/Downloads/RFB-320.caffemodel'
        proto_path = 'C:/Users/garci/Downloads/RFB-320.prototxt'
        net = cv2.dnn.readNetFromCaffe(proto_path, model_path)
        input_size = [320, 240]
        width, height = input_size
        priors = define_img_size(input_size)

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Face detection on a resized RGB copy of the frame.
            rect = cv2.resize(frame, (width, height))
            rect = cv2.cvtColor(rect, cv2.COLOR_BGR2RGB)
            net.setInput(cv2.dnn.blobFromImage(rect, 1 / 127.5, (width, height), 127))
            boxes, scores = net.forward(["boxes", "scores"])
            boxes = np.expand_dims(np.reshape(boxes, (-1, 4)), axis=0)
            scores = np.expand_dims(np.reshape(scores, (-1, 2)), axis=0)
            boxes = convert_locations_to_boxes(boxes, priors, 0.1, 0.2)
            boxes = center_form_to_corner_form(boxes)
            boxes, _labels, _probs = predict(frame.shape[1], frame.shape[0], scores, boxes, 0.5)

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            frame_h, frame_w = frame.shape[:2]
            for (x1, y1, x2, y2) in boxes:
                # Clamp to the frame: a box that leaves the image would give
                # an empty crop and make cv2.resize fail.
                x1, y1 = max(int(x1), 0), max(int(y1), 0)
                x2, y2 = min(int(x2), frame_w), min(int(y2), frame_h)
                if x2 <= x1 or y2 <= y1:
                    continue

                cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
                face = cv2.resize(gray[y1:y2, x1:x2], (64, 64)).reshape(1, 1, 64, 64)
                model.setInput(face)
                output = model.forward()
                # NOTE(review): emotion_dict has 7 entries; confirm the model
                # never yields an index outside 0..6.
                pred_emotion = emotion_dict[int(np.argmax(output[0]))]
                # OpenCV needs a numeric BGR tuple, not a '#RRGGBB' string.
                color = _hex_to_bgr(emotion_colors[pred_emotion])
                cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2, lineType=cv2.LINE_AA)
                cv2.putText(frame, pred_emotion, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX,
                            0.8, color, 2, lineType=cv2.LINE_AA)

            result.write(frame)
            cv2.imshow('Camera', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera, flush the recording and close the preview.
        cap.release()
        result.release()
        cv2.destroyAllWindows()

    video_clip = VideoFileClip(recording_path)
    video_clip.write_videofile("teste.avi", codec="libx264")


if __name__ == "__main__":
    FER_live_cam()


def Login():
    """Show the login window and block until it is destroyed."""
    janela = customtkinter.CTk()
    janela.title("Login")
    janela.geometry("400x250")
    janela.resizable(False, False)

    texto = customtkinter.CTkLabel(janela, text="Login")
    texto.pack(padx=10, pady=10)
    user = customtkinter.CTkEntry(janela, placeholder_text="User")
    user.pack(padx=10, pady=10)
    password = customtkinter.CTkEntry(janela, placeholder_text="Password", show='*')
    password.pack(padx=10, pady=10)
    checkbox = customtkinter.CTkCheckBox(janela, text="1ª Utilização")
    checkbox.pack(padx=10, pady=10)
    botao = customtkinter.CTkButton(janela, text="Login",
                                    command=lambda: validar_login(user, password, janela))
    botao.pack(padx=10, pady=10)
    janela.mainloop()


def validar_login(user, password, janela):
    """Close ``janela`` when the entered credentials match, else show an error box.

    ``user`` and ``password`` are entry widgets; their text is read via ``get()``.
    """
    userverifica = user.get()
    passwordverifica = password.get()
    # NOTE(review): credentials are hard-coded in source; move them to a
    # proper credential store before shipping.
    if userverifica == "kl3z" and passwordverifica == "12345":
        janela.destroy()
    else:
        show_messagebox("Erro", "User ou password incorretos!", box_type="Erro")


def transcrever_audio(arquivo_audio):
    """Transcribe an audio file with Google speech recognition (``pt-BR``).

    Returns the transcript, or a human-readable message string when the audio
    cannot be understood or the request fails.
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(arquivo_audio) as source:
        audio_data = recognizer.record(source)
    try:
        return recognizer.recognize_google(audio_data, language="pt-BR")
    except sr.UnknownValueError:
        return "Não foi possível entender o áudio."
    except sr.RequestError as e:
        return f"Liguagem não detetada: {e}"


def salvar_como_pdf(texto, caminho_pdf):
    """Write ``texto`` line by line into a PDF at ``caminho_pdf``."""
    # NOTE(review): FPDF is not imported anywhere in the visible source;
    # presumably `from fpdf import FPDF` is required - confirm.
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    for linha in texto.split('\n'):
        pdf.cell(200, 10, txt=linha, ln=True)
    pdf.output(caminho_pdf)
    print(f"Texto salvo com sucesso em {caminho_pdf}")


# NOTE(review): `audio_path` is not defined at module level in the visible
# source (it may come from an earlier notebook cell); as a plain script this
# line raises NameError - confirm where it is meant to come from.
arquivo_audio = audio_path
texto = transcrever_audio(arquivo_audio)
if texto and not texto.startswith("Erro"):
    caminho_pdf = 'transcricao_audio.pdf'
    salvar_como_pdf(texto, caminho_pdf)
else:
    print(texto)


def show_messagebox(title, message, box_type="Erro"):
    """Open a small top-level window showing ``title``, ``message`` and an OK button.

    ``box_type`` is accepted for callers but not used by the current body.
    """
    Janela_erro = customtkinter.CTkToplevel()
    Janela_erro.title(title)
    Janela_erro.geometry("300x150+500+300")
    label_title = customtkinter.CTkLabel(Janela_erro, text=title, font=('Arial', 16, 'bold'))
    label_title.pack(pady=10)
    label_message = customtkinter.CTkLabel(Janela_erro, text=message)
    label_message.pack(pady=10)
    close_button = customtkinter.CTkButton(Janela_erro, text="OK", command=Janela_erro.destroy)
    close_button.pack(pady=10)


def play_video(video_path):
    """Play ``video_path`` in VLC; fall back to decoding it with moviepy.

    The fallback only runs when the VLC executable cannot be launched.
    """
    vlc_path = "C:\\Program Files (x86)\\VideoLAN\\VLC\\vlc.exe"
    try:
        subprocess.Popen([vlc_path, video_path, '--play-and-exit'])
    except OSError:
        # Popen raises OSError (e.g. FileNotFoundError) when VLC is missing.
        video_clip = VideoFileClip(video_path)

        def update_frame():
            for frame in video_clip.iter_frames(fps=24, dtype='uint8'):
                # NOTE(review): moviepy frames are presumably already RGB -
                # confirm this channel swap is intended.
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                img = Image.fromarray(frame)
                img = img.resize((600, 400))
                # NOTE(review): the image is never attached to a widget here.
                imgTk = ImageTk.PhotoImage(image=img)

        update_frame()


def carregar_video():
    """Ask for a video file and play it on a background thread."""
    video_path = filedialog.askopenfilename(filetypes=[("Carregar Vídeo", "*.mp4;*.avi;*.mov")])
    if video_path:
        video_path = os.path.normpath(video_path)
        threading.Thread(target=play_video, args=(video_path,)).start()


def carregar_audio():
    """Ask for an audio file and play it on a background thread."""
    audio_path = filedialog.askopenfilename(filetypes=[("Carregar Áudio", "*.mp3;*.wav;*.ogg")])
    if audio_path:
        audio_path = os.path.normpath(audio_path)
        # NOTE(review): `tocar_audio` is not defined in the visible source.
        threading.Thread(target=tocar_audio, args=(audio_path,)).start()


pygame.mixer.init()
Login()

# Main window: a fixed 2x2 grid of frames.
janela_principal = customtkinter.CTk()
janela_principal.geometry("1200x800")
janela_principal.title("PJM IA")
janela_principal.resizable(False, False)
janela_principal.grid_columnconfigure(0, weight=0)
janela_principal.grid_columnconfigure(1, weight=0)
janela_principal.grid_rowconfigure(0, weight=0)
janela_principal.grid_rowconfigure(1, weight=0)
customtkinter.set_appearance_mode("Dark")

lado_esquerdo1 = customtkinter.CTkFrame(janela_principal, width=600, height=400)
lado_esquerdo1.grid(row=0, column=0, padx=10, pady=10, sticky="nsew")
lado_direito1 = customtkinter.CTkFrame(janela_principal, width=600, height=400)
lado_direito1.grid(row=0, column=1, padx=10, pady=10, sticky="nsew")
lado_esquerdo2 = customtkinter.CTkFrame(janela_principal, width=600, height=400)
lado_esquerdo2.grid(row=1, column=1, padx=10, pady=10, sticky="nsew")
lado_direito2 = customtkinter.CTkFrame(janela_principal, width=600, height=400)
lado_direito2.grid(row=1, column=0, padx=10, pady=10, sticky="nsew")

# Menu bar: "Ficheiros" (load / camera / quit) and "Recursos".
barradetarefas = tk.Menu(janela_principal)
Carregar = tk.Menu(barradetarefas, tearoff=0)
Carregar.add_command(label='Carregar Video', command=carregar_video)
Carregar.add_command(label='Carregar gravação som', command=carregar_audio)
Carregar.add_command(label='Usar a Camera', command=modelemotionface)
Carregar.add_separator()
Carregar.add_command(label="Sair", command=janela_principal.destroy)
barradetarefas.add_cascade(label="Ficheiros", menu=Carregar)

Recursos = tk.Menu(barradetarefas, tearoff=0)
Recursos.add_command(label='Análise dos videos')
Recursos.add_command(label='Análise de audio')
Recursos.add_command(label='Outras2')
# Finish the menu bar: attach "Recursos" and build the "Opções" menu.
barradetarefas.add_cascade(label="Recursos", menu=Recursos)

Opcoes = tk.Menu(barradetarefas, tearoff=0)
Opcoes.add_command(label='User')
Opcoes.add_command(label='Settings')
Opcoes.add_command(label='Outras')
barradetarefas.add_cascade(label="Opções", menu=Opcoes)

# Shared 50x50 PDF icon. Image.ANTIALIAS was removed in Pillow 10;
# Image.LANCZOS is the same filter under its current name.
pdfimage = Image.open("pdf.png").resize((50, 50), Image.LANCZOS)
# Kept in a module-level name so Tk does not lose the image to garbage collection.
pdficon = ImageTk.PhotoImage(pdfimage)

# Row 0 holds the icon buttons, row 1 their captions, one column per report.
pdfspeach = tk.Button(lado_direito1, image=pdficon, borderwidth=0,
                      highlightthickness=0, bg="#2B2B2B")
pdfspeach.grid(row=0, column=0, padx=10)
labelpdfspeach = tk.Label(lado_direito1, text="Audio Transcript",
                          fg="white", bg="#2B2B2B")
labelpdfspeach.grid(row=1, column=0, pady=(5, 10))

pdfspeachFER = tk.Button(lado_direito1, image=pdficon, borderwidth=0,
                         highlightthickness=0, bg="#2B2B2B")
pdfspeachFER.grid(row=0, column=1, padx=10)
labelpdfspeachFER = tk.Label(lado_direito1, text="Audio Transcript\n with FER",
                             fg="white", bg="#2B2B2B")
labelpdfspeachFER.grid(row=1, column=1, pady=(5, 10))

pdfspeachFERandaudio = tk.Button(lado_direito1, image=pdficon, borderwidth=0,
                                 highlightthickness=0, bg="#2B2B2B")
pdfspeachFERandaudio.grid(row=0, column=2, padx=10)
labelpdfspeachFERandaudio = tk.Label(lado_direito1,
                                     text="Audio Transcript \n with FER & Audio",
                                     fg="white", bg="#2B2B2B")
labelpdfspeachFERandaudio.grid(row=1, column=2, pady=(5, 10))

pdfspeachFERandaudioandsentimentanalises = tk.Button(lado_direito1, image=pdficon,
                                                     borderwidth=0, highlightthickness=0,
                                                     bg="#2B2B2B")
pdfspeachFERandaudioandsentimentanalises.grid(row=0, column=3, padx=10)
labelpdfspeachFERandaudioandsentimentanalises = tk.Label(
    lado_direito1,
    text="Audio Transcript \n with FER, Audio \n & Sentiment Analyses",
    fg="white", bg="#2B2B2B")
labelpdfspeachFERandaudioandsentimentanalises.grid(row=1, column=3, pady=(5, 10))