# INTUIA/Testes/Versão final2.py

# Dependencies (install with pip before running):
#   pip install customtkinter tkVideoPlayer   (https://pypi.org/project/customtkinter/0.3/#files)
#   pip install opencv-python Pillow
#   pip install moviepy
# Tk interface (tkinter): "pip3 install tkinter" does not work, see
#   https://medium.com/illumination/error-could-not-find-a-version-that-satisfies-the-requirement-tkinter-from-versions-none-753a512dd4ab

import tkinter as tk
import customtkinter
import cv2
from PIL import Image, ImageTk
import threading
from glob import glob
from tkinter import filedialog
import sounddevice as sd
import wavio
import pygame
from moviepy.editor import *
import librosa
import librosa.display
import IPython.display as ipd
import speech_recognition as sr
import numpy as np
import time
import os
from cv2 import dnn
from math import ceil
import subprocess

# --- Face-detector pre/post-processing constants -----------------------------
# NOTE(review): only `strides` and `min_boxes` are read by name in the code
# visible here (define_img_size); the scalar values below are repeated as
# literals at the call sites in modelemotionface/predict.
image_mean = np.array([127, 127, 127])
image_std = 128.0

# Score / overlap thresholds.
threshold = 0.5
iou_threshold = 0.3

# Scaling factors applied to the predicted centre offsets and sizes.
center_variance = 0.1
size_variance = 0.2

# One stride per feature-map level; min_boxes[level] lists the box side
# lengths (same units as the input image size) generated at that level.
strides = [8.0, 16.0, 32.0, 64.0]
min_boxes = [
    [10.0, 16.0, 24.0],
    [32.0, 48.0],
    [64.0, 96.0],
    [128.0, 192.0, 256.0],
]

def define_img_size(image_size):
    """Build the prior boxes for a detector input of the given size.

    Args:
        image_size: Sequence of input dimensions; generate_priors reads
            element 0 as the width and element 1 as the height.

    Returns:
        The array produced by generate_priors for the module-level
        ``strides`` and ``min_boxes``.
    """
    # Feature-map extent along each dimension, one entry per stride.
    feature_map_w_h_list = [
        [int(ceil(size / stride)) for stride in strides]
        for size in image_size
    ]
    # Every dimension shares the same stride list.
    shrinkage_list = [strides for _ in image_size]
    return generate_priors(
        feature_map_w_h_list, shrinkage_list, image_size, min_boxes
    )

def generate_priors(feature_map_list, shrinkage_list, image_size, min_boxes):
    """Enumerate centre-form prior boxes for every feature-map cell.

    Args:
        feature_map_list: ``[widths, heights]`` — cell counts per level.
        shrinkage_list: ``[x_strides, y_strides]`` — stride per level.
        image_size: ``(width, height)`` of the network input.
        min_boxes: Per-level lists of box side lengths.

    Returns:
        Array of ``[x_center, y_center, w, h]`` rows, each value relative to
        the image size and clipped to ``[0, 1]``. Also prints the box count.
    """
    anchors = []
    for level, n_cols in enumerate(feature_map_list[0]):
        img_w = image_size[0]
        scale_w = img_w / shrinkage_list[0][level]
        img_h = image_size[1]
        scale_h = img_h / shrinkage_list[1][level]
        n_rows = feature_map_list[1][level]
        # Rows outermost, then columns, then box sizes — this fixes the order
        # in which priors line up with the network outputs.
        for row in range(n_rows):
            y_center = (row + 0.5) / scale_h
            for col in range(n_cols):
                x_center = (col + 0.5) / scale_w
                anchors.extend(
                    [x_center, y_center, side / img_w, side / img_h]
                    for side in min_boxes[level]
                )
    print("priors nums:{}".format(len(anchors)))
    return np.clip(anchors, 0.0, 1.0)

def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200):
    """Greedy non-maximum suppression over scored boxes.

    Args:
        box_scores: 2-D array whose last column is the score and whose
            remaining columns are the box coordinates handed to iou_of.
        iou_threshold: Boxes overlapping a kept box by more than this are
            discarded.
        top_k: Stop after keeping this many boxes; values <= 0 keep all.
        candidate_size: Only the highest-scoring ``candidate_size`` boxes are
            considered.

    Returns:
        The rows of ``box_scores`` that were kept, best score first.
    """
    confidences = box_scores[:, -1]
    corners = box_scores[:, :-1]
    # argsort is ascending, so the best candidate is always the last index.
    remaining = np.argsort(confidences)[-candidate_size:]
    kept = []
    while len(remaining) > 0:
        best = remaining[-1]
        kept.append(best)
        hit_top_k = top_k > 0 and len(kept) == top_k
        if hit_top_k or len(remaining) == 1:
            break
        remaining = remaining[:-1]
        overlap = iou_of(
            corners[remaining, :],
            np.expand_dims(corners[best, :], axis=0),
        )
        # Drop every candidate that overlaps the box just kept too much.
        remaining = remaining[overlap <= iou_threshold]
    return box_scores[kept, :]

def area_of(left_top, right_bottom):
    """Return the area of boxes given by their two opposite corners.

    Both arguments hold ``(x, y)`` pairs in their last axis. Boxes whose
    right-bottom corner lies before the left-top corner get area 0.
    """
    extent = np.clip(right_bottom - left_top, 0.0, None)
    first_side, second_side = extent[..., 0], extent[..., 1]
    return first_side * second_side

def iou_of(boxes0, boxes1, eps=1e-5):
    """Intersection-over-union of corner-form boxes (broadcast together).

    Args:
        boxes0, boxes1: Arrays whose last axis is ``[x1, y1, x2, y2]``.
        eps: Small constant added to the denominator to avoid dividing by 0.

    Returns:
        Array of IoU values.
    """
    inter_top_left = np.maximum(boxes0[..., :2], boxes1[..., :2])
    inter_bottom_right = np.minimum(boxes0[..., 2:], boxes1[..., 2:])
    intersection = area_of(inter_top_left, inter_bottom_right)

    first_area = area_of(boxes0[..., :2], boxes0[..., 2:])
    second_area = area_of(boxes1[..., :2], boxes1[..., 2:])
    union = first_area + second_area - intersection
    return intersection / (union + eps)

def predict(width,height,confidences,boxes, prob_threshold,iou_threshold=0.3,top_k=-1):
    """Filter raw detections by score, apply NMS and scale boxes to pixels.

    Args:
        width, height: Factors applied to the x and y box coordinates.
        confidences: Batched class scores; only batch element 0 is used and
            class column 0 is skipped.
        boxes: Batched boxes; only batch element 0 is used.
        prob_threshold: Minimum score for a box to be considered.
        iou_threshold: Overlap threshold forwarded to hard_nms.
        top_k: Per-class limit forwarded to hard_nms.

    Returns:
        ``(boxes, labels, probs)``: int32 box coordinates, class index per
        box and score per box. Three empty arrays when nothing passes the
        threshold.
    """
    boxes = boxes[0]
    confidences = confidences[0]
    kept_per_class = []
    kept_labels = []
    for class_index in range(1, confidences.shape[1]):
        class_probs = confidences[:, class_index]
        above = class_probs > prob_threshold
        if not above.any():
            continue
        candidates = np.concatenate(
            [boxes[above, :], class_probs[above].reshape(-1, 1)], axis=1
        )
        survivors = hard_nms(candidates, iou_threshold=iou_threshold, top_k=top_k)
        kept_per_class.append(survivors)
        kept_labels.extend([class_index] * survivors.shape[0])

    if not kept_per_class:
        return np.array([]), np.array([]), np.array([])

    merged = np.concatenate(kept_per_class)
    # Columns 0/2 are x coordinates, 1/3 are y coordinates.
    for column, factor in enumerate((width, height, width, height)):
        merged[:, column] *= factor
    return (
        merged[:, :4].astype(np.int32),
        np.array(kept_labels),
        merged[:, 4],
    )

def convert_locations_to_boxes(locations, priors, center_variance,
                               size_variance):
    """Decode regression outputs into centre-form boxes using the priors.

    Args:
        locations: Array whose last axis is ``[dx, dy, dw, dh]``.
        priors: Array whose last axis is ``[cx, cy, w, h]``; a leading axis
            is added when it has exactly one dimension fewer than
            ``locations``.
        center_variance: Scale applied to the centre offsets.
        size_variance: Scale applied to the size exponents.

    Returns:
        Array shaped like ``locations`` with ``[cx, cy, w, h]`` boxes.
    """
    if locations.ndim == priors.ndim + 1:
        priors = priors[np.newaxis, ...]
    prior_centers = priors[..., :2]
    prior_sizes = priors[..., 2:]
    centers = locations[..., :2] * center_variance * prior_sizes + prior_centers
    sizes = np.exp(locations[..., 2:] * size_variance) * prior_sizes
    return np.concatenate([centers, sizes], axis=locations.ndim - 1)

def center_form_to_corner_form(locations):
    """Convert ``[cx, cy, w, h]`` boxes to ``[x1, y1, x2, y2]`` boxes.

    The conversion is applied along the last axis; all leading axes are kept.
    """
    centers = locations[..., :2]
    half_extent = locations[..., 2:] / 2
    return np.concatenate(
        [centers - half_extent, centers + half_extent],
        locations.ndim - 1,
    )

def FER_live_cam():
    """Build the emotion label table locally and return None.

    NOTE(review): the table is neither returned nor used, so calling this
    function currently has no observable effect — presumably the camera loop
    in modelemotionface was meant to live here; confirm.
    """
    emotion_names = ('neutral', 'happiness', 'surprise', 'sadness', 'anger', 'disgust', 'fear')
    emotion_dict = dict(enumerate(emotion_names))

def _hex_to_bgr(hex_color):
    """Convert a '#RRGGBB' string into the (blue, green, red) int tuple OpenCV draws with."""
    digits = hex_color.lstrip('#')
    red, green, blue = (int(digits[pos:pos + 2], 16) for pos in (0, 2, 4))
    return (blue, green, red)


def modelemotionface():
    """Record the camera while labelling every detected face with an emotion.

    Opens camera index 1, detects faces with the RFB-320 Caffe network,
    classifies each face crop with the FER+ ONNX network, draws the box and
    label on the frame, shows it in the 'Camera' window and appends it to
    'camera-test.avi'. The loop ends when the camera stops delivering frames
    or 'q' is pressed. The recording is then re-encoded to 'teste.avi'.

    The capture, the writer and the preview window are always released, even
    if a model fails to load or a frame fails to process.

    Returns:
        None.
    """
    raw_video_path = 'camera-test.avi'
    emotion_dict = {0: 'neutral', 1: 'happiness', 2: 'surprise', 3: 'sadness', 4: 'anger', 5: 'disgust', 6: 'fear'}
    emotion_colors = {'neutral': '#FFFFFF', 'happiness': '#00FF00', 'surprise': '#FFFF00','sadness': '#0000FF', 'anger': '#800000', 'disgust': '#800080', 'fear': '#FF0000'}
    # OpenCV drawing calls need numeric BGR tuples; '#RRGGBB' strings are rejected.
    emotion_colors_bgr = {name: _hex_to_bgr(code) for name, code in emotion_colors.items()}
    fallback_color = (255, 255, 255)

    cap = cv2.VideoCapture(1)
    if not cap.isOpened():
        # Nothing to record: skip model loading and the re-encode step.
        cap.release()
        print("Não foi possível abrir a câmara.")
        return

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    size = (frame_width, frame_height)
    result = cv2.VideoWriter(raw_video_path, cv2.VideoWriter_fourcc(*'MJPG'), 10, size)
    frames_written = 0
    try:
        model = cv2.dnn.readNetFromONNX('C:/Users/garci/Downloads/emotion-ferplus-8.onnx')
        model_path = 'C:/Users/garci/Downloads/RFB-320.caffemodel'
        proto_path = 'C:/Users/garci/Downloads/RFB-320.prototxt'
        net = cv2.dnn.readNetFromCaffe(proto_path, model_path)

        input_size = [320, 240]
        width, height = input_size
        priors = define_img_size(input_size)

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Face detection on a resized RGB copy of the frame.
            rect = cv2.resize(frame, (width, height))
            rect = cv2.cvtColor(rect, cv2.COLOR_BGR2RGB)
            net.setInput(cv2.dnn.blobFromImage(rect, 1 / 127.5, (width, height), 127))
            boxes, scores = net.forward(["boxes", "scores"])
            boxes = np.expand_dims(np.reshape(boxes, (-1, 4)), axis=0)
            scores = np.expand_dims(np.reshape(scores, (-1, 2)), axis=0)
            boxes = convert_locations_to_boxes(boxes, priors, 0.1, 0.2)
            boxes = center_form_to_corner_form(boxes)
            frame_h, frame_w = frame.shape[:2]
            boxes, _, _ = predict(frame_w, frame_h, scores, boxes, 0.5)

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            for box in boxes:
                # Clamp to the frame: detections may extend past the border,
                # and a negative index would slice from the wrong end.
                x1 = max(int(box[0]), 0)
                y1 = max(int(box[1]), 0)
                x2 = min(int(box[2]), frame_w)
                y2 = min(int(box[3]), frame_h)
                if x2 <= x1 or y2 <= y1:
                    continue  # empty crop, cv2.resize would fail

                face = cv2.resize(gray[y1:y2, x1:x2], (64, 64)).reshape(1, 1, 64, 64)
                model.setInput(face)
                output = model.forward()
                pred_index = int(np.argmax(output[0]))
                # NOTE(review): the label table has 7 entries; fall back instead
                # of raising KeyError if the model emits an index outside it —
                # confirm the model's class count.
                pred_emotion = emotion_dict.get(pred_index, 'unknown')
                color = emotion_colors_bgr.get(pred_emotion, fallback_color)

                cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2, lineType=cv2.LINE_AA)
                cv2.putText(frame, pred_emotion, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2, lineType=cv2.LINE_AA)

            result.write(frame)
            frames_written += 1
            cv2.imshow('Camera', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        cap.release()
        result.release()
        cv2.destroyAllWindows()

    if frames_written == 0:
        # No frame was recorded, so there is nothing to re-encode.
        return

    # Re-encode the file that was just recorded.
    video_clip = VideoFileClip(raw_video_path)
    try:
        video_clip.write_videofile("teste.avi", codec="libx264")
    finally:
        video_clip.close()

# Script entry guard: runs FER_live_cam() when the file is executed directly.
# NOTE(review): the module-level statements further down (mixer init, Login(),
# main window construction) are not under this guard and run on import too.
if __name__ == "__main__":
    FER_live_cam()

def Login():
    """Show the login window and block until it is destroyed.

    The window holds a title label, user and password entries, a
    "1ª Utilização" checkbox and a "Login" button that hands the two entries
    and the window to validar_login.

    NOTE(review): the checkbox state is never read, and this function returns
    None however the window was closed — confirm that is intended.
    """
    login_window = customtkinter.CTk()
    login_window.title("Login")
    login_window.geometry("400x250")
    login_window.resizable(False, False)

    # Every widget uses the same outer padding.
    spacing = {"padx": 10, "pady": 10}

    customtkinter.CTkLabel(login_window, text="Login").pack(**spacing)

    user_entry = customtkinter.CTkEntry(login_window, placeholder_text="User")
    user_entry.pack(**spacing)

    password_entry = customtkinter.CTkEntry(login_window, placeholder_text="Password", show='*')
    password_entry.pack(**spacing)

    customtkinter.CTkCheckBox(login_window, text="1ª Utilização").pack(**spacing)

    def submit():
        validar_login(user_entry, password_entry, login_window)

    customtkinter.CTkButton(login_window, text="Login", command=submit).pack(**spacing)
    login_window.mainloop()

def validar_login(user, password, janela):
    """Close the login window when the entered credentials match.

    Args:
        user: Widget whose ``get()`` returns the typed user name.
        password: Widget whose ``get()`` returns the typed password.
        janela: Window destroyed on success.

    On a mismatch an error dialog is shown and the window stays open.

    NOTE(review): the accepted credentials are hard-coded in plain text.
    """
    typed = (user.get(), password.get())
    if typed != ("kl3z", "12345"):
        show_messagebox("Erro", "User ou password incorretos!", box_type="Erro")
        return
    janela.destroy()

def transcrever_audio(arquivo_audio):
    """Transcribe an audio file to text with Google's recognizer (pt-BR).

    Args:
        arquivo_audio: Audio source passed straight to ``sr.AudioFile``.

    Returns:
        The recognised text, or a human-readable message when the audio could
        not be understood or the recognition request failed.
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(arquivo_audio) as source:
        audio_data = recognizer.record(source)
        try:
            return recognizer.recognize_google(audio_data, language="pt-BR")
        except sr.UnknownValueError:
            return "Não foi possível entender o áudio."
        except sr.RequestError as e:
            # NOTE(review): RequestError presumably signals a failed API
            # request rather than a language problem; message text kept
            # unchanged for callers — confirm wording.
            return f"Liguagem não detetada: {e}"


def salvar_como_pdf(texto, caminho_pdf):
    """Write ``texto`` to a PDF at ``caminho_pdf``, one cell per text line.

    Args:
        texto: Text to save; split on newlines.
        caminho_pdf: Destination path of the PDF file.
    """
    # NOTE(review): FPDF is not imported in the visible part of this file —
    # presumably ``from fpdf import FPDF`` is required; confirm.
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    for linha in texto.split('\n'):
        pdf.cell(200, 10, txt=linha, ln=True)

    pdf.output(caminho_pdf)
    print(f"Texto salvo com sucesso em {caminho_pdf}")

def show_messagebox(title, message, box_type="Erro"):
    """Open a small top-level dialog with a bold title, a message and an OK button.

    Args:
        title: Used as both the window title and the bold heading.
        message: Text shown under the heading.
        box_type: Accepted for callers; not read by this function.
    """
    dialog = customtkinter.CTkToplevel()
    dialog.title(title)
    dialog.geometry("300x150+500+300")

    heading_font = ('Arial', 16, 'bold')
    customtkinter.CTkLabel(dialog, text=title, font=heading_font).pack(pady=10)
    customtkinter.CTkLabel(dialog, text=message).pack(pady=10)
    # OK simply closes the dialog.
    customtkinter.CTkButton(dialog, text="OK", command=dialog.destroy).pack(pady=10)

def play_video(video_path):
    """Play a video file, preferring an external VLC process.

    Launches VLC from a hard-coded install path with '--play-and-exit'. If
    launching raises, the file is decoded frame by frame with moviepy instead.

    Args:
        video_path: Path of the video file to play.
    """
    vlc_path = "C:\\Program Files (x86)\\VideoLAN\\VLC\\vlc.exe"
    try:
        # Popen does not wait: playback happens in the separate VLC process.
        subprocess.Popen([vlc_path, video_path,'--play-and-exit'])
    except Exception as e:
        # NOTE(review): `e` is never logged or inspected, and any exception
        # type (not only a missing VLC executable) lands in this fallback.
        # NOTE(review): the clip is never closed.
        video_clip = VideoFileClip(video_path)
        def update_frame():
            """Decode the clip at 24 fps and build a 600x400 Tk image per frame."""
            for frame in video_clip.iter_frames(fps=24, dtype='uint8'):
                # NOTE(review): moviepy presumably yields RGB frames already,
                # in which case this swap inverts the channels — confirm.
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                img = Image.fromarray(frame)
                img = img.resize((600, 400))
                # NOTE(review): imgTk is never attached to a widget, so the
                # fallback decodes the whole video without displaying it.
                imgTk = ImageTk.PhotoImage(image=img)
        update_frame()

def carregar_video():
    """Ask the user for a video file and play it on a background thread.

    Does nothing when the dialog is cancelled.
    """
    # Patterns are passed as a tuple so the filter works on every platform;
    # a single ';'-joined string is only understood by the Windows dialog.
    video_patterns = ("*.mp4", "*.avi", "*.mov")
    video_path = filedialog.askopenfilename(filetypes=[("Carregar Vídeo", video_patterns)])
    if video_path:
        video_path = os.path.normpath(video_path)
        # Keep the Tk event loop responsive while the player starts.
        threading.Thread(target=play_video, args=(video_path,)).start()

def carregar_audio():
    """Ask the user for an audio file and play it on a background thread.

    Does nothing when the dialog is cancelled.
    """
    # Patterns are passed as a tuple so the filter works on every platform;
    # a single ';'-joined string is only understood by the Windows dialog.
    audio_patterns = ("*.mp3", "*.wav", "*.ogg")
    audio_path = filedialog.askopenfilename(filetypes=[("Carregar Áudio", audio_patterns)])
    if audio_path:
        audio_path = os.path.normpath(audio_path)
        # NOTE(review): tocar_audio is not defined in the visible part of this
        # file — presumably declared further down; confirm.
        threading.Thread(target=tocar_audio, args=(audio_path,)).start()

# --- Application start-up -----------------------------------------------------
# Initialise the pygame mixer, then show the login window. Login() blocks in
# its own mainloop and its result is not checked here.
# NOTE(review): execution continues to the main window whenever the login
# window is destroyed — confirm that closing it should not stop the program.
pygame.mixer.init()
Login()

# Main window: fixed 1200x800, laid out as a 2x2 grid of 600x400 frames.
janela_principal = customtkinter.CTk()
janela_principal.geometry("1200x800")
janela_principal.title("PJM IA")
janela_principal.resizable(False, False)
# weight=0 keeps rows/columns from stretching.
janela_principal.grid_columnconfigure(0, weight=0)
janela_principal.grid_columnconfigure(1, weight=0)
janela_principal.grid_rowconfigure(0, weight=0)
janela_principal.grid_rowconfigure(1, weight=0)
customtkinter.set_appearance_mode("Dark")
lado_esquerdo1 = customtkinter.CTkFrame(janela_principal, width=600, height=400)
lado_esquerdo1.grid(row=0, column=0, padx=10, pady=10, sticky="nsew")
lado_direito1 = customtkinter.CTkFrame(janela_principal, width=600, height=400)
lado_direito1.grid(row=0, column=1, padx=10, pady=10, sticky="nsew")
# NOTE(review): in the second row the names and columns are crossed
# (lado_esquerdo2 -> column 1, lado_direito2 -> column 0) — confirm intent.
lado_esquerdo2 = customtkinter.CTkFrame(janela_principal, width=600, height=400)
lado_esquerdo2.grid(row=1, column=1, padx=10, pady=10, sticky="nsew")
lado_direito2 = customtkinter.CTkFrame(janela_principal, width=600, height=400)
lado_direito2.grid(row=1, column=0, padx=10, pady=10, sticky="nsew")
# Menu bar with three cascades: "Ficheiros", "Recursos" and "Opções".
# NOTE(review): the menu bar is not attached to the window in this section —
# presumably done further down (e.g. janela_principal.config(menu=...)); confirm.
barradetarefas=tk.Menu(janela_principal)
# "Ficheiros": load media, start the camera recogniser, or quit.
Carregar=tk.Menu(barradetarefas, tearoff=0)
Carregar.add_command(label='Carregar Video', command=carregar_video)
Carregar.add_command(label='Carregar gravação som',command=carregar_audio)
Carregar.add_command(label='Usar a Camera', command=modelemotionface)
Carregar.add_separator()
Carregar.add_command(label="Sair", command=janela_principal.destroy)
barradetarefas.add_cascade(label="Ficheiros", menu=Carregar)
# "Recursos": entries have no command yet.
Recursos=tk.Menu(barradetarefas, tearoff=0)
Recursos.add_command(label='Análise dos videos')
Recursos.add_command(label='Análise de audio')
Recursos.add_command(label='Outras2')
barradetarefas.add_cascade(label="Recursos", menu=Recursos)
# "Opções": entries have no command yet.
Opcoes=tk.Menu(barradetarefas, tearoff=0)
Opcoes.add_command(label='User')
Opcoes.add_command(label='Settings')
Opcoes.add_command(label='Outras')
barradetarefas.add_cascade(label="Opções", menu=Opcoes)
# --- PDF export buttons (top-right frame) ------------------------------------
# One shared 50x50 icon; Image.LANCZOS is the filter formerly exposed as
# Image.ANTIALIAS, which recent Pillow releases no longer provide.
pdfimage = Image.open("pdf.png").resize((50,50), Image.LANCZOS)
# Keep a module-level reference so Tk does not lose the image.
pdficon=ImageTk.PhotoImage(pdfimage)
# Each column holds an icon button (row 0) and its caption (row 1).
# NOTE(review): none of the buttons has a command bound in this section.
pdfspeach = tk.Button(lado_direito1, image=pdficon, borderwidth=0, highlightthickness=0, bg="#2B2B2B")
pdfspeach.grid(row=0, column=0, padx=10)
labelpdfspeach = tk.Label(lado_direito1, text="Audio Transcript", fg="white", bg="#2B2B2B")
labelpdfspeach.grid(row=1, column=0, pady=(5, 10))
pdfspeachFER = tk.Button(lado_direito1, image=pdficon, borderwidth=0, highlightthickness=0, bg="#2B2B2B")
pdfspeachFER.grid(row=0, column=1, padx=10)
labelpdfspeachFER = tk.Label(lado_direito1, text="Audio Transcript\n with FER", fg="white", bg="#2B2B2B")
labelpdfspeachFER.grid(row=1, column=1, pady=(5, 10))
pdfspeachFERandaudio = tk.Button(lado_direito1, image=pdficon, borderwidth=0, highlightthickness=0, bg="#2B2B2B")
pdfspeachFERandaudio.grid(row=0, column=2, padx=10)
labelpdfspeachFERandaudio = tk.Label(lado_direito1, text="Audio Transcript \n with FER & Audio", fg="white", bg="#2B2B2B")
labelpdfspeachFERandaudio.grid(row=1, column=2, pady=(5, 10))
pdfspeachFERandaudioandsentimentanalises = tk.Button(lado_direito1, image=pdficon, borderwidth=0, highlightthickness=0, bg="#2B2B2B")
pdfspeachFERandaudioandsentimentanalises.grid(row=0, column=3, padx=10)
labelpdfspeachFERandaudioandsentimentanalises = tk.Label(lado_direito1, text="Audio Transcript \n with FER, Audio \n & Sentiment Analyses", fg="white", bg="#2B2B2B")
labelpdfspeachFERandaudioandsentimentanalises.grid(row=1, column=3, pady=(5, 10))