{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# CREMA-D: replace clip audio with synthesised emotion phrases\n",
    "\n",
    "For every `.wav` clip in the CREMA-D test split, a phrase associated with the clip's emotion label is synthesised with gTTS and muxed over the matching `.flv` video. Outputs are written next to the inputs as `*_modified.wav` / `*_modified.flv`.\n",
    "\n",
    "**Requirements:** `ffmpeg` on `PATH` and network access (gTTS calls an online service)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import random\n",
    "import shutil\n",
    "import subprocess\n",
    "import tempfile\n",
    "\n",
    "import librosa\n",
    "import soundfile as sf\n",
    "from gtts import gTTS"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Seed for the phrase selection so that Restart & Run All picks the same phrases.\n",
    "SEED = 42\n",
    "\n",
    "# Marker inserted before the extension of every generated file; also used to\n",
    "# recognise (and skip) files produced by an earlier run.\n",
    "MODIFIED_SUFFIX = \"_modified\"\n",
    "\n",
    "# Root of the local CREMA-D copy. Set the CREMA_D_ROOT environment variable to\n",
    "# run the notebook on another machine without editing this cell.\n",
    "DATASET_ROOT = os.environ.get(\n",
    "    \"CREMA_D_ROOT\",\n",
    "    \"C:\\\\Users\\\\garci\\\\OneDrive\\\\Área de Trabalho\\\\Ambiente de trabalho unbunto\\\\Cienciadedados\\\\IPL\\\\Tese\\\\CREMA-D\",\n",
    ")\n",
    "video_dir = os.path.join(DATASET_ROOT, \"VideoFLV\", \"split_dataset\", \"test\")\n",
    "audio_dir = os.path.join(DATASET_ROOT, \"AudioWAV\", \"split_dataset\", \"test\")\n",
    "\n",
    "# Emotion code (third underscore-separated field of a clip's file name) -> emotion name.\n",
    "emotion_labels = {\n",
    "    \"NEU\": \"neutral\",\n",
    "    \"HAP\": \"happiness\",\n",
    "    \"SAD\": \"sadness\",\n",
    "    \"ANG\": \"anger\",\n",
    "    \"FEA\": \"fear\",\n",
    "    \"DIS\": \"disgust\"\n",
    "}\n",
    "\n",
    "# Emotion name -> candidate phrases; one is picked at random per clip.\n",
    "emotion_phrases = {\n",
    "    \"neutral\": [\"I need to buy groceries.\", \"The sky is clear today.\"],\n",
    "    \"happiness\": [\"I just won the lottery!\", \"This is the best day ever!\"],\n",
    "    \"sadness\": [\"I lost my best friend.\", \"Today is a really tough day.\"],\n",
    "    \"anger\": [\"This is unacceptable!\", \"Why did you do that?!\"],\n",
    "    \"fear\": [\"I feel really anxious about this.\", \"I don't think it's safe here.\"],\n",
    "    \"disgust\": [\"That was really disgusting!\", \"I can't stand the smell!\"],\n",
    "    \"surprise\": [\"I wasn't expecting that!\", \"Wow, this is amazing!\"]\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Helpers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _with_suffix(path, suffix=MODIFIED_SUFFIX, ext=None):\n",
    "    \"\"\"Return ``path`` with ``suffix`` inserted before the file extension.\n",
    "\n",
    "    When ``ext`` is given it replaces the original extension.\n",
    "    \"\"\"\n",
    "    root, original_ext = os.path.splitext(path)\n",
    "    if ext is None:\n",
    "        ext = original_ext\n",
    "    return root + suffix + ext\n",
    "\n",
    "\n",
    "def generate_new_audio(original_audio_path, target_emotion):\n",
    "    \"\"\"Synthesise a random phrase for ``target_emotion`` as a WAV file.\n",
    "\n",
    "    Args:\n",
    "        original_audio_path: Path of the source clip. It is only used to derive\n",
    "            the output path; the clip itself is not read or modified.\n",
    "        target_emotion: Key of ``emotion_phrases``.\n",
    "\n",
    "    Returns:\n",
    "        Path of the generated file: the original path with ``_modified``\n",
    "        inserted before a ``.wav`` extension.\n",
    "\n",
    "    Raises:\n",
    "        KeyError: ``target_emotion`` is not a key of ``emotion_phrases``.\n",
    "        subprocess.CalledProcessError: ffmpeg could not convert the speech to WAV.\n",
    "    \"\"\"\n",
    "    new_text = random.choice(emotion_phrases[target_emotion])\n",
    "    new_audio_path = _with_suffix(original_audio_path, ext=\".wav\")\n",
    "    # gTTS writes MP3 data, so it is saved to a temporary .mp3 and transcoded;\n",
    "    # saving it directly would leave MP3 bytes under a .wav file name.\n",
    "    with tempfile.TemporaryDirectory() as tmp_dir:\n",
    "        mp3_path = os.path.join(tmp_dir, \"tts.mp3\")\n",
    "        gTTS(new_text, lang=\"en\").save(mp3_path)\n",
    "        subprocess.run(\n",
    "            [\"ffmpeg\", \"-y\", \"-loglevel\", \"error\", \"-i\", mp3_path, new_audio_path],\n",
    "            check=True,\n",
    "        )\n",
    "    return new_audio_path\n",
    "\n",
    "\n",
    "def replace_audio_in_video(video_path, new_audio_path):\n",
    "    \"\"\"Write a copy of ``video_path`` whose audio track comes from ``new_audio_path``.\n",
    "\n",
    "    The video stream is copied without re-encoding and the output stops at the\n",
    "    end of the shorter input (``-shortest``). An existing output is overwritten.\n",
    "\n",
    "    Args:\n",
    "        video_path: Source video.\n",
    "        new_audio_path: Audio file to use as the only audio track.\n",
    "\n",
    "    Returns:\n",
    "        Path of the new video: ``video_path`` with ``_modified`` inserted\n",
    "        before the extension.\n",
    "\n",
    "    Raises:\n",
    "        subprocess.CalledProcessError: ffmpeg exited with a non-zero status.\n",
    "    \"\"\"\n",
    "    output_video_path = _with_suffix(video_path)\n",
    "    # An argument list (no shell) keeps paths with spaces, quotes or non-ASCII\n",
    "    # characters intact; -y avoids ffmpeg waiting for an overwrite confirmation.\n",
    "    command = [\n",
    "        \"ffmpeg\", \"-y\", \"-loglevel\", \"error\",\n",
    "        \"-i\", video_path,\n",
    "        \"-i\", new_audio_path,\n",
    "        \"-c:v\", \"copy\",\n",
    "        \"-map\", \"0:v:0\",\n",
    "        \"-map\", \"1:a:0\",\n",
    "        \"-shortest\",\n",
    "        output_video_path,\n",
    "    ]\n",
    "    subprocess.run(command, check=True)\n",
    "    return output_video_path"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Run"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if shutil.which(\"ffmpeg\") is None:\n",
    "    raise RuntimeError(\"ffmpeg was not found on PATH\")\n",
    "\n",
    "# Seed in this cell so that re-running it repeats the same phrase choices.\n",
    "random.seed(SEED)\n",
    "\n",
    "processed, skipped, failed = [], [], []\n",
    "# os.listdir order is arbitrary; sorting ties the seeded choices to the same files on every run.\n",
    "for filename in sorted(os.listdir(audio_dir)):\n",
    "    stem, ext = os.path.splitext(filename)\n",
    "    # Ignore non-WAV files and the *_modified.wav clips written by a previous run.\n",
    "    if ext != \".wav\" or stem.endswith(MODIFIED_SUFFIX):\n",
    "        continue\n",
    "\n",
    "    fields = filename.split(\"_\")\n",
    "    emotion_code = fields[2] if len(fields) > 2 else None\n",
    "    if emotion_code not in emotion_labels:\n",
    "        skipped.append((filename, \"unknown emotion code\"))\n",
    "        continue\n",
    "\n",
    "    # Check for the video first so no TTS request is made for clips that cannot be muxed.\n",
    "    video_path = os.path.join(video_dir, stem + \".flv\")\n",
    "    if not os.path.isfile(video_path):\n",
    "        skipped.append((filename, \"no matching video\"))\n",
    "        continue\n",
    "\n",
    "    audio_path = os.path.join(audio_dir, filename)\n",
    "    try:\n",
    "        new_audio = generate_new_audio(audio_path, emotion_labels[emotion_code])\n",
    "        processed.append(replace_audio_in_video(video_path, new_audio))\n",
    "    except subprocess.CalledProcessError as exc:\n",
    "        # Record the ffmpeg failure and continue with the remaining clips.\n",
    "        failed.append((filename, exc.returncode))\n",
    "\n",
    "print(f\"processed={len(processed)} skipped={len(skipped)} failed={len(failed)}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}