{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "1f540c43", "metadata": {}, "outputs": [], "source": [ "import os\n", "import tkinter as tk\n", "from tkinter import filedialog, simpledialog, messagebox\n", "import re\n", "from datetime import datetime\n", "import pandas as pd\n", "import pdfplumber\n", "from tkinter import messagebox\n", "import sys\n" ] }, { "cell_type": "code", "execution_count": 43, "id": "74f35480", "metadata": {}, "outputs": [], "source": [ "START_MARKER = \"Planeamento e execução da formação – Respostas:\"\n", "END_MARKER = \"Ação dos formadores\"\n", "OUT_DIR = \"output_tables\"\n", "os.makedirs(OUT_DIR, exist_ok=True)\n", "\n", "root = tk.Tk()\n", "root.withdraw() \n", "root.update()\n" ] }, { "cell_type": "code", "execution_count": 44, "id": "8b51acae", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'D:\\\\DAQ\\\\Relatorios\\\\2CFPQP-ART\\\\2CFPQP_RAI_Art.pdf'" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pdf_path = filedialog.askopenfilename(\n", " title=\"Seleciona o PDF\",\n", " filetypes=[(\"PDF files\", \"*.pdf\"), (\"All files\", \"*.*\")]\n", ")\n", "\n", "if not pdf_path:\n", " messagebox.showerror(\"Erro\", \"Nenhum PDF selecionado.\")\n", " raise SystemExit\n", "\n", "pdf_path = os.path.abspath(pdf_path)\n", "pdf_path\n" ] }, { "cell_type": "code", "execution_count": 45, "id": "e62c1fbb", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'2CFPQP-Art'" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "course_name = simpledialog.askstring(\n", " title=\"Nome do Curso\",\n", " prompt=\"Introduz o nome do curso:\"\n", ")\n", "\n", "if not course_name:\n", " messagebox.showerror(\"Erro\", \"Nome do curso não foi preenchido.\")\n", " raise SystemExit\n", "\n", "course_name\n" ] }, { "cell_type": "code", "execution_count": 46, "id": "212eb52c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'2024'" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "year = simpledialog.askstring(\n", " title=\"Ano\",\n", " prompt=\"Introduz o ano (ex: 2024):\"\n", ")\n", "\n", "if not year or not year.isdigit():\n", " messagebox.showerror(\"Erro\", \"Ano inválido.\")\n", " raise SystemExit\n", "\n", "year\n" ] }, { "cell_type": "code", "execution_count": 47, "id": "5a225637", "metadata": {}, "outputs": [], "source": [ "def norm_text(s: str) -> str:\n", " if not s:\n", " return \"\"\n", " s = s.replace(\"–\", \"-\")\n", " s = re.sub(r\"\\s+\", \" \", s).strip().lower()\n", " return s\n", "\n", "def clean_cell(x) -> str:\n", " if x is None:\n", " return \"\"\n", " x = str(x).replace(\"\\n\", \" \").replace(\"\\r\", \" \").strip()\n", " x = re.sub(r\"\\s+\", \" \", x)\n", " return x\n", "\n", "def normalize_df(df: pd.DataFrame) -> pd.DataFrame:\n", " df = df.copy().applymap(clean_cell)\n", " df = df.loc[~(df.apply(lambda r: all(v == \"\" for v in r), axis=1))].reset_index(drop=True)\n", " return df\n", "\n", "def df_flattext(df: pd.DataFrame) -> str:\n", " vals = df.astype(str).fillna(\"\").values.flatten().tolist()\n", " return \" \".join(vals)\n", "\n", "def safe_filename(s: str) -> str:\n", " s = re.sub(r\"[\\\\/:*?\\\"<>|]+\", \"_\", s)\n", " s = re.sub(r\"\\s+\", \"_\", s.strip())\n", " return s[:180]\n" ] }, { "cell_type": "code", "execution_count": 48, "id": "990ce85a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(7, 8)" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "start_n = norm_text(START_MARKER)\n", "end_n = norm_text(END_MARKER)\n", "with pdfplumber.open(pdf_path) as pdf:\n", " start_page = None\n", " end_page = None\n", " for i, page in enumerate(pdf.pages):\n", " txt = norm_text(page.extract_text() or \"\")\n", " if start_n in txt:\n", " start_page = i\n", " break\n", " if start_page is None:\n", " raise ValueError(f\"Não encontrei o início: {START_MARKER}\")\n", " for j in range(start_page, len(pdf.pages)):\n", " txt = norm_text(pdf.pages[j].extract_text() or \"\")\n", " if end_n in txt:\n", " end_page = j\n", " break\n", " if end_page is None:\n", " raise ValueError(f\"Encontrei o início, mas não encontrei o fim: {END_MARKER}\")\n", "(start_page + 1, end_page + 1)\n" ] }, { "cell_type": "code", "execution_count": 49, "id": "43916fc0", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\garci\\AppData\\Local\\Temp\\ipykernel_17004\\2567387785.py:16: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n", " df = df.copy().applymap(clean_cell)\n", "C:\\Users\\garci\\AppData\\Local\\Temp\\ipykernel_17004\\2567387785.py:16: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n", " df = df.copy().applymap(clean_cell)\n", "C:\\Users\\garci\\AppData\\Local\\Temp\\ipykernel_17004\\2567387785.py:16: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n", " df = df.copy().applymap(clean_cell)\n", "C:\\Users\\garci\\AppData\\Local\\Temp\\ipykernel_17004\\2567387785.py:16: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n", " df = df.copy().applymap(clean_cell)\n" ] }, { "data": { "text/plain": [ "4" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "candidates = []\n", "\n", "with pdfplumber.open(pdf_path) as pdf:\n", " for pno in range(start_page, end_page + 1):\n", " page = pdf.pages[pno]\n", " tables = page.extract_tables() or []\n", "\n", " for ti, tbl in enumerate(tables, start=1):\n", " if not tbl or len(tbl) < 2:\n", " continue\n", " header = tbl[0]\n", " body = tbl[1:]\n", " df = pd.DataFrame(body, columns=header)\n", " df = normalize_df(df)\n", "\n", " r, c = df.shape\n", " if r < 2 or c < 2:\n", " continue\n", " text_n = norm_text(df_flattext(df))\n", " score = 0\n", " if c >= 3: score += 10\n", " if r >= 4: score += 10\n", " if pno == start_page: score += 15\n", " for kw in [\"planeamento\", \"execucao\", \"formacao\", \"respostas\", \"formadores\"]:\n", " if kw in text_n:\n", " score += 2\n", "\n", " empties = (df == \"\").sum().sum()\n", " total = r * c\n", " if total and empties / total > 0.6:\n", " score -= 10\n", "\n", " candidates.append((score, pno, ti, df))\n", "\n", "len(candidates)\n" ] }, { "cell_type": "code", "execution_count": 50, "id": "cd4bd06b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Bloco: páginas 7 → 8\n", "Tabela escolhida: p7_t1 (score=35)\n", "Dimensões: (18, 4)\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IndicadoresAvaliaçãoNoneGanhos / Perdas
01. Intervenção da Unidade FormadoraInicialFinal
11.1 Apoio Técnico administrativoN/A3.91N/A
21.2 Alojamento4.554.45-0.1
31.3 Alimentação4.274.0-0.27
42. Intervenção da Direção de CursoInicialFinal
52.1 Apoio prestado pela Direção de CursoN/A4.09N/A
63. Programa de CursoInicialFinal
73.1 Grau de dificuldade do curso3.553.27-0.28
83.2 Utilidade do Curso para futuras funções4.093.82-0.27
93.3 Motivação e participação4.454.09-0.36
103.4 Grau de conhecimento adquirido com o curso3.734.090.36
114. Funcionamento do CursoMédia
124.1 Objetivo do curso3.64
134.2 Conteúdo do curso3.55
144.3 Adequação dos trabalhos/exercícios3.36
154.4 Instalações afetas à formação3.82
164.5 Meios audiovisuais e didáticos4.09
174.6 Documentos e bibliografias disponibilizadas4.09
\n", "
" ], "text/plain": [ " Indicadores Avaliação None \\\n", "0 1. Intervenção da Unidade Formadora Inicial Final \n", "1 1.1 Apoio Técnico administrativo N/A 3.91 \n", "2 1.2 Alojamento 4.55 4.45 \n", "3 1.3 Alimentação 4.27 4.0 \n", "4 2. Intervenção da Direção de Curso Inicial Final \n", "5 2.1 Apoio prestado pela Direção de Curso N/A 4.09 \n", "6 3. Programa de Curso Inicial Final \n", "7 3.1 Grau de dificuldade do curso 3.55 3.27 \n", "8 3.2 Utilidade do Curso para futuras funções 4.09 3.82 \n", "9 3.3 Motivação e participação 4.45 4.09 \n", "10 3.4 Grau de conhecimento adquirido com o curso 3.73 4.09 \n", "11 4. Funcionamento do Curso \n", "12 4.1 Objetivo do curso \n", "13 4.2 Conteúdo do curso \n", "14 4.3 Adequação dos trabalhos/exercícios \n", "15 4.4 Instalações afetas à formação \n", "16 4.5 Meios audiovisuais e didáticos \n", "17 4.6 Documentos e bibliografias disponibilizadas \n", "\n", " Ganhos / Perdas \n", "0 \n", "1 N/A \n", "2 -0.1 \n", "3 -0.27 \n", "4 \n", "5 N/A \n", "6 \n", "7 -0.28 \n", "8 -0.27 \n", "9 -0.36 \n", "10 0.36 \n", "11 Média \n", "12 3.64 \n", "13 3.55 \n", "14 3.36 \n", "15 3.82 \n", "16 4.09 \n", "17 4.09 " ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "if not candidates:\n", " raise ValueError(\"Encontrei o bloco, mas não encontrei tabelas nas páginas desse intervalo.\")\n", "\n", "candidates.sort(key=lambda x: x[0], reverse=True)\n", "best_score, best_pno, best_ti, best_df = candidates[0]\n", "\n", "print(f\"Bloco: páginas {start_page+1} → {end_page+1}\")\n", "print(f\"Tabela escolhida: p{best_pno+1}_t{best_ti} (score={best_score})\")\n", "print(\"Dimensões:\", best_df.shape)\n", "\n", "best_df.head(30)\n" ] }, { "cell_type": "code", "execution_count": 51, "id": "884d3881", "metadata": {}, "outputs": [], "source": [ "def dot_to_comma(df: pd.DataFrame) -> pd.DataFrame:\n", " df = df.copy()\n", "\n", " def convert(x):\n", " if isinstance(x, (int, float)):\n", " return str(x).replace(\".\", \",\")\n", " if isinstance(x, str):\n", " # só troca se parecer número (ex: \"3.5\")\n", " if re.fullmatch(r\"-?\\d+\\.\\d+\", x):\n", " return x.replace(\".\", \",\")\n", " return x\n", "\n", " return df.applymap(convert)" ] }, { "cell_type": "code", "execution_count": 52, "id": "f1d5c545", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01
01.1 Apoio Técnico administrativo3.91
11.2 Alojamento4.45
21.3 Alimentação4.0
32.1 Apoio prestado pela Direção de Curso4.09
43.1 Grau de dificuldade do curso3.27
53.2 Utilidade do Curso para futuras funções3.82
63.3 Motivação e participação4.09
73.4 Grau de conhecimento adquirido com o curso4.09
84.1 Objetivo do curso3.64
94.2 Conteúdo do curso3.55
104.3 Adequação dos trabalhos/exercícios3.36
114.4 Instalações afetas à formação3.82
124.5 Meios audiovisuais e didáticos4.09
134.6 Documentos e bibliografias disponibilizadas4.09
\n", "
" ], "text/plain": [ " 0 1\n", "0 1.1 Apoio Técnico administrativo 3.91\n", "1 1.2 Alojamento 4.45\n", "2 1.3 Alimentação 4.0\n", "3 2.1 Apoio prestado pela Direção de Curso 4.09\n", "4 3.1 Grau de dificuldade do curso 3.27\n", "5 3.2 Utilidade do Curso para futuras funções 3.82\n", "6 3.3 Motivação e participação 4.09\n", "7 3.4 Grau de conhecimento adquirido com o curso 4.09\n", "8 4.1 Objetivo do curso 3.64\n", "9 4.2 Conteúdo do curso 3.55\n", "10 4.3 Adequação dos trabalhos/exercícios 3.36\n", "11 4.4 Instalações afetas à formação 3.82\n", "12 4.5 Meios audiovisuais e didáticos 4.09\n", "13 4.6 Documentos e bibliografias disponibilizadas 4.09" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rows_config = {\n", " 1: [0, 2],\n", " 2: [0, 2],\n", " 3: [0, 2],\n", " 5: [0, 2],\n", " 7: [0, 2],\n", " 8: [0, 2],\n", " 9: [0, 2],\n", " 10: [0, 2],\n", " 12: [0, 3],\n", " 13: [0, 3],\n", " 14: [0, 3],\n", " 15: [0, 3],\n", " 16: [0, 3],\n", " 17: [0, 3]\n", "}\n", "\n", "new_rows = []\n", "\n", "for row_idx, col_idxs in rows_config.items():\n", " values = best_df.iloc[row_idx, col_idxs].tolist()\n", " new_rows.append(values)\n", "\n", "new_df = pd.DataFrame(new_rows)\n", "new_df" ] }, { "cell_type": "code", "execution_count": 53, "id": "5dfa7c90", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\garci\\AppData\\Local\\Temp\\ipykernel_17004\\2220260037.py:13: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n", " return df.applymap(convert)\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
1.1 Apoio Técnico administrativo1.2 Alojamento1.3 Alimentação2.1 Apoio prestado pela Direção de Curso3.1 Grau de dificuldade do curso3.2 Utilidade do Curso para futuras funções3.3 Motivação e participação3.4 Grau de conhecimento adquirido com o curso4.1 Objetivo do curso4.2 Conteúdo do curso4.3 Adequação dos trabalhos/exercícios4.4 Instalações afetas à formação4.5 Meios audiovisuais e didáticos4.6 Documentos e bibliografias disponibilizadas
2024 - 2CFPQP-Art3,914,454,04,093,273,824,094,093,643,553,363,824,094,09
\n", "
" ], "text/plain": [ " 1.1 Apoio Técnico administrativo 1.2 Alojamento \\\n", "2024 - 2CFPQP-Art 3,91 4,45 \n", "\n", " 1.3 Alimentação 2.1 Apoio prestado pela Direção de Curso \\\n", "2024 - 2CFPQP-Art 4,0 4,09 \n", "\n", " 3.1 Grau de dificuldade do curso \\\n", "2024 - 2CFPQP-Art 3,27 \n", "\n", " 3.2 Utilidade do Curso para futuras funções \\\n", "2024 - 2CFPQP-Art 3,82 \n", "\n", " 3.3 Motivação e participação \\\n", "2024 - 2CFPQP-Art 4,09 \n", "\n", " 3.4 Grau de conhecimento adquirido com o curso \\\n", "2024 - 2CFPQP-Art 4,09 \n", "\n", " 4.1 Objetivo do curso 4.2 Conteúdo do curso \\\n", "2024 - 2CFPQP-Art 3,64 3,55 \n", "\n", " 4.3 Adequação dos trabalhos/exercícios \\\n", "2024 - 2CFPQP-Art 3,36 \n", "\n", " 4.4 Instalações afetas à formação \\\n", "2024 - 2CFPQP-Art 3,82 \n", "\n", " 4.5 Meios audiovisuais e didáticos \\\n", "2024 - 2CFPQP-Art 4,09 \n", "\n", " 4.6 Documentos e bibliografias disponibilizadas \n", "2024 - 2CFPQP-Art 4,09 " ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data_dict = dict(zip(new_df[0], new_df[1]))\n", "final_df = pd.DataFrame([data_dict])\n", "final_df.index = [f\"{year} - {course_name}\"]\n", "final_df = dot_to_comma(final_df)\n", "final_df" ] }, { "cell_type": "code", "execution_count": null, "id": "10337d57", "metadata": {}, "outputs": [], "source": [ "def save_row_if_new(final_df: pd.DataFrame, xlsx_path: str, row_name: str, sheet_name: str = \"Resultados\"):\n", " root = tk.Tk()\n", " root.withdraw()\n", " if not os.path.exists(xlsx_path):\n", " final_df.to_excel(xlsx_path, sheet_name=sheet_name, index=True)\n", " messagebox.showinfo(\"OK\", f\"Ficheiro criado e linha guardada:\\n{row_name}\\n\\n{xlsx_path}\")\n", " return False \n", " try:\n", " existing = pd.read_excel(xlsx_path, sheet_name=sheet_name, index_col=0)\n", " except ValueError:\n", " final_df.to_excel(xlsx_path, sheet_name=sheet_name, index=True)\n", " messagebox.showinfo(\"OK\", f\"Sheet '{sheet_name}' criada e linha guardada:\\n{row_name}\")\n", " return False\n", " if row_name in existing.index.astype(str):\n", " messagebox.showwarning(\"Já existe\", f\"Já existe uma linha com este nome:\\n{row_name}\\n\\nNada foi gravado.\")\n", " return True\n", " updated = pd.concat([existing, final_df], axis=0)\n", " with pd.ExcelWriter(xlsx_path, engine=\"openpyxl\", mode=\"w\") as writer:\n", " updated.to_excel(writer, sheet_name=sheet_name, index=True)\n", " messagebox.showinfo(\"OK\", f\"Linha adicionada com sucesso:\\n{row_name}\\n\\n{xlsx_path}\")\n", " return False\n", "row_name = f\"{year} - {course_name}\"\n", "xlsx_path = \"BDpowerbi.xlsx\"\n", "_duplicado = save_row_if_new(final_df, xlsx_path, row_name, sheet_name=\"Resultados\")\n", "\n", "sys.exit(0)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.14.2" } }, "nbformat": 4, "nbformat_minor": 5 }