Files
Botdaq/Relatórios Internos/reportcreator.ipynb
T

3483 lines
130 KiB
Plaintext
Raw Normal View History

2026-03-14 22:57:45 +00:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "1a6ab6f8",
"metadata": {},
"outputs": [],
"source": [
"#ollama pull llama3.1:8b\n",
"import pandas as pd\n",
"from tkinter import Tk\n",
"from tkinter.filedialog import askopenfilename\n",
"from docx import Document\n",
"from docx.document import Document as DocxDocument\n",
"from docx.text.paragraph import Paragraph\n",
"import re\n",
"import string\n",
"from docx.oxml import OxmlElement\n",
"from docx.shared import Cm, Pt\n",
"from docx.oxml.ns import qn\n",
"from docx.text.run import Run\n",
"from docx.table import Table\n",
"from docx.enum.text import WD_ALIGN_PARAGRAPH\n",
"from docx.enum.table import WD_ROW_HEIGHT_RULE, WD_ALIGN_VERTICAL\n",
"import requests\n",
"import tkinter as tk\n",
"from tkinter import messagebox"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "e68b58bb",
"metadata": {},
"outputs": [],
"source": [
"def ollama_available(timeout=0.4) -> bool:\n",
"    \"\"\"Tell whether an Ollama server answers on localhost:11434.\n",
"\n",
"    Sends a GET request to the ``/api/tags`` endpoint. Any exception raised\n",
"    while doing so (connection refused, timeout, ...) counts as unavailable.\n",
"\n",
"    Args:\n",
"        timeout: Seconds to wait for the HTTP response.\n",
"\n",
"    Returns:\n",
"        True only when the endpoint replies with HTTP status 200.\n",
"    \"\"\"\n",
"    # Original author's note: the bot endpoint should be plugged in here later.\n",
"    tags_url = \"http://localhost:11434/api/tags\"\n",
"    try:\n",
"        return requests.get(tags_url, timeout=timeout).status_code == 200\n",
"    except Exception:\n",
"        return False"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "4f257bf0",
"metadata": {},
"outputs": [],
"source": [
"def avaliacao_qualitativa(valor: float) -> str:\n",
" if valor is None or pd.isna(valor):\n",
" return \"N/A\"\n",
" if 1.0 <= valor <= 3.0:\n",
" return \"Rever Urgentemente\"\n",
" elif 3.0 < valor <= 3.5:\n",
" return \"Rever e Melhorar\"\n",
" elif 3.5 < valor <= 3.9:\n",
" return \"Bom\"\n",
" elif 3.9 < valor <= 4.5:\n",
" return \"Qualidade\"\n",
" elif 4.5 < valor <= 5.0:\n",
" return \"Excelência\"\n",
" else:\n",
" return \"Out of Range\""
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "c0c9fdd2",
"metadata": {},
"outputs": [],
"source": [
"def format_header_row(row, height_cm=5.2):\n",
"    \"\"\"Give a table row an exact height and centre the content of its cells.\n",
"\n",
"    Args:\n",
"        row: Table row to format.\n",
"        height_cm: Row height in centimetres, applied with the EXACTLY rule.\n",
"    \"\"\"\n",
"    row.height = Cm(height_cm)\n",
"    row.height_rule = WD_ROW_HEIGHT_RULE.EXACTLY\n",
"    for header_cell in row.cells:\n",
"        # Centre vertically within the cell and horizontally per paragraph.\n",
"        header_cell.vertical_alignment = WD_ALIGN_VERTICAL.CENTER\n",
"        for paragraph in header_cell.paragraphs:\n",
"            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "91ad588c",
"metadata": {},
"outputs": [],
"source": [
"def set_cell_text_vertical(cell, direction=\"btLr\"):\n",
"    \"\"\"Set the text direction of a table cell (used for rotated headers).\n",
"\n",
"    Args:\n",
"        cell: Table cell whose properties are modified.\n",
"        direction: Value written to the ``w:val`` attribute of the cell's\n",
"            ``w:textDirection`` element.\n",
"\n",
"    An existing ``w:textDirection`` element is reused, so calling this more\n",
"    than once on the same cell updates the value instead of appending a\n",
"    duplicate element to the cell properties.\n",
"    \"\"\"\n",
"    tcPr = cell._tc.get_or_add_tcPr()\n",
"    td = tcPr.find(qn(\"w:textDirection\"))\n",
"    if td is None:\n",
"        td = OxmlElement(\"w:textDirection\")\n",
"        tcPr.append(td)\n",
"    td.set(qn(\"w:val\"), direction)\n",
"\n",
"def set_table_all_columns_width(tbl, width_cm=2.3):\n",
"    \"\"\"Turn off autofit and give every cell of ``tbl`` the same width.\n",
"\n",
"    Args:\n",
"        tbl: Table to resize.\n",
"        width_cm: Width applied to each cell, in centimetres.\n",
"    \"\"\"\n",
"    tbl.autofit = False\n",
"    uniform_width = Cm(width_cm)\n",
"    for table_row in tbl.rows:\n",
"        for table_cell in table_row.cells:\n",
"            table_cell.width = uniform_width"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "65478bf7",
"metadata": {},
"outputs": [],
"source": [
"def force_run_font(run: Run, font_name=\"Arial\", font_size_pt=12, bold=None):\n",
"    \"\"\"Apply a font family and size to a run, optionally setting bold.\n",
"\n",
"    Args:\n",
"        run: Run to format.\n",
"        font_name: Font family; also written to the ``w:eastAsia`` attribute\n",
"            of the run's ``rFonts`` element.\n",
"        font_size_pt: Font size in points.\n",
"        bold: When not None, the run's bold flag becomes ``bool(bold)``;\n",
"            None leaves the current bold setting untouched.\n",
"    \"\"\"\n",
"    if bold is not None:\n",
"        run.bold = bool(bold)\n",
"    font = run.font\n",
"    font.name = font_name\n",
"    font.size = Pt(font_size_pt)\n",
"    r_fonts = run._element.rPr.rFonts\n",
"    r_fonts.set(qn(\"w:eastAsia\"), font_name)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "1ad788ec",
"metadata": {},
"outputs": [],
"source": [
"def format_pt_number(x: float) -> str:\n",
" if pd.isna(x):\n",
" return \"\"\n",
" return f\"{x:.2f}\".replace(\".\", \",\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "a32425de",
"metadata": {},
"outputs": [],
"source": [
"def delete_paragraph(paragraph: Paragraph) -> None:\n",
"    \"\"\"Remove a paragraph's XML element from its parent element.\n",
"\n",
"    After removal the wrapper's ``_p`` and ``_element`` references are set\n",
"    to None.\n",
"    \"\"\"\n",
"    element = paragraph._p\n",
"    parent = element.getparent()\n",
"    parent.remove(element)\n",
"    paragraph._p = paragraph._element = None"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "9d3ccf1e",
"metadata": {},
"outputs": [],
"source": [
"def clean_module_title(col_name: str) -> str:\n",
" s = str(col_name).strip()\n",
" if \"->\" in s:\n",
" s = s.split(\"->\", 1)[1].strip()\n",
" return s"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "1baf3c15",
"metadata": {},
"outputs": [],
"source": [
"#def clean_module_title(col_name: str) -> str:\n",
"# s = str(col_name).strip()\n",
"# s = re.sub(r\"^.*?->\\s*\", \"\", s)\n",
"# s = re.sub(r\"^\\s*Q06\\s*[-_ ]\\s*Aprecia.*?[-:]\\s*\", \"\", s, flags=re.IGNORECASE)\n",
"# return s.strip()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "fb0689e3",
"metadata": {},
"outputs": [],
"source": [
"def apply_table_paragraph_spacing(cell, line_spacing=1.5):\n",
"    \"\"\"Set line spacing and zero space before/after on a cell's paragraphs.\n",
"\n",
"    Args:\n",
"        cell: Table cell whose paragraphs are formatted.\n",
"        line_spacing: Line spacing value applied to every paragraph.\n",
"    \"\"\"\n",
"    for paragraph in cell.paragraphs:\n",
"        spacing = paragraph.paragraph_format\n",
"        spacing.line_spacing = line_spacing\n",
"        spacing.space_before = Pt(0)\n",
"        spacing.space_after = Pt(0)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "cacbdd0b",
"metadata": {},
"outputs": [],
"source": [
"def mean_columns_until_empty(df: pd.DataFrame, start_col_idx: int = 16) -> list[float]:\n",
" means = []\n",
" for j in range(start_col_idx, df.shape[1]):\n",
" col = pd.to_numeric(df.iloc[:, j], errors=\"coerce\")\n",
" if col.notna().sum() == 0:\n",
" break\n",
" means.append(col.mean())\n",
" return means\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "a835ca2d",
"metadata": {},
"outputs": [],
"source": [
"def insert_table_after_paragraph(paragraph: Paragraph, rows: int, cols: int) -> Table:\n",
"    \"\"\"Create a ``rows`` x ``cols`` table and place it right after ``paragraph``.\n",
"\n",
"    The table is created through ``add_table`` on the document that owns the\n",
"    paragraph, then its XML element is moved to follow the paragraph.\n",
"    \"\"\"\n",
"    owner_doc = paragraph.part.document\n",
"    new_table = owner_doc.add_table(rows=rows, cols=cols)\n",
"    paragraph._p.addnext(new_table._tbl)\n",
"    return new_table"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "be90075d",
"metadata": {},
"outputs": [],
"source": [
"def replace_placeholder_with_uc_table(\n",
"    doc,\n",
"    df_inicial: pd.DataFrame,\n",
"    df_final: pd.DataFrame,\n",
"    placeholder: str = \"{{tabelasUC}}\",\n",
"    font_name: str = \"Arial\",\n",
"    font_size_pt: int = 12,\n",
"    start_col_idx_inicial: int = 16,\n",
"):\n",
"    \"\"\"Swap the first ``placeholder`` occurrence for the module comparison table.\n",
"\n",
"    One row is produced per column of ``df_final`` whose lowercased name\n",
"    contains both 'q06' and 'aprecia' (sorted by name), showing the initial\n",
"    mean, the final mean and their signed difference. Initial means come from\n",
"    ``mean_columns_until_empty`` on ``df_inicial`` and are paired with the\n",
"    final columns by position. With no matching column a single 'Sem dados'\n",
"    row is written.\n",
"\n",
"    Body paragraphs are searched first; the table then replaces the matching\n",
"    paragraph. Otherwise paragraphs inside ``doc.tables`` cells are searched\n",
"    and the table is nested in the matching cell. Only the first match is\n",
"    handled.\n",
"    \"\"\"\n",
"    center = WD_ALIGN_PARAGRAPH.CENTER\n",
"    left = WD_ALIGN_PARAGRAPH.LEFT\n",
"    nan = float(\"nan\")\n",
"\n",
"    uc_cols = sorted(\n",
"        (c for c in df_final.columns if \"q06\" in str(c).lower() and \"aprecia\" in str(c).lower()),\n",
"        key=str,\n",
"    )\n",
"    ini_means = mean_columns_until_empty(df_inicial, start_col_idx=start_col_idx_inicial)\n",
"\n",
"    # (title, initial mean, final mean, difference) per module column.\n",
"    rows_data = []\n",
"    for pos, col in enumerate(uc_cols):\n",
"        ini = ini_means[pos] if pos < len(ini_means) else nan\n",
"        fin = pd.to_numeric(df_final[col], errors=\"coerce\").mean() if col in df_final.columns else nan\n",
"        both_known = not (pd.isna(fin) or pd.isna(ini))\n",
"        diff = fin - ini if both_known else nan\n",
"        rows_data.append((clean_module_title(col), ini, fin, diff))\n",
"\n",
"    # Header row plus at least one body row.\n",
"    n_rows = 1 + max(1, len(rows_data))\n",
"\n",
"    def _write(cell, text: str, bold=False, align=None):\n",
"        cell.text = \"\"\n",
"        par = cell.paragraphs[0]\n",
"        if align is not None:\n",
"            par.alignment = align\n",
"        force_run_font(par.add_run(text), font_name, font_size_pt, bold=bold)\n",
"        apply_table_paragraph_spacing(cell, line_spacing=1.5)\n",
"\n",
"    def _layout_and_headers(tbl):\n",
"        tbl.style = \"Table Grid\"\n",
"        tbl.autofit = False\n",
"        widths = (Cm(11), Cm(1.6), Cm(1.6), Cm(3.5))\n",
"        for table_row in tbl.rows:\n",
"            for col_idx, width in enumerate(widths):\n",
"                table_row.cells[col_idx].width = width\n",
"        titles = (\"Apreciação dos módulos\", \"Inicial\", \"Final\", \"Ganhos/Perdas\")\n",
"        for col_idx, title in enumerate(titles):\n",
"            _write(tbl.rows[0].cells[col_idx], title, bold=True, align=center)\n",
"\n",
"    def _fill_body(tbl):\n",
"        if not rows_data:\n",
"            _write(tbl.rows[1].cells[0], \"Sem dados\", bold=False, align=left)\n",
"            for col_idx in (1, 2, 3):\n",
"                _write(tbl.rows[1].cells[col_idx], \"\", bold=False, align=center)\n",
"            return\n",
"        for row_idx, (title, ini, fin, diff) in enumerate(rows_data, start=1):\n",
"            diff_txt = \"\" if pd.isna(diff) else f\"{diff:+.2f}\".replace(\".\", \",\")\n",
"            _write(tbl.rows[row_idx].cells[0], title, bold=False, align=left)\n",
"            _write(tbl.rows[row_idx].cells[1], format_pt_number(ini), bold=False, align=center)\n",
"            _write(tbl.rows[row_idx].cells[2], format_pt_number(fin), bold=False, align=center)\n",
"            _write(tbl.rows[row_idx].cells[3], diff_txt, bold=False, align=center)\n",
"\n",
"    def _finish(tbl):\n",
"        _layout_and_headers(tbl)\n",
"        _fill_body(tbl)\n",
"\n",
"    # 1) Placeholder in a body paragraph: table goes after it, paragraph is removed.\n",
"    for par in doc.paragraphs:\n",
"        if placeholder in par.text:\n",
"            _finish(insert_table_after_paragraph(par, rows=n_rows, cols=4))\n",
"            delete_paragraph(par)\n",
"            return\n",
"    # 2) Placeholder inside a table cell: the cell is cleared and the table nested in it.\n",
"    for table in doc.tables:\n",
"        for table_row in table.rows:\n",
"            for cell in table_row.cells:\n",
"                for par in cell.paragraphs:\n",
"                    if placeholder in par.text:\n",
"                        cell.text = \"\"\n",
"                        _finish(cell.add_table(rows=n_rows, cols=4))\n",
"                        return"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "51f6e2c5",
"metadata": {},
"outputs": [],
"source": [
"def insert_paragraph_after(paragraph: Paragraph) -> Paragraph:\n",
"    \"\"\"Insert an empty ``w:p`` element right after ``paragraph``.\n",
"\n",
"    Returns:\n",
"        A ``Paragraph`` wrapper for the new element, sharing the original\n",
"        paragraph's parent.\n",
"    \"\"\"\n",
"    sibling = OxmlElement(\"w:p\")\n",
"    paragraph._p.addnext(sibling)\n",
"    return Paragraph(sibling, paragraph._parent)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "3b36ac23",
"metadata": {},
"outputs": [],
"source": [
"def replace_placeholder_with_q06_subitems(\n",
"    doc,\n",
"    df: pd.DataFrame,\n",
"    placeholder: str = \"{{Q06_Apreciacao}}\",\n",
"    item_number: int = 5,\n",
"    indent_cm: float = 2.75,\n",
"    indent_title: bool = True,\n",
"    font_name: str = \"Arial\",\n",
"    font_size_pt: int = 12,\n",
"):\n",
"    \"\"\"Replace the first ``placeholder`` paragraph with one line per Q06 module.\n",
"\n",
"    Every column whose lowercased name contains both 'q06' and 'aprecia'\n",
"    (sorted by name) yields a paragraph of the form\n",
"    '(a)<TAB>Title (mean), que corresponde a Label;' with the mean and the\n",
"    qualitative label in bold. When no such column exists a single\n",
"    '(a)<TAB>Sem dados;' paragraph is inserted instead. The placeholder\n",
"    paragraph itself is removed. Body paragraphs are searched before the\n",
"    paragraphs inside ``doc.tables`` cells.\n",
"\n",
"    Args:\n",
"        doc: Document modified in place.\n",
"        df: Survey answers; matching columns are coerced to numeric.\n",
"        placeholder: Marker text to look for.\n",
"        item_number: Kept for backward compatibility; not used.\n",
"        indent_cm: Left indent of the generated paragraphs, in centimetres.\n",
"        indent_title: Kept for backward compatibility; not used.\n",
"        font_name: Font family of the generated runs.\n",
"        font_size_pt: Font size (points) of the generated runs.\n",
"    \"\"\"\n",
"    cols = sorted(\n",
"        (c for c in df.columns if \"q06\" in str(c).lower() and \"aprecia\" in str(c).lower()),\n",
"        key=str,\n",
"    )\n",
"    letters = string.ascii_lowercase\n",
"\n",
"    def _apply_par_format(par: Paragraph, left_indent_cm: float):\n",
"        fmt = par.paragraph_format\n",
"        fmt.left_indent = Cm(left_indent_cm)\n",
"        fmt.line_spacing = 1.5\n",
"        fmt.space_before = Pt(0)\n",
"        fmt.space_after = Pt(0)\n",
"\n",
"    def _add_run(par: Paragraph, text: str, bold=None):\n",
"        # bold=None leaves the run's bold flag untouched (see force_run_font).\n",
"        run = par.add_run(text)\n",
"        force_run_font(run, font_name, font_size_pt, bold=bold)\n",
"        return run\n",
"\n",
"    def _process_paragraph(p: Paragraph) -> bool:\n",
"        if placeholder not in p.text:\n",
"            return False\n",
"        current = p\n",
"        if not cols:\n",
"            newp = insert_paragraph_after(current)\n",
"            _add_run(newp, \"(a)\\tSem dados;\")\n",
"            _apply_par_format(newp, indent_cm)\n",
"        for i, c in enumerate(cols):\n",
"            mean_val = pd.to_numeric(df[c], errors=\"coerce\").mean()\n",
"            mean_str = f\"{mean_val:.2f}\".replace(\".\", \",\")\n",
"            label = avaliacao_qualitativa(mean_val)\n",
"            module_title = clean_module_title(c)\n",
"            # a..z for the first 26 items, then 'a27', 'a28', ...\n",
"            sub = letters[i] if i < 26 else f\"a{i+1}\"\n",
"            newp = insert_paragraph_after(current)\n",
"            _add_run(newp, f\"({sub})\\t{module_title} (\")\n",
"            _add_run(newp, mean_str, bold=True)\n",
"            _add_run(newp, \"), que corresponde a \")\n",
"            _add_run(newp, label, bold=True)\n",
"            _add_run(newp, \";\")\n",
"            _apply_par_format(newp, indent_cm)\n",
"            # Chain insertions so the items keep their order.\n",
"            current = newp\n",
"        delete_paragraph(p)\n",
"        return True\n",
"\n",
"    for p in doc.paragraphs:\n",
"        if _process_paragraph(p):\n",
"            return\n",
"    for table in doc.tables:\n",
"        for row in table.rows:\n",
"            for cell in row.cells:\n",
"                for p in cell.paragraphs:\n",
"                    if _process_paragraph(p):\n",
"                        return"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "d58493cd",
"metadata": {},
"outputs": [],
"source": [
"def _collect_course_info():\n",
"    \"\"\"Show a Tk form and return the course details typed by the user.\n",
"\n",
"    The form has one text entry per field. Submitting with any empty field\n",
"    shows an error box and keeps the form open.\n",
"\n",
"    Returns:\n",
"        dict mapping each field key (e.g. 'NOMEDOCURSO', 'DDi') to the\n",
"        stripped text entered for it.\n",
"\n",
"    Raises:\n",
"        RuntimeError: If the main loop ends without a successful submit.\n",
"    \"\"\"\n",
"    # (label shown to the user, key in the returned dict)\n",
"    fields = [\n",
"        (\"Nomenclatura do curso\", \"NOMEDOCURSO\"),\n",
"        (\"Dia de inicio (DD)\", \"DDi\"),\n",
"        (\"Mes de inicio (Extenso)\", \"MESi\"),\n",
"        (\"Ano de inicio (AAAA)\", \"AAAAi\"),\n",
"        (\"Dia de fim (DD)\", \"DDf\"),\n",
"        (\"Mes de fim (Extenso)\", \"MESf\"),\n",
"        (\"Ano de fim (AAAA)\", \"AAAAf\"),\n",
"        (\"Numero de formandos\", \"NFORMANDOS\"),\n",
"        (\"Finalidade do curso\", \"FINALIDADECURSO\"),\n",
"        (\"Média final do curso\", \"MEDIAFINALCURSO\"),\n",
"    ]\n",
"\n",
"    form = tk.Tk()\n",
"    form.title(\"Dados do curso\")\n",
"    form.resizable(False, False)\n",
"\n",
"    entries = {}\n",
"    for row_idx, (label, key) in enumerate(fields):\n",
"        caption = tk.Label(form, text=label, anchor=\"w\")\n",
"        caption.grid(row=row_idx, column=0, padx=8, pady=4, sticky=\"w\")\n",
"        entry = tk.Entry(form, width=30)\n",
"        entry.grid(row=row_idx, column=1, padx=8, pady=4)\n",
"        entries[key] = entry\n",
"\n",
"    # Filled by _submit; stays empty when the window is closed without submitting.\n",
"    form_values = {}\n",
"\n",
"    def _submit():\n",
"        values = {key: widget.get().strip() for key, widget in entries.items()}\n",
"        missing = [label for (label, key) in fields if not values[key]]\n",
"        if missing:\n",
"            messagebox.showerror(\"Dados em falta\", \"Preencha: \" + \", \".join(missing))\n",
"            return\n",
"        form_values.update(values)\n",
"        form.destroy()\n",
"\n",
"    submit_btn = tk.Button(form, text=\"Continuar\", command=_submit)\n",
"    submit_btn.grid(row=len(fields), column=0, columnspan=2, pady=10)\n",
"    form.mainloop()\n",
"    if not form_values:\n",
"        raise RuntimeError(\"Formulario cancelado\")\n",
"    return form_values\n",
"course_info = _collect_course_info()\n",
"def _to_int_or_str(s):\n",
" return int(s) if s.isdigit() else s\n",
"# Free-text fields are kept as typed; date parts and the trainee count go\n",
"# through _to_int_or_str.\n",
"NOMEDOCURSOcurto = course_info[\"NOMEDOCURSO\"]\n",
"DDi, MESi, AAAAi, DDf, MESf, AAAAf, NFORMANDOS = (\n",
"    _to_int_or_str(course_info[field_key])\n",
"    for field_key in (\"DDi\", \"MESi\", \"AAAAi\", \"DDf\", \"MESf\", \"AAAAf\", \"NFORMANDOS\")\n",
")\n",
"FINALIDADECURSO = course_info[\"FINALIDADECURSO\"]\n",
"MEDIAFINALCURSO = course_info[\"MEDIAFINALCURSO\"]"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "912bf2d5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Selected file:E:/Relatórios Internos/3Curso QP praças/Question_rio_de_Expetativas_Inicial_geral.xlsx\n"
]
}
],
"source": [
"# Hidden root window so only the file dialog is shown. It is destroyed\n",
"# afterwards so re-running the cell does not accumulate Tk root windows.\n",
"_picker_root = Tk()\n",
"_picker_root.withdraw()\n",
"try:\n",
"    file_path = askopenfilename(\n",
"        title=\"Select Excel das expetativas iniciais\",\n",
"        filetypes=[(\"Excel files\", \"*.xlsx *.xls\")])\n",
"finally:\n",
"    _picker_root.destroy()\n",
"if not file_path:\n",
"    raise FileNotFoundError(\"No file selected\")\n",
"print(f\"Selected file:{file_path}\")"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "0f2192e3",
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_excel(file_path)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "a5494272",
"metadata": {},
"outputs": [],
"source": [
"# NOTE(review): row count minus one - confirm why one row is excluded.\n",
"ninq = df.shape[0] - 1\n",
"# Means of columns 10..15 (positional), rounded to two decimals.\n",
"(\n",
"    medalojamento,\n",
"    medalimentacao,\n",
"    meddificuldade,\n",
"    medfuncfut,\n",
"    medmotvpart,\n",
"    medconhecimento,\n",
") = (round(df.iloc[:, col_idx].mean(), 2) for col_idx in range(10, 16))"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "e6d556fc",
"metadata": {},
"outputs": [],
"source": [
"def mean_columns_until_empty(df: pd.DataFrame, start_col_idx: int = 16) -> list[float]:\n",
" means = []\n",
" n_cols = df.shape[1]\n",
" for j in range(start_col_idx, n_cols):\n",
" col = pd.to_numeric(df.iloc[:, j], errors=\"coerce\")\n",
" if col.notna().sum() == 0:\n",
" break\n",
" means.append(col.mean())\n",
" return means"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "7fc12954",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"6 [np.float64(4.031746031746032), np.float64(3.9047619047619047), np.float64(3.9523809523809526), np.float64(3.7936507936507935), np.float64(4.079365079365079)]\n"
]
}
],
"source": [
"medias = mean_columns_until_empty(df, start_col_idx=16)\n",
"print(len(medias), medias[:5])"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "7cc0fbc1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Selected file:\n",
"E:/Relatórios Internos/3Curso QP praças/Question_rio_Satisfa_o_Final_geral.xlsx\n"
]
}
],
"source": [
"# Hidden root window so only the file dialog is shown. It is destroyed\n",
"# afterwards so re-running the cell does not accumulate Tk root windows.\n",
"_picker_root2 = Tk()\n",
"_picker_root2.withdraw()\n",
"try:\n",
"    file_path2 = askopenfilename(\n",
"        title=\"Select Excel das expetativas finais\",\n",
"        filetypes=[(\"Excel files\", \"*.xlsx *.xls\")])\n",
"finally:\n",
"    _picker_root2.destroy()\n",
"if not file_path2:\n",
"    raise FileNotFoundError(\"No file selected\")\n",
"print(f\"Selected file:\\n{file_path2}\")"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "055a89b6",
"metadata": {},
"outputs": [],
"source": [
"df2 = pd.read_excel(file_path2)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "02228c44",
"metadata": {},
"outputs": [],
"source": [
"# NOTE(review): row count minus one - confirm why one row is excluded.\n",
"ninq2 = df2.shape[0] - 1\n",
"\n",
"# Means of the final survey, by positional column index, rounded to 2 decimals.\n",
"medaplog = round(df2.iloc[:, 10].mean(), 2)\n",
"medalojamento2 = round(df2.iloc[:, 11].mean(), 2)\n",
"medalimentacao2 = round(df2.iloc[:, 12].mean(), 2)\n",
"medapdir = round(df2.iloc[:, 13].mean(), 2)\n",
"meddificuldade2 = round(df2.iloc[:, 14].mean(), 2)\n",
"medfuncfut2 = round(df2.iloc[:, 15].mean(), 2)\n",
"medmotvpart2 = round(df2.iloc[:, 16].mean(), 2)\n",
"medconhecimento2 = round(df2.iloc[:, 17].mean(), 2)\n",
"\n",
"# Final minus initial for the measures present in both surveys.\n",
"medalojamentofinal = round(medalojamento2 - medalojamento, 2)\n",
"medalimentacaofinal = round(medalimentacao2 - medalimentacao, 2)\n",
"meddificuldadefinal = round(meddificuldade2 - meddificuldade, 2)\n",
"medfuncfutfinal = round(medfuncfut2 - medfuncfut, 2)\n",
"medmotvpartfinal = round(medmotvpart2 - medmotvpart, 2)\n",
"medconhecimentofinal = round(medconhecimento2 - medconhecimento, 2)\n",
"\n",
"objcruso = round(df2.iloc[:, 18].mean(), 2)\n",
"contcurso = round(df2.iloc[:, 19].mean(), 2)\n",
"adeqtrab = round(df2.iloc[:, 20].mean(), 2)\n",
"instform = round(df2.iloc[:, 21].mean(), 2)\n",
"audiovisuais = round(df2.iloc[:, 22].mean(), 2)\n",
"biblio = round(df2.iloc[:, 23].mean(), 2)\n",
"\n",
"# Course name: the text between the first and second '-' of the cell at\n",
"# row 1, column 4. The previous code additionally chained .split(\"\")[0],\n",
"# which always raises 'ValueError: empty separator'.\n",
"# NOTE(review): the separator originally intended there is not recoverable\n",
"# from this file - confirm whether the name must be cut at a further marker.\n",
"NOMEDOCURSO = df2.iloc[1, 4].split(\"-\")[1].strip()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "fd4f44f1",
"metadata": {},
"outputs": [],
"source": [
"def build_formadores_rows(df: pd.DataFrame):\n",
" groups = {}\n",
" for c in df.columns:\n",
" name = str(c)\n",
" low = name.lower()\n",
" if \"_formador\" not in low:\n",
" continue\n",
" m = re.match(r\"^\\s*(Q\\d+)\\s*_Formador\\s*->\\s*(.+?)\\s*(?:\\(|$)\", name, flags=re.IGNORECASE)\n",
" if not m:\n",
" continue\n",
" qcode = m.group(1).upper()\n",
" metric_raw = m.group(2).strip().lower()\n",
" groups.setdefault(qcode, {})\n",
" groups[qcode][metric_raw] = name\n",
" def qnum(q): \n",
" mm = re.match(r\"Q(\\d+)\", q)\n",
" return int(mm.group(1)) if mm else 10**9\n",
" qcodes_sorted = sorted(groups.keys(), key=qnum)\n",
" def metric_key(metric_raw: str) -> str | None:\n",
" mr = metric_raw.lower()\n",
" if \"dom\" in mr and \"ass\" in mr: \n",
" return \"dominio\"\n",
" if \"métod\" in mr or \"metod\" in mr: \n",
" return \"metodos\"\n",
" if \"lingu\" in mr: \n",
" return \"linguagem\"\n",
" if \"empenh\" in mr: \n",
" return \"empenho\"\n",
" if \"relac\" in mr or \"formand\" in mr: \n",
" return \"relacao\"\n",
" return None\n",
" rows = []\n",
" letters = \"ABCDEFGHIJKLMNOPQRSTUVWXYZ\"\n",
" for idx, qcode in enumerate(qcodes_sorted):\n",
" cols_map = groups[qcode]\n",
" picked = {\"dominio\": None, \"metodos\": None, \"linguagem\": None, \"empenho\": None, \"relacao\": None}\n",
" for raw, colname in cols_map.items():\n",
" k = metric_key(raw)\n",
" if k and picked[k] is None:\n",
" picked[k] = colname\n",
" def col_mean(colname):\n",
" if not colname:\n",
" return float(\"nan\")\n",
" return pd.to_numeric(df[colname], errors=\"coerce\").mean()\n",
" dominio = col_mean(picked[\"dominio\"])\n",
" metodos = col_mean(picked[\"metodos\"])\n",
" linguagem = col_mean(picked[\"linguagem\"])\n",
" empenho = col_mean(picked[\"empenho\"])\n",
" relacao = col_mean(picked[\"relacao\"])\n",
" vals = [dominio, metodos, linguagem, empenho, relacao]\n",
" media_final = pd.Series(vals, dtype=\"float\").mean(skipna=True)\n",
" label = letters[idx] if idx < len(letters) else f\"F{idx+1}\"\n",
" rows.append({\n",
" \"label\": label,\n",
" \"qcode\": qcode,\n",
" \"dominio\": dominio,\n",
" \"metodos\": metodos,\n",
" \"linguagem\": linguagem,\n",
" \"empenho\": empenho,\n",
" \"relacao\": relacao,\n",
" \"media_final\": media_final,\n",
" })\n",
" return rows\n"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "1ce5e3c2",
"metadata": {},
"outputs": [],
"source": [
"def replace_placeholder_with_formadores_table(\n",
" doc,\n",
" df2: pd.DataFrame,\n",
" placeholder: str = \"{{tabelaFormadores}}\",\n",
" font_name: str = \"Arial\",\n",
" font_size_pt: int = 12,\n",
" col_width_cm: float = 2.3,\n",
" header_vertical: bool = True,\n",
" rotate_first_header: bool = True,\n",
"):\n",
" rows = build_formadores_rows(df2)\n",
" def _fill_cell(cell, text: str, bold=False, align=None):\n",
" cell.text = \"\"\n",
" p = cell.paragraphs[0]\n",
" if align is not None:\n",
" p.alignment = align\n",
" r = p.add_run(text)\n",
" force_run_font(r, font_name, font_size_pt, bold=bold)\n",
" apply_table_paragraph_spacing(cell, line_spacing=1.5)\n",
" headers = [\"Formadores\", \"Domínio do assunto\",\"Métodos utilizados\",\"Linguagem utilizada\",\"Empenho\",\"Relação c/ formandos\",\"Média final\",]\n",
" global_mean = pd.Series([r[\"media_final\"] for r in rows], dtype=\"float\").mean(skipna=True)\n",
" def _apply_layout(tbl):\n",
" tbl.style = \"Table Grid\"\n",
" tbl.autofit = False\n",
" set_table_all_columns_width(tbl, width_cm=col_width_cm)\n",
" if header_vertical:\n",
" start_j = 0 if rotate_first_header else 1\n",
" for j in range(start_j, len(headers)):\n",
" set_cell_text_vertical(tbl.rows[0].cells[j], direction=\"btLr\")\n",
" def _populate_table(tbl):\n",
" for j, h in enumerate(headers):\n",
" _fill_cell(tbl.rows[0].cells[j], h, bold=True, align=WD_ALIGN_PARAGRAPH.CENTER)\n",
" if not rows:\n",
" _fill_cell(tbl.rows[1].cells[0], \"Sem dados\", bold=False, align=WD_ALIGN_PARAGRAPH.LEFT)\n",
" for j in range(1, len(headers)):\n",
" _fill_cell(tbl.rows[1].cells[j], \"\", bold=False, align=WD_ALIGN_PARAGRAPH.CENTER)\n",
" _fill_cell(tbl.rows[2].cells[0], \"Média\", bold=True, align=WD_ALIGN_PARAGRAPH.LEFT)\n",
" for j in range(1, len(headers) - 1):\n",
" _fill_cell(tbl.rows[2].cells[j], \"\", bold=False, align=WD_ALIGN_PARAGRAPH.CENTER)\n",
" _fill_cell(tbl.rows[2].cells[-1], \"\", bold=False, align=WD_ALIGN_PARAGRAPH.CENTER)\n",
" return\n",
" for i, r in enumerate(rows, start=1):\n",
" _fill_cell(tbl.rows[i].cells[0], r[\"label\"], bold=False, align=WD_ALIGN_PARAGRAPH.CENTER)\n",
" _fill_cell(tbl.rows[i].cells[1], format_pt_number(r[\"dominio\"]), bold=False, align=WD_ALIGN_PARAGRAPH.CENTER)\n",
" _fill_cell(tbl.rows[i].cells[2], format_pt_number(r[\"metodos\"]), bold=False, align=WD_ALIGN_PARAGRAPH.CENTER)\n",
" _fill_cell(tbl.rows[i].cells[3], format_pt_number(r[\"linguagem\"]), bold=False, align=WD_ALIGN_PARAGRAPH.CENTER)\n",
" _fill_cell(tbl.rows[i].cells[4], format_pt_number(r[\"empenho\"]), bold=False, align=WD_ALIGN_PARAGRAPH.CENTER)\n",
" _fill_cell(tbl.rows[i].cells[5], format_pt_number(r[\"relacao\"]), bold=False, align=WD_ALIGN_PARAGRAPH.CENTER)\n",
" _fill_cell(tbl.rows[i].cells[6], format_pt_number(r[\"media_final\"]), bold=False, align=WD_ALIGN_PARAGRAPH.CENTER)\n",
" last = 1 + len(rows)\n",
" _fill_cell(tbl.rows[last].cells[0], \"Média\", bold=True, align=WD_ALIGN_PARAGRAPH.LEFT)\n",
" for j in range(1, len(headers) - 1):\n",
" _fill_cell(tbl.rows[last].cells[j], \"\", bold=False, align=WD_ALIGN_PARAGRAPH.CENTER)\n",
" _fill_cell(tbl.rows[last].cells[-1], format_pt_number(global_mean), bold=True, align=WD_ALIGN_PARAGRAPH.CENTER)\n",
" def _make_table_at_paragraph(p: Paragraph):\n",
" n_rows = 1 + max(1, len(rows)) + 1\n",
" tbl = insert_table_after_paragraph(p, rows=n_rows, cols=len(headers))\n",
" _apply_layout(tbl)\n",
" format_header_row(tbl.rows[0], height_cm=5.2)\n",
" _populate_table(tbl)\n",
" delete_paragraph(p)\n",
" for p in doc.paragraphs:\n",
" if placeholder in p.text:\n",
" _make_table_at_paragraph(p)\n",
" return\n",
" for table in doc.tables:\n",
" for row in table.rows:\n",
" for cell in row.cells:\n",
" for p in cell.paragraphs:\n",
" if placeholder in p.text:\n",
" cell.text = \"\"\n",
" n_rows = 1 + max(1, len(rows)) + 1\n",
" tbl = cell.add_table(rows=n_rows, cols=len(headers))\n",
" _apply_layout(tbl)\n",
" format_header_row(tbl.rows[0], height_cm=5.2)\n",
" _populate_table(tbl)\n",
" return"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "5c02a5d9",
"metadata": {},
"outputs": [],
"source": [
"def compute_formadores_summary(df: pd.DataFrame):\n",
"    \"\"\"Return (trainer count, overall mean text, qualitative label).\n",
"\n",
"    The overall mean is the mean of the per-trainer ``media_final`` values\n",
"    from ``build_formadores_rows``, formatted with two decimals and a decimal\n",
"    comma. Both strings are empty when that mean is NaN.\n",
"    \"\"\"\n",
"    rows = build_formadores_rows(df)\n",
"    per_trainer = [row[\"media_final\"] for row in rows]\n",
"    global_mean = pd.Series(per_trainer, dtype=\"float\").mean(skipna=True)\n",
"    if pd.isna(global_mean):\n",
"        return len(rows), \"\", \"\"\n",
"    mean_text = f\"{global_mean:.2f}\".replace(\".\", \",\")\n",
"    return len(rows), mean_text, avaliacao_qualitativa(global_mean)\n"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "9db6b739",
"metadata": {},
"outputs": [],
"source": [
"def _copy_run_format(src_run, dst_run, keep_bold=None):\n",
" dst_run.bold = src_run.bold if keep_bold is None else keep_bold\n",
" dst_run.italic = src_run.italic\n",
" dst_run.underline = src_run.underline\n",
" if src_run.font.name:\n",
" dst_run.font.name = src_run.font.name\n",
" dst_run._element.rPr.rFonts.set(qn(\"w:eastAsia\"), src_run.font.name)\n",
" if src_run.font.size:\n",
" dst_run.font.size = src_run.font.size"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "a3e6867c",
"metadata": {},
"outputs": [],
"source": [
"def extract_temas_list(df: pd.DataFrame) -> list[str]:\n",
" temas_cols = [c for c in df.columns if \"_temas\" in str(c).lower()]\n",
" temas = []\n",
" for col in temas_cols:\n",
" s = df[col].dropna().astype(str).str.strip()\n",
" s = s[s != \"\"]\n",
" temas.extend(s.tolist())\n",
" seen = set()\n",
" out = []\n",
" for t in temas:\n",
" k = t.lower()\n",
" if k not in seen:\n",
" seen.add(k)\n",
" out.append(t)\n",
" return out"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "a539bab3",
"metadata": {},
"outputs": [],
"source": [
"def replace_placeholders_docx_bold_values_keep_style(doc, replacements: dict[str, str]):\n",
"    \"\"\"Replace placeholder keys with bold values in body and table paragraphs.\n",
"\n",
"    For every paragraph containing at least one key, the text of all runs is\n",
"    joined, the existing runs are emptied and the paragraph is rebuilt from\n",
"    new runs: plain text keeps the first run's formatting, each replacement\n",
"    value gets the same formatting but forced bold.\n",
"\n",
"    Args:\n",
"        doc: Document modified in place (``doc.paragraphs`` and the cells of\n",
"            ``doc.tables`` are processed).\n",
"        replacements: Mapping of placeholder text to replacement value; values\n",
"            are converted with ``str``. Empty-string keys are ignored.\n",
"    \"\"\"\n",
"    # Longest keys first, so that when two keys match at the same position the\n",
"    # longer one wins. Empty keys are dropped: '' is found at position 0 of any\n",
"    # text and never consumes it, so the scan loop below would never terminate.\n",
"    keys = sorted((k for k in replacements if k), key=len, reverse=True)\n",
"\n",
"    def _replace_in_paragraph(paragraph):\n",
"        if not paragraph.runs:\n",
"            return\n",
"        full_text = \"\".join(run.text for run in paragraph.runs)\n",
"        if not any(k in full_text for k in keys):\n",
"            return\n",
"        # All rebuilt runs inherit the formatting of the first original run.\n",
"        base_run = paragraph.runs[0]\n",
"        for run in paragraph.runs:\n",
"            run.text = \"\"\n",
"        text = full_text\n",
"        while True:\n",
"            # Find the key occurring earliest in the remaining text.\n",
"            next_pos = None\n",
"            next_key = None\n",
"            for k in keys:\n",
"                pos = text.find(k)\n",
"                if pos != -1 and (next_pos is None or pos < next_pos):\n",
"                    next_pos, next_key = pos, k\n",
"            if next_key is None:\n",
"                if text:\n",
"                    r = paragraph.add_run(text)\n",
"                    _copy_run_format(base_run, r, keep_bold=base_run.bold)\n",
"                break\n",
"            before = text[:next_pos]\n",
"            if before:\n",
"                r = paragraph.add_run(before)\n",
"                _copy_run_format(base_run, r, keep_bold=base_run.bold)\n",
"            val = str(replacements[next_key])\n",
"            r_val = paragraph.add_run(val)\n",
"            _copy_run_format(base_run, r_val, keep_bold=True)\n",
"            text = text[next_pos + len(next_key):]\n",
"\n",
"    for p in doc.paragraphs:\n",
"        _replace_in_paragraph(p)\n",
"    for table in doc.tables:\n",
"        for row in table.rows:\n",
"            for cell in row.cells:\n",
"                for p in cell.paragraphs:\n",
"                    _replace_in_paragraph(p)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "78816507",
"metadata": {},
"outputs": [],
"source": [
"def replace_placeholder_with_column_subitems_hanging(\n",
"    doc,\n",
"    df: pd.DataFrame,\n",
"    placeholder: str,\n",
"    column_contains: str,\n",
"    indent_cm: float = 0.5,\n",
"    font_name: str = \"Arial\",\n",
"    font_size_pt: int = 12,\n",
"    deduplicate: bool = True,\n",
"):\n",
"    \"\"\"Replace the first ``placeholder`` paragraph with lettered list items.\n",
"\n",
"    Items are the stripped, non-empty values of every column whose name\n",
"    contains ``column_contains`` (case-insensitive). Each item becomes a\n",
"    paragraph 'a. text;' with a hanging indent of ``indent_cm``; with no items\n",
"    a single 'a. Sem dados;' paragraph is written. Body paragraphs are\n",
"    searched before paragraphs inside ``doc.tables`` cells.\n",
"\n",
"    Args:\n",
"        doc: Document modified in place.\n",
"        df: Source of the item texts.\n",
"        placeholder: Marker text to look for.\n",
"        column_contains: Substring selecting the source columns.\n",
"        indent_cm: Left indent and (negated) first-line indent, in cm.\n",
"        font_name: Font family of the generated runs.\n",
"        font_size_pt: Font size (points) of the generated runs.\n",
"        deduplicate: Drop case-insensitive repeats, keeping the first one.\n",
"    \"\"\"\n",
"    items = []\n",
"    for column in df.columns:\n",
"        if column_contains.lower() not in str(column).lower():\n",
"            continue\n",
"        values = df[column].dropna().astype(str).str.strip()\n",
"        items.extend(values[values != \"\"].tolist())\n",
"    if deduplicate:\n",
"        unique = {}\n",
"        for item in items:\n",
"            unique.setdefault(item.lower(), item)\n",
"        items = list(unique.values())\n",
"    letters = string.ascii_lowercase\n",
"\n",
"    def _emit(anchor: Paragraph, *chunks: str) -> Paragraph:\n",
"        # New paragraph after ``anchor``: one run per chunk, then hanging format.\n",
"        newp = insert_paragraph_after(anchor)\n",
"        for chunk in chunks:\n",
"            force_run_font(newp.add_run(chunk), font_name, font_size_pt)\n",
"        fmt = newp.paragraph_format\n",
"        fmt.left_indent = Cm(indent_cm)\n",
"        fmt.first_line_indent = Cm(-indent_cm)\n",
"        fmt.line_spacing = 1.5\n",
"        fmt.space_before = Pt(0)\n",
"        fmt.space_after = Pt(0)\n",
"        return newp\n",
"\n",
"    def _process_paragraph(p: Paragraph) -> bool:\n",
"        if placeholder not in p.text:\n",
"            return False\n",
"        if not items:\n",
"            _emit(p, \"a. Sem dados;\")\n",
"        else:\n",
"            anchor = p\n",
"            for pos, txt in enumerate(items):\n",
"                # a..z for the first 26 items, then 'a27', 'a28', ...\n",
"                marker = letters[pos] if pos < 26 else f\"a{pos+1}\"\n",
"                anchor = _emit(anchor, f\"{marker}. \", txt, \";\")\n",
"        delete_paragraph(p)\n",
"        return True\n",
"\n",
"    for p in doc.paragraphs:\n",
"        if _process_paragraph(p):\n",
"            return\n",
"    for table in doc.tables:\n",
"        for row in table.rows:\n",
"            for cell in row.cells:\n",
"                for p in cell.paragraphs:\n",
"                    if _process_paragraph(p):\n",
"                        return"
},
{
"cell_type": "code",
"execution_count": 33,
"id": "e3f33ea2",
"metadata": {},
"outputs": [],
"source": [
"def replace_placeholder_with_column_subitems_hanging2(\n",
"    doc,\n",
"    df: pd.DataFrame,\n",
"    placeholder: str,\n",
"    column_contains: str,\n",
"    left_indent_cm: float = 2.75,\n",
"    hanging_cm: float = 0.6,\n",
"    font_name: str = \"Arial\",\n",
"    font_size_pt: int = 12,\n",
"    deduplicate: bool = True,\n",
"):\n",
"    \"\"\"Replace the first ``placeholder`` paragraph with lettered list items.\n",
"\n",
"    Same output as ``replace_placeholder_with_column_subitems_hanging`` except\n",
"    that the left indent and the hanging (negative first-line) indent are set\n",
"    independently.\n",
"\n",
"    Args:\n",
"        doc: Document modified in place.\n",
"        df: Source of the item texts.\n",
"        placeholder: Marker text to look for.\n",
"        column_contains: Substring (case-insensitive) selecting the columns.\n",
"        left_indent_cm: Left indent of the generated paragraphs, in cm.\n",
"        hanging_cm: Size of the hanging indent, in cm.\n",
"        font_name: Font family of the generated runs.\n",
"        font_size_pt: Font size (points) of the generated runs.\n",
"        deduplicate: Drop case-insensitive repeats, keeping the first one.\n",
"    \"\"\"\n",
"    items = []\n",
"    for column in df.columns:\n",
"        if column_contains.lower() not in str(column).lower():\n",
"            continue\n",
"        values = df[column].dropna().astype(str).str.strip()\n",
"        items.extend(values[values != \"\"].tolist())\n",
"    if deduplicate:\n",
"        unique = {}\n",
"        for item in items:\n",
"            unique.setdefault(item.lower(), item)\n",
"        items = list(unique.values())\n",
"    letters = string.ascii_lowercase\n",
"\n",
"    def _emit(anchor: Paragraph, *chunks: str) -> Paragraph:\n",
"        # New paragraph after ``anchor``: one run per chunk, then hanging format.\n",
"        newp = insert_paragraph_after(anchor)\n",
"        for chunk in chunks:\n",
"            force_run_font(newp.add_run(chunk), font_name, font_size_pt)\n",
"        fmt = newp.paragraph_format\n",
"        fmt.left_indent = Cm(left_indent_cm)\n",
"        fmt.first_line_indent = Cm(-hanging_cm)\n",
"        fmt.line_spacing = 1.5\n",
"        fmt.space_before = Pt(0)\n",
"        fmt.space_after = Pt(0)\n",
"        return newp\n",
"\n",
"    def _process_paragraph(p: Paragraph) -> bool:\n",
"        if placeholder not in p.text:\n",
"            return False\n",
"        if not items:\n",
"            _emit(p, \"a. Sem dados;\")\n",
"        else:\n",
"            anchor = p\n",
"            for pos, txt in enumerate(items):\n",
"                # a..z for the first 26 items, then 'a27', 'a28', ...\n",
"                marker = letters[pos] if pos < 26 else f\"a{pos+1}\"\n",
"                anchor = _emit(anchor, f\"{marker}. \", txt, \";\")\n",
"        delete_paragraph(p)\n",
"        return True\n",
"\n",
"    for p in doc.paragraphs:\n",
"        if _process_paragraph(p):\n",
"            return\n",
"    for table in doc.tables:\n",
"        for row in table.rows:\n",
"            for cell in row.cells:\n",
"                for p in cell.paragraphs:\n",
"                    if _process_paragraph(p):\n",
"                        return"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "2905d4ff",
"metadata": {},
"outputs": [],
"source": [
"def replace_placeholder_with_temas_smart(\n",
"    doc,\n",
"    df: pd.DataFrame,\n",
"    placeholder: str = \"{{TEMAS}}\",\n",
"    indent_cm: float = 0.5,\n",
"    font_name: str = \"Arial\",\n",
"    font_size_pt: int = 12,\n",
"    deduplicate: bool = True,\n",
"    max_items: int | None = None,\n",
"    **_ignored,\n",
"):\n",
"    \"\"\"Replace ``placeholder`` with the lettered list of themes found in ``df``.\n",
"\n",
"    Themes come from ``extract_temas_list(df)``; blank entries are dropped,\n",
"    duplicates are optionally removed ignoring case (first spelling wins) and the\n",
"    list is optionally cut to ``max_items``. An empty result is rendered as the\n",
"    single entry 'Sem dados'. Extra keyword arguments are accepted and ignored.\n",
"    Rendering is delegated to ``replace_placeholder_with_column_subitems_hanging``\n",
"    and its return value is passed through.\n",
"    \"\"\"\n",
"    stripped = (str(raw).strip() for raw in (extract_temas_list(df) or []))\n",
"    temas = [text for text in stripped if text]\n",
"    if deduplicate:\n",
"        first_seen = {}\n",
"        for text in temas:\n",
"            first_seen.setdefault(text.lower(), text)\n",
"        temas = list(first_seen.values())\n",
"    if max_items is not None:\n",
"        temas = temas[:max_items]\n",
"    # The list is already cleaned here, so the renderer is told not to de-duplicate again.\n",
"    rows = pd.DataFrame({\"_Temas\": temas or [\"Sem dados\"]})\n",
"    return replace_placeholder_with_column_subitems_hanging(\n",
"        doc,\n",
"        rows,\n",
"        placeholder=placeholder,\n",
"        column_contains=\"_temas\",\n",
"        indent_cm=indent_cm,\n",
"        font_name=font_name,\n",
"        font_size_pt=font_size_pt,\n",
"        deduplicate=False\n",
"    )"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "f14de057",
"metadata": {},
"outputs": [],
"source": [
"def extract_desenvolver_list(df: pd.DataFrame) -> list[str]:\n",
"    \"\"\"Collect the unique free-text answers from the '_desenvolver' columns.\n",
"\n",
"    Every column whose name contains '_desenvolver' (case-insensitive) is read in\n",
"    column order; missing and blank values are dropped, text is stripped, and\n",
"    duplicates are removed ignoring case, keeping the first spelling seen.\n",
"    \"\"\"\n",
"    first_seen = {}\n",
"    for name in df.columns:\n",
"        if \"_desenvolver\" not in str(name).lower():\n",
"            continue\n",
"        values = df[name].dropna().astype(str).str.strip()\n",
"        for text in values[values != \"\"].tolist():\n",
"            first_seen.setdefault(text.lower(), text)\n",
"    return list(first_seen.values())"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "6779cbad",
"metadata": {},
"outputs": [],
"source": [
"def ollama_summarize_desenvolver(\n",
"    items: list[str],\n",
"    max_items: int = 6,\n",
"    model: str = \"llama3.1:8b\",\n",
"    timeout: float = 30.0\n",
") -> list[str]:\n",
"    \"\"\"Ask the local Ollama server to condense the 'to develop' remarks into a short list.\n",
"\n",
"    Sends one non-streaming POST to ``http://localhost:11434/api/generate`` and\n",
"    keeps only the answer lines that start with '-' or '•', stripped of the bullet\n",
"    and of surrounding spaces, dots and semicolons. At most ``max_items`` entries\n",
"    are returned. HTTP error statuses raise through ``raise_for_status``.\n",
"    \"\"\"\n",
"    items_txt = \"\\n\".join(f\"- {t}\" for t in items)\n",
"    prompt = f\"\"\"\n",
"És um analista a escrever um relatório oficial de avaliação.\n",
"\n",
"Tens uma lista de aspetos a desenvolver/melhorar apontados pelos formandos. Faz o seguinte:\n",
"1) Agrupa itens repetidos/semelhantes;\n",
"2) Seleciona os mais importantes e recorrentes;\n",
"3) Reescreve numa lista curta, clara e formal (Português de Portugal);\n",
"4) NÃO inventes novos pontos;\n",
"5) No máximo {max_items} itens;\n",
"6) Frases curtas, em formato de sintagma nominal (ex.: \"Melhoria da componente prática\", \"Aprofundamento de ...\").\n",
"\n",
"Itens:\n",
"{items_txt}\n",
"\n",
"Devolve APENAS a lista final no formato:\n",
"- Item 1\n",
"- Item 2\n",
"- Item 3\n",
"\"\"\".strip()\n",
"    reply = requests.post(\n",
"        \"http://localhost:11434/api/generate\",\n",
"        json={\n",
"            \"model\": model,\n",
"            \"prompt\": prompt,\n",
"            \"stream\": False,\n",
"            \"options\": {\"temperature\": 0.2},\n",
"        },\n",
"        timeout=timeout,\n",
"    )\n",
"    reply.raise_for_status()\n",
"    bullets = []\n",
"    for raw_line in reply.json().get(\"response\", \"\").splitlines():\n",
"        candidate = raw_line.strip()\n",
"        # Anything that is not a bullet line (headings, chatter) is discarded.\n",
"        if not candidate.startswith((\"-\", \"•\")):\n",
"            continue\n",
"        cleaned = candidate.lstrip(\"-•\").strip(\" .;\")\n",
"        if cleaned:\n",
"            bullets.append(cleaned)\n",
"    return bullets[:max_items]\n"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "7e8deb1f",
"metadata": {},
"outputs": [],
"source": [
"def replace_placeholder_with_desenvolver_smart(\n",
"    doc,\n",
"    df: pd.DataFrame,\n",
"    placeholder: str = \"{{DESENVOLVER}}\",\n",
"    indent_cm: float = 1.5,\n",
"    font_name: str = \"Arial\",\n",
"    font_size_pt: int = 12,\n",
"    deduplicate: bool = True,\n",
"    use_ollama: bool = True,\n",
"    ollama_model: str = \"llama3.1:8b\",\n",
"    max_items: int = 6,\n",
"):\n",
"    \"\"\"Fill ``placeholder`` with the 'to develop' list, summarised by Ollama when possible.\n",
"\n",
"    When ``df`` has '_desenvolver' answers, ``use_ollama`` is set and the local\n",
"    server responds, the LLM summary (at most ``max_items`` entries) is rendered.\n",
"    In every other case (no answers, Ollama disabled or unreachable, an error, or\n",
"    an empty summary) the '_desenvolver' columns of ``df`` are rendered directly.\n",
"\n",
"    NOTE: a later cell in this notebook defines a function with the same name;\n",
"    once that cell has run, it replaces this implementation.\n",
"    \"\"\"\n",
"    source_df, dedupe = df, deduplicate\n",
"    raw = extract_desenvolver_list(df)\n",
"    if raw and use_ollama and ollama_available():\n",
"        try:\n",
"            summary = ollama_summarize_desenvolver(\n",
"                raw, max_items=max_items, model=ollama_model\n",
"            )\n",
"        except Exception:\n",
"            # Best effort: any LLM failure falls back to the raw answers.\n",
"            summary = None\n",
"        if summary:\n",
"            # The summary is rendered as returned, so no second de-duplication.\n",
"            source_df, dedupe = pd.DataFrame({\"_desenvolver\": summary}), False\n",
"    return replace_placeholder_with_column_subitems_hanging(\n",
"        doc, source_df,\n",
"        placeholder=placeholder,\n",
"        column_contains=\"_desenvolver\",\n",
"        indent_cm=indent_cm,\n",
"        font_name=font_name,\n",
"        font_size_pt=font_size_pt,\n",
"        deduplicate=dedupe\n",
"    )"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "f5249e3c",
"metadata": {},
"outputs": [],
"source": [
"def extract_incluir_list(df: pd.DataFrame) -> list[str]:\n",
"    \"\"\"Collect the unique free-text answers from the '_incluir' columns.\n",
"\n",
"    Every column whose name contains '_incluir' (case-insensitive) is read in\n",
"    column order; missing and blank values are dropped, text is stripped, and\n",
"    duplicates are removed ignoring case, keeping the first spelling seen.\n",
"    \"\"\"\n",
"    first_seen = {}\n",
"    for name in df.columns:\n",
"        if \"_incluir\" not in str(name).lower():\n",
"            continue\n",
"        values = df[name].dropna().astype(str).str.strip()\n",
"        for text in values[values != \"\"].tolist():\n",
"            first_seen.setdefault(text.lower(), text)\n",
"    return list(first_seen.values())\n"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "7f8a9eb4",
"metadata": {},
"outputs": [],
"source": [
"def ollama_summarize_incluir(\n",
"    items: list[str],\n",
"    max_items: int = 6,\n",
"    model: str = \"llama3.1:8b\",\n",
"    timeout: float = 30.0\n",
") -> list[str]:\n",
"    \"\"\"Ask the local Ollama server to condense the 'to include' suggestions into a short list.\n",
"\n",
"    Sends one non-streaming POST to ``http://localhost:11434/api/generate`` and\n",
"    keeps only the answer lines that start with '-' or '•', stripped of the bullet\n",
"    and of surrounding spaces, dots and semicolons. At most ``max_items`` entries\n",
"    are returned. HTTP error statuses raise through ``raise_for_status``.\n",
"    \"\"\"\n",
"    items_txt = \"\\n\".join(f\"- {t}\" for t in items)\n",
"    prompt = f\"\"\"\n",
"És um analista a escrever um relatório oficial de avaliação.\n",
"\n",
"Tens uma lista de conteúdos/temas que os formandos sugerem que sejam incluídos. Faz o seguinte:\n",
"1) Agrupa itens repetidos/semelhantes;\n",
"2) Seleciona os mais importantes e recorrentes;\n",
"3) Reescreve numa lista curta, clara e formal (Português de Portugal);\n",
"4) NÃO inventes novos pontos;\n",
"5) No máximo {max_items} itens;\n",
"6) Frases curtas e objetivas.\n",
"\n",
"Itens:\n",
"{items_txt}\n",
"\n",
"Devolve APENAS a lista final no formato:\n",
"- Item 1\n",
"- Item 2\n",
"- Item 3\n",
"\"\"\".strip()\n",
"\n",
"    reply = requests.post(\n",
"        \"http://localhost:11434/api/generate\",\n",
"        json={\n",
"            \"model\": model,\n",
"            \"prompt\": prompt,\n",
"            \"stream\": False,\n",
"            \"options\": {\"temperature\": 0.2},\n",
"        },\n",
"        timeout=timeout,\n",
"    )\n",
"    reply.raise_for_status()\n",
"\n",
"    bullets = []\n",
"    for raw_line in reply.json().get(\"response\", \"\").splitlines():\n",
"        candidate = raw_line.strip()\n",
"        # Anything that is not a bullet line (headings, chatter) is discarded.\n",
"        if not candidate.startswith((\"-\", \"•\")):\n",
"            continue\n",
"        cleaned = candidate.lstrip(\"-•\").strip(\" .;\")\n",
"        if cleaned:\n",
"            bullets.append(cleaned)\n",
"\n",
"    return bullets[:max_items]\n"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "b1a43e80",
"metadata": {},
"outputs": [],
"source": [
"def replace_placeholder_with_incluir_smart(\n",
"    doc,\n",
"    df: pd.DataFrame,\n",
"    placeholder: str = \"{{INCLUIR}}\",\n",
"    indent_cm: float = 1.5,\n",
"    font_name: str = \"Arial\",\n",
"    font_size_pt: int = 12,\n",
"    deduplicate: bool = True,\n",
"    use_ollama: bool = True,\n",
"    ollama_model: str = \"llama3.1:8b\",\n",
"    max_items: int = 6,\n",
"):\n",
"    \"\"\"Fill ``placeholder`` with the 'to include' list, summarised by Ollama when possible.\n",
"\n",
"    When ``df`` has '_incluir' answers, ``use_ollama`` is set and the local server\n",
"    responds, the LLM summary (at most ``max_items`` entries) is rendered. In every\n",
"    other case (no answers, Ollama disabled or unreachable, an error, or an empty\n",
"    summary) the '_incluir' columns of ``df`` are rendered directly.\n",
"\n",
"    NOTE: a later cell in this notebook defines a function with the same name;\n",
"    once that cell has run, it replaces this implementation.\n",
"    \"\"\"\n",
"    source_df, dedupe = df, deduplicate\n",
"    raw = extract_incluir_list(df)\n",
"    if raw and use_ollama and ollama_available():\n",
"        try:\n",
"            summary = ollama_summarize_incluir(\n",
"                raw, max_items=max_items, model=ollama_model\n",
"            )\n",
"        except Exception:\n",
"            # Best effort: any LLM failure falls back to the raw answers.\n",
"            summary = None\n",
"        if summary:\n",
"            # The summary is rendered as returned, so no second de-duplication.\n",
"            source_df, dedupe = pd.DataFrame({\"_incluir\": summary}), False\n",
"    return replace_placeholder_with_column_subitems_hanging(\n",
"        doc, source_df,\n",
"        placeholder=placeholder,\n",
"        column_contains=\"_incluir\",\n",
"        indent_cm=indent_cm,\n",
"        font_name=font_name,\n",
"        font_size_pt=font_size_pt,\n",
"        deduplicate=dedupe\n",
"    )"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "615ac60c",
"metadata": {},
"outputs": [],
"source": [
"def ollama_summarize_observacoes_paragraph(\n",
"    items: list[str],\n",
"    model: str = \"llama3.1:8b\",\n",
"    timeout: float = 45.0\n",
") -> str:\n",
"    \"\"\"Ask the local Ollama server for a one-paragraph synthesis of the observations.\n",
"\n",
"    Sends one non-streaming POST to ``http://localhost:11434/api/generate``. The\n",
"    answer is flattened: blank lines are dropped and the remaining lines are\n",
"    stripped and joined with single spaces. HTTP error statuses raise through\n",
"    ``raise_for_status``.\n",
"    \"\"\"\n",
"    items_txt = \"\\n\".join(f\"- {t}\" for t in items)\n",
"\n",
"    prompt = f\"\"\"\n",
"És um analista a escrever um relatório oficial de avaliação.\n",
"\n",
"Tens observações livres escritas pelos formandos. Produz um ÚNICO PARÁGRAFO de síntese:\n",
"- Português de Portugal, tom formal e objetivo;\n",
"- Não inventes informação;\n",
"- Agrupa ideias repetidas;\n",
"- Evita exemplos pessoais e detalhes identificáveis;\n",
"- 3 a 6 frases, no máximo ~120 palavras.\n",
"\n",
"Observações:\n",
"{items_txt}\n",
"Devolve APENAS o parágrafo final (sem tópicos, sem títulos, sem listas).\n",
"\"\"\".strip()\n",
"    reply = requests.post(\n",
"        \"http://localhost:11434/api/generate\",\n",
"        json={\n",
"            \"model\": model,\n",
"            \"prompt\": prompt,\n",
"            \"stream\": False,\n",
"            \"options\": {\"temperature\": 0.2},\n",
"        },\n",
"        timeout=timeout,\n",
"    )\n",
"    reply.raise_for_status()\n",
"    answer = reply.json().get(\"response\", \"\").strip()\n",
"    # Collapse a multi-line answer into a single paragraph.\n",
"    fragments = (chunk.strip() for chunk in answer.splitlines())\n",
"    return \" \".join(fragment for fragment in fragments if fragment)\n"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "2a130fdd",
"metadata": {},
"outputs": [],
"source": [
"def extract_observacoes_list(df: pd.DataFrame) -> list[str]:\n",
"    \"\"\"Collect the unique free-text answers from the '_observ' columns.\n",
"\n",
"    Every column whose name contains '_observ' (case-insensitive) is read in\n",
"    column order; missing and blank values are dropped, text is stripped, and\n",
"    duplicates are removed ignoring case, keeping the first spelling seen.\n",
"    \"\"\"\n",
"    first_seen = {}\n",
"    for name in df.columns:\n",
"        if \"_observ\" not in str(name).lower():\n",
"            continue\n",
"        values = df[name].dropna().astype(str).str.strip()\n",
"        for text in values[values != \"\"].tolist():\n",
"            first_seen.setdefault(text.lower(), text)\n",
"    return list(first_seen.values())"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "218d5649",
"metadata": {},
"outputs": [],
"source": [
"def _iter_paragraphs_in_table(tbl):\n",
"    \"\"\"Yield every paragraph inside ``tbl``, descending into nested tables.\n",
"\n",
"    Cells are visited row by row; for each cell its own paragraphs come first,\n",
"    followed by the paragraphs of any tables nested in that cell.\n",
"    \"\"\"\n",
"    for row in tbl.rows:\n",
"        for cell in row.cells:\n",
"            yield from cell.paragraphs\n",
"            for nested in cell.tables:\n",
"                yield from _iter_paragraphs_in_table(nested)\n",
"def iter_all_paragraphs_everywhere(doc):\n",
"    \"\"\"Yield paragraphs from the body, the body tables and every header/footer.\n",
"\n",
"    Order: ``doc.paragraphs``, then paragraphs inside ``doc.tables`` (nested\n",
"    tables included), then for each section its default, first-page and\n",
"    even-page headers and footers (paragraphs first, then their tables).\n",
"    \"\"\"\n",
"    yield from doc.paragraphs\n",
"    for table in doc.tables:\n",
"        yield from _iter_paragraphs_in_table(table)\n",
"    for section in doc.sections:\n",
"        parts = (\n",
"            section.header,\n",
"            section.footer,\n",
"            section.first_page_header,\n",
"            section.first_page_footer,\n",
"            section.even_page_header,\n",
"            section.even_page_footer,\n",
"        )\n",
"        for part in parts:\n",
"            yield from part.paragraphs\n",
"            for table in part.tables:\n",
"                yield from _iter_paragraphs_in_table(table)\n"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "b09a33b9",
"metadata": {},
"outputs": [],
"source": [
"def replace_placeholder_with_observacoes_smart(\n",
"    doc,\n",
"    df: pd.DataFrame,\n",
"    placeholder: str = \"{{OBSERVACOES}}\",\n",
"    indent_cm: float = 0.5,\n",
"    font_name: str = \"Arial\",\n",
"    font_size_pt: int = 12,\n",
"    deduplicate: bool = True,\n",
"    use_ollama: bool = True,\n",
"    ollama_model: str = \"llama3.1:8b\",\n",
"    max_items: int | None = None,\n",
") -> int:\n",
"    \"\"\"Replace every paragraph containing ``placeholder`` with the observations.\n",
"\n",
"    Observations come from ``extract_observacoes_list(df)``; blanks are dropped,\n",
"    duplicates optionally removed ignoring case, and the list optionally cut to\n",
"    ``max_items``. When Ollama is enabled, reachable and returns text, a single\n",
"    summary paragraph is written; otherwise a lettered list ('a. text;') with a\n",
"    hanging indent is written, or 'a. Sem dados;' when there is nothing to list.\n",
"    All paragraphs yielded by ``iter_all_paragraphs_everywhere`` are searched.\n",
"    Returns the number of placeholder paragraphs replaced.\n",
"    \"\"\"\n",
"    stripped = (str(raw).strip() for raw in (extract_observacoes_list(df) or []))\n",
"    notes = [text for text in stripped if text]\n",
"    if deduplicate:\n",
"        first_seen = {}\n",
"        for text in notes:\n",
"            first_seen.setdefault(text.lower(), text)\n",
"        notes = list(first_seen.values())\n",
"    if max_items is not None:\n",
"        notes = notes[:max_items]\n",
"\n",
"    summary = None\n",
"    if notes and use_ollama and ollama_available():\n",
"        try:\n",
"            summary = ollama_summarize_observacoes_paragraph(notes, model=ollama_model)\n",
"            if summary:\n",
"                summary = summary.strip()\n",
"        except Exception:\n",
"            # Best effort: any LLM failure falls back to the lettered list.\n",
"            summary = None\n",
"\n",
"    alphabet = string.ascii_lowercase\n",
"\n",
"    def _emit(anchor: Paragraph, pieces, hanging: bool) -> Paragraph:\n",
"        # New paragraph right after `anchor`: one run per piece, then formatting.\n",
"        par = insert_paragraph_after(anchor)\n",
"        for piece in pieces:\n",
"            force_run_font(par.add_run(piece), font_name, font_size_pt, bold=False)\n",
"        fmt = par.paragraph_format\n",
"        fmt.left_indent = Cm(indent_cm)\n",
"        if hanging:\n",
"            fmt.first_line_indent = Cm(-0.6)\n",
"        fmt.line_spacing = 1.5\n",
"        fmt.space_before = Pt(0)\n",
"        fmt.space_after = Pt(0)\n",
"        return par\n",
"\n",
"    replaced = 0\n",
"    # Snapshot the paragraphs first: the document is modified while looping.\n",
"    for target in list(iter_all_paragraphs_everywhere(doc)):\n",
"        if placeholder not in (target.text or \"\"):\n",
"            continue\n",
"        if summary:\n",
"            _emit(target, (summary,), hanging=False)\n",
"        else:\n",
"            anchor = target\n",
"            for pos, text in enumerate(notes or [\"Sem dados\"]):\n",
"                label = alphabet[pos] if pos < 26 else f\"a{pos+1}\"\n",
"                anchor = _emit(anchor, (f\"{label}. \", text.rstrip(\".;\"), \";\"), hanging=True)\n",
"        delete_paragraph(target)\n",
"        replaced += 1\n",
"    return replaced\n"
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "e4df3360",
"metadata": {},
"outputs": [],
"source": [
"def build_melhoria_checklist_items(\n",
"    temas_items: list[str] | None,\n",
"    desenvolver_items: list[str] | None,\n",
"    incluir_items: list[str] | None,\n",
"    observacoes_text_or_items: str | list[str] | None = None,\n",
"    use_ollama: bool = True,\n",
"    ollama_model: str = \"llama3.1:8b\",\n",
"    max_items: int = 10,\n",
") -> list[str]:\n",
"    \"\"\"Build the improvement checklist from the themes / develop / include outputs.\n",
"\n",
"    With ``use_ollama`` set and the local server reachable, the three lists plus\n",
"    the observations are sent to the model and the bullet lines of its answer\n",
"    (at most ``max_items``) are returned. If that path is skipped, fails or\n",
"    yields nothing, the fallback merges the three lists in order, strips leading\n",
"    and trailing bullet characters, removes duplicates ignoring case and returns\n",
"    at most ``max_items`` entries, or ['Sem dados'] when nothing is left. The\n",
"    observations are only used in the LLM prompt, not in the fallback.\n",
"    \"\"\"\n",
"    def _clean(values) -> list[str]:\n",
"        # str() first so a non-string entry cannot crash on .strip().\n",
"        stripped = (str(value).strip() for value in (values or []))\n",
"        return [text for text in stripped if text]\n",
"\n",
"    temas_items = _clean(temas_items)\n",
"    desenvolver_items = _clean(desenvolver_items)\n",
"    incluir_items = _clean(incluir_items)\n",
"    if isinstance(observacoes_text_or_items, str):\n",
"        observacoes_text = observacoes_text_or_items.strip()\n",
"    elif isinstance(observacoes_text_or_items, list):\n",
"        observacoes_text = \"\\n\".join(f\"- {x}\" for x in _clean(observacoes_text_or_items))\n",
"    else:\n",
"        observacoes_text = \"\"\n",
"    if use_ollama and ollama_available():\n",
"        try:\n",
"            temas_txt = \"\\n\".join(f\"- {t}\" for t in temas_items)\n",
"            des_txt = \"\\n\".join(f\"- {t}\" for t in desenvolver_items)\n",
"            inc_txt = \"\\n\".join(f\"- {t}\" for t in incluir_items)\n",
"            prompt = f\"\"\"\n",
"És um analista a escrever um relatório oficial de avaliação.\n",
"\n",
"A partir dos seguintes outputs, cria uma CHECKLIST de melhorias (ações).\n",
"Regras:\n",
"- Não inventes pontos.\n",
"- Junta redundâncias.\n",
"- Escreve cada item como ação (ex.: \"Reforçar ...\", \"Incluir ...\", \"Aprofundar ...\", \"Melhorar ...\").\n",
"- No máximo {max_items} itens.\n",
"- Devolve APENAS lista em bullets \"- ...\".\n",
"\n",
"TEMAS:\n",
"{temas_txt}\n",
"\n",
"A DESENVOLVER:\n",
"{des_txt}\n",
"\n",
"A INCLUIR:\n",
"{inc_txt}\n",
"\n",
"OBSERVAÇÕES:\n",
"{observacoes_text}\n",
"\"\"\".strip()\n",
"\n",
"            payload = {\n",
"                \"model\": ollama_model,\n",
"                \"prompt\": prompt,\n",
"                \"stream\": False,\n",
"                \"options\": {\"temperature\": 0.2},\n",
"            }\n",
"            r = requests.post(\"http://localhost:11434/api/generate\", json=payload, timeout=45.0)\n",
"            r.raise_for_status()\n",
"            text = r.json().get(\"response\", \"\").strip()\n",
"\n",
"            llm_items = []\n",
"            for line in text.splitlines():\n",
"                line = line.strip()\n",
"                # Only bullet lines count; any other text in the answer is ignored.\n",
"                if line.startswith((\"-\", \"•\")):\n",
"                    item = line.lstrip(\"-•\").strip(\" .;\")\n",
"                    if item:\n",
"                        llm_items.append(item)\n",
"\n",
"            llm_items = llm_items[:max_items]\n",
"            if llm_items:\n",
"                return llm_items\n",
"        except Exception:\n",
"            # Deliberate best effort: any LLM/network failure uses the fallback below.\n",
"            pass\n",
"    # Fallback: merge in order and de-duplicate ignoring case (first spelling wins).\n",
"    first_seen = {}\n",
"    for entry in temas_items + desenvolver_items + incluir_items:\n",
"        entry = entry.strip().strip(\"•-\").strip()\n",
"        if entry:\n",
"            first_seen.setdefault(entry.lower(), entry)\n",
"    out = list(first_seen.values())\n",
"    return out[:max_items] if out else [\"Sem dados\"]"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "d430cad8",
"metadata": {},
"outputs": [],
"source": [
"def replace_placeholder_with_checklist_subitems(\n",
"    doc,\n",
"    checklist_items: list[str],\n",
"    placeholder: str = \"{{CHECKLIST_MELHORAR}}\",\n",
"    indent_cm: float = 0.5,\n",
"    font_name: str = \"Arial\",\n",
"    font_size_pt: int = 12,\n",
"    checkbox: str = \"-\",\n",
"):\n",
"    \"\"\"Swap the first paragraph containing ``placeholder`` for the checklist.\n",
"\n",
"    Each item becomes its own paragraph of the form 'a. <checkbox> text;' (a\n",
"    trailing '.' or ';' on the item is removed first), indented by ``indent_cm``\n",
"    with an equal negative first-line indent. An empty or missing list is\n",
"    rendered as the single item 'Sem dados'. Body paragraphs are searched first,\n",
"    then the cells of ``doc.tables``; only the first match is replaced.\n",
"    Returns ``None``.\n",
"    \"\"\"\n",
"    alphabet = string.ascii_lowercase\n",
"\n",
"    def _candidates():\n",
"        # Body paragraphs first, then every paragraph in the cells of doc.tables.\n",
"        yield from doc.paragraphs\n",
"        for table in doc.tables:\n",
"            for row in table.rows:\n",
"                for cell in row.cells:\n",
"                    yield from cell.paragraphs\n",
"\n",
"    for target in _candidates():\n",
"        if placeholder not in target.text:\n",
"            continue\n",
"\n",
"        anchor = target\n",
"        for pos, entry in enumerate(checklist_items or [\"Sem dados\"]):\n",
"            label = alphabet[pos] if pos < 26 else f\"a{pos+1}\"\n",
"            par = insert_paragraph_after(anchor)\n",
"            for piece in (f\"{label}. {checkbox} \", entry.strip().rstrip(\".;\"), \";\"):\n",
"                force_run_font(par.add_run(piece), font_name, font_size_pt)\n",
"            fmt = par.paragraph_format\n",
"            fmt.left_indent = Cm(indent_cm)\n",
"            fmt.first_line_indent = Cm(-indent_cm)\n",
"            fmt.line_spacing = 1.5\n",
"            fmt.space_before = Pt(0)\n",
"            fmt.space_after = Pt(0)\n",
"            anchor = par\n",
"\n",
"        delete_paragraph(target)\n",
"        return"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "c92e42dc",
"metadata": {},
"outputs": [],
"source": [
"def replace_placeholder_with_observacoes_smart2(\n",
"    doc,\n",
"    df: pd.DataFrame,\n",
"    placeholder: str = \"{{OBSERVACOES2}}\",\n",
"    indent_cm: float = 2.75,\n",
"    font_name: str = \"Arial\",\n",
"    font_size_pt: int = 12,\n",
"    deduplicate: bool = True,\n",
"    max_items: int | None = None,\n",
"    **_ignored,\n",
"):\n",
"    \"\"\"Replace ``placeholder`` with the lettered list of observations found in ``df``.\n",
"\n",
"    Observations come from ``extract_observacoes_list(df)``; blanks are dropped,\n",
"    duplicates optionally removed ignoring case (first spelling wins) and the\n",
"    list optionally cut to ``max_items``. An empty result is rendered as the\n",
"    single entry 'Sem dados'. Extra keyword arguments are accepted and ignored.\n",
"    Rendering is delegated to ``replace_placeholder_with_column_subitems_hanging2``\n",
"    with ``indent_cm`` as the left indent and a 0.6 cm hanging indent; its return\n",
"    value is passed through.\n",
"    \"\"\"\n",
"    stripped = (str(raw).strip() for raw in (extract_observacoes_list(df) or []))\n",
"    itens = [text for text in stripped if text]\n",
"    if deduplicate:\n",
"        first_seen = {}\n",
"        for text in itens:\n",
"            first_seen.setdefault(text.lower(), text)\n",
"        itens = list(first_seen.values())\n",
"    if max_items is not None:\n",
"        itens = itens[:max_items]\n",
"    if not itens:\n",
"        itens = [\"Sem dados\"]\n",
"    df_tmp = pd.DataFrame({\"_Observacoes\": itens})\n",
"    # Render the frame built above (not a notebook-level global) and honour the\n",
"    # caller's placeholder/indent/font. De-duplication was already applied according\n",
"    # to `deduplicate`, so the renderer must not redo it.\n",
"    return replace_placeholder_with_column_subitems_hanging2(\n",
"        doc, df_tmp,\n",
"        placeholder=placeholder,\n",
"        column_contains=\"_observa\",\n",
"        left_indent_cm=indent_cm,\n",
"        hanging_cm=0.6,\n",
"        font_name=font_name,\n",
"        font_size_pt=font_size_pt,\n",
"        deduplicate=False\n",
"    )\n"
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "c9bc1475",
"metadata": {},
"outputs": [],
"source": [
"def replace_placeholder_with_incluir_smart(\n",
"    doc,\n",
"    df: pd.DataFrame,\n",
"    placeholder: str = \"{{INCLUIR}}\",\n",
"    indent_cm: float = 0.5,\n",
"    font_name: str = \"Arial\",\n",
"    font_size_pt: int = 12,\n",
"    deduplicate: bool = True,\n",
"    max_items: int | None = None,\n",
"    **_ignored,\n",
"):\n",
"    \"\"\"Replace ``placeholder`` with the lettered list of 'to include' answers.\n",
"\n",
"    Answers come from ``extract_incluir_list(df)``; blanks are dropped, duplicates\n",
"    optionally removed ignoring case and the list optionally cut to ``max_items``.\n",
"    An empty result is rendered as the single entry 'Sem dados'.\n",
"\n",
"    NOTE: this redefines a function of the same name from an earlier cell. No\n",
"    LLM is called here; extra keyword arguments (for example ``use_ollama``) are\n",
"    accepted through ``**_ignored`` and have no effect.\n",
"    \"\"\"\n",
"    stripped = (str(raw).strip() for raw in (extract_incluir_list(df) or []))\n",
"    itens = [text for text in stripped if text]\n",
"    if deduplicate:\n",
"        first_seen = {}\n",
"        for text in itens:\n",
"            first_seen.setdefault(text.lower(), text)\n",
"        itens = list(first_seen.values())\n",
"    if max_items is not None:\n",
"        itens = itens[:max_items]\n",
"    rows = pd.DataFrame({\"_Incluir\": itens or [\"Sem dados\"]})\n",
"    return replace_placeholder_with_column_subitems_hanging(\n",
"        doc,\n",
"        rows,\n",
"        placeholder=placeholder,\n",
"        column_contains=\"_incluir\",\n",
"        indent_cm=indent_cm,\n",
"        font_name=font_name,\n",
"        font_size_pt=font_size_pt,\n",
"        deduplicate=False\n",
"    )\n"
]
},
{
"cell_type": "code",
"execution_count": 49,
"id": "070ae13a",
"metadata": {},
"outputs": [],
"source": [
"def replace_placeholder_with_desenvolver_smart(\n",
"    doc,\n",
"    df: pd.DataFrame,\n",
"    placeholder: str = \"{{DESENVOLVER}}\",\n",
"    indent_cm: float = 0.5,\n",
"    font_name: str = \"Arial\",\n",
"    font_size_pt: int = 12,\n",
"    deduplicate: bool = True,\n",
"    max_items: int | None = None,\n",
"    **_ignored,\n",
"):\n",
"    \"\"\"Replace ``placeholder`` with the lettered list of 'to develop' answers.\n",
"\n",
"    Answers come from ``extract_desenvolver_list(df)``; blanks are dropped,\n",
"    duplicates optionally removed ignoring case and the list optionally cut to\n",
"    ``max_items``. An empty result is rendered as the single entry 'Sem dados'.\n",
"\n",
"    NOTE: this redefines a function of the same name from an earlier cell. No\n",
"    LLM is called here; extra keyword arguments (for example ``use_ollama``) are\n",
"    accepted through ``**_ignored`` and have no effect.\n",
"    \"\"\"\n",
"    stripped = (str(raw).strip() for raw in (extract_desenvolver_list(df) or []))\n",
"    itens = [text for text in stripped if text]\n",
"    if deduplicate:\n",
"        first_seen = {}\n",
"        for text in itens:\n",
"            first_seen.setdefault(text.lower(), text)\n",
"        itens = list(first_seen.values())\n",
"    if max_items is not None:\n",
"        itens = itens[:max_items]\n",
"    rows = pd.DataFrame({\"_Desenvolver\": itens or [\"Sem dados\"]})\n",
"    return replace_placeholder_with_column_subitems_hanging(\n",
"        doc,\n",
"        rows,\n",
"        placeholder=placeholder,\n",
"        column_contains=\"_desenvolver\",\n",
"        indent_cm=indent_cm,\n",
"        font_name=font_name,\n",
"        font_size_pt=font_size_pt,\n",
"        deduplicate=False\n",
"    )\n"
]
},
{
"cell_type": "code",
"execution_count": 50,
"id": "9ee1a5cf",
"metadata": {},
"outputs": [],
"source": [
"def delete_all_between_anchors_xml(\n",
"    doc,\n",
"    start=\"{{ANCORA1}}\",\n",
"    end=\"{{ANCORA2}}\",\n",
"    debug=False,\n",
"    max_passes=10_000,\n",
") -> int:\n",
"    \"\"\"Delete every run of top-level body blocks delimited by ``start`` and ``end``.\n",
"\n",
"    On each pass the direct children of the document body are scanned for the\n",
"    first one whose text contains ``start`` and then, among the children after\n",
"    it, the first one whose text contains ``end``. Both marker blocks and all\n",
"    blocks between them are removed. Passes repeat until no such pair is found\n",
"    or ``max_passes`` is reached. Returns the number of blocks removed.\n",
"\n",
"    NOTE: a later cell redefines this function with different default anchors;\n",
"    once that cell has run, calls relying on these defaults use the other pair.\n",
"    \"\"\"\n",
"    body = doc._element.body\n",
"\n",
"    def _mentions(elm, needle: str) -> bool:\n",
"        # Text is joined across all 't' nodes so a marker split over runs still matches.\n",
"        return needle in \"\".join(elm.xpath(\".//*[local-name()='t']/text()\") or [])\n",
"\n",
"    removed_blocks = 0\n",
"    passes = 0\n",
"    while passes < max_passes:\n",
"        passes += 1\n",
"        blocks = list(body.iterchildren())\n",
"        first = last = None\n",
"        for pos, elm in enumerate(blocks):\n",
"            if first is None:\n",
"                # Still looking for the start marker; its own block is never\n",
"                # tested for the end marker.\n",
"                if _mentions(elm, start):\n",
"                    first = pos\n",
"                    if debug:\n",
"                        print(f\"[DEBUG] start in child {pos} tag={elm.tag}\")\n",
"                continue\n",
"            if _mentions(elm, end):\n",
"                last = pos\n",
"                if debug:\n",
"                    print(f\"[DEBUG] end in child {pos} tag={elm.tag}\")\n",
"                break\n",
"        if first is None or last is None:\n",
"            if debug:\n",
"                print(\"[DEBUG] done. start/end:\", first, last)\n",
"            break\n",
"        for elm in blocks[first:last + 1][::-1]:\n",
"            body.remove(elm)\n",
"            removed_blocks += 1\n",
"    return removed_blocks"
]
},
{
"cell_type": "code",
"execution_count": 51,
"id": "19f98bbc",
"metadata": {},
"outputs": [],
"source": [
"def delete_all_between_anchors_xml(\n",
"    doc,\n",
"    start=\"{{ANCORA3}}\",\n",
"    end=\"{{ANCORA4}}\",\n",
"    debug=False,\n",
"    max_passes=10_000,\n",
") -> int:\n",
"    \"\"\"Delete every run of top-level body blocks delimited by ``start`` and ``end``.\n",
"\n",
"    On each pass the direct children of the document body are scanned for the\n",
"    first one whose text contains ``start`` and then, among the children after\n",
"    it, the first one whose text contains ``end``. Both marker blocks and all\n",
"    blocks between them are removed. Passes repeat until no such pair is found\n",
"    or ``max_passes`` is reached. Returns the number of blocks removed.\n",
"\n",
"    NOTE: this redefines the function of the same name from the previous cell;\n",
"    the code is the same and only the default anchors differ.\n",
"    \"\"\"\n",
"    body = doc._element.body\n",
"\n",
"    def _mentions(elm, needle: str) -> bool:\n",
"        # Text is joined across all 't' nodes so a marker split over runs still matches.\n",
"        return needle in \"\".join(elm.xpath(\".//*[local-name()='t']/text()\") or [])\n",
"\n",
"    removed_blocks = 0\n",
"    passes = 0\n",
"    while passes < max_passes:\n",
"        passes += 1\n",
"        blocks = list(body.iterchildren())\n",
"        first = last = None\n",
"        for pos, elm in enumerate(blocks):\n",
"            if first is None:\n",
"                # Still looking for the start marker; its own block is never\n",
"                # tested for the end marker.\n",
"                if _mentions(elm, start):\n",
"                    first = pos\n",
"                    if debug:\n",
"                        print(f\"[DEBUG] start in child {pos} tag={elm.tag}\")\n",
"                continue\n",
"            if _mentions(elm, end):\n",
"                last = pos\n",
"                if debug:\n",
"                    print(f\"[DEBUG] end in child {pos} tag={elm.tag}\")\n",
"                break\n",
"        if first is None or last is None:\n",
"            if debug:\n",
"                print(\"[DEBUG] done. start/end:\", first, last)\n",
"            break\n",
"        for elm in blocks[first:last + 1][::-1]:\n",
"            body.remove(elm)\n",
"            removed_blocks += 1\n",
"    return removed_blocks"
]
},
{
"cell_type": "code",
"execution_count": 52,
"id": "8da13fb2",
"metadata": {},
"outputs": [],
"source": [
"def _iter_paragraphs_in_table(tbl):\n",
"    \"\"\"Yield every paragraph inside ``tbl``, descending into nested tables.\n",
"\n",
"    Cells are visited row by row; for each cell its own paragraphs come first,\n",
"    followed by the paragraphs of any tables nested in that cell.\n",
"\n",
"    NOTE: an earlier cell of this notebook contains the same definition.\n",
"    \"\"\"\n",
"    for row in tbl.rows:\n",
"        for cell in row.cells:\n",
"            yield from cell.paragraphs\n",
"            for nested in cell.tables:\n",
"                yield from _iter_paragraphs_in_table(nested)\n",
"def iter_all_paragraphs_everywhere(doc):\n",
"    \"\"\"Yield paragraphs from the body, the body tables and every header/footer.\n",
"\n",
"    Order: ``doc.paragraphs``, then paragraphs inside ``doc.tables`` (nested\n",
"    tables included), then for each section its default, first-page and\n",
"    even-page headers and footers (paragraphs first, then their tables).\n",
"\n",
"    NOTE: an earlier cell of this notebook contains the same definition.\n",
"    \"\"\"\n",
"    yield from doc.paragraphs\n",
"    for table in doc.tables:\n",
"        yield from _iter_paragraphs_in_table(table)\n",
"    for section in doc.sections:\n",
"        parts = (\n",
"            section.header,\n",
"            section.footer,\n",
"            section.first_page_header,\n",
"            section.first_page_footer,\n",
"            section.even_page_header,\n",
"            section.even_page_footer,\n",
"        )\n",
"        for part in parts:\n",
"            yield from part.paragraphs\n",
"            for table in part.tables:\n",
"                yield from _iter_paragraphs_in_table(table)"
]
},
{
"cell_type": "code",
"execution_count": 53,
"id": "ce5986ca",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Selected file:\n",
"E:/Relatórios Internos/3Curso QP praças/Question_rio_Final_Curso_para_Formadores.xlsx\n"
]
}
],
"source": [
"# Ask for the trainers' questionnaire export; the Tk root window is hidden so\n",
"# only the file dialog is shown.\n",
"Tk().withdraw()\n",
"file_path3 = askopenfilename(\n",
"    title=\"Select Excel das Formadores\",\n",
"    filetypes=[(\"Excel files\", \"*.xlsx *.xls\")],\n",
")\n",
"if file_path3:\n",
"    print(f\"Selected file:\\n{file_path3}\")\n",
"    df3 = pd.read_excel(file_path3)\n",
"else:\n",
"    # A falsy path means nothing was chosen in the dialog.\n",
"    print(\"Nenhum ficheiro selecionado.\")\n",
"    df3 = None"
]
},
{
"cell_type": "code",
"execution_count": 54,
"id": "961996c2",
"metadata": {},
"outputs": [],
"source": [
"# Defaults: every indicator stays at 0 when no file was selected for df3.\n",
"df3 = None\n",
"ninq3 = 0\n",
"medpub = medmeiosaux = medapform = medapdc = medobjesp = 0\n",
"medmetensi = medtempform = medlocaisform = medlançaaval = 0\n",
"medtipoaval = medtempoaval = medobjapre = medadqonjesp = medinterforma = 0\n",
"prerequesitos = Conteudo = objgeral = objfinal = objadq = avadq = refere = 0\n",
"if not file_path3:\n",
"    print(\"Nenhum ficheiro selecionado (df3). Valores definidos a 0.\")\n",
"else:\n",
"    df3 = pd.read_excel(file_path3)\n",
"    ninq3 = len(df3.index)  # number of rows read from the file\n",
"    # Column means, addressed by 0-based position (12-25), rounded to 2 decimals.\n",
"    medpub = round(df3.iloc[:, 12].mean(), 2)\n",
"    medmeiosaux = round(df3.iloc[:, 13].mean(), 2)\n",
"    medapform = round(df3.iloc[:, 14].mean(), 2)\n",
"    medapdc = round(df3.iloc[:, 15].mean(), 2)\n",
"    medobjesp = round(df3.iloc[:, 16].mean(), 2)\n",
"    medmetensi = round(df3.iloc[:, 17].mean(), 2)\n",
"    medtempform = round(df3.iloc[:, 18].mean(), 2)\n",
"    medlocaisform = round(df3.iloc[:, 19].mean(), 2)\n",
"    medlançaaval = round(df3.iloc[:, 20].mean(), 2)\n",
"    medtipoaval = round(df3.iloc[:, 21].mean(), 2)\n",
"    medtempoaval = round(df3.iloc[:, 22].mean(), 2)\n",
"    medobjapre = round(df3.iloc[:, 23].mean(), 2)\n",
"    medadqonjesp = round(df3.iloc[:, 24].mean(), 2)\n",
"    medinterforma = round(df3.iloc[:, 25].mean(), 2)\n",
"    if ninq3 > 0:\n",
"        # Percentage of rows whose value equals 1, for positions 26-32.\n",
"        prerequesitos = round(df3.iloc[:, 26].eq(1).sum() / ninq3 * 100, 2)\n",
"        Conteudo = round(df3.iloc[:, 27].eq(1).sum() / ninq3 * 100, 2)\n",
"        objgeral = round(df3.iloc[:, 28].eq(1).sum() / ninq3 * 100, 2)\n",
"        objfinal = round(df3.iloc[:, 29].eq(1).sum() / ninq3 * 100, 2)\n",
"        objadq = round(df3.iloc[:, 30].eq(1).sum() / ninq3 * 100, 2)\n",
"        avadq = round(df3.iloc[:, 31].eq(1).sum() / ninq3 * 100, 2)\n",
"        refere = round(df3.iloc[:, 32].eq(1).sum() / ninq3 * 100, 2)"
]
},
{
"cell_type": "code",
"execution_count": 55,
"id": "2edd4d84",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Selected file:\n",
"E:/Relatórios Internos/3Curso QP praças/Question_rio_Final_Curso_para_Dire_o_de_Curso_N_velamento.xlsx\n"
]
}
],
"source": [
"# Ask for the course-direction questionnaire export; the Tk root window is\n",
"# hidden so only the file dialog is shown.\n",
"Tk().withdraw()\n",
"file_path4 = askopenfilename(\n",
"    title=\"Select Excel da Direção de Curso\",\n",
"    filetypes=[(\"Excel files\", \"*.xlsx *.xls\")],\n",
")\n",
"if file_path4:\n",
"    print(f\"Selected file:\\n{file_path4}\")\n",
"    df4 = pd.read_excel(file_path4)\n",
"else:\n",
"    # A falsy path means nothing was chosen in the dialog.\n",
"    print(\"Nenhum ficheiro selecionado.\")\n",
"    df4 = None"
]
},
{
"cell_type": "code",
"execution_count": 56,
"id": "97b51ec3",
"metadata": {},
"outputs": [],
"source": [
"# Defaults: every indicator stays at 0 when no file was selected for df4.\n",
"df4 = None\n",
"ninq4 = 0\n",
"medprogcurso = medcontcurso = medestrcurso = medutilprat = medcargahoraria = med1 = 0\n",
"medinstal = medaudiovis = meddocdispor = medapadmin = medapcoord = med2 = 0\n",
"medmotform = medrelpart = medpontass = med3 = 0\n",
"if not file_path4:\n",
"    print(\"Nenhum ficheiro selecionado. Valores definidos a 0.\")\n",
"else:\n",
"    df4 = pd.read_excel(file_path4)\n",
"    ninq4 = len(df4.index)  # number of rows read from the file\n",
"    # Group 1: means of the columns at 0-based positions 10-14 and their average.\n",
"    medprogcurso = round(df4.iloc[:, 10].mean(), 2)\n",
"    medcontcurso = round(df4.iloc[:, 11].mean(), 2)\n",
"    medestrcurso = round(df4.iloc[:, 12].mean(), 2)\n",
"    medutilprat = round(df4.iloc[:, 13].mean(), 2)\n",
"    medcargahoraria = round(df4.iloc[:, 14].mean(), 2)\n",
"    med1 = round(\n",
"        sum((medprogcurso, medcontcurso, medestrcurso, medutilprat, medcargahoraria)) / 5,\n",
"        2,\n",
"    )\n",
"    # Group 2: positions 15-19 and their average.\n",
"    medinstal = round(df4.iloc[:, 15].mean(), 2)\n",
"    medaudiovis = round(df4.iloc[:, 16].mean(), 2)\n",
"    meddocdispor = round(df4.iloc[:, 17].mean(), 2)\n",
"    medapadmin = round(df4.iloc[:, 18].mean(), 2)\n",
"    medapcoord = round(df4.iloc[:, 19].mean(), 2)\n",
"    med2 = round(\n",
"        sum((medinstal, medaudiovis, meddocdispor, medapadmin, medapcoord)) / 5,\n",
"        2,\n",
"    )\n",
"    # Group 3: positions 20-22 and their average.\n",
"    medmotform = round(df4.iloc[:, 20].mean(), 2)\n",
"    medrelpart = round(df4.iloc[:, 21].mean(), 2)\n",
"    medpontass = round(df4.iloc[:, 22].mean(), 2)\n",
"    med3 = round(sum((medmotform, medrelpart, medpontass)) / 3, 2)"
]
},
{
"cell_type": "code",
"execution_count": 57,
"id": "ad4e7d71",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Resposta</th>\n",
" <th>Data/hora de submissão:</th>\n",
" <th>Instituição</th>\n",
" <th>Departamento</th>\n",
" <th>SalaOnline</th>\n",
" <th>Grupo</th>\n",
" <th>ID</th>\n",
" <th>Nome completo</th>\n",
" <th>NIM / CC / BI</th>\n",
" <th>Q00_Data</th>\n",
" <th>...</th>\n",
" <th>Q00_Funcionamento do Curso-&gt;Documentação ao Dispor (1 - Inadequada...5 - Adequada à Realidade)</th>\n",
" <th>Q00_Funcionamento do Curso-&gt;Apoio Administrativo (1- - Ineficaz...5 - Muito eficaz)</th>\n",
" <th>Q00_Funcionamento do Curso-&gt;Apoio Prestado pelo Coordenador (1 - Ineficaz...5 - Muito eficaz)</th>\n",
" <th>Q00_Apreciação dos Módulos-&gt;Motivação dos Participantes (1 - Baixa...5 - Muito Elevada)</th>\n",
" <th>Q00_Apreciação dos Módulos-&gt;Relacionamento entre Participantes (1 - Fechado...5- Muito Aberto)</th>\n",
" <th>Q00_Apreciação dos Módulos-&gt;Pontualidade e Assiduidade (1 - Fraca...5 - Muito Boa)</th>\n",
" <th>Q00_Temas</th>\n",
" <th>Q00_desenvolver</th>\n",
" <th>Q00_Incluir</th>\n",
" <th>Q00_Positivos</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>117497</td>\n",
" <td>03/08/2025 10:48:31</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>EA - 3.º Curso de Formação de Praças do Quadro...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Anónimo1</td>\n",
" <td>NaN</td>\n",
" <td>2025-08-03</td>\n",
" <td>...</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>Armamento e tiro</td>\n",
" <td>Armamento e tiro</td>\n",
" <td>Nada a referir</td>\n",
" <td>Os tempos de formação dedicados à prática do t...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>117890</td>\n",
" <td>13/08/2025 09:30:14</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>EA - 3.º Curso de Formação de Praças do Quadro...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Anónimo2</td>\n",
" <td>NaN</td>\n",
" <td>2025-08-13</td>\n",
" <td>...</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2 rows × 27 columns</p>\n",
"</div>"
],
"text/plain": [
" Resposta Data/hora de submissão: Instituição Departamento \\\n",
"0 117497 03/08/2025 10:48:31 NaN NaN \n",
"1 117890 13/08/2025 09:30:14 NaN NaN \n",
"\n",
" SalaOnline Grupo ID Nome completo \\\n",
"0 EA - 3.º Curso de Formação de Praças do Quadro... NaN NaN Anónimo1 \n",
"1 EA - 3.º Curso de Formação de Praças do Quadro... NaN NaN Anónimo2 \n",
"\n",
" NIM / CC / BI Q00_Data ... \\\n",
"0 NaN 2025-08-03 ... \n",
"1 NaN 2025-08-13 ... \n",
"\n",
" Q00_Funcionamento do Curso->Documentação ao Dispor (1 - Inadequada...5 - Adequada à Realidade) \\\n",
"0 5 \n",
"1 4 \n",
"\n",
" Q00_Funcionamento do Curso->Apoio Administrativo (1- - Ineficaz...5 - Muito eficaz) \\\n",
"0 5 \n",
"1 4 \n",
"\n",
" Q00_Funcionamento do Curso->Apoio Prestado pelo Coordenador (1 - Ineficaz...5 - Muito eficaz) \\\n",
"0 5 \n",
"1 3 \n",
"\n",
" Q00_Apreciação dos Módulos->Motivação dos Participantes (1 - Baixa...5 - Muito Elevada) \\\n",
"0 4 \n",
"1 3 \n",
"\n",
" Q00_Apreciação dos Módulos->Relacionamento entre Participantes (1 - Fechado...5- Muito Aberto) \\\n",
"0 4 \n",
"1 3 \n",
"\n",
" Q00_Apreciação dos Módulos->Pontualidade e Assiduidade (1 - Fraca...5 - Muito Boa) \\\n",
"0 4 \n",
"1 4 \n",
"\n",
" Q00_Temas Q00_desenvolver Q00_Incluir \\\n",
"0 Armamento e tiro Armamento e tiro Nada a referir \n",
"1 NaN NaN NaN \n",
"\n",
" Q00_Positivos \n",
"0 Os tempos de formação dedicados à prática do t... \n",
"1 NaN \n",
"\n",
"[2 rows x 27 columns]"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Quick look at the course-evaluation responses: only the first rows are shown,\n",
"# and nothing is shown when no spreadsheet was selected.\n",
"df4.head() if file_path4 else None"
]
},
{
"cell_type": "code",
"execution_count": 58,
"id": "f63e680c",
"metadata": {},
"outputs": [],
"source": [
"def replace_placeholder_with_propostas(\n",
"    doc,\n",
"    df3: pd.DataFrame,\n",
"    placeholder: str = \"{{Propostas}}\",\n",
"    left_indent_cm: float = 2.75,\n",
"    hanging_cm: float = 0.6,\n",
"    font_name: str = \"Arial\",\n",
"    font_size_pt: int = 12,\n",
"    deduplicate: bool = True,\n",
"    max_items: int | None = None,\n",
") -> int:\n",
"    \"\"\"Replace ``placeholder`` in ``doc`` with a hanging-indent list of proposals.\n",
"\n",
"    Items are the stripped, non-empty values of every ``df3`` column whose name\n",
"    contains ``_propostas`` (case-insensitive). With ``deduplicate`` the first\n",
"    spelling of each case-insensitive duplicate is kept; ``max_items`` caps the\n",
"    list; an empty list is rendered as the single item ``Sem dados``.\n",
"\n",
"    Rendering is delegated to ``replace_placeholder_with_column_subitems_hanging2``,\n",
"    which is called again for as long as the placeholder is still found by\n",
"    ``iter_all_paragraphs_everywhere`` and the previous call made progress.\n",
"\n",
"    Returns:\n",
"        Number of helper calls that removed at least one placeholder occurrence.\n",
"    \"\"\"\n",
"    cols = [c for c in df3.columns if \"_propostas\" in str(c).lower()]\n",
"    itens = []\n",
"    for c in cols:\n",
"        s = df3[c].dropna().astype(str).str.strip()\n",
"        itens.extend(s[s != \"\"].tolist())\n",
"\n",
"    if deduplicate:\n",
"        # dict keeps insertion order, so the first spelling of each item wins.\n",
"        first_seen = {}\n",
"        for t in itens:\n",
"            first_seen.setdefault(t.lower(), t)\n",
"        itens = list(first_seen.values())\n",
"\n",
"    if max_items is not None:\n",
"        itens = itens[:max_items]\n",
"\n",
"    if not itens:\n",
"        itens = [\"Sem dados\"]\n",
"\n",
"    df_tmp = pd.DataFrame({\"_Propostas\": itens})\n",
"\n",
"    def _pending() -> int:\n",
"        # Placeholder occurrences still present anywhere in the document.\n",
"        return sum(\n",
"            (p.text or \"\").count(placeholder)\n",
"            for p in iter_all_paragraphs_everywhere(doc)\n",
"        )\n",
"\n",
"    replaced = 0\n",
"    remaining = _pending()\n",
"    while remaining > 0:\n",
"        replace_placeholder_with_column_subitems_hanging2(\n",
"            doc,\n",
"            df_tmp,\n",
"            placeholder=placeholder,\n",
"            column_contains=\"_propostas\",\n",
"            left_indent_cm=left_indent_cm,\n",
"            hanging_cm=hanging_cm,\n",
"            font_name=font_name,\n",
"            font_size_pt=font_size_pt,\n",
"            deduplicate=False\n",
"        )\n",
"        still_pending = _pending()\n",
"        if still_pending >= remaining:\n",
"            # The helper made no progress (e.g. the placeholder sits somewhere it\n",
"            # does not edit); stop instead of looping forever.\n",
"            break\n",
"        replaced += 1\n",
"        remaining = still_pending\n",
"\n",
"    return replaced"
]
},
{
"cell_type": "code",
"execution_count": 59,
"id": "31986236",
"metadata": {},
"outputs": [],
"source": [
"def delete_paragraph(paragraph):\n",
"    \"\"\"Detach ``paragraph`` from its parent XML element, if it has one.\"\"\"\n",
"    element = paragraph._p\n",
"    container = element.getparent()\n",
"    if container is None:\n",
"        # Already detached: nothing to remove.\n",
"        return\n",
"    container.remove(element)\n",
"\n",
"\n",
"def _iter_paragraphs_in_table(tbl):\n",
"    \"\"\"Yield every paragraph of ``tbl``, descending into nested tables.\n",
"\n",
"    Cells are visited row by row; a cell's own paragraphs come before the\n",
"    paragraphs of the tables nested inside it.\n",
"    \"\"\"\n",
"    cells = (cell for row in tbl.rows for cell in row.cells)\n",
"    for cell in cells:\n",
"        yield from cell.paragraphs\n",
"        for nested in cell.tables:\n",
"            yield from _iter_paragraphs_in_table(nested)\n",
"\n",
"\n",
"def iter_all_paragraphs_everywhere(doc):\n",
"    \"\"\"Yield the paragraphs of the body, its tables and every header/footer.\n",
"\n",
"    Order: body paragraphs, paragraphs inside body tables (nested tables\n",
"    included), then, section by section, the default, first-page and even-page\n",
"    headers and footers.\n",
"\n",
"    Header/footer variants that are linked to the previous section are skipped.\n",
"    In python-docx, reading such a variant resolves to the previous section's\n",
"    definition (already yielded here) or, for the first section, creates a new\n",
"    empty definition, which would modify the document during a read-only walk.\n",
"    \"\"\"\n",
"    yield from doc.paragraphs\n",
"    for t in doc.tables:\n",
"        yield from _iter_paragraphs_in_table(t)\n",
"    for section in doc.sections:\n",
"        containers = (\n",
"            section.header,\n",
"            section.footer,\n",
"            section.first_page_header,\n",
"            section.first_page_footer,\n",
"            section.even_page_header,\n",
"            section.even_page_footer,\n",
"        )\n",
"        for c in containers:\n",
"            if c.is_linked_to_previous:\n",
"                # No definition of its own: nothing new to yield.\n",
"                continue\n",
"            yield from c.paragraphs\n",
"            for t in c.tables:\n",
"                yield from _iter_paragraphs_in_table(t)\n",
"\n",
"\n",
"def delete_lines_with_ancora(doc, pattern=r\"ANCORA\") -> int:\n",
"    \"\"\"Remove every paragraph whose text matches ``pattern`` (case-insensitive).\n",
"\n",
"    Paragraphs are collected first and removed afterwards, last one first.\n",
"    The traversal can reach the same paragraph more than once (for instance\n",
"    through merged table cells), so matches are de-duplicated by their XML\n",
"    element to keep the returned count accurate.\n",
"\n",
"    Returns:\n",
"        Number of distinct paragraphs scheduled for removal.\n",
"    \"\"\"\n",
"    rx = re.compile(pattern, flags=re.IGNORECASE)\n",
"    to_delete = []\n",
"    seen_elements = set()\n",
"    for p in iter_all_paragraphs_everywhere(doc):\n",
"        if not rx.search(p.text or \"\"):\n",
"            continue\n",
"        # ids stay valid because to_delete keeps each matched paragraph alive.\n",
"        key = id(p._p)\n",
"        if key in seen_elements:\n",
"            continue\n",
"        seen_elements.add(key)\n",
"        to_delete.append(p)\n",
"    for p in reversed(to_delete):\n",
"        delete_paragraph(p)\n",
"    return len(to_delete)\n"
]
},
{
"cell_type": "code",
"execution_count": 60,
"id": "abece7b0",
"metadata": {},
"outputs": [],
"source": [
"def replace_placeholder_with_temasdir(\n",
"    doc,\n",
"    df4: pd.DataFrame,\n",
"    placeholder: str = \"{{temasdir}}\",\n",
"    left_indent_cm: float = 2.75,\n",
"    hanging_cm: float = 0.6,\n",
"    font_name: str = \"Arial\",\n",
"    font_size_pt: int = 12,\n",
"    deduplicate: bool = True,\n",
"    max_items: int | None = None,\n",
") -> int:\n",
"    \"\"\"Replace ``placeholder`` in ``doc`` with a hanging-indent list of topics.\n",
"\n",
"    Items are the stripped, non-empty values of every ``df4`` column whose name\n",
"    contains ``_temas`` (case-insensitive). With ``deduplicate`` the first\n",
"    spelling of each case-insensitive duplicate is kept; ``max_items`` caps the\n",
"    list; an empty list is rendered as the single item ``Sem dados``.\n",
"\n",
"    Rendering is delegated to ``replace_placeholder_with_column_subitems_hanging2``,\n",
"    which is called again for as long as the placeholder is still found by\n",
"    ``iter_all_paragraphs_everywhere`` and the previous call made progress.\n",
"\n",
"    Returns:\n",
"        Number of helper calls that removed at least one placeholder occurrence.\n",
"    \"\"\"\n",
"    cols = [c for c in df4.columns if \"_temas\" in str(c).lower()]\n",
"    itens = []\n",
"    for c in cols:\n",
"        s = df4[c].dropna().astype(str).str.strip()\n",
"        itens.extend(s[s != \"\"].tolist())\n",
"\n",
"    if deduplicate:\n",
"        # dict keeps insertion order, so the first spelling of each item wins.\n",
"        first_seen = {}\n",
"        for t in itens:\n",
"            first_seen.setdefault(t.lower(), t)\n",
"        itens = list(first_seen.values())\n",
"\n",
"    if max_items is not None:\n",
"        itens = itens[:max_items]\n",
"\n",
"    if not itens:\n",
"        itens = [\"Sem dados\"]\n",
"\n",
"    df_tmp = pd.DataFrame({\"_Temas\": itens})\n",
"\n",
"    def _pending() -> int:\n",
"        # Placeholder occurrences still present anywhere in the document.\n",
"        return sum(\n",
"            (p.text or \"\").count(placeholder)\n",
"            for p in iter_all_paragraphs_everywhere(doc)\n",
"        )\n",
"\n",
"    replaced = 0\n",
"    remaining = _pending()\n",
"    while remaining > 0:\n",
"        replace_placeholder_with_column_subitems_hanging2(\n",
"            doc,\n",
"            df_tmp,\n",
"            placeholder=placeholder,\n",
"            column_contains=\"_temas\",\n",
"            left_indent_cm=left_indent_cm,\n",
"            hanging_cm=hanging_cm,\n",
"            font_name=font_name,\n",
"            font_size_pt=font_size_pt,\n",
"            deduplicate=False\n",
"        )\n",
"        still_pending = _pending()\n",
"        if still_pending >= remaining:\n",
"            # The helper made no progress (e.g. the placeholder sits somewhere it\n",
"            # does not edit); stop instead of looping forever.\n",
"            break\n",
"        replaced += 1\n",
"        remaining = still_pending\n",
"\n",
"    return replaced"
]
},
{
"cell_type": "code",
"execution_count": 61,
"id": "280d80ff",
"metadata": {},
"outputs": [],
"source": [
"def replace_placeholder_with_desenvolverdir(\n",
"    doc,\n",
"    df4: pd.DataFrame,\n",
"    placeholder: str = \"{{desenvolverdir}}\",\n",
"    left_indent_cm: float = 2.75,\n",
"    hanging_cm: float = 0.6,\n",
"    font_name: str = \"Arial\",\n",
"    font_size_pt: int = 12,\n",
"    deduplicate: bool = True,\n",
"    max_items: int | None = None,\n",
") -> int:\n",
"    \"\"\"Replace ``placeholder`` in ``doc`` with a hanging-indent list of items.\n",
"\n",
"    Items are the stripped, non-empty values of every ``df4`` column whose name\n",
"    contains ``_desenvolver`` (case-insensitive). With ``deduplicate`` the first\n",
"    spelling of each case-insensitive duplicate is kept; ``max_items`` caps the\n",
"    list; an empty list is rendered as the single item ``Sem dados``.\n",
"\n",
"    Rendering is delegated to ``replace_placeholder_with_column_subitems_hanging2``,\n",
"    which is called again for as long as the placeholder is still found by\n",
"    ``iter_all_paragraphs_everywhere`` and the previous call made progress.\n",
"\n",
"    Returns:\n",
"        Number of helper calls that removed at least one placeholder occurrence.\n",
"    \"\"\"\n",
"    cols = [c for c in df4.columns if \"_desenvolver\" in str(c).lower()]\n",
"    itens = []\n",
"    for c in cols:\n",
"        s = df4[c].dropna().astype(str).str.strip()\n",
"        itens.extend(s[s != \"\"].tolist())\n",
"\n",
"    if deduplicate:\n",
"        # dict keeps insertion order, so the first spelling of each item wins.\n",
"        first_seen = {}\n",
"        for t in itens:\n",
"            first_seen.setdefault(t.lower(), t)\n",
"        itens = list(first_seen.values())\n",
"\n",
"    if max_items is not None:\n",
"        itens = itens[:max_items]\n",
"\n",
"    if not itens:\n",
"        itens = [\"Sem dados\"]\n",
"\n",
"    df_tmp = pd.DataFrame({\"_Desenvolver\": itens})\n",
"\n",
"    def _pending() -> int:\n",
"        # Placeholder occurrences still present anywhere in the document.\n",
"        return sum(\n",
"            (p.text or \"\").count(placeholder)\n",
"            for p in iter_all_paragraphs_everywhere(doc)\n",
"        )\n",
"\n",
"    replaced = 0\n",
"    remaining = _pending()\n",
"    while remaining > 0:\n",
"        replace_placeholder_with_column_subitems_hanging2(\n",
"            doc,\n",
"            df_tmp,\n",
"            placeholder=placeholder,\n",
"            column_contains=\"_desenvolver\",\n",
"            left_indent_cm=left_indent_cm,\n",
"            hanging_cm=hanging_cm,\n",
"            font_name=font_name,\n",
"            font_size_pt=font_size_pt,\n",
"            deduplicate=False\n",
"        )\n",
"        still_pending = _pending()\n",
"        if still_pending >= remaining:\n",
"            # The helper made no progress (e.g. the placeholder sits somewhere it\n",
"            # does not edit); stop instead of looping forever.\n",
"            break\n",
"        replaced += 1\n",
"        remaining = still_pending\n",
"\n",
"    return replaced"
]
},
{
"cell_type": "code",
"execution_count": 62,
"id": "9c0fa5f2",
"metadata": {},
"outputs": [],
"source": [
"def replace_placeholder_with_incluirdir(\n",
"    doc,\n",
"    df4: pd.DataFrame,\n",
"    placeholder: str = \"{{incluirdir}}\",\n",
"    left_indent_cm: float = 2.75,\n",
"    hanging_cm: float = 0.6,\n",
"    font_name: str = \"Arial\",\n",
"    font_size_pt: int = 12,\n",
"    deduplicate: bool = True,\n",
"    max_items: int | None = None,\n",
") -> int:\n",
"    \"\"\"Replace ``placeholder`` in ``doc`` with a hanging-indent list of items.\n",
"\n",
"    Items are the stripped, non-empty values of every ``df4`` column whose name\n",
"    contains ``_incluir`` (case-insensitive). With ``deduplicate`` the first\n",
"    spelling of each case-insensitive duplicate is kept; ``max_items`` caps the\n",
"    list; an empty list is rendered as the single item ``Sem dados``.\n",
"\n",
"    Rendering is delegated to ``replace_placeholder_with_column_subitems_hanging2``,\n",
"    which is called again for as long as the placeholder is still found by\n",
"    ``iter_all_paragraphs_everywhere`` and the previous call made progress.\n",
"\n",
"    Returns:\n",
"        Number of helper calls that removed at least one placeholder occurrence.\n",
"    \"\"\"\n",
"    cols = [c for c in df4.columns if \"_incluir\" in str(c).lower()]\n",
"    itens = []\n",
"    for c in cols:\n",
"        s = df4[c].dropna().astype(str).str.strip()\n",
"        itens.extend(s[s != \"\"].tolist())\n",
"\n",
"    if deduplicate:\n",
"        # dict keeps insertion order, so the first spelling of each item wins.\n",
"        first_seen = {}\n",
"        for t in itens:\n",
"            first_seen.setdefault(t.lower(), t)\n",
"        itens = list(first_seen.values())\n",
"\n",
"    if max_items is not None:\n",
"        itens = itens[:max_items]\n",
"\n",
"    if not itens:\n",
"        itens = [\"Sem dados\"]\n",
"\n",
"    df_tmp = pd.DataFrame({\"_Incluir\": itens})\n",
"\n",
"    def _pending() -> int:\n",
"        # Placeholder occurrences still present anywhere in the document.\n",
"        return sum(\n",
"            (p.text or \"\").count(placeholder)\n",
"            for p in iter_all_paragraphs_everywhere(doc)\n",
"        )\n",
"\n",
"    replaced = 0\n",
"    remaining = _pending()\n",
"    while remaining > 0:\n",
"        replace_placeholder_with_column_subitems_hanging2(\n",
"            doc,\n",
"            df_tmp,\n",
"            placeholder=placeholder,\n",
"            column_contains=\"_incluir\",\n",
"            left_indent_cm=left_indent_cm,\n",
"            hanging_cm=hanging_cm,\n",
"            font_name=font_name,\n",
"            font_size_pt=font_size_pt,\n",
"            deduplicate=False\n",
"        )\n",
"        still_pending = _pending()\n",
"        if still_pending >= remaining:\n",
"            # The helper made no progress (e.g. the placeholder sits somewhere it\n",
"            # does not edit); stop instead of looping forever.\n",
"            break\n",
"        replaced += 1\n",
"        remaining = still_pending\n",
"\n",
"    return replaced\n"
]
},
{
"cell_type": "code",
"execution_count": 63,
"id": "84a86195",
"metadata": {},
"outputs": [],
"source": [
"def replace_placeholder_with_positivosdir(\n",
"    doc,\n",
"    df4: pd.DataFrame,\n",
"    placeholder: str = \"{{positivosdir}}\",\n",
"    left_indent_cm: float = 2.75,\n",
"    hanging_cm: float = 0.6,\n",
"    font_name: str = \"Arial\",\n",
"    font_size_pt: int = 12,\n",
"    deduplicate: bool = True,\n",
"    max_items: int | None = None,\n",
") -> int:\n",
"    \"\"\"Replace ``placeholder`` in ``doc`` with a hanging-indent list of items.\n",
"\n",
"    Items are the stripped, non-empty values of every ``df4`` column whose name\n",
"    contains ``_positivos`` (case-insensitive). With ``deduplicate`` the first\n",
"    spelling of each case-insensitive duplicate is kept; ``max_items`` caps the\n",
"    list; an empty list is rendered as the single item ``Sem dados``.\n",
"\n",
"    Rendering is delegated to ``replace_placeholder_with_column_subitems_hanging2``,\n",
"    which is called again for as long as the placeholder is still found by\n",
"    ``iter_all_paragraphs_everywhere`` and the previous call made progress.\n",
"\n",
"    Returns:\n",
"        Number of helper calls that removed at least one placeholder occurrence.\n",
"    \"\"\"\n",
"    cols = [c for c in df4.columns if \"_positivos\" in str(c).lower()]\n",
"    itens = []\n",
"    for c in cols:\n",
"        s = df4[c].dropna().astype(str).str.strip()\n",
"        itens.extend(s[s != \"\"].tolist())\n",
"\n",
"    if deduplicate:\n",
"        # dict keeps insertion order, so the first spelling of each item wins.\n",
"        first_seen = {}\n",
"        for t in itens:\n",
"            first_seen.setdefault(t.lower(), t)\n",
"        itens = list(first_seen.values())\n",
"\n",
"    if max_items is not None:\n",
"        itens = itens[:max_items]\n",
"\n",
"    if not itens:\n",
"        itens = [\"Sem dados\"]\n",
"\n",
"    df_tmp = pd.DataFrame({\"_Positivos\": itens})\n",
"\n",
"    def _pending() -> int:\n",
"        # Placeholder occurrences still present anywhere in the document.\n",
"        return sum(\n",
"            (p.text or \"\").count(placeholder)\n",
"            for p in iter_all_paragraphs_everywhere(doc)\n",
"        )\n",
"\n",
"    replaced = 0\n",
"    remaining = _pending()\n",
"    while remaining > 0:\n",
"        replace_placeholder_with_column_subitems_hanging2(\n",
"            doc,\n",
"            df_tmp,\n",
"            placeholder=placeholder,\n",
"            column_contains=\"_positivos\",\n",
"            left_indent_cm=left_indent_cm,\n",
"            hanging_cm=hanging_cm,\n",
"            font_name=font_name,\n",
"            font_size_pt=font_size_pt,\n",
"            deduplicate=False\n",
"        )\n",
"        still_pending = _pending()\n",
"        if still_pending >= remaining:\n",
"            # The helper made no progress (e.g. the placeholder sits somewhere it\n",
"            # does not edit); stop instead of looping forever.\n",
"            break\n",
"        replaced += 1\n",
"        remaining = still_pending\n",
"\n",
"    return replaced\n"
]
},
{
"cell_type": "code",
"execution_count": 64,
"id": "c75442af",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Resposta</th>\n",
" <th>Data/hora de submissão:</th>\n",
" <th>Instituição</th>\n",
" <th>Departamento</th>\n",
" <th>SalaOnline</th>\n",
" <th>Grupo</th>\n",
" <th>ID</th>\n",
" <th>Nome completo</th>\n",
" <th>NIM / CC / BI</th>\n",
" <th>Q00_Data</th>\n",
" <th>...</th>\n",
" <th>Q00_Funcionamento do Curso-&gt;Documentação ao Dispor (1 - Inadequada...5 - Adequada à Realidade)</th>\n",
" <th>Q00_Funcionamento do Curso-&gt;Apoio Administrativo (1- - Ineficaz...5 - Muito eficaz)</th>\n",
" <th>Q00_Funcionamento do Curso-&gt;Apoio Prestado pelo Coordenador (1 - Ineficaz...5 - Muito eficaz)</th>\n",
" <th>Q00_Apreciação dos Módulos-&gt;Motivação dos Participantes (1 - Baixa...5 - Muito Elevada)</th>\n",
" <th>Q00_Apreciação dos Módulos-&gt;Relacionamento entre Participantes (1 - Fechado...5- Muito Aberto)</th>\n",
" <th>Q00_Apreciação dos Módulos-&gt;Pontualidade e Assiduidade (1 - Fraca...5 - Muito Boa)</th>\n",
" <th>Q00_Temas</th>\n",
" <th>Q00_desenvolver</th>\n",
" <th>Q00_Incluir</th>\n",
" <th>Q00_Positivos</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>117497</td>\n",
" <td>03/08/2025 10:48:31</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>EA - 3.º Curso de Formação de Praças do Quadro...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Anónimo1</td>\n",
" <td>NaN</td>\n",
" <td>2025-08-03</td>\n",
" <td>...</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>Armamento e tiro</td>\n",
" <td>Armamento e tiro</td>\n",
" <td>Nada a referir</td>\n",
" <td>Os tempos de formação dedicados à prática do t...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>117890</td>\n",
" <td>13/08/2025 09:30:14</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>EA - 3.º Curso de Formação de Praças do Quadro...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Anónimo2</td>\n",
" <td>NaN</td>\n",
" <td>2025-08-13</td>\n",
" <td>...</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2 rows × 27 columns</p>\n",
"</div>"
],
"text/plain": [
" Resposta Data/hora de submissão: Instituição Departamento \\\n",
"0 117497 03/08/2025 10:48:31 NaN NaN \n",
"1 117890 13/08/2025 09:30:14 NaN NaN \n",
"\n",
" SalaOnline Grupo ID Nome completo \\\n",
"0 EA - 3.º Curso de Formação de Praças do Quadro... NaN NaN Anónimo1 \n",
"1 EA - 3.º Curso de Formação de Praças do Quadro... NaN NaN Anónimo2 \n",
"\n",
" NIM / CC / BI Q00_Data ... \\\n",
"0 NaN 2025-08-03 ... \n",
"1 NaN 2025-08-13 ... \n",
"\n",
" Q00_Funcionamento do Curso->Documentação ao Dispor (1 - Inadequada...5 - Adequada à Realidade) \\\n",
"0 5 \n",
"1 4 \n",
"\n",
" Q00_Funcionamento do Curso->Apoio Administrativo (1- - Ineficaz...5 - Muito eficaz) \\\n",
"0 5 \n",
"1 4 \n",
"\n",
" Q00_Funcionamento do Curso->Apoio Prestado pelo Coordenador (1 - Ineficaz...5 - Muito eficaz) \\\n",
"0 5 \n",
"1 3 \n",
"\n",
" Q00_Apreciação dos Módulos->Motivação dos Participantes (1 - Baixa...5 - Muito Elevada) \\\n",
"0 4 \n",
"1 3 \n",
"\n",
" Q00_Apreciação dos Módulos->Relacionamento entre Participantes (1 - Fechado...5- Muito Aberto) \\\n",
"0 4 \n",
"1 3 \n",
"\n",
" Q00_Apreciação dos Módulos->Pontualidade e Assiduidade (1 - Fraca...5 - Muito Boa) \\\n",
"0 4 \n",
"1 4 \n",
"\n",
" Q00_Temas Q00_desenvolver Q00_Incluir \\\n",
"0 Armamento e tiro Armamento e tiro Nada a referir \n",
"1 NaN NaN NaN \n",
"\n",
" Q00_Positivos \n",
"0 Os tempos de formação dedicados à prática do t... \n",
"1 NaN \n",
"\n",
"[2 rows x 27 columns]"
]
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Quick look at the course-evaluation responses: only the first rows are shown,\n",
"# and nothing is shown when no spreadsheet was selected.\n",
"df4.head() if file_path4 else None"
]
},
{
"cell_type": "code",
"execution_count": 65,
"id": "5721702e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"OBSERVACOES substituídos: 0\n",
"Saved: relatorio_final.docx\n"
]
}
],
"source": [
"# Trainer summary figures returned by compute_formadores_summary for df2.\n",
"nforma, mediaformquanl, mediaform = compute_formadores_summary(df2)\n",
"\n",
"# Placeholder -> replacement text for the scalar fields of the template.\n",
"# Keys are kept exactly as written (presumably they mirror the template's\n",
"# placeholders, unusual spellings included - confirm against the .docx).\n",
"replacements = {\n",
"    \"{{NOMEDOCURSOEXTENSO}}\": str(NOMEDOCURSO),\n",
"    \"{{prerequesitos}}\": str(prerequesitos),\n",
"    \"{{prerequesitos2}}\": str(100-prerequesitos),\n",
"    \"{{Conteudo}}\": str(Conteudo),\n",
"    \"{{Conteudo2}}\": str(100-Conteudo),\n",
"    \"{{objgeral}}\": str(objgeral),\n",
"    \"{{objgeral2}}\": str(100-objgeral),\n",
"    \"{{objfinal}}\": str(objfinal),\n",
"    \"{{objfinal2}}\": str(100-objfinal),\n",
"    \"{{objadq}}\": str(objadq),\n",
"    \"{{objadq2}}\": str(100-objadq),\n",
"    \"{{avadq}}\": str(avadq),\n",
"    \"{{avadq2}}\": str(100-avadq),\n",
"    \"{{refere}}\": str(refere),\n",
"    \"{{refere2}}\": str(100-refere),\n",
"    \"{{ninq4}}\": str(ninq4),\n",
"    \"{{ninq3}}\": str(ninq3),\n",
"    \"{{nforma}}\": str(nforma),\n",
"    \"{{mediaformquanl}}\": str(mediaformquanl),\n",
"    \"{{mediaform}}\": str(mediaform),\n",
"    \"{{mediaaloj}}\": str(medalojamento2),\n",
"    \"{{mediaaloj1}}\": str(medalojamento),\n",
"    \"{{mediaalime}}\": str(medalimentacao2),\n",
"    \"{{mediaalime1}}\": str(medalimentacao),\n",
"    \"{{medalimentacao}}\": str(medalimentacaofinal),\n",
"    \"{{apdir}}\": str(medapdir),\n",
"    \"{{graudif}}\": str(meddificuldade2),\n",
"    \"{{graudif1}}\": str(meddificuldade),\n",
"    \"{{meddificuldadefinal}}\": str(meddificuldadefinal),\n",
"    \"{{funcfut}}\": str(medfuncfut2),\n",
"    \"{{funcfut1}}\": str(medfuncfut),\n",
"    \"{{medfuncfutfinal}}\": str(medfuncfutfinal),\n",
"    \"{{apadm}}\": str(medaplog),\n",
"    \"{{motapro}}\": str(medmotvpart2),\n",
"    \"{{motapro1}}\": str(medmotvpart),\n",
"    \"{{medmotvpartfinal}}\": str(medmotvpartfinal),\n",
"    \"{{conhcurso}}\": str(medconhecimento2),\n",
"    \"{{conhcurso1}}\": str(medconhecimento),\n",
"    \"{{medconhecimentofinal}}\": str(medconhecimentofinal),\n",
"    \"{{objcruso}}\": str(objcruso),\n",
"    \"{{contcurso}}\": str(contcurso),\n",
"    \"{{medalojamento}}\": str(medalojamentofinal),\n",
"    \"{{adeqtrab}}\": str(adeqtrab),\n",
"    \"{{instform}}\": str(instform),\n",
"    \"{{audiovisuais}}\": str(audiovisuais),\n",
"    \"{{biblio}}\": str(biblio),\n",
"    \"{{ninq}}\": str(ninq),\n",
"    \"{{ninqfim}}\": str(ninq2),\n",
"    \"{{medpub}}\": str(medpub),\n",
"    \"{{medmeiosaux}}\": str(medmeiosaux),\n",
"    \"{{medapform}}\": str(medapform),\n",
"    \"{{medapdc}}\": str(medapdc),\n",
"    \"{{medobjesp}}\": str(medobjesp),\n",
"    \"{{medmetensi}}\": str(medmetensi),\n",
"    \"{{medtempform}}\": str(medtempform),\n",
"    \"{{medlocaisform}}\": str(medlocaisform),\n",
"    \"{{medlançaaval}}\": str(medlançaaval),\n",
"    \"{{medtipoaval}}\": str(medtipoaval),\n",
"    \"{{medtempoaval}}\": str(medtempoaval),\n",
"    \"{{medobjapre}}\": str(medobjapre),\n",
"    \"{{medadqonjesp}}\": str(medadqonjesp),\n",
"    \"{{medinterforma}}\": str(medinterforma),\n",
"    \"{{medprogcurso}}\": str(medprogcurso),\n",
"    \"{{medcontcurso}}\": str(medcontcurso),\n",
"    \"{{medestrcurso}}\": str(medestrcurso),\n",
"    \"{{medutilprat}}\": str(medutilprat),\n",
"    \"{{medcargahoraria}}\": str(medcargahoraria),\n",
"    \"{{medinstal}}\": str(medinstal),\n",
"    \"{{medaudiovis}}\": str(medaudiovis),\n",
"    \"{{meddocdispor}}\": str(meddocdispor),\n",
"    \"{{medapadmin}}\": str(medapadmin),\n",
"    \"{{medapcoord}}\": str(medapcoord),\n",
"    \"{{medmotform}}\": str(medmotform),\n",
"    \"{{medrelpart}}\": str(medrelpart),\n",
"    \"{{medpontass}}\": str(medpontass),\n",
"    \"{{med1}}\": str(med1),\n",
"    \"{{med2}}\": str(med2),\n",
"    \"{{med3}}\": str(med3),\n",
"    # Qualitative labels derived from the averages above.\n",
"    \"{{medpontassqual}}\": str(avaliacao_qualitativa(medpontass)),\n",
"    \"{{medrelpartqual}}\": str(avaliacao_qualitativa(medrelpart)),\n",
"    \"{{medmotformqual}}\": str(avaliacao_qualitativa(medmotform)),\n",
"    \"{{medapcoordqual}}\": str(avaliacao_qualitativa(medapcoord)),\n",
"    \"{{medapadminqual}}\": str(avaliacao_qualitativa(medapadmin)),\n",
"    \"{{meddocdisporqual}}\": str(avaliacao_qualitativa(meddocdispor)),\n",
"    \"{{medaudiovisqual}}\": str(avaliacao_qualitativa(medaudiovis)),\n",
"    \"{{medinstalqual}}\": str(avaliacao_qualitativa(medinstal)),\n",
"    \"{{medcargahorariaqual}}\": str(avaliacao_qualitativa(medcargahoraria)),\n",
"    \"{{medutilpratoqual}}\": str(avaliacao_qualitativa(medutilprat)),\n",
"    \"{{medestrcursooqual}}\": str(avaliacao_qualitativa(medestrcurso)),\n",
"    # The programme label is now computed from medprogcurso (it used to be\n",
"    # computed from medcontcurso); the content label gets its own key.\n",
"    \"{{medprogcursoqual}}\": str(avaliacao_qualitativa(medprogcurso)),\n",
"    \"{{medcontcursoqual}}\": str(avaliacao_qualitativa(medcontcurso)),\n",
"    \"{{medinterformaqual}}\": str(avaliacao_qualitativa(medinterforma)),\n",
"    \"{{medadqonjespqual}}\": str(avaliacao_qualitativa(medadqonjesp)),\n",
"    \"{{medobjaprequal}}\": str(avaliacao_qualitativa(medobjapre)),\n",
"    \"{{medtempoavalqual}}\": str(avaliacao_qualitativa(medtempoaval)),\n",
"    \"{{medtipoavalqual}}\": str(avaliacao_qualitativa(medtipoaval)),\n",
"    \"{{medlançaavalual}}\": str(avaliacao_qualitativa(medlançaaval)),\n",
"    \"{{medlocaisformqual}}\": str(avaliacao_qualitativa(medlocaisform)),\n",
"    \"{{medtempformqual}}\": str(avaliacao_qualitativa(medtempform)),\n",
"    \"{{medmetensiqual}}\": str(avaliacao_qualitativa(medmetensi)),\n",
"    \"{{medobjespqual}}\": str(avaliacao_qualitativa(medobjesp)),\n",
"    \"{{medapdcqual}}\": str(avaliacao_qualitativa(medapdc)),\n",
"    \"{{medapformqual}}\": str(avaliacao_qualitativa(medapform)),\n",
"    \"{{medmeiosauxqual}}\": str(avaliacao_qualitativa(medmeiosaux)),\n",
"    \"{{medpubqual}}\": str(avaliacao_qualitativa(medpub)),\n",
"    \"{{mediaalojqual}}\": str(avaliacao_qualitativa(medalojamento2)),\n",
"    \"{{apdirqual}}\": str(avaliacao_qualitativa(medapdir)),\n",
"    \"{{funcfutqual}}\": str(avaliacao_qualitativa(medfuncfut2)),\n",
"    \"{{graudifaqual}}\": str(avaliacao_qualitativa(meddificuldade2)),\n",
"    \"{{apadmqual}}\": str(avaliacao_qualitativa(medaplog)),\n",
"    \"{{motaproqual}}\": str(avaliacao_qualitativa(medmotvpart2)),\n",
"    \"{{conhcursoqual}}\": str(avaliacao_qualitativa(medconhecimento2)),\n",
"    \"{{objcrusoqual}}\": str(avaliacao_qualitativa(objcruso)),\n",
"    \"{{contcursoqual}}\": str(avaliacao_qualitativa(contcurso)),\n",
"    \"{{adeqtrabqual}}\": str(avaliacao_qualitativa(adeqtrab)),\n",
"    \"{{instformqual}}\": str(avaliacao_qualitativa(instform)),\n",
"    \"{{audiovisuaisqual}}\": str(avaliacao_qualitativa(audiovisuais)),\n",
"    \"{{biblioqual}}\": str(avaliacao_qualitativa(biblio)),\n",
"    \"{{mediaalimequal}}\": str(avaliacao_qualitativa(medalimentacao2)),\n",
"    \"{{NOMEDOCURSO}}\": str(NOMEDOCURSOcurto),\n",
"    \"{{MESi}}\": str(MESi),\n",
"    \"{{AAAAi}}\": str(AAAAi),\n",
"    \"{{DDf}}\": str(DDf),\n",
"    \"{{MESf}}\": str(MESf),\n",
"    \"{{AAAAf}}\": str(AAAAf),\n",
"    \"{{MEDIAFINALCURSO}}\": str(MEDIAFINALCURSO),\n",
"    \"{{NFORMANDOS}}\": str(NFORMANDOS),\n",
"    \"{{FINALIDADECURSO}}\": str(FINALIDADECURSO),\n",
"    \"{{DDi}}\": str(DDi)\n",
"}\n",
"template_path = \"Anexo RAI..docx\"\n",
"output_path = \"relatorio_final.docx\"\n",
"\n",
"doc = Document(template_path)\n",
"\n",
"# Section fed by file_path3: fill the list when a spreadsheet was chosen,\n",
"# otherwise cut everything between its two anchors out of the document.\n",
"if file_path3:\n",
"    replace_placeholder_with_propostas(\n",
"        doc, df3, placeholder=\"{{Propostas}}\",\n",
"        left_indent_cm=2.75, hanging_cm=0.6,\n",
"        font_name=\"Arial\", font_size_pt=12,\n",
"        deduplicate=True, max_items=None,\n",
"    )\n",
"else:\n",
"    print(\"Nenhum ficheiro selecionado. A remover secção do documento.\")\n",
"    delete_all_between_anchors_xml(doc, \"{{ANCORA1}}\", \"{{ANCORA2}}\", debug=True)\n",
"    df3 = None\n",
"\n",
"# Section fed by file_path4: same rule, with four free-text lists to fill.\n",
"if file_path4:\n",
"    replace_placeholder_with_positivosdir(\n",
"        doc, df4, placeholder=\"{{positivosdir}}\",\n",
"        left_indent_cm=2.75, hanging_cm=0.6,\n",
"        font_name=\"Arial\", font_size_pt=12, deduplicate=True,\n",
"    )\n",
"    replace_placeholder_with_incluirdir(\n",
"        doc, df4, placeholder=\"{{incluirdir}}\",\n",
"        left_indent_cm=2.75, hanging_cm=0.6,\n",
"        font_name=\"Arial\", font_size_pt=12, deduplicate=True,\n",
"    )\n",
"    replace_placeholder_with_desenvolverdir(\n",
"        doc, df4, placeholder=\"{{desenvolverdir}}\",\n",
"        left_indent_cm=2.75, hanging_cm=0.6,\n",
"        font_name=\"Arial\", font_size_pt=12, deduplicate=True,\n",
"    )\n",
"    replace_placeholder_with_temasdir(\n",
"        doc, df4, placeholder=\"{{temasdir}}\",\n",
"        left_indent_cm=2.75, hanging_cm=0.6,\n",
"        font_name=\"Arial\", font_size_pt=12, deduplicate=True,\n",
"    )\n",
"else:\n",
"    print(\"Nenhum ficheiro selecionado. A remover secção do documento.\")\n",
"    delete_all_between_anchors_xml(doc, \"{{ANCORA3}}\", \"{{ANCORA4}}\", debug=True)\n",
"    df4 = None\n",
" \n",
"# Scalar placeholders from the replacements dict.\n",
"replace_placeholders_docx_bold_values_keep_style(doc, replacements)\n",
"\n",
"# Generated lists and tables, all built from df2 (and df for the UC table).\n",
"replace_placeholder_with_q06_subitems(\n",
"    doc, df2,\n",
"    placeholder=\"{{Q06_Apreciacao}}\",\n",
"    indent_cm=2.75,\n",
"    indent_title=True,\n",
"    font_name=\"Arial\",\n",
"    font_size_pt=12\n",
")\n",
"\n",
"replace_placeholder_with_formadores_table(\n",
"    doc,\n",
"    df2,\n",
"    placeholder=\"{{tabelaFormadores}}\",\n",
"    font_name=\"Arial\",\n",
"    font_size_pt=12\n",
")\n",
"\n",
"replace_placeholder_with_uc_table(\n",
"    doc,\n",
"    df_inicial=df,\n",
"    df_final=df2,\n",
"    placeholder=\"{{tabelasUC}}\",\n",
"    font_name=\"Arial\",\n",
"    font_size_pt=12\n",
")\n",
"\n",
"replace_placeholder_with_temas_smart(\n",
"    doc,\n",
"    df2,\n",
"    placeholder=\"{{TEMAS}}\",\n",
"    indent_cm=0.5,\n",
"    font_name=\"Arial\",\n",
"    font_size_pt=12,\n",
"    deduplicate=True,\n",
"    max_items=6\n",
")\n",
"\n",
"replace_placeholder_with_desenvolver_smart(\n",
"    doc,\n",
"    df2,\n",
"    placeholder=\"{{DESENVOLVER}}\",\n",
"    indent_cm=0.5,\n",
"    font_name=\"Arial\",\n",
"    font_size_pt=12,\n",
"    max_items=6\n",
")\n",
"\n",
"replace_placeholder_with_incluir_smart(\n",
"    doc,\n",
"    df2,\n",
"    placeholder=\"{{INCLUIR}}\",\n",
"    indent_cm=0.5,\n",
"    font_name=\"Arial\",\n",
"    font_size_pt=12,\n",
"    max_items=6\n",
")\n",
"\n",
"replace_placeholder_with_observacoes_smart2(\n",
"    doc,\n",
"    df2,\n",
"    placeholder=\"{{OBSERVACOES2}}\",\n",
"    indent_cm=2.75,\n",
"    font_name=\"Arial\",\n",
"    font_size_pt=12,\n",
"    max_items=6\n",
")\n",
"\n",
"# Called exactly once: a second identical call finds nothing left to replace,\n",
"# which is why the reported count used to be 0.\n",
"n = replace_placeholder_with_observacoes_smart(\n",
"    doc,\n",
"    df2,\n",
"    placeholder=\"{{OBSERVACOES}}\",\n",
"    indent_cm=0.5,\n",
"    font_name=\"Arial\",\n",
"    font_size_pt=12,\n",
"    use_ollama=True,\n",
"    ollama_model=\"llama3.1:8b\"  # adjust to whatever model the machines have\n",
")\n",
"print(\"OBSERVACOES substituídos:\", n)\n",
"\n",
"# Remove the paragraphs that still mention an anchor, then write the report.\n",
"delete_lines_with_ancora(doc, pattern=r\"ANCORA\")\n",
"doc.save(output_path)\n",
"\n",
"print(f\"Saved: {output_path}\")"
]
},
{
"cell_type": "code",
"execution_count": 66,
"id": "37116ac4",
"metadata": {},
"outputs": [],
"source": [
"def iter_body_blocks(doc):\n",
"    \"\"\"Yield ``(kind, object)`` for the direct children of the document body.\n",
"\n",
"    ``kind`` is ``\"p\"`` (wrapped as ``Paragraph``) or ``\"tbl\"`` (wrapped as\n",
"    ``Table``); children with any other tag are skipped. Document order is kept.\n",
"    \"\"\"\n",
"    wrappers = {\"p\": Paragraph, \"tbl\": Table}\n",
"    for element in doc._element.body.iterchildren():\n",
"        # Tags are namespace-qualified (\"{ns}p\"); keep only the local part.\n",
"        local_name = element.tag.rsplit(\"}\", 1)[-1]\n",
"        wrapper = wrappers.get(local_name)\n",
"        if wrapper is not None:\n",
"            yield (local_name, wrapper(element, doc))"
]
},
{
"cell_type": "code",
"execution_count": 67,
"id": "94b4cf8d",
"metadata": {},
"outputs": [],
"source": [
"def table_to_text(tbl) -> str:\n",
"    \"\"\"Render ``tbl`` as plain text, one line per non-empty row.\n",
"\n",
"    Each cell becomes its non-blank paragraphs joined by a space, with runs of\n",
"    whitespace collapsed; cells are joined with ``\" | \"``. Rows whose cells are\n",
"    all empty are dropped.\n",
"    \"\"\"\n",
"    def _cell_text(cell) -> str:\n",
"        pieces = [p.text.strip() for p in cell.paragraphs if p.text.strip()]\n",
"        return re.sub(r\"\\s+\", \" \", \" \".join(pieces)).strip()\n",
"\n",
"    rendered_rows = []\n",
"    for row in tbl.rows:\n",
"        cells = [_cell_text(cell) for cell in row.cells]\n",
"        if any(cells):\n",
"            rendered_rows.append(\" | \".join(cells))\n",
"    return \"\\n\".join(rendered_rows).strip()\n",
"\n",
"\n",
"def extract_text_between_markers(doc, start_re: str, end_re: str | None = None, debug=False) -> str:\n",
"    \"\"\"Collect the body text found after a start marker and before an end marker.\n",
"\n",
"    Collection starts after the first paragraph matching ``start_re`` and stops\n",
"    at the first later paragraph matching ``end_re`` (or at the end of the body\n",
"    when ``end_re`` is empty). Both patterns are case-insensitive and neither\n",
"    marker paragraph is included. Non-empty paragraphs and tables (rendered by\n",
"    ``table_to_text``) are joined with newlines.\n",
"    \"\"\"\n",
"    begin = re.compile(start_re, flags=re.IGNORECASE)\n",
"    finish = re.compile(end_re, flags=re.IGNORECASE) if end_re else None\n",
"    inside = False\n",
"    collected = []\n",
"    for kind, block in iter_body_blocks(doc):\n",
"        if kind == \"tbl\":\n",
"            if inside:\n",
"                rendered = table_to_text(block)\n",
"                if rendered:\n",
"                    collected.append(rendered)\n",
"            continue\n",
"        if kind != \"p\":\n",
"            continue\n",
"        line = (block.text or \"\").strip()\n",
"        if not inside:\n",
"            # Still looking for the start marker; everything before it is ignored.\n",
"            if begin.search(line):\n",
"                inside = True\n",
"                if debug:\n",
"                    print(\"[DEBUG] START matched:\", line)\n",
"            continue\n",
"        if finish and finish.search(line):\n",
"            if debug:\n",
"                print(\"[DEBUG] END matched:\", line)\n",
"            break\n",
"        if line:\n",
"            collected.append(line)\n",
"    joined = \"\\n\".join(collected).strip()\n",
"    return re.sub(r\"\\n{3,}\", \"\\n\\n\", joined)"
]
},
{
"cell_type": "code",
"execution_count": 68,
"id": "3b2a6313",
"metadata": {},
"outputs": [],
"source": [
"# NOTE: same definition as the one in the notebook's second cell; this later\n",
"# copy is the one in effect once this cell has run.\n",
"def ollama_available(timeout=0.4) -> bool:\n",
"    \"\"\"Return True when the local Ollama server answers ``/api/tags`` with 200.\n",
"\n",
"    Any exception raised while probing is treated as \"not available\".\n",
"    \"\"\"\n",
"    try:\n",
"        reachable = requests.get(\"http://localhost:11434/api/tags\", timeout=timeout).status_code == 200\n",
"    except Exception:\n",
"        reachable = False\n",
"    return reachable\n",
"\n",
"def ollama_summarize_text(\n",
"    text: str,\n",
"    model: str = \"llama3.1:8b\",\n",
"    max_chars: int = 24000,\n",
"    timeout: float = 120.0,\n",
"    system_prompt: str = \"\",\n",
"    user_prompt: str = \"\",\n",
") -> str:\n",
"    \"\"\"Send ``text`` to the local Ollama ``/api/generate`` endpoint and return the reply.\n",
"\n",
"    Blank input returns an empty string without any request. Input longer than\n",
"    ``max_chars`` is cut and a truncation notice is appended. The prompt is the\n",
"    system prompt, the user prompt and the text, followed by a fixed closing\n",
"    instruction. HTTP error statuses are raised by ``raise_for_status()``.\n",
"\n",
"    Returns:\n",
"        The stripped ``response`` field of the JSON reply (empty when missing).\n",
"    \"\"\"\n",
"    if text.strip() == \"\":\n",
"        return \"\"\n",
"    text = (\n",
"        text\n",
"        if len(text) <= max_chars\n",
"        else text[:max_chars] + \"\\n\\n[Texto truncado por limite de tamanho.]\"\n",
"    )\n",
"    prompt = f\"\"\"\n",
"{system_prompt}\n",
"{user_prompt}\n",
"TEXTO:\n",
"{text}\n",
"\n",
"DEVOLVE APENAS O RESULTADO FINAL, SEM EXPLICAÇÕES.\n",
"\"\"\".strip()\n",
"\n",
"    response = requests.post(\n",
"        \"http://localhost:11434/api/generate\",\n",
"        json={\n",
"            \"model\": model,\n",
"            \"prompt\": prompt,\n",
"            \"stream\": False,\n",
"            \"options\": {\"temperature\": 0.2},\n",
"        },\n",
"        timeout=timeout,\n",
"    )\n",
"    response.raise_for_status()\n",
"    answer = response.json().get(\"response\", \"\")\n",
"    return (answer or \"\").strip()\n"
]
},
{
"cell_type": "code",
"execution_count": 69,
"id": "595d6cff",
"metadata": {},
"outputs": [],
"source": [
"def replace_placeholder_with_text_paragraph_all(\n",
"    doc,\n",
"    placeholder: str,\n",
"    text: str,\n",
"    indent_cm: float = 0.0,\n",
"    font_name: str = \"Arial\",\n",
"    font_size_pt: int = 12,\n",
") -> int:\n",
"    \"\"\"Replace whole paragraphs that contain ``placeholder`` with ``text``.\n",
"\n",
"    Body paragraphs and the paragraphs of top-level body tables are scanned.\n",
"    A matching paragraph has all its runs emptied, ``text`` written into the\n",
"    first run (font forced via ``force_run_font``) and its paragraph format set\n",
"    to the given left indent, 1.5 line spacing and no space before/after.\n",
"\n",
"    Returns:\n",
"        Number of paragraphs rewritten.\n",
"    \"\"\"\n",
"    def _candidates():\n",
"        # Body paragraphs first, then the cells of each top-level table.\n",
"        yield from list(doc.paragraphs)\n",
"        for tbl in doc.tables:\n",
"            for row in tbl.rows:\n",
"                for cell in row.cells:\n",
"                    yield from list(cell.paragraphs)\n",
"\n",
"    count = 0\n",
"    for para in _candidates():\n",
"        if placeholder not in (para.text or \"\"):\n",
"            continue\n",
"        if not para.runs:\n",
"            # Make sure there is a run to receive the text.\n",
"            para.add_run(\"\")\n",
"        runs = para.runs\n",
"        for run in runs:\n",
"            run.text = \"\"\n",
"        first = runs[0]\n",
"        first.text = text\n",
"        force_run_font(first, font_name, font_size_pt)\n",
"        layout = para.paragraph_format\n",
"        layout.left_indent = Cm(indent_cm)\n",
"        layout.line_spacing = 1.5\n",
"        layout.space_before = Pt(0)\n",
"        layout.space_after = Pt(0)\n",
"        count += 1\n",
"    return count"
]
},
{
"cell_type": "code",
"execution_count": 70,
"id": "5476ed71",
"metadata": {},
"outputs": [],
"source": [
"def fill_llm_placeholders_llm9_llm10(\n",
"    doc,\n",
"    model: str = \"llama3.1:8b\",\n",
"    placeholder_llm10: str = \"{{LLM10}}\",\n",
"    indent_cm: float = 0.5,\n",
"    font_name: str = \"Arial\",\n",
"    font_size_pt: int = 12,\n",
"    debug_extract: bool = False,\n",
") -> dict:\n",
"    \"\"\"Summarise the Apêndice 5 and Apêndice 6 text with Ollama and write it over ``placeholder_llm10``.\n",
"\n",
"    Despite the function name, only the LLM10 placeholder is handled here; the\n",
"    ``llm9`` entry of the result is always 0 and is kept for backward\n",
"    compatibility.\n",
"\n",
"    The source text is collected with ``extract_text_between_markers`` (defined\n",
"    elsewhere in this notebook): once from a line matching \"Apêndice 5\" up to\n",
"    \"Apêndice 6\", and once from \"Apêndice 6\" up to the second end pattern\n",
"    below. When nothing is extracted, or the model returns an empty answer,\n",
"    the placeholder is replaced with \"Sem dados.\".\n",
"\n",
"    Args:\n",
"        doc: Document object passed through to the extraction/replacement helpers.\n",
"        model: Ollama model name.\n",
"        placeholder_llm10: Placeholder text to look for.\n",
"        indent_cm, font_name, font_size_pt: Formatting forwarded to\n",
"            ``replace_placeholder_with_text_paragraph_all``.\n",
"        debug_extract: Forwarded as ``debug`` to ``extract_text_between_markers``.\n",
"\n",
"    Returns:\n",
"        A dict with the same keys on every path: ``ok``, ``llm9``, ``llm10``\n",
"        (paragraphs replaced) and ``chars_in_llm10`` (length of the text sent\n",
"        for summarisation). When Ollama is unreachable ``ok`` is False, a\n",
"        ``reason`` key is added and the document is left untouched.\n",
"\n",
"    Raises:\n",
"        Whatever ``ollama_summarize_text`` raises when the request fails.\n",
"    \"\"\"\n",
"    if not ollama_available():\n",
"        return {\n",
"            \"ok\": False,\n",
"            \"reason\": \"ollama_not_available\",\n",
"            \"llm9\": 0,\n",
"            \"llm10\": 0,\n",
"            \"chars_in_llm10\": 0,\n",
"        }\n",
"    ap5 = extract_text_between_markers(\n",
"        doc,\n",
"        start_re=r\"^\\s*Apêndice\\s*5\\b\",\n",
"        end_re=r\"^\\s*Apêndice\\s*6\\b\",\n",
"        debug=debug_extract\n",
"    )\n",
"    ap6 = extract_text_between_markers(\n",
"        doc,\n",
"        start_re=r\"^\\s*Apêndice\\s*6\\b\",\n",
"        end_re=r\"^\\s*1\\s*[-]\\s\",\n",
"        debug=debug_extract\n",
"    )\n",
"    # Skip blank extracts so the joined text has no stray separators.\n",
"    texto_llm10 = \"\\n\\n\".join([t for t in [ap5, ap6] if t.strip()]).strip()\n",
"    sys_pt = \"És um analista a redigir um relatório oficial. Português de Portugal. Não inventes informação.\"\n",
"    prompt_llm10 = (\n",
"        \"Resume os conteúdos do Apêndice 5 e do Apêndice 6 num texto único, formal e conciso \"\n",
"        \"(1 a 2 parágrafos). Realça pontos-chave e recomendações.\"\n",
"    )\n",
"    resumo10 = \"\"\n",
"    if texto_llm10:\n",
"        resumo10 = ollama_summarize_text(texto_llm10, model=model, system_prompt=sys_pt, user_prompt=prompt_llm10)\n",
"    n10 = replace_placeholder_with_text_paragraph_all(\n",
"        doc, placeholder_llm10, resumo10.strip() or \"Sem dados.\",\n",
"        indent_cm=indent_cm, font_name=font_name, font_size_pt=font_size_pt\n",
"    )\n",
"    # Same key set as the failure path so callers can read either result uniformly.\n",
"    return {\n",
"        \"ok\": True,\n",
"        \"llm9\": 0,\n",
"        \"llm10\": n10,\n",
"        \"chars_in_llm10\": len(texto_llm10),\n",
"    }"
]
},
{
"cell_type": "code",
"execution_count": 71,
"id": "8df236c3",
"metadata": {},
"outputs": [],
"source": [
"def fill_llm_placeholder_from_doc_range(\n",
"    doc,\n",
"    placeholder: str,\n",
"    start_marker: str,\n",
"    end_marker: str,\n",
"    model: str = \"llama3.1:8b\",\n",
"    indent_cm: float = 0.5,\n",
"    font_name: str = \"Arial\",\n",
"    font_size_pt: int = 12,\n",
"    debug_extract: bool = False,\n",
") -> dict:\n",
"    \"\"\"Summarise the document text between two markers and write it over ``placeholder``.\n",
"\n",
"    The text is obtained from ``extract_text_between_markers`` (defined\n",
"    elsewhere in this notebook). The first attempt uses each escaped marker\n",
"    anchored with ``^`` and ``$`` (surrounding whitespace allowed); if that\n",
"    yields nothing, a second attempt uses the escaped markers unanchored.\n",
"    If both attempts are empty the placeholder is replaced with \"Sem dados.\".\n",
"\n",
"    Args:\n",
"        doc: Document object passed through to the extraction/replacement helpers.\n",
"        placeholder: Placeholder text to look for.\n",
"        start_marker, end_marker: Literal marker strings delimiting the range.\n",
"        model: Ollama model name.\n",
"        indent_cm, font_name, font_size_pt: Formatting forwarded to\n",
"            ``replace_placeholder_with_text_paragraph_all``.\n",
"        debug_extract: Forwarded as ``debug`` to ``extract_text_between_markers``.\n",
"\n",
"    Returns:\n",
"        A dict with ``ok``, ``replaced`` (paragraphs overwritten) and ``chars``\n",
"        (length of the extracted text). ``reason`` is added when Ollama is\n",
"        unreachable (document untouched); ``note`` is added when no text was\n",
"        found between the markers.\n",
"\n",
"    Raises:\n",
"        Whatever ``ollama_summarize_text`` raises when the request fails.\n",
"    \"\"\"\n",
"    if not ollama_available():\n",
"        return {\"ok\": False, \"reason\": \"ollama_not_available\", \"replaced\": 0, \"chars\": 0}\n",
"    start_re = r\"^\\s*\" + re.escape(start_marker.strip()) + r\"\\s*$\"\n",
"    end_re = r\"^\\s*\" + re.escape(end_marker.strip()) + r\"\\s*$\"\n",
"    texto = extract_text_between_markers(\n",
"        doc,\n",
"        start_re=start_re,\n",
"        end_re=end_re,\n",
"        debug=debug_extract\n",
"    ).strip()\n",
"    if not texto:\n",
"        # Fallback: accept the markers anywhere in the matched text, not only alone.\n",
"        start_re2 = re.escape(start_marker.strip())\n",
"        end_re2 = re.escape(end_marker.strip())\n",
"        texto = extract_text_between_markers(\n",
"            doc,\n",
"            start_re=start_re2,\n",
"            end_re=end_re2,\n",
"            debug=debug_extract\n",
"        ).strip()\n",
"    if not texto:\n",
"        n = replace_placeholder_with_text_paragraph_all(\n",
"            doc, placeholder, \"Sem dados.\",\n",
"            indent_cm=indent_cm, font_name=font_name, font_size_pt=font_size_pt\n",
"        )\n",
"        return {\"ok\": True, \"replaced\": n, \"chars\": 0, \"note\": \"range_not_found\"}\n",
"    sys_pt = \"És um analista a redigir um relatório oficial. Português de Portugal. Não inventes informação.\"\n",
"    # The ranges are spelled out (\"2 a 4\", \"5 a 8\"): the previous literals read\n",
"    # \"24 frases\" and \"58 bullets\", i.e. the range separator had been lost.\n",
"    user_prompt = (\n",
"        \"Lê o texto e produz um resumo final, em estilo de conclusões, adequado a relatório oficial:\\n\"\n",
"        \"• 1 parágrafo de enquadramento (2 a 4 frases)\\n\"\n",
"        \"• 5 a 8 bullets com conclusões/recomendações principais\\n\"\n",
"        \"• Não inventes dados nem percentagens.\"\n",
"    )\n",
"    resumo = ollama_summarize_text(\n",
"        texto,\n",
"        model=model,\n",
"        system_prompt=sys_pt,\n",
"        user_prompt=user_prompt\n",
"    ).strip() or \"Sem dados.\"\n",
"    n = replace_placeholder_with_text_paragraph_all(\n",
"        doc, placeholder, resumo,\n",
"        indent_cm=indent_cm, font_name=font_name, font_size_pt=font_size_pt\n",
"    )\n",
"    return {\"ok\": True, \"replaced\": n, \"chars\": len(texto)}\n"
]
},
{
"cell_type": "code",
"execution_count": 72,
"id": "dac9419a",
"metadata": {},
"outputs": [],
"source": [
"# Load the report, fill the {{LLM9}} and {{LLM10}} placeholders, and save it\n",
"# back to the same path.\n",
"output_path = \"relatorio_final.docx\"\n",
"doc = Document(output_path)\n",
"# Keep the status dicts: they tell whether Ollama was reachable, whether the\n",
"# marker range was found, and how many paragraphs were actually replaced.\n",
"llm9_result = fill_llm_placeholder_from_doc_range(\n",
"    doc,\n",
"    placeholder=\"{{LLM9}}\",\n",
"    start_marker=\"RELATÓRIO DE AVALIAÇÃO INTERNA\",\n",
"    end_marker=\"O CHEFE DA DIREÇÃO DE AVALIAÇÃO E QUALIDADE\",\n",
"    model=\"llama3.1:8b\",\n",
"    indent_cm=0.5,\n",
"    font_name=\"Arial\",\n",
"    font_size_pt=12,\n",
"    debug_extract=True\n",
")\n",
"llm10_result = fill_llm_placeholders_llm9_llm10(\n",
"    doc,\n",
"    model=\"llama3.1:8b\",\n",
"    placeholder_llm10=\"{{LLM10}}\",\n",
"    indent_cm=0.5,\n",
"    font_name=\"Arial\",\n",
"    font_size_pt=12,\n",
"    debug_extract=True\n",
")\n",
"doc.save(output_path)\n",
"# Show both outcomes as the cell output instead of silently discarding them.\n",
"{\"LLM9\": llm9_result, \"LLM10\": llm10_result}"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}