3483 lines
130 KiB
Plaintext
3483 lines
130 KiB
Plaintext
|
|
{
|
|||
|
|
"cells": [
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 1,
|
|||
|
|
"id": "1a6ab6f8",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"#ollama pull llama3.1:8b\n",
|
|||
|
|
"import pandas as pd\n",
|
|||
|
|
"from tkinter import Tk\n",
|
|||
|
|
"from tkinter.filedialog import askopenfilename\n",
|
|||
|
|
"from docx import Document\n",
|
|||
|
|
"from docx.document import Document as DocxDocument\n",
|
|||
|
|
"from docx.text.paragraph import Paragraph\n",
|
|||
|
|
"import re\n",
|
|||
|
|
"import string\n",
|
|||
|
|
"from docx.oxml import OxmlElement\n",
|
|||
|
|
"from docx.shared import Cm, Pt\n",
|
|||
|
|
"from docx.oxml.ns import qn\n",
|
|||
|
|
"from docx.text.run import Run\n",
|
|||
|
|
"from docx.table import Table\n",
|
|||
|
|
"from docx.enum.text import WD_ALIGN_PARAGRAPH\n",
|
|||
|
|
"from docx.enum.table import WD_ROW_HEIGHT_RULE, WD_ALIGN_VERTICAL\n",
|
|||
|
|
"import requests\n",
|
|||
|
|
"import tkinter as tk\n",
|
|||
|
|
"from tkinter import messagebox"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 2,
|
|||
|
|
"id": "e68b58bb",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def ollama_available(timeout=0.4) -> bool:\n",
|
|||
|
|
" try:\n",
|
|||
|
|
" r = requests.get(\"http://localhost:11434/api/tags\", timeout=timeout) #aqui depois colocar o bot\n",
|
|||
|
|
" return r.status_code == 200\n",
|
|||
|
|
" except Exception:\n",
|
|||
|
|
" return False"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 3,
|
|||
|
|
"id": "4f257bf0",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def avaliacao_qualitativa(valor: float) -> str:\n",
|
|||
|
|
" if valor is None or pd.isna(valor):\n",
|
|||
|
|
" return \"N/A\"\n",
|
|||
|
|
" if 1.0 <= valor <= 3.0:\n",
|
|||
|
|
" return \"Rever Urgentemente\"\n",
|
|||
|
|
" elif 3.0 < valor <= 3.5:\n",
|
|||
|
|
" return \"Rever e Melhorar\"\n",
|
|||
|
|
" elif 3.5 < valor <= 3.9:\n",
|
|||
|
|
" return \"Bom\"\n",
|
|||
|
|
" elif 3.9 < valor <= 4.5:\n",
|
|||
|
|
" return \"Qualidade\"\n",
|
|||
|
|
" elif 4.5 < valor <= 5.0:\n",
|
|||
|
|
" return \"Excelência\"\n",
|
|||
|
|
" else:\n",
|
|||
|
|
" return \"Out of Range\""
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 4,
|
|||
|
|
"id": "c0c9fdd2",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def format_header_row(row, height_cm=5.2):\n",
|
|||
|
|
" row.height = Cm(height_cm)\n",
|
|||
|
|
" row.height_rule = WD_ROW_HEIGHT_RULE.EXACTLY\n",
|
|||
|
|
" for cell in row.cells:\n",
|
|||
|
|
" cell.vertical_alignment = WD_ALIGN_VERTICAL.CENTER\n",
|
|||
|
|
" for p in cell.paragraphs:\n",
|
|||
|
|
" p.alignment = WD_ALIGN_PARAGRAPH.CENTER"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 5,
|
|||
|
|
"id": "91ad588c",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def set_cell_text_vertical(cell, direction=\"btLr\"):\n",
|
|||
|
|
" tcPr = cell._tc.get_or_add_tcPr()\n",
|
|||
|
|
" td = OxmlElement(\"w:textDirection\")\n",
|
|||
|
|
" td.set(qn(\"w:val\"), direction)\n",
|
|||
|
|
" tcPr.append(td)\n",
|
|||
|
|
"\n",
|
|||
|
|
"def set_table_all_columns_width(tbl, width_cm=2.3):\n",
|
|||
|
|
" tbl.autofit = False\n",
|
|||
|
|
" w = Cm(width_cm)\n",
|
|||
|
|
" for row in tbl.rows:\n",
|
|||
|
|
" for cell in row.cells:\n",
|
|||
|
|
" cell.width = w"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 6,
|
|||
|
|
"id": "65478bf7",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def force_run_font(run: Run, font_name=\"Arial\", font_size_pt=12, bold=None):\n",
|
|||
|
|
" if bold is not None:\n",
|
|||
|
|
" run.bold = bool(bold)\n",
|
|||
|
|
" run.font.name = font_name\n",
|
|||
|
|
" run.font.size = Pt(font_size_pt)\n",
|
|||
|
|
" run._element.rPr.rFonts.set(qn(\"w:eastAsia\"), font_name)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 7,
|
|||
|
|
"id": "1ad788ec",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def format_pt_number(x: float) -> str:\n",
|
|||
|
|
" if pd.isna(x):\n",
|
|||
|
|
" return \"\"\n",
|
|||
|
|
" return f\"{x:.2f}\".replace(\".\", \",\")"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 8,
|
|||
|
|
"id": "a32425de",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def delete_paragraph(paragraph: Paragraph) -> None:\n",
|
|||
|
|
" p = paragraph._p\n",
|
|||
|
|
" p.getparent().remove(p)\n",
|
|||
|
|
" paragraph._p = paragraph._element = None"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 9,
|
|||
|
|
"id": "9d3ccf1e",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def clean_module_title(col_name: str) -> str:\n",
|
|||
|
|
" s = str(col_name).strip()\n",
|
|||
|
|
" if \"->\" in s:\n",
|
|||
|
|
" s = s.split(\"->\", 1)[1].strip()\n",
|
|||
|
|
" return s"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 10,
|
|||
|
|
"id": "1baf3c15",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"#def clean_module_title(col_name: str) -> str:\n",
|
|||
|
|
"# s = str(col_name).strip()\n",
|
|||
|
|
"# s = re.sub(r\"^.*?->\\s*\", \"\", s)\n",
|
|||
|
|
"# s = re.sub(r\"^\\s*Q06\\s*[-–_ ]\\s*Aprecia.*?[-–:]\\s*\", \"\", s, flags=re.IGNORECASE)\n",
|
|||
|
|
"# return s.strip()"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 11,
|
|||
|
|
"id": "fb0689e3",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def apply_table_paragraph_spacing(cell, line_spacing=1.5):\n",
|
|||
|
|
" for p in cell.paragraphs:\n",
|
|||
|
|
" fmt = p.paragraph_format\n",
|
|||
|
|
" fmt.line_spacing = line_spacing\n",
|
|||
|
|
" fmt.space_before = Pt(0)\n",
|
|||
|
|
" fmt.space_after = Pt(0)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 12,
|
|||
|
|
"id": "cacbdd0b",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def mean_columns_until_empty(df: pd.DataFrame, start_col_idx: int = 16) -> list[float]:\n",
|
|||
|
|
" means = []\n",
|
|||
|
|
" for j in range(start_col_idx, df.shape[1]):\n",
|
|||
|
|
" col = pd.to_numeric(df.iloc[:, j], errors=\"coerce\")\n",
|
|||
|
|
" if col.notna().sum() == 0:\n",
|
|||
|
|
" break\n",
|
|||
|
|
" means.append(col.mean())\n",
|
|||
|
|
" return means\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 13,
|
|||
|
|
"id": "a835ca2d",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def insert_table_after_paragraph(paragraph: Paragraph, rows: int, cols: int) -> Table:\n",
|
|||
|
|
" doc = paragraph.part.document\n",
|
|||
|
|
" tbl = doc.add_table(rows=rows, cols=cols)\n",
|
|||
|
|
" paragraph._p.addnext(tbl._tbl)\n",
|
|||
|
|
" return tbl"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 14,
|
|||
|
|
"id": "be90075d",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def replace_placeholder_with_uc_table(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df_inicial: pd.DataFrame,\n",
|
|||
|
|
" df_final: pd.DataFrame,\n",
|
|||
|
|
" placeholder: str = \"{{tabelasUC}}\",\n",
|
|||
|
|
" font_name: str = \"Arial\",\n",
|
|||
|
|
" font_size_pt: int = 12,\n",
|
|||
|
|
" start_col_idx_inicial: int = 16,\n",
|
|||
|
|
"):\n",
|
|||
|
|
" def extract_uc_cols(df: pd.DataFrame):\n",
|
|||
|
|
" cols = []\n",
|
|||
|
|
" for c in df.columns:\n",
|
|||
|
|
" name = str(c).lower()\n",
|
|||
|
|
" if \"q06\" in name and \"aprecia\" in name:\n",
|
|||
|
|
" cols.append(c)\n",
|
|||
|
|
" cols.sort(key=lambda x: str(x))\n",
|
|||
|
|
" return cols\n",
|
|||
|
|
"\n",
|
|||
|
|
" uc_cols = extract_uc_cols(df_final)\n",
|
|||
|
|
" ini_means = mean_columns_until_empty(df_inicial, start_col_idx=start_col_idx_inicial)\n",
|
|||
|
|
"\n",
|
|||
|
|
" def build_rows():\n",
|
|||
|
|
" data = []\n",
|
|||
|
|
" for i, c in enumerate(uc_cols):\n",
|
|||
|
|
" ini = ini_means[i] if i < len(ini_means) else float(\"nan\")\n",
|
|||
|
|
" fin = pd.to_numeric(df_final[c], errors=\"coerce\").mean() if c in df_final.columns else float(\"nan\")\n",
|
|||
|
|
" diff = fin - ini if (not pd.isna(fin) and not pd.isna(ini)) else float(\"nan\")\n",
|
|||
|
|
" data.append((clean_module_title(c), ini, fin, diff))\n",
|
|||
|
|
" return data\n",
|
|||
|
|
"\n",
|
|||
|
|
" rows_data = build_rows()\n",
|
|||
|
|
"\n",
|
|||
|
|
" def _fill_cell(cell, text: str, bold=False, align=None):\n",
|
|||
|
|
" cell.text = \"\"\n",
|
|||
|
|
" p = cell.paragraphs[0]\n",
|
|||
|
|
" if align is not None:\n",
|
|||
|
|
" p.alignment = align\n",
|
|||
|
|
" r = p.add_run(text)\n",
|
|||
|
|
" force_run_font(r, font_name, font_size_pt, bold=bold)\n",
|
|||
|
|
" apply_table_paragraph_spacing(cell, line_spacing=1.5)\n",
|
|||
|
|
"\n",
|
|||
|
|
"\n",
|
|||
|
|
" def _apply_table_layout(tbl):\n",
|
|||
|
|
" tbl.style = \"Table Grid\"\n",
|
|||
|
|
" tbl.autofit = False\n",
|
|||
|
|
" col_widths = [Cm(11), Cm(1.6), Cm(1.6), Cm(3.5)]\n",
|
|||
|
|
" for row in tbl.rows:\n",
|
|||
|
|
" for i, w in enumerate(col_widths):\n",
|
|||
|
|
" row.cells[i].width = w\n",
|
|||
|
|
" headers = [\"Apreciação dos módulos\", \"Inicial\", \"Final\", \"Ganhos/Perdas\"]\n",
|
|||
|
|
" for j, h in enumerate(headers):\n",
|
|||
|
|
" _fill_cell(tbl.rows[0].cells[j], h, bold=True, align=WD_ALIGN_PARAGRAPH.CENTER)\n",
|
|||
|
|
" def _populate_rows(tbl):\n",
|
|||
|
|
" if not rows_data:\n",
|
|||
|
|
" _fill_cell(tbl.rows[1].cells[0], \"Sem dados\", bold=False, align=WD_ALIGN_PARAGRAPH.LEFT)\n",
|
|||
|
|
" _fill_cell(tbl.rows[1].cells[1], \"\", bold=False, align=WD_ALIGN_PARAGRAPH.CENTER)\n",
|
|||
|
|
" _fill_cell(tbl.rows[1].cells[2], \"\", bold=False, align=WD_ALIGN_PARAGRAPH.CENTER)\n",
|
|||
|
|
" _fill_cell(tbl.rows[1].cells[3], \"\", bold=False, align=WD_ALIGN_PARAGRAPH.CENTER)\n",
|
|||
|
|
" return\n",
|
|||
|
|
" for i, (title, ini, fin, diff) in enumerate(rows_data, start=1):\n",
|
|||
|
|
" _fill_cell(tbl.rows[i].cells[0], title, bold=False, align=WD_ALIGN_PARAGRAPH.LEFT)\n",
|
|||
|
|
" _fill_cell(tbl.rows[i].cells[1], format_pt_number(ini), bold=False, align=WD_ALIGN_PARAGRAPH.CENTER)\n",
|
|||
|
|
" _fill_cell(tbl.rows[i].cells[2], format_pt_number(fin), bold=False, align=WD_ALIGN_PARAGRAPH.CENTER)\n",
|
|||
|
|
" diff_txt = \"\" if pd.isna(diff) else f\"{diff:+.2f}\".replace(\".\", \",\")\n",
|
|||
|
|
" _fill_cell(tbl.rows[i].cells[3], diff_txt, bold=False, align=WD_ALIGN_PARAGRAPH.CENTER)\n",
|
|||
|
|
" def _make_table_at_paragraph(p: Paragraph):\n",
|
|||
|
|
" n_rows = 1 + max(1, len(rows_data))\n",
|
|||
|
|
" tbl = insert_table_after_paragraph(p, rows=n_rows, cols=4)\n",
|
|||
|
|
" _apply_table_layout(tbl)\n",
|
|||
|
|
" _populate_rows(tbl)\n",
|
|||
|
|
" delete_paragraph(p)\n",
|
|||
|
|
" def _process_paragraph(p: Paragraph) -> bool:\n",
|
|||
|
|
" if placeholder not in p.text:\n",
|
|||
|
|
" return False\n",
|
|||
|
|
" _make_table_at_paragraph(p)\n",
|
|||
|
|
" return True\n",
|
|||
|
|
" for p in doc.paragraphs:\n",
|
|||
|
|
" if _process_paragraph(p):\n",
|
|||
|
|
" return\n",
|
|||
|
|
" for table in doc.tables:\n",
|
|||
|
|
" for row in table.rows:\n",
|
|||
|
|
" for cell in row.cells:\n",
|
|||
|
|
" for p in cell.paragraphs:\n",
|
|||
|
|
" if placeholder in p.text:\n",
|
|||
|
|
" cell.text = \"\"\n",
|
|||
|
|
" n_rows = 1 + max(1, len(rows_data))\n",
|
|||
|
|
" tbl = cell.add_table(rows=n_rows, cols=4)\n",
|
|||
|
|
" _apply_table_layout(tbl)\n",
|
|||
|
|
" _populate_rows(tbl)\n",
|
|||
|
|
" return"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 15,
|
|||
|
|
"id": "51f6e2c5",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def insert_paragraph_after(paragraph: Paragraph) -> Paragraph:\n",
|
|||
|
|
" new_p = OxmlElement(\"w:p\") \n",
|
|||
|
|
" paragraph._p.addnext(new_p) \n",
|
|||
|
|
" return Paragraph(new_p, paragraph._parent)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 16,
|
|||
|
|
"id": "3b36ac23",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def replace_placeholder_with_q06_subitems(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df: pd.DataFrame,\n",
|
|||
|
|
" placeholder: str = \"{{Q06_Apreciacao}}\",\n",
|
|||
|
|
" item_number: int = 5,\n",
|
|||
|
|
" indent_cm: float = 2.75,\n",
|
|||
|
|
" indent_title: bool = True,\n",
|
|||
|
|
" font_name: str = \"Arial\",\n",
|
|||
|
|
" font_size_pt: int = 12,\n",
|
|||
|
|
"):\n",
|
|||
|
|
" cols = []\n",
|
|||
|
|
" for c in df.columns:\n",
|
|||
|
|
" name = str(c).lower()\n",
|
|||
|
|
" if \"q06\" in name and \"aprecia\" in name:\n",
|
|||
|
|
" cols.append(c)\n",
|
|||
|
|
" cols.sort(key=lambda x: str(x))\n",
|
|||
|
|
" letters = string.ascii_lowercase\n",
|
|||
|
|
" def _apply_par_format(par: Paragraph, left_indent_cm: float):\n",
|
|||
|
|
" fmt = par.paragraph_format\n",
|
|||
|
|
" fmt.left_indent = Cm(left_indent_cm)\n",
|
|||
|
|
" fmt.line_spacing = 1.5\n",
|
|||
|
|
" fmt.space_before = Pt(0)\n",
|
|||
|
|
" fmt.space_after = Pt(0) \n",
|
|||
|
|
" def _add_line(par: Paragraph, text: str):\n",
|
|||
|
|
" run = par.add_run(text)\n",
|
|||
|
|
" force_run_font(run, font_name, font_size_pt)\n",
|
|||
|
|
" return run\n",
|
|||
|
|
" def _apply_par_format(par: Paragraph, left_indent_cm: float):\n",
|
|||
|
|
" fmt = par.paragraph_format\n",
|
|||
|
|
" fmt.left_indent = Cm(left_indent_cm)\n",
|
|||
|
|
" fmt.line_spacing = 1.5\n",
|
|||
|
|
" fmt.space_before = Pt(0)\n",
|
|||
|
|
" fmt.space_after = Pt(0)\n",
|
|||
|
|
" def _process_paragraph(p: Paragraph) -> bool:\n",
|
|||
|
|
" if placeholder not in p.text:\n",
|
|||
|
|
" return False\n",
|
|||
|
|
" current = p\n",
|
|||
|
|
" inserted_any = False\n",
|
|||
|
|
" if not cols:\n",
|
|||
|
|
" newp = insert_paragraph_after(current)\n",
|
|||
|
|
" _add_line(newp, \"(a)\\tSem dados;\")\n",
|
|||
|
|
" _apply_par_format(newp, indent_cm)\n",
|
|||
|
|
" inserted_any = True\n",
|
|||
|
|
" else:\n",
|
|||
|
|
" for i, c in enumerate(cols):\n",
|
|||
|
|
" mean_val = pd.to_numeric(df[c], errors=\"coerce\").mean()\n",
|
|||
|
|
" mean_str = f\"{mean_val:.2f}\".replace(\".\", \",\")\n",
|
|||
|
|
" label = avaliacao_qualitativa(mean_val)\n",
|
|||
|
|
" module_title = clean_module_title(c)\n",
|
|||
|
|
" sub = letters[i] if i < 26 else f\"a{i+1}\"\n",
|
|||
|
|
" newp = insert_paragraph_after(current)\n",
|
|||
|
|
" r1 = newp.add_run(f\"({sub})\\t{module_title} (\")\n",
|
|||
|
|
" force_run_font(r1, font_name, font_size_pt)\n",
|
|||
|
|
" r2 = newp.add_run(mean_str)\n",
|
|||
|
|
" r2.bold = True\n",
|
|||
|
|
" force_run_font(r2, font_name, font_size_pt)\n",
|
|||
|
|
" r3 = newp.add_run(\"), que corresponde a \")\n",
|
|||
|
|
" force_run_font(r3, font_name, font_size_pt)\n",
|
|||
|
|
" r4 = newp.add_run(label)\n",
|
|||
|
|
" r4.bold = True\n",
|
|||
|
|
" force_run_font(r4, font_name, font_size_pt)\n",
|
|||
|
|
" r5 = newp.add_run(\";\")\n",
|
|||
|
|
" force_run_font(r5, font_name, font_size_pt)\n",
|
|||
|
|
" _apply_par_format(newp, indent_cm)\n",
|
|||
|
|
" current = newp\n",
|
|||
|
|
" inserted_any = True\n",
|
|||
|
|
" delete_paragraph(p)\n",
|
|||
|
|
" return inserted_any\n",
|
|||
|
|
" for p in doc.paragraphs:\n",
|
|||
|
|
" if _process_paragraph(p):\n",
|
|||
|
|
" return\n",
|
|||
|
|
" for table in doc.tables:\n",
|
|||
|
|
" for row in table.rows:\n",
|
|||
|
|
" for cell in row.cells:\n",
|
|||
|
|
" for p in cell.paragraphs:\n",
|
|||
|
|
" if _process_paragraph(p):\n",
|
|||
|
|
" return"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 17,
|
|||
|
|
"id": "d58493cd",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def _collect_course_info():\n",
|
|||
|
|
" form = tk.Tk()\n",
|
|||
|
|
" form.title(\"Dados do curso\")\n",
|
|||
|
|
" form.resizable(False, False)\n",
|
|||
|
|
"\n",
|
|||
|
|
" fields = [\n",
|
|||
|
|
" (\"Nomenclatura do curso\", \"NOMEDOCURSO\"),\n",
|
|||
|
|
" (\"Dia de inicio (DD)\", \"DDi\"),\n",
|
|||
|
|
" (\"Mes de inicio (Extenso)\", \"MESi\"),\n",
|
|||
|
|
" (\"Ano de inicio (AAAA)\", \"AAAAi\"),\n",
|
|||
|
|
" (\"Dia de fim (DD)\", \"DDf\"),\n",
|
|||
|
|
" (\"Mes de fim (Extenso)\", \"MESf\"),\n",
|
|||
|
|
" (\"Ano de fim (AAAA)\", \"AAAAf\"),\n",
|
|||
|
|
" (\"Numero de formandos\", \"NFORMANDOS\"),\n",
|
|||
|
|
" (\"Finalidade do curso\", \"FINALIDADECURSO\"),\n",
|
|||
|
|
" (\"Média final do curso\", \"MEDIAFINALCURSO\"),\n",
|
|||
|
|
" ]\n",
|
|||
|
|
" entries = {}\n",
|
|||
|
|
" for i, (label, key) in enumerate(fields):\n",
|
|||
|
|
" tk.Label(form, text=label, anchor=\"w\").grid(row=i, column=0, padx=8, pady=4, sticky=\"w\")\n",
|
|||
|
|
" ent = tk.Entry(form, width=30)\n",
|
|||
|
|
" ent.grid(row=i, column=1, padx=8, pady=4)\n",
|
|||
|
|
" entries[key] = ent\n",
|
|||
|
|
" form_values = {}\n",
|
|||
|
|
" def _submit():\n",
|
|||
|
|
" values = {k: e.get().strip() for k, e in entries.items()}\n",
|
|||
|
|
" missing = [label for (label, key) in fields if not values[key]]\n",
|
|||
|
|
" if missing:\n",
|
|||
|
|
" messagebox.showerror(\"Dados em falta\", \"Preencha: \" + \", \".join(missing))\n",
|
|||
|
|
" return\n",
|
|||
|
|
" form_values.update(values)\n",
|
|||
|
|
" form.destroy()\n",
|
|||
|
|
" tk.Button(form, text=\"Continuar\", command=_submit).grid(row=len(fields), column=0, columnspan=2, pady=10)\n",
|
|||
|
|
" form.mainloop()\n",
|
|||
|
|
" if not form_values:\n",
|
|||
|
|
" raise RuntimeError(\"Formulario cancelado\")\n",
|
|||
|
|
" return form_values\n",
|
|||
|
|
"course_info = _collect_course_info()\n",
|
|||
|
|
"def _to_int_or_str(s):\n",
|
|||
|
|
" return int(s) if s.isdigit() else s\n",
|
|||
|
|
"NOMEDOCURSOcurto = course_info[\"NOMEDOCURSO\"]\n",
|
|||
|
|
"DDi = _to_int_or_str(course_info[\"DDi\"])\n",
|
|||
|
|
"MESi = _to_int_or_str(course_info[\"MESi\"])\n",
|
|||
|
|
"AAAAi = _to_int_or_str(course_info[\"AAAAi\"])\n",
|
|||
|
|
"DDf = _to_int_or_str(course_info[\"DDf\"])\n",
|
|||
|
|
"MESf = _to_int_or_str(course_info[\"MESf\"])\n",
|
|||
|
|
"AAAAf = _to_int_or_str(course_info[\"AAAAf\"])\n",
|
|||
|
|
"NFORMANDOS = _to_int_or_str(course_info[\"NFORMANDOS\"])\n",
|
|||
|
|
"FINALIDADECURSO = course_info[\"FINALIDADECURSO\"]\n",
|
|||
|
|
"MEDIAFINALCURSO = course_info[\"MEDIAFINALCURSO\"]"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 18,
|
|||
|
|
"id": "912bf2d5",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"Selected file:E:/Relatórios Internos/3Curso QP praças/Question_rio_de_Expetativas_Inicial_geral.xlsx\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"Tk().withdraw()\n",
|
|||
|
|
"file_path = askopenfilename(\n",
|
|||
|
|
" title=\"Select Excel das expetativas iniciais\",\n",
|
|||
|
|
" filetypes=[(\"Excel files\", \"*.xlsx *.xls\")])\n",
|
|||
|
|
"if not file_path:\n",
|
|||
|
|
" raise FileNotFoundError(\"No file selected\")\n",
|
|||
|
|
"print(f\"Selected file:{file_path}\")"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 19,
|
|||
|
|
"id": "0f2192e3",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"df = pd.read_excel(file_path)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 20,
|
|||
|
|
"id": "a5494272",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"ninq = df.shape[0]-1\n",
|
|||
|
|
"medalojamento = round(df.iloc[:,10].mean(),2)\n",
|
|||
|
|
"medalimentacao = round(df.iloc[:,11].mean(),2)\n",
|
|||
|
|
"meddificuldade = round(df.iloc[:,12].mean(),2)\n",
|
|||
|
|
"medfuncfut = round(df.iloc[:,13].mean(),2)\n",
|
|||
|
|
"medmotvpart = round(df.iloc[:,14].mean(),2)\n",
|
|||
|
|
"medconhecimento = round(df.iloc[:,15].mean(),2)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 21,
|
|||
|
|
"id": "e6d556fc",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def mean_columns_until_empty(df: pd.DataFrame, start_col_idx: int = 16) -> list[float]:\n",
|
|||
|
|
" means = []\n",
|
|||
|
|
" n_cols = df.shape[1]\n",
|
|||
|
|
" for j in range(start_col_idx, n_cols):\n",
|
|||
|
|
" col = pd.to_numeric(df.iloc[:, j], errors=\"coerce\")\n",
|
|||
|
|
" if col.notna().sum() == 0:\n",
|
|||
|
|
" break\n",
|
|||
|
|
" means.append(col.mean())\n",
|
|||
|
|
" return means"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 22,
|
|||
|
|
"id": "7fc12954",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"6 [np.float64(4.031746031746032), np.float64(3.9047619047619047), np.float64(3.9523809523809526), np.float64(3.7936507936507935), np.float64(4.079365079365079)]\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"medias = mean_columns_until_empty(df, start_col_idx=16)\n",
|
|||
|
|
"print(len(medias), medias[:5])"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 23,
|
|||
|
|
"id": "7cc0fbc1",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"Selected file:\n",
|
|||
|
|
"E:/Relatórios Internos/3Curso QP praças/Question_rio_Satisfa_o_Final_geral.xlsx\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"Tk().withdraw()\n",
|
|||
|
|
"file_path2 = askopenfilename(\n",
|
|||
|
|
" title=\"Select Excel das expetativas finais\",\n",
|
|||
|
|
" filetypes=[(\"Excel files\", \"*.xlsx *.xls\")])\n",
|
|||
|
|
"if not file_path2:\n",
|
|||
|
|
" raise FileNotFoundError(\"No file selected\")\n",
|
|||
|
|
"print(f\"Selected file:\\n{file_path2}\")"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 24,
|
|||
|
|
"id": "055a89b6",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"df2 = pd.read_excel(file_path2)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 25,
|
|||
|
|
"id": "02228c44",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"ninq2 = df2.shape[0]-1\n",
|
|||
|
|
"medaplog = round(df2.iloc[:,10].mean(),2)\n",
|
|||
|
|
"medalojamento2 = round(df2.iloc[:,11].mean(),2)\n",
|
|||
|
|
"medalimentacao2 = round(df2.iloc[:,12].mean(),2)\n",
|
|||
|
|
"medapdir = round(df2.iloc[:,13].mean(),2)\n",
|
|||
|
|
"meddificuldade2 = round(df2.iloc[:,14].mean(),2)\n",
|
|||
|
|
"medfuncfut2 = round(df2.iloc[:,15].mean(),2)\n",
|
|||
|
|
"medmotvpart2 = round(df2.iloc[:,16].mean(),2)\n",
|
|||
|
|
"medconhecimento2 = round(df2.iloc[:,17].mean(),2)\n",
|
|||
|
|
"medalojamentofinal = round(medalojamento2 - medalojamento,2)\n",
|
|||
|
|
"medalimentacaofinal = round(medalimentacao2 - medalimentacao,2)\n",
|
|||
|
|
"meddificuldadefinal = round(meddificuldade2 - meddificuldade,2)\n",
|
|||
|
|
"medfuncfutfinal = round(medfuncfut2 - medfuncfut,2)\n",
|
|||
|
|
"medmotvpartfinal = round(medmotvpart2 - medmotvpart,2)\n",
|
|||
|
|
"medconhecimentofinal = round(medconhecimento2 - medconhecimento,2)\n",
|
|||
|
|
"objcruso = round(df2.iloc[:,18].mean(),2)\n",
|
|||
|
|
"contcurso = round(df2.iloc[:,19].mean(),2)\n",
|
|||
|
|
"adeqtrab = round(df2.iloc[:,20].mean(),2)\n",
|
|||
|
|
"instform = round(df2.iloc[:,21].mean(),2)\n",
|
|||
|
|
"audiovisuais = round(df2.iloc[:,22].mean(),2)\n",
|
|||
|
|
"biblio = round(df2.iloc[:,23].mean(),2)\n",
|
|||
|
|
"NOMEDOCURSO = df2.iloc[1,4].split(\"-\")[1].split(\"–\")[0].strip()"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 26,
|
|||
|
|
"id": "fd4f44f1",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def build_formadores_rows(df: pd.DataFrame):\n",
|
|||
|
|
" groups = {}\n",
|
|||
|
|
" for c in df.columns:\n",
|
|||
|
|
" name = str(c)\n",
|
|||
|
|
" low = name.lower()\n",
|
|||
|
|
" if \"_formador\" not in low:\n",
|
|||
|
|
" continue\n",
|
|||
|
|
" m = re.match(r\"^\\s*(Q\\d+)\\s*_Formador\\s*->\\s*(.+?)\\s*(?:\\(|$)\", name, flags=re.IGNORECASE)\n",
|
|||
|
|
" if not m:\n",
|
|||
|
|
" continue\n",
|
|||
|
|
" qcode = m.group(1).upper()\n",
|
|||
|
|
" metric_raw = m.group(2).strip().lower()\n",
|
|||
|
|
" groups.setdefault(qcode, {})\n",
|
|||
|
|
" groups[qcode][metric_raw] = name\n",
|
|||
|
|
" def qnum(q): \n",
|
|||
|
|
" mm = re.match(r\"Q(\\d+)\", q)\n",
|
|||
|
|
" return int(mm.group(1)) if mm else 10**9\n",
|
|||
|
|
" qcodes_sorted = sorted(groups.keys(), key=qnum)\n",
|
|||
|
|
" def metric_key(metric_raw: str) -> str | None:\n",
|
|||
|
|
" mr = metric_raw.lower()\n",
|
|||
|
|
" if \"dom\" in mr and \"ass\" in mr: \n",
|
|||
|
|
" return \"dominio\"\n",
|
|||
|
|
" if \"métod\" in mr or \"metod\" in mr: \n",
|
|||
|
|
" return \"metodos\"\n",
|
|||
|
|
" if \"lingu\" in mr: \n",
|
|||
|
|
" return \"linguagem\"\n",
|
|||
|
|
" if \"empenh\" in mr: \n",
|
|||
|
|
" return \"empenho\"\n",
|
|||
|
|
" if \"relac\" in mr or \"formand\" in mr: \n",
|
|||
|
|
" return \"relacao\"\n",
|
|||
|
|
" return None\n",
|
|||
|
|
" rows = []\n",
|
|||
|
|
" letters = \"ABCDEFGHIJKLMNOPQRSTUVWXYZ\"\n",
|
|||
|
|
" for idx, qcode in enumerate(qcodes_sorted):\n",
|
|||
|
|
" cols_map = groups[qcode]\n",
|
|||
|
|
" picked = {\"dominio\": None, \"metodos\": None, \"linguagem\": None, \"empenho\": None, \"relacao\": None}\n",
|
|||
|
|
" for raw, colname in cols_map.items():\n",
|
|||
|
|
" k = metric_key(raw)\n",
|
|||
|
|
" if k and picked[k] is None:\n",
|
|||
|
|
" picked[k] = colname\n",
|
|||
|
|
" def col_mean(colname):\n",
|
|||
|
|
" if not colname:\n",
|
|||
|
|
" return float(\"nan\")\n",
|
|||
|
|
" return pd.to_numeric(df[colname], errors=\"coerce\").mean()\n",
|
|||
|
|
" dominio = col_mean(picked[\"dominio\"])\n",
|
|||
|
|
" metodos = col_mean(picked[\"metodos\"])\n",
|
|||
|
|
" linguagem = col_mean(picked[\"linguagem\"])\n",
|
|||
|
|
" empenho = col_mean(picked[\"empenho\"])\n",
|
|||
|
|
" relacao = col_mean(picked[\"relacao\"])\n",
|
|||
|
|
" vals = [dominio, metodos, linguagem, empenho, relacao]\n",
|
|||
|
|
" media_final = pd.Series(vals, dtype=\"float\").mean(skipna=True)\n",
|
|||
|
|
" label = letters[idx] if idx < len(letters) else f\"F{idx+1}\"\n",
|
|||
|
|
" rows.append({\n",
|
|||
|
|
" \"label\": label,\n",
|
|||
|
|
" \"qcode\": qcode,\n",
|
|||
|
|
" \"dominio\": dominio,\n",
|
|||
|
|
" \"metodos\": metodos,\n",
|
|||
|
|
" \"linguagem\": linguagem,\n",
|
|||
|
|
" \"empenho\": empenho,\n",
|
|||
|
|
" \"relacao\": relacao,\n",
|
|||
|
|
" \"media_final\": media_final,\n",
|
|||
|
|
" })\n",
|
|||
|
|
" return rows\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 27,
|
|||
|
|
"id": "1ce5e3c2",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def replace_placeholder_with_formadores_table(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df2: pd.DataFrame,\n",
|
|||
|
|
" placeholder: str = \"{{tabelaFormadores}}\",\n",
|
|||
|
|
" font_name: str = \"Arial\",\n",
|
|||
|
|
" font_size_pt: int = 12,\n",
|
|||
|
|
" col_width_cm: float = 2.3,\n",
|
|||
|
|
" header_vertical: bool = True,\n",
|
|||
|
|
" rotate_first_header: bool = True,\n",
|
|||
|
|
"):\n",
|
|||
|
|
" rows = build_formadores_rows(df2)\n",
|
|||
|
|
" def _fill_cell(cell, text: str, bold=False, align=None):\n",
|
|||
|
|
" cell.text = \"\"\n",
|
|||
|
|
" p = cell.paragraphs[0]\n",
|
|||
|
|
" if align is not None:\n",
|
|||
|
|
" p.alignment = align\n",
|
|||
|
|
" r = p.add_run(text)\n",
|
|||
|
|
" force_run_font(r, font_name, font_size_pt, bold=bold)\n",
|
|||
|
|
" apply_table_paragraph_spacing(cell, line_spacing=1.5)\n",
|
|||
|
|
" headers = [\"Formadores\", \"Domínio do assunto\",\"Métodos utilizados\",\"Linguagem utilizada\",\"Empenho\",\"Relação c/ formandos\",\"Média final\",]\n",
|
|||
|
|
" global_mean = pd.Series([r[\"media_final\"] for r in rows], dtype=\"float\").mean(skipna=True)\n",
|
|||
|
|
" def _apply_layout(tbl):\n",
|
|||
|
|
" tbl.style = \"Table Grid\"\n",
|
|||
|
|
" tbl.autofit = False\n",
|
|||
|
|
" set_table_all_columns_width(tbl, width_cm=col_width_cm)\n",
|
|||
|
|
" if header_vertical:\n",
|
|||
|
|
" start_j = 0 if rotate_first_header else 1\n",
|
|||
|
|
" for j in range(start_j, len(headers)):\n",
|
|||
|
|
" set_cell_text_vertical(tbl.rows[0].cells[j], direction=\"btLr\")\n",
|
|||
|
|
" def _populate_table(tbl):\n",
|
|||
|
|
" for j, h in enumerate(headers):\n",
|
|||
|
|
" _fill_cell(tbl.rows[0].cells[j], h, bold=True, align=WD_ALIGN_PARAGRAPH.CENTER)\n",
|
|||
|
|
" if not rows:\n",
|
|||
|
|
" _fill_cell(tbl.rows[1].cells[0], \"Sem dados\", bold=False, align=WD_ALIGN_PARAGRAPH.LEFT)\n",
|
|||
|
|
" for j in range(1, len(headers)):\n",
|
|||
|
|
" _fill_cell(tbl.rows[1].cells[j], \"\", bold=False, align=WD_ALIGN_PARAGRAPH.CENTER)\n",
|
|||
|
|
" _fill_cell(tbl.rows[2].cells[0], \"Média\", bold=True, align=WD_ALIGN_PARAGRAPH.LEFT)\n",
|
|||
|
|
" for j in range(1, len(headers) - 1):\n",
|
|||
|
|
" _fill_cell(tbl.rows[2].cells[j], \"\", bold=False, align=WD_ALIGN_PARAGRAPH.CENTER)\n",
|
|||
|
|
" _fill_cell(tbl.rows[2].cells[-1], \"\", bold=False, align=WD_ALIGN_PARAGRAPH.CENTER)\n",
|
|||
|
|
" return\n",
|
|||
|
|
" for i, r in enumerate(rows, start=1):\n",
|
|||
|
|
" _fill_cell(tbl.rows[i].cells[0], r[\"label\"], bold=False, align=WD_ALIGN_PARAGRAPH.CENTER)\n",
|
|||
|
|
" _fill_cell(tbl.rows[i].cells[1], format_pt_number(r[\"dominio\"]), bold=False, align=WD_ALIGN_PARAGRAPH.CENTER)\n",
|
|||
|
|
" _fill_cell(tbl.rows[i].cells[2], format_pt_number(r[\"metodos\"]), bold=False, align=WD_ALIGN_PARAGRAPH.CENTER)\n",
|
|||
|
|
" _fill_cell(tbl.rows[i].cells[3], format_pt_number(r[\"linguagem\"]), bold=False, align=WD_ALIGN_PARAGRAPH.CENTER)\n",
|
|||
|
|
" _fill_cell(tbl.rows[i].cells[4], format_pt_number(r[\"empenho\"]), bold=False, align=WD_ALIGN_PARAGRAPH.CENTER)\n",
|
|||
|
|
" _fill_cell(tbl.rows[i].cells[5], format_pt_number(r[\"relacao\"]), bold=False, align=WD_ALIGN_PARAGRAPH.CENTER)\n",
|
|||
|
|
" _fill_cell(tbl.rows[i].cells[6], format_pt_number(r[\"media_final\"]), bold=False, align=WD_ALIGN_PARAGRAPH.CENTER)\n",
|
|||
|
|
" last = 1 + len(rows)\n",
|
|||
|
|
" _fill_cell(tbl.rows[last].cells[0], \"Média\", bold=True, align=WD_ALIGN_PARAGRAPH.LEFT)\n",
|
|||
|
|
" for j in range(1, len(headers) - 1):\n",
|
|||
|
|
" _fill_cell(tbl.rows[last].cells[j], \"\", bold=False, align=WD_ALIGN_PARAGRAPH.CENTER)\n",
|
|||
|
|
" _fill_cell(tbl.rows[last].cells[-1], format_pt_number(global_mean), bold=True, align=WD_ALIGN_PARAGRAPH.CENTER)\n",
|
|||
|
|
" def _make_table_at_paragraph(p: Paragraph):\n",
|
|||
|
|
" n_rows = 1 + max(1, len(rows)) + 1\n",
|
|||
|
|
" tbl = insert_table_after_paragraph(p, rows=n_rows, cols=len(headers))\n",
|
|||
|
|
" _apply_layout(tbl)\n",
|
|||
|
|
" format_header_row(tbl.rows[0], height_cm=5.2)\n",
|
|||
|
|
" _populate_table(tbl)\n",
|
|||
|
|
" delete_paragraph(p)\n",
|
|||
|
|
" for p in doc.paragraphs:\n",
|
|||
|
|
" if placeholder in p.text:\n",
|
|||
|
|
" _make_table_at_paragraph(p)\n",
|
|||
|
|
" return\n",
|
|||
|
|
" for table in doc.tables:\n",
|
|||
|
|
" for row in table.rows:\n",
|
|||
|
|
" for cell in row.cells:\n",
|
|||
|
|
" for p in cell.paragraphs:\n",
|
|||
|
|
" if placeholder in p.text:\n",
|
|||
|
|
" cell.text = \"\"\n",
|
|||
|
|
" n_rows = 1 + max(1, len(rows)) + 1\n",
|
|||
|
|
" tbl = cell.add_table(rows=n_rows, cols=len(headers))\n",
|
|||
|
|
" _apply_layout(tbl)\n",
|
|||
|
|
" format_header_row(tbl.rows[0], height_cm=5.2)\n",
|
|||
|
|
" _populate_table(tbl)\n",
|
|||
|
|
" return"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 28,
|
|||
|
|
"id": "5c02a5d9",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def compute_formadores_summary(df: pd.DataFrame):\n",
|
|||
|
|
" rows = build_formadores_rows(df)\n",
|
|||
|
|
" nforma = len(rows)\n",
|
|||
|
|
" global_mean = pd.Series([r[\"media_final\"] for r in rows], dtype=\"float\").mean(skipna=True)\n",
|
|||
|
|
" mediaformquanl = \"\" if pd.isna(global_mean) else f\"{global_mean:.2f}\".replace(\".\", \",\")\n",
|
|||
|
|
" mediaform = \"\" if pd.isna(global_mean) else avaliacao_qualitativa(global_mean)\n",
|
|||
|
|
" return nforma, mediaformquanl, mediaform\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 29,
|
|||
|
|
"id": "9db6b739",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def _copy_run_format(src_run, dst_run, keep_bold=None):\n",
|
|||
|
|
" dst_run.bold = src_run.bold if keep_bold is None else keep_bold\n",
|
|||
|
|
" dst_run.italic = src_run.italic\n",
|
|||
|
|
" dst_run.underline = src_run.underline\n",
|
|||
|
|
" if src_run.font.name:\n",
|
|||
|
|
" dst_run.font.name = src_run.font.name\n",
|
|||
|
|
" dst_run._element.rPr.rFonts.set(qn(\"w:eastAsia\"), src_run.font.name)\n",
|
|||
|
|
" if src_run.font.size:\n",
|
|||
|
|
" dst_run.font.size = src_run.font.size"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 30,
|
|||
|
|
"id": "a3e6867c",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def extract_temas_list(df: pd.DataFrame) -> list[str]:\n",
|
|||
|
|
" temas_cols = [c for c in df.columns if \"_temas\" in str(c).lower()]\n",
|
|||
|
|
" temas = []\n",
|
|||
|
|
" for col in temas_cols:\n",
|
|||
|
|
" s = df[col].dropna().astype(str).str.strip()\n",
|
|||
|
|
" s = s[s != \"\"]\n",
|
|||
|
|
" temas.extend(s.tolist())\n",
|
|||
|
|
" seen = set()\n",
|
|||
|
|
" out = []\n",
|
|||
|
|
" for t in temas:\n",
|
|||
|
|
" k = t.lower()\n",
|
|||
|
|
" if k not in seen:\n",
|
|||
|
|
" seen.add(k)\n",
|
|||
|
|
" out.append(t)\n",
|
|||
|
|
" return out"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 31,
|
|||
|
|
"id": "a539bab3",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def replace_placeholders_docx_bold_values_keep_style(doc, replacements: dict[str, str]):\n",
|
|||
|
|
" keys = sorted(replacements.keys(), key=len, reverse=True)\n",
|
|||
|
|
" def _replace_in_paragraph(paragraph):\n",
|
|||
|
|
" if not paragraph.runs:\n",
|
|||
|
|
" return\n",
|
|||
|
|
" full_text = \"\".join(run.text for run in paragraph.runs)\n",
|
|||
|
|
" if not any(k in full_text for k in keys):\n",
|
|||
|
|
" return\n",
|
|||
|
|
" base_run = paragraph.runs[0]\n",
|
|||
|
|
" for run in paragraph.runs:\n",
|
|||
|
|
" run.text = \"\"\n",
|
|||
|
|
" text = full_text\n",
|
|||
|
|
" while True:\n",
|
|||
|
|
" next_pos = None\n",
|
|||
|
|
" next_key = None\n",
|
|||
|
|
" for k in keys:\n",
|
|||
|
|
" pos = text.find(k)\n",
|
|||
|
|
" if pos != -1 and (next_pos is None or pos < next_pos):\n",
|
|||
|
|
" next_pos, next_key = pos, k\n",
|
|||
|
|
" if next_key is None:\n",
|
|||
|
|
" if text:\n",
|
|||
|
|
" r = paragraph.add_run(text)\n",
|
|||
|
|
" _copy_run_format(base_run, r, keep_bold=base_run.bold)\n",
|
|||
|
|
" break\n",
|
|||
|
|
" before = text[:next_pos]\n",
|
|||
|
|
" if before:\n",
|
|||
|
|
" r = paragraph.add_run(before)\n",
|
|||
|
|
" _copy_run_format(base_run, r, keep_bold=base_run.bold)\n",
|
|||
|
|
" val = str(replacements[next_key])\n",
|
|||
|
|
" r_val = paragraph.add_run(val)\n",
|
|||
|
|
" _copy_run_format(base_run, r_val, keep_bold=True)\n",
|
|||
|
|
" text = text[next_pos + len(next_key):]\n",
|
|||
|
|
" for p in doc.paragraphs:\n",
|
|||
|
|
" _replace_in_paragraph(p)\n",
|
|||
|
|
" for table in doc.tables:\n",
|
|||
|
|
" for row in table.rows:\n",
|
|||
|
|
" for cell in row.cells:\n",
|
|||
|
|
" for p in cell.paragraphs:\n",
|
|||
|
|
" _replace_in_paragraph(p)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 32,
|
|||
|
|
"id": "78816507",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def replace_placeholder_with_column_subitems_hanging(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df: pd.DataFrame,\n",
|
|||
|
|
" placeholder: str,\n",
|
|||
|
|
" column_contains: str, \n",
|
|||
|
|
" indent_cm: float = 0.5,\n",
|
|||
|
|
" font_name: str = \"Arial\",\n",
|
|||
|
|
" font_size_pt: int = 12,\n",
|
|||
|
|
" deduplicate: bool = True,\n",
|
|||
|
|
"):\n",
|
|||
|
|
" cols = [c for c in df.columns if column_contains.lower() in str(c).lower()]\n",
|
|||
|
|
" items = []\n",
|
|||
|
|
" for col in cols:\n",
|
|||
|
|
" s = df[col].dropna().astype(str).str.strip()\n",
|
|||
|
|
" s = s[s != \"\"]\n",
|
|||
|
|
" items.extend(s.tolist())\n",
|
|||
|
|
" if deduplicate:\n",
|
|||
|
|
" seen = set()\n",
|
|||
|
|
" out = []\n",
|
|||
|
|
" for t in items:\n",
|
|||
|
|
" k = t.lower()\n",
|
|||
|
|
" if k not in seen:\n",
|
|||
|
|
" seen.add(k)\n",
|
|||
|
|
" out.append(t)\n",
|
|||
|
|
" items = out\n",
|
|||
|
|
" letters = string.ascii_lowercase\n",
|
|||
|
|
" def _apply_par_format(par: Paragraph):\n",
|
|||
|
|
" fmt = par.paragraph_format\n",
|
|||
|
|
" fmt.left_indent = Cm(indent_cm)\n",
|
|||
|
|
" fmt.first_line_indent = Cm(-indent_cm)\n",
|
|||
|
|
" fmt.line_spacing = 1.5\n",
|
|||
|
|
" fmt.space_before = Pt(0)\n",
|
|||
|
|
" fmt.space_after = Pt(0)\n",
|
|||
|
|
" def _add_run(par: Paragraph, text: str):\n",
|
|||
|
|
" r = par.add_run(text)\n",
|
|||
|
|
" force_run_font(r, font_name, font_size_pt)\n",
|
|||
|
|
" return r\n",
|
|||
|
|
" def _process_paragraph(p: Paragraph) -> bool:\n",
|
|||
|
|
" if placeholder not in p.text:\n",
|
|||
|
|
" return False\n",
|
|||
|
|
" current = p\n",
|
|||
|
|
" if not items:\n",
|
|||
|
|
" newp = insert_paragraph_after(current)\n",
|
|||
|
|
" _add_run(newp, \"a. Sem dados;\")\n",
|
|||
|
|
" _apply_par_format(newp)\n",
|
|||
|
|
" delete_paragraph(p)\n",
|
|||
|
|
" return True\n",
|
|||
|
|
" for i, txt in enumerate(items):\n",
|
|||
|
|
" sub = letters[i] if i < 26 else f\"a{i+1}\"\n",
|
|||
|
|
" newp = insert_paragraph_after(current)\n",
|
|||
|
|
" _add_run(newp, f\"{sub}. \")\n",
|
|||
|
|
" _add_run(newp, txt)\n",
|
|||
|
|
" _add_run(newp, \";\")\n",
|
|||
|
|
" _apply_par_format(newp)\n",
|
|||
|
|
" current = newp\n",
|
|||
|
|
" delete_paragraph(p)\n",
|
|||
|
|
" return True\n",
|
|||
|
|
" for p in doc.paragraphs:\n",
|
|||
|
|
" if _process_paragraph(p):\n",
|
|||
|
|
" return\n",
|
|||
|
|
" for table in doc.tables:\n",
|
|||
|
|
" for row in table.rows:\n",
|
|||
|
|
" for cell in row.cells:\n",
|
|||
|
|
" for p in cell.paragraphs:\n",
|
|||
|
|
" if _process_paragraph(p):\n",
|
|||
|
|
" return"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 33,
|
|||
|
|
"id": "e3f33ea2",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def replace_placeholder_with_column_subitems_hanging2(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df: pd.DataFrame,\n",
|
|||
|
|
" placeholder: str,\n",
|
|||
|
|
" column_contains: str,\n",
|
|||
|
|
" left_indent_cm: float = 2.75,\n",
|
|||
|
|
" hanging_cm: float = 0.6,\n",
|
|||
|
|
" font_name: str = \"Arial\",\n",
|
|||
|
|
" font_size_pt: int = 12,\n",
|
|||
|
|
" deduplicate: bool = True,\n",
|
|||
|
|
"):\n",
|
|||
|
|
" cols = [c for c in df.columns if column_contains.lower() in str(c).lower()]\n",
|
|||
|
|
" items = []\n",
|
|||
|
|
" for col in cols:\n",
|
|||
|
|
" s = df[col].dropna().astype(str).str.strip()\n",
|
|||
|
|
" s = s[s != \"\"]\n",
|
|||
|
|
" items.extend(s.tolist())\n",
|
|||
|
|
" if deduplicate:\n",
|
|||
|
|
" seen = set()\n",
|
|||
|
|
" out = []\n",
|
|||
|
|
" for t in items:\n",
|
|||
|
|
" k = t.lower()\n",
|
|||
|
|
" if k not in seen:\n",
|
|||
|
|
" seen.add(k)\n",
|
|||
|
|
" out.append(t)\n",
|
|||
|
|
" items = out\n",
|
|||
|
|
" letters = string.ascii_lowercase\n",
|
|||
|
|
" def _apply_par_format(par: Paragraph):\n",
|
|||
|
|
" fmt = par.paragraph_format\n",
|
|||
|
|
" fmt.left_indent = Cm(left_indent_cm)\n",
|
|||
|
|
" fmt.first_line_indent = Cm(-hanging_cm) \n",
|
|||
|
|
" fmt.line_spacing = 1.5\n",
|
|||
|
|
" fmt.space_before = Pt(0)\n",
|
|||
|
|
" fmt.space_after = Pt(0)\n",
|
|||
|
|
" def _add_run(par: Paragraph, text: str):\n",
|
|||
|
|
" r = par.add_run(text)\n",
|
|||
|
|
" force_run_font(r, font_name, font_size_pt)\n",
|
|||
|
|
" return r\n",
|
|||
|
|
" def _process_paragraph(p: Paragraph) -> bool:\n",
|
|||
|
|
" if placeholder not in p.text:\n",
|
|||
|
|
" return False\n",
|
|||
|
|
" current = p\n",
|
|||
|
|
" if not items:\n",
|
|||
|
|
" newp = insert_paragraph_after(current)\n",
|
|||
|
|
" _add_run(newp, \"a. Sem dados;\")\n",
|
|||
|
|
" _apply_par_format(newp)\n",
|
|||
|
|
" delete_paragraph(p)\n",
|
|||
|
|
" return True\n",
|
|||
|
|
" for i, txt in enumerate(items):\n",
|
|||
|
|
" sub = letters[i] if i < 26 else f\"a{i+1}\"\n",
|
|||
|
|
" newp = insert_paragraph_after(current)\n",
|
|||
|
|
" _add_run(newp, f\"{sub}. \")\n",
|
|||
|
|
" _add_run(newp, txt)\n",
|
|||
|
|
" _add_run(newp, \";\")\n",
|
|||
|
|
" _apply_par_format(newp)\n",
|
|||
|
|
" current = newp\n",
|
|||
|
|
" delete_paragraph(p)\n",
|
|||
|
|
" return True\n",
|
|||
|
|
" for p in doc.paragraphs:\n",
|
|||
|
|
" if _process_paragraph(p):\n",
|
|||
|
|
" return\n",
|
|||
|
|
" for table in doc.tables:\n",
|
|||
|
|
" for row in table.rows:\n",
|
|||
|
|
" for cell in row.cells:\n",
|
|||
|
|
" for p in cell.paragraphs:\n",
|
|||
|
|
" if _process_paragraph(p):\n",
|
|||
|
|
" return"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 34,
|
|||
|
|
"id": "2905d4ff",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def replace_placeholder_with_temas_smart(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df: pd.DataFrame,\n",
|
|||
|
|
" placeholder: str = \"{{TEMAS}}\",\n",
|
|||
|
|
" indent_cm: float = 0.5,\n",
|
|||
|
|
" font_name: str = \"Arial\",\n",
|
|||
|
|
" font_size_pt: int = 12,\n",
|
|||
|
|
" deduplicate: bool = True,\n",
|
|||
|
|
" max_items: int | None = None, \n",
|
|||
|
|
" **_ignored, \n",
|
|||
|
|
"):\n",
|
|||
|
|
" temas_raw = extract_temas_list(df)\n",
|
|||
|
|
" temas = [str(t).strip() for t in (temas_raw or []) if str(t).strip()]\n",
|
|||
|
|
" if deduplicate:\n",
|
|||
|
|
" seen = set()\n",
|
|||
|
|
" out = []\n",
|
|||
|
|
" for t in temas:\n",
|
|||
|
|
" k = t.lower()\n",
|
|||
|
|
" if k not in seen:\n",
|
|||
|
|
" seen.add(k)\n",
|
|||
|
|
" out.append(t)\n",
|
|||
|
|
" temas = out\n",
|
|||
|
|
" if max_items is not None:\n",
|
|||
|
|
" temas = temas[:max_items]\n",
|
|||
|
|
" if not temas:\n",
|
|||
|
|
" df_tmp = pd.DataFrame({\"_Temas\": [\"Sem dados\"]})\n",
|
|||
|
|
" return replace_placeholder_with_column_subitems_hanging(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df_tmp,\n",
|
|||
|
|
" placeholder=placeholder,\n",
|
|||
|
|
" column_contains=\"_temas\",\n",
|
|||
|
|
" indent_cm=indent_cm,\n",
|
|||
|
|
" font_name=font_name,\n",
|
|||
|
|
" font_size_pt=font_size_pt,\n",
|
|||
|
|
" deduplicate=False\n",
|
|||
|
|
" )\n",
|
|||
|
|
" df_tmp = pd.DataFrame({\"_Temas\": temas})\n",
|
|||
|
|
" return replace_placeholder_with_column_subitems_hanging(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df_tmp,\n",
|
|||
|
|
" placeholder=placeholder,\n",
|
|||
|
|
" column_contains=\"_temas\",\n",
|
|||
|
|
" indent_cm=indent_cm,\n",
|
|||
|
|
" font_name=font_name,\n",
|
|||
|
|
" font_size_pt=font_size_pt,\n",
|
|||
|
|
" deduplicate=False\n",
|
|||
|
|
" )"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 35,
|
|||
|
|
"id": "f14de057",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def extract_desenvolver_list(df: pd.DataFrame) -> list[str]:\n",
|
|||
|
|
" cols = [c for c in df.columns if \"_desenvolver\" in str(c).lower()]\n",
|
|||
|
|
" items = []\n",
|
|||
|
|
" for col in cols:\n",
|
|||
|
|
" s = df[col].dropna().astype(str).str.strip()\n",
|
|||
|
|
" s = s[s != \"\"]\n",
|
|||
|
|
" items.extend(s.tolist())\n",
|
|||
|
|
" seen = set()\n",
|
|||
|
|
" out = []\n",
|
|||
|
|
" for t in items:\n",
|
|||
|
|
" k = t.lower()\n",
|
|||
|
|
" if k not in seen:\n",
|
|||
|
|
" seen.add(k)\n",
|
|||
|
|
" out.append(t)\n",
|
|||
|
|
" return out"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 36,
|
|||
|
|
"id": "6779cbad",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def ollama_summarize_desenvolver(\n",
|
|||
|
|
" items: list[str],\n",
|
|||
|
|
" max_items: int = 6,\n",
|
|||
|
|
" model: str = \"llama3.1:8b\",\n",
|
|||
|
|
" timeout: float = 30.0\n",
|
|||
|
|
") -> list[str]:\n",
|
|||
|
|
" items_txt = \"\\n\".join(f\"- {t}\" for t in items)\n",
|
|||
|
|
" prompt = f\"\"\"\n",
|
|||
|
|
"És um analista a escrever um relatório oficial de avaliação.\n",
|
|||
|
|
"\n",
|
|||
|
|
"Tens uma lista de aspetos a desenvolver/melhorar apontados pelos formandos. Faz o seguinte:\n",
|
|||
|
|
"1) Agrupa itens repetidos/semelhantes;\n",
|
|||
|
|
"2) Seleciona os mais importantes e recorrentes;\n",
|
|||
|
|
"3) Reescreve numa lista curta, clara e formal (Português de Portugal);\n",
|
|||
|
|
"4) NÃO inventes novos pontos;\n",
|
|||
|
|
"5) No máximo {max_items} itens;\n",
|
|||
|
|
"6) Frases curtas, em formato de sintagma nominal (ex.: \"Melhoria da componente prática\", \"Aprofundamento de ...\").\n",
|
|||
|
|
"\n",
|
|||
|
|
"Itens:\n",
|
|||
|
|
"{items_txt}\n",
|
|||
|
|
"\n",
|
|||
|
|
"Devolve APENAS a lista final no formato:\n",
|
|||
|
|
"- Item 1\n",
|
|||
|
|
"- Item 2\n",
|
|||
|
|
"- Item 3\n",
|
|||
|
|
"\"\"\".strip()\n",
|
|||
|
|
" payload = {\n",
|
|||
|
|
" \"model\": model,\n",
|
|||
|
|
" \"prompt\": prompt,\n",
|
|||
|
|
" \"stream\": False,\n",
|
|||
|
|
" \"options\": {\"temperature\": 0.2}\n",
|
|||
|
|
" }\n",
|
|||
|
|
" r = requests.post(\"http://localhost:11434/api/generate\", json=payload, timeout=timeout)\n",
|
|||
|
|
" r.raise_for_status()\n",
|
|||
|
|
" text = r.json().get(\"response\", \"\")\n",
|
|||
|
|
" lines = []\n",
|
|||
|
|
" for line in text.splitlines():\n",
|
|||
|
|
" line = line.strip()\n",
|
|||
|
|
" if line.startswith((\"-\", \"•\")):\n",
|
|||
|
|
" item = line.lstrip(\"-•\").strip(\" .;\")\n",
|
|||
|
|
" if item:\n",
|
|||
|
|
" lines.append(item)\n",
|
|||
|
|
" return lines[:max_items]\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 37,
|
|||
|
|
"id": "7e8deb1f",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def replace_placeholder_with_desenvolver_smart(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df: pd.DataFrame,\n",
|
|||
|
|
" placeholder: str = \"{{DESENVOLVER}}\",\n",
|
|||
|
|
" indent_cm: float = 1.5,\n",
|
|||
|
|
" font_name: str = \"Arial\",\n",
|
|||
|
|
" font_size_pt: int = 12,\n",
|
|||
|
|
" deduplicate: bool = True,\n",
|
|||
|
|
" use_ollama: bool = True,\n",
|
|||
|
|
" ollama_model: str = \"llama3.1:8b\",\n",
|
|||
|
|
" max_items: int = 6,\n",
|
|||
|
|
"):\n",
|
|||
|
|
" raw = extract_desenvolver_list(df)\n",
|
|||
|
|
" if not raw:\n",
|
|||
|
|
" return replace_placeholder_with_column_subitems_hanging(\n",
|
|||
|
|
" doc, df,\n",
|
|||
|
|
" placeholder=placeholder,\n",
|
|||
|
|
" column_contains=\"_desenvolver\",\n",
|
|||
|
|
" indent_cm=indent_cm,\n",
|
|||
|
|
" font_name=font_name,\n",
|
|||
|
|
" font_size_pt=font_size_pt,\n",
|
|||
|
|
" deduplicate=deduplicate\n",
|
|||
|
|
" )\n",
|
|||
|
|
" final_items = None\n",
|
|||
|
|
" if use_ollama and ollama_available():\n",
|
|||
|
|
" try:\n",
|
|||
|
|
" llm_items = ollama_summarize_desenvolver(\n",
|
|||
|
|
" raw, max_items=max_items, model=ollama_model\n",
|
|||
|
|
" )\n",
|
|||
|
|
" if llm_items:\n",
|
|||
|
|
" final_items = llm_items\n",
|
|||
|
|
" except Exception:\n",
|
|||
|
|
" final_items = None\n",
|
|||
|
|
" if final_items is None:\n",
|
|||
|
|
" return replace_placeholder_with_column_subitems_hanging(\n",
|
|||
|
|
" doc, df,\n",
|
|||
|
|
" placeholder=placeholder,\n",
|
|||
|
|
" column_contains=\"_desenvolver\",\n",
|
|||
|
|
" indent_cm=indent_cm,\n",
|
|||
|
|
" font_name=font_name,\n",
|
|||
|
|
" font_size_pt=font_size_pt,\n",
|
|||
|
|
" deduplicate=deduplicate\n",
|
|||
|
|
" )\n",
|
|||
|
|
" df_tmp = pd.DataFrame({\"_desenvolver\": final_items})\n",
|
|||
|
|
" return replace_placeholder_with_column_subitems_hanging(\n",
|
|||
|
|
" doc, df_tmp,\n",
|
|||
|
|
" placeholder=placeholder,\n",
|
|||
|
|
" column_contains=\"_desenvolver\",\n",
|
|||
|
|
" indent_cm=indent_cm,\n",
|
|||
|
|
" font_name=font_name,\n",
|
|||
|
|
" font_size_pt=font_size_pt,\n",
|
|||
|
|
" deduplicate=False\n",
|
|||
|
|
" )"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 38,
|
|||
|
|
"id": "f5249e3c",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def extract_incluir_list(df: pd.DataFrame) -> list[str]:\n",
|
|||
|
|
" cols = [c for c in df.columns if \"_incluir\" in str(c).lower()]\n",
|
|||
|
|
" items = []\n",
|
|||
|
|
" for col in cols:\n",
|
|||
|
|
" s = df[col].dropna().astype(str).str.strip()\n",
|
|||
|
|
" s = s[s != \"\"]\n",
|
|||
|
|
" items.extend(s.tolist())\n",
|
|||
|
|
" seen = set()\n",
|
|||
|
|
" out = []\n",
|
|||
|
|
" for t in items:\n",
|
|||
|
|
" k = t.lower()\n",
|
|||
|
|
" if k not in seen:\n",
|
|||
|
|
" seen.add(k)\n",
|
|||
|
|
" out.append(t)\n",
|
|||
|
|
" return out\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 39,
|
|||
|
|
"id": "7f8a9eb4",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def ollama_summarize_incluir(\n",
|
|||
|
|
" items: list[str],\n",
|
|||
|
|
" max_items: int = 6,\n",
|
|||
|
|
" model: str = \"llama3.1:8b\",\n",
|
|||
|
|
" timeout: float = 30.0\n",
|
|||
|
|
") -> list[str]:\n",
|
|||
|
|
" items_txt = \"\\n\".join(f\"- {t}\" for t in items)\n",
|
|||
|
|
" prompt = f\"\"\"\n",
|
|||
|
|
"És um analista a escrever um relatório oficial de avaliação.\n",
|
|||
|
|
"\n",
|
|||
|
|
"Tens uma lista de conteúdos/temas que os formandos sugerem que sejam incluídos. Faz o seguinte:\n",
|
|||
|
|
"1) Agrupa itens repetidos/semelhantes;\n",
|
|||
|
|
"2) Seleciona os mais importantes e recorrentes;\n",
|
|||
|
|
"3) Reescreve numa lista curta, clara e formal (Português de Portugal);\n",
|
|||
|
|
"4) NÃO inventes novos pontos;\n",
|
|||
|
|
"5) No máximo {max_items} itens;\n",
|
|||
|
|
"6) Frases curtas e objetivas.\n",
|
|||
|
|
"\n",
|
|||
|
|
"Itens:\n",
|
|||
|
|
"{items_txt}\n",
|
|||
|
|
"\n",
|
|||
|
|
"Devolve APENAS a lista final no formato:\n",
|
|||
|
|
"- Item 1\n",
|
|||
|
|
"- Item 2\n",
|
|||
|
|
"- Item 3\n",
|
|||
|
|
"\"\"\".strip()\n",
|
|||
|
|
"\n",
|
|||
|
|
" payload = {\n",
|
|||
|
|
" \"model\": model,\n",
|
|||
|
|
" \"prompt\": prompt,\n",
|
|||
|
|
" \"stream\": False,\n",
|
|||
|
|
" \"options\": {\"temperature\": 0.2}\n",
|
|||
|
|
" }\n",
|
|||
|
|
" r = requests.post(\"http://localhost:11434/api/generate\", json=payload, timeout=timeout)\n",
|
|||
|
|
" r.raise_for_status()\n",
|
|||
|
|
" text = r.json().get(\"response\", \"\")\n",
|
|||
|
|
"\n",
|
|||
|
|
" lines = []\n",
|
|||
|
|
" for line in text.splitlines():\n",
|
|||
|
|
" line = line.strip()\n",
|
|||
|
|
" if line.startswith((\"-\", \"•\")):\n",
|
|||
|
|
" item = line.lstrip(\"-•\").strip(\" .;\")\n",
|
|||
|
|
" if item:\n",
|
|||
|
|
" lines.append(item)\n",
|
|||
|
|
"\n",
|
|||
|
|
" return lines[:max_items]\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 40,
|
|||
|
|
"id": "b1a43e80",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def replace_placeholder_with_incluir_smart(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df: pd.DataFrame,\n",
|
|||
|
|
" placeholder: str = \"{{INCLUIR}}\",\n",
|
|||
|
|
" indent_cm: float = 1.5,\n",
|
|||
|
|
" font_name: str = \"Arial\",\n",
|
|||
|
|
" font_size_pt: int = 12,\n",
|
|||
|
|
" deduplicate: bool = True,\n",
|
|||
|
|
" use_ollama: bool = True,\n",
|
|||
|
|
" ollama_model: str = \"llama3.1:8b\",\n",
|
|||
|
|
" max_items: int = 6,\n",
|
|||
|
|
"):\n",
|
|||
|
|
" raw = extract_incluir_list(df)\n",
|
|||
|
|
" if not raw:\n",
|
|||
|
|
" return replace_placeholder_with_column_subitems_hanging(\n",
|
|||
|
|
" doc, df,\n",
|
|||
|
|
" placeholder=placeholder,\n",
|
|||
|
|
" column_contains=\"_incluir\",\n",
|
|||
|
|
" indent_cm=indent_cm,\n",
|
|||
|
|
" font_name=font_name,\n",
|
|||
|
|
" font_size_pt=font_size_pt,\n",
|
|||
|
|
" deduplicate=deduplicate\n",
|
|||
|
|
" )\n",
|
|||
|
|
" final_items = None\n",
|
|||
|
|
" if use_ollama and ollama_available():\n",
|
|||
|
|
" try:\n",
|
|||
|
|
" llm_items = ollama_summarize_incluir(\n",
|
|||
|
|
" raw, max_items=max_items, model=ollama_model\n",
|
|||
|
|
" )\n",
|
|||
|
|
" if llm_items:\n",
|
|||
|
|
" final_items = llm_items\n",
|
|||
|
|
" except Exception:\n",
|
|||
|
|
" final_items = None\n",
|
|||
|
|
" if final_items is None:\n",
|
|||
|
|
" return replace_placeholder_with_column_subitems_hanging(\n",
|
|||
|
|
" doc, df,\n",
|
|||
|
|
" placeholder=placeholder,\n",
|
|||
|
|
" column_contains=\"_incluir\",\n",
|
|||
|
|
" indent_cm=indent_cm,\n",
|
|||
|
|
" font_name=font_name,\n",
|
|||
|
|
" font_size_pt=font_size_pt,\n",
|
|||
|
|
" deduplicate=deduplicate\n",
|
|||
|
|
" )\n",
|
|||
|
|
" df_tmp = pd.DataFrame({\"_incluir\": final_items})\n",
|
|||
|
|
" return replace_placeholder_with_column_subitems_hanging(\n",
|
|||
|
|
" doc, df_tmp,\n",
|
|||
|
|
" placeholder=placeholder,\n",
|
|||
|
|
" column_contains=\"_incluir\",\n",
|
|||
|
|
" indent_cm=indent_cm,\n",
|
|||
|
|
" font_name=font_name,\n",
|
|||
|
|
" font_size_pt=font_size_pt,\n",
|
|||
|
|
" deduplicate=False\n",
|
|||
|
|
" )"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 41,
|
|||
|
|
"id": "615ac60c",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def ollama_summarize_observacoes_paragraph(\n",
|
|||
|
|
" items: list[str],\n",
|
|||
|
|
" model: str = \"llama3.1:8b\",\n",
|
|||
|
|
" timeout: float = 45.0\n",
|
|||
|
|
") -> str:\n",
|
|||
|
|
" items_txt = \"\\n\".join(f\"- {t}\" for t in items)\n",
|
|||
|
|
"\n",
|
|||
|
|
" prompt = f\"\"\"\n",
|
|||
|
|
"És um analista a escrever um relatório oficial de avaliação.\n",
|
|||
|
|
"\n",
|
|||
|
|
"Tens observações livres escritas pelos formandos. Produz um ÚNICO PARÁGRAFO de síntese:\n",
|
|||
|
|
"- Português de Portugal, tom formal e objetivo;\n",
|
|||
|
|
"- Não inventes informação;\n",
|
|||
|
|
"- Agrupa ideias repetidas;\n",
|
|||
|
|
"- Evita exemplos pessoais e detalhes identificáveis;\n",
|
|||
|
|
"- 3 a 6 frases, no máximo ~120 palavras.\n",
|
|||
|
|
"\n",
|
|||
|
|
"Observações:\n",
|
|||
|
|
"{items_txt}\n",
|
|||
|
|
"Devolve APENAS o parágrafo final (sem tópicos, sem títulos, sem listas).\n",
|
|||
|
|
"\"\"\".strip()\n",
|
|||
|
|
" payload = {\n",
|
|||
|
|
" \"model\": model,\n",
|
|||
|
|
" \"prompt\": prompt,\n",
|
|||
|
|
" \"stream\": False,\n",
|
|||
|
|
" \"options\": {\"temperature\": 0.2}\n",
|
|||
|
|
" }\n",
|
|||
|
|
" r = requests.post(\"http://localhost:11434/api/generate\", json=payload, timeout=timeout)\n",
|
|||
|
|
" r.raise_for_status()\n",
|
|||
|
|
" text = r.json().get(\"response\", \"\").strip()\n",
|
|||
|
|
" text = \" \".join(line.strip() for line in text.splitlines() if line.strip())\n",
|
|||
|
|
" return text\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 42,
|
|||
|
|
"id": "2a130fdd",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def extract_observacoes_list(df: pd.DataFrame) -> list[str]:\n",
|
|||
|
|
" cols = [c for c in df.columns if \"_observ\" in str(c).lower()]\n",
|
|||
|
|
" items = []\n",
|
|||
|
|
" for col in cols:\n",
|
|||
|
|
" s = df[col].dropna().astype(str).str.strip()\n",
|
|||
|
|
" s = s[s != \"\"]\n",
|
|||
|
|
" items.extend(s.tolist())\n",
|
|||
|
|
" seen = set()\n",
|
|||
|
|
" out = []\n",
|
|||
|
|
" for t in items:\n",
|
|||
|
|
" k = t.lower()\n",
|
|||
|
|
" if k not in seen:\n",
|
|||
|
|
" seen.add(k)\n",
|
|||
|
|
" out.append(t)\n",
|
|||
|
|
" return out"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 43,
|
|||
|
|
"id": "218d5649",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def _iter_paragraphs_in_table(tbl):\n",
|
|||
|
|
" for row in tbl.rows:\n",
|
|||
|
|
" for cell in row.cells:\n",
|
|||
|
|
" for p in cell.paragraphs:\n",
|
|||
|
|
" yield p\n",
|
|||
|
|
" for t in cell.tables:\n",
|
|||
|
|
" yield from _iter_paragraphs_in_table(t)\n",
|
|||
|
|
"def iter_all_paragraphs_everywhere(doc):\n",
|
|||
|
|
" for p in doc.paragraphs:\n",
|
|||
|
|
" yield p\n",
|
|||
|
|
" for t in doc.tables:\n",
|
|||
|
|
" yield from _iter_paragraphs_in_table(t)\n",
|
|||
|
|
" for section in doc.sections:\n",
|
|||
|
|
" containers = [\n",
|
|||
|
|
" section.header,\n",
|
|||
|
|
" section.footer,\n",
|
|||
|
|
" section.first_page_header,\n",
|
|||
|
|
" section.first_page_footer,\n",
|
|||
|
|
" section.even_page_header,\n",
|
|||
|
|
" section.even_page_footer,\n",
|
|||
|
|
" ]\n",
|
|||
|
|
" for c in containers:\n",
|
|||
|
|
" for p in c.paragraphs:\n",
|
|||
|
|
" yield p\n",
|
|||
|
|
" for t in c.tables:\n",
|
|||
|
|
" yield from _iter_paragraphs_in_table(t)\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 44,
|
|||
|
|
"id": "b09a33b9",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def replace_placeholder_with_observacoes_smart(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df: pd.DataFrame,\n",
|
|||
|
|
" placeholder: str = \"{{OBSERVACOES}}\",\n",
|
|||
|
|
" indent_cm: float = 0.5,\n",
|
|||
|
|
" font_name: str = \"Arial\",\n",
|
|||
|
|
" font_size_pt: int = 12,\n",
|
|||
|
|
" deduplicate: bool = True,\n",
|
|||
|
|
" use_ollama: bool = True,\n",
|
|||
|
|
" ollama_model: str = \"llama3.1:8b\",\n",
|
|||
|
|
" max_items: int | None = None,\n",
|
|||
|
|
") -> int:\n",
|
|||
|
|
" raw = extract_observacoes_list(df) \n",
|
|||
|
|
" items = [str(t).strip() for t in (raw or []) if str(t).strip()]\n",
|
|||
|
|
" if deduplicate:\n",
|
|||
|
|
" seen = set()\n",
|
|||
|
|
" out = []\n",
|
|||
|
|
" for t in items:\n",
|
|||
|
|
" k = t.lower()\n",
|
|||
|
|
" if k not in seen:\n",
|
|||
|
|
" seen.add(k)\n",
|
|||
|
|
" out.append(t)\n",
|
|||
|
|
" items = out\n",
|
|||
|
|
" if max_items is not None:\n",
|
|||
|
|
" items = items[:max_items]\n",
|
|||
|
|
" paragraph_text = None\n",
|
|||
|
|
" if items and use_ollama and ollama_available():\n",
|
|||
|
|
" try:\n",
|
|||
|
|
" paragraph_text = ollama_summarize_observacoes_paragraph(items, model=ollama_model)\n",
|
|||
|
|
" if paragraph_text:\n",
|
|||
|
|
" paragraph_text = paragraph_text.strip()\n",
|
|||
|
|
" except Exception:\n",
|
|||
|
|
" paragraph_text = None\n",
|
|||
|
|
" letters = string.ascii_lowercase\n",
|
|||
|
|
" def _apply_par_format(par: Paragraph, hanging=False):\n",
|
|||
|
|
" fmt = par.paragraph_format\n",
|
|||
|
|
" fmt.left_indent = Cm(indent_cm)\n",
|
|||
|
|
" if hanging:\n",
|
|||
|
|
" fmt.first_line_indent = Cm(-0.6)\n",
|
|||
|
|
" fmt.line_spacing = 1.5\n",
|
|||
|
|
" fmt.space_before = Pt(0)\n",
|
|||
|
|
" fmt.space_after = Pt(0)\n",
|
|||
|
|
" def _add_run(par: Paragraph, text: str, bold=False):\n",
|
|||
|
|
" r = par.add_run(text)\n",
|
|||
|
|
" force_run_font(r, font_name, font_size_pt, bold=bold)\n",
|
|||
|
|
" return r\n",
|
|||
|
|
" replaced = 0\n",
|
|||
|
|
" for p in list(iter_all_paragraphs_everywhere(doc)):\n",
|
|||
|
|
" if placeholder not in (p.text or \"\"):\n",
|
|||
|
|
" continue\n",
|
|||
|
|
" current = p\n",
|
|||
|
|
" if paragraph_text:\n",
|
|||
|
|
" newp = insert_paragraph_after(current)\n",
|
|||
|
|
" _add_run(newp, paragraph_text)\n",
|
|||
|
|
" _apply_par_format(newp, hanging=False)\n",
|
|||
|
|
" else:\n",
|
|||
|
|
" if not items:\n",
|
|||
|
|
" items_to_write = [\"Sem dados\"]\n",
|
|||
|
|
" else:\n",
|
|||
|
|
" items_to_write = items\n",
|
|||
|
|
" for i, txt in enumerate(items_to_write):\n",
|
|||
|
|
" sub = letters[i] if i < 26 else f\"a{i+1}\"\n",
|
|||
|
|
" newp = insert_paragraph_after(current)\n",
|
|||
|
|
" _add_run(newp, f\"{sub}. \")\n",
|
|||
|
|
" _add_run(newp, txt.rstrip(\".;\"))\n",
|
|||
|
|
" _add_run(newp, \";\")\n",
|
|||
|
|
" _apply_par_format(newp, hanging=True)\n",
|
|||
|
|
" current = newp\n",
|
|||
|
|
" delete_paragraph(p)\n",
|
|||
|
|
" replaced += 1\n",
|
|||
|
|
" return replaced\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 45,
|
|||
|
|
"id": "e4df3360",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def build_melhoria_checklist_items(\n",
|
|||
|
|
" temas_items: list[str] | None,\n",
|
|||
|
|
" desenvolver_items: list[str] | None,\n",
|
|||
|
|
" incluir_items: list[str] | None,\n",
|
|||
|
|
" observacoes_text_or_items: str | list[str] | None = None,\n",
|
|||
|
|
" use_ollama: bool = True,\n",
|
|||
|
|
" ollama_model: str = \"llama3.1:8b\",\n",
|
|||
|
|
" max_items: int = 10,\n",
|
|||
|
|
"):\n",
|
|||
|
|
" temas_items = [x.strip() for x in (temas_items or []) if str(x).strip()]\n",
|
|||
|
|
" desenvolver_items = [x.strip() for x in (desenvolver_items or []) if str(x).strip()]\n",
|
|||
|
|
" incluir_items = [x.strip() for x in (incluir_items or []) if str(x).strip()]\n",
|
|||
|
|
" if isinstance(observacoes_text_or_items, str):\n",
|
|||
|
|
" observacoes_text = observacoes_text_or_items.strip()\n",
|
|||
|
|
" elif isinstance(observacoes_text_or_items, list):\n",
|
|||
|
|
" obs_list = [str(x).strip() for x in observacoes_text_or_items if str(x).strip()]\n",
|
|||
|
|
" observacoes_text = \"\\n\".join(f\"- {x}\" for x in obs_list)\n",
|
|||
|
|
" else:\n",
|
|||
|
|
" observacoes_text = \"\"\n",
|
|||
|
|
" if use_ollama and ollama_available():\n",
|
|||
|
|
" try:\n",
|
|||
|
|
" temas_txt = \"\\n\".join(f\"- {t}\" for t in temas_items)\n",
|
|||
|
|
" des_txt = \"\\n\".join(f\"- {t}\" for t in desenvolver_items)\n",
|
|||
|
|
" inc_txt = \"\\n\".join(f\"- {t}\" for t in incluir_items)\n",
|
|||
|
|
" prompt = f\"\"\"\n",
|
|||
|
|
"És um analista a escrever um relatório oficial de avaliação.\n",
|
|||
|
|
"\n",
|
|||
|
|
"A partir dos seguintes outputs, cria uma CHECKLIST de melhorias (ações).\n",
|
|||
|
|
"Regras:\n",
|
|||
|
|
"- Não inventes pontos.\n",
|
|||
|
|
"- Junta redundâncias.\n",
|
|||
|
|
"- Escreve cada item como ação (ex.: \"Reforçar ...\", \"Incluir ...\", \"Aprofundar ...\", \"Melhorar ...\").\n",
|
|||
|
|
"- No máximo {max_items} itens.\n",
|
|||
|
|
"- Devolve APENAS lista em bullets \"- ...\".\n",
|
|||
|
|
"\n",
|
|||
|
|
"TEMAS:\n",
|
|||
|
|
"{temas_txt}\n",
|
|||
|
|
"\n",
|
|||
|
|
"A DESENVOLVER:\n",
|
|||
|
|
"{des_txt}\n",
|
|||
|
|
"\n",
|
|||
|
|
"A INCLUIR:\n",
|
|||
|
|
"{inc_txt}\n",
|
|||
|
|
"\n",
|
|||
|
|
"OBSERVAÇÕES:\n",
|
|||
|
|
"{observacoes_text}\n",
|
|||
|
|
"\"\"\".strip()\n",
|
|||
|
|
"\n",
|
|||
|
|
" payload = {\n",
|
|||
|
|
" \"model\": ollama_model,\n",
|
|||
|
|
" \"prompt\": prompt,\n",
|
|||
|
|
" \"stream\": False,\n",
|
|||
|
|
" \"options\": {\"temperature\": 0.2},\n",
|
|||
|
|
" }\n",
|
|||
|
|
" r = requests.post(\"http://localhost:11434/api/generate\", json=payload, timeout=45.0)\n",
|
|||
|
|
" r.raise_for_status()\n",
|
|||
|
|
" text = r.json().get(\"response\", \"\").strip()\n",
|
|||
|
|
"\n",
|
|||
|
|
" llm_items = []\n",
|
|||
|
|
" for line in text.splitlines():\n",
|
|||
|
|
" line = line.strip()\n",
|
|||
|
|
" if line.startswith((\"-\", \"•\")):\n",
|
|||
|
|
" item = line.lstrip(\"-•\").strip(\" .;\")\n",
|
|||
|
|
" if item:\n",
|
|||
|
|
" llm_items.append(item)\n",
|
|||
|
|
"\n",
|
|||
|
|
" llm_items = llm_items[:max_items]\n",
|
|||
|
|
" if llm_items:\n",
|
|||
|
|
" return llm_items\n",
|
|||
|
|
" except Exception:\n",
|
|||
|
|
" pass\n",
|
|||
|
|
" merged = temas_items + desenvolver_items + incluir_items\n",
|
|||
|
|
" seen = set()\n",
|
|||
|
|
" out = []\n",
|
|||
|
|
" for x in merged:\n",
|
|||
|
|
" x = x.strip().strip(\"•-\").strip()\n",
|
|||
|
|
" if not x:\n",
|
|||
|
|
" continue\n",
|
|||
|
|
" k = x.lower()\n",
|
|||
|
|
" if k not in seen:\n",
|
|||
|
|
" seen.add(k)\n",
|
|||
|
|
" out.append(x)\n",
|
|||
|
|
" return out[:max_items] if out else [\"Sem dados\"]"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 46,
|
|||
|
|
"id": "d430cad8",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def replace_placeholder_with_checklist_subitems(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" checklist_items: list[str],\n",
|
|||
|
|
" placeholder: str = \"{{CHECKLIST_MELHORAR}}\",\n",
|
|||
|
|
" indent_cm: float = 0.5,\n",
|
|||
|
|
" font_name: str = \"Arial\",\n",
|
|||
|
|
" font_size_pt: int = 12,\n",
|
|||
|
|
" checkbox: str = \"-\",\n",
|
|||
|
|
"):\n",
|
|||
|
|
" letters = string.ascii_lowercase\n",
|
|||
|
|
"\n",
|
|||
|
|
" def _apply_par_format(par: Paragraph):\n",
|
|||
|
|
" fmt = par.paragraph_format\n",
|
|||
|
|
" fmt.left_indent = Cm(indent_cm)\n",
|
|||
|
|
" fmt.first_line_indent = Cm(-indent_cm)\n",
|
|||
|
|
" fmt.line_spacing = 1.5\n",
|
|||
|
|
" fmt.space_before = Pt(0)\n",
|
|||
|
|
" fmt.space_after = Pt(0)\n",
|
|||
|
|
"\n",
|
|||
|
|
" def _add_run(par: Paragraph, text: str):\n",
|
|||
|
|
" r = par.add_run(text)\n",
|
|||
|
|
" force_run_font(r, font_name, font_size_pt)\n",
|
|||
|
|
" return r\n",
|
|||
|
|
"\n",
|
|||
|
|
" def _process_paragraph(p: Paragraph) -> bool:\n",
|
|||
|
|
" if placeholder not in p.text:\n",
|
|||
|
|
" return False\n",
|
|||
|
|
"\n",
|
|||
|
|
" current = p\n",
|
|||
|
|
" items = checklist_items or [\"Sem dados\"]\n",
|
|||
|
|
"\n",
|
|||
|
|
" for i, item in enumerate(items):\n",
|
|||
|
|
" sub = letters[i] if i < 26 else f\"a{i+1}\"\n",
|
|||
|
|
" newp = insert_paragraph_after(current)\n",
|
|||
|
|
" _add_run(newp, f\"{sub}. {checkbox} \")\n",
|
|||
|
|
" _add_run(newp, item.strip().rstrip(\".;\"))\n",
|
|||
|
|
" _add_run(newp, \";\")\n",
|
|||
|
|
" _apply_par_format(newp)\n",
|
|||
|
|
" current = newp\n",
|
|||
|
|
"\n",
|
|||
|
|
" delete_paragraph(p)\n",
|
|||
|
|
" return True\n",
|
|||
|
|
"\n",
|
|||
|
|
" for p in doc.paragraphs:\n",
|
|||
|
|
" if _process_paragraph(p):\n",
|
|||
|
|
" return\n",
|
|||
|
|
"\n",
|
|||
|
|
" for table in doc.tables:\n",
|
|||
|
|
" for row in table.rows:\n",
|
|||
|
|
" for cell in row.cells:\n",
|
|||
|
|
" for p in cell.paragraphs:\n",
|
|||
|
|
" if _process_paragraph(p):\n",
|
|||
|
|
" return"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 47,
|
|||
|
|
"id": "c92e42dc",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def replace_placeholder_with_observacoes_smart2(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df: pd.DataFrame,\n",
|
|||
|
|
" placeholder: str = \"{{OBSERVACOES2}}\",\n",
|
|||
|
|
" indent_cm: float = 2.75,\n",
|
|||
|
|
" font_name: str = \"Arial\",\n",
|
|||
|
|
" font_size_pt: int = 12,\n",
|
|||
|
|
" deduplicate: bool = True,\n",
|
|||
|
|
" max_items: int | None = None,\n",
|
|||
|
|
" **_ignored,\n",
|
|||
|
|
"):\n",
|
|||
|
|
" obs_raw = extract_observacoes_list(df)\n",
|
|||
|
|
" itens = [str(t).strip() for t in (obs_raw or []) if str(t).strip()]\n",
|
|||
|
|
" if deduplicate:\n",
|
|||
|
|
" seen = set()\n",
|
|||
|
|
" out = []\n",
|
|||
|
|
" for t in itens:\n",
|
|||
|
|
" k = t.lower()\n",
|
|||
|
|
" if k not in seen:\n",
|
|||
|
|
" seen.add(k)\n",
|
|||
|
|
" out.append(t)\n",
|
|||
|
|
" itens = out\n",
|
|||
|
|
" if max_items is not None:\n",
|
|||
|
|
" itens = itens[:max_items]\n",
|
|||
|
|
" if not itens:\n",
|
|||
|
|
" itens = [\"Sem dados\"]\n",
|
|||
|
|
" df_tmp = pd.DataFrame({\"_Observacoes\": itens})\n",
|
|||
|
|
" return replace_placeholder_with_column_subitems_hanging2(\n",
|
|||
|
|
" doc, df2,\n",
|
|||
|
|
" placeholder=\"{{OBSERVACOES2}}\",\n",
|
|||
|
|
" column_contains=\"_observa\",\n",
|
|||
|
|
" left_indent_cm=2.75,\n",
|
|||
|
|
" hanging_cm=0.6,\n",
|
|||
|
|
" font_name=\"Arial\",\n",
|
|||
|
|
" font_size_pt=12,\n",
|
|||
|
|
" deduplicate=True\n",
|
|||
|
|
")\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 48,
|
|||
|
|
"id": "c9bc1475",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def replace_placeholder_with_incluir_smart(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df: pd.DataFrame,\n",
|
|||
|
|
" placeholder: str = \"{{INCLUIR}}\",\n",
|
|||
|
|
" indent_cm: float = 0.5,\n",
|
|||
|
|
" font_name: str = \"Arial\",\n",
|
|||
|
|
" font_size_pt: int = 12,\n",
|
|||
|
|
" deduplicate: bool = True,\n",
|
|||
|
|
" max_items: int | None = None,\n",
|
|||
|
|
" **_ignored,\n",
|
|||
|
|
"):\n",
|
|||
|
|
" incluir_raw = extract_incluir_list(df)\n",
|
|||
|
|
" itens = [str(t).strip() for t in (incluir_raw or []) if str(t).strip()]\n",
|
|||
|
|
" if deduplicate:\n",
|
|||
|
|
" seen = set()\n",
|
|||
|
|
" out = []\n",
|
|||
|
|
" for t in itens:\n",
|
|||
|
|
" k = t.lower()\n",
|
|||
|
|
" if k not in seen:\n",
|
|||
|
|
" seen.add(k)\n",
|
|||
|
|
" out.append(t)\n",
|
|||
|
|
" itens = out\n",
|
|||
|
|
" if max_items is not None:\n",
|
|||
|
|
" itens = itens[:max_items]\n",
|
|||
|
|
" if not itens:\n",
|
|||
|
|
" itens = [\"Sem dados\"]\n",
|
|||
|
|
" df_tmp = pd.DataFrame({\"_Incluir\": itens})\n",
|
|||
|
|
" return replace_placeholder_with_column_subitems_hanging(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df_tmp,\n",
|
|||
|
|
" placeholder=placeholder,\n",
|
|||
|
|
" column_contains=\"_incluir\",\n",
|
|||
|
|
" indent_cm=indent_cm,\n",
|
|||
|
|
" font_name=font_name,\n",
|
|||
|
|
" font_size_pt=font_size_pt,\n",
|
|||
|
|
" deduplicate=False\n",
|
|||
|
|
" )\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 49,
|
|||
|
|
"id": "070ae13a",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def replace_placeholder_with_desenvolver_smart(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df: pd.DataFrame,\n",
|
|||
|
|
" placeholder: str = \"{{DESENVOLVER}}\",\n",
|
|||
|
|
" indent_cm: float = 0.5,\n",
|
|||
|
|
" font_name: str = \"Arial\",\n",
|
|||
|
|
" font_size_pt: int = 12,\n",
|
|||
|
|
" deduplicate: bool = True,\n",
|
|||
|
|
" max_items: int | None = None,\n",
|
|||
|
|
" **_ignored,\n",
|
|||
|
|
"):\n",
|
|||
|
|
" desenvolver_raw = extract_desenvolver_list(df) \n",
|
|||
|
|
" itens = [str(t).strip() for t in (desenvolver_raw or []) if str(t).strip()]\n",
|
|||
|
|
" if deduplicate:\n",
|
|||
|
|
" seen = set()\n",
|
|||
|
|
" out = []\n",
|
|||
|
|
" for t in itens:\n",
|
|||
|
|
" k = t.lower()\n",
|
|||
|
|
" if k not in seen:\n",
|
|||
|
|
" seen.add(k)\n",
|
|||
|
|
" out.append(t)\n",
|
|||
|
|
" itens = out\n",
|
|||
|
|
" if max_items is not None:\n",
|
|||
|
|
" itens = itens[:max_items]\n",
|
|||
|
|
" if not itens:\n",
|
|||
|
|
" itens = [\"Sem dados\"]\n",
|
|||
|
|
" df_tmp = pd.DataFrame({\"_Desenvolver\": itens})\n",
|
|||
|
|
" return replace_placeholder_with_column_subitems_hanging(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df_tmp,\n",
|
|||
|
|
" placeholder=placeholder,\n",
|
|||
|
|
" column_contains=\"_desenvolver\",\n",
|
|||
|
|
" indent_cm=indent_cm,\n",
|
|||
|
|
" font_name=font_name,\n",
|
|||
|
|
" font_size_pt=font_size_pt,\n",
|
|||
|
|
" deduplicate=False\n",
|
|||
|
|
" )\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 50,
|
|||
|
|
"id": "9ee1a5cf",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def delete_all_between_anchors_xml(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" start=\"{{ANCORA1}}\",\n",
|
|||
|
|
" end=\"{{ANCORA2}}\",\n",
|
|||
|
|
" debug=False,\n",
|
|||
|
|
" max_passes=10_000,\n",
|
|||
|
|
") -> int:\n",
|
|||
|
|
" body = doc._element.body\n",
|
|||
|
|
" removed_blocks = 0\n",
|
|||
|
|
" passes = 0\n",
|
|||
|
|
" def child_contains_needle(elm, needle: str) -> bool:\n",
|
|||
|
|
" texts = elm.xpath(\".//*[local-name()='t']/text()\")\n",
|
|||
|
|
" joined = \"\".join(texts) if texts else \"\"\n",
|
|||
|
|
" return needle in joined\n",
|
|||
|
|
" while passes < max_passes:\n",
|
|||
|
|
" passes += 1\n",
|
|||
|
|
" children = list(body.iterchildren())\n",
|
|||
|
|
" start_idx = None\n",
|
|||
|
|
" end_idx = None\n",
|
|||
|
|
" for i, elm in enumerate(children):\n",
|
|||
|
|
" if start_idx is None and child_contains_needle(elm, start):\n",
|
|||
|
|
" start_idx = i\n",
|
|||
|
|
" if debug:\n",
|
|||
|
|
" print(f\"[DEBUG] start in child {i} tag={elm.tag}\")\n",
|
|||
|
|
" continue\n",
|
|||
|
|
" if start_idx is not None and child_contains_needle(elm, end):\n",
|
|||
|
|
" end_idx = i\n",
|
|||
|
|
" if debug:\n",
|
|||
|
|
" print(f\"[DEBUG] end in child {i} tag={elm.tag}\")\n",
|
|||
|
|
" break\n",
|
|||
|
|
" if start_idx is None or end_idx is None:\n",
|
|||
|
|
" if debug:\n",
|
|||
|
|
" print(\"[DEBUG] done. start/end:\", start_idx, end_idx)\n",
|
|||
|
|
" break\n",
|
|||
|
|
" for elm in reversed(children[start_idx:end_idx + 1]):\n",
|
|||
|
|
" body.remove(elm)\n",
|
|||
|
|
" removed_blocks += 1\n",
|
|||
|
|
" return removed_blocks"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 51,
|
|||
|
|
"id": "19f98bbc",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def delete_all_between_anchors_xml(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" start=\"{{ANCORA3}}\",\n",
|
|||
|
|
" end=\"{{ANCORA4}}\",\n",
|
|||
|
|
" debug=False,\n",
|
|||
|
|
" max_passes=10_000,\n",
|
|||
|
|
") -> int:\n",
|
|||
|
|
" body = doc._element.body\n",
|
|||
|
|
" removed_blocks = 0\n",
|
|||
|
|
" passes = 0\n",
|
|||
|
|
" def child_contains_needle(elm, needle: str) -> bool:\n",
|
|||
|
|
" texts = elm.xpath(\".//*[local-name()='t']/text()\")\n",
|
|||
|
|
" joined = \"\".join(texts) if texts else \"\"\n",
|
|||
|
|
" return needle in joined\n",
|
|||
|
|
" while passes < max_passes:\n",
|
|||
|
|
" passes += 1\n",
|
|||
|
|
" children = list(body.iterchildren())\n",
|
|||
|
|
" start_idx = None\n",
|
|||
|
|
" end_idx = None\n",
|
|||
|
|
" for i, elm in enumerate(children):\n",
|
|||
|
|
" if start_idx is None and child_contains_needle(elm, start):\n",
|
|||
|
|
" start_idx = i\n",
|
|||
|
|
" if debug:\n",
|
|||
|
|
" print(f\"[DEBUG] start in child {i} tag={elm.tag}\")\n",
|
|||
|
|
" continue\n",
|
|||
|
|
" if start_idx is not None and child_contains_needle(elm, end):\n",
|
|||
|
|
" end_idx = i\n",
|
|||
|
|
" if debug:\n",
|
|||
|
|
" print(f\"[DEBUG] end in child {i} tag={elm.tag}\")\n",
|
|||
|
|
" break\n",
|
|||
|
|
" if start_idx is None or end_idx is None:\n",
|
|||
|
|
" if debug:\n",
|
|||
|
|
" print(\"[DEBUG] done. start/end:\", start_idx, end_idx)\n",
|
|||
|
|
" break\n",
|
|||
|
|
" for elm in reversed(children[start_idx:end_idx + 1]):\n",
|
|||
|
|
" body.remove(elm)\n",
|
|||
|
|
" removed_blocks += 1\n",
|
|||
|
|
" return removed_blocks"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 52,
|
|||
|
|
"id": "8da13fb2",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def _iter_paragraphs_in_table(tbl):\n",
|
|||
|
|
" for row in tbl.rows:\n",
|
|||
|
|
" for cell in row.cells:\n",
|
|||
|
|
" for p in cell.paragraphs:\n",
|
|||
|
|
" yield p\n",
|
|||
|
|
" for t in cell.tables:\n",
|
|||
|
|
" yield from _iter_paragraphs_in_table(t)\n",
|
|||
|
|
"def iter_all_paragraphs_everywhere(doc):\n",
|
|||
|
|
" for p in doc.paragraphs:\n",
|
|||
|
|
" yield p\n",
|
|||
|
|
" for t in doc.tables:\n",
|
|||
|
|
" yield from _iter_paragraphs_in_table(t)\n",
|
|||
|
|
" for section in doc.sections:\n",
|
|||
|
|
" containers = [\n",
|
|||
|
|
" section.header,\n",
|
|||
|
|
" section.footer,\n",
|
|||
|
|
" section.first_page_header,\n",
|
|||
|
|
" section.first_page_footer,\n",
|
|||
|
|
" section.even_page_header,\n",
|
|||
|
|
" section.even_page_footer,\n",
|
|||
|
|
" ]\n",
|
|||
|
|
" for c in containers:\n",
|
|||
|
|
" for p in c.paragraphs:\n",
|
|||
|
|
" yield p\n",
|
|||
|
|
" for t in c.tables:\n",
|
|||
|
|
" yield from _iter_paragraphs_in_table(t)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 53,
|
|||
|
|
"id": "ce5986ca",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"Selected file:\n",
|
|||
|
|
"E:/Relatórios Internos/3Curso QP praças/Question_rio_Final_Curso_para_Formadores.xlsx\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"Tk().withdraw()\n",
|
|||
|
|
"file_path3 = askopenfilename(\n",
|
|||
|
|
" title=\"Select Excel das Formadores\",\n",
|
|||
|
|
" filetypes=[(\"Excel files\", \"*.xlsx *.xls\")]\n",
|
|||
|
|
")\n",
|
|||
|
|
"if not file_path3:\n",
|
|||
|
|
" print(\"Nenhum ficheiro selecionado.\")\n",
|
|||
|
|
" df3 = None\n",
|
|||
|
|
"else:\n",
|
|||
|
|
" print(f\"Selected file:\\n{file_path3}\")\n",
|
|||
|
|
" df3 = pd.read_excel(file_path3)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 54,
|
|||
|
|
"id": "961996c2",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"df3 = None\n",
|
|||
|
|
"ninq3 = 0\n",
|
|||
|
|
"medpub = 0\n",
|
|||
|
|
"medmeiosaux = 0\n",
|
|||
|
|
"medapform = 0\n",
|
|||
|
|
"medapdc = 0\n",
|
|||
|
|
"medobjesp = 0\n",
|
|||
|
|
"medmetensi = 0\n",
|
|||
|
|
"medtempform = 0\n",
|
|||
|
|
"medlocaisform = 0\n",
|
|||
|
|
"medlançaaval = 0\n",
|
|||
|
|
"medtipoaval = 0\n",
|
|||
|
|
"medtempoaval = 0\n",
|
|||
|
|
"medobjapre = 0\n",
|
|||
|
|
"medadqonjesp = 0\n",
|
|||
|
|
"medinterforma = 0\n",
|
|||
|
|
"prerequesitos = 0\n",
|
|||
|
|
"Conteudo = 0\n",
|
|||
|
|
"objgeral = 0\n",
|
|||
|
|
"objfinal = 0\n",
|
|||
|
|
"objadq = 0\n",
|
|||
|
|
"avadq = 0\n",
|
|||
|
|
"refere = 0\n",
|
|||
|
|
"if file_path3:\n",
|
|||
|
|
" df3 = pd.read_excel(file_path3)\n",
|
|||
|
|
" ninq3 = df3.shape[0]\n",
|
|||
|
|
" medpub = round(df3.iloc[:, 12].mean(), 2)\n",
|
|||
|
|
" medmeiosaux = round(df3.iloc[:, 13].mean(), 2)\n",
|
|||
|
|
" medapform = round(df3.iloc[:, 14].mean(), 2)\n",
|
|||
|
|
" medapdc = round(df3.iloc[:, 15].mean(), 2)\n",
|
|||
|
|
" medobjesp = round(df3.iloc[:, 16].mean(), 2)\n",
|
|||
|
|
" medmetensi = round(df3.iloc[:, 17].mean(), 2)\n",
|
|||
|
|
" medtempform = round(df3.iloc[:, 18].mean(), 2)\n",
|
|||
|
|
" medlocaisform = round(df3.iloc[:, 19].mean(), 2)\n",
|
|||
|
|
" medlançaaval = round(df3.iloc[:, 20].mean(), 2)\n",
|
|||
|
|
" medtipoaval = round(df3.iloc[:, 21].mean(), 2)\n",
|
|||
|
|
" medtempoaval = round(df3.iloc[:, 22].mean(), 2)\n",
|
|||
|
|
" medobjapre = round(df3.iloc[:, 23].mean(), 2)\n",
|
|||
|
|
" medadqonjesp = round(df3.iloc[:, 24].mean(), 2)\n",
|
|||
|
|
" medinterforma = round(df3.iloc[:, 25].mean(), 2)\n",
|
|||
|
|
" if ninq3 > 0:\n",
|
|||
|
|
" prerequesitos = round((df3.iloc[:, 26].eq(1).sum() / ninq3) * 100, 2)\n",
|
|||
|
|
" Conteudo = round((df3.iloc[:, 27].eq(1).sum() / ninq3) * 100, 2)\n",
|
|||
|
|
" objgeral = round((df3.iloc[:, 28].eq(1).sum() / ninq3) * 100, 2)\n",
|
|||
|
|
" objfinal = round((df3.iloc[:, 29].eq(1).sum() / ninq3) * 100, 2)\n",
|
|||
|
|
" objadq = round((df3.iloc[:, 30].eq(1).sum() / ninq3) * 100, 2)\n",
|
|||
|
|
" avadq = round((df3.iloc[:, 31].eq(1).sum() / ninq3) * 100, 2)\n",
|
|||
|
|
" refere = round((df3.iloc[:, 32].eq(1).sum() / ninq3) * 100, 2)\n",
|
|||
|
|
"else:\n",
|
|||
|
|
" print(\"Nenhum ficheiro selecionado (df3). Valores definidos a 0.\")"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 55,
|
|||
|
|
"id": "2edd4d84",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"Selected file:\n",
|
|||
|
|
"E:/Relatórios Internos/3Curso QP praças/Question_rio_Final_Curso_para_Dire_o_de_Curso_N_velamento.xlsx\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"Tk().withdraw()\n",
|
|||
|
|
"file_path4 = askopenfilename(\n",
|
|||
|
|
" title=\"Select Excel da Direção de Curso\",\n",
|
|||
|
|
" filetypes=[(\"Excel files\", \"*.xlsx *.xls\")]\n",
|
|||
|
|
")\n",
|
|||
|
|
"if not file_path4:\n",
|
|||
|
|
" print(\"Nenhum ficheiro selecionado.\")\n",
|
|||
|
|
" df4 = None\n",
|
|||
|
|
"else:\n",
|
|||
|
|
" print(f\"Selected file:\\n{file_path4}\")\n",
|
|||
|
|
" df4 = pd.read_excel(file_path4)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 56,
|
|||
|
|
"id": "97b51ec3",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"df4 = None\n",
|
|||
|
|
"ninq4 = 0\n",
|
|||
|
|
"medprogcurso = 0\n",
|
|||
|
|
"medcontcurso = 0\n",
|
|||
|
|
"medestrcurso = 0\n",
|
|||
|
|
"medutilprat = 0\n",
|
|||
|
|
"medcargahoraria = 0\n",
|
|||
|
|
"med1 = 0\n",
|
|||
|
|
"medinstal = 0\n",
|
|||
|
|
"medaudiovis = 0\n",
|
|||
|
|
"meddocdispor = 0\n",
|
|||
|
|
"medapadmin = 0\n",
|
|||
|
|
"medapcoord = 0\n",
|
|||
|
|
"med2 = 0\n",
|
|||
|
|
"medmotform = 0\n",
|
|||
|
|
"medrelpart = 0\n",
|
|||
|
|
"medpontass = 0\n",
|
|||
|
|
"med3 = 0\n",
|
|||
|
|
"if file_path4:\n",
|
|||
|
|
" df4 = pd.read_excel(file_path4)\n",
|
|||
|
|
" ninq4 = df4.shape[0]\n",
|
|||
|
|
" medprogcurso = round(df4.iloc[:, 10].mean(), 2)\n",
|
|||
|
|
" medcontcurso = round(df4.iloc[:, 11].mean(), 2)\n",
|
|||
|
|
" medestrcurso = round(df4.iloc[:, 12].mean(), 2)\n",
|
|||
|
|
" medutilprat = round(df4.iloc[:, 13].mean(), 2)\n",
|
|||
|
|
" medcargahoraria= round(df4.iloc[:, 14].mean(), 2)\n",
|
|||
|
|
" med1 = round(\n",
|
|||
|
|
" (medprogcurso + medcontcurso + medestrcurso +\n",
|
|||
|
|
" medutilprat + medcargahoraria) / 5,\n",
|
|||
|
|
" 2\n",
|
|||
|
|
" )\n",
|
|||
|
|
" medinstal = round(df4.iloc[:, 15].mean(), 2)\n",
|
|||
|
|
" medaudiovis = round(df4.iloc[:, 16].mean(), 2)\n",
|
|||
|
|
" meddocdispor = round(df4.iloc[:, 17].mean(), 2)\n",
|
|||
|
|
" medapadmin = round(df4.iloc[:, 18].mean(), 2)\n",
|
|||
|
|
" medapcoord = round(df4.iloc[:, 19].mean(), 2)\n",
|
|||
|
|
" med2 = round(\n",
|
|||
|
|
" (medinstal + medaudiovis + meddocdispor +\n",
|
|||
|
|
" medapadmin + medapcoord) / 5,\n",
|
|||
|
|
" 2\n",
|
|||
|
|
" )\n",
|
|||
|
|
" medmotform = round(df4.iloc[:, 20].mean(), 2)\n",
|
|||
|
|
" medrelpart = round(df4.iloc[:, 21].mean(), 2)\n",
|
|||
|
|
" medpontass = round(df4.iloc[:, 22].mean(), 2)\n",
|
|||
|
|
" med3 = round(\n",
|
|||
|
|
" (medmotform + medrelpart + medpontass) / 3,\n",
|
|||
|
|
" 2\n",
|
|||
|
|
" )\n",
|
|||
|
|
"else:\n",
|
|||
|
|
" print(\"Nenhum ficheiro selecionado. Valores definidos a 0.\")"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 57,
|
|||
|
|
"id": "ad4e7d71",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"text/html": [
|
|||
|
|
"<div>\n",
|
|||
|
|
"<style scoped>\n",
|
|||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
|
" vertical-align: middle;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe tbody tr th {\n",
|
|||
|
|
" vertical-align: top;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe thead th {\n",
|
|||
|
|
" text-align: right;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"</style>\n",
|
|||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
|
" <thead>\n",
|
|||
|
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th>Resposta</th>\n",
|
|||
|
|
" <th>Data/hora de submissão:</th>\n",
|
|||
|
|
" <th>Instituição</th>\n",
|
|||
|
|
" <th>Departamento</th>\n",
|
|||
|
|
" <th>SalaOnline</th>\n",
|
|||
|
|
" <th>Grupo</th>\n",
|
|||
|
|
" <th>ID</th>\n",
|
|||
|
|
" <th>Nome completo</th>\n",
|
|||
|
|
" <th>NIM / CC / BI</th>\n",
|
|||
|
|
" <th>Q00_Data</th>\n",
|
|||
|
|
" <th>...</th>\n",
|
|||
|
|
" <th>Q00_Funcionamento do Curso->Documentação ao Dispor (1 - Inadequada...5 - Adequada à Realidade)</th>\n",
|
|||
|
|
" <th>Q00_Funcionamento do Curso->Apoio Administrativo (1- - Ineficaz...5 - Muito eficaz)</th>\n",
|
|||
|
|
" <th>Q00_Funcionamento do Curso->Apoio Prestado pelo Coordenador (1 - Ineficaz...5 - Muito eficaz)</th>\n",
|
|||
|
|
" <th>Q00_Apreciação dos Módulos->Motivação dos Participantes (1 - Baixa...5 - Muito Elevada)</th>\n",
|
|||
|
|
" <th>Q00_Apreciação dos Módulos->Relacionamento entre Participantes (1 - Fechado...5- Muito Aberto)</th>\n",
|
|||
|
|
" <th>Q00_Apreciação dos Módulos->Pontualidade e Assiduidade (1 - Fraca...5 - Muito Boa)</th>\n",
|
|||
|
|
" <th>Q00_Temas</th>\n",
|
|||
|
|
" <th>Q00_desenvolver</th>\n",
|
|||
|
|
" <th>Q00_Incluir</th>\n",
|
|||
|
|
" <th>Q00_Positivos</th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </thead>\n",
|
|||
|
|
" <tbody>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>0</th>\n",
|
|||
|
|
" <td>117497</td>\n",
|
|||
|
|
" <td>03/08/2025 10:48:31</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>EA - 3.º Curso de Formação de Praças do Quadro...</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>Anónimo1</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>2025-08-03</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>5</td>\n",
|
|||
|
|
" <td>5</td>\n",
|
|||
|
|
" <td>5</td>\n",
|
|||
|
|
" <td>4</td>\n",
|
|||
|
|
" <td>4</td>\n",
|
|||
|
|
" <td>4</td>\n",
|
|||
|
|
" <td>Armamento e tiro</td>\n",
|
|||
|
|
" <td>Armamento e tiro</td>\n",
|
|||
|
|
" <td>Nada a referir</td>\n",
|
|||
|
|
" <td>Os tempos de formação dedicados à prática do t...</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>1</th>\n",
|
|||
|
|
" <td>117890</td>\n",
|
|||
|
|
" <td>13/08/2025 09:30:14</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>EA - 3.º Curso de Formação de Praças do Quadro...</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>Anónimo2</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>2025-08-13</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>4</td>\n",
|
|||
|
|
" <td>4</td>\n",
|
|||
|
|
" <td>3</td>\n",
|
|||
|
|
" <td>3</td>\n",
|
|||
|
|
" <td>3</td>\n",
|
|||
|
|
" <td>4</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </tbody>\n",
|
|||
|
|
"</table>\n",
|
|||
|
|
"<p>2 rows × 27 columns</p>\n",
|
|||
|
|
"</div>"
|
|||
|
|
],
|
|||
|
|
"text/plain": [
|
|||
|
|
" Resposta Data/hora de submissão: Instituição Departamento \\\n",
|
|||
|
|
"0 117497 03/08/2025 10:48:31 NaN NaN \n",
|
|||
|
|
"1 117890 13/08/2025 09:30:14 NaN NaN \n",
|
|||
|
|
"\n",
|
|||
|
|
" SalaOnline Grupo ID Nome completo \\\n",
|
|||
|
|
"0 EA - 3.º Curso de Formação de Praças do Quadro... NaN NaN Anónimo1 \n",
|
|||
|
|
"1 EA - 3.º Curso de Formação de Praças do Quadro... NaN NaN Anónimo2 \n",
|
|||
|
|
"\n",
|
|||
|
|
" NIM / CC / BI Q00_Data ... \\\n",
|
|||
|
|
"0 NaN 2025-08-03 ... \n",
|
|||
|
|
"1 NaN 2025-08-13 ... \n",
|
|||
|
|
"\n",
|
|||
|
|
" Q00_Funcionamento do Curso->Documentação ao Dispor (1 - Inadequada...5 - Adequada à Realidade) \\\n",
|
|||
|
|
"0 5 \n",
|
|||
|
|
"1 4 \n",
|
|||
|
|
"\n",
|
|||
|
|
" Q00_Funcionamento do Curso->Apoio Administrativo (1- - Ineficaz...5 - Muito eficaz) \\\n",
|
|||
|
|
"0 5 \n",
|
|||
|
|
"1 4 \n",
|
|||
|
|
"\n",
|
|||
|
|
" Q00_Funcionamento do Curso->Apoio Prestado pelo Coordenador (1 - Ineficaz...5 - Muito eficaz) \\\n",
|
|||
|
|
"0 5 \n",
|
|||
|
|
"1 3 \n",
|
|||
|
|
"\n",
|
|||
|
|
" Q00_Apreciação dos Módulos->Motivação dos Participantes (1 - Baixa...5 - Muito Elevada) \\\n",
|
|||
|
|
"0 4 \n",
|
|||
|
|
"1 3 \n",
|
|||
|
|
"\n",
|
|||
|
|
" Q00_Apreciação dos Módulos->Relacionamento entre Participantes (1 - Fechado...5- Muito Aberto) \\\n",
|
|||
|
|
"0 4 \n",
|
|||
|
|
"1 3 \n",
|
|||
|
|
"\n",
|
|||
|
|
" Q00_Apreciação dos Módulos->Pontualidade e Assiduidade (1 - Fraca...5 - Muito Boa) \\\n",
|
|||
|
|
"0 4 \n",
|
|||
|
|
"1 4 \n",
|
|||
|
|
"\n",
|
|||
|
|
" Q00_Temas Q00_desenvolver Q00_Incluir \\\n",
|
|||
|
|
"0 Armamento e tiro Armamento e tiro Nada a referir \n",
|
|||
|
|
"1 NaN NaN NaN \n",
|
|||
|
|
"\n",
|
|||
|
|
" Q00_Positivos \n",
|
|||
|
|
"0 Os tempos de formação dedicados à prática do t... \n",
|
|||
|
|
"1 NaN \n",
|
|||
|
|
"\n",
|
|||
|
|
"[2 rows x 27 columns]"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"execution_count": 57,
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "execute_result"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"df4"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 58,
|
|||
|
|
"id": "f63e680c",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def replace_placeholder_with_propostas(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df3: pd.DataFrame,\n",
|
|||
|
|
" placeholder: str = \"{{Propostas}}\",\n",
|
|||
|
|
" left_indent_cm: float = 2.75,\n",
|
|||
|
|
" hanging_cm: float = 0.6,\n",
|
|||
|
|
" font_name: str = \"Arial\",\n",
|
|||
|
|
" font_size_pt: int = 12,\n",
|
|||
|
|
" deduplicate: bool = True,\n",
|
|||
|
|
" max_items: int | None = None,\n",
|
|||
|
|
") -> int:\n",
|
|||
|
|
" cols = [c for c in df3.columns if \"_propostas\" in str(c).lower()]\n",
|
|||
|
|
" itens = []\n",
|
|||
|
|
" for c in cols:\n",
|
|||
|
|
" s = df3[c].dropna().astype(str).str.strip()\n",
|
|||
|
|
" s = s[s != \"\"]\n",
|
|||
|
|
" itens.extend(s.tolist())\n",
|
|||
|
|
"\n",
|
|||
|
|
" if deduplicate:\n",
|
|||
|
|
" seen = set()\n",
|
|||
|
|
" out = []\n",
|
|||
|
|
" for t in itens:\n",
|
|||
|
|
" k = t.lower()\n",
|
|||
|
|
" if k not in seen:\n",
|
|||
|
|
" seen.add(k)\n",
|
|||
|
|
" out.append(t)\n",
|
|||
|
|
" itens = out\n",
|
|||
|
|
"\n",
|
|||
|
|
" if max_items is not None:\n",
|
|||
|
|
" itens = itens[:max_items]\n",
|
|||
|
|
"\n",
|
|||
|
|
" if not itens:\n",
|
|||
|
|
" itens = [\"Sem dados\"]\n",
|
|||
|
|
"\n",
|
|||
|
|
" df_tmp = pd.DataFrame({\"_Propostas\": itens})\n",
|
|||
|
|
"\n",
|
|||
|
|
" replaced = 0\n",
|
|||
|
|
"\n",
|
|||
|
|
" while True:\n",
|
|||
|
|
" changed = False\n",
|
|||
|
|
" for p in list(iter_all_paragraphs_everywhere(doc)):\n",
|
|||
|
|
" if placeholder in (p.text or \"\"):\n",
|
|||
|
|
" replace_placeholder_with_column_subitems_hanging2(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df_tmp,\n",
|
|||
|
|
" placeholder=placeholder,\n",
|
|||
|
|
" column_contains=\"_propostas\",\n",
|
|||
|
|
" left_indent_cm=left_indent_cm,\n",
|
|||
|
|
" hanging_cm=hanging_cm,\n",
|
|||
|
|
" font_name=font_name,\n",
|
|||
|
|
" font_size_pt=font_size_pt,\n",
|
|||
|
|
" deduplicate=False\n",
|
|||
|
|
" )\n",
|
|||
|
|
" replaced += 1\n",
|
|||
|
|
" changed = True\n",
|
|||
|
|
" break \n",
|
|||
|
|
" if not changed:\n",
|
|||
|
|
" break\n",
|
|||
|
|
"\n",
|
|||
|
|
" return replaced"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 59,
|
|||
|
|
"id": "31986236",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def delete_paragraph(paragraph):\n",
|
|||
|
|
" p = paragraph._p\n",
|
|||
|
|
" parent = p.getparent()\n",
|
|||
|
|
" if parent is not None:\n",
|
|||
|
|
" parent.remove(p)\n",
|
|||
|
|
"def _iter_paragraphs_in_table(tbl):\n",
|
|||
|
|
" for row in tbl.rows:\n",
|
|||
|
|
" for cell in row.cells:\n",
|
|||
|
|
" for p in cell.paragraphs:\n",
|
|||
|
|
" yield p\n",
|
|||
|
|
" for t in cell.tables:\n",
|
|||
|
|
" yield from _iter_paragraphs_in_table(t)\n",
|
|||
|
|
"def iter_all_paragraphs_everywhere(doc):\n",
|
|||
|
|
" for p in doc.paragraphs:\n",
|
|||
|
|
" yield p\n",
|
|||
|
|
" for t in doc.tables:\n",
|
|||
|
|
" yield from _iter_paragraphs_in_table(t)\n",
|
|||
|
|
" for section in doc.sections:\n",
|
|||
|
|
" containers = [\n",
|
|||
|
|
" section.header,\n",
|
|||
|
|
" section.footer,\n",
|
|||
|
|
" section.first_page_header,\n",
|
|||
|
|
" section.first_page_footer,\n",
|
|||
|
|
" section.even_page_header,\n",
|
|||
|
|
" section.even_page_footer,\n",
|
|||
|
|
" ]\n",
|
|||
|
|
" for c in containers:\n",
|
|||
|
|
" for p in c.paragraphs:\n",
|
|||
|
|
" yield p\n",
|
|||
|
|
" for t in c.tables:\n",
|
|||
|
|
" yield from _iter_paragraphs_in_table(t)\n",
|
|||
|
|
"def delete_lines_with_ancora(doc, pattern=r\"ANCORA\") -> int:\n",
|
|||
|
|
" rx = re.compile(pattern, flags=re.IGNORECASE)\n",
|
|||
|
|
" to_delete = []\n",
|
|||
|
|
" for p in iter_all_paragraphs_everywhere(doc):\n",
|
|||
|
|
" if rx.search(p.text or \"\"):\n",
|
|||
|
|
" to_delete.append(p)\n",
|
|||
|
|
" for p in reversed(to_delete):\n",
|
|||
|
|
" delete_paragraph(p)\n",
|
|||
|
|
" return len(to_delete)\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 60,
|
|||
|
|
"id": "abece7b0",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def replace_placeholder_with_temasdir(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df4: pd.DataFrame,\n",
|
|||
|
|
" placeholder: str = \"{{temasdir}}\",\n",
|
|||
|
|
" left_indent_cm: float = 2.75,\n",
|
|||
|
|
" hanging_cm: float = 0.6,\n",
|
|||
|
|
" font_name: str = \"Arial\",\n",
|
|||
|
|
" font_size_pt: int = 12,\n",
|
|||
|
|
" deduplicate: bool = True,\n",
|
|||
|
|
" max_items: int | None = None,\n",
|
|||
|
|
") -> int:\n",
|
|||
|
|
" cols = [c for c in df4.columns if \"_temas\" in str(c).lower()]\n",
|
|||
|
|
" itens = []\n",
|
|||
|
|
" for c in cols:\n",
|
|||
|
|
" s = df4[c].dropna().astype(str).str.strip()\n",
|
|||
|
|
" s = s[s != \"\"]\n",
|
|||
|
|
" itens.extend(s.tolist())\n",
|
|||
|
|
" if deduplicate:\n",
|
|||
|
|
" seen = set()\n",
|
|||
|
|
" out = []\n",
|
|||
|
|
" for t in itens:\n",
|
|||
|
|
" k = t.lower()\n",
|
|||
|
|
" if k not in seen:\n",
|
|||
|
|
" seen.add(k)\n",
|
|||
|
|
" out.append(t)\n",
|
|||
|
|
" itens = out\n",
|
|||
|
|
" if max_items is not None:\n",
|
|||
|
|
" itens = itens[:max_items]\n",
|
|||
|
|
" if not itens:\n",
|
|||
|
|
" itens = [\"Sem dados\"]\n",
|
|||
|
|
" df_tmp = pd.DataFrame({\"_Temas\": itens})\n",
|
|||
|
|
" replaced = 0\n",
|
|||
|
|
" while True:\n",
|
|||
|
|
" changed = False\n",
|
|||
|
|
" for p in list(iter_all_paragraphs_everywhere(doc)):\n",
|
|||
|
|
" if placeholder in (p.text or \"\"):\n",
|
|||
|
|
" replace_placeholder_with_column_subitems_hanging2(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df_tmp,\n",
|
|||
|
|
" placeholder=placeholder,\n",
|
|||
|
|
" column_contains=\"_temas\",\n",
|
|||
|
|
" left_indent_cm=left_indent_cm,\n",
|
|||
|
|
" hanging_cm=hanging_cm,\n",
|
|||
|
|
" font_name=font_name,\n",
|
|||
|
|
" font_size_pt=font_size_pt,\n",
|
|||
|
|
" deduplicate=False\n",
|
|||
|
|
" )\n",
|
|||
|
|
" replaced += 1\n",
|
|||
|
|
" changed = True\n",
|
|||
|
|
" break\n",
|
|||
|
|
" if not changed:\n",
|
|||
|
|
" break\n",
|
|||
|
|
" return replaced"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 61,
|
|||
|
|
"id": "280d80ff",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def replace_placeholder_with_desenvolverdir(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df4: pd.DataFrame,\n",
|
|||
|
|
" placeholder: str = \"{{desenvolverdir}}\",\n",
|
|||
|
|
" left_indent_cm: float = 2.75,\n",
|
|||
|
|
" hanging_cm: float = 0.6,\n",
|
|||
|
|
" font_name: str = \"Arial\",\n",
|
|||
|
|
" font_size_pt: int = 12,\n",
|
|||
|
|
" deduplicate: bool = True,\n",
|
|||
|
|
" max_items: int | None = None,\n",
|
|||
|
|
") -> int:\n",
|
|||
|
|
" cols = [c for c in df4.columns if \"_desenvolver\" in str(c).lower()]\n",
|
|||
|
|
" itens = []\n",
|
|||
|
|
" for c in cols:\n",
|
|||
|
|
" s = df4[c].dropna().astype(str).str.strip()\n",
|
|||
|
|
" s = s[s != \"\"]\n",
|
|||
|
|
" itens.extend(s.tolist())\n",
|
|||
|
|
" if deduplicate:\n",
|
|||
|
|
" seen = set()\n",
|
|||
|
|
" out = []\n",
|
|||
|
|
" for t in itens:\n",
|
|||
|
|
" k = t.lower()\n",
|
|||
|
|
" if k not in seen:\n",
|
|||
|
|
" seen.add(k)\n",
|
|||
|
|
" out.append(t)\n",
|
|||
|
|
" itens = out\n",
|
|||
|
|
" if max_items is not None:\n",
|
|||
|
|
" itens = itens[:max_items]\n",
|
|||
|
|
" if not itens:\n",
|
|||
|
|
" itens = [\"Sem dados\"]\n",
|
|||
|
|
" df_tmp = pd.DataFrame({\"_Desenvolver\": itens})\n",
|
|||
|
|
" replaced = 0\n",
|
|||
|
|
" while True:\n",
|
|||
|
|
" changed = False\n",
|
|||
|
|
" for p in list(iter_all_paragraphs_everywhere(doc)):\n",
|
|||
|
|
" if placeholder in (p.text or \"\"):\n",
|
|||
|
|
" replace_placeholder_with_column_subitems_hanging2(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df_tmp,\n",
|
|||
|
|
" placeholder=placeholder,\n",
|
|||
|
|
" column_contains=\"_desenvolver\",\n",
|
|||
|
|
" left_indent_cm=left_indent_cm,\n",
|
|||
|
|
" hanging_cm=hanging_cm,\n",
|
|||
|
|
" font_name=font_name,\n",
|
|||
|
|
" font_size_pt=font_size_pt,\n",
|
|||
|
|
" deduplicate=False\n",
|
|||
|
|
" )\n",
|
|||
|
|
" replaced += 1\n",
|
|||
|
|
" changed = True\n",
|
|||
|
|
" break\n",
|
|||
|
|
" if not changed:\n",
|
|||
|
|
" break\n",
|
|||
|
|
" return replaced"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 62,
|
|||
|
|
"id": "9c0fa5f2",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def replace_placeholder_with_incluirdir(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df4: pd.DataFrame,\n",
|
|||
|
|
" placeholder: str = \"{{incluirdir}}\",\n",
|
|||
|
|
" left_indent_cm: float = 2.75,\n",
|
|||
|
|
" hanging_cm: float = 0.6,\n",
|
|||
|
|
" font_name: str = \"Arial\",\n",
|
|||
|
|
" font_size_pt: int = 12,\n",
|
|||
|
|
" deduplicate: bool = True,\n",
|
|||
|
|
" max_items: int | None = None,\n",
|
|||
|
|
") -> int:\n",
|
|||
|
|
" cols = [c for c in df4.columns if \"_incluir\" in str(c).lower()]\n",
|
|||
|
|
" itens = []\n",
|
|||
|
|
" for c in cols:\n",
|
|||
|
|
" s = df4[c].dropna().astype(str).str.strip()\n",
|
|||
|
|
" s = s[s != \"\"]\n",
|
|||
|
|
" itens.extend(s.tolist())\n",
|
|||
|
|
" if deduplicate:\n",
|
|||
|
|
" seen = set()\n",
|
|||
|
|
" out = []\n",
|
|||
|
|
" for t in itens:\n",
|
|||
|
|
" k = t.lower()\n",
|
|||
|
|
" if k not in seen:\n",
|
|||
|
|
" seen.add(k)\n",
|
|||
|
|
" out.append(t)\n",
|
|||
|
|
" itens = out\n",
|
|||
|
|
" if max_items is not None:\n",
|
|||
|
|
" itens = itens[:max_items]\n",
|
|||
|
|
" if not itens:\n",
|
|||
|
|
" itens = [\"Sem dados\"]\n",
|
|||
|
|
" df_tmp = pd.DataFrame({\"_Incluir\": itens})\n",
|
|||
|
|
" replaced = 0\n",
|
|||
|
|
" while True:\n",
|
|||
|
|
" changed = False\n",
|
|||
|
|
" for p in list(iter_all_paragraphs_everywhere(doc)):\n",
|
|||
|
|
" if placeholder in (p.text or \"\"):\n",
|
|||
|
|
" replace_placeholder_with_column_subitems_hanging2(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df_tmp,\n",
|
|||
|
|
" placeholder=placeholder,\n",
|
|||
|
|
" column_contains=\"_incluir\",\n",
|
|||
|
|
" left_indent_cm=left_indent_cm,\n",
|
|||
|
|
" hanging_cm=hanging_cm,\n",
|
|||
|
|
" font_name=font_name,\n",
|
|||
|
|
" font_size_pt=font_size_pt,\n",
|
|||
|
|
" deduplicate=False\n",
|
|||
|
|
" )\n",
|
|||
|
|
" replaced += 1\n",
|
|||
|
|
" changed = True\n",
|
|||
|
|
" break\n",
|
|||
|
|
" if not changed:\n",
|
|||
|
|
" break\n",
|
|||
|
|
" return replaced\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 63,
|
|||
|
|
"id": "84a86195",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def replace_placeholder_with_positivosdir(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df4: pd.DataFrame,\n",
|
|||
|
|
" placeholder: str = \"{{positivosdir}}\",\n",
|
|||
|
|
" left_indent_cm: float = 2.75,\n",
|
|||
|
|
" hanging_cm: float = 0.6,\n",
|
|||
|
|
" font_name: str = \"Arial\",\n",
|
|||
|
|
" font_size_pt: int = 12,\n",
|
|||
|
|
" deduplicate: bool = True,\n",
|
|||
|
|
" max_items: int | None = None,\n",
|
|||
|
|
") -> int:\n",
|
|||
|
|
" cols = [c for c in df4.columns if \"_positivos\" in str(c).lower()]\n",
|
|||
|
|
" itens = []\n",
|
|||
|
|
" for c in cols:\n",
|
|||
|
|
" s = df4[c].dropna().astype(str).str.strip()\n",
|
|||
|
|
" s = s[s != \"\"]\n",
|
|||
|
|
" itens.extend(s.tolist())\n",
|
|||
|
|
" if deduplicate:\n",
|
|||
|
|
" seen = set()\n",
|
|||
|
|
" out = []\n",
|
|||
|
|
" for t in itens:\n",
|
|||
|
|
" k = t.lower()\n",
|
|||
|
|
" if k not in seen:\n",
|
|||
|
|
" seen.add(k)\n",
|
|||
|
|
" out.append(t)\n",
|
|||
|
|
" itens = out\n",
|
|||
|
|
" if max_items is not None:\n",
|
|||
|
|
" itens = itens[:max_items]\n",
|
|||
|
|
" if not itens:\n",
|
|||
|
|
" itens = [\"Sem dados\"]\n",
|
|||
|
|
" df_tmp = pd.DataFrame({\"_Positivos\": itens})\n",
|
|||
|
|
" replaced = 0\n",
|
|||
|
|
" while True:\n",
|
|||
|
|
" changed = False\n",
|
|||
|
|
" for p in list(iter_all_paragraphs_everywhere(doc)):\n",
|
|||
|
|
" if placeholder in (p.text or \"\"):\n",
|
|||
|
|
" replace_placeholder_with_column_subitems_hanging2(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df_tmp,\n",
|
|||
|
|
" placeholder=placeholder,\n",
|
|||
|
|
" column_contains=\"_positivos\",\n",
|
|||
|
|
" left_indent_cm=left_indent_cm,\n",
|
|||
|
|
" hanging_cm=hanging_cm,\n",
|
|||
|
|
" font_name=font_name,\n",
|
|||
|
|
" font_size_pt=font_size_pt,\n",
|
|||
|
|
" deduplicate=False\n",
|
|||
|
|
" )\n",
|
|||
|
|
" replaced += 1\n",
|
|||
|
|
" changed = True\n",
|
|||
|
|
" break\n",
|
|||
|
|
" if not changed:\n",
|
|||
|
|
" break\n",
|
|||
|
|
" return replaced\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 64,
|
|||
|
|
"id": "c75442af",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"text/html": [
|
|||
|
|
"<div>\n",
|
|||
|
|
"<style scoped>\n",
|
|||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
|
" vertical-align: middle;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe tbody tr th {\n",
|
|||
|
|
" vertical-align: top;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe thead th {\n",
|
|||
|
|
" text-align: right;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"</style>\n",
|
|||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
|
" <thead>\n",
|
|||
|
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th>Resposta</th>\n",
|
|||
|
|
" <th>Data/hora de submissão:</th>\n",
|
|||
|
|
" <th>Instituição</th>\n",
|
|||
|
|
" <th>Departamento</th>\n",
|
|||
|
|
" <th>SalaOnline</th>\n",
|
|||
|
|
" <th>Grupo</th>\n",
|
|||
|
|
" <th>ID</th>\n",
|
|||
|
|
" <th>Nome completo</th>\n",
|
|||
|
|
" <th>NIM / CC / BI</th>\n",
|
|||
|
|
" <th>Q00_Data</th>\n",
|
|||
|
|
" <th>...</th>\n",
|
|||
|
|
" <th>Q00_Funcionamento do Curso->Documentação ao Dispor (1 - Inadequada...5 - Adequada à Realidade)</th>\n",
|
|||
|
|
" <th>Q00_Funcionamento do Curso->Apoio Administrativo (1- - Ineficaz...5 - Muito eficaz)</th>\n",
|
|||
|
|
" <th>Q00_Funcionamento do Curso->Apoio Prestado pelo Coordenador (1 - Ineficaz...5 - Muito eficaz)</th>\n",
|
|||
|
|
" <th>Q00_Apreciação dos Módulos->Motivação dos Participantes (1 - Baixa...5 - Muito Elevada)</th>\n",
|
|||
|
|
" <th>Q00_Apreciação dos Módulos->Relacionamento entre Participantes (1 - Fechado...5- Muito Aberto)</th>\n",
|
|||
|
|
" <th>Q00_Apreciação dos Módulos->Pontualidade e Assiduidade (1 - Fraca...5 - Muito Boa)</th>\n",
|
|||
|
|
" <th>Q00_Temas</th>\n",
|
|||
|
|
" <th>Q00_desenvolver</th>\n",
|
|||
|
|
" <th>Q00_Incluir</th>\n",
|
|||
|
|
" <th>Q00_Positivos</th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </thead>\n",
|
|||
|
|
" <tbody>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>0</th>\n",
|
|||
|
|
" <td>117497</td>\n",
|
|||
|
|
" <td>03/08/2025 10:48:31</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>EA - 3.º Curso de Formação de Praças do Quadro...</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>Anónimo1</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>2025-08-03</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>5</td>\n",
|
|||
|
|
" <td>5</td>\n",
|
|||
|
|
" <td>5</td>\n",
|
|||
|
|
" <td>4</td>\n",
|
|||
|
|
" <td>4</td>\n",
|
|||
|
|
" <td>4</td>\n",
|
|||
|
|
" <td>Armamento e tiro</td>\n",
|
|||
|
|
" <td>Armamento e tiro</td>\n",
|
|||
|
|
" <td>Nada a referir</td>\n",
|
|||
|
|
" <td>Os tempos de formação dedicados à prática do t...</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>1</th>\n",
|
|||
|
|
" <td>117890</td>\n",
|
|||
|
|
" <td>13/08/2025 09:30:14</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>EA - 3.º Curso de Formação de Praças do Quadro...</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>Anónimo2</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>2025-08-13</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>4</td>\n",
|
|||
|
|
" <td>4</td>\n",
|
|||
|
|
" <td>3</td>\n",
|
|||
|
|
" <td>3</td>\n",
|
|||
|
|
" <td>3</td>\n",
|
|||
|
|
" <td>4</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </tbody>\n",
|
|||
|
|
"</table>\n",
|
|||
|
|
"<p>2 rows × 27 columns</p>\n",
|
|||
|
|
"</div>"
|
|||
|
|
],
|
|||
|
|
"text/plain": [
|
|||
|
|
" Resposta Data/hora de submissão: Instituição Departamento \\\n",
|
|||
|
|
"0 117497 03/08/2025 10:48:31 NaN NaN \n",
|
|||
|
|
"1 117890 13/08/2025 09:30:14 NaN NaN \n",
|
|||
|
|
"\n",
|
|||
|
|
" SalaOnline Grupo ID Nome completo \\\n",
|
|||
|
|
"0 EA - 3.º Curso de Formação de Praças do Quadro... NaN NaN Anónimo1 \n",
|
|||
|
|
"1 EA - 3.º Curso de Formação de Praças do Quadro... NaN NaN Anónimo2 \n",
|
|||
|
|
"\n",
|
|||
|
|
" NIM / CC / BI Q00_Data ... \\\n",
|
|||
|
|
"0 NaN 2025-08-03 ... \n",
|
|||
|
|
"1 NaN 2025-08-13 ... \n",
|
|||
|
|
"\n",
|
|||
|
|
" Q00_Funcionamento do Curso->Documentação ao Dispor (1 - Inadequada...5 - Adequada à Realidade) \\\n",
|
|||
|
|
"0 5 \n",
|
|||
|
|
"1 4 \n",
|
|||
|
|
"\n",
|
|||
|
|
" Q00_Funcionamento do Curso->Apoio Administrativo (1- - Ineficaz...5 - Muito eficaz) \\\n",
|
|||
|
|
"0 5 \n",
|
|||
|
|
"1 4 \n",
|
|||
|
|
"\n",
|
|||
|
|
" Q00_Funcionamento do Curso->Apoio Prestado pelo Coordenador (1 - Ineficaz...5 - Muito eficaz) \\\n",
|
|||
|
|
"0 5 \n",
|
|||
|
|
"1 3 \n",
|
|||
|
|
"\n",
|
|||
|
|
" Q00_Apreciação dos Módulos->Motivação dos Participantes (1 - Baixa...5 - Muito Elevada) \\\n",
|
|||
|
|
"0 4 \n",
|
|||
|
|
"1 3 \n",
|
|||
|
|
"\n",
|
|||
|
|
" Q00_Apreciação dos Módulos->Relacionamento entre Participantes (1 - Fechado...5- Muito Aberto) \\\n",
|
|||
|
|
"0 4 \n",
|
|||
|
|
"1 3 \n",
|
|||
|
|
"\n",
|
|||
|
|
" Q00_Apreciação dos Módulos->Pontualidade e Assiduidade (1 - Fraca...5 - Muito Boa) \\\n",
|
|||
|
|
"0 4 \n",
|
|||
|
|
"1 4 \n",
|
|||
|
|
"\n",
|
|||
|
|
" Q00_Temas Q00_desenvolver Q00_Incluir \\\n",
|
|||
|
|
"0 Armamento e tiro Armamento e tiro Nada a referir \n",
|
|||
|
|
"1 NaN NaN NaN \n",
|
|||
|
|
"\n",
|
|||
|
|
" Q00_Positivos \n",
|
|||
|
|
"0 Os tempos de formação dedicados à prática do t... \n",
|
|||
|
|
"1 NaN \n",
|
|||
|
|
"\n",
|
|||
|
|
"[2 rows x 27 columns]"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"execution_count": 64,
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "execute_result"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"df4"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 65,
|
|||
|
|
"id": "5721702e",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"OBSERVACOES substituídos: 0\n",
|
|||
|
|
"Saved: relatorio_final.docx\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"nforma, mediaformquanl, mediaform = compute_formadores_summary(df2)\n",
|
|||
|
|
"replacements = {\n",
|
|||
|
|
" \"{{NOMEDOCURSOEXTENSO}}\": str(NOMEDOCURSO),\n",
|
|||
|
|
" \"{{prerequesitos}}\": str(prerequesitos),\n",
|
|||
|
|
" \"{{prerequesitos2}}\": str(100-prerequesitos),\n",
|
|||
|
|
" \"{{Conteudo}}\": str(Conteudo),\n",
|
|||
|
|
" \"{{Conteudo2}}\": str(100-Conteudo),\n",
|
|||
|
|
" \"{{objgeral}}\": str(objgeral),\n",
|
|||
|
|
" \"{{objgeral2}}\": str(100-objgeral), \n",
|
|||
|
|
" \"{{objfinal}}\": str(objfinal),\n",
|
|||
|
|
" \"{{objfinal2}}\": str(100-objfinal), \n",
|
|||
|
|
" \"{{objadq}}\": str(objadq),\n",
|
|||
|
|
" \"{{objadq2}}\": str(100-objadq),\n",
|
|||
|
|
" \"{{avadq}}\": str(avadq),\n",
|
|||
|
|
" \"{{avadq2}}\": str(100-avadq),\n",
|
|||
|
|
" \"{{refere}}\": str(refere),\n",
|
|||
|
|
" \"{{refere2}}\": str(100-refere),\n",
|
|||
|
|
" \"{{ninq4}}\": str(ninq4), \n",
|
|||
|
|
" \"{{ninq3}}\": str(ninq3),\n",
|
|||
|
|
" \"{{nforma}}\" : str(nforma),\n",
|
|||
|
|
" \"{{mediaformquanl}}\" : str(mediaformquanl),\n",
|
|||
|
|
" \"{{mediaform}}\" : str(mediaform),\n",
|
|||
|
|
" \"{{mediaaloj}}\": str(medalojamento2),\n",
|
|||
|
|
" \"{{mediaaloj1}}\": str(medalojamento),\n",
|
|||
|
|
" \"{{mediaalime}}\": str(medalimentacao2),\n",
|
|||
|
|
" \"{{mediaalime1}}\": str(medalimentacao),\n",
|
|||
|
|
" \"{{medalimentacao}}\": str(medalimentacaofinal),\n",
|
|||
|
|
" \"{{apdir}}\": str(medapdir),\n",
|
|||
|
|
" \"{{graudif}}\": str(meddificuldade2),\n",
|
|||
|
|
" \"{{graudif1}}\": str(meddificuldade),\n",
|
|||
|
|
" \"{{meddificuldadefinal}}\": str(meddificuldadefinal),\n",
|
|||
|
|
" \"{{funcfut}}\": str(medfuncfut2),\n",
|
|||
|
|
" \"{{funcfut1}}\": str(medfuncfut),\n",
|
|||
|
|
" \"{{medfuncfutfinal}}\": str(medfuncfutfinal),\n",
|
|||
|
|
" \"{{apadm}}\": str(medaplog),\n",
|
|||
|
|
" \"{{motapro}}\": str(medmotvpart2),\n",
|
|||
|
|
" \"{{motapro1}}\": str(medmotvpart),\n",
|
|||
|
|
" \"{{medmotvpartfinal}}\": str(medmotvpartfinal),\n",
|
|||
|
|
" \"{{conhcurso}}\": str(medconhecimento2),\n",
|
|||
|
|
" \"{{conhcurso1}}\": str(medconhecimento),\n",
|
|||
|
|
" \"{{medconhecimentofinal}}\": str(medconhecimentofinal), \n",
|
|||
|
|
" \"{{objcruso}}\": str(objcruso),\n",
|
|||
|
|
" \"{{contcurso}}\": str(contcurso),\n",
|
|||
|
|
" \"{{medalojamento}}\": str(medalojamentofinal),\n",
|
|||
|
|
" \"{{adeqtrab}}\": str(adeqtrab),\n",
|
|||
|
|
" \"{{instform}}\": str(instform),\n",
|
|||
|
|
" \"{{audiovisuais}}\": str(audiovisuais),\n",
|
|||
|
|
" \"{{biblio}}\": str(biblio),\n",
|
|||
|
|
" \"{{ninq}}\": str(ninq),\n",
|
|||
|
|
" \"{{ninqfim}}\": str(ninq2),\n",
|
|||
|
|
" \"{{medpub}}\": str(medpub),\n",
|
|||
|
|
" \"{{medmeiosaux}}\": str(medmeiosaux),\n",
|
|||
|
|
" \"{{medapform}}\": str(medapform),\n",
|
|||
|
|
" \"{{medapdc}}\": str(medapdc),\n",
|
|||
|
|
" \"{{medobjesp}}\": str(medobjesp),\n",
|
|||
|
|
" \"{{medmetensi}}\": str(medmetensi),\n",
|
|||
|
|
" \"{{medtempform}}\": str(medtempform),\n",
|
|||
|
|
" \"{{medlocaisform}}\": str(medlocaisform),\n",
|
|||
|
|
" \"{{medlançaaval}}\": str(medlançaaval),\n",
|
|||
|
|
" \"{{medtipoaval}}\": str(medtipoaval),\n",
|
|||
|
|
" \"{{medtempoaval}}\": str(medtempoaval),\n",
|
|||
|
|
" \"{{medobjapre}}\": str(medobjapre),\n",
|
|||
|
|
" \"{{medadqonjesp}}\": str(medadqonjesp),\n",
|
|||
|
|
" \"{{medinterforma}}\": str(medinterforma),\n",
|
|||
|
|
" \"{{medprogcurso}}\": str(medprogcurso),\n",
|
|||
|
|
" \"{{medcontcurso}}\": str(medcontcurso),\n",
|
|||
|
|
" \"{{medestrcurso}}\": str(medestrcurso),\n",
|
|||
|
|
" \"{{medutilprat}}\": str(medutilprat),\n",
|
|||
|
|
" \"{{medcargahoraria}}\": str(medcargahoraria),\n",
|
|||
|
|
" \"{{medinstal}}\": str(medinstal),\n",
|
|||
|
|
" \"{{medaudiovis}}\": str(medaudiovis),\n",
|
|||
|
|
" \"{{meddocdispor}}\": str(meddocdispor),\n",
|
|||
|
|
" \"{{medapadmin}}\": str(medapadmin),\n",
|
|||
|
|
" \"{{medapcoord}}\": str(medapcoord),\n",
|
|||
|
|
" \"{{medmotform}}\": str(medmotform),\n",
|
|||
|
|
" \"{{medrelpart}}\": str(medrelpart),\n",
|
|||
|
|
" \"{{medpontass}}\": str(medpontass),\n",
|
|||
|
|
" \"{{med1}}\": str(med1),\n",
|
|||
|
|
" \"{{med2}}\": str(med2),\n",
|
|||
|
|
" \"{{med3}}\": str(med3),\n",
|
|||
|
|
" \"{{medpontassqual}}\": str(avaliacao_qualitativa(medpontass)),\n",
|
|||
|
|
" \"{{medrelpartqual}}\": str(avaliacao_qualitativa(medrelpart)),\n",
|
|||
|
|
" \"{{medmotformqual}}\": str(avaliacao_qualitativa(medmotform)),\n",
|
|||
|
|
" \"{{medapcoordqual}}\": str(avaliacao_qualitativa(medapcoord)),\n",
|
|||
|
|
" \"{{medapadminqual}}\": str(avaliacao_qualitativa(medapadmin)),\n",
|
|||
|
|
" \"{{meddocdisporqual}}\": str(avaliacao_qualitativa(meddocdispor)),\n",
|
|||
|
|
" \"{{medaudiovisqual}}\": str(avaliacao_qualitativa(medaudiovis)),\n",
|
|||
|
|
" \"{{medinstalqual}}\": str(avaliacao_qualitativa(medinstal)),\n",
|
|||
|
|
" \"{{medcargahorariaqual}}\": str(avaliacao_qualitativa(medcargahoraria)),\n",
|
|||
|
|
" \"{{medutilpratoqual}}\": str(avaliacao_qualitativa(medutilprat)),\n",
|
|||
|
|
" \"{{medestrcursooqual}}\": str(avaliacao_qualitativa(medestrcurso)),\n",
|
|||
|
|
" \"{{medprogcursoqual}}\": str(avaliacao_qualitativa(medcontcurso)),\n",
|
|||
|
|
" \"{{medinterformaqual}}\": str(avaliacao_qualitativa(medinterforma)),\n",
|
|||
|
|
" \"{{medadqonjespqual}}\": str(avaliacao_qualitativa(medadqonjesp)),\n",
|
|||
|
|
" \"{{medobjaprequal}}\": str(avaliacao_qualitativa(medobjapre)),\n",
|
|||
|
|
" \"{{medtempoavalqual}}\": str(avaliacao_qualitativa(medtempoaval)),\n",
|
|||
|
|
" \"{{medtipoavalqual}}\": str(avaliacao_qualitativa(medtipoaval)),\n",
|
|||
|
|
" \"{{medlançaavalual}}\": str(avaliacao_qualitativa(medlançaaval)),\n",
|
|||
|
|
" \"{{medlocaisformqual}}\": str(avaliacao_qualitativa(medlocaisform)),\n",
|
|||
|
|
" \"{{medtempformqual}}\": str(avaliacao_qualitativa(medtempform)),\n",
|
|||
|
|
" \"{{medmetensiqual}}\": str(avaliacao_qualitativa(medmetensi)),\n",
|
|||
|
|
" \"{{medobjespqual}}\": str(avaliacao_qualitativa(medobjesp)),\n",
|
|||
|
|
" \"{{medapdcqual}}\": str(avaliacao_qualitativa(medapdc)),\n",
|
|||
|
|
" \"{{medapformqual}}\": str(avaliacao_qualitativa(medapform)),\n",
|
|||
|
|
" \"{{medmeiosauxqual}}\": str(avaliacao_qualitativa(medmeiosaux)),\n",
|
|||
|
|
" \"{{medpubqual}}\": str(avaliacao_qualitativa(medpub)),\n",
|
|||
|
|
" \"{{mediaalojqual}}\": str(avaliacao_qualitativa(medalojamento2)),\n",
|
|||
|
|
" \"{{apdirqual}}\": str(avaliacao_qualitativa(medapdir)),\n",
|
|||
|
|
" \"{{funcfutqual}}\": str(avaliacao_qualitativa(medfuncfut2)),\n",
|
|||
|
|
" \"{{graudifaqual}}\": str(avaliacao_qualitativa(meddificuldade2)),\n",
|
|||
|
|
" \"{{apadmqual}}\": str(avaliacao_qualitativa(medaplog)),\n",
|
|||
|
|
" \"{{motaproqual}}\": str(avaliacao_qualitativa(medmotvpart2)),\n",
|
|||
|
|
" \"{{conhcursoqual}}\": str(avaliacao_qualitativa(medconhecimento2)),\n",
|
|||
|
|
" \"{{objcrusoqual}}\": str(avaliacao_qualitativa(objcruso)),\n",
|
|||
|
|
" \"{{contcursoqual}}\": str(avaliacao_qualitativa(contcurso)),\n",
|
|||
|
|
" \"{{adeqtrabqual}}\": str(avaliacao_qualitativa(adeqtrab)),\n",
|
|||
|
|
" \"{{instformqual}}\": str(avaliacao_qualitativa(instform)),\n",
|
|||
|
|
" \"{{audiovisuaisqual}}\": str(avaliacao_qualitativa(audiovisuais)),\n",
|
|||
|
|
" \"{{biblioqual}}\": str(avaliacao_qualitativa(biblio)),\n",
|
|||
|
|
" \"{{mediaalimequal}}\": str(avaliacao_qualitativa(medalimentacao2)),\n",
|
|||
|
|
" \"{{NOMEDOCURSO}}\": str(NOMEDOCURSOcurto),\n",
|
|||
|
|
" \"{{MESi}}\": str(MESi),\n",
|
|||
|
|
" \"{{AAAAi}}\": str(AAAAi),\n",
|
|||
|
|
" \"{{DDf}}\": str(DDf),\n",
|
|||
|
|
" \"{{MESf}}\": str(MESf),\n",
|
|||
|
|
" \"{{AAAAf}}\": str(AAAAf),\n",
|
|||
|
|
" \"{{MEDIAFINALCURSO}}\": str(MEDIAFINALCURSO),\n",
|
|||
|
|
" \"{{NFORMANDOS}}\": str(NFORMANDOS),\n",
|
|||
|
|
" \"{{FINALIDADECURSO}}\": str(FINALIDADECURSO),\n",
|
|||
|
|
" \"{{DDi}}\": str(DDi)\n",
|
|||
|
|
"}\n",
|
|||
|
|
"template_path = \"Anexo RAI..docx\" \n",
|
|||
|
|
"output_path = \"relatorio_final.docx\" \n",
|
|||
|
|
"\n",
|
|||
|
|
"doc = Document(template_path)\n",
|
|||
|
|
"if not file_path3:\n",
|
|||
|
|
" print(\"Nenhum ficheiro selecionado. A remover secção do documento.\")\n",
|
|||
|
|
" delete_all_between_anchors_xml(doc, \"{{ANCORA1}}\", \"{{ANCORA2}}\", debug=True)\n",
|
|||
|
|
" df3 = None\n",
|
|||
|
|
"if file_path3:\n",
|
|||
|
|
" replace_placeholder_with_propostas(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df3,\n",
|
|||
|
|
" placeholder=\"{{Propostas}}\",\n",
|
|||
|
|
" left_indent_cm=2.75,\n",
|
|||
|
|
" hanging_cm=0.6,\n",
|
|||
|
|
" font_name=\"Arial\",\n",
|
|||
|
|
" font_size_pt=12,\n",
|
|||
|
|
" deduplicate=True,\n",
|
|||
|
|
" max_items=None\n",
|
|||
|
|
" )\n",
|
|||
|
|
"\n",
|
|||
|
|
"if not file_path4:\n",
|
|||
|
|
" print(\"Nenhum ficheiro selecionado. A remover secção do documento.\")\n",
|
|||
|
|
" delete_all_between_anchors_xml(doc, \"{{ANCORA3}}\", \"{{ANCORA4}}\", debug=True)\n",
|
|||
|
|
" df4 = None\n",
|
|||
|
|
"if file_path4:\n",
|
|||
|
|
" replace_placeholder_with_positivosdir(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df4,\n",
|
|||
|
|
" placeholder=\"{{positivosdir}}\",\n",
|
|||
|
|
" left_indent_cm=2.75,\n",
|
|||
|
|
" hanging_cm=0.6,\n",
|
|||
|
|
" font_name=\"Arial\",\n",
|
|||
|
|
" font_size_pt=12,\n",
|
|||
|
|
" deduplicate=True\n",
|
|||
|
|
" )\n",
|
|||
|
|
" replace_placeholder_with_incluirdir(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df4,\n",
|
|||
|
|
" placeholder=\"{{incluirdir}}\",\n",
|
|||
|
|
" left_indent_cm=2.75,\n",
|
|||
|
|
" hanging_cm=0.6,\n",
|
|||
|
|
" font_name=\"Arial\",\n",
|
|||
|
|
" font_size_pt=12,\n",
|
|||
|
|
" deduplicate=True\n",
|
|||
|
|
" )\n",
|
|||
|
|
" replace_placeholder_with_desenvolverdir(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df4,\n",
|
|||
|
|
" placeholder=\"{{desenvolverdir}}\",\n",
|
|||
|
|
" left_indent_cm=2.75,\n",
|
|||
|
|
" hanging_cm=0.6,\n",
|
|||
|
|
" font_name=\"Arial\",\n",
|
|||
|
|
" font_size_pt=12,\n",
|
|||
|
|
" deduplicate=True\n",
|
|||
|
|
" )\n",
|
|||
|
|
" replace_placeholder_with_temasdir(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df4,\n",
|
|||
|
|
" placeholder=\"{{temasdir}}\",\n",
|
|||
|
|
" left_indent_cm=2.75,\n",
|
|||
|
|
" hanging_cm=0.6,\n",
|
|||
|
|
" font_name=\"Arial\",\n",
|
|||
|
|
" font_size_pt=12,\n",
|
|||
|
|
" deduplicate=True\n",
|
|||
|
|
" )\n",
|
|||
|
|
" \n",
|
|||
|
|
"replace_placeholders_docx_bold_values_keep_style(doc, replacements)\n",
|
|||
|
|
"replace_placeholder_with_q06_subitems(\n",
|
|||
|
|
" doc, df2,\n",
|
|||
|
|
" placeholder=\"{{Q06_Apreciacao}}\",\n",
|
|||
|
|
" indent_cm=2.75,\n",
|
|||
|
|
" indent_title=True,\n",
|
|||
|
|
" font_name=\"Arial\",\n",
|
|||
|
|
" font_size_pt=12\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"replace_placeholder_with_formadores_table(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df2,\n",
|
|||
|
|
" placeholder=\"{{tabelaFormadores}}\",\n",
|
|||
|
|
" font_name=\"Arial\",\n",
|
|||
|
|
" font_size_pt=12\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"replace_placeholder_with_uc_table(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df_inicial=df,\n",
|
|||
|
|
" df_final=df2,\n",
|
|||
|
|
" placeholder=\"{{tabelasUC}}\",\n",
|
|||
|
|
" font_name=\"Arial\",\n",
|
|||
|
|
" font_size_pt=12\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"replace_placeholder_with_temas_smart(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df2,\n",
|
|||
|
|
" placeholder=\"{{TEMAS}}\",\n",
|
|||
|
|
" indent_cm=0.5,\n",
|
|||
|
|
" font_name=\"Arial\",\n",
|
|||
|
|
" font_size_pt=12,\n",
|
|||
|
|
" deduplicate=True,\n",
|
|||
|
|
" max_items=6\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"replace_placeholder_with_desenvolver_smart(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df2,\n",
|
|||
|
|
" placeholder=\"{{DESENVOLVER}}\",\n",
|
|||
|
|
" indent_cm=0.5,\n",
|
|||
|
|
" font_name=\"Arial\",\n",
|
|||
|
|
" font_size_pt=12,\n",
|
|||
|
|
" max_items=6\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"replace_placeholder_with_incluir_smart(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df2,\n",
|
|||
|
|
" placeholder=\"{{INCLUIR}}\",\n",
|
|||
|
|
" indent_cm=0.5,\n",
|
|||
|
|
" font_name=\"Arial\",\n",
|
|||
|
|
" font_size_pt=12,\n",
|
|||
|
|
" max_items=6\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"replace_placeholder_with_observacoes_smart2(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df2,\n",
|
|||
|
|
" placeholder=\"{{OBSERVACOES2}}\",\n",
|
|||
|
|
" indent_cm=2.75,\n",
|
|||
|
|
" font_name=\"Arial\",\n",
|
|||
|
|
" font_size_pt=12,\n",
|
|||
|
|
" max_items=6\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"replace_placeholder_with_observacoes_smart(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df2,\n",
|
|||
|
|
" placeholder=\"{{OBSERVACOES}}\",\n",
|
|||
|
|
" indent_cm=0.5,\n",
|
|||
|
|
" font_name=\"Arial\",\n",
|
|||
|
|
" font_size_pt=12,\n",
|
|||
|
|
" use_ollama=True,\n",
|
|||
|
|
" ollama_model=\"llama3.1:8b\" # ajusta ao que existir nas máquinas\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"\n",
|
|||
|
|
"n = replace_placeholder_with_observacoes_smart(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" df2,\n",
|
|||
|
|
" placeholder=\"{{OBSERVACOES}}\",\n",
|
|||
|
|
" indent_cm=0.5,\n",
|
|||
|
|
" font_name=\"Arial\",\n",
|
|||
|
|
" font_size_pt=12,\n",
|
|||
|
|
" use_ollama=True,\n",
|
|||
|
|
" ollama_model=\"llama3.1:8b\" # ajusta ao que existir nas máquinas\n",
|
|||
|
|
")\n",
|
|||
|
|
"print(\"OBSERVACOES substituídos:\", n)\n",
|
|||
|
|
"\n",
|
|||
|
|
"delete_lines_with_ancora(doc, pattern=r\"ANCORA\")\n",
|
|||
|
|
"doc.save(output_path)\n",
|
|||
|
|
"\n",
|
|||
|
|
"print(f\"Saved: {output_path}\")"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 66,
|
|||
|
|
"id": "37116ac4",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def iter_body_blocks(doc):\n",
|
|||
|
|
" body = doc._element.body\n",
|
|||
|
|
" for child in body.iterchildren():\n",
|
|||
|
|
" tag = child.tag.rsplit(\"}\", 1)[-1]\n",
|
|||
|
|
" if tag == \"p\":\n",
|
|||
|
|
" yield (\"p\", Paragraph(child, doc))\n",
|
|||
|
|
" elif tag == \"tbl\":\n",
|
|||
|
|
" yield (\"tbl\", Table(child, doc))"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 67,
|
|||
|
|
"id": "94b4cf8d",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def table_to_text(tbl) -> str:\n",
|
|||
|
|
" lines = []\n",
|
|||
|
|
" for row in tbl.rows:\n",
|
|||
|
|
" row_txt = []\n",
|
|||
|
|
" for cell in row.cells:\n",
|
|||
|
|
" txt = \" \".join(p.text.strip() for p in cell.paragraphs if p.text.strip())\n",
|
|||
|
|
" txt = re.sub(r\"\\s+\", \" \", txt).strip()\n",
|
|||
|
|
" row_txt.append(txt)\n",
|
|||
|
|
" if any(row_txt):\n",
|
|||
|
|
" lines.append(\" | \".join(row_txt))\n",
|
|||
|
|
" return \"\\n\".join(lines).strip()\n",
|
|||
|
|
"def extract_text_between_markers(doc, start_re: str, end_re: str | None = None, debug=False) -> str:\n",
|
|||
|
|
" start_rx = re.compile(start_re, flags=re.IGNORECASE)\n",
|
|||
|
|
" end_rx = re.compile(end_re, flags=re.IGNORECASE) if end_re else None\n",
|
|||
|
|
" collecting = False\n",
|
|||
|
|
" chunks = []\n",
|
|||
|
|
" for kind, obj in iter_body_blocks(doc):\n",
|
|||
|
|
" if kind == \"p\":\n",
|
|||
|
|
" txt = (obj.text or \"\").strip()\n",
|
|||
|
|
" if not collecting and start_rx.search(txt):\n",
|
|||
|
|
" collecting = True\n",
|
|||
|
|
" if debug:\n",
|
|||
|
|
" print(\"[DEBUG] START matched:\", txt)\n",
|
|||
|
|
" continue\n",
|
|||
|
|
" if collecting and end_rx and end_rx.search(txt):\n",
|
|||
|
|
" if debug:\n",
|
|||
|
|
" print(\"[DEBUG] END matched:\", txt)\n",
|
|||
|
|
" break\n",
|
|||
|
|
" if collecting and txt:\n",
|
|||
|
|
" chunks.append(txt)\n",
|
|||
|
|
" elif kind == \"tbl\":\n",
|
|||
|
|
" if collecting:\n",
|
|||
|
|
" ttxt = table_to_text(obj)\n",
|
|||
|
|
" if ttxt:\n",
|
|||
|
|
" chunks.append(ttxt)\n",
|
|||
|
|
" out = \"\\n\".join(chunks).strip()\n",
|
|||
|
|
" out = re.sub(r\"\\n{3,}\", \"\\n\\n\", out)\n",
|
|||
|
|
" return out"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 68,
|
|||
|
|
"id": "3b2a6313",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def ollama_available(timeout=0.4) -> bool:\n",
|
|||
|
|
" try:\n",
|
|||
|
|
" r = requests.get(\"http://localhost:11434/api/tags\", timeout=timeout)\n",
|
|||
|
|
" return r.status_code == 200\n",
|
|||
|
|
" except Exception:\n",
|
|||
|
|
" return False\n",
|
|||
|
|
"\n",
|
|||
|
|
"def ollama_summarize_text(\n",
|
|||
|
|
" text: str,\n",
|
|||
|
|
" model: str = \"llama3.1:8b\",\n",
|
|||
|
|
" max_chars: int = 24000, \n",
|
|||
|
|
" timeout: float = 120.0,\n",
|
|||
|
|
" system_prompt: str = \"\",\n",
|
|||
|
|
" user_prompt: str = \"\",\n",
|
|||
|
|
") -> str:\n",
|
|||
|
|
" if not text.strip():\n",
|
|||
|
|
" return \"\"\n",
|
|||
|
|
" if len(text) > max_chars:\n",
|
|||
|
|
" text = text[:max_chars] + \"\\n\\n[Texto truncado por limite de tamanho.]\"\n",
|
|||
|
|
" prompt = f\"\"\"\n",
|
|||
|
|
"{system_prompt}\n",
|
|||
|
|
"{user_prompt}\n",
|
|||
|
|
"TEXTO:\n",
|
|||
|
|
"{text}\n",
|
|||
|
|
"\n",
|
|||
|
|
"DEVOLVE APENAS O RESULTADO FINAL, SEM EXPLICAÇÕES.\n",
|
|||
|
|
"\"\"\".strip()\n",
|
|||
|
|
"\n",
|
|||
|
|
" payload = {\n",
|
|||
|
|
" \"model\": model,\n",
|
|||
|
|
" \"prompt\": prompt,\n",
|
|||
|
|
" \"stream\": False,\n",
|
|||
|
|
" \"options\": {\"temperature\": 0.2}\n",
|
|||
|
|
" }\n",
|
|||
|
|
" r = requests.post(\"http://localhost:11434/api/generate\", json=payload, timeout=timeout)\n",
|
|||
|
|
" r.raise_for_status()\n",
|
|||
|
|
" return (r.json().get(\"response\", \"\") or \"\").strip()\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 69,
|
|||
|
|
"id": "595d6cff",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def replace_placeholder_with_text_paragraph_all(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" placeholder: str,\n",
|
|||
|
|
" text: str,\n",
|
|||
|
|
" indent_cm: float = 0.0,\n",
|
|||
|
|
" font_name: str = \"Arial\",\n",
|
|||
|
|
" font_size_pt: int = 12,\n",
|
|||
|
|
") -> int:\n",
|
|||
|
|
" replaced = 0\n",
|
|||
|
|
" def _apply_fmt(p: Paragraph):\n",
|
|||
|
|
" fmt = p.paragraph_format\n",
|
|||
|
|
" fmt.left_indent = Cm(indent_cm)\n",
|
|||
|
|
" fmt.line_spacing = 1.5\n",
|
|||
|
|
" fmt.space_before = Pt(0)\n",
|
|||
|
|
" fmt.space_after = Pt(0)\n",
|
|||
|
|
" def _process_paragraph(p: Paragraph):\n",
|
|||
|
|
" nonlocal replaced\n",
|
|||
|
|
" if placeholder not in (p.text or \"\"):\n",
|
|||
|
|
" return\n",
|
|||
|
|
" if not p.runs:\n",
|
|||
|
|
" r = p.add_run(\"\")\n",
|
|||
|
|
" for run in p.runs:\n",
|
|||
|
|
" run.text = \"\"\n",
|
|||
|
|
" r0 = p.runs[0]\n",
|
|||
|
|
" r0.text = text\n",
|
|||
|
|
" force_run_font(r0, font_name, font_size_pt)\n",
|
|||
|
|
" _apply_fmt(p)\n",
|
|||
|
|
" replaced += 1\n",
|
|||
|
|
" for p in list(doc.paragraphs):\n",
|
|||
|
|
" _process_paragraph(p)\n",
|
|||
|
|
" for tbl in doc.tables:\n",
|
|||
|
|
" for row in tbl.rows:\n",
|
|||
|
|
" for cell in row.cells:\n",
|
|||
|
|
" for p in list(cell.paragraphs):\n",
|
|||
|
|
" _process_paragraph(p)\n",
|
|||
|
|
" return replaced"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 70,
|
|||
|
|
"id": "5476ed71",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def fill_llm_placeholders_llm9_llm10(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" model: str = \"llama3.1:8b\",\n",
|
|||
|
|
" placeholder_llm10: str = \"{{LLM10}}\",\n",
|
|||
|
|
" indent_cm: float = 0.5,\n",
|
|||
|
|
" font_name: str = \"Arial\",\n",
|
|||
|
|
" font_size_pt: int = 12,\n",
|
|||
|
|
" debug_extract: bool = False,\n",
|
|||
|
|
") -> dict:\n",
|
|||
|
|
" if not ollama_available():\n",
|
|||
|
|
" return {\"ok\": False, \"reason\": \"ollama_not_available\", \"llm9\": 0, \"llm10\": 0}\n",
|
|||
|
|
" ap5 = extract_text_between_markers(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" start_re=r\"^\\s*Apêndice\\s*5\\b\",\n",
|
|||
|
|
" end_re=r\"^\\s*Apêndice\\s*6\\b\",\n",
|
|||
|
|
" debug=debug_extract\n",
|
|||
|
|
" )\n",
|
|||
|
|
" ap6 = extract_text_between_markers(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" start_re=r\"^\\s*Apêndice\\s*6\\b\",\n",
|
|||
|
|
" end_re=r\"^\\s*1\\s*[–-]\\s\", \n",
|
|||
|
|
" debug=debug_extract\n",
|
|||
|
|
" )\n",
|
|||
|
|
" texto_llm10 = \"\\n\\n\".join([t for t in [ap5, ap6] if t.strip()]).strip()\n",
|
|||
|
|
" sys_pt = \"És um analista a redigir um relatório oficial. Português de Portugal. Não inventes informação.\"\n",
|
|||
|
|
" prompt_llm10 = (\n",
|
|||
|
|
" \"Resume os conteúdos do Apêndice 5 e do Apêndice 6 num texto único, formal e conciso \"\n",
|
|||
|
|
" \"(1 a 2 parágrafos). Realça pontos-chave e recomendações.\"\n",
|
|||
|
|
" )\n",
|
|||
|
|
" resumo10 = \"\"\n",
|
|||
|
|
" if texto_llm10.strip():\n",
|
|||
|
|
" resumo10 = ollama_summarize_text(texto_llm10, model=model, system_prompt=sys_pt, user_prompt=prompt_llm10)\n",
|
|||
|
|
" n10 = replace_placeholder_with_text_paragraph_all(\n",
|
|||
|
|
" doc, placeholder_llm10, resumo10.strip() or \"Sem dados.\",\n",
|
|||
|
|
" indent_cm=indent_cm, font_name=font_name, font_size_pt=font_size_pt\n",
|
|||
|
|
" )\n",
|
|||
|
|
" return {\n",
|
|||
|
|
" \"ok\": True,\n",
|
|||
|
|
" \"llm10\": n10,\n",
|
|||
|
|
" \"chars_in_llm10\": len(texto_llm10),\n",
|
|||
|
|
" }"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 71,
|
|||
|
|
"id": "8df236c3",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"def fill_llm_placeholder_from_doc_range(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" placeholder: str,\n",
|
|||
|
|
" start_marker: str,\n",
|
|||
|
|
" end_marker: str,\n",
|
|||
|
|
" model: str = \"llama3.1:8b\",\n",
|
|||
|
|
" indent_cm: float = 0.5,\n",
|
|||
|
|
" font_name: str = \"Arial\",\n",
|
|||
|
|
" font_size_pt: int = 12,\n",
|
|||
|
|
" debug_extract: bool = False,\n",
|
|||
|
|
") -> dict:\n",
|
|||
|
|
"\n",
|
|||
|
|
" if not ollama_available():\n",
|
|||
|
|
" return {\"ok\": False, \"reason\": \"ollama_not_available\", \"replaced\": 0, \"chars\": 0}\n",
|
|||
|
|
" start_re = r\"^\\s*\" + re.escape(start_marker.strip()) + r\"\\s*$\"\n",
|
|||
|
|
" end_re = r\"^\\s*\" + re.escape(end_marker.strip()) + r\"\\s*$\"\n",
|
|||
|
|
" texto = extract_text_between_markers(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" start_re=start_re,\n",
|
|||
|
|
" end_re=end_re,\n",
|
|||
|
|
" debug=debug_extract\n",
|
|||
|
|
" ).strip()\n",
|
|||
|
|
" if not texto:\n",
|
|||
|
|
" start_re2 = re.escape(start_marker.strip())\n",
|
|||
|
|
" end_re2 = re.escape(end_marker.strip())\n",
|
|||
|
|
" texto = extract_text_between_markers(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" start_re=start_re2,\n",
|
|||
|
|
" end_re=end_re2,\n",
|
|||
|
|
" debug=debug_extract\n",
|
|||
|
|
" ).strip()\n",
|
|||
|
|
" if not texto:\n",
|
|||
|
|
" n = replace_placeholder_with_text_paragraph_all(\n",
|
|||
|
|
" doc, placeholder, \"Sem dados.\",\n",
|
|||
|
|
" indent_cm=indent_cm, font_name=font_name, font_size_pt=font_size_pt\n",
|
|||
|
|
" )\n",
|
|||
|
|
" return {\"ok\": True, \"replaced\": n, \"chars\": 0, \"note\": \"range_not_found\"}\n",
|
|||
|
|
" sys_pt = \"És um analista a redigir um relatório oficial. Português de Portugal. Não inventes informação.\"\n",
|
|||
|
|
" user_prompt = (\n",
|
|||
|
|
" \"Lê o texto e produz um resumo final, em estilo de conclusões, adequado a relatório oficial:\\n\"\n",
|
|||
|
|
" \"• 1 parágrafo de enquadramento (2–4 frases)\\n\"\n",
|
|||
|
|
" \"• 5–8 bullets com conclusões/recomendações principais\\n\"\n",
|
|||
|
|
" \"• Não inventes dados nem percentagens.\"\n",
|
|||
|
|
" )\n",
|
|||
|
|
" resumo = ollama_summarize_text(\n",
|
|||
|
|
" texto,\n",
|
|||
|
|
" model=model,\n",
|
|||
|
|
" system_prompt=sys_pt,\n",
|
|||
|
|
" user_prompt=user_prompt\n",
|
|||
|
|
" ).strip() or \"Sem dados.\"\n",
|
|||
|
|
" n = replace_placeholder_with_text_paragraph_all(\n",
|
|||
|
|
" doc, placeholder, resumo,\n",
|
|||
|
|
" indent_cm=indent_cm, font_name=font_name, font_size_pt=font_size_pt\n",
|
|||
|
|
" )\n",
|
|||
|
|
" return {\"ok\": True, \"replaced\": n, \"chars\": len(texto)}\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 72,
|
|||
|
|
"id": "dac9419a",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"output_path = \"relatorio_final.docx\" \n",
|
|||
|
|
"doc = Document(output_path)\n",
|
|||
|
|
"fill_llm_placeholder_from_doc_range(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" placeholder=\"{{LLM9}}\",\n",
|
|||
|
|
" start_marker=\"RELATÓRIO DE AVALIAÇÃO INTERNA\",\n",
|
|||
|
|
" end_marker=\"O CHEFE DA DIREÇÃO DE AVALIAÇÃO E QUALIDADE\",\n",
|
|||
|
|
" model=\"llama3.1:8b\",\n",
|
|||
|
|
" indent_cm=0.5,\n",
|
|||
|
|
" font_name=\"Arial\",\n",
|
|||
|
|
" font_size_pt=12,\n",
|
|||
|
|
" debug_extract=True\n",
|
|||
|
|
")\n",
|
|||
|
|
"fill_llm_placeholders_llm9_llm10(\n",
|
|||
|
|
" doc,\n",
|
|||
|
|
" model=\"llama3.1:8b\",\n",
|
|||
|
|
" placeholder_llm10=\"{{LLM10}}\",\n",
|
|||
|
|
" indent_cm=0.5,\n",
|
|||
|
|
" font_name=\"Arial\",\n",
|
|||
|
|
" font_size_pt=12,\n",
|
|||
|
|
" debug_extract=True\n",
|
|||
|
|
")\n",
|
|||
|
|
"doc.save(output_path)"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"metadata": {
|
|||
|
|
"kernelspec": {
|
|||
|
|
"display_name": "Python 3",
|
|||
|
|
"language": "python",
|
|||
|
|
"name": "python3"
|
|||
|
|
},
|
|||
|
|
"language_info": {
|
|||
|
|
"codemirror_mode": {
|
|||
|
|
"name": "ipython",
|
|||
|
|
"version": 3
|
|||
|
|
},
|
|||
|
|
"file_extension": ".py",
|
|||
|
|
"mimetype": "text/x-python",
|
|||
|
|
"name": "python",
|
|||
|
|
"nbconvert_exporter": "python",
|
|||
|
|
"pygments_lexer": "ipython3",
|
|||
|
|
"version": "3.12.10"
|
|||
|
|
}
|
|||
|
|
},
|
|||
|
|
"nbformat": 4,
|
|||
|
|
"nbformat_minor": 5
|
|||
|
|
}
|