Files
Botdaq/Relatórios Internos/reportcreator.py
T
2026-03-14 22:57:45 +00:00

2605 lines
83 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# %%
#ollama pull llama3.1:8b
import pandas as pd
import os
import sys
from tkinter import Tk
from tkinter.filedialog import askopenfilename
from docx import Document
from docx.document import Document as DocxDocument
from docx.text.paragraph import Paragraph
import re
import string
from docx.oxml import OxmlElement
from docx.shared import Cm, Pt
from docx.oxml.ns import qn
from docx.text.run import Run
from docx.table import Table
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.enum.table import WD_ROW_HEIGHT_RULE, WD_ALIGN_VERTICAL
import requests
import tkinter as tk
from tkinter import messagebox
# %%
def _resource_path(relative_path: str) -> str:
    """Join *relative_path* onto the application's resource directory.

    The directory is ``sys._MEIPASS`` when that attribute exists (presumably
    the unpack directory of a frozen bundle) and otherwise the absolute
    directory that contains this source file.
    """
    source_dir = os.path.abspath(os.path.dirname(__file__))
    root = getattr(sys, "_MEIPASS", source_dir)
    return os.path.join(root, relative_path)
# %%
def ollama_available(timeout=0.4) -> bool:
    """Report whether the local Ollama HTTP endpoint answers with status 200.

    Args:
        timeout: Seconds passed to ``requests.get`` as the request timeout.

    Returns:
        ``True`` only for an HTTP 200 answer; any exception raised while
        probing is treated as "not available" and yields ``False``.
    """
    try:
        # TODO: point this at the bot later.
        response = requests.get("http://localhost:11434/api/tags", timeout=timeout)
        reachable = response.status_code == 200
    except Exception:
        reachable = False
    return reachable
# %%
def avaliacao_qualitativa(valor: float) -> str:
    """Map a numeric score onto its qualitative label.

    Returns ``"N/A"`` for ``None``/NaN, ``"Out of Range"`` for anything
    outside ``[1.0, 5.0]``, and otherwise the label of the first band whose
    inclusive upper bound is not exceeded.
    """
    if valor is None or pd.isna(valor):
        return "N/A"
    if not (1.0 <= valor <= 5.0):
        return "Out of Range"
    # (inclusive upper bound, label), ordered from lowest to highest band.
    bands = (
        (3.0, "Rever Urgentemente"),
        (3.5, "Rever e Melhorar"),
        (3.9, "Bom"),
        (4.5, "Qualidade"),
        (5.0, "Excelência"),
    )
    for upper_bound, label in bands:
        if valor <= upper_bound:
            return label
    return "Out of Range"
# %%
def format_header_row(row, height_cm=5.2):
    """Give *row* an exact height and centre the content of all its cells.

    Args:
        row: Table row to format.
        height_cm: Row height in centimetres, applied with the EXACTLY rule.
    """
    row.height = Cm(height_cm)
    row.height_rule = WD_ROW_HEIGHT_RULE.EXACTLY
    for header_cell in row.cells:
        # Centre vertically on the cell, horizontally on each paragraph.
        header_cell.vertical_alignment = WD_ALIGN_VERTICAL.CENTER
        for paragraph in header_cell.paragraphs:
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
# %%
def set_cell_text_vertical(cell, direction="btLr"):
    """Set the ``w:textDirection`` of a table cell.

    An existing ``w:textDirection`` child of the cell properties is reused so
    that calling this function more than once on the same cell does not leave
    duplicate elements behind; a new element is appended only when none exists.

    Args:
        cell: Table cell whose cell properties (``tcPr``) are modified.
        direction: Value written to the element's ``w:val`` attribute.
    """
    tcPr = cell._tc.get_or_add_tcPr()
    td = tcPr.find(qn("w:textDirection"))
    if td is None:
        td = OxmlElement("w:textDirection")
        tcPr.append(td)
    td.set(qn("w:val"), direction)
def set_table_all_columns_width(tbl, width_cm=2.3):
    """Disable autofit on *tbl* and give every cell the same width.

    Args:
        tbl: Table to modify.
        width_cm: Width in centimetres assigned to each cell of each row.
    """
    tbl.autofit = False
    uniform_width = Cm(width_cm)
    for table_row in tbl.rows:
        for table_cell in table_row.cells:
            table_cell.width = uniform_width
# %%
def force_run_font(run: Run, font_name="Arial", font_size_pt=12, bold=None):
    """Apply font name and size (and optionally bold) to *run*.

    Args:
        run: Run to format.
        font_name: Font family; also written to the run's ``w:eastAsia``
            font attribute.
        font_size_pt: Font size in points.
        bold: When not ``None``, ``run.bold`` is set to ``bool(bold)``;
            ``None`` leaves the run's bold setting untouched.
    """
    if bold is not None:
        run.bold = bool(bold)
    font = run.font
    font.name = font_name
    font.size = Pt(font_size_pt)
    # Setting font.name above is relied on to have created rPr/rFonts.
    run._element.rPr.rFonts.set(qn("w:eastAsia"), font_name)
# %%
def format_pt_number(x: float) -> str:
    """Format *x* with two decimals and a decimal comma; NaN/None gives ``""``."""
    return "" if pd.isna(x) else f"{x:.2f}".replace(".", ",")
# %%
def delete_paragraph(paragraph: Paragraph) -> None:
    """Detach *paragraph* from its parent XML element.

    After removal the wrapper's ``_p`` and ``_element`` references are set to
    ``None``.
    """
    element = paragraph._p
    parent = element.getparent()
    parent.remove(element)
    paragraph._p = paragraph._element = None
# %%
def clean_module_title(col_name: str) -> str:
    """Return the column name without the prefix that ends at the first ``->``.

    The name is converted to ``str`` and stripped; when it contains ``->``,
    only the stripped text after the first arrow is returned.
    """
    title = str(col_name).strip()
    _, arrow, remainder = title.partition("->")
    return remainder.strip() if arrow else title
# %%
#def clean_module_title(col_name: str) -> str:
# s = str(col_name).strip()
# s = re.sub(r"^.*?->\s*", "", s)
# s = re.sub(r"^\s*Q06\s*[-_ ]\s*Aprecia.*?[-:]\s*", "", s, flags=re.IGNORECASE)
# return s.strip()
# %%
def apply_table_paragraph_spacing(cell, line_spacing=1.5):
    """Set line spacing and zero before/after spacing on each paragraph of *cell*.

    Args:
        cell: Table cell whose paragraphs are formatted.
        line_spacing: Value assigned to each paragraph's ``line_spacing``.
    """
    no_space = Pt(0)
    for paragraph in cell.paragraphs:
        paragraph_format = paragraph.paragraph_format
        paragraph_format.line_spacing = line_spacing
        paragraph_format.space_before = no_space
        paragraph_format.space_after = no_space
# %%
def mean_columns_until_empty(df: pd.DataFrame, start_col_idx: int = 16) -> list[float]:
    """Collect column means from *start_col_idx* up to the first empty column.

    Each column is coerced to numeric (non-numeric cells become NaN). The scan
    stops at the first column that has no numeric value at all.

    Returns:
        The means of the columns visited before the scan stopped.
    """
    means = []
    for position in range(start_col_idx, df.shape[1]):
        numeric = pd.to_numeric(df.iloc[:, position], errors="coerce")
        if not numeric.notna().any():
            break
        means.append(numeric.mean())
    return means
# %%
def insert_table_after_paragraph(paragraph: Paragraph, rows: int, cols: int) -> Table:
    """Create a ``rows`` x ``cols`` table and place it right after *paragraph*.

    The table is created through the paragraph's owning document and its XML
    element is then moved to follow the paragraph element.
    """
    document = paragraph.part.document
    new_table = document.add_table(rows=rows, cols=cols)
    # addnext relocates the freshly created table element next to the paragraph.
    paragraph._p.addnext(new_table._tbl)
    return new_table
# %%
def replace_placeholder_with_uc_table(
    doc,
    df_inicial: pd.DataFrame,
    df_final: pd.DataFrame,
    placeholder: str = "{{tabelasUC}}",
    font_name: str = "Arial",
    font_size_pt: int = 12,
    start_col_idx_inicial: int = 16,
):
    """Replace the first *placeholder* with a module-appreciation table.

    Rows come from the columns of *df_final* whose lower-cased name contains
    both ``q06`` and ``aprecia`` (sorted by their string form). Each row shows
    the module title, the initial mean, the final mean and their difference.
    Initial means are taken positionally from
    ``mean_columns_until_empty(df_inicial, start_col_idx_inicial)``.

    Body paragraphs are searched first; there the table is inserted after the
    paragraph, which is then deleted. Otherwise the cells of top-level tables
    are searched; the matching cell is cleared and receives a nested table.
    Only the first match is handled.
    """
    nan = float("nan")
    header_titles = ["Apreciação dos módulos", "Inicial", "Final", "Ganhos/Perdas"]
    column_widths = [Cm(11), Cm(1.6), Cm(1.6), Cm(3.5)]

    module_cols = sorted(
        (c for c in df_final.columns if "q06" in str(c).lower() and "aprecia" in str(c).lower()),
        key=str,
    )
    initial_means = mean_columns_until_empty(df_inicial, start_col_idx=start_col_idx_inicial)

    rows_data = []
    for idx, col in enumerate(module_cols):
        ini = initial_means[idx] if idx < len(initial_means) else nan
        if col in df_final.columns:
            fin = pd.to_numeric(df_final[col], errors="coerce").mean()
        else:
            fin = nan
        diff = nan if (pd.isna(fin) or pd.isna(ini)) else fin - ini
        rows_data.append((clean_module_title(col), ini, fin, diff))

    # One header row plus at least one body row (the "Sem dados" row).
    n_rows = 1 + max(1, len(rows_data))

    def _write(cell, text: str, bold=False, align=None):
        cell.text = ""
        first_par = cell.paragraphs[0]
        if align is not None:
            first_par.alignment = align
        force_run_font(first_par.add_run(text), font_name, font_size_pt, bold=bold)
        apply_table_paragraph_spacing(cell, line_spacing=1.5)

    def _layout_and_fill(tbl):
        tbl.style = "Table Grid"
        tbl.autofit = False
        for table_row in tbl.rows:
            for k, width in enumerate(column_widths):
                table_row.cells[k].width = width
        for k, title in enumerate(header_titles):
            _write(tbl.rows[0].cells[k], title, bold=True, align=WD_ALIGN_PARAGRAPH.CENTER)

        if not rows_data:
            _write(tbl.rows[1].cells[0], "Sem dados", bold=False, align=WD_ALIGN_PARAGRAPH.LEFT)
            for k in (1, 2, 3):
                _write(tbl.rows[1].cells[k], "", bold=False, align=WD_ALIGN_PARAGRAPH.CENTER)
            return

        for r, (title, ini, fin, diff) in enumerate(rows_data, start=1):
            diff_txt = "" if pd.isna(diff) else f"{diff:+.2f}".replace(".", ",")
            _write(tbl.rows[r].cells[0], title, bold=False, align=WD_ALIGN_PARAGRAPH.LEFT)
            _write(tbl.rows[r].cells[1], format_pt_number(ini), bold=False, align=WD_ALIGN_PARAGRAPH.CENTER)
            _write(tbl.rows[r].cells[2], format_pt_number(fin), bold=False, align=WD_ALIGN_PARAGRAPH.CENTER)
            _write(tbl.rows[r].cells[3], diff_txt, bold=False, align=WD_ALIGN_PARAGRAPH.CENTER)

    for par in doc.paragraphs:
        if placeholder in par.text:
            _layout_and_fill(insert_table_after_paragraph(par, rows=n_rows, cols=4))
            delete_paragraph(par)
            return

    for table in doc.tables:
        for table_row in table.rows:
            for cell in table_row.cells:
                for par in cell.paragraphs:
                    if placeholder in par.text:
                        cell.text = ""
                        _layout_and_fill(cell.add_table(rows=n_rows, cols=4))
                        return
# %%
def insert_paragraph_after(paragraph: Paragraph) -> Paragraph:
    """Insert an empty ``w:p`` right after *paragraph* and return its wrapper.

    The new ``Paragraph`` shares the parent object of *paragraph*.
    """
    sibling = OxmlElement("w:p")
    paragraph._p.addnext(sibling)
    owner = paragraph._parent
    return Paragraph(sibling, owner)
# %%
def replace_placeholder_with_q06_subitems(
    doc,
    df: pd.DataFrame,
    placeholder: str = "{{Q06_Apreciacao}}",
    item_number: int = 5,
    indent_cm: float = 2.75,
    indent_title: bool = True,
    font_name: str = "Arial",
    font_size_pt: int = 12,
):
    """Replace the first *placeholder* paragraph with one sub-item per Q06 column.

    A column qualifies when its lower-cased name contains both ``q06`` and
    ``aprecia``; qualifying columns are sorted by their string form. Each
    sub-item paragraph reads
    ``(<letter>)<TAB><module title> (<mean>), que corresponde a <label>;``
    with the mean and the qualitative label in bold. When no column
    qualifies, a single ``(a)<TAB>Sem dados;`` paragraph is written. The
    placeholder paragraph itself is deleted.

    Body paragraphs are searched first, then the cells of top-level tables;
    only the first match is processed.

    Args:
        doc: Document to modify in place.
        df: Survey answers.
        placeholder: Marker text to look for.
        item_number: Accepted for backward compatibility; not used.
        indent_cm: Left indent of the generated paragraphs, in centimetres.
        indent_title: Accepted for backward compatibility; not used.
        font_name: Font applied to every generated run.
        font_size_pt: Font size (points) applied to every generated run.
    """
    cols = sorted(
        (c for c in df.columns if "q06" in str(c).lower() and "aprecia" in str(c).lower()),
        key=str,
    )
    letters = string.ascii_lowercase

    def _apply_par_format(par):
        fmt = par.paragraph_format
        fmt.left_indent = Cm(indent_cm)
        fmt.line_spacing = 1.5
        fmt.space_before = Pt(0)
        fmt.space_after = Pt(0)

    def _add_run(par, text: str, bold=None):
        # bold=None leaves the run's bold setting untouched (inherited).
        run = par.add_run(text)
        force_run_font(run, font_name, font_size_pt, bold=bold)
        return run

    def _process_paragraph(p) -> bool:
        if placeholder not in p.text:
            return False
        current = p
        if not cols:
            newp = insert_paragraph_after(current)
            _add_run(newp, "(a)\tSem dados;")
            _apply_par_format(newp)
        else:
            for i, c in enumerate(cols):
                mean_val = pd.to_numeric(df[c], errors="coerce").mean()
                mean_str = f"{mean_val:.2f}".replace(".", ",")
                label = avaliacao_qualitativa(mean_val)
                module_title = clean_module_title(c)
                # Past 'z' the label falls back to "a<N>".
                sub = letters[i] if i < 26 else f"a{i+1}"
                newp = insert_paragraph_after(current)
                _add_run(newp, f"({sub})\t{module_title} (")
                _add_run(newp, mean_str, bold=True)
                _add_run(newp, "), que corresponde a ")
                _add_run(newp, label, bold=True)
                _add_run(newp, ";")
                _apply_par_format(newp)
                # Chain insertions so the items keep their order.
                current = newp
        delete_paragraph(p)
        return True

    for p in doc.paragraphs:
        if _process_paragraph(p):
            return
    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                for p in cell.paragraphs:
                    if _process_paragraph(p):
                        return
# %%
def _collect_course_info():
    """Show a Tk form asking for the course data and return the answers.

    Returns:
        A dict mapping each field key to the stripped text typed by the user.

    Raises:
        RuntimeError: If the main loop ends without a successful submission
            (no values were collected).
    """
    window = tk.Tk()
    window.title("Dados do curso")
    window.resizable(False, False)

    # (label shown to the user, key in the returned dict)
    fields = [
        ("Nomenclatura do curso", "NOMEDOCURSO"),
        ("Dia de inicio (DD)", "DDi"),
        ("Mes de inicio (Extenso)", "MESi"),
        ("Ano de inicio (AAAA)", "AAAAi"),
        ("Dia de fim (DD)", "DDf"),
        ("Mes de fim (Extenso)", "MESf"),
        ("Ano de fim (AAAA)", "AAAAf"),
        ("Numero de formandos", "NFORMANDOS"),
        ("Finalidade do curso", "FINALIDADECURSO"),
        ("Média final do curso", "MEDIAFINALCURSO"),
    ]

    inputs = {}
    for row_idx, (caption, key) in enumerate(fields):
        caption_widget = tk.Label(window, text=caption, anchor="w")
        caption_widget.grid(row=row_idx, column=0, padx=8, pady=4, sticky="w")
        box = tk.Entry(window, width=30)
        box.grid(row=row_idx, column=1, padx=8, pady=4)
        inputs[key] = box

    collected = {}

    def _on_continue():
        answers = {key: box.get().strip() for key, box in inputs.items()}
        blank = [caption for caption, key in fields if not answers[key]]
        if blank:
            # Keep the form open until every field is filled in.
            messagebox.showerror("Dados em falta", "Preencha: " + ", ".join(blank))
            return
        collected.update(answers)
        window.destroy()

    submit = tk.Button(window, text="Continuar", command=_on_continue)
    submit.grid(row=len(fields), column=0, columnspan=2, pady=10)
    window.mainloop()

    if not collected:
        raise RuntimeError("Formulario cancelado")
    return collected
# Ask the user for the course data once, at import/run time.
course_info = _collect_course_info()


def _to_int_or_str(s):
    """Return ``int(s)`` when *s* is made only of digits, else *s* unchanged."""
    if s.isdigit():
        return int(s)
    return s


NOMEDOCURSOcurto = course_info["NOMEDOCURSO"]
# Start date parts.
DDi, MESi, AAAAi = (_to_int_or_str(course_info[key]) for key in ("DDi", "MESi", "AAAAi"))
# End date parts.
DDf, MESf, AAAAf = (_to_int_or_str(course_info[key]) for key in ("DDf", "MESf", "AAAAf"))
NFORMANDOS = _to_int_or_str(course_info["NFORMANDOS"])
# These two are kept exactly as typed.
FINALIDADECURSO = course_info["FINALIDADECURSO"]
MEDIAFINALCURSO = course_info["MEDIAFINALCURSO"]
# %%
# Hide the Tk root window so only the file dialog is shown.
Tk().withdraw()
_excel_filetypes_inicial = [("Excel files", "*.xlsx *.xls")]
file_path = askopenfilename(
    title="Select Excel das expetativas iniciais",
    filetypes=_excel_filetypes_inicial,
)
if not file_path:
    # The dialog returned nothing, i.e. no file was chosen.
    raise FileNotFoundError("No file selected")
print(f"Selected file:{file_path}")
# %%
# Initial-expectations survey.
df = pd.read_excel(file_path)
# %%
ninq = len(df.index) - 1
# Means of columns 10..15 (by position), rounded to two decimals.
(
    medalojamento,
    medalimentacao,
    meddificuldade,
    medfuncfut,
    medmotvpart,
    medconhecimento,
) = (round(df.iloc[:, position].mean(), 2) for position in range(10, 16))
# %%
def mean_columns_until_empty(df: pd.DataFrame, start_col_idx: int = 16) -> list[float]:
    """Return the means of consecutive columns, stopping at the first empty one.

    Columns are visited by position from *start_col_idx*; each is coerced to
    numeric and the walk ends at the first column with no numeric value.

    NOTE: this redefines the function of the same name declared earlier in
    this file.
    """
    collected = []
    position = start_col_idx
    total_cols = df.shape[1]
    while position < total_cols:
        values = pd.to_numeric(df.iloc[:, position], errors="coerce")
        if values.count() == 0:
            break
        collected.append(values.mean())
        position += 1
    return collected
# %%
# Means of the initial-survey columns from position 16 onwards.
medias = mean_columns_until_empty(df, 16)
# Debug output: how many means were found and the first five of them.
print(len(medias), medias[0:5])
# %%
# Hide the Tk root window so only the file dialog is shown.
Tk().withdraw()
_excel_filetypes_final = [("Excel files", "*.xlsx *.xls")]
file_path2 = askopenfilename(
    title="Select Excel das expetativas finais",
    filetypes=_excel_filetypes_final,
)
if not file_path2:
    # The dialog returned nothing, i.e. no file was chosen.
    raise FileNotFoundError("No file selected")
print(f"Selected file:\n{file_path2}")
# %%
# Final-expectations survey.
df2 = pd.read_excel(file_path2)
# %%
ninq2 = len(df2.index) - 1
# Means of columns 10..17 (by position), rounded to two decimals.
(
    medaplog,
    medalojamento2,
    medalimentacao2,
    medapdir,
    meddificuldade2,
    medfuncfut2,
    medmotvpart2,
    medconhecimento2,
) = (round(df2.iloc[:, position].mean(), 2) for position in range(10, 18))
# Final minus initial, for the indicators present in both surveys.
medalojamentofinal = round(medalojamento2 - medalojamento, 2)
medalimentacaofinal = round(medalimentacao2 - medalimentacao, 2)
meddificuldadefinal = round(meddificuldade2 - meddificuldade, 2)
medfuncfutfinal = round(medfuncfut2 - medfuncfut, 2)
medmotvpartfinal = round(medmotvpart2 - medmotvpart, 2)
medconhecimentofinal = round(medconhecimento2 - medconhecimento, 2)
# Means of columns 18..23 (by position), rounded to two decimals.
(
    objcruso,
    contcurso,
    adeqtrab,
    instform,
    audiovisuais,
    biblio,
) = (round(df2.iloc[:, position].mean(), 2) for position in range(18, 24))
# Course name: the segment between the first and second "-" of the cell at
# row 1, column 4, cut at the next separator.
# NOTE(review): the second separator was an empty string in the source as
# received (``.split("")`` always raises ValueError), so the original character
# was evidently lost. It is assumed here to be a Unicode dash
# (U+2010..U+2015 or U+2212) -- confirm against a real survey export.
_survey_title_parts = str(df2.iloc[1, 4]).split("-")
_survey_title_segment = (
    _survey_title_parts[1] if len(_survey_title_parts) > 1 else _survey_title_parts[0]
)
NOMEDOCURSO = re.split(r"[\u2010-\u2015\u2212]", _survey_title_segment, maxsplit=1)[0].strip()
# %%
def build_formadores_rows(df: pd.DataFrame):
groups = {}
for c in df.columns:
name = str(c)
low = name.lower()
if "_formador" not in low:
continue
m = re.match(r"^\s*(Q\d+)\s*_Formador\s*->\s*(.+?)\s*(?:\(|$)", name, flags=re.IGNORECASE)
if not m:
continue
qcode = m.group(1).upper()
metric_raw = m.group(2).strip().lower()
groups.setdefault(qcode, {})
groups[qcode][metric_raw] = name
def qnum(q):
mm = re.match(r"Q(\d+)", q)
return int(mm.group(1)) if mm else 10**9
qcodes_sorted = sorted(groups.keys(), key=qnum)
def metric_key(metric_raw: str) -> str | None:
mr = metric_raw.lower()
if "dom" in mr and "ass" in mr:
return "dominio"
if "métod" in mr or "metod" in mr:
return "metodos"
if "lingu" in mr:
return "linguagem"
if "empenh" in mr:
return "empenho"
if "relac" in mr or "formand" in mr:
return "relacao"
return None
rows = []
letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
for idx, qcode in enumerate(qcodes_sorted):
cols_map = groups[qcode]
picked = {"dominio": None, "metodos": None, "linguagem": None, "empenho": None, "relacao": None}
for raw, colname in cols_map.items():
k = metric_key(raw)
if k and picked[k] is None:
picked[k] = colname
def col_mean(colname):
if not colname:
return float("nan")
return pd.to_numeric(df[colname], errors="coerce").mean()
dominio = col_mean(picked["dominio"])
metodos = col_mean(picked["metodos"])
linguagem = col_mean(picked["linguagem"])
empenho = col_mean(picked["empenho"])
relacao = col_mean(picked["relacao"])
vals = [dominio, metodos, linguagem, empenho, relacao]
media_final = pd.Series(vals, dtype="float").mean(skipna=True)
label = letters[idx] if idx < len(letters) else f"F{idx+1}"
rows.append({
"label": label,
"qcode": qcode,
"dominio": dominio,
"metodos": metodos,
"linguagem": linguagem,
"empenho": empenho,
"relacao": relacao,
"media_final": media_final,
})
return rows
# %%
def replace_placeholder_with_formadores_table(
    doc,
    df2: pd.DataFrame,
    placeholder: str = "{{tabelaFormadores}}",
    font_name: str = "Arial",
    font_size_pt: int = 12,
    col_width_cm: float = 2.3,
    header_vertical: bool = True,
    rotate_first_header: bool = True,
):
    """Replace the first *placeholder* with the trainers' evaluation table.

    The table has a header row, one row per entry of
    ``build_formadores_rows(df2)`` (or a single ``Sem dados`` row) and a final
    ``Média`` row whose last cell holds the mean of the per-trainer
    ``media_final`` values.

    Body paragraphs are searched first (table inserted after the paragraph,
    which is then deleted); otherwise the cells of top-level tables are
    searched (the cell is cleared and receives a nested table).

    Args:
        doc: Document to modify in place.
        df2: Final survey answers.
        placeholder: Marker text to look for.
        font_name: Font for all cell text.
        font_size_pt: Font size in points for all cell text.
        col_width_cm: Width applied to every cell.
        header_vertical: Whether header cells get the ``btLr`` text direction.
        rotate_first_header: Whether the first header cell is rotated too.
    """
    center = WD_ALIGN_PARAGRAPH.CENTER
    left = WD_ALIGN_PARAGRAPH.LEFT
    rows = build_formadores_rows(df2)
    headers = ["Formadores", "Domínio do assunto","Métodos utilizados","Linguagem utilizada","Empenho","Relação c/ formandos","Média final",]
    metric_keys = ("dominio", "metodos", "linguagem", "empenho", "relacao", "media_final")
    global_mean = pd.Series([r["media_final"] for r in rows], dtype="float").mean(skipna=True)
    # Header + data rows (at least one) + the "Média" row.
    n_rows = 1 + max(1, len(rows)) + 1

    def _write(cell, text: str, bold=False, align=None):
        cell.text = ""
        first_par = cell.paragraphs[0]
        if align is not None:
            first_par.alignment = align
        force_run_font(first_par.add_run(text), font_name, font_size_pt, bold=bold)
        apply_table_paragraph_spacing(cell, line_spacing=1.5)

    def _layout(tbl):
        tbl.style = "Table Grid"
        tbl.autofit = False
        set_table_all_columns_width(tbl, width_cm=col_width_cm)
        if header_vertical:
            first_rotated = 0 if rotate_first_header else 1
            for j in range(first_rotated, len(headers)):
                set_cell_text_vertical(tbl.rows[0].cells[j], direction="btLr")

    def _fill(tbl):
        for j, title in enumerate(headers):
            _write(tbl.rows[0].cells[j], title, bold=True, align=center)

        if rows:
            for i, entry in enumerate(rows, start=1):
                _write(tbl.rows[i].cells[0], entry["label"], bold=False, align=center)
                for j, key in enumerate(metric_keys, start=1):
                    _write(tbl.rows[i].cells[j], format_pt_number(entry[key]), bold=False, align=center)
        else:
            _write(tbl.rows[1].cells[0], "Sem dados", bold=False, align=left)
            for j in range(1, len(headers)):
                _write(tbl.rows[1].cells[j], "", bold=False, align=center)

        summary_idx = 1 + max(1, len(rows))
        _write(tbl.rows[summary_idx].cells[0], "Média", bold=True, align=left)
        for j in range(1, len(headers) - 1):
            _write(tbl.rows[summary_idx].cells[j], "", bold=False, align=center)
        if rows:
            _write(tbl.rows[summary_idx].cells[-1], format_pt_number(global_mean), bold=True, align=center)
        else:
            _write(tbl.rows[summary_idx].cells[-1], "", bold=False, align=center)

    def _build(tbl):
        _layout(tbl)
        format_header_row(tbl.rows[0], height_cm=5.2)
        _fill(tbl)

    for par in doc.paragraphs:
        if placeholder in par.text:
            _build(insert_table_after_paragraph(par, rows=n_rows, cols=len(headers)))
            delete_paragraph(par)
            return

    for table in doc.tables:
        for table_row in table.rows:
            for cell in table_row.cells:
                for par in cell.paragraphs:
                    if placeholder in par.text:
                        cell.text = ""
                        _build(cell.add_table(rows=n_rows, cols=len(headers)))
                        return
# %%
def compute_formadores_summary(df: pd.DataFrame):
    """Summarise the trainers' evaluation.

    Returns:
        A tuple ``(count, mean_text, label)``: the number of trainer rows, the
        overall mean formatted with two decimals and a decimal comma, and its
        qualitative label. Both strings are empty when the mean is NaN.
    """
    rows = build_formadores_rows(df)
    overall = pd.Series([row["media_final"] for row in rows], dtype="float").mean(skipna=True)
    if pd.isna(overall):
        return len(rows), "", ""
    mean_text = f"{overall:.2f}".replace(".", ",")
    return len(rows), mean_text, avaliacao_qualitativa(overall)
# %%
def _copy_run_format(src_run, dst_run, keep_bold=None):
    """Copy bold/italic/underline, font name and font size between runs.

    Args:
        src_run: Run whose formatting is read.
        dst_run: Run that receives the formatting.
        keep_bold: When not ``None``, used as the destination's bold value
            instead of the source's.
    """
    dst_run.bold = keep_bold if keep_bold is not None else src_run.bold
    dst_run.italic = src_run.italic
    dst_run.underline = src_run.underline

    name = src_run.font.name
    if name:
        dst_run.font.name = name
        # Mirror the name on the east-Asian font attribute as well.
        dst_run._element.rPr.rFonts.set(qn("w:eastAsia"), name)

    size = src_run.font.size
    if size:
        dst_run.font.size = size
# %%
def extract_temas_list(df: pd.DataFrame) -> list[str]:
    """Collect the distinct non-empty answers of every ``_temas`` column.

    Columns are selected by a case-insensitive ``_temas`` substring. Values
    are converted to ``str`` and stripped; duplicates are dropped
    case-insensitively, keeping the first spelling and first-seen order.
    """
    first_spelling = {}
    for column in df.columns:
        if "_temas" not in str(column).lower():
            continue
        answers = df[column].dropna().astype(str).str.strip()
        for answer in answers[answers != ""].tolist():
            first_spelling.setdefault(answer.lower(), answer)
    return list(first_spelling.values())
# %%
def replace_placeholders_docx_bold_values_keep_style(doc, replacements: dict[str, str]):
    """Substitute placeholder keys by their values, rendering the values bold.

    For each paragraph (body paragraphs, then cells of top-level tables) whose
    joined run text contains a key, the existing runs are emptied and the text
    is rebuilt from new runs formatted like the paragraph's first run.
    Replacement values are always bold; surrounding text keeps the first run's
    bold setting. At any position the earliest match wins and, on a tie, the
    longest key.

    Args:
        doc: Document to modify in place.
        replacements: Mapping of placeholder text to replacement value.
    """
    # Longest first, so that ties on position favour the longest key.
    ordered_keys = sorted(replacements.keys(), key=len, reverse=True)

    def _earliest_match(text):
        best = None
        for key in ordered_keys:
            at = text.find(key)
            if at != -1 and (best is None or at < best[0]):
                best = (at, key)
        return best

    def _rewrite(paragraph):
        existing_runs = paragraph.runs
        if not existing_runs:
            return
        joined = "".join(run.text for run in existing_runs)
        if not any(key in joined for key in ordered_keys):
            return

        template = existing_runs[0]
        for run in existing_runs:
            run.text = ""

        def _emit(chunk, bold):
            _copy_run_format(template, paragraph.add_run(chunk), keep_bold=bold)

        remaining = joined
        hit = _earliest_match(remaining)
        while hit is not None:
            at, key = hit
            if at:
                _emit(remaining[:at], template.bold)
            _emit(str(replacements[key]), True)
            remaining = remaining[at + len(key):]
            hit = _earliest_match(remaining)
        if remaining:
            _emit(remaining, template.bold)

    for paragraph in doc.paragraphs:
        _rewrite(paragraph)
    for table in doc.tables:
        for table_row in table.rows:
            for cell in table_row.cells:
                for paragraph in cell.paragraphs:
                    _rewrite(paragraph)
# %%
def replace_placeholder_with_column_subitems_hanging(
    doc,
    df: pd.DataFrame,
    placeholder: str,
    column_contains: str,
    indent_cm: float = 0.5,
    font_name: str = "Arial",
    font_size_pt: int = 12,
    deduplicate: bool = True,
):
    """Replace the first *placeholder* paragraph with a lettered list.

    List items are the non-empty, stripped string values of every column whose
    name contains *column_contains* (case-insensitive). Each item becomes a
    paragraph ``<letter>. <text>;`` with a left indent of *indent_cm* and a
    first-line indent of minus *indent_cm*. With no items, a single
    ``a. Sem dados;`` paragraph is written. The placeholder paragraph is
    deleted.

    Body paragraphs are searched first, then the cells of top-level tables;
    only the first match is processed.

    Args:
        doc: Document to modify in place.
        df: Source data.
        placeholder: Marker text to look for.
        column_contains: Substring that selects the source columns.
        indent_cm: Indent size in centimetres.
        font_name: Font applied to every generated run.
        font_size_pt: Font size (points) applied to every generated run.
        deduplicate: Drop case-insensitive duplicates, keeping the first.
    """
    needle = column_contains.lower()
    items = []
    for column in df.columns:
        if needle not in str(column).lower():
            continue
        values = df[column].dropna().astype(str).str.strip()
        items.extend(values[values != ""].tolist())

    if deduplicate:
        first_spelling = {}
        for text in items:
            first_spelling.setdefault(text.lower(), text)
        items = list(first_spelling.values())

    def _new_item_paragraph(anchor, pieces):
        par = insert_paragraph_after(anchor)
        for piece in pieces:
            force_run_font(par.add_run(piece), font_name, font_size_pt)
        fmt = par.paragraph_format
        fmt.left_indent = Cm(indent_cm)
        fmt.first_line_indent = Cm(-indent_cm)
        fmt.line_spacing = 1.5
        fmt.space_before = Pt(0)
        fmt.space_after = Pt(0)
        return par

    def _expand(target) -> bool:
        if placeholder not in target.text:
            return False
        if items:
            anchor = target
            for idx, text in enumerate(items):
                # Past 'z' the marker falls back to "a<N>".
                marker = string.ascii_lowercase[idx] if idx < 26 else f"a{idx+1}"
                anchor = _new_item_paragraph(anchor, (f"{marker}. ", text, ";"))
        else:
            _new_item_paragraph(target, ("a. Sem dados;",))
        delete_paragraph(target)
        return True

    def _candidates():
        yield from doc.paragraphs
        for table in doc.tables:
            for table_row in table.rows:
                for cell in table_row.cells:
                    yield from cell.paragraphs

    # any() stops at the first paragraph that was expanded.
    any(_expand(par) for par in _candidates())
# %%
def replace_placeholder_with_column_subitems_hanging2(
    doc,
    df: pd.DataFrame,
    placeholder: str,
    column_contains: str,
    left_indent_cm: float = 2.75,
    hanging_cm: float = 0.6,
    font_name: str = "Arial",
    font_size_pt: int = 12,
    deduplicate: bool = True,
):
    """Replace the first *placeholder* paragraph with a lettered list.

    Same list construction as the single-indent variant, but the left indent
    (*left_indent_cm*) and the negative first-line indent (*hanging_cm*) are
    set independently. Items are the non-empty, stripped string values of the
    columns whose name contains *column_contains* (case-insensitive); with no
    items a single ``a. Sem dados;`` paragraph is written. The placeholder
    paragraph is deleted.

    Body paragraphs are searched first, then the cells of top-level tables;
    only the first match is processed.
    """
    wanted = column_contains.lower()
    source_cols = [c for c in df.columns if wanted in str(c).lower()]

    collected = []
    for column in source_cols:
        cleaned = df[column].dropna().astype(str).str.strip()
        collected += cleaned[cleaned != ""].tolist()

    if deduplicate:
        known = set()
        unique = []
        for text in collected:
            folded = text.lower()
            if folded in known:
                continue
            known.add(folded)
            unique.append(text)
        collected = unique

    def _styled_paragraph_after(anchor, *pieces):
        par = insert_paragraph_after(anchor)
        for piece in pieces:
            force_run_font(par.add_run(piece), font_name, font_size_pt)
        fmt = par.paragraph_format
        fmt.left_indent = Cm(left_indent_cm)
        fmt.first_line_indent = Cm(-hanging_cm)
        fmt.line_spacing = 1.5
        fmt.space_before = Pt(0)
        fmt.space_after = Pt(0)
        return par

    def _expand(target) -> bool:
        if placeholder not in target.text:
            return False
        if not collected:
            _styled_paragraph_after(target, "a. Sem dados;")
        else:
            tail = target
            for idx, text in enumerate(collected):
                # Past 'z' the marker falls back to "a<N>".
                marker = string.ascii_lowercase[idx] if idx < 26 else f"a{idx+1}"
                tail = _styled_paragraph_after(tail, f"{marker}. ", text, ";")
        delete_paragraph(target)
        return True

    for par in doc.paragraphs:
        if _expand(par):
            return
    for table in doc.tables:
        for table_row in table.rows:
            for cell in table_row.cells:
                if any(_expand(par) for par in cell.paragraphs):
                    return
# %%
def replace_placeholder_with_temas_smart(
    doc,
    df: pd.DataFrame,
    placeholder: str = "{{TEMAS}}",
    indent_cm: float = 0.5,
    font_name: str = "Arial",
    font_size_pt: int = 12,
    deduplicate: bool = True,
    max_items: int | None = None,
    **_ignored,
):
    """Replace *placeholder* with the lettered list of suggested topics.

    Topics come from ``extract_temas_list(df)``; they are stripped, optionally
    de-duplicated case-insensitively and optionally capped at *max_items*.
    When nothing is left, a single ``Sem dados`` item is listed instead. The
    list is rendered by ``replace_placeholder_with_column_subitems_hanging``.
    Extra keyword arguments are accepted and ignored.
    """
    temas = []
    for raw in extract_temas_list(df) or []:
        text = str(raw).strip()
        if text:
            temas.append(text)

    if deduplicate:
        first_spelling = {}
        for text in temas:
            first_spelling.setdefault(text.lower(), text)
        temas = list(first_spelling.values())

    if max_items is not None:
        temas = temas[:max_items]

    # Feed the list through a one-column frame so the generic renderer can be reused.
    listing = pd.DataFrame({"_Temas": temas if temas else ["Sem dados"]})
    return replace_placeholder_with_column_subitems_hanging(
        doc,
        listing,
        placeholder=placeholder,
        column_contains="_temas",
        indent_cm=indent_cm,
        font_name=font_name,
        font_size_pt=font_size_pt,
        deduplicate=False,
    )
# %%
def extract_desenvolver_list(df: pd.DataFrame) -> list[str]:
    """Collect the distinct non-empty answers of every ``_desenvolver`` column.

    Columns are selected by a case-insensitive ``_desenvolver`` substring.
    Values are converted to ``str`` and stripped; duplicates are dropped
    case-insensitively, keeping the first spelling and first-seen order.
    """
    matching = [c for c in df.columns if "_desenvolver" in str(c).lower()]

    answers = []
    for column in matching:
        cleaned = df[column].dropna().astype(str).str.strip()
        answers += cleaned[cleaned != ""].tolist()

    known = set()
    unique = []
    for answer in answers:
        folded = answer.lower()
        if folded in known:
            continue
        known.add(folded)
        unique.append(answer)
    return unique
# %%
def ollama_summarize_desenvolver(
    items: list[str],
    max_items: int = 6,
    model: str = "llama3.1:8b",
    timeout: float = 30.0
) -> list[str]:
    """Ask the local Ollama server to condense the "to develop" answers.

    The answers are sent as a bulleted list inside a Portuguese prompt and the
    reply is parsed back into a list of items. Only reply lines that start
    with a bullet marker (``-``, ``•`` or ``–``) are kept, so any introductory
    or closing sentence the model adds is not mistaken for an item.

    Args:
        items: Raw answers to summarise.
        max_items: Maximum number of items requested and returned.
        model: Ollama model name.
        timeout: Request timeout in seconds.

    Returns:
        At most *max_items* cleaned items; empty when the reply has no bullets.

    Raises:
        requests.HTTPError: When the server answers with an error status
            (via ``raise_for_status``). Other ``requests`` errors propagate.
    """
    items_txt = "\n".join(f"- {t}" for t in items)
    prompt = f"""
És um analista a escrever um relatório oficial de avaliação.
Tens uma lista de aspetos a desenvolver/melhorar apontados pelos formandos. Faz o seguinte:
1) Agrupa itens repetidos/semelhantes;
2) Seleciona os mais importantes e recorrentes;
3) Reescreve numa lista curta, clara e formal (Português de Portugal);
4) NÃO inventes novos pontos;
5) No máximo {max_items} itens;
6) Frases curtas, em formato de sintagma nominal (ex.: "Melhoria da componente prática", "Aprofundamento de ...").
Itens:
{items_txt}
Devolve APENAS a lista final no formato:
- Item 1
- Item 2
- Item 3
""".strip()
    payload = {
        "model": model,
        "prompt": prompt,
        "stream": False,
        "options": {"temperature": 0.2}
    }
    r = requests.post("http://localhost:11434/api/generate", json=payload, timeout=timeout)
    r.raise_for_status()
    text = r.json().get("response", "")

    # The previous prefix tuple contained an empty string, which matches every
    # line; restrict parsing to real bullet markers.
    bullet_chars = "-•–"
    lines = []
    for line in text.splitlines():
        line = line.strip()
        if not line.startswith(tuple(bullet_chars)):
            continue
        item = line.lstrip(bullet_chars).strip(" .;")
        if item:
            lines.append(item)
    return lines[:max_items]
# %%
def replace_placeholder_with_desenvolver_smart(
    doc,
    df: pd.DataFrame,
    placeholder: str = "{{DESENVOLVER}}",
    indent_cm: float = 1.5,
    font_name: str = "Arial",
    font_size_pt: int = 12,
    deduplicate: bool = True,
    use_ollama: bool = True,
    ollama_model: str = "llama3.1:8b",
    max_items: int = 6,
):
    """Replace *placeholder* with the "to develop" list, summarised if possible.

    When answers exist, *use_ollama* is true and the Ollama server is
    reachable, the answers are summarised and the summary is listed. In every
    other case (no answers, Ollama disabled/unreachable, summarisation error
    or empty summary) the raw ``_desenvolver`` columns of *df* are listed.
    Rendering is done by ``replace_placeholder_with_column_subitems_hanging``.
    """
    raw = extract_desenvolver_list(df)

    summary = None
    if raw and use_ollama and ollama_available():
        try:
            summary = ollama_summarize_desenvolver(
                raw, max_items=max_items, model=ollama_model
            ) or None
        except Exception:
            # Best effort: any failure falls back to the raw answers.
            summary = None

    if summary is None:
        source, dedupe = df, deduplicate
    else:
        source, dedupe = pd.DataFrame({"_desenvolver": summary}), False

    return replace_placeholder_with_column_subitems_hanging(
        doc, source,
        placeholder=placeholder,
        column_contains="_desenvolver",
        indent_cm=indent_cm,
        font_name=font_name,
        font_size_pt=font_size_pt,
        deduplicate=dedupe,
    )
# %%
def extract_incluir_list(df: pd.DataFrame) -> list[str]:
    """Collect the distinct non-empty answers of every ``_incluir`` column.

    Columns are selected by a case-insensitive ``_incluir`` substring. Values
    are converted to ``str`` and stripped; duplicates are dropped
    case-insensitively, keeping the first spelling and first-seen order.
    """
    known = set()
    unique = []
    for column in df.columns:
        if "_incluir" not in str(column).lower():
            continue
        cleaned = df[column].dropna().astype(str).str.strip()
        for answer in cleaned[cleaned != ""].tolist():
            folded = answer.lower()
            if folded not in known:
                known.add(folded)
                unique.append(answer)
    return unique
# %%
def ollama_summarize_incluir(
    items: list[str],
    max_items: int = 6,
    model: str = "llama3.1:8b",
    timeout: float = 30.0
) -> list[str]:
    """Ask the local Ollama server to condense the "to include" suggestions.

    The suggestions are sent as a bulleted list inside a Portuguese prompt and
    the reply is parsed back into a list of items. Only reply lines that start
    with a bullet marker (``-``, ``•`` or ``–``) are kept, so any introductory
    or closing sentence the model adds is not mistaken for an item.

    Args:
        items: Raw suggestions to summarise.
        max_items: Maximum number of items requested and returned.
        model: Ollama model name.
        timeout: Request timeout in seconds.

    Returns:
        At most *max_items* cleaned items; empty when the reply has no bullets.

    Raises:
        requests.HTTPError: When the server answers with an error status
            (via ``raise_for_status``). Other ``requests`` errors propagate.
    """
    items_txt = "\n".join(f"- {t}" for t in items)
    prompt = f"""
És um analista a escrever um relatório oficial de avaliação.
Tens uma lista de conteúdos/temas que os formandos sugerem que sejam incluídos. Faz o seguinte:
1) Agrupa itens repetidos/semelhantes;
2) Seleciona os mais importantes e recorrentes;
3) Reescreve numa lista curta, clara e formal (Português de Portugal);
4) NÃO inventes novos pontos;
5) No máximo {max_items} itens;
6) Frases curtas e objetivas.
Itens:
{items_txt}
Devolve APENAS a lista final no formato:
- Item 1
- Item 2
- Item 3
""".strip()
    payload = {
        "model": model,
        "prompt": prompt,
        "stream": False,
        "options": {"temperature": 0.2}
    }
    r = requests.post("http://localhost:11434/api/generate", json=payload, timeout=timeout)
    r.raise_for_status()
    text = r.json().get("response", "")

    # The previous prefix tuple contained an empty string, which matches every
    # line; restrict parsing to real bullet markers.
    bullet_chars = "-•–"
    lines = []
    for line in text.splitlines():
        line = line.strip()
        if not line.startswith(tuple(bullet_chars)):
            continue
        item = line.lstrip(bullet_chars).strip(" .;")
        if item:
            lines.append(item)
    return lines[:max_items]
# %%
def replace_placeholder_with_incluir_smart(
    doc,
    df: pd.DataFrame,
    placeholder: str = "{{INCLUIR}}",
    indent_cm: float = 1.5,
    font_name: str = "Arial",
    font_size_pt: int = 12,
    deduplicate: bool = True,
    use_ollama: bool = True,
    ollama_model: str = "llama3.1:8b",
    max_items: int = 6,
):
    """Replace *placeholder* with the "to include" list, summarised if possible.

    When suggestions exist, *use_ollama* is true and the Ollama server is
    reachable, the suggestions are summarised and the summary is listed. In
    every other case (no suggestions, Ollama disabled/unreachable,
    summarisation error or empty summary) the raw ``_incluir`` columns of *df*
    are listed. Rendering is done by
    ``replace_placeholder_with_column_subitems_hanging``.
    """
    render_options = dict(
        placeholder=placeholder,
        column_contains="_incluir",
        indent_cm=indent_cm,
        font_name=font_name,
        font_size_pt=font_size_pt,
    )

    def _render_raw():
        return replace_placeholder_with_column_subitems_hanging(
            doc, df, deduplicate=deduplicate, **render_options
        )

    raw = extract_incluir_list(df)
    if not raw:
        return _render_raw()

    summary = None
    if use_ollama and ollama_available():
        try:
            candidate = ollama_summarize_incluir(
                raw, max_items=max_items, model=ollama_model
            )
        except Exception:
            # Best effort: any failure falls back to the raw suggestions.
            candidate = None
        if candidate:
            summary = candidate

    if summary is None:
        return _render_raw()

    summarised = pd.DataFrame({"_incluir": summary})
    return replace_placeholder_with_column_subitems_hanging(
        doc, summarised, deduplicate=False, **render_options
    )
# %%
def ollama_summarize_observacoes_paragraph(
    items: list[str],
    model: str = "llama3.1:8b",
    timeout: float = 45.0
) -> str:
    """Ask the local Ollama server for a one-paragraph synthesis of the remarks.

    Args:
        items: Free-text remarks to summarise.
        model: Ollama model name.
        timeout: Request timeout in seconds.

    Returns:
        The reply collapsed onto a single line: blank lines are dropped and
        the remaining stripped lines are joined with single spaces.

    Raises:
        requests.HTTPError: When the server answers with an error status
            (via ``raise_for_status``). Other ``requests`` errors propagate.
    """
    items_txt = "\n".join(f"- {t}" for t in items)
    prompt = f"""
És um analista a escrever um relatório oficial de avaliação.
Tens observações livres escritas pelos formandos. Produz um ÚNICO PARÁGRAFO de síntese:
- Português de Portugal, tom formal e objetivo;
- Não inventes informação;
- Agrupa ideias repetidas;
- Evita exemplos pessoais e detalhes identificáveis;
- 3 a 6 frases, no máximo ~120 palavras.
Observações:
{items_txt}
Devolve APENAS o parágrafo final (sem tópicos, sem títulos, sem listas).
""".strip()
    response = requests.post(
        "http://localhost:11434/api/generate",
        json={
            "model": model,
            "prompt": prompt,
            "stream": False,
            "options": {"temperature": 0.2},
        },
        timeout=timeout,
    )
    response.raise_for_status()
    reply = response.json().get("response", "").strip()
    kept_lines = []
    for reply_line in reply.splitlines():
        stripped = reply_line.strip()
        if stripped:
            kept_lines.append(stripped)
    return " ".join(kept_lines)
# %%
def extract_observacoes_list(df: pd.DataFrame) -> list[str]:
    """Collect the distinct free-text observations found in *df*.

    Every column whose name contains ``_observ`` (case-insensitive) is read;
    missing values are dropped, values are converted to ``str`` and stripped,
    and empty strings are discarded. Duplicates are removed case-insensitively,
    keeping the first spelling encountered and the original order.
    """
    collected: list[str] = []
    for column in df.columns:
        if "_observ" not in str(column).lower():
            continue
        values = df[column].dropna().astype(str).str.strip()
        collected.extend(values[values != ""].tolist())

    # dict preserves insertion order; setdefault keeps the first occurrence.
    first_by_key: dict[str, str] = {}
    for text in collected:
        first_by_key.setdefault(text.lower(), text)
    return list(first_by_key.values())
# %%
def _iter_paragraphs_in_table(tbl):
    """Yield every paragraph of *tbl*, descending into nested tables.

    Cells are visited row by row; for each cell its own paragraphs come first,
    followed by the paragraphs of any tables nested inside that cell.
    """
    for row in tbl.rows:
        for cell in row.cells:
            yield from cell.paragraphs
            for nested in cell.tables:
                yield from _iter_paragraphs_in_table(nested)


def iter_all_paragraphs_everywhere(doc):
    """Yield the paragraphs of the body, its tables and all headers/footers.

    Order: body paragraphs, body tables, then for each section the default,
    first-page and even-page header/footer (paragraphs, then tables).
    """
    yield from doc.paragraphs
    for table in doc.tables:
        yield from _iter_paragraphs_in_table(table)
    for section in doc.sections:
        header_footer_parts = (
            section.header,
            section.footer,
            section.first_page_header,
            section.first_page_footer,
            section.even_page_header,
            section.even_page_footer,
        )
        for part in header_footer_parts:
            yield from part.paragraphs
            for table in part.tables:
                yield from _iter_paragraphs_in_table(table)
# %%
def replace_placeholder_with_observacoes_smart(
    doc,
    df: pd.DataFrame,
    placeholder: str = "{{OBSERVACOES}}",
    indent_cm: float = 0.5,
    font_name: str = "Arial",
    font_size_pt: int = 12,
    deduplicate: bool = True,
    use_ollama: bool = True,
    ollama_model: str = "llama3.1:8b",
    max_items: int | None = None,
) -> int:
    """Replace each paragraph containing *placeholder* with the observations.

    When Ollama is reachable and returns text, a single summary paragraph is
    written; otherwise the observations are written as a lettered list
    (``a. ...;``), or ``Sem dados`` when there are none.

    Args:
        doc: python-docx document, modified in place.
        df: Data frame read by ``extract_observacoes_list``.
        placeholder: Marker text; the whole paragraph holding it is removed.
        indent_cm: Left indent applied to the inserted paragraphs.
        font_name: Font forced on every inserted run.
        font_size_pt: Font size forced on every inserted run.
        deduplicate: Drop case-insensitive duplicates, keeping the first.
        use_ollama: Try the LLM summary before falling back to the list.
        ollama_model: Model name forwarded to the summary helper.
        max_items: Optional cap on the number of observations used.

    Returns:
        Number of placeholder paragraphs that were replaced.
    """
    # Locate the targets first: with no placeholder in the template there is
    # nothing to write, so the slow LLM request is skipped entirely.
    targets = [
        p for p in iter_all_paragraphs_everywhere(doc)
        if placeholder in (p.text or "")
    ]
    if not targets:
        return 0

    raw = extract_observacoes_list(df)
    items = [str(t).strip() for t in (raw or []) if str(t).strip()]
    if deduplicate:
        first_seen: dict[str, str] = {}
        for t in items:
            first_seen.setdefault(t.lower(), t)
        items = list(first_seen.values())
    if max_items is not None:
        items = items[:max_items]

    paragraph_text = None
    if items and use_ollama and ollama_available():
        try:
            paragraph_text = ollama_summarize_observacoes_paragraph(items, model=ollama_model)
            if paragraph_text:
                paragraph_text = paragraph_text.strip()
        except Exception:
            # Deliberate best-effort: any LLM failure falls back to the list.
            paragraph_text = None

    letters = string.ascii_lowercase

    def _apply_par_format(par: Paragraph, hanging=False):
        fmt = par.paragraph_format
        fmt.left_indent = Cm(indent_cm)
        if hanging:
            fmt.first_line_indent = Cm(-0.6)
        fmt.line_spacing = 1.5
        fmt.space_before = Pt(0)
        fmt.space_after = Pt(0)

    def _add_run(par: Paragraph, text: str, bold=False):
        r = par.add_run(text)
        force_run_font(r, font_name, font_size_pt, bold=bold)
        return r

    replaced = 0
    for p in targets:
        # The iterator can yield the same underlying <w:p> more than once
        # (merged table cells, headers linked to a previous section). Once it
        # has been replaced it is detached; skip it instead of inserting after
        # an element that no longer has a parent.
        if p._p.getparent() is None:
            continue
        current = p
        if paragraph_text:
            newp = insert_paragraph_after(current)
            _add_run(newp, paragraph_text)
            _apply_par_format(newp, hanging=False)
        else:
            items_to_write = items if items else ["Sem dados"]
            for i, txt in enumerate(items_to_write):
                # Labels a..z, then "a27", "a28", ... past the alphabet.
                sub = letters[i] if i < 26 else f"a{i+1}"
                newp = insert_paragraph_after(current)
                _add_run(newp, f"{sub}. ")
                _add_run(newp, txt.rstrip(".;"))
                _add_run(newp, ";")
                _apply_par_format(newp, hanging=True)
                current = newp
        delete_paragraph(p)
        replaced += 1
    return replaced
# %%
def build_melhoria_checklist_items(
temas_items: list[str] | None,
desenvolver_items: list[str] | None,
incluir_items: list[str] | None,
observacoes_text_or_items: str | list[str] | None = None,
use_ollama: bool = True,
ollama_model: str = "llama3.1:8b",
max_items: int = 10,
):
temas_items = [x.strip() for x in (temas_items or []) if str(x).strip()]
desenvolver_items = [x.strip() for x in (desenvolver_items or []) if str(x).strip()]
incluir_items = [x.strip() for x in (incluir_items or []) if str(x).strip()]
if isinstance(observacoes_text_or_items, str):
observacoes_text = observacoes_text_or_items.strip()
elif isinstance(observacoes_text_or_items, list):
obs_list = [str(x).strip() for x in observacoes_text_or_items if str(x).strip()]
observacoes_text = "\n".join(f"- {x}" for x in obs_list)
else:
observacoes_text = ""
if use_ollama and ollama_available():
try:
temas_txt = "\n".join(f"- {t}" for t in temas_items)
des_txt = "\n".join(f"- {t}" for t in desenvolver_items)
inc_txt = "\n".join(f"- {t}" for t in incluir_items)
prompt = f"""
És um analista a escrever um relatório oficial de avaliação.
A partir dos seguintes outputs, cria uma CHECKLIST de melhorias (ações).
Regras:
- Não inventes pontos.
- Junta redundâncias.
- Escreve cada item como ação (ex.: "Reforçar ...", "Incluir ...", "Aprofundar ...", "Melhorar ...").
- No máximo {max_items} itens.
- Devolve APENAS lista em bullets "- ...".
TEMAS:
{temas_txt}
A DESENVOLVER:
{des_txt}
A INCLUIR:
{inc_txt}
OBSERVAÇÕES:
{observacoes_text}
""".strip()
payload = {
"model": ollama_model,
"prompt": prompt,
"stream": False,
"options": {"temperature": 0.2},
}
r = requests.post("http://localhost:11434/api/generate", json=payload, timeout=45.0)
r.raise_for_status()
text = r.json().get("response", "").strip()
llm_items = []
for line in text.splitlines():
line = line.strip()
if line.startswith(("-", "")):
item = line.lstrip("-•").strip(" .;")
if item:
llm_items.append(item)
llm_items = llm_items[:max_items]
if llm_items:
return llm_items
except Exception:
pass
merged = temas_items + desenvolver_items + incluir_items
seen = set()
out = []
for x in merged:
x = x.strip().strip("•-").strip()
if not x:
continue
k = x.lower()
if k not in seen:
seen.add(k)
out.append(x)
return out[:max_items] if out else ["Sem dados"]
# %%
def replace_placeholder_with_checklist_subitems(
    doc,
    checklist_items: list[str],
    placeholder: str = "{{CHECKLIST_MELHORAR}}",
    indent_cm: float = 0.5,
    font_name: str = "Arial",
    font_size_pt: int = 12,
    checkbox: str = "-",
):
    """Expand the first paragraph containing *placeholder* into a lettered list.

    Body paragraphs are searched first, then the paragraphs of top-level table
    cells. Only the first match is replaced; each entry becomes its own
    paragraph ``"<letter>. <checkbox> <text>;"`` with a hanging indent of
    *indent_cm*. An empty *checklist_items* writes ``Sem dados``.
    """
    alphabet = string.ascii_lowercase
    entries = checklist_items or ["Sem dados"]

    def _styled_run(par: Paragraph, text: str):
        run = par.add_run(text)
        force_run_font(run, font_name, font_size_pt)
        return run

    def _expand(target: Paragraph) -> bool:
        if placeholder not in target.text:
            return False
        anchor = target
        for pos, entry in enumerate(entries):
            label = alphabet[pos] if pos < 26 else f"a{pos+1}"
            line = insert_paragraph_after(anchor)
            _styled_run(line, f"{label}. {checkbox} ")
            _styled_run(line, entry.strip().rstrip(".;"))
            _styled_run(line, ";")
            fmt = line.paragraph_format
            fmt.left_indent = Cm(indent_cm)
            fmt.first_line_indent = Cm(-indent_cm)
            fmt.line_spacing = 1.5
            fmt.space_before = Pt(0)
            fmt.space_after = Pt(0)
            anchor = line
        delete_paragraph(target)
        return True

    def _candidates():
        yield from doc.paragraphs
        for table in doc.tables:
            for row in table.rows:
                for cell in row.cells:
                    yield from cell.paragraphs

    for candidate in _candidates():
        if _expand(candidate):
            return
# %%
def replace_placeholder_with_observacoes_smart2(
    doc,
    df: pd.DataFrame,
    placeholder: str = "{{OBSERVACOES2}}",
    indent_cm: float = 2.75,
    font_name: str = "Arial",
    font_size_pt: int = 12,
    deduplicate: bool = True,
    max_items: int | None = None,
    **_ignored,
):
    """Replace *placeholder* with the observations as hanging sub-items.

    The observations come from ``extract_observacoes_list(df)``, are optionally
    de-duplicated and capped at *max_items*, and are handed to
    ``replace_placeholder_with_column_subitems_hanging2`` as a one-column
    frame. ``Sem dados`` is written when there are no observations.

    Args:
        doc: python-docx document, modified in place.
        df: Source data frame.
        placeholder: Marker text to replace.
        indent_cm: Left indent forwarded as ``left_indent_cm``.
        font_name: Font forwarded to the helper.
        font_size_pt: Font size forwarded to the helper.
        deduplicate: Drop case-insensitive duplicates, keeping the first.
        max_items: Optional cap on the number of observations written.
        **_ignored: Extra keyword arguments are accepted and discarded.

    Returns:
        Whatever ``replace_placeholder_with_column_subitems_hanging2`` returns.
    """
    obs_raw = extract_observacoes_list(df)
    itens = [str(t).strip() for t in (obs_raw or []) if str(t).strip()]
    if deduplicate:
        first_seen: dict[str, str] = {}
        for t in itens:
            first_seen.setdefault(t.lower(), t)
        itens = list(first_seen.values())
    if max_items is not None:
        itens = itens[:max_items]
    if not itens:
        itens = ["Sem dados"]
    df_tmp = pd.DataFrame({"_Observacoes": itens})
    # Pass the prepared frame and the caller's arguments. The previous code
    # passed the module-level `df2` plus hard-coded literals, which ignored
    # `df`, `placeholder`, `indent_cm`, the font settings and `max_items`.
    return replace_placeholder_with_column_subitems_hanging2(
        doc, df_tmp,
        placeholder=placeholder,
        column_contains="_observa",
        left_indent_cm=indent_cm,
        hanging_cm=0.6,
        font_name=font_name,
        font_size_pt=font_size_pt,
        deduplicate=False  # already handled above according to `deduplicate`
    )
# %%
def replace_placeholder_with_incluir_smart(
    doc,
    df: pd.DataFrame,
    placeholder: str = "{{INCLUIR}}",
    indent_cm: float = 0.5,
    font_name: str = "Arial",
    font_size_pt: int = 12,
    deduplicate: bool = True,
    max_items: int | None = None,
    **_ignored,
):
    """Replace *placeholder* with the "to include" items as hanging sub-items.

    Items come from ``extract_incluir_list(df)``; blanks are dropped,
    duplicates are optionally removed (case-insensitive, first kept) and the
    list is capped at *max_items*. ``Sem dados`` is used when nothing is left.
    Extra keyword arguments are accepted and ignored.
    """
    cleaned = []
    for entry in extract_incluir_list(df) or []:
        text = str(entry).strip()
        if text:
            cleaned.append(text)

    if deduplicate:
        unique = {}
        for text in cleaned:
            unique.setdefault(text.lower(), text)
        cleaned = list(unique.values())

    if max_items is not None:
        cleaned = cleaned[:max_items]

    single_column = pd.DataFrame({"_Incluir": cleaned or ["Sem dados"]})
    return replace_placeholder_with_column_subitems_hanging(
        doc,
        single_column,
        placeholder=placeholder,
        column_contains="_incluir",
        indent_cm=indent_cm,
        font_name=font_name,
        font_size_pt=font_size_pt,
        deduplicate=False,
    )
# %%
def replace_placeholder_with_desenvolver_smart(
    doc,
    df: pd.DataFrame,
    placeholder: str = "{{DESENVOLVER}}",
    indent_cm: float = 0.5,
    font_name: str = "Arial",
    font_size_pt: int = 12,
    deduplicate: bool = True,
    max_items: int | None = None,
    **_ignored,
):
    """Replace *placeholder* with the "to develop" items as hanging sub-items.

    Items come from ``extract_desenvolver_list(df)``. They are stripped,
    optionally de-duplicated ignoring case (first spelling wins), truncated to
    *max_items* and written through
    ``replace_placeholder_with_column_subitems_hanging``. With no items,
    ``Sem dados`` is written. Extra keyword arguments are ignored.
    """
    stripped = (str(value).strip() for value in (extract_desenvolver_list(df) or []))
    entries = [text for text in stripped if text]

    if deduplicate:
        keyed = {}
        for text in entries:
            if text.lower() not in keyed:
                keyed[text.lower()] = text
        entries = list(keyed.values())

    if max_items is not None:
        entries = entries[:max_items]
    if not entries:
        entries = ["Sem dados"]

    return replace_placeholder_with_column_subitems_hanging(
        doc,
        pd.DataFrame({"_Desenvolver": entries}),
        placeholder=placeholder,
        column_contains="_desenvolver",
        indent_cm=indent_cm,
        font_name=font_name,
        font_size_pt=font_size_pt,
        deduplicate=False,
    )
# %%
def delete_all_between_anchors_xml(
    doc,
    start="{{ANCORA1}}",
    end="{{ANCORA2}}",
    debug=False,
    max_passes=10_000,
) -> int:
    """Remove every top-level body span delimited by *start* and *end*.

    Each pass finds the first body child whose text contains *start* and the
    first LATER child containing *end*, then removes both and everything in
    between. Passes repeat until no complete pair is left or *max_passes* is
    reached.

    NOTE: this name is defined a second time further down the file (with
    ANCORA3/ANCORA4 defaults); that later definition is the one in effect.

    Returns:
        Number of spans removed.
    """
    body = doc._element.body

    def _has_text(node, needle: str) -> bool:
        # Concatenate all <w:t> text nodes so a marker split across runs matches.
        return needle in "".join(node.xpath(".//*[local-name()='t']/text()") or [])

    def _find_span(nodes):
        first = last = None
        for pos, node in enumerate(nodes):
            if first is None:
                if _has_text(node, start):
                    first = pos
                    if debug:
                        print(f"[DEBUG] start in child {pos} tag={node.tag}")
                # The end marker is only looked for in children after the start.
                continue
            if _has_text(node, end):
                last = pos
                if debug:
                    print(f"[DEBUG] end in child {pos} tag={node.tag}")
                break
        return first, last

    removed_blocks = 0
    passes = 0
    while passes < max_passes:
        passes += 1
        children = list(body.iterchildren())
        start_idx, end_idx = _find_span(children)
        if start_idx is None or end_idx is None:
            if debug:
                print("[DEBUG] done. start/end:", start_idx, end_idx)
            break
        for node in reversed(children[start_idx:end_idx + 1]):
            body.remove(node)
        removed_blocks += 1
    return removed_blocks
# %%
def delete_all_between_anchors_xml(
    doc,
    start="{{ANCORA3}}",
    end="{{ANCORA4}}",
    debug=False,
    max_passes=10_000,
) -> int:
    """Delete each run of body children from a *start* marker to an *end* marker.

    NOTE: this redefines a function of the same name declared earlier in the
    file; only the default anchors differ, and this definition is the one in
    effect once the module has been executed.

    A pass scans the direct children of the document body, takes the first
    child whose text contains *start* and the first subsequent child whose
    text contains *end*, and removes that inclusive range. Scanning repeats
    until a pass finds no complete pair or *max_passes* passes have run.

    Returns:
        How many ranges were removed.
    """
    body = doc._element.body
    removed_blocks = 0

    def _contains(node, needle: str) -> bool:
        found = node.xpath(".//*[local-name()='t']/text()")
        if not found:
            return needle in ""
        return needle in "".join(found)

    passes = 0
    while passes < max_passes:
        passes += 1
        children = list(body.iterchildren())

        start_idx = next(
            (i for i, node in enumerate(children) if _contains(node, start)),
            None,
        )
        if start_idx is not None and debug:
            print(f"[DEBUG] start in child {start_idx} tag={children[start_idx].tag}")

        end_idx = None
        if start_idx is not None:
            # Search strictly after the start child.
            end_idx = next(
                (
                    i
                    for i in range(start_idx + 1, len(children))
                    if _contains(children[i], end)
                ),
                None,
            )
            if end_idx is not None and debug:
                print(f"[DEBUG] end in child {end_idx} tag={children[end_idx].tag}")

        if start_idx is None or end_idx is None:
            if debug:
                print("[DEBUG] done. start/end:", start_idx, end_idx)
            break

        doomed = children[start_idx:end_idx + 1]
        for node in doomed[::-1]:
            body.remove(node)
        removed_blocks += 1
    return removed_blocks
# %%
def _iter_paragraphs_in_table(tbl):
    """Walk *tbl* cell by cell, yielding paragraphs (nested tables included).

    NOTE: identical to a definition that appears earlier in this file.
    """
    every_cell = (cell for row in tbl.rows for cell in row.cells)
    for cell in every_cell:
        for par in cell.paragraphs:
            yield par
        for inner_table in cell.tables:
            yield from _iter_paragraphs_in_table(inner_table)


def iter_all_paragraphs_everywhere(doc):
    """Yield paragraphs from the body, then from every section header/footer.

    NOTE: identical to a definition that appears earlier in this file.
    """
    def _from_container(container):
        # Anything exposing .paragraphs and .tables: the document itself or
        # a header/footer part.
        for par in container.paragraphs:
            yield par
        for table in container.tables:
            yield from _iter_paragraphs_in_table(table)

    yield from _from_container(doc)
    for section in doc.sections:
        parts = [
            section.header,
            section.footer,
            section.first_page_header,
            section.first_page_footer,
            section.even_page_header,
            section.even_page_footer,
        ]
        for part in parts:
            yield from _from_container(part)
# %%
# Ask for the trainers' ("Formadores") Excel workbook. The Tk root window is
# created only so the dialog can open, and is hidden immediately.
Tk().withdraw()
file_path3 = askopenfilename(
    filetypes=[("Excel files", "*.xlsx *.xls")],
    title="Select Excel das Formadores",
)
if file_path3:
    print(f"Selected file:\n{file_path3}")
    df3 = pd.read_excel(file_path3)
else:
    # An empty path means the dialog was cancelled.
    print("Nenhum ficheiro selecionado.")
    df3 = None
# %%
# Defaults used when no trainers' workbook was selected: every metric is 0.
df3 = None
ninq3 = 0
(
    medpub,
    medmeiosaux,
    medapform,
    medapdc,
    medobjesp,
    medmetensi,
    medtempform,
    medlocaisform,
    medlançaaval,
    medtipoaval,
    medtempoaval,
    medobjapre,
    medadqonjesp,
    medinterforma,
) = (0,) * 14
(
    prerequesitos,
    Conteudo,
    objgeral,
    objfinal,
    objadq,
    avadq,
    refere,
) = (0,) * 7

if not file_path3:
    print("Nenhum ficheiro selecionado (df3). Valores definidos a 0.")
else:
    df3 = pd.read_excel(file_path3)
    ninq3 = df3.shape[0]  # number of questionnaire rows
    # Column means (rounded to 2 decimals) for positional columns 12..25.
    (
        medpub,
        medmeiosaux,
        medapform,
        medapdc,
        medobjesp,
        medmetensi,
        medtempform,
        medlocaisform,
        medlançaaval,
        medtipoaval,
        medtempoaval,
        medobjapre,
        medadqonjesp,
        medinterforma,
    ) = [round(df3.iloc[:, col].mean(), 2) for col in range(12, 26)]
    if ninq3 > 0:
        # Percentage of rows equal to 1 in positional columns 26..32.
        (
            prerequesitos,
            Conteudo,
            objgeral,
            objfinal,
            objadq,
            avadq,
            refere,
        ) = [
            round((df3.iloc[:, col].eq(1).sum() / ninq3) * 100, 2)
            for col in range(26, 33)
        ]
# %%
# Ask for the course-direction ("Direção de Curso") Excel workbook; the Tk
# root window is hidden so only the file dialog is shown.
Tk().withdraw()
file_path4 = askopenfilename(
    filetypes=[("Excel files", "*.xlsx *.xls")],
    title="Select Excel da Direção de Curso",
)
if file_path4:
    print(f"Selected file:\n{file_path4}")
    df4 = pd.read_excel(file_path4)
else:
    # An empty path means the dialog was cancelled.
    print("Nenhum ficheiro selecionado.")
    df4 = None
# %%
# Defaults used when no course-direction workbook was selected.
df4 = None
ninq4 = 0
medprogcurso = medcontcurso = medestrcurso = medutilprat = medcargahoraria = 0
med1 = 0
medinstal = medaudiovis = meddocdispor = medapadmin = medapcoord = 0
med2 = 0
medmotform = medrelpart = medpontass = 0
med3 = 0

if not file_path4:
    print("Nenhum ficheiro selecionado. Valores definidos a 0.")
else:
    df4 = pd.read_excel(file_path4)
    ninq4 = df4.shape[0]  # number of questionnaire rows

    # Group 1: means of positional columns 10..14 and their average.
    (
        medprogcurso,
        medcontcurso,
        medestrcurso,
        medutilprat,
        medcargahoraria,
    ) = [round(df4.iloc[:, col].mean(), 2) for col in range(10, 15)]
    med1 = round(
        (medprogcurso + medcontcurso + medestrcurso +
         medutilprat + medcargahoraria) / 5,
        2,
    )

    # Group 2: means of positional columns 15..19 and their average.
    (
        medinstal,
        medaudiovis,
        meddocdispor,
        medapadmin,
        medapcoord,
    ) = [round(df4.iloc[:, col].mean(), 2) for col in range(15, 20)]
    med2 = round(
        (medinstal + medaudiovis + meddocdispor +
         medapadmin + medapcoord) / 5,
        2,
    )

    # Group 3: means of positional columns 20..22 and their average.
    (
        medmotform,
        medrelpart,
        medpontass,
    ) = [round(df4.iloc[:, col].mean(), 2) for col in range(20, 23)]
    med3 = round(
        (medmotform + medrelpart + medpontass) / 3,
        2,
    )
# %%
# Bare expression: presumably left so an interactive `# %%` cell displays df4;
# it has no effect when the file runs as a plain script.
df4
# %%
def replace_placeholder_with_propostas(
    doc,
    df3: pd.DataFrame,
    placeholder: str = "{{Propostas}}",
    left_indent_cm: float = 2.75,
    hanging_cm: float = 0.6,
    font_name: str = "Arial",
    font_size_pt: int = 12,
    deduplicate: bool = True,
    max_items: int | None = None,
) -> int:
    """Replace every *placeholder* with the proposals found in *df3*.

    Text is collected from all columns whose name contains ``_propostas``
    (case-insensitive), stripped, optionally de-duplicated ignoring case and
    capped at *max_items*; ``Sem dados`` is used when nothing is found. The
    writing itself is delegated to
    ``replace_placeholder_with_column_subitems_hanging2``, called repeatedly
    while placeholders remain.

    Args:
        doc: python-docx document, modified in place.
        df3: Source data frame.
        placeholder: Marker text to replace.
        left_indent_cm: Left indent forwarded to the helper.
        hanging_cm: Hanging indent forwarded to the helper.
        font_name: Font forwarded to the helper.
        font_size_pt: Font size forwarded to the helper.
        deduplicate: Drop case-insensitive duplicates, keeping the first.
        max_items: Optional cap on the number of items written.

    Returns:
        Number of helper calls that removed at least one placeholder.
    """
    itens: list[str] = []
    for col in df3.columns:
        if "_propostas" not in str(col).lower():
            continue
        values = df3[col].dropna().astype(str).str.strip()
        itens.extend(values[values != ""].tolist())
    if deduplicate:
        first_seen: dict[str, str] = {}
        for t in itens:
            first_seen.setdefault(t.lower(), t)
        itens = list(first_seen.values())
    if max_items is not None:
        itens = itens[:max_items]
    if not itens:
        itens = ["Sem dados"]
    df_tmp = pd.DataFrame({"_Propostas": itens})

    def _pending() -> int:
        return sum(
            1
            for p in iter_all_paragraphs_everywhere(doc)
            if placeholder in (p.text or "")
        )

    replaced = 0
    remaining = _pending()
    while remaining:
        replace_placeholder_with_column_subitems_hanging2(
            doc,
            df_tmp,
            placeholder=placeholder,
            column_contains="_propostas",
            left_indent_cm=left_indent_cm,
            hanging_cm=hanging_cm,
            font_name=font_name,
            font_size_pt=font_size_pt,
            deduplicate=False
        )
        still_pending = _pending()
        if still_pending >= remaining:
            # The helper (defined elsewhere) made no progress, e.g. the
            # placeholder sits somewhere it does not search. Stop here rather
            # than loop forever.
            break
        replaced += 1
        remaining = still_pending
    return replaced
# %%
def delete_paragraph(paragraph):
    """Detach *paragraph*'s underlying XML element from its parent, if any."""
    element = paragraph._p
    if (container := element.getparent()) is None:
        # Already detached: nothing to remove.
        return
    container.remove(element)
def _iter_paragraphs_in_table(tbl):
    """Depth-first generator over the paragraphs of *tbl* and its nested tables.

    NOTE: the same function is already defined earlier in this file.
    """
    for row in tbl.rows:
        for cell in row.cells:
            for par in cell.paragraphs:
                yield par
            for child_table in cell.tables:
                for par in _iter_paragraphs_in_table(child_table):
                    yield par


def iter_all_paragraphs_everywhere(doc):
    """Generate the paragraphs of *doc*: body first, then headers and footers.

    NOTE: the same function is already defined earlier in this file.
    """
    part_names = (
        "header",
        "footer",
        "first_page_header",
        "first_page_footer",
        "even_page_header",
        "even_page_footer",
    )
    for par in doc.paragraphs:
        yield par
    for table in doc.tables:
        yield from _iter_paragraphs_in_table(table)
    for section in doc.sections:
        # Resolve all six parts up front, in the fixed order above.
        parts = [getattr(section, name) for name in part_names]
        for part in parts:
            for par in part.paragraphs:
                yield par
            for table in part.tables:
                yield from _iter_paragraphs_in_table(table)
def delete_lines_with_ancora(doc, pattern=r"ANCORA") -> int:
    """Delete every paragraph whose text matches *pattern* (case-insensitive).

    Matches are collected over the whole document first and then removed in
    reverse order. Returns the number of paragraphs collected.
    """
    matcher = re.compile(pattern, flags=re.IGNORECASE)
    doomed = [
        par
        for par in iter_all_paragraphs_everywhere(doc)
        if matcher.search(par.text or "")
    ]
    for par in doomed[::-1]:
        delete_paragraph(par)
    return len(doomed)
# %%
def replace_placeholder_with_temasdir(
    doc,
    df4: pd.DataFrame,
    placeholder: str = "{{temasdir}}",
    left_indent_cm: float = 2.75,
    hanging_cm: float = 0.6,
    font_name: str = "Arial",
    font_size_pt: int = 12,
    deduplicate: bool = True,
    max_items: int | None = None,
) -> int:
    """Replace every *placeholder* with the topics found in *df4*.

    Text is collected from all columns whose name contains ``_temas``
    (case-insensitive), stripped, optionally de-duplicated ignoring case and
    capped at *max_items*; ``Sem dados`` is used when nothing is found.
    Writing is delegated to
    ``replace_placeholder_with_column_subitems_hanging2``, called repeatedly
    while placeholders remain.

    Returns:
        Number of helper calls that removed at least one placeholder.
    """
    itens: list[str] = []
    for col in df4.columns:
        if "_temas" not in str(col).lower():
            continue
        values = df4[col].dropna().astype(str).str.strip()
        itens.extend(values[values != ""].tolist())
    if deduplicate:
        first_seen: dict[str, str] = {}
        for t in itens:
            first_seen.setdefault(t.lower(), t)
        itens = list(first_seen.values())
    if max_items is not None:
        itens = itens[:max_items]
    if not itens:
        itens = ["Sem dados"]
    df_tmp = pd.DataFrame({"_Temas": itens})

    def _pending() -> int:
        return sum(
            1
            for p in iter_all_paragraphs_everywhere(doc)
            if placeholder in (p.text or "")
        )

    replaced = 0
    remaining = _pending()
    while remaining:
        replace_placeholder_with_column_subitems_hanging2(
            doc,
            df_tmp,
            placeholder=placeholder,
            column_contains="_temas",
            left_indent_cm=left_indent_cm,
            hanging_cm=hanging_cm,
            font_name=font_name,
            font_size_pt=font_size_pt,
            deduplicate=False
        )
        still_pending = _pending()
        if still_pending >= remaining:
            # No progress (the helper, defined elsewhere, could not reach the
            # remaining placeholder): stop rather than loop forever.
            break
        replaced += 1
        remaining = still_pending
    return replaced
# %%
def replace_placeholder_with_desenvolverdir(
    doc,
    df4: pd.DataFrame,
    placeholder: str = "{{desenvolverdir}}",
    left_indent_cm: float = 2.75,
    hanging_cm: float = 0.6,
    font_name: str = "Arial",
    font_size_pt: int = 12,
    deduplicate: bool = True,
    max_items: int | None = None,
) -> int:
    """Replace every *placeholder* with the "to develop" items found in *df4*.

    Text is collected from all columns whose name contains ``_desenvolver``
    (case-insensitive), stripped, optionally de-duplicated ignoring case and
    capped at *max_items*; ``Sem dados`` is used when nothing is found.
    Writing is delegated to
    ``replace_placeholder_with_column_subitems_hanging2``, called repeatedly
    while placeholders remain.

    Returns:
        Number of helper calls that removed at least one placeholder.
    """
    itens: list[str] = []
    for col in df4.columns:
        if "_desenvolver" not in str(col).lower():
            continue
        values = df4[col].dropna().astype(str).str.strip()
        itens.extend(values[values != ""].tolist())
    if deduplicate:
        first_seen: dict[str, str] = {}
        for t in itens:
            first_seen.setdefault(t.lower(), t)
        itens = list(first_seen.values())
    if max_items is not None:
        itens = itens[:max_items]
    if not itens:
        itens = ["Sem dados"]
    df_tmp = pd.DataFrame({"_Desenvolver": itens})

    def _pending() -> int:
        return sum(
            1
            for p in iter_all_paragraphs_everywhere(doc)
            if placeholder in (p.text or "")
        )

    replaced = 0
    remaining = _pending()
    while remaining:
        replace_placeholder_with_column_subitems_hanging2(
            doc,
            df_tmp,
            placeholder=placeholder,
            column_contains="_desenvolver",
            left_indent_cm=left_indent_cm,
            hanging_cm=hanging_cm,
            font_name=font_name,
            font_size_pt=font_size_pt,
            deduplicate=False
        )
        still_pending = _pending()
        if still_pending >= remaining:
            # No progress (the helper, defined elsewhere, could not reach the
            # remaining placeholder): stop rather than loop forever.
            break
        replaced += 1
        remaining = still_pending
    return replaced
# %%
def replace_placeholder_with_incluirdir(
    doc,
    df4: pd.DataFrame,
    placeholder: str = "{{incluirdir}}",
    left_indent_cm: float = 2.75,
    hanging_cm: float = 0.6,
    font_name: str = "Arial",
    font_size_pt: int = 12,
    deduplicate: bool = True,
    max_items: int | None = None,
) -> int:
    """Replace every *placeholder* with the "to include" items found in *df4*.

    Text is collected from all columns whose name contains ``_incluir``
    (case-insensitive), stripped, optionally de-duplicated ignoring case and
    capped at *max_items*; ``Sem dados`` is used when nothing is found.
    Writing is delegated to
    ``replace_placeholder_with_column_subitems_hanging2``, called repeatedly
    while placeholders remain.

    Returns:
        Number of helper calls that removed at least one placeholder.
    """
    itens: list[str] = []
    for col in df4.columns:
        if "_incluir" not in str(col).lower():
            continue
        values = df4[col].dropna().astype(str).str.strip()
        itens.extend(values[values != ""].tolist())
    if deduplicate:
        first_seen: dict[str, str] = {}
        for t in itens:
            first_seen.setdefault(t.lower(), t)
        itens = list(first_seen.values())
    if max_items is not None:
        itens = itens[:max_items]
    if not itens:
        itens = ["Sem dados"]
    df_tmp = pd.DataFrame({"_Incluir": itens})

    def _pending() -> int:
        return sum(
            1
            for p in iter_all_paragraphs_everywhere(doc)
            if placeholder in (p.text or "")
        )

    replaced = 0
    remaining = _pending()
    while remaining:
        replace_placeholder_with_column_subitems_hanging2(
            doc,
            df_tmp,
            placeholder=placeholder,
            column_contains="_incluir",
            left_indent_cm=left_indent_cm,
            hanging_cm=hanging_cm,
            font_name=font_name,
            font_size_pt=font_size_pt,
            deduplicate=False
        )
        still_pending = _pending()
        if still_pending >= remaining:
            # No progress (the helper, defined elsewhere, could not reach the
            # remaining placeholder): stop rather than loop forever.
            break
        replaced += 1
        remaining = still_pending
    return replaced
# %%
def replace_placeholder_with_positivosdir(
    doc,
    df4: pd.DataFrame,
    placeholder: str = "{{positivosdir}}",
    left_indent_cm: float = 2.75,
    hanging_cm: float = 0.6,
    font_name: str = "Arial",
    font_size_pt: int = 12,
    deduplicate: bool = True,
    max_items: int | None = None,
) -> int:
    """Replace every *placeholder* with the positive points found in *df4*.

    Text is collected from all columns whose name contains ``_positivos``
    (case-insensitive), stripped, optionally de-duplicated ignoring case and
    capped at *max_items*; ``Sem dados`` is used when nothing is found.
    Writing is delegated to
    ``replace_placeholder_with_column_subitems_hanging2``, called repeatedly
    while placeholders remain.

    Returns:
        Number of helper calls that removed at least one placeholder.
    """
    itens: list[str] = []
    for col in df4.columns:
        if "_positivos" not in str(col).lower():
            continue
        values = df4[col].dropna().astype(str).str.strip()
        itens.extend(values[values != ""].tolist())
    if deduplicate:
        first_seen: dict[str, str] = {}
        for t in itens:
            first_seen.setdefault(t.lower(), t)
        itens = list(first_seen.values())
    if max_items is not None:
        itens = itens[:max_items]
    if not itens:
        itens = ["Sem dados"]
    df_tmp = pd.DataFrame({"_Positivos": itens})

    def _pending() -> int:
        return sum(
            1
            for p in iter_all_paragraphs_everywhere(doc)
            if placeholder in (p.text or "")
        )

    replaced = 0
    remaining = _pending()
    while remaining:
        replace_placeholder_with_column_subitems_hanging2(
            doc,
            df_tmp,
            placeholder=placeholder,
            column_contains="_positivos",
            left_indent_cm=left_indent_cm,
            hanging_cm=hanging_cm,
            font_name=font_name,
            font_size_pt=font_size_pt,
            deduplicate=False
        )
        still_pending = _pending()
        if still_pending >= remaining:
            # No progress (the helper, defined elsewhere, could not reach the
            # remaining placeholder): stop rather than loop forever.
            break
        replaced += 1
        remaining = still_pending
    return replaced
# %%
# Bare expression: presumably left so an interactive `# %%` cell displays df4;
# it has no effect when the file runs as a plain script.
df4
# %%
# ---------------------------------------------------------------------------
# Report assembly: build the placeholder -> value map, fill the template and
# save the result as "relatorio_final.docx" in the current working directory.
# ---------------------------------------------------------------------------
nforma, mediaformquanl, mediaform = compute_formadores_summary(df2)
replacements = {
    "{{NOMEDOCURSOEXTENSO}}": str(NOMEDOCURSO),
    # "...2" keys are the complement to 100 %. They are rounded again because
    # 100 - x on a 2-decimal float can expose binary rounding noise
    # (e.g. 14.290000000000006) that would be printed in the report.
    "{{prerequesitos}}": str(prerequesitos),
    "{{prerequesitos2}}": str(round(100 - prerequesitos, 2)),
    "{{Conteudo}}": str(Conteudo),
    "{{Conteudo2}}": str(round(100 - Conteudo, 2)),
    "{{objgeral}}": str(objgeral),
    "{{objgeral2}}": str(round(100 - objgeral, 2)),
    "{{objfinal}}": str(objfinal),
    "{{objfinal2}}": str(round(100 - objfinal, 2)),
    "{{objadq}}": str(objadq),
    "{{objadq2}}": str(round(100 - objadq, 2)),
    "{{avadq}}": str(avadq),
    "{{avadq2}}": str(round(100 - avadq, 2)),
    "{{refere}}": str(refere),
    "{{refere2}}": str(round(100 - refere, 2)),
    "{{ninq4}}": str(ninq4),
    "{{ninq3}}": str(ninq3),
    "{{nforma}}" : str(nforma),
    "{{mediaformquanl}}" : str(mediaformquanl),
    "{{mediaform}}" : str(mediaform),
    "{{mediaaloj}}": str(medalojamento2),
    "{{mediaaloj1}}": str(medalojamento),
    "{{mediaalime}}": str(medalimentacao2),
    "{{mediaalime1}}": str(medalimentacao),
    "{{medalimentacao}}": str(medalimentacaofinal),
    "{{apdir}}": str(medapdir),
    "{{graudif}}": str(meddificuldade2),
    "{{graudif1}}": str(meddificuldade),
    "{{meddificuldadefinal}}": str(meddificuldadefinal),
    "{{funcfut}}": str(medfuncfut2),
    "{{funcfut1}}": str(medfuncfut),
    "{{medfuncfutfinal}}": str(medfuncfutfinal),
    "{{apadm}}": str(medaplog),
    "{{motapro}}": str(medmotvpart2),
    "{{motapro1}}": str(medmotvpart),
    "{{medmotvpartfinal}}": str(medmotvpartfinal),
    "{{conhcurso}}": str(medconhecimento2),
    "{{conhcurso1}}": str(medconhecimento),
    "{{medconhecimentofinal}}": str(medconhecimentofinal),
    "{{objcruso}}": str(objcruso),
    "{{contcurso}}": str(contcurso),
    "{{medalojamento}}": str(medalojamentofinal),
    "{{adeqtrab}}": str(adeqtrab),
    "{{instform}}": str(instform),
    "{{audiovisuais}}": str(audiovisuais),
    "{{biblio}}": str(biblio),
    "{{ninq}}": str(ninq),
    "{{ninqfim}}": str(ninq2),
    "{{medpub}}": str(medpub),
    "{{medmeiosaux}}": str(medmeiosaux),
    "{{medapform}}": str(medapform),
    "{{medapdc}}": str(medapdc),
    "{{medobjesp}}": str(medobjesp),
    "{{medmetensi}}": str(medmetensi),
    "{{medtempform}}": str(medtempform),
    "{{medlocaisform}}": str(medlocaisform),
    "{{medlançaaval}}": str(medlançaaval),
    "{{medtipoaval}}": str(medtipoaval),
    "{{medtempoaval}}": str(medtempoaval),
    "{{medobjapre}}": str(medobjapre),
    "{{medadqonjesp}}": str(medadqonjesp),
    "{{medinterforma}}": str(medinterforma),
    "{{medprogcurso}}": str(medprogcurso),
    "{{medcontcurso}}": str(medcontcurso),
    "{{medestrcurso}}": str(medestrcurso),
    "{{medutilprat}}": str(medutilprat),
    "{{medcargahoraria}}": str(medcargahoraria),
    "{{medinstal}}": str(medinstal),
    "{{medaudiovis}}": str(medaudiovis),
    "{{meddocdispor}}": str(meddocdispor),
    "{{medapadmin}}": str(medapadmin),
    "{{medapcoord}}": str(medapcoord),
    "{{medmotform}}": str(medmotform),
    "{{medrelpart}}": str(medrelpart),
    "{{medpontass}}": str(medpontass),
    "{{med1}}": str(med1),
    "{{med2}}": str(med2),
    "{{med3}}": str(med3),
    # Qualitative labels. Key spellings are left untouched: they presumably
    # mirror the placeholders in the template (TODO confirm).
    "{{medpontassqual}}": str(avaliacao_qualitativa(medpontass)),
    "{{medrelpartqual}}": str(avaliacao_qualitativa(medrelpart)),
    "{{medmotformqual}}": str(avaliacao_qualitativa(medmotform)),
    "{{medapcoordqual}}": str(avaliacao_qualitativa(medapcoord)),
    "{{medapadminqual}}": str(avaliacao_qualitativa(medapadmin)),
    "{{meddocdisporqual}}": str(avaliacao_qualitativa(meddocdispor)),
    "{{medaudiovisqual}}": str(avaliacao_qualitativa(medaudiovis)),
    "{{medinstalqual}}": str(avaliacao_qualitativa(medinstal)),
    "{{medcargahorariaqual}}": str(avaliacao_qualitativa(medcargahoraria)),
    "{{medutilpratoqual}}": str(avaliacao_qualitativa(medutilprat)),
    "{{medestrcursooqual}}": str(avaliacao_qualitativa(medestrcurso)),
    # NOTE(review): this label used to be computed from medcontcurso, which
    # does not match its name; it now uses medprogcurso, and the content label
    # is exposed under its own key. Confirm against the template.
    "{{medprogcursoqual}}": str(avaliacao_qualitativa(medprogcurso)),
    "{{medcontcursoqual}}": str(avaliacao_qualitativa(medcontcurso)),
    "{{medinterformaqual}}": str(avaliacao_qualitativa(medinterforma)),
    "{{medadqonjespqual}}": str(avaliacao_qualitativa(medadqonjesp)),
    "{{medobjaprequal}}": str(avaliacao_qualitativa(medobjapre)),
    "{{medtempoavalqual}}": str(avaliacao_qualitativa(medtempoaval)),
    "{{medtipoavalqual}}": str(avaliacao_qualitativa(medtipoaval)),
    "{{medlançaavalual}}": str(avaliacao_qualitativa(medlançaaval)),
    "{{medlocaisformqual}}": str(avaliacao_qualitativa(medlocaisform)),
    "{{medtempformqual}}": str(avaliacao_qualitativa(medtempform)),
    "{{medmetensiqual}}": str(avaliacao_qualitativa(medmetensi)),
    "{{medobjespqual}}": str(avaliacao_qualitativa(medobjesp)),
    "{{medapdcqual}}": str(avaliacao_qualitativa(medapdc)),
    "{{medapformqual}}": str(avaliacao_qualitativa(medapform)),
    "{{medmeiosauxqual}}": str(avaliacao_qualitativa(medmeiosaux)),
    "{{medpubqual}}": str(avaliacao_qualitativa(medpub)),
    "{{mediaalojqual}}": str(avaliacao_qualitativa(medalojamento2)),
    "{{apdirqual}}": str(avaliacao_qualitativa(medapdir)),
    "{{funcfutqual}}": str(avaliacao_qualitativa(medfuncfut2)),
    "{{graudifaqual}}": str(avaliacao_qualitativa(meddificuldade2)),
    "{{apadmqual}}": str(avaliacao_qualitativa(medaplog)),
    "{{motaproqual}}": str(avaliacao_qualitativa(medmotvpart2)),
    "{{conhcursoqual}}": str(avaliacao_qualitativa(medconhecimento2)),
    "{{objcrusoqual}}": str(avaliacao_qualitativa(objcruso)),
    "{{contcursoqual}}": str(avaliacao_qualitativa(contcurso)),
    "{{adeqtrabqual}}": str(avaliacao_qualitativa(adeqtrab)),
    "{{instformqual}}": str(avaliacao_qualitativa(instform)),
    "{{audiovisuaisqual}}": str(avaliacao_qualitativa(audiovisuais)),
    "{{biblioqual}}": str(avaliacao_qualitativa(biblio)),
    "{{mediaalimequal}}": str(avaliacao_qualitativa(medalimentacao2)),
    "{{NOMEDOCURSO}}": str(NOMEDOCURSOcurto),
    "{{MESi}}": str(MESi),
    "{{AAAAi}}": str(AAAAi),
    "{{DDf}}": str(DDf),
    "{{MESf}}": str(MESf),
    "{{AAAAf}}": str(AAAAf),
    "{{MEDIAFINALCURSO}}": str(MEDIAFINALCURSO),
    "{{NFORMANDOS}}": str(NFORMANDOS),
    "{{FINALIDADECURSO}}": str(FINALIDADECURSO),
    "{{DDi}}": str(DDi)
}
template_path = _resource_path("Anexo RAI..docx")
output_path = os.path.join(os.getcwd(), "relatorio_final.docx")
doc = Document(template_path)

# Trainers' section: fill it when a workbook was chosen, otherwise cut the
# whole ANCORA1..ANCORA2 span out of the template.
if file_path3:
    replace_placeholder_with_propostas(
        doc,
        df3,
        placeholder="{{Propostas}}",
        left_indent_cm=2.75,
        hanging_cm=0.6,
        font_name="Arial",
        font_size_pt=12,
        deduplicate=True,
        max_items=None
    )
else:
    print("Nenhum ficheiro selecionado. A remover secção do documento.")
    delete_all_between_anchors_xml(doc, "{{ANCORA1}}", "{{ANCORA2}}", debug=True)
    df3 = None

# Course-direction section: same logic with the ANCORA3..ANCORA4 span.
if file_path4:
    replace_placeholder_with_positivosdir(
        doc,
        df4,
        placeholder="{{positivosdir}}",
        left_indent_cm=2.75,
        hanging_cm=0.6,
        font_name="Arial",
        font_size_pt=12,
        deduplicate=True
    )
    replace_placeholder_with_incluirdir(
        doc,
        df4,
        placeholder="{{incluirdir}}",
        left_indent_cm=2.75,
        hanging_cm=0.6,
        font_name="Arial",
        font_size_pt=12,
        deduplicate=True
    )
    replace_placeholder_with_desenvolverdir(
        doc,
        df4,
        placeholder="{{desenvolverdir}}",
        left_indent_cm=2.75,
        hanging_cm=0.6,
        font_name="Arial",
        font_size_pt=12,
        deduplicate=True
    )
    replace_placeholder_with_temasdir(
        doc,
        df4,
        placeholder="{{temasdir}}",
        left_indent_cm=2.75,
        hanging_cm=0.6,
        font_name="Arial",
        font_size_pt=12,
        deduplicate=True
    )
else:
    print("Nenhum ficheiro selecionado. A remover secção do documento.")
    delete_all_between_anchors_xml(doc, "{{ANCORA3}}", "{{ANCORA4}}", debug=True)
    df4 = None

replace_placeholders_docx_bold_values_keep_style(doc, replacements)
replace_placeholder_with_q06_subitems(
    doc, df2,
    placeholder="{{Q06_Apreciacao}}",
    indent_cm=2.75,
    indent_title=True,
    font_name="Arial",
    font_size_pt=12
)
replace_placeholder_with_formadores_table(
    doc,
    df2,
    placeholder="{{tabelaFormadores}}",
    font_name="Arial",
    font_size_pt=12
)
replace_placeholder_with_uc_table(
    doc,
    df_inicial=df,
    df_final=df2,
    placeholder="{{tabelasUC}}",
    font_name="Arial",
    font_size_pt=12
)
replace_placeholder_with_temas_smart(
    doc,
    df2,
    placeholder="{{TEMAS}}",
    indent_cm=0.5,
    font_name="Arial",
    font_size_pt=12,
    deduplicate=True,
    max_items=6
)
replace_placeholder_with_desenvolver_smart(
    doc,
    df2,
    placeholder="{{DESENVOLVER}}",
    indent_cm=0.5,
    font_name="Arial",
    font_size_pt=12,
    max_items=6
)
replace_placeholder_with_incluir_smart(
    doc,
    df2,
    placeholder="{{INCLUIR}}",
    indent_cm=0.5,
    font_name="Arial",
    font_size_pt=12,
    max_items=6
)
replace_placeholder_with_observacoes_smart2(
    doc,
    df2,
    placeholder="{{OBSERVACOES2}}",
    indent_cm=2.75,
    font_name="Arial",
    font_size_pt=12,
    max_items=6
)
# Called once only. A second identical call found nothing left to replace, so
# it always reported 0 and triggered another LLM request for nothing.
n = replace_placeholder_with_observacoes_smart(
    doc,
    df2,
    placeholder="{{OBSERVACOES}}",
    indent_cm=0.5,
    font_name="Arial",
    font_size_pt=12,
    use_ollama=True,
    ollama_model="llama3.1:8b"  # adjust to whichever model exists on the machines
)
print("OBSERVACOES substituídos:", n)
# Remove any anchor marker lines that are still present before saving.
delete_lines_with_ancora(doc, pattern=r"ANCORA")
doc.save(output_path)
print(f"Saved: {output_path}")
# %%
def iter_body_blocks(doc):
    """Yield ``(kind, object)`` for each top-level paragraph or table of *doc*.

    ``kind`` is ``"p"`` (with a ``Paragraph``) or ``"tbl"`` (with a ``Table``),
    in document order. Body children with any other tag are skipped.
    """
    for node in doc._element.body.iterchildren():
        # Drop the "{namespace}" prefix to get the local tag name.
        local_name = node.tag.rpartition("}")[2]
        if local_name == "tbl":
            yield (local_name, Table(node, doc))
        elif local_name == "p":
            yield (local_name, Paragraph(node, doc))
# %%
def table_to_text(tbl) -> str:
    """Render *tbl* as plain text, one line per row with cells joined by `` | ``.

    Within a cell the non-empty paragraph texts are joined with a space and
    runs of whitespace are collapsed. Rows whose cells are all empty are
    omitted; the final result is stripped.
    """
    rendered_rows = []
    for row in tbl.rows:
        cells_text = []
        for cell in row.cells:
            pieces = [
                stripped
                for stripped in (par.text.strip() for par in cell.paragraphs)
                if stripped
            ]
            cells_text.append(re.sub(r"\s+", " ", " ".join(pieces)).strip())
        if any(cells_text):
            rendered_rows.append(" | ".join(cells_text))
    return "\n".join(rendered_rows).strip()
def extract_text_between_markers(doc, start_re: str, end_re: str | None = None, debug=False) -> str:
    """Return the body text found after a start marker and before an end marker.

    Top-level blocks are scanned in order. Collection begins after the first
    paragraph matching *start_re* and stops at the first later paragraph
    matching *end_re*, or at the end of the document when *end_re* is
    ``None``. Both patterns are case-insensitive and neither marker paragraph
    is included. Tables inside the range are rendered with ``table_to_text``.
    Runs of three or more newlines are reduced to two.
    """
    start_rx = re.compile(start_re, flags=re.IGNORECASE)
    end_rx = re.compile(end_re, flags=re.IGNORECASE) if end_re else None

    collecting = False
    chunks = []
    for kind, block in iter_body_blocks(doc):
        if kind == "tbl":
            if collecting:
                rendered = table_to_text(block)
                if rendered:
                    chunks.append(rendered)
            continue
        if kind != "p":
            continue

        line = (block.text or "").strip()
        if not collecting:
            if start_rx.search(line):
                collecting = True
                if debug:
                    print("[DEBUG] START matched:", line)
            continue
        if end_rx is not None and end_rx.search(line):
            if debug:
                print("[DEBUG] END matched:", line)
            break
        if line:
            chunks.append(line)

    joined = "\n".join(chunks).strip()
    return re.sub(r"\n{3,}", "\n\n", joined)
# %%
def ollama_available(timeout=0.4) -> bool:
    """Return True when the local Ollama server answers ``/api/tags`` with 200.

    Any exception raised while probing is treated as "not available".
    NOTE: this repeats a definition found near the top of the file.
    """
    reachable = False
    try:
        reachable = (
            requests.get("http://localhost:11434/api/tags", timeout=timeout).status_code
            == 200
        )
    except Exception:
        # Probe only: every failure simply means "not available".
        pass
    return reachable
def ollama_summarize_text(
    text: str,
    model: str = "llama3.1:8b",
    max_chars: int = 24000,
    timeout: float = 120.0,
    system_prompt: str = "",
    user_prompt: str = "",
) -> str:
    """Send *text* to the local Ollama ``/api/generate`` endpoint and return the reply.

    Args:
        text: Source text; a blank value short-circuits to an empty result
            without any network call.
        model: Model name placed in the request payload.
        max_chars: Texts longer than this are cut to this many characters and
            a truncation notice is appended.
        timeout: Timeout passed to the HTTP request.
        system_prompt: First line of the assembled prompt.
        user_prompt: Second line of the assembled prompt.

    Returns:
        The stripped ``"response"`` field of the JSON reply, or an empty
        string when that field is missing or empty.

    Raises:
        Whatever ``requests.post`` raises, plus the HTTP error raised by
        ``raise_for_status()`` for an unsuccessful status code.
    """
    if not text.strip():
        return ""

    body = text
    if len(body) > max_chars:
        body = body[:max_chars] + "\n\n[Texto truncado por limite de tamanho.]"

    # Layout: system prompt, user prompt, the text, then the closing instruction.
    prompt = (
        f"{system_prompt}\n{user_prompt}\nTEXTO:\n{body}\n"
        "DEVOLVE APENAS O RESULTADO FINAL, SEM EXPLICAÇÕES."
    ).strip()

    response = requests.post(
        "http://localhost:11434/api/generate",
        json={
            "model": model,
            "prompt": prompt,
            "stream": False,
            "options": {"temperature": 0.2},
        },
        timeout=timeout,
    )
    response.raise_for_status()
    answer = response.json().get("response", "")
    return (answer or "").strip()
# %%
def replace_placeholder_with_text_paragraph_all(
    doc,
    placeholder: str,
    text: str,
    indent_cm: float = 0.0,
    font_name: str = "Arial",
    font_size_pt: int = 12,
) -> int:
    """Overwrite every paragraph that contains *placeholder* with *text*.

    The document's body paragraphs are handled first, then the paragraphs of
    every cell of every table in ``doc.tables``. A matching paragraph has all
    of its runs emptied, *text* written into its first run (a run is added if
    the paragraph has none), the font forced via ``force_run_font`` and the
    paragraph formatted with the given left indent, 1.5 line spacing and zero
    spacing before/after.

    Args:
        doc: Document to modify in place.
        placeholder: Marker text to look for inside paragraph text.
        text: Replacement text for the whole paragraph.
        indent_cm: Left indent, in centimetres.
        font_name: Font name passed to ``force_run_font``.
        font_size_pt: Font size passed to ``force_run_font``.

    Returns:
        Number of paragraphs that were rewritten.
    """

    def _candidate_paragraphs():
        # Snapshot each paragraph list before iterating it.
        yield from list(doc.paragraphs)
        for table in doc.tables:
            for row in table.rows:
                for cell in row.cells:
                    yield from list(cell.paragraphs)

    rewritten = 0
    for paragraph in _candidate_paragraphs():
        if placeholder not in (paragraph.text or ""):
            continue

        # Guarantee there is a first run to receive the replacement text.
        if not paragraph.runs:
            paragraph.add_run("")
        for run in paragraph.runs:
            run.text = ""
        first_run = paragraph.runs[0]
        first_run.text = text
        force_run_font(first_run, font_name, font_size_pt)

        layout = paragraph.paragraph_format
        layout.left_indent = Cm(indent_cm)
        layout.line_spacing = 1.5
        layout.space_before = Pt(0)
        layout.space_after = Pt(0)

        rewritten += 1
    return rewritten
# %%
def fill_llm_placeholders_llm9_llm10(
    doc,
    model: str = "llama3.1:8b",
    placeholder_llm10: str = "{{LLM10}}",
    indent_cm: float = 0.5,
    font_name: str = "Arial",
    font_size_pt: int = 12,
    debug_extract: bool = False,
) -> dict:
    """Fill the LLM10 placeholder with a summary of Apêndice 5 and Apêndice 6.

    Despite its name, this function only fills ``placeholder_llm10``; it does
    not touch any LLM9 placeholder. The ``"llm9"`` key is kept in the result
    (always 0) so that both outcomes expose the same keys.

    The text of Apêndice 5 (up to the Apêndice 6 heading) and of Apêndice 6
    (up to a paragraph starting with ``1`` followed by a dash) is extracted,
    summarised through Ollama and written over every paragraph containing
    the placeholder. When nothing is extracted or the model returns nothing,
    ``"Sem dados."`` is written instead.

    Args:
        doc: Document to read from and modify in place.
        model: Ollama model name used for the summary.
        placeholder_llm10: Placeholder text to replace.
        indent_cm: Left indent of the replaced paragraphs, in centimetres.
        font_name: Font applied to the replacement text.
        font_size_pt: Font size applied to the replacement text.
        debug_extract: Forwarded to the extraction helper as ``debug``.

    Returns:
        A dict with keys ``ok``, ``llm9``, ``llm10`` (paragraphs replaced)
        and ``chars_in_llm10`` (length of the extracted text). When Ollama is
        not reachable, ``ok`` is False, a ``reason`` key is added and the
        document is left untouched.

    Raises:
        Any exception raised by the Ollama request in ``ollama_summarize_text``.
    """
    if not ollama_available():
        return {
            "ok": False,
            "reason": "ollama_not_available",
            "llm9": 0,
            "llm10": 0,
            "chars_in_llm10": 0,
        }

    ap5 = extract_text_between_markers(
        doc,
        start_re=r"^\s*Apêndice\s*5\b",
        end_re=r"^\s*Apêndice\s*6\b",
        debug=debug_extract,
    )
    ap6 = extract_text_between_markers(
        doc,
        start_re=r"^\s*Apêndice\s*6\b",
        # Accept hyphen, en dash and em dash: word processors frequently
        # replace a typed "-" with a typographic dash.
        end_re=r"^\s*1\s*[-\u2013\u2014]\s",
        debug=debug_extract,
    )
    texto_llm10 = "\n\n".join(t for t in (ap5, ap6) if t.strip()).strip()

    sys_pt = "És um analista a redigir um relatório oficial. Português de Portugal. Não inventes informação."
    prompt_llm10 = (
        "Resume os conteúdos do Apêndice 5 e do Apêndice 6 num texto único, formal e conciso "
        "(1 a 2 parágrafos). Realça pontos-chave e recomendações."
    )

    resumo10 = ""
    if texto_llm10:
        resumo10 = ollama_summarize_text(
            texto_llm10, model=model, system_prompt=sys_pt, user_prompt=prompt_llm10
        )

    n10 = replace_placeholder_with_text_paragraph_all(
        doc,
        placeholder_llm10,
        resumo10.strip() or "Sem dados.",
        indent_cm=indent_cm,
        font_name=font_name,
        font_size_pt=font_size_pt,
    )
    return {
        "ok": True,
        "llm9": 0,
        "llm10": n10,
        "chars_in_llm10": len(texto_llm10),
    }
# %%
def fill_llm_placeholder_from_doc_range(
    doc,
    placeholder: str,
    start_marker: str,
    end_marker: str,
    model: str = "llama3.1:8b",
    indent_cm: float = 0.5,
    font_name: str = "Arial",
    font_size_pt: int = 12,
    debug_extract: bool = False,
) -> dict:
    """Replace *placeholder* with an LLM summary of a marked document range.

    The text between the paragraphs identified by ``start_marker`` and
    ``end_marker`` is extracted. Markers are first required to be the whole
    paragraph; if that yields nothing, they are searched anywhere inside a
    paragraph. The extracted text is summarised through Ollama and written
    over every paragraph containing the placeholder. ``"Sem dados."`` is
    written when no text is found or the model returns nothing.

    Args:
        doc: Document to read from and modify in place.
        placeholder: Placeholder text to replace.
        start_marker: Literal text of the paragraph that opens the range.
        end_marker: Literal text of the paragraph that closes the range.
        model: Ollama model name used for the summary.
        indent_cm: Left indent of the replaced paragraphs, in centimetres.
        font_name: Font applied to the replacement text.
        font_size_pt: Font size applied to the replacement text.
        debug_extract: Forwarded to the extraction helper as ``debug``.

    Returns:
        A dict with ``ok``, ``replaced`` (paragraphs rewritten) and ``chars``
        (length of the extracted text). ``reason`` is added when Ollama is
        unreachable (document untouched) and ``note`` when the range yielded
        no text.

    Raises:
        Any exception raised by the Ollama request in ``ollama_summarize_text``.
    """
    if not ollama_available():
        return {"ok": False, "reason": "ollama_not_available", "replaced": 0, "chars": 0}

    start_literal = re.escape(start_marker.strip())
    end_literal = re.escape(end_marker.strip())
    # Strict whole-paragraph match first, then a looser "contains" match.
    pattern_pairs = (
        (r"^\s*" + start_literal + r"\s*$", r"^\s*" + end_literal + r"\s*$"),
        (start_literal, end_literal),
    )

    texto = ""
    for start_re, end_re in pattern_pairs:
        texto = extract_text_between_markers(
            doc,
            start_re=start_re,
            end_re=end_re,
            debug=debug_extract,
        ).strip()
        if texto:
            break

    if not texto:
        n = replace_placeholder_with_text_paragraph_all(
            doc, placeholder, "Sem dados.",
            indent_cm=indent_cm, font_name=font_name, font_size_pt=font_size_pt
        )
        return {"ok": True, "replaced": n, "chars": 0, "note": "range_not_found"}

    sys_pt = "És um analista a redigir um relatório oficial. Português de Portugal. Não inventes informação."
    # The ranges use a plain ASCII hyphen so the requested sizes read as
    # "2 to 4" and "5 to 8" rather than as the numbers 24 and 58.
    user_prompt = (
        "Lê o texto e produz um resumo final, em estilo de conclusões, adequado a relatório oficial:\n"
        "• 1 parágrafo de enquadramento (2-4 frases)\n"
        "• 5-8 bullets com conclusões/recomendações principais\n"
        "• Não inventes dados nem percentagens."
    )

    resumo = ollama_summarize_text(
        texto,
        model=model,
        system_prompt=sys_pt,
        user_prompt=user_prompt,
    ).strip() or "Sem dados."

    n = replace_placeholder_with_text_paragraph_all(
        doc, placeholder, resumo,
        indent_cm=indent_cm, font_name=font_name, font_size_pt=font_size_pt
    )
    return {"ok": True, "replaced": n, "chars": len(texto)}
# %%
# Final script step: reopen the document at output_path, fill the LLM
# placeholders and save it back to the same path.
# The order matters: the {{LLM9}} range is read and filled before {{LLM10}}.
doc = Document(output_path)
# Summarise the text between the two marker paragraphs into {{LLM9}}.
# The returned status dict is discarded, so an unreachable Ollama server
# leaves the placeholder in place without any message.
# debug_extract=True makes the extraction print the matched marker paragraphs.
fill_llm_placeholder_from_doc_range(
doc,
placeholder="{{LLM9}}",
start_marker="RELATÓRIO DE AVALIAÇÃO INTERNA",
end_marker="O CHEFE DA DIREÇÃO DE AVALIAÇÃO E QUALIDADE",
model="llama3.1:8b",
indent_cm=0.5,
font_name="Arial",
font_size_pt=12,
debug_extract=True
)
# Summarise Apêndice 5 and Apêndice 6 into {{LLM10}}; the returned status
# dict is discarded here as well.
fill_llm_placeholders_llm9_llm10(
doc,
model="llama3.1:8b",
placeholder_llm10="{{LLM10}}",
indent_cm=0.5,
font_name="Arial",
font_size_pt=12,
debug_extract=True
)
# Overwrite the file that was opened above with the updated document.
doc.save(output_path)