2605 lines
83 KiB
Python
2605 lines
83 KiB
Python
# %%
|
||
#ollama pull llama3.1:8b
|
||
import pandas as pd
|
||
import os
|
||
import sys
|
||
from tkinter import Tk
|
||
from tkinter.filedialog import askopenfilename
|
||
from docx import Document
|
||
from docx.document import Document as DocxDocument
|
||
from docx.text.paragraph import Paragraph
|
||
import re
|
||
import string
|
||
from docx.oxml import OxmlElement
|
||
from docx.shared import Cm, Pt
|
||
from docx.oxml.ns import qn
|
||
from docx.text.run import Run
|
||
from docx.table import Table
|
||
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
||
from docx.enum.table import WD_ROW_HEIGHT_RULE, WD_ALIGN_VERTICAL
|
||
import requests
|
||
import tkinter as tk
|
||
from tkinter import messagebox
|
||
|
||
# %%
|
||
def _resource_path(relative_path: str) -> str:
|
||
base_dir = getattr(sys, "_MEIPASS", os.path.abspath(os.path.dirname(__file__)))
|
||
return os.path.join(base_dir, relative_path)
|
||
|
||
# %%
|
||
def ollama_available(timeout=0.4) -> bool:
    """Report whether the local Ollama HTTP endpoint answers.

    Issues a GET to ``http://localhost:11434/api/tags`` with the given
    ``timeout`` (passed straight to ``requests.get``).  Returns True only when
    the reply has status code 200; any exception raised by the probe is
    treated as "not available".
    """
    tags_url = "http://localhost:11434/api/tags"  # original note: put the bot here later
    try:
        return requests.get(tags_url, timeout=timeout).status_code == 200
    except Exception:
        return False
|
||
|
||
# %%
|
||
def avaliacao_qualitativa(valor: float) -> str:
    """Translate a numeric score into its qualitative label.

    Returns "N/A" for ``None``/NaN and "Out of Range" for values outside
    [1.0, 5.0]; otherwise the label of the first band whose upper bound is
    greater than or equal to ``valor``.
    """
    if valor is None or pd.isna(valor):
        return "N/A"
    if not 1.0 <= valor <= 5.0:
        return "Out of Range"

    # (inclusive upper bound, label) in ascending order.
    bands = (
        (3.0, "Rever Urgentemente"),
        (3.5, "Rever e Melhorar"),
        (3.9, "Bom"),
        (4.5, "Qualidade"),
        (5.0, "Excelência"),
    )
    for upper_bound, label in bands:
        if valor <= upper_bound:
            return label
    return "Out of Range"
|
||
|
||
# %%
|
||
def format_header_row(row, height_cm=5.2):
    """Give a table row an exact height and centre the content of its cells.

    ``row`` is expected to be a python-docx table row (inferred from the
    attributes used); ``height_cm`` is the exact row height in centimetres.
    """
    row.height = Cm(height_cm)
    row.height_rule = WD_ROW_HEIGHT_RULE.EXACTLY
    for header_cell in row.cells:
        # Centre vertically in the cell and horizontally in each paragraph.
        header_cell.vertical_alignment = WD_ALIGN_VERTICAL.CENTER
        for paragraph in header_cell.paragraphs:
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
||
|
||
# %%
|
||
def set_cell_text_vertical(cell, direction="btLr"):
    """Set the ``w:textDirection`` of a table cell.

    ``direction`` is written verbatim as the ``w:val`` attribute.  An existing
    ``w:textDirection`` child of the cell properties is updated in place, so
    calling this more than once on the same cell does not stack duplicate
    elements; otherwise a new element is appended.
    """
    tcPr = cell._tc.get_or_add_tcPr()
    text_direction = tcPr.find(qn("w:textDirection"))
    if text_direction is None:
        text_direction = OxmlElement("w:textDirection")
        tcPr.append(text_direction)
    text_direction.set(qn("w:val"), direction)
|
||
|
||
def set_table_all_columns_width(tbl, width_cm=2.3):
    """Turn off autofit and give every cell of ``tbl`` the same width (cm)."""
    tbl.autofit = False
    uniform_width = Cm(width_cm)
    for table_row in tbl.rows:
        for table_cell in table_row.cells:
            table_cell.width = uniform_width
|
||
|
||
# %%
|
||
def force_run_font(run: Run, font_name="Arial", font_size_pt=12, bold=None):
    """Apply a font name and point size to ``run``; optionally set bold.

    ``bold`` is left untouched when it is ``None``; any other value is coerced
    with ``bool``.  The font name is also written to the ``w:eastAsia``
    attribute of the run's ``rFonts`` element.
    """
    if bold is not None:
        run.bold = bool(bold)

    font = run.font
    font.name = font_name
    font.size = Pt(font_size_pt)

    r_fonts = run._element.rPr.rFonts
    r_fonts.set(qn("w:eastAsia"), font_name)
|
||
|
||
# %%
|
||
def format_pt_number(x: float) -> str:
    """Format ``x`` with two decimals and a decimal comma; "" when it is missing."""
    if pd.isna(x):
        return ""
    two_decimals = format(x, ".2f")
    return two_decimals.replace(".", ",")
|
||
|
||
# %%
|
||
def delete_paragraph(paragraph: Paragraph) -> None:
    """Remove the paragraph's XML element from its parent.

    Afterwards the wrapper's ``_p`` and ``_element`` references are cleared.
    """
    element = paragraph._p
    parent = element.getparent()
    parent.remove(element)
    paragraph._p = None
    paragraph._element = None
|
||
|
||
# %%
|
||
def clean_module_title(col_name: str) -> str:
    """Return the column title without its prefix up to the first "->".

    The input is converted with ``str`` and stripped.  When it contains "->",
    only the stripped text after the first occurrence is returned.
    """
    title = str(col_name).strip()
    _prefix, arrow, remainder = title.partition("->")
    return remainder.strip() if arrow else title
|
||
|
||
# %%
|
||
#def clean_module_title(col_name: str) -> str:
|
||
# s = str(col_name).strip()
|
||
# s = re.sub(r"^.*?->\s*", "", s)
|
||
# s = re.sub(r"^\s*Q06\s*[-–_ ]\s*Aprecia.*?[-–:]\s*", "", s, flags=re.IGNORECASE)
|
||
# return s.strip()
|
||
|
||
# %%
|
||
def apply_table_paragraph_spacing(cell, line_spacing=1.5):
    """Set line spacing and zero before/after spacing on every paragraph of ``cell``."""
    no_gap = Pt(0)
    for paragraph in cell.paragraphs:
        spacing = paragraph.paragraph_format
        spacing.line_spacing = line_spacing
        spacing.space_before = no_gap
        spacing.space_after = no_gap
|
||
|
||
# %%
|
||
def mean_columns_until_empty(df: pd.DataFrame, start_col_idx: int = 16) -> list[float]:
    """Collect column means from ``start_col_idx`` up to the first empty column.

    Columns are read by position and coerced to numbers (non-numeric values
    become NaN).  Scanning stops at the first column with no numeric value at
    all; that column and everything after it are ignored.
    """
    means = []
    col_idx = start_col_idx
    while col_idx < df.shape[1]:
        numeric = pd.to_numeric(df.iloc[:, col_idx], errors="coerce")
        if not numeric.notna().any():
            break
        means.append(numeric.mean())
        col_idx += 1
    return means
|
||
|
||
# %%
|
||
def insert_table_after_paragraph(paragraph: Paragraph, rows: int, cols: int) -> Table:
    """Create a ``rows`` x ``cols`` table and attach it right after ``paragraph``.

    The table is created through the owning document's ``add_table`` and its
    XML element is then placed as the next sibling of the paragraph element.
    """
    new_table = paragraph.part.document.add_table(rows=rows, cols=cols)
    paragraph._p.addnext(new_table._tbl)
    return new_table
|
||
|
||
# %%
|
||
def replace_placeholder_with_uc_table(
    doc,
    df_inicial: pd.DataFrame,
    df_final: pd.DataFrame,
    placeholder: str = "{{tabelasUC}}",
    font_name: str = "Arial",
    font_size_pt: int = 12,
    start_col_idx_inicial: int = 16,
):
    """Swap the first ``placeholder`` found in ``doc`` for a four-column table.

    One row is produced per column of ``df_final`` whose name contains both
    "q06" and "aprecia" (case-insensitive, sorted by name).  Each row shows the
    cleaned column title, the initial mean (taken by position from
    ``mean_columns_until_empty(df_inicial, start_col_idx_inicial)``), the final
    mean and their difference.  With no such column a single "Sem dados" row
    is written.

    Body paragraphs are searched first: the table is inserted after the
    matching paragraph, which is then deleted.  Otherwise the paragraphs inside
    the cells of ``doc.tables`` are searched: the cell text is cleared and the
    table is added inside that cell.  Only the first match is handled; nothing
    happens when the placeholder is absent.
    """
    center = WD_ALIGN_PARAGRAPH.CENTER
    left = WD_ALIGN_PARAGRAPH.LEFT

    uc_cols = sorted(
        (
            col
            for col in df_final.columns
            if "q06" in str(col).lower() and "aprecia" in str(col).lower()
        ),
        key=str,
    )
    ini_means = mean_columns_until_empty(df_inicial, start_col_idx=start_col_idx_inicial)

    # (title, initial mean, final mean, difference) per module column.
    rows_data = []
    for position, col in enumerate(uc_cols):
        ini = ini_means[position] if position < len(ini_means) else float("nan")
        if col in df_final.columns:
            fin = pd.to_numeric(df_final[col], errors="coerce").mean()
        else:
            fin = float("nan")
        if pd.isna(fin) or pd.isna(ini):
            diff = float("nan")
        else:
            diff = fin - ini
        rows_data.append((clean_module_title(col), ini, fin, diff))

    # Header row plus at least one body row.
    n_rows = 1 + max(1, len(rows_data))

    def _fill_cell(cell, text: str, bold=False, align=None):
        """Replace the cell content with one formatted run."""
        cell.text = ""
        first_paragraph = cell.paragraphs[0]
        if align is not None:
            first_paragraph.alignment = align
        force_run_font(first_paragraph.add_run(text), font_name, font_size_pt, bold=bold)
        apply_table_paragraph_spacing(cell, line_spacing=1.5)

    def _apply_table_layout(tbl):
        """Apply grid style, fixed column widths and the header captions."""
        tbl.style = "Table Grid"
        tbl.autofit = False
        col_widths = [Cm(11), Cm(1.6), Cm(1.6), Cm(3.5)]
        for table_row in tbl.rows:
            for col_idx, width in enumerate(col_widths):
                table_row.cells[col_idx].width = width
        headers = ["Apreciação dos módulos", "Inicial", "Final", "Ganhos/Perdas"]
        for col_idx, caption in enumerate(headers):
            _fill_cell(tbl.rows[0].cells[col_idx], caption, bold=True, align=center)

    def _populate_rows(tbl):
        """Write the data rows, or a single "Sem dados" row when there are none."""
        if not rows_data:
            empty_row = (("Sem dados", left), ("", center), ("", center), ("", center))
            for col_idx, (text, align) in enumerate(empty_row):
                _fill_cell(tbl.rows[1].cells[col_idx], text, bold=False, align=align)
            return
        for row_idx, (title, ini, fin, diff) in enumerate(rows_data, start=1):
            diff_txt = "" if pd.isna(diff) else f"{diff:+.2f}".replace(".", ",")
            _fill_cell(tbl.rows[row_idx].cells[0], title, bold=False, align=left)
            _fill_cell(tbl.rows[row_idx].cells[1], format_pt_number(ini), bold=False, align=center)
            _fill_cell(tbl.rows[row_idx].cells[2], format_pt_number(fin), bold=False, align=center)
            _fill_cell(tbl.rows[row_idx].cells[3], diff_txt, bold=False, align=center)

    def _render(tbl):
        _apply_table_layout(tbl)
        _populate_rows(tbl)

    for paragraph in doc.paragraphs:
        if placeholder in paragraph.text:
            _render(insert_table_after_paragraph(paragraph, rows=n_rows, cols=4))
            delete_paragraph(paragraph)
            return

    for table in doc.tables:
        for table_row in table.rows:
            for cell in table_row.cells:
                for cell_paragraph in cell.paragraphs:
                    if placeholder in cell_paragraph.text:
                        cell.text = ""
                        _render(cell.add_table(rows=n_rows, cols=4))
                        return
|
||
|
||
# %%
|
||
def insert_paragraph_after(paragraph: Paragraph) -> Paragraph:
    """Insert an empty ``w:p`` element right after ``paragraph`` and wrap it.

    The returned ``Paragraph`` shares the parent object of the given one.
    """
    fresh_element = OxmlElement("w:p")
    paragraph._p.addnext(fresh_element)
    return Paragraph(fresh_element, paragraph._parent)
|
||
|
||
# %%
|
||
def replace_placeholder_with_q06_subitems(
    doc,
    df: pd.DataFrame,
    placeholder: str = "{{Q06_Apreciacao}}",
    item_number: int = 5,
    indent_cm: float = 2.75,
    indent_title: bool = True,
    font_name: str = "Arial",
    font_size_pt: int = 12,
):
    """Replace the first ``placeholder`` paragraph with one sub-item per module.

    For every column of ``df`` whose name contains both "q06" and "aprecia"
    (case-insensitive, sorted by name) a paragraph of the form
    ``(a)<TAB>title (mean), que corresponde a label;`` is inserted after the
    placeholder paragraph, with the mean and the qualitative label in bold.
    With no matching column a single "(a)<TAB>Sem dados;" paragraph is written.
    The placeholder paragraph is then deleted.

    Body paragraphs are searched first, then paragraphs inside the cells of
    ``doc.tables``; only the first match is processed.

    ``item_number`` and ``indent_title`` are accepted for interface
    compatibility but are not used by this function.
    """
    cols = sorted(
        (c for c in df.columns if "q06" in str(c).lower() and "aprecia" in str(c).lower()),
        key=str,
    )
    letters = string.ascii_lowercase

    def _apply_par_format(par: Paragraph, left_indent_cm: float):
        """Left indent, 1.5 line spacing and no spacing before/after."""
        fmt = par.paragraph_format
        fmt.left_indent = Cm(left_indent_cm)
        fmt.line_spacing = 1.5
        fmt.space_before = Pt(0)
        fmt.space_after = Pt(0)

    def _add_line(par: Paragraph, text: str, bold=None):
        """Append a run in the configured font; ``bold=True`` makes it bold."""
        run = par.add_run(text)
        force_run_font(run, font_name, font_size_pt, bold=bold)
        return run

    def _process_paragraph(p: Paragraph) -> bool:
        if placeholder not in p.text:
            return False

        current = p
        if not cols:
            newp = insert_paragraph_after(current)
            _add_line(newp, "(a)\tSem dados;")
            _apply_par_format(newp, indent_cm)
        else:
            for i, c in enumerate(cols):
                mean_val = pd.to_numeric(df[c], errors="coerce").mean()
                mean_str = f"{mean_val:.2f}".replace(".", ",")
                label = avaliacao_qualitativa(mean_val)
                module_title = clean_module_title(c)
                # Letters a-z first, then "a27", "a28", ...
                sub = letters[i] if i < 26 else f"a{i+1}"

                newp = insert_paragraph_after(current)
                _add_line(newp, f"({sub})\t{module_title} (")
                _add_line(newp, mean_str, bold=True)
                _add_line(newp, "), que corresponde a ")
                _add_line(newp, label, bold=True)
                _add_line(newp, ";")
                _apply_par_format(newp, indent_cm)
                # Chain insertions so the items keep their order.
                current = newp

        delete_paragraph(p)
        return True

    for p in doc.paragraphs:
        if _process_paragraph(p):
            return

    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                for p in cell.paragraphs:
                    if _process_paragraph(p):
                        return
|
||
|
||
# %%
|
||
def _collect_course_info():
    """Ask the user for the course data in a Tk form and return it.

    The window shows one labelled entry per field and a "Continuar" button.
    Pressing the button with any blank field shows an error box listing the
    missing labels; otherwise the stripped values are stored and the window is
    destroyed.

    Returns a dict mapping each field key to the text entered.
    Raises RuntimeError when the main loop ends without a valid submission.
    """
    window = tk.Tk()
    window.title("Dados do curso")
    window.resizable(False, False)

    # (label shown to the user, key used in the result)
    fields = [
        ("Nomenclatura do curso", "NOMEDOCURSO"),
        ("Dia de inicio (DD)", "DDi"),
        ("Mes de inicio (Extenso)", "MESi"),
        ("Ano de inicio (AAAA)", "AAAAi"),
        ("Dia de fim (DD)", "DDf"),
        ("Mes de fim (Extenso)", "MESf"),
        ("Ano de fim (AAAA)", "AAAAf"),
        ("Numero de formandos", "NFORMANDOS"),
        ("Finalidade do curso", "FINALIDADECURSO"),
        ("Média final do curso", "MEDIAFINALCURSO"),
    ]

    entry_by_key = {}
    for row_index, (caption, key) in enumerate(fields):
        caption_label = tk.Label(window, text=caption, anchor="w")
        caption_label.grid(row=row_index, column=0, padx=8, pady=4, sticky="w")
        entry = tk.Entry(window, width=30)
        entry.grid(row=row_index, column=1, padx=8, pady=4)
        entry_by_key[key] = entry

    submitted = {}

    def _submit():
        typed = {key: widget.get().strip() for key, widget in entry_by_key.items()}
        blank = [caption for caption, key in fields if not typed[key]]
        if blank:
            messagebox.showerror("Dados em falta", "Preencha: " + ", ".join(blank))
            return
        submitted.update(typed)
        window.destroy()

    continue_button = tk.Button(window, text="Continuar", command=_submit)
    continue_button.grid(row=len(fields), column=0, columnspan=2, pady=10)
    window.mainloop()

    # Still empty here means the loop ended without a successful submit.
    if not submitted:
        raise RuntimeError("Formulario cancelado")
    return submitted
|
||
# Show the course-data form at import/run time and keep the submitted values.
course_info = _collect_course_info()
|
||
def _to_int_or_str(s):
|
||
return int(s) if s.isdigit() else s
|
||
# Values typed into the form; digit-only entries are converted to int.
NOMEDOCURSOcurto = course_info["NOMEDOCURSO"]
DDi, MESi, AAAAi, DDf, MESf, AAAAf, NFORMANDOS = (
    _to_int_or_str(course_info[field_key])
    for field_key in ("DDi", "MESi", "AAAAi", "DDf", "MESf", "AAAAf", "NFORMANDOS")
)
FINALIDADECURSO = course_info["FINALIDADECURSO"]
MEDIAFINALCURSO = course_info["MEDIAFINALCURSO"]

# %%
# Create and hide a Tk root window before opening the file dialog.
Tk().withdraw()
file_path = askopenfilename(
    filetypes=[("Excel files", "*.xlsx *.xls")],
    title="Select Excel das expetativas iniciais",
)
if not file_path:
    raise FileNotFoundError("No file selected")
print(f"Selected file:{file_path}")

# %%
df = pd.read_excel(file_path)

# %%
# Row count minus one; the reason for the offset is not visible here.
ninq = len(df) - 1
# Rounded means of the columns at positions 10..15, in order.
(
    medalojamento,
    medalimentacao,
    meddificuldade,
    medfuncfut,
    medmotvpart,
    medconhecimento,
) = (round(df.iloc[:, column_position].mean(), 2) for column_position in range(10, 16))
|
||
|
||
# %%
|
||
def mean_columns_until_empty(df: pd.DataFrame, start_col_idx: int = 16) -> list[float]:
    """Return the means of consecutive columns starting at ``start_col_idx``.

    Each column is read by position and coerced to numeric (unparseable values
    become NaN).  The scan ends at the first column that has no numeric value.

    NOTE(review): a function with the same name and behaviour is defined
    earlier in this file; this definition rebinds the name.
    """
    collected = []
    for position in range(start_col_idx, df.shape[1]):
        values = pd.to_numeric(df.iloc[:, position], errors="coerce")
        if values.notna().sum() > 0:
            collected.append(values.mean())
            continue
        break
    return collected
|
||
|
||
# %%
|
||
# Means of the initial-survey columns from position 16 up to the first empty one.
medias = mean_columns_until_empty(df, start_col_idx=16)
print(len(medias), medias[:5])

# %%
# Create and hide a Tk root window before opening the second file dialog.
Tk().withdraw()
file_path2 = askopenfilename(
    filetypes=[("Excel files", "*.xlsx *.xls")],
    title="Select Excel das expetativas finais",
)
if not file_path2:
    raise FileNotFoundError("No file selected")
print(f"Selected file:\n{file_path2}")

# %%
df2 = pd.read_excel(file_path2)

# %%
# Row count minus one; the reason for the offset is not visible here.
ninq2 = len(df2) - 1
# Rounded means of the final-survey columns at positions 10..17, in order.
(
    medaplog,
    medalojamento2,
    medalimentacao2,
    medapdir,
    meddificuldade2,
    medfuncfut2,
    medmotvpart2,
    medconhecimento2,
) = (round(df2.iloc[:, column_position].mean(), 2) for column_position in range(10, 18))

# Final minus initial for the indicators present in both surveys.
medalojamentofinal = round(medalojamento2 - medalojamento, 2)
medalimentacaofinal = round(medalimentacao2 - medalimentacao, 2)
meddificuldadefinal = round(meddificuldade2 - meddificuldade, 2)
medfuncfutfinal = round(medfuncfut2 - medfuncfut, 2)
medmotvpartfinal = round(medmotvpart2 - medmotvpart, 2)
medconhecimentofinal = round(medconhecimento2 - medconhecimento, 2)

# Rounded means of the columns at positions 18..23, in order.
(
    objcruso,
    contcurso,
    adeqtrab,
    instform,
    audiovisuais,
    biblio,
) = (round(df2.iloc[:, column_position].mean(), 2) for column_position in range(18, 24))

# Text between the first "-" and the following en dash in row 1, column 4.
NOMEDOCURSO = df2.iloc[1, 4].split("-")[1].split("–")[0].strip()
|
||
|
||
# %%
|
||
def build_formadores_rows(df: pd.DataFrame):
|
||
groups = {}
|
||
for c in df.columns:
|
||
name = str(c)
|
||
low = name.lower()
|
||
if "_formador" not in low:
|
||
continue
|
||
m = re.match(r"^\s*(Q\d+)\s*_Formador\s*->\s*(.+?)\s*(?:\(|$)", name, flags=re.IGNORECASE)
|
||
if not m:
|
||
continue
|
||
qcode = m.group(1).upper()
|
||
metric_raw = m.group(2).strip().lower()
|
||
groups.setdefault(qcode, {})
|
||
groups[qcode][metric_raw] = name
|
||
def qnum(q):
|
||
mm = re.match(r"Q(\d+)", q)
|
||
return int(mm.group(1)) if mm else 10**9
|
||
qcodes_sorted = sorted(groups.keys(), key=qnum)
|
||
def metric_key(metric_raw: str) -> str | None:
|
||
mr = metric_raw.lower()
|
||
if "dom" in mr and "ass" in mr:
|
||
return "dominio"
|
||
if "métod" in mr or "metod" in mr:
|
||
return "metodos"
|
||
if "lingu" in mr:
|
||
return "linguagem"
|
||
if "empenh" in mr:
|
||
return "empenho"
|
||
if "relac" in mr or "formand" in mr:
|
||
return "relacao"
|
||
return None
|
||
rows = []
|
||
letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||
for idx, qcode in enumerate(qcodes_sorted):
|
||
cols_map = groups[qcode]
|
||
picked = {"dominio": None, "metodos": None, "linguagem": None, "empenho": None, "relacao": None}
|
||
for raw, colname in cols_map.items():
|
||
k = metric_key(raw)
|
||
if k and picked[k] is None:
|
||
picked[k] = colname
|
||
def col_mean(colname):
|
||
if not colname:
|
||
return float("nan")
|
||
return pd.to_numeric(df[colname], errors="coerce").mean()
|
||
dominio = col_mean(picked["dominio"])
|
||
metodos = col_mean(picked["metodos"])
|
||
linguagem = col_mean(picked["linguagem"])
|
||
empenho = col_mean(picked["empenho"])
|
||
relacao = col_mean(picked["relacao"])
|
||
vals = [dominio, metodos, linguagem, empenho, relacao]
|
||
media_final = pd.Series(vals, dtype="float").mean(skipna=True)
|
||
label = letters[idx] if idx < len(letters) else f"F{idx+1}"
|
||
rows.append({
|
||
"label": label,
|
||
"qcode": qcode,
|
||
"dominio": dominio,
|
||
"metodos": metodos,
|
||
"linguagem": linguagem,
|
||
"empenho": empenho,
|
||
"relacao": relacao,
|
||
"media_final": media_final,
|
||
})
|
||
return rows
|
||
|
||
# %%
|
||
def replace_placeholder_with_formadores_table(
    doc,
    df2: pd.DataFrame,
    placeholder: str = "{{tabelaFormadores}}",
    font_name: str = "Arial",
    font_size_pt: int = 12,
    col_width_cm: float = 2.3,
    header_vertical: bool = True,
    rotate_first_header: bool = True,
):
    """Swap the first ``placeholder`` found in ``doc`` for the trainers table.

    The table has a header row, one row per entry of
    ``build_formadores_rows(df2)`` (or a single "Sem dados" row) and a final
    "Média" row whose last cell holds the mean of the rows' ``media_final``
    (left blank when there are no rows).  Header cells are rotated with
    ``set_cell_text_vertical`` when ``header_vertical`` is true; the first
    header is skipped when ``rotate_first_header`` is false.

    Body paragraphs are searched first (table inserted after the paragraph,
    paragraph deleted); otherwise paragraphs inside the cells of
    ``doc.tables`` (cell text cleared, table added inside the cell).  Only the
    first match is handled.
    """
    center = WD_ALIGN_PARAGRAPH.CENTER
    left = WD_ALIGN_PARAGRAPH.LEFT

    rows = build_formadores_rows(df2)
    headers = ["Formadores", "Domínio do assunto","Métodos utilizados","Linguagem utilizada","Empenho","Relação c/ formandos","Média final",]
    global_mean = pd.Series([r["media_final"] for r in rows], dtype="float").mean(skipna=True)

    # Header + body rows (at least one) + summary row.
    n_rows = 1 + max(1, len(rows)) + 1
    numeric_keys = ("dominio", "metodos", "linguagem", "empenho", "relacao", "media_final")

    def _fill_cell(cell, text: str, bold=False, align=None):
        """Replace the cell content with one formatted run."""
        cell.text = ""
        first_paragraph = cell.paragraphs[0]
        if align is not None:
            first_paragraph.alignment = align
        force_run_font(first_paragraph.add_run(text), font_name, font_size_pt, bold=bold)
        apply_table_paragraph_spacing(cell, line_spacing=1.5)

    def _apply_layout(tbl):
        """Grid style, uniform column width and optional rotated headers."""
        tbl.style = "Table Grid"
        tbl.autofit = False
        set_table_all_columns_width(tbl, width_cm=col_width_cm)
        if not header_vertical:
            return
        first_rotated = 0 if rotate_first_header else 1
        for col_idx in range(first_rotated, len(headers)):
            set_cell_text_vertical(tbl.rows[0].cells[col_idx], direction="btLr")

    def _populate_table(tbl):
        """Write header, body rows and the closing "Média" row."""
        for col_idx, caption in enumerate(headers):
            _fill_cell(tbl.rows[0].cells[col_idx], caption, bold=True, align=center)

        if rows:
            for row_idx, entry in enumerate(rows, start=1):
                _fill_cell(tbl.rows[row_idx].cells[0], entry["label"], bold=False, align=center)
                for col_idx, key in enumerate(numeric_keys, start=1):
                    _fill_cell(
                        tbl.rows[row_idx].cells[col_idx],
                        format_pt_number(entry[key]),
                        bold=False,
                        align=center,
                    )
            summary_index = 1 + len(rows)
            summary_text, summary_bold = format_pt_number(global_mean), True
        else:
            _fill_cell(tbl.rows[1].cells[0], "Sem dados", bold=False, align=left)
            for col_idx in range(1, len(headers)):
                _fill_cell(tbl.rows[1].cells[col_idx], "", bold=False, align=center)
            summary_index = 2
            summary_text, summary_bold = "", False

        _fill_cell(tbl.rows[summary_index].cells[0], "Média", bold=True, align=left)
        for col_idx in range(1, len(headers) - 1):
            _fill_cell(tbl.rows[summary_index].cells[col_idx], "", bold=False, align=center)
        _fill_cell(tbl.rows[summary_index].cells[-1], summary_text, bold=summary_bold, align=center)

    def _render(tbl):
        _apply_layout(tbl)
        format_header_row(tbl.rows[0], height_cm=5.2)
        _populate_table(tbl)

    for paragraph in doc.paragraphs:
        if placeholder in paragraph.text:
            _render(insert_table_after_paragraph(paragraph, rows=n_rows, cols=len(headers)))
            delete_paragraph(paragraph)
            return

    for table in doc.tables:
        for table_row in table.rows:
            for cell in table_row.cells:
                for cell_paragraph in cell.paragraphs:
                    if placeholder in cell_paragraph.text:
                        cell.text = ""
                        _render(cell.add_table(rows=n_rows, cols=len(headers)))
                        return
|
||
|
||
# %%
|
||
def compute_formadores_summary(df: pd.DataFrame):
    """Summarise the trainer ratings of ``df``.

    Returns ``(count, mean_text, label)``: the number of rows produced by
    ``build_formadores_rows``, the mean of their ``media_final`` formatted with
    two decimals and a decimal comma, and its qualitative label.  Both strings
    are empty when the mean is NaN.
    """
    trainer_rows = build_formadores_rows(df)
    overall = pd.Series(
        [entry["media_final"] for entry in trainer_rows], dtype="float"
    ).mean(skipna=True)

    if pd.isna(overall):
        return len(trainer_rows), "", ""
    mean_text = f"{overall:.2f}".replace(".", ",")
    return len(trainer_rows), mean_text, avaliacao_qualitativa(overall)
|
||
|
||
# %%
|
||
def _copy_run_format(src_run, dst_run, keep_bold=None):
|
||
dst_run.bold = src_run.bold if keep_bold is None else keep_bold
|
||
dst_run.italic = src_run.italic
|
||
dst_run.underline = src_run.underline
|
||
if src_run.font.name:
|
||
dst_run.font.name = src_run.font.name
|
||
dst_run._element.rPr.rFonts.set(qn("w:eastAsia"), src_run.font.name)
|
||
if src_run.font.size:
|
||
dst_run.font.size = src_run.font.size
|
||
|
||
# %%
|
||
def extract_temas_list(df: pd.DataFrame) -> list[str]:
    """Return the distinct non-empty answers of every "_temas" column.

    Columns are selected by a case-insensitive substring match on their name.
    Values are converted to stripped strings, blanks are dropped, and
    duplicates are removed case-insensitively keeping the first spelling.
    """
    first_spelling = {}
    for column in df.columns:
        if "_temas" not in str(column).lower():
            continue
        answers = df[column].dropna().astype(str).str.strip()
        for answer in answers[answers != ""].tolist():
            # setdefault keeps the first occurrence for each lower-cased key.
            first_spelling.setdefault(answer.lower(), answer)
    return list(first_spelling.values())
|
||
|
||
# %%
|
||
def replace_placeholders_docx_bold_values_keep_style(doc, replacements: dict[str, str]):
    """Replace placeholder keys with bold values throughout ``doc``.

    Every body paragraph and every paragraph inside the cells of
    ``doc.tables`` is processed.  A paragraph containing at least one key has
    all its existing runs emptied and its text rebuilt as new runs: plain
    segments take the formatting of the paragraph's first run, replacement
    values take the same formatting with bold forced on.  At each step the key
    occurring earliest in the remaining text is replaced; on ties the longest
    key wins.

    Empty keys are ignored: an empty string is found at position 0 of any text
    and never consumes input, so it would keep the rebuild loop from ending.
    """
    # Longest first so a key that contains another key is preferred on ties.
    keys = sorted((k for k in replacements if k), key=len, reverse=True)

    def _replace_in_paragraph(paragraph):
        if not paragraph.runs:
            return
        full_text = "".join(run.text for run in paragraph.runs)
        if not any(k in full_text for k in keys):
            return

        base_run = paragraph.runs[0]
        for run in paragraph.runs:
            run.text = ""

        text = full_text
        while True:
            next_pos = None
            next_key = None
            for k in keys:
                pos = text.find(k)
                if pos != -1 and (next_pos is None or pos < next_pos):
                    next_pos, next_key = pos, k

            if next_key is None:
                # No key left: emit the tail (if any) and stop.
                if text:
                    r = paragraph.add_run(text)
                    _copy_run_format(base_run, r, keep_bold=base_run.bold)
                break

            before = text[:next_pos]
            if before:
                r = paragraph.add_run(before)
                _copy_run_format(base_run, r, keep_bold=base_run.bold)

            val = str(replacements[next_key])
            r_val = paragraph.add_run(val)
            _copy_run_format(base_run, r_val, keep_bold=True)
            text = text[next_pos + len(next_key):]

    for p in doc.paragraphs:
        _replace_in_paragraph(p)

    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                for p in cell.paragraphs:
                    _replace_in_paragraph(p)
|
||
|
||
# %%
|
||
def replace_placeholder_with_column_subitems_hanging(
    doc,
    df: pd.DataFrame,
    placeholder: str,
    column_contains: str,
    indent_cm: float = 0.5,
    font_name: str = "Arial",
    font_size_pt: int = 12,
    deduplicate: bool = True,
):
    """Replace the first ``placeholder`` paragraph with a lettered item list.

    Items are the non-empty, stripped string values of every column whose name
    contains ``column_contains`` (case-insensitive), optionally de-duplicated
    case-insensitively.  Each item becomes a paragraph "a. text;" with a left
    indent of ``indent_cm`` and a first-line indent of ``-indent_cm``; with no
    items a single "a. Sem dados;" paragraph is written.  The placeholder
    paragraph is deleted afterwards.

    Body paragraphs are searched first, then paragraphs inside the cells of
    ``doc.tables``; only the first match is processed.
    """
    items = []
    for column in df.columns:
        if column_contains.lower() not in str(column).lower():
            continue
        values = df[column].dropna().astype(str).str.strip()
        items.extend(values[values != ""].tolist())

    if deduplicate:
        first_spelling = {}
        for item in items:
            first_spelling.setdefault(item.lower(), item)
        items = list(first_spelling.values())

    letters = string.ascii_lowercase

    def _new_item_paragraph(after: Paragraph, pieces):
        """Insert a paragraph after ``after`` with one run per piece."""
        par = insert_paragraph_after(after)
        for piece in pieces:
            force_run_font(par.add_run(piece), font_name, font_size_pt)
        fmt = par.paragraph_format
        fmt.left_indent = Cm(indent_cm)
        fmt.first_line_indent = Cm(-indent_cm)
        fmt.line_spacing = 1.5
        fmt.space_before = Pt(0)
        fmt.space_after = Pt(0)
        return par

    def _expand(p: Paragraph) -> bool:
        if placeholder not in p.text:
            return False
        if items:
            anchor = p
            for position, text in enumerate(items):
                # Letters a-z first, then "a27", "a28", ...
                sub = letters[position] if position < 26 else f"a{position+1}"
                anchor = _new_item_paragraph(anchor, (f"{sub}. ", text, ";"))
        else:
            _new_item_paragraph(p, ("a. Sem dados;",))
        delete_paragraph(p)
        return True

    def _candidates():
        """Yield body paragraphs, then paragraphs inside table cells."""
        yield from doc.paragraphs
        for table in doc.tables:
            for table_row in table.rows:
                for cell in table_row.cells:
                    yield from cell.paragraphs

    for candidate in _candidates():
        if _expand(candidate):
            return
|
||
|
||
# %%
|
||
def replace_placeholder_with_column_subitems_hanging2(
    doc,
    df: pd.DataFrame,
    placeholder: str,
    column_contains: str,
    left_indent_cm: float = 2.75,
    hanging_cm: float = 0.6,
    font_name: str = "Arial",
    font_size_pt: int = 12,
    deduplicate: bool = True,
):
    """Replace the first ``placeholder`` paragraph with a lettered item list.

    Works like ``replace_placeholder_with_column_subitems_hanging`` but takes
    the left indent (``left_indent_cm``) and the negative first-line indent
    (``hanging_cm``) as separate values.

    Items are the non-empty, stripped string values of every column whose name
    contains ``column_contains`` (case-insensitive), optionally de-duplicated
    case-insensitively.  With no items a single "a. Sem dados;" paragraph is
    written.  Body paragraphs are searched first, then paragraphs inside the
    cells of ``doc.tables``; only the first match is processed.
    """
    matching_cols = [name for name in df.columns if column_contains.lower() in str(name).lower()]

    collected = []
    for name in matching_cols:
        texts = df[name].dropna().astype(str).str.strip()
        collected += texts[texts != ""].tolist()

    if deduplicate:
        known = set()
        unique = []
        for text in collected:
            folded = text.lower()
            if folded in known:
                continue
            known.add(folded)
            unique.append(text)
        collected = unique

    alphabet = string.ascii_lowercase

    def _format(par: Paragraph):
        layout = par.paragraph_format
        layout.left_indent = Cm(left_indent_cm)
        layout.first_line_indent = Cm(-hanging_cm)
        layout.line_spacing = 1.5
        layout.space_before = Pt(0)
        layout.space_after = Pt(0)

    def _write(par: Paragraph, text: str):
        new_run = par.add_run(text)
        force_run_font(new_run, font_name, font_size_pt)
        return new_run

    def _handle(target: Paragraph) -> bool:
        if placeholder not in target.text:
            return False

        if not collected:
            only = insert_paragraph_after(target)
            _write(only, "a. Sem dados;")
            _format(only)
            delete_paragraph(target)
            return True

        previous = target
        for index, text in enumerate(collected):
            marker = alphabet[index] if index < 26 else f"a{index+1}"
            item_par = insert_paragraph_after(previous)
            _write(item_par, f"{marker}. ")
            _write(item_par, text)
            _write(item_par, ";")
            _format(item_par)
            # Insert the next item after this one to preserve order.
            previous = item_par

        delete_paragraph(target)
        return True

    for body_paragraph in doc.paragraphs:
        if _handle(body_paragraph):
            return

    for table in doc.tables:
        for table_row in table.rows:
            for cell in table_row.cells:
                for cell_paragraph in cell.paragraphs:
                    if _handle(cell_paragraph):
                        return
|
||
|
||
# %%
|
||
def replace_placeholder_with_temas_smart(
    doc,
    df: pd.DataFrame,
    placeholder: str = "{{TEMAS}}",
    indent_cm: float = 0.5,
    font_name: str = "Arial",
    font_size_pt: int = 12,
    deduplicate: bool = True,
    max_items: int | None = None,
    **_ignored,
):
    """Replace ``placeholder`` with the list of themes extracted from ``df``.

    Themes come from ``extract_temas_list``; they are stripped, optionally
    de-duplicated case-insensitively and truncated to ``max_items``.  The
    result (or the single item "Sem dados" when nothing is left) is rendered
    through ``replace_placeholder_with_column_subitems_hanging``, whose return
    value is passed back.  Extra keyword arguments are accepted and ignored.
    """
    temas = []
    for raw in extract_temas_list(df) or []:
        text = str(raw).strip()
        if text:
            temas.append(text)

    if deduplicate:
        first_spelling = {}
        for tema in temas:
            first_spelling.setdefault(tema.lower(), tema)
        temas = list(first_spelling.values())

    if max_items is not None:
        temas = temas[:max_items]

    # Wrap the items in a one-column frame so the generic renderer can be reused.
    df_tmp = pd.DataFrame({"_Temas": temas if temas else ["Sem dados"]})
    return replace_placeholder_with_column_subitems_hanging(
        doc,
        df_tmp,
        placeholder=placeholder,
        column_contains="_temas",
        indent_cm=indent_cm,
        font_name=font_name,
        font_size_pt=font_size_pt,
        deduplicate=False,
    )
|
||
|
||
# %%
|
||
def extract_desenvolver_list(df: pd.DataFrame) -> list[str]:
    """Return the distinct non-empty answers of every "_desenvolver" column.

    Column names are matched case-insensitively.  Values are converted to
    stripped strings; duplicates are dropped case-insensitively, keeping the
    first spelling encountered.
    """
    matching = [name for name in df.columns if "_desenvolver" in str(name).lower()]

    answers = []
    for name in matching:
        cleaned = df[name].dropna().astype(str).str.strip()
        answers += cleaned[cleaned != ""].tolist()

    already_seen = set()
    distinct = []
    for answer in answers:
        folded = answer.lower()
        if folded in already_seen:
            continue
        already_seen.add(folded)
        distinct.append(answer)
    return distinct
|
||
|
||
# %%
|
||
def ollama_summarize_desenvolver(
    items: list[str],
    max_items: int = 6,
    model: str = "llama3.1:8b",
    timeout: float = 30.0
) -> list[str]:
    """Ask the local Ollama server to condense the "to develop" answers.

    Sends a non-streaming request to ``/api/generate`` on localhost:11434 with
    a Portuguese prompt listing ``items`` and asking for at most ``max_items``
    bullet points.  Lines of the reply starting with "-" or "•" are returned
    without the bullet and without surrounding spaces, dots or semicolons,
    truncated to ``max_items``.

    Raises whatever ``requests.post`` / ``raise_for_status`` raise.
    """
    items_txt = "\n".join(f"- {t}" for t in items)
    # NOTE(review): indentation inside this literal could not be recovered from
    # the extracted source - confirm against the original file.
    prompt = f"""
És um analista a escrever um relatório oficial de avaliação.

Tens uma lista de aspetos a desenvolver/melhorar apontados pelos formandos. Faz o seguinte:
1) Agrupa itens repetidos/semelhantes;
2) Seleciona os mais importantes e recorrentes;
3) Reescreve numa lista curta, clara e formal (Português de Portugal);
4) NÃO inventes novos pontos;
5) No máximo {max_items} itens;
6) Frases curtas, em formato de sintagma nominal (ex.: "Melhoria da componente prática", "Aprofundamento de ...").

Itens:
{items_txt}

Devolve APENAS a lista final no formato:
- Item 1
- Item 2
- Item 3
""".strip()

    response = requests.post(
        "http://localhost:11434/api/generate",
        json={
            "model": model,
            "prompt": prompt,
            "stream": False,
            "options": {"temperature": 0.2},
        },
        timeout=timeout,
    )
    response.raise_for_status()
    reply = response.json().get("response", "")

    bullets = []
    for raw_line in reply.splitlines():
        stripped = raw_line.strip()
        if not stripped.startswith(("-", "•")):
            continue
        cleaned = stripped.lstrip("-•").strip(" .;")
        if cleaned:
            bullets.append(cleaned)
    return bullets[:max_items]
|
||
|
||
# %%
|
||
def replace_placeholder_with_desenvolver_smart(
    doc,
    df: pd.DataFrame,
    placeholder: str = "{{DESENVOLVER}}",
    indent_cm: float = 1.5,
    font_name: str = "Arial",
    font_size_pt: int = 12,
    deduplicate: bool = True,
    use_ollama: bool = True,
    ollama_model: str = "llama3.1:8b",
    max_items: int = 6,
):
    """Replace ``placeholder`` with the "to develop" list, summarised if possible.

    When ``use_ollama`` is true and ``ollama_available()`` succeeds, the raw
    answers are condensed with ``ollama_summarize_desenvolver`` and that list
    is rendered.  If there are no answers, Ollama is unavailable, the call
    raises, or it returns nothing, the raw "_desenvolver" columns of ``df``
    are rendered instead.  Rendering is done by
    ``replace_placeholder_with_column_subitems_hanging``; its return value is
    passed back.
    """
    def _render(source_df, dedupe):
        return replace_placeholder_with_column_subitems_hanging(
            doc, source_df,
            placeholder=placeholder,
            column_contains="_desenvolver",
            indent_cm=indent_cm,
            font_name=font_name,
            font_size_pt=font_size_pt,
            deduplicate=dedupe,
        )

    raw = extract_desenvolver_list(df)
    if not raw:
        return _render(df, deduplicate)

    summarized = None
    if use_ollama and ollama_available():
        try:
            summarized = ollama_summarize_desenvolver(
                raw, max_items=max_items, model=ollama_model
            ) or None
        except Exception:
            # Best effort: any failure falls back to the raw answers.
            summarized = None

    if summarized is None:
        return _render(df, deduplicate)
    return _render(pd.DataFrame({"_desenvolver": summarized}), False)
|
||
|
||
# %%
|
||
def extract_incluir_list(df: pd.DataFrame) -> list[str]:
    """Collect the non-empty answers from every column whose name contains "_incluir".

    Column names are matched case-insensitively. Values are converted to
    stripped strings, and duplicates (compared case-insensitively) are dropped
    while keeping the first spelling and the original order.
    """
    collected = []
    for name in df.columns:
        if "_incluir" not in str(name).lower():
            continue
        values = df[name].dropna().astype(str).str.strip()
        collected.extend(values[values != ""].tolist())

    # dict preserves insertion order, so the first spelling of each key wins.
    first_by_key = {}
    for text in collected:
        first_by_key.setdefault(text.lower(), text)
    return list(first_by_key.values())
|
||
|
||
# %%
|
||
def ollama_summarize_incluir(
    items: list[str],
    max_items: int = 6,
    model: str = "llama3.1:8b",
    timeout: float = 30.0
) -> list[str]:
    """Ask the local Ollama server to condense the "incluir" suggestions.

    Sends one non-streaming request to ``/api/generate`` and keeps only the
    response lines that start with "-" or "•", with the bullet marker and
    trailing " .;" characters removed. At most ``max_items`` items are
    returned. HTTP errors propagate through ``raise_for_status``.
    """
    items_txt = "\n".join(f"- {t}" for t in items)
    prompt = f"""
És um analista a escrever um relatório oficial de avaliação.

Tens uma lista de conteúdos/temas que os formandos sugerem que sejam incluídos. Faz o seguinte:
1) Agrupa itens repetidos/semelhantes;
2) Seleciona os mais importantes e recorrentes;
3) Reescreve numa lista curta, clara e formal (Português de Portugal);
4) NÃO inventes novos pontos;
5) No máximo {max_items} itens;
6) Frases curtas e objetivas.

Itens:
{items_txt}

Devolve APENAS a lista final no formato:
- Item 1
- Item 2
- Item 3
""".strip()

    response = requests.post(
        "http://localhost:11434/api/generate",
        json={
            "model": model,
            "prompt": prompt,
            "stream": False,
            "options": {"temperature": 0.2},
        },
        timeout=timeout,
    )
    response.raise_for_status()
    answer = response.json().get("response", "")

    bullets = []
    for raw_line in answer.splitlines():
        candidate = raw_line.strip()
        if not candidate.startswith(("-", "•")):
            # Ignore any prose the model adds around the list.
            continue
        cleaned = candidate.lstrip("-•").strip(" .;")
        if cleaned:
            bullets.append(cleaned)

    return bullets[:max_items]
|
||
|
||
# %%
|
||
def replace_placeholder_with_incluir_smart(
    doc,
    df: pd.DataFrame,
    placeholder: str = "{{INCLUIR}}",
    indent_cm: float = 1.5,
    font_name: str = "Arial",
    font_size_pt: int = 12,
    deduplicate: bool = True,
    use_ollama: bool = True,
    ollama_model: str = "llama3.1:8b",
    max_items: int = 6,
):
    """Fill ``placeholder`` with the "_incluir" answers, summarised by Ollama when possible.

    The raw answers are rendered directly from ``df`` when there is nothing to
    summarise, when Ollama is disabled or unreachable, or when the summary call
    fails or returns nothing. Otherwise the summarised items are rendered from
    a temporary one-column frame. Returns whatever
    ``replace_placeholder_with_column_subitems_hanging`` returns.

    NOTE: a later definition with the same name appears further down this
    file; when the file runs top to bottom, that later definition replaces
    this one.
    """
    layout = dict(
        placeholder=placeholder,
        column_contains="_incluir",
        indent_cm=indent_cm,
        font_name=font_name,
        font_size_pt=font_size_pt,
    )

    def _render_raw_answers():
        # Fallback path: let the generic renderer read the original frame.
        return replace_placeholder_with_column_subitems_hanging(
            doc, df, deduplicate=deduplicate, **layout
        )

    raw = extract_incluir_list(df)
    if not raw:
        return _render_raw_answers()

    summarised = None
    if use_ollama and ollama_available():
        try:
            # An empty summary counts as "no summary".
            summarised = ollama_summarize_incluir(
                raw, max_items=max_items, model=ollama_model
            ) or None
        except Exception:
            # Best effort: any LLM failure falls back to the raw answers.
            summarised = None

    if summarised is None:
        return _render_raw_answers()

    summary_frame = pd.DataFrame({"_incluir": summarised})
    return replace_placeholder_with_column_subitems_hanging(
        doc, summary_frame, deduplicate=False, **layout
    )
|
||
|
||
# %%
|
||
def ollama_summarize_observacoes_paragraph(
    items: list[str],
    model: str = "llama3.1:8b",
    timeout: float = 45.0
) -> str:
    """Ask the local Ollama server for a one-paragraph synthesis of the observations.

    Sends one non-streaming request to ``/api/generate`` and collapses the
    answer onto a single line: blank lines are dropped and the remaining
    stripped lines are joined with single spaces. HTTP errors propagate
    through ``raise_for_status``.
    """
    items_txt = "\n".join(f"- {t}" for t in items)

    prompt = f"""
És um analista a escrever um relatório oficial de avaliação.

Tens observações livres escritas pelos formandos. Produz um ÚNICO PARÁGRAFO de síntese:
- Português de Portugal, tom formal e objetivo;
- Não inventes informação;
- Agrupa ideias repetidas;
- Evita exemplos pessoais e detalhes identificáveis;
- 3 a 6 frases, no máximo ~120 palavras.

Observações:
{items_txt}
Devolve APENAS o parágrafo final (sem tópicos, sem títulos, sem listas).
""".strip()

    response = requests.post(
        "http://localhost:11434/api/generate",
        json={
            "model": model,
            "prompt": prompt,
            "stream": False,
            "options": {"temperature": 0.2},
        },
        timeout=timeout,
    )
    response.raise_for_status()

    answer = response.json().get("response", "").strip()
    fragments = [piece.strip() for piece in answer.splitlines()]
    # Flatten a multi-line answer into one paragraph.
    return " ".join(fragment for fragment in fragments if fragment)
|
||
|
||
# %%
|
||
def extract_observacoes_list(df: pd.DataFrame) -> list[str]:
    """Collect the non-empty answers from every column whose name contains "_observ".

    Column names are matched case-insensitively. Values are converted to
    stripped strings, and duplicates (compared case-insensitively) are dropped
    while keeping the first spelling and the original order.
    """
    collected = []
    for name in df.columns:
        if "_observ" not in str(name).lower():
            continue
        values = df[name].dropna().astype(str).str.strip()
        collected.extend(values[values != ""].tolist())

    # dict preserves insertion order, so the first spelling of each key wins.
    first_by_key = {}
    for text in collected:
        first_by_key.setdefault(text.lower(), text)
    return list(first_by_key.values())
|
||
|
||
# %%
|
||
def _iter_paragraphs_in_table(tbl):
|
||
for row in tbl.rows:
|
||
for cell in row.cells:
|
||
for p in cell.paragraphs:
|
||
yield p
|
||
for t in cell.tables:
|
||
yield from _iter_paragraphs_in_table(t)
|
||
def iter_all_paragraphs_everywhere(doc):
    """Yield paragraphs from the body, body tables, and every section header/footer.

    Order: body paragraphs, then paragraphs inside body tables, then for each
    section its default, first-page and even-page headers and footers
    (paragraphs first, then their tables).
    """
    yield from doc.paragraphs
    for table in doc.tables:
        yield from _iter_paragraphs_in_table(table)

    for section in doc.sections:
        parts = (
            section.header,
            section.footer,
            section.first_page_header,
            section.first_page_footer,
            section.even_page_header,
            section.even_page_footer,
        )
        for part in parts:
            yield from part.paragraphs
            for table in part.tables:
                yield from _iter_paragraphs_in_table(table)
|
||
|
||
# %%
|
||
def replace_placeholder_with_observacoes_smart(
    doc,
    df: pd.DataFrame,
    placeholder: str = "{{OBSERVACOES}}",
    indent_cm: float = 0.5,
    font_name: str = "Arial",
    font_size_pt: int = 12,
    deduplicate: bool = True,
    use_ollama: bool = True,
    ollama_model: str = "llama3.1:8b",
    max_items: int | None = None,
) -> int:
    """Replace every paragraph containing ``placeholder`` with the observations.

    When Ollama is enabled, reachable and returns text, a single summary
    paragraph is written. Otherwise the observations are written as lettered
    sub-items ("a. ...;"), or "Sem dados" when there are none. The placeholder
    paragraph itself is deleted. Returns the number of placeholder paragraphs
    processed.
    """
    stripped = (str(entry).strip() for entry in (extract_observacoes_list(df) or []))
    items = [entry for entry in stripped if entry]

    if deduplicate:
        # Case-insensitive de-duplication that keeps the first spelling.
        first_by_key = {}
        for entry in items:
            first_by_key.setdefault(entry.lower(), entry)
        items = list(first_by_key.values())

    if max_items is not None:
        items = items[:max_items]

    paragraph_text = None
    if items and use_ollama and ollama_available():
        try:
            paragraph_text = ollama_summarize_observacoes_paragraph(items, model=ollama_model)
            if paragraph_text:
                paragraph_text = paragraph_text.strip()
        except Exception:
            # Best effort: any LLM failure falls back to the lettered list.
            paragraph_text = None

    labels = string.ascii_lowercase

    def _format(par: Paragraph, hanging=False):
        layout = par.paragraph_format
        layout.left_indent = Cm(indent_cm)
        if hanging:
            layout.first_line_indent = Cm(-0.6)
        layout.line_spacing = 1.5
        layout.space_before = Pt(0)
        layout.space_after = Pt(0)

    def _write(par: Paragraph, text: str, bold=False):
        run = par.add_run(text)
        force_run_font(run, font_name, font_size_pt, bold=bold)
        return run

    processed = 0
    # Snapshot the paragraphs first because the loop inserts and deletes some.
    for target in list(iter_all_paragraphs_everywhere(doc)):
        if placeholder not in (target.text or ""):
            continue

        anchor = target
        if paragraph_text:
            summary_par = insert_paragraph_after(anchor)
            _write(summary_par, paragraph_text)
            _format(summary_par, hanging=False)
        else:
            for idx, text in enumerate(items if items else ["Sem dados"]):
                label = labels[idx] if idx < 26 else f"a{idx+1}"
                item_par = insert_paragraph_after(anchor)
                _write(item_par, f"{label}. ")
                _write(item_par, text.rstrip(".;"))
                _write(item_par, ";")
                _format(item_par, hanging=True)
                anchor = item_par

        delete_paragraph(target)
        processed += 1

    return processed
|
||
|
||
# %%
|
||
def build_melhoria_checklist_items(
|
||
temas_items: list[str] | None,
|
||
desenvolver_items: list[str] | None,
|
||
incluir_items: list[str] | None,
|
||
observacoes_text_or_items: str | list[str] | None = None,
|
||
use_ollama: bool = True,
|
||
ollama_model: str = "llama3.1:8b",
|
||
max_items: int = 10,
|
||
):
|
||
temas_items = [x.strip() for x in (temas_items or []) if str(x).strip()]
|
||
desenvolver_items = [x.strip() for x in (desenvolver_items or []) if str(x).strip()]
|
||
incluir_items = [x.strip() for x in (incluir_items or []) if str(x).strip()]
|
||
if isinstance(observacoes_text_or_items, str):
|
||
observacoes_text = observacoes_text_or_items.strip()
|
||
elif isinstance(observacoes_text_or_items, list):
|
||
obs_list = [str(x).strip() for x in observacoes_text_or_items if str(x).strip()]
|
||
observacoes_text = "\n".join(f"- {x}" for x in obs_list)
|
||
else:
|
||
observacoes_text = ""
|
||
if use_ollama and ollama_available():
|
||
try:
|
||
temas_txt = "\n".join(f"- {t}" for t in temas_items)
|
||
des_txt = "\n".join(f"- {t}" for t in desenvolver_items)
|
||
inc_txt = "\n".join(f"- {t}" for t in incluir_items)
|
||
prompt = f"""
|
||
És um analista a escrever um relatório oficial de avaliação.
|
||
|
||
A partir dos seguintes outputs, cria uma CHECKLIST de melhorias (ações).
|
||
Regras:
|
||
- Não inventes pontos.
|
||
- Junta redundâncias.
|
||
- Escreve cada item como ação (ex.: "Reforçar ...", "Incluir ...", "Aprofundar ...", "Melhorar ...").
|
||
- No máximo {max_items} itens.
|
||
- Devolve APENAS lista em bullets "- ...".
|
||
|
||
TEMAS:
|
||
{temas_txt}
|
||
|
||
A DESENVOLVER:
|
||
{des_txt}
|
||
|
||
A INCLUIR:
|
||
{inc_txt}
|
||
|
||
OBSERVAÇÕES:
|
||
{observacoes_text}
|
||
""".strip()
|
||
|
||
payload = {
|
||
"model": ollama_model,
|
||
"prompt": prompt,
|
||
"stream": False,
|
||
"options": {"temperature": 0.2},
|
||
}
|
||
r = requests.post("http://localhost:11434/api/generate", json=payload, timeout=45.0)
|
||
r.raise_for_status()
|
||
text = r.json().get("response", "").strip()
|
||
|
||
llm_items = []
|
||
for line in text.splitlines():
|
||
line = line.strip()
|
||
if line.startswith(("-", "•")):
|
||
item = line.lstrip("-•").strip(" .;")
|
||
if item:
|
||
llm_items.append(item)
|
||
|
||
llm_items = llm_items[:max_items]
|
||
if llm_items:
|
||
return llm_items
|
||
except Exception:
|
||
pass
|
||
merged = temas_items + desenvolver_items + incluir_items
|
||
seen = set()
|
||
out = []
|
||
for x in merged:
|
||
x = x.strip().strip("•-").strip()
|
||
if not x:
|
||
continue
|
||
k = x.lower()
|
||
if k not in seen:
|
||
seen.add(k)
|
||
out.append(x)
|
||
return out[:max_items] if out else ["Sem dados"]
|
||
|
||
# %%
|
||
def replace_placeholder_with_checklist_subitems(
    doc,
    checklist_items: list[str],
    placeholder: str = "{{CHECKLIST_MELHORAR}}",
    indent_cm: float = 0.5,
    font_name: str = "Arial",
    font_size_pt: int = 12,
    checkbox: str = "-",
):
    """Replace the first paragraph containing ``placeholder`` with lettered checklist items.

    Body paragraphs are searched first, then the paragraphs of top-level table
    cells. Each item is written as "<letter>. <checkbox> <text>;" with a
    hanging indent, and the placeholder paragraph is removed. Only the first
    match is processed; "Sem dados" is written when the list is empty.
    """
    labels = string.ascii_lowercase

    def _format(par: Paragraph):
        layout = par.paragraph_format
        layout.left_indent = Cm(indent_cm)
        layout.first_line_indent = Cm(-indent_cm)
        layout.line_spacing = 1.5
        layout.space_before = Pt(0)
        layout.space_after = Pt(0)

    def _write(par: Paragraph, text: str):
        run = par.add_run(text)
        force_run_font(run, font_name, font_size_pt)
        return run

    def _expand(target: Paragraph) -> bool:
        if placeholder not in target.text:
            return False

        anchor = target
        for idx, entry in enumerate(checklist_items or ["Sem dados"]):
            label = labels[idx] if idx < 26 else f"a{idx+1}"
            item_par = insert_paragraph_after(anchor)
            _write(item_par, f"{label}. {checkbox} ")
            _write(item_par, entry.strip().rstrip(".;"))
            _write(item_par, ";")
            _format(item_par)
            anchor = item_par

        delete_paragraph(target)
        return True

    def _candidates():
        # Body paragraphs first, then paragraphs of top-level table cells.
        yield from doc.paragraphs
        for table in doc.tables:
            for row in table.rows:
                for cell in row.cells:
                    yield from cell.paragraphs

    for candidate in _candidates():
        if _expand(candidate):
            return
|
||
|
||
# %%
|
||
def replace_placeholder_with_observacoes_smart2(
    doc,
    df: pd.DataFrame,
    placeholder: str = "{{OBSERVACOES2}}",
    indent_cm: float = 2.75,
    font_name: str = "Arial",
    font_size_pt: int = 12,
    deduplicate: bool = True,
    max_items: int | None = None,
    **_ignored,
):
    """Render the observations found in ``df`` as hanging sub-items at ``placeholder``.

    The observations are cleaned, optionally de-duplicated
    (case-insensitively) and capped at ``max_items``; "Sem dados" is used
    when none remain. Extra keyword arguments are accepted and ignored.

    Fix: the prepared frame and the caller's ``placeholder``, ``indent_cm``,
    ``font_name`` and ``font_size_pt`` are now forwarded to the renderer.
    Previously the global ``df2`` and hard-coded literals were passed, so
    ``df`` and those arguments had no effect.

    Returns whatever ``replace_placeholder_with_column_subitems_hanging2``
    returns.
    """
    obs_raw = extract_observacoes_list(df)
    itens = [str(t).strip() for t in (obs_raw or []) if str(t).strip()]

    if deduplicate:
        # Keep the first spelling of each case-insensitive duplicate.
        first_by_key = {}
        for t in itens:
            first_by_key.setdefault(t.lower(), t)
        itens = list(first_by_key.values())

    if max_items is not None:
        itens = itens[:max_items]

    if not itens:
        itens = ["Sem dados"]

    df_tmp = pd.DataFrame({"_Observacoes": itens})
    return replace_placeholder_with_column_subitems_hanging2(
        doc,
        df_tmp,
        placeholder=placeholder,
        column_contains="_observa",
        left_indent_cm=indent_cm,
        hanging_cm=0.6,
        font_name=font_name,
        font_size_pt=font_size_pt,
        # Already de-duplicated (or deliberately not) above.
        deduplicate=False,
    )
|
||
|
||
# %%
|
||
def replace_placeholder_with_incluir_smart(
    doc,
    df: pd.DataFrame,
    placeholder: str = "{{INCLUIR}}",
    indent_cm: float = 0.5,
    font_name: str = "Arial",
    font_size_pt: int = 12,
    deduplicate: bool = True,
    max_items: int | None = None,
    **_ignored,
):
    """Render the "_incluir" answers of ``df`` as hanging sub-items at ``placeholder``.

    No LLM is involved: the answers are cleaned, optionally de-duplicated
    (case-insensitively) and capped at ``max_items``; "Sem dados" is used when
    none remain. Extra keyword arguments are accepted and ignored. Returns
    whatever ``replace_placeholder_with_column_subitems_hanging`` returns.
    """
    stripped = (str(entry).strip() for entry in (extract_incluir_list(df) or []))
    entries = [entry for entry in stripped if entry]

    if deduplicate:
        # Keep the first spelling of each case-insensitive duplicate.
        first_by_key = {}
        for entry in entries:
            first_by_key.setdefault(entry.lower(), entry)
        entries = list(first_by_key.values())

    if max_items is not None:
        entries = entries[:max_items]

    frame = pd.DataFrame({"_Incluir": entries or ["Sem dados"]})
    return replace_placeholder_with_column_subitems_hanging(
        doc,
        frame,
        placeholder=placeholder,
        column_contains="_incluir",
        indent_cm=indent_cm,
        font_name=font_name,
        font_size_pt=font_size_pt,
        deduplicate=False,
    )
|
||
|
||
# %%
|
||
def replace_placeholder_with_desenvolver_smart(
    doc,
    df: pd.DataFrame,
    placeholder: str = "{{DESENVOLVER}}",
    indent_cm: float = 0.5,
    font_name: str = "Arial",
    font_size_pt: int = 12,
    deduplicate: bool = True,
    max_items: int | None = None,
    **_ignored,
):
    """Render the "_desenvolver" answers of ``df`` as hanging sub-items at ``placeholder``.

    No LLM is involved: the answers are cleaned, optionally de-duplicated
    (case-insensitively) and capped at ``max_items``; "Sem dados" is used when
    none remain. Extra keyword arguments are accepted and ignored. Returns
    whatever ``replace_placeholder_with_column_subitems_hanging`` returns.
    """
    stripped = (str(entry).strip() for entry in (extract_desenvolver_list(df) or []))
    entries = [entry for entry in stripped if entry]

    if deduplicate:
        # Keep the first spelling of each case-insensitive duplicate.
        first_by_key = {}
        for entry in entries:
            first_by_key.setdefault(entry.lower(), entry)
        entries = list(first_by_key.values())

    if max_items is not None:
        entries = entries[:max_items]

    frame = pd.DataFrame({"_Desenvolver": entries or ["Sem dados"]})
    return replace_placeholder_with_column_subitems_hanging(
        doc,
        frame,
        placeholder=placeholder,
        column_contains="_desenvolver",
        indent_cm=indent_cm,
        font_name=font_name,
        font_size_pt=font_size_pt,
        deduplicate=False,
    )
|
||
|
||
# %%
|
||
def delete_all_between_anchors_xml(
    doc,
    start="{{ANCORA1}}",
    end="{{ANCORA2}}",
    debug=False,
    max_passes=10_000,
) -> int:
    """Remove every run of body-level blocks delimited by ``start`` and ``end``.

    Each pass looks for the first direct child of the document body whose text
    contains ``start`` and the first later child whose text contains ``end``,
    then removes both and everything between them. Passes repeat until no
    complete pair remains or ``max_passes`` is reached. The end anchor is only
    looked for in children after the one holding the start anchor. Returns the
    number of removed body children.

    NOTE: a later definition with the same name (different default anchors)
    appears further down this file and replaces this one when the file runs
    top to bottom.
    """
    body = doc._element.body

    def _mentions(node, needle: str) -> bool:
        # Join all text nodes so an anchor split across runs still matches.
        return needle in "".join(node.xpath(".//*[local-name()='t']/text()"))

    def _locate(nodes):
        first = last = None
        for pos, node in enumerate(nodes):
            if first is None:
                if _mentions(node, start):
                    first = pos
                    if debug:
                        print(f"[DEBUG] start in child {pos} tag={node.tag}")
                continue
            if _mentions(node, end):
                last = pos
                if debug:
                    print(f"[DEBUG] end in child {pos} tag={node.tag}")
                break
        return first, last

    removed = 0
    passes_left = max_passes
    while passes_left > 0:
        passes_left -= 1
        nodes = list(body.iterchildren())
        first, last = _locate(nodes)
        if first is None or last is None:
            if debug:
                print("[DEBUG] done. start/end:", first, last)
            break
        for node in reversed(nodes[first:last + 1]):
            body.remove(node)
            removed += 1
    return removed
|
||
|
||
# %%
|
||
def delete_all_between_anchors_xml(
    doc,
    start="{{ANCORA3}}",
    end="{{ANCORA4}}",
    debug=False,
    max_passes=10_000,
) -> int:
    """Remove every run of body-level blocks delimited by ``start`` and ``end``.

    Each pass looks for the first direct child of the document body whose text
    contains ``start`` and the first later child whose text contains ``end``,
    then removes both and everything between them. Passes repeat until no
    complete pair remains or ``max_passes`` is reached. The end anchor is only
    looked for in children after the one holding the start anchor. Returns the
    number of removed body children.
    """
    body = doc._element.body

    def _mentions(node, needle: str) -> bool:
        # Join all text nodes so an anchor split across runs still matches.
        return needle in "".join(node.xpath(".//*[local-name()='t']/text()"))

    def _locate(nodes):
        first = last = None
        for pos, node in enumerate(nodes):
            if first is None:
                if _mentions(node, start):
                    first = pos
                    if debug:
                        print(f"[DEBUG] start in child {pos} tag={node.tag}")
                continue
            if _mentions(node, end):
                last = pos
                if debug:
                    print(f"[DEBUG] end in child {pos} tag={node.tag}")
                break
        return first, last

    removed = 0
    passes_left = max_passes
    while passes_left > 0:
        passes_left -= 1
        nodes = list(body.iterchildren())
        first, last = _locate(nodes)
        if first is None or last is None:
            if debug:
                print("[DEBUG] done. start/end:", first, last)
            break
        for node in reversed(nodes[first:last + 1]):
            body.remove(node)
            removed += 1
    return removed
|
||
|
||
# %%
|
||
def _iter_paragraphs_in_table(tbl):
|
||
for row in tbl.rows:
|
||
for cell in row.cells:
|
||
for p in cell.paragraphs:
|
||
yield p
|
||
for t in cell.tables:
|
||
yield from _iter_paragraphs_in_table(t)
|
||
def iter_all_paragraphs_everywhere(doc):
    """Yield paragraphs from the body, body tables, and every section header/footer.

    Order: body paragraphs, then paragraphs inside body tables, then for each
    section its default, first-page and even-page headers and footers
    (paragraphs first, then their tables).
    """
    yield from doc.paragraphs
    for table in doc.tables:
        yield from _iter_paragraphs_in_table(table)

    for section in doc.sections:
        parts = (
            section.header,
            section.footer,
            section.first_page_header,
            section.first_page_footer,
            section.even_page_header,
            section.even_page_footer,
        )
        for part in parts:
            yield from part.paragraphs
            for table in part.tables:
                yield from _iter_paragraphs_in_table(table)
|
||
|
||
# %%
|
||
# Ask the user for the trainers' Excel workbook; the Tk root window is hidden
# so that only the file dialog is shown.
Tk().withdraw()
file_path3 = askopenfilename(
    title="Select Excel das Formadores",
    filetypes=[("Excel files", "*.xlsx *.xls")],
)

if file_path3:
    print(f"Selected file:\n{file_path3}")
    df3 = pd.read_excel(file_path3)
else:
    # The dialog was cancelled: keep going with no data.
    print("Nenhum ficheiro selecionado.")
    df3 = None
|
||
|
||
# %%
|
||
# Statistics for the trainers' questionnaire (df3). Every value defaults to 0
# so later cells can use them even when no file was selected.
df3 = None
ninq3 = 0
(
    medpub, medmeiosaux, medapform, medapdc, medobjesp, medmetensi,
    medtempform, medlocaisform, medlançaaval, medtipoaval, medtempoaval,
    medobjapre, medadqonjesp, medinterforma,
) = (0,) * 14
(
    prerequesitos, Conteudo, objgeral, objfinal, objadq, avadq, refere,
) = (0,) * 7

if not file_path3:
    print("Nenhum ficheiro selecionado (df3). Valores definidos a 0.")
else:
    df3 = pd.read_excel(file_path3)
    ninq3 = df3.shape[0]

    # Column means for positional columns 12..25, rounded to 2 decimals.
    (
        medpub, medmeiosaux, medapform, medapdc, medobjesp, medmetensi,
        medtempform, medlocaisform, medlançaaval, medtipoaval, medtempoaval,
        medobjapre, medadqonjesp, medinterforma,
    ) = [round(df3.iloc[:, col].mean(), 2) for col in range(12, 26)]

    if ninq3 > 0:
        # Percentage of rows equal to 1 in positional columns 26..32.
        (
            prerequesitos, Conteudo, objgeral, objfinal, objadq, avadq, refere,
        ) = [
            round((df3.iloc[:, col].eq(1).sum() / ninq3) * 100, 2)
            for col in range(26, 33)
        ]
|
||
|
||
# %%
|
||
# Ask the user for the course-direction Excel workbook; the Tk root window is
# hidden so that only the file dialog is shown.
Tk().withdraw()
file_path4 = askopenfilename(
    title="Select Excel da Direção de Curso",
    filetypes=[("Excel files", "*.xlsx *.xls")],
)

if file_path4:
    print(f"Selected file:\n{file_path4}")
    df4 = pd.read_excel(file_path4)
else:
    # The dialog was cancelled: keep going with no data.
    print("Nenhum ficheiro selecionado.")
    df4 = None
|
||
|
||
# %%
|
||
# Statistics for the course-direction questionnaire (df4). Every value
# defaults to 0 so later cells can use them even when no file was selected.
df4 = None
ninq4 = 0
(
    medprogcurso, medcontcurso, medestrcurso, medutilprat, medcargahoraria,
) = (0,) * 5
med1 = 0
(
    medinstal, medaudiovis, meddocdispor, medapadmin, medapcoord,
) = (0,) * 5
med2 = 0
medmotform, medrelpart, medpontass = (0,) * 3
med3 = 0

if not file_path4:
    print("Nenhum ficheiro selecionado. Valores definidos a 0.")
else:
    df4 = pd.read_excel(file_path4)
    ninq4 = df4.shape[0]

    # Group 1: positional columns 10..14 and the mean of their rounded means.
    (
        medprogcurso, medcontcurso, medestrcurso, medutilprat, medcargahoraria,
    ) = [round(df4.iloc[:, col].mean(), 2) for col in range(10, 15)]
    med1 = round(
        (medprogcurso + medcontcurso + medestrcurso
         + medutilprat + medcargahoraria) / 5,
        2,
    )

    # Group 2: positional columns 15..19.
    (
        medinstal, medaudiovis, meddocdispor, medapadmin, medapcoord,
    ) = [round(df4.iloc[:, col].mean(), 2) for col in range(15, 20)]
    med2 = round(
        (medinstal + medaudiovis + meddocdispor
         + medapadmin + medapcoord) / 5,
        2,
    )

    # Group 3: positional columns 20..22.
    medmotform, medrelpart, medpontass = [
        round(df4.iloc[:, col].mean(), 2) for col in range(20, 23)
    ]
    med3 = round((medmotform + medrelpart + medpontass) / 3, 2)
|
||
|
||
# %%
|
||
# Bare expression: in a notebook cell this displays df4 for inspection.
df4
|
||
|
||
# %%
|
||
def replace_placeholder_with_propostas(
    doc,
    df3: pd.DataFrame,
    placeholder: str = "{{Propostas}}",
    left_indent_cm: float = 2.75,
    hanging_cm: float = 0.6,
    font_name: str = "Arial",
    font_size_pt: int = 12,
    deduplicate: bool = True,
    max_items: int | None = None,
) -> int:
    """Replace ``placeholder`` with the "_propostas" answers found in ``df3``.

    Answers from every column whose name contains "_propostas"
    (case-insensitive) are stripped, optionally de-duplicated
    case-insensitively and capped at ``max_items``; "Sem dados" is used when
    none remain. The renderer is called once per rescan for as long as a
    paragraph containing the placeholder is found. Returns the number of
    renderer calls.

    NOTE(review): the loop only ends once the renderer has removed the
    placeholder from every location scanned here — confirm the renderer
    covers headers/footers and nested tables too.
    """
    collected = []
    for name in df3.columns:
        if "_propostas" not in str(name).lower():
            continue
        values = df3[name].dropna().astype(str).str.strip()
        collected.extend(values[values != ""].tolist())

    if deduplicate:
        # Keep the first spelling of each case-insensitive duplicate.
        first_by_key = {}
        for text in collected:
            first_by_key.setdefault(text.lower(), text)
        collected = list(first_by_key.values())

    if max_items is not None:
        collected = collected[:max_items]

    frame = pd.DataFrame({"_Propostas": collected or ["Sem dados"]})

    def _placeholder_present() -> bool:
        return any(
            placeholder in (par.text or "")
            for par in list(iter_all_paragraphs_everywhere(doc))
        )

    replaced = 0
    while _placeholder_present():
        replace_placeholder_with_column_subitems_hanging2(
            doc,
            frame,
            placeholder=placeholder,
            column_contains="_propostas",
            left_indent_cm=left_indent_cm,
            hanging_cm=hanging_cm,
            font_name=font_name,
            font_size_pt=font_size_pt,
            deduplicate=False,
        )
        replaced += 1

    return replaced
|
||
|
||
# %%
|
||
def delete_paragraph(paragraph):
    """Detach the paragraph's XML element from its parent, if it has one."""
    element = paragraph._p
    container = element.getparent()
    if container is None:
        # Already detached: nothing to remove.
        return
    container.remove(element)
|
||
def _iter_paragraphs_in_table(tbl):
|
||
for row in tbl.rows:
|
||
for cell in row.cells:
|
||
for p in cell.paragraphs:
|
||
yield p
|
||
for t in cell.tables:
|
||
yield from _iter_paragraphs_in_table(t)
|
||
def iter_all_paragraphs_everywhere(doc):
    """Yield paragraphs from the body, body tables, and every section header/footer.

    Order: body paragraphs, then paragraphs inside body tables, then for each
    section its default, first-page and even-page headers and footers
    (paragraphs first, then their tables).
    """
    yield from doc.paragraphs
    for table in doc.tables:
        yield from _iter_paragraphs_in_table(table)

    for section in doc.sections:
        parts = (
            section.header,
            section.footer,
            section.first_page_header,
            section.first_page_footer,
            section.even_page_header,
            section.even_page_footer,
        )
        for part in parts:
            yield from part.paragraphs
            for table in part.tables:
                yield from _iter_paragraphs_in_table(table)
|
||
def delete_lines_with_ancora(doc, pattern=r"ANCORA") -> int:
    """Delete every paragraph whose text matches ``pattern`` (case-insensitive).

    Matching paragraphs are collected first and then removed in reverse
    order. Returns the number of collected matches.
    """
    matcher = re.compile(pattern, flags=re.IGNORECASE)
    doomed = [
        par
        for par in iter_all_paragraphs_everywhere(doc)
        if matcher.search(par.text or "")
    ]
    for par in reversed(doomed):
        delete_paragraph(par)
    return len(doomed)
|
||
|
||
# %%
|
||
def replace_placeholder_with_temasdir(
    doc,
    df4: pd.DataFrame,
    placeholder: str = "{{temasdir}}",
    left_indent_cm: float = 2.75,
    hanging_cm: float = 0.6,
    font_name: str = "Arial",
    font_size_pt: int = 12,
    deduplicate: bool = True,
    max_items: int | None = None,
) -> int:
    """Replace ``placeholder`` with the "_temas" answers found in ``df4``.

    Answers from every column whose name contains "_temas" (case-insensitive)
    are stripped, optionally de-duplicated case-insensitively and capped at
    ``max_items``; "Sem dados" is used when none remain. The renderer is
    called once per rescan for as long as a paragraph containing the
    placeholder is found. Returns the number of renderer calls.

    NOTE(review): the loop only ends once the renderer has removed the
    placeholder from every location scanned here — confirm the renderer
    covers headers/footers and nested tables too.
    """
    collected = []
    for name in df4.columns:
        if "_temas" not in str(name).lower():
            continue
        values = df4[name].dropna().astype(str).str.strip()
        collected.extend(values[values != ""].tolist())

    if deduplicate:
        # Keep the first spelling of each case-insensitive duplicate.
        first_by_key = {}
        for text in collected:
            first_by_key.setdefault(text.lower(), text)
        collected = list(first_by_key.values())

    if max_items is not None:
        collected = collected[:max_items]

    frame = pd.DataFrame({"_Temas": collected or ["Sem dados"]})

    def _placeholder_present() -> bool:
        return any(
            placeholder in (par.text or "")
            for par in list(iter_all_paragraphs_everywhere(doc))
        )

    replaced = 0
    while _placeholder_present():
        replace_placeholder_with_column_subitems_hanging2(
            doc,
            frame,
            placeholder=placeholder,
            column_contains="_temas",
            left_indent_cm=left_indent_cm,
            hanging_cm=hanging_cm,
            font_name=font_name,
            font_size_pt=font_size_pt,
            deduplicate=False,
        )
        replaced += 1

    return replaced
|
||
|
||
# %%
|
||
def replace_placeholder_with_desenvolverdir(
    doc,
    df4: pd.DataFrame,
    placeholder: str = "{{desenvolverdir}}",
    left_indent_cm: float = 2.75,
    hanging_cm: float = 0.6,
    font_name: str = "Arial",
    font_size_pt: int = 12,
    deduplicate: bool = True,
    max_items: int | None = None,
) -> int:
    """Replace ``placeholder`` with the "_desenvolver" answers found in ``df4``.

    Answers from every column whose name contains "_desenvolver"
    (case-insensitive) are stripped, optionally de-duplicated
    case-insensitively and capped at ``max_items``; "Sem dados" is used when
    none remain. The renderer is called once per rescan for as long as a
    paragraph containing the placeholder is found. Returns the number of
    renderer calls.

    NOTE(review): the loop only ends once the renderer has removed the
    placeholder from every location scanned here — confirm the renderer
    covers headers/footers and nested tables too.
    """
    collected = []
    for name in df4.columns:
        if "_desenvolver" not in str(name).lower():
            continue
        values = df4[name].dropna().astype(str).str.strip()
        collected.extend(values[values != ""].tolist())

    if deduplicate:
        # Keep the first spelling of each case-insensitive duplicate.
        first_by_key = {}
        for text in collected:
            first_by_key.setdefault(text.lower(), text)
        collected = list(first_by_key.values())

    if max_items is not None:
        collected = collected[:max_items]

    frame = pd.DataFrame({"_Desenvolver": collected or ["Sem dados"]})

    def _placeholder_present() -> bool:
        return any(
            placeholder in (par.text or "")
            for par in list(iter_all_paragraphs_everywhere(doc))
        )

    replaced = 0
    while _placeholder_present():
        replace_placeholder_with_column_subitems_hanging2(
            doc,
            frame,
            placeholder=placeholder,
            column_contains="_desenvolver",
            left_indent_cm=left_indent_cm,
            hanging_cm=hanging_cm,
            font_name=font_name,
            font_size_pt=font_size_pt,
            deduplicate=False,
        )
        replaced += 1

    return replaced
|
||
|
||
# %%
|
||
def replace_placeholder_with_incluirdir(
    doc,
    df4: pd.DataFrame,
    placeholder: str = "{{incluirdir}}",
    left_indent_cm: float = 2.75,
    hanging_cm: float = 0.6,
    font_name: str = "Arial",
    font_size_pt: int = 12,
    deduplicate: bool = True,
    max_items: int | None = None,
) -> int:
    """Replace ``placeholder`` with the "_incluir" answers found in ``df4``.

    Answers from every column whose name contains "_incluir"
    (case-insensitive) are stripped, optionally de-duplicated
    case-insensitively and capped at ``max_items``; "Sem dados" is used when
    none remain. The renderer is called once per rescan for as long as a
    paragraph containing the placeholder is found. Returns the number of
    renderer calls.

    NOTE(review): the loop only ends once the renderer has removed the
    placeholder from every location scanned here — confirm the renderer
    covers headers/footers and nested tables too.
    """
    collected = []
    for name in df4.columns:
        if "_incluir" not in str(name).lower():
            continue
        values = df4[name].dropna().astype(str).str.strip()
        collected.extend(values[values != ""].tolist())

    if deduplicate:
        # Keep the first spelling of each case-insensitive duplicate.
        first_by_key = {}
        for text in collected:
            first_by_key.setdefault(text.lower(), text)
        collected = list(first_by_key.values())

    if max_items is not None:
        collected = collected[:max_items]

    frame = pd.DataFrame({"_Incluir": collected or ["Sem dados"]})

    def _placeholder_present() -> bool:
        return any(
            placeholder in (par.text or "")
            for par in list(iter_all_paragraphs_everywhere(doc))
        )

    replaced = 0
    while _placeholder_present():
        replace_placeholder_with_column_subitems_hanging2(
            doc,
            frame,
            placeholder=placeholder,
            column_contains="_incluir",
            left_indent_cm=left_indent_cm,
            hanging_cm=hanging_cm,
            font_name=font_name,
            font_size_pt=font_size_pt,
            deduplicate=False,
        )
        replaced += 1

    return replaced
|
||
|
||
# %%
|
||
def replace_placeholder_with_positivosdir(
    doc,
    df4: pd.DataFrame,
    placeholder: str = "{{positivosdir}}",
    left_indent_cm: float = 2.75,
    hanging_cm: float = 0.6,
    font_name: str = "Arial",
    font_size_pt: int = 12,
    deduplicate: bool = True,
    max_items: int | None = None,
) -> int:
    """Replace ``placeholder`` with the texts found in the "_positivos" columns.

    Every column of ``df4`` whose name contains ``"_positivos"``
    (case-insensitive) contributes its non-null, stripped, non-empty values,
    in column order.

    Args:
        doc: Document handed to the paragraph iterator and to the rendering
            helper.
        df4: Source data frame.
        placeholder: Marker text searched for in the paragraphs.
        left_indent_cm, hanging_cm, font_name, font_size_pt: Forwarded
            unchanged to ``replace_placeholder_with_column_subitems_hanging2``.
        deduplicate: When true, keep only the first occurrence of each item,
            comparing lower-cased text.
        max_items: Optional cap applied after de-duplication.

    Returns:
        How many times the rendering helper was invoked.
    """
    # Gather the usable cell texts of every matching column.
    itens = []
    for col in (c for c in df4.columns if "_positivos" in str(c).lower()):
        values = df4[col].dropna().astype(str).str.strip()
        itens.extend(values[values != ""].tolist())

    if deduplicate:
        # dicts keep insertion order and setdefault keeps the first spelling.
        first_seen = {}
        for item in itens:
            first_seen.setdefault(item.lower(), item)
        itens = list(first_seen.values())

    if max_items is not None:
        itens = itens[:max_items]

    # The helper reads its items from a data frame column, so wrap the list;
    # an empty list is rendered as a single "Sem dados" entry.
    df_tmp = pd.DataFrame({"_Positivos": itens or ["Sem dados"]})

    def _placeholder_present() -> bool:
        return any(
            placeholder in (p.text or "")
            for p in list(iter_all_paragraphs_everywhere(doc))
        )

    # NOTE(review): this loop only ends if each helper call removes at least
    # one occurrence of the placeholder -- confirm against the helper.
    replaced = 0
    while _placeholder_present():
        replace_placeholder_with_column_subitems_hanging2(
            doc,
            df_tmp,
            placeholder=placeholder,
            column_contains="_positivos",
            left_indent_cm=left_indent_cm,
            hanging_cm=hanging_cm,
            font_name=font_name,
            font_size_pt=font_size_pt,
            deduplicate=False,
        )
        replaced += 1
    return replaced
|
||
|
||
# %%
|
||
# Bare expression, presumably left over from a notebook cell that displayed
# df4; evaluated as a plain statement it has no effect beyond the name lookup.
df4
|
||
|
||
# %%
|
||
# Trainer summary values used by the {{nforma}}, {{mediaformquanl}} and
# {{mediaform}} placeholders below.
nforma, mediaformquanl, mediaform = compute_formadores_summary(df2)

# Placeholder -> replacement text for the report template. Values are listed
# raw and converted to str in a single pass by the comprehension. Placeholder
# spellings are kept exactly as written (including the irregular ones).
replacements = {
    key: str(value)
    for key, value in {
        # --- Course name and percentage pairs (value and its complement) ---
        "{{NOMEDOCURSOEXTENSO}}": NOMEDOCURSO,
        "{{prerequesitos}}": prerequesitos,
        "{{prerequesitos2}}": 100 - prerequesitos,
        "{{Conteudo}}": Conteudo,
        "{{Conteudo2}}": 100 - Conteudo,
        "{{objgeral}}": objgeral,
        "{{objgeral2}}": 100 - objgeral,
        "{{objfinal}}": objfinal,
        "{{objfinal2}}": 100 - objfinal,
        "{{objadq}}": objadq,
        "{{objadq2}}": 100 - objadq,
        "{{avadq}}": avadq,
        "{{avadq2}}": 100 - avadq,
        "{{refere}}": refere,
        "{{refere2}}": 100 - refere,
        "{{ninq4}}": ninq4,
        "{{ninq3}}": ninq3,
        # --- Trainer summary ---
        "{{nforma}}": nforma,
        "{{mediaformquanl}}": mediaformquanl,
        "{{mediaform}}": mediaform,
        # --- Numeric averages ---
        "{{mediaaloj}}": medalojamento2,
        "{{mediaaloj1}}": medalojamento,
        "{{mediaalime}}": medalimentacao2,
        "{{mediaalime1}}": medalimentacao,
        "{{medalimentacao}}": medalimentacaofinal,
        "{{apdir}}": medapdir,
        "{{graudif}}": meddificuldade2,
        "{{graudif1}}": meddificuldade,
        "{{meddificuldadefinal}}": meddificuldadefinal,
        "{{funcfut}}": medfuncfut2,
        "{{funcfut1}}": medfuncfut,
        "{{medfuncfutfinal}}": medfuncfutfinal,
        "{{apadm}}": medaplog,
        "{{motapro}}": medmotvpart2,
        "{{motapro1}}": medmotvpart,
        "{{medmotvpartfinal}}": medmotvpartfinal,
        "{{conhcurso}}": medconhecimento2,
        "{{conhcurso1}}": medconhecimento,
        "{{medconhecimentofinal}}": medconhecimentofinal,
        "{{objcruso}}": objcruso,
        "{{contcurso}}": contcurso,
        "{{medalojamento}}": medalojamentofinal,
        "{{adeqtrab}}": adeqtrab,
        "{{instform}}": instform,
        "{{audiovisuais}}": audiovisuais,
        "{{biblio}}": biblio,
        "{{ninq}}": ninq,
        "{{ninqfim}}": ninq2,
        "{{medpub}}": medpub,
        "{{medmeiosaux}}": medmeiosaux,
        "{{medapform}}": medapform,
        "{{medapdc}}": medapdc,
        "{{medobjesp}}": medobjesp,
        "{{medmetensi}}": medmetensi,
        "{{medtempform}}": medtempform,
        "{{medlocaisform}}": medlocaisform,
        "{{medlançaaval}}": medlançaaval,
        "{{medtipoaval}}": medtipoaval,
        "{{medtempoaval}}": medtempoaval,
        "{{medobjapre}}": medobjapre,
        "{{medadqonjesp}}": medadqonjesp,
        "{{medinterforma}}": medinterforma,
        "{{medprogcurso}}": medprogcurso,
        "{{medcontcurso}}": medcontcurso,
        "{{medestrcurso}}": medestrcurso,
        "{{medutilprat}}": medutilprat,
        "{{medcargahoraria}}": medcargahoraria,
        "{{medinstal}}": medinstal,
        "{{medaudiovis}}": medaudiovis,
        "{{meddocdispor}}": meddocdispor,
        "{{medapadmin}}": medapadmin,
        "{{medapcoord}}": medapcoord,
        "{{medmotform}}": medmotform,
        "{{medrelpart}}": medrelpart,
        "{{medpontass}}": medpontass,
        "{{med1}}": med1,
        "{{med2}}": med2,
        "{{med3}}": med3,
        # --- Qualitative labels derived from the averages above ---
        "{{medpontassqual}}": avaliacao_qualitativa(medpontass),
        "{{medrelpartqual}}": avaliacao_qualitativa(medrelpart),
        "{{medmotformqual}}": avaliacao_qualitativa(medmotform),
        "{{medapcoordqual}}": avaliacao_qualitativa(medapcoord),
        "{{medapadminqual}}": avaliacao_qualitativa(medapadmin),
        "{{meddocdisporqual}}": avaliacao_qualitativa(meddocdispor),
        "{{medaudiovisqual}}": avaliacao_qualitativa(medaudiovis),
        "{{medinstalqual}}": avaliacao_qualitativa(medinstal),
        "{{medcargahorariaqual}}": avaliacao_qualitativa(medcargahoraria),
        "{{medutilpratoqual}}": avaliacao_qualitativa(medutilprat),
        "{{medestrcursooqual}}": avaliacao_qualitativa(medestrcurso),
        # NOTE(review): key says "medprogcurso" but the value is computed
        # from medcontcurso, and there is no "{{medcontcursoqual}}" entry --
        # looks like a copy-paste slip; confirm against the template.
        "{{medprogcursoqual}}": avaliacao_qualitativa(medcontcurso),
        "{{medinterformaqual}}": avaliacao_qualitativa(medinterforma),
        "{{medadqonjespqual}}": avaliacao_qualitativa(medadqonjesp),
        "{{medobjaprequal}}": avaliacao_qualitativa(medobjapre),
        "{{medtempoavalqual}}": avaliacao_qualitativa(medtempoaval),
        "{{medtipoavalqual}}": avaliacao_qualitativa(medtipoaval),
        "{{medlançaavalual}}": avaliacao_qualitativa(medlançaaval),
        "{{medlocaisformqual}}": avaliacao_qualitativa(medlocaisform),
        "{{medtempformqual}}": avaliacao_qualitativa(medtempform),
        "{{medmetensiqual}}": avaliacao_qualitativa(medmetensi),
        "{{medobjespqual}}": avaliacao_qualitativa(medobjesp),
        "{{medapdcqual}}": avaliacao_qualitativa(medapdc),
        "{{medapformqual}}": avaliacao_qualitativa(medapform),
        "{{medmeiosauxqual}}": avaliacao_qualitativa(medmeiosaux),
        "{{medpubqual}}": avaliacao_qualitativa(medpub),
        "{{mediaalojqual}}": avaliacao_qualitativa(medalojamento2),
        "{{apdirqual}}": avaliacao_qualitativa(medapdir),
        "{{funcfutqual}}": avaliacao_qualitativa(medfuncfut2),
        "{{graudifaqual}}": avaliacao_qualitativa(meddificuldade2),
        "{{apadmqual}}": avaliacao_qualitativa(medaplog),
        "{{motaproqual}}": avaliacao_qualitativa(medmotvpart2),
        "{{conhcursoqual}}": avaliacao_qualitativa(medconhecimento2),
        "{{objcrusoqual}}": avaliacao_qualitativa(objcruso),
        "{{contcursoqual}}": avaliacao_qualitativa(contcurso),
        "{{adeqtrabqual}}": avaliacao_qualitativa(adeqtrab),
        "{{instformqual}}": avaliacao_qualitativa(instform),
        "{{audiovisuaisqual}}": avaliacao_qualitativa(audiovisuais),
        "{{biblioqual}}": avaliacao_qualitativa(biblio),
        "{{mediaalimequal}}": avaliacao_qualitativa(medalimentacao2),
        # --- Course identification and dates ---
        "{{NOMEDOCURSO}}": NOMEDOCURSOcurto,
        "{{MESi}}": MESi,
        "{{AAAAi}}": AAAAi,
        "{{DDf}}": DDf,
        "{{MESf}}": MESf,
        "{{AAAAf}}": AAAAf,
        "{{MEDIAFINALCURSO}}": MEDIAFINALCURSO,
        "{{NFORMANDOS}}": NFORMANDOS,
        "{{FINALIDADECURSO}}": FINALIDADECURSO,
        "{{DDi}}": DDi,
    }.items()
}
|
||
# Build the final report: open the template, fill every placeholder and save
# the result as "relatorio_final.docx" in the current working directory.
template_path = _resource_path("Anexo RAI..docx")
output_path = os.path.join(os.getcwd(), "relatorio_final.docx")

doc = Document(template_path)

# Optional third input: either fill the proposals list or drop the section
# delimited by ANCORA1/ANCORA2 from the document.
if file_path3:
    replace_placeholder_with_propostas(
        doc,
        df3,
        placeholder="{{Propostas}}",
        left_indent_cm=2.75,
        hanging_cm=0.6,
        font_name="Arial",
        font_size_pt=12,
        deduplicate=True,
        max_items=None,
    )
else:
    print("Nenhum ficheiro selecionado. A remover secção do documento.")
    delete_all_between_anchors_xml(doc, "{{ANCORA1}}", "{{ANCORA2}}", debug=True)
    df3 = None

# Optional fourth input: either fill the four "dir" lists or drop the section
# delimited by ANCORA3/ANCORA4 from the document.
if file_path4:
    replace_placeholder_with_positivosdir(
        doc,
        df4,
        placeholder="{{positivosdir}}",
        left_indent_cm=2.75,
        hanging_cm=0.6,
        font_name="Arial",
        font_size_pt=12,
        deduplicate=True,
    )
    replace_placeholder_with_incluirdir(
        doc,
        df4,
        placeholder="{{incluirdir}}",
        left_indent_cm=2.75,
        hanging_cm=0.6,
        font_name="Arial",
        font_size_pt=12,
        deduplicate=True,
    )
    replace_placeholder_with_desenvolverdir(
        doc,
        df4,
        placeholder="{{desenvolverdir}}",
        left_indent_cm=2.75,
        hanging_cm=0.6,
        font_name="Arial",
        font_size_pt=12,
        deduplicate=True,
    )
    replace_placeholder_with_temasdir(
        doc,
        df4,
        placeholder="{{temasdir}}",
        left_indent_cm=2.75,
        hanging_cm=0.6,
        font_name="Arial",
        font_size_pt=12,
        deduplicate=True,
    )
else:
    print("Nenhum ficheiro selecionado. A remover secção do documento.")
    delete_all_between_anchors_xml(doc, "{{ANCORA3}}", "{{ANCORA4}}", debug=True)
    df4 = None

# Simple text placeholders (the values of the `replacements` mapping).
replace_placeholders_docx_bold_values_keep_style(doc, replacements)

# Structured content built from the final questionnaire data (df2).
replace_placeholder_with_q06_subitems(
    doc,
    df2,
    placeholder="{{Q06_Apreciacao}}",
    indent_cm=2.75,
    indent_title=True,
    font_name="Arial",
    font_size_pt=12,
)

replace_placeholder_with_formadores_table(
    doc,
    df2,
    placeholder="{{tabelaFormadores}}",
    font_name="Arial",
    font_size_pt=12,
)

replace_placeholder_with_uc_table(
    doc,
    df_inicial=df,
    df_final=df2,
    placeholder="{{tabelasUC}}",
    font_name="Arial",
    font_size_pt=12,
)

replace_placeholder_with_temas_smart(
    doc,
    df2,
    placeholder="{{TEMAS}}",
    indent_cm=0.5,
    font_name="Arial",
    font_size_pt=12,
    deduplicate=True,
    max_items=6,
)

replace_placeholder_with_desenvolver_smart(
    doc,
    df2,
    placeholder="{{DESENVOLVER}}",
    indent_cm=0.5,
    font_name="Arial",
    font_size_pt=12,
    max_items=6,
)

replace_placeholder_with_incluir_smart(
    doc,
    df2,
    placeholder="{{INCLUIR}}",
    indent_cm=0.5,
    font_name="Arial",
    font_size_pt=12,
    max_items=6,
)

replace_placeholder_with_observacoes_smart2(
    doc,
    df2,
    placeholder="{{OBSERVACOES2}}",
    indent_cm=2.75,
    font_name="Arial",
    font_size_pt=12,
    max_items=6,
)

# The original script issued this call twice with identical arguments and
# only printed the count returned by the second one; a single call avoids the
# redundant pass and reports the count of the call that does the work.
# NOTE(review): assumes one call handles every "{{OBSERVACOES}}" occurrence --
# confirm against replace_placeholder_with_observacoes_smart.
n = replace_placeholder_with_observacoes_smart(
    doc,
    df2,
    placeholder="{{OBSERVACOES}}",
    indent_cm=0.5,
    font_name="Arial",
    font_size_pt=12,
    use_ollama=True,
    ollama_model="llama3.1:8b",  # adjust to whichever model exists on the machines
)
print("OBSERVACOES substituídos:", n)

# Remove the leftover anchor lines, then write the report to disk.
delete_lines_with_ancora(doc, pattern=r"ANCORA")
doc.save(output_path)

print(f"Saved: {output_path}")
|
||
|
||
# %%
|
||
def iter_body_blocks(doc):
    """Yield the direct children of the document body, in order.

    Each item is a ``(kind, obj)`` tuple: ``("p", Paragraph)`` for a
    paragraph element or ``("tbl", Table)`` for a table element. Children
    with any other tag are skipped.
    """
    for element in doc._element.body.iterchildren():
        # Drop the "{namespace}" prefix to get the local tag name.
        local_name = element.tag.rpartition("}")[2]
        if local_name == "p":
            yield ("p", Paragraph(element, doc))
            continue
        if local_name == "tbl":
            yield ("tbl", Table(element, doc))
|
||
|
||
# %%
|
||
def table_to_text(tbl) -> str:
    """Render a table as plain text, one line per non-empty row.

    Within a cell, the non-blank paragraph texts are joined with a space and
    runs of whitespace are collapsed. Cells of a row are joined with
    ``" | "``; rows whose cells are all empty are omitted. The final string
    is stripped.
    """

    def _cell_text(cell) -> str:
        pieces = [p.text.strip() for p in cell.paragraphs if p.text.strip()]
        return re.sub(r"\s+", " ", " ".join(pieces)).strip()

    rendered_rows = []
    for row in tbl.rows:
        cells = [_cell_text(cell) for cell in row.cells]
        if any(cells):
            rendered_rows.append(" | ".join(cells))
    return "\n".join(rendered_rows).strip()
|
||
def extract_text_between_markers(doc, start_re: str, end_re: str | None = None, debug=False) -> str:
    """Return the body text located between two marker paragraphs.

    Body blocks are scanned in order. Collection starts after the first
    paragraph whose stripped text matches ``start_re`` and stops at the first
    later paragraph matching ``end_re`` (both case-insensitive; the marker
    paragraphs themselves are excluded). Without ``end_re`` collection runs
    to the end of the body. Tables inside the range are rendered with
    ``table_to_text``.

    Args:
        doc: Document passed to ``iter_body_blocks``.
        start_re: Regular expression identifying the opening paragraph.
        end_re: Regular expression identifying the closing paragraph, or
            ``None``/empty for "until the end".
        debug: When true, print the paragraphs that matched the markers.

    Returns:
        The collected pieces joined with newlines, stripped, with runs of
        three or more newlines reduced to two. Empty if the start marker is
        never found.
    """
    opening = re.compile(start_re, flags=re.IGNORECASE)
    closing = re.compile(end_re, flags=re.IGNORECASE) if end_re else None

    inside = False
    pieces = []
    for kind, block in iter_body_blocks(doc):
        if kind == "tbl":
            # Tables only count once we are inside the range.
            if inside:
                rendered = table_to_text(block)
                if rendered:
                    pieces.append(rendered)
            continue
        if kind != "p":
            continue

        line = (block.text or "").strip()
        if not inside:
            if opening.search(line):
                inside = True
                if debug:
                    print("[DEBUG] START matched:", line)
            continue

        if closing is not None and closing.search(line):
            if debug:
                print("[DEBUG] END matched:", line)
            break
        if line:
            pieces.append(line)

    joined = "\n".join(pieces).strip()
    return re.sub(r"\n{3,}", "\n\n", joined)
|
||
|
||
# %%
|
||
def ollama_available(timeout=0.4) -> bool:
    """Report whether the local Ollama server answers on port 11434.

    Issues a GET to ``/api/tags`` and returns ``True`` only for an HTTP 200
    answer. Any exception raised by the probe is treated as "not available".
    This repeats the definition that appears earlier in the file.
    """
    probe_url = "http://localhost:11434/api/tags"
    try:
        return requests.get(probe_url, timeout=timeout).status_code == 200
    except Exception:
        # Best-effort probe: every failure means the server is not usable.
        return False
|
||
|
||
def ollama_summarize_text(
    text: str,
    model: str = "llama3.1:8b",
    max_chars: int = 24000,
    timeout: float = 120.0,
    system_prompt: str = "",
    user_prompt: str = "",
) -> str:
    """Ask the local Ollama server to process ``text`` and return its answer.

    Args:
        text: Input text; blank input short-circuits to ``""`` without any
            request being made.
        model: Model name sent in the request payload.
        max_chars: Longer inputs are cut to this many characters and a
            truncation notice is appended.
        timeout: Timeout passed to the HTTP request.
        system_prompt: Text placed first in the prompt.
        user_prompt: Text placed after the system prompt.

    Returns:
        The stripped ``"response"`` field of the JSON answer, or ``""`` when
        that field is missing or empty.

    Raises:
        Whatever ``requests.post`` / ``raise_for_status`` raise on failure.
    """
    if not text.strip():
        return ""

    if len(text) > max_chars:
        text = text[:max_chars] + "\n\n[Texto truncado por limite de tamanho.]"

    # Prompt layout: system prompt, user prompt, the text, then the closing
    # instruction; surrounding whitespace is stripped.
    prompt = (
        f"\n{system_prompt}\n{user_prompt}\n"
        f"TEXTO:\n{text}\n\n"
        "DEVOLVE APENAS O RESULTADO FINAL, SEM EXPLICAÇÕES.\n"
    ).strip()

    response = requests.post(
        "http://localhost:11434/api/generate",
        json={
            "model": model,
            "prompt": prompt,
            "stream": False,
            "options": {"temperature": 0.2},
        },
        timeout=timeout,
    )
    response.raise_for_status()
    answer = response.json().get("response", "")
    return (answer or "").strip()
|
||
|
||
# %%
|
||
def replace_placeholder_with_text_paragraph_all(
    doc,
    placeholder: str,
    text: str,
    indent_cm: float = 0.0,
    font_name: str = "Arial",
    font_size_pt: int = 12,
) -> int:
    """Overwrite every paragraph containing ``placeholder`` with ``text``.

    The paragraphs visited are ``doc.paragraphs`` followed by the paragraphs
    of every cell of every table in ``doc.tables``. A matching paragraph has
    all its runs blanked; ``text`` is written into the first run, which is
    given the requested font, and the paragraph receives the left indent,
    1.5 line spacing and zero spacing before/after.

    Args:
        doc: Document exposing ``paragraphs`` and ``tables``.
        placeholder: Marker text looked up in each paragraph's text.
        text: Replacement for the whole paragraph content.
        indent_cm: Left indent, in centimetres.
        font_name: Font applied through ``force_run_font``.
        font_size_pt: Font size applied through ``force_run_font``.

    Returns:
        Number of paragraphs rewritten.
    """

    def _candidate_paragraphs():
        # Body paragraphs first, then table cells, same order as a plain scan.
        yield from list(doc.paragraphs)
        for tbl in doc.tables:
            for row in tbl.rows:
                for cell in row.cells:
                    yield from list(cell.paragraphs)

    replaced = 0
    for para in _candidate_paragraphs():
        if placeholder not in (para.text or ""):
            continue

        # Guarantee there is a run to hold the replacement text.
        if not para.runs:
            para.add_run("")
        for run in para.runs:
            run.text = ""

        target = para.runs[0]
        target.text = text
        force_run_font(target, font_name, font_size_pt)

        layout = para.paragraph_format
        layout.left_indent = Cm(indent_cm)
        layout.line_spacing = 1.5
        layout.space_before = Pt(0)
        layout.space_after = Pt(0)

        replaced += 1
    return replaced
|
||
|
||
# %%
|
||
def fill_llm_placeholders_llm9_llm10(
    doc,
    model: str = "llama3.1:8b",
    placeholder_llm10: str = "{{LLM10}}",
    indent_cm: float = 0.5,
    font_name: str = "Arial",
    font_size_pt: int = 12,
    debug_extract: bool = False,
) -> dict:
    """Fill the LLM10 placeholder with a summary of Apêndice 5 and Apêndice 6.

    The text between the "Apêndice 5" and "Apêndice 6" headings, and between
    "Apêndice 6" and the next paragraph starting with "1 –"/"1 -", is
    extracted, summarised through Ollama and written over every paragraph
    containing ``placeholder_llm10``. With nothing to summarise (or an empty
    answer) the placeholder becomes "Sem dados.".

    Despite its name, this function does not touch any LLM9 placeholder; the
    ``"llm9"`` entry of the result is always 0.

    Args:
        doc: Document to read from and modify.
        model: Ollama model name.
        placeholder_llm10: Marker text to replace.
        indent_cm, font_name, font_size_pt: Formatting forwarded to
            ``replace_placeholder_with_text_paragraph_all``.
        debug_extract: Forwarded as ``debug`` to the extraction helper.

    Returns:
        A dict with ``"ok"``, ``"llm9"`` and ``"llm10"`` in both outcomes.
        On failure (Ollama unreachable) it also carries ``"reason"`` and the
        document is left untouched; on success it also carries
        ``"chars_in_llm10"``, the length of the summarised text.
    """
    if not ollama_available():
        return {"ok": False, "reason": "ollama_not_available", "llm9": 0, "llm10": 0}

    ap5 = extract_text_between_markers(
        doc,
        start_re=r"^\s*Apêndice\s*5\b",
        end_re=r"^\s*Apêndice\s*6\b",
        debug=debug_extract,
    )
    ap6 = extract_text_between_markers(
        doc,
        start_re=r"^\s*Apêndice\s*6\b",
        end_re=r"^\s*1\s*[–-]\s",
        debug=debug_extract,
    )
    texto_llm10 = "\n\n".join(t for t in (ap5, ap6) if t.strip()).strip()

    sys_pt = "És um analista a redigir um relatório oficial. Português de Portugal. Não inventes informação."
    prompt_llm10 = (
        "Resume os conteúdos do Apêndice 5 e do Apêndice 6 num texto único, formal e conciso "
        "(1 a 2 parágrafos). Realça pontos-chave e recomendações."
    )

    # Only call the model when there is something to summarise.
    resumo10 = ""
    if texto_llm10:
        resumo10 = ollama_summarize_text(
            texto_llm10, model=model, system_prompt=sys_pt, user_prompt=prompt_llm10
        )

    n10 = replace_placeholder_with_text_paragraph_all(
        doc,
        placeholder_llm10,
        resumo10.strip() or "Sem dados.",
        indent_cm=indent_cm,
        font_name=font_name,
        font_size_pt=font_size_pt,
    )

    # "llm9" is reported here as well so that both outcomes expose the same
    # counter keys (previously only the failure result had it).
    return {
        "ok": True,
        "llm9": 0,
        "llm10": n10,
        "chars_in_llm10": len(texto_llm10),
    }
|
||
|
||
# %%
|
||
def fill_llm_placeholder_from_doc_range(
    doc,
    placeholder: str,
    start_marker: str,
    end_marker: str,
    model: str = "llama3.1:8b",
    indent_cm: float = 0.5,
    font_name: str = "Arial",
    font_size_pt: int = 12,
    debug_extract: bool = False,
) -> dict:
    """Summarise a marker-delimited range of the document into ``placeholder``.

    The range is looked up twice if needed: first requiring a paragraph to
    consist solely of the marker text, then accepting the marker anywhere in
    a paragraph. The extracted text is summarised through Ollama and written
    over every paragraph containing ``placeholder``.

    Args:
        doc: Document to read from and modify.
        placeholder: Marker text to replace.
        start_marker: Literal text of the paragraph opening the range.
        end_marker: Literal text of the paragraph closing the range.
        model: Ollama model name.
        indent_cm, font_name, font_size_pt: Formatting forwarded to
            ``replace_placeholder_with_text_paragraph_all``.
        debug_extract: Forwarded as ``debug`` to the extraction helper.

    Returns:
        ``{"ok": False, "reason": ..., "replaced": 0, "chars": 0}`` when
        Ollama is unreachable (document untouched); otherwise
        ``{"ok": True, "replaced": n, "chars": len(text)}``, with an extra
        ``"note": "range_not_found"`` when no text could be extracted and
        the placeholder was filled with "Sem dados.".
    """
    if not ollama_available():
        return {"ok": False, "reason": "ollama_not_available", "replaced": 0, "chars": 0}

    start_literal = re.escape(start_marker.strip())
    end_literal = re.escape(end_marker.strip())
    # Strict attempt (whole-paragraph match) first, relaxed attempt second.
    attempts = (
        (r"^\s*" + start_literal + r"\s*$", r"^\s*" + end_literal + r"\s*$"),
        (start_literal, end_literal),
    )

    texto = ""
    for start_re, end_re in attempts:
        texto = extract_text_between_markers(
            doc,
            start_re=start_re,
            end_re=end_re,
            debug=debug_extract,
        ).strip()
        if texto:
            break

    fmt = {"indent_cm": indent_cm, "font_name": font_name, "font_size_pt": font_size_pt}

    if not texto:
        n = replace_placeholder_with_text_paragraph_all(doc, placeholder, "Sem dados.", **fmt)
        return {"ok": True, "replaced": n, "chars": 0, "note": "range_not_found"}

    sys_pt = "És um analista a redigir um relatório oficial. Português de Portugal. Não inventes informação."
    user_prompt = (
        "Lê o texto e produz um resumo final, em estilo de conclusões, adequado a relatório oficial:\n"
        "• 1 parágrafo de enquadramento (2–4 frases)\n"
        "• 5–8 bullets com conclusões/recomendações principais\n"
        "• Não inventes dados nem percentagens."
    )

    resposta = ollama_summarize_text(
        texto,
        model=model,
        system_prompt=sys_pt,
        user_prompt=user_prompt,
    )
    resumo = resposta.strip() or "Sem dados."

    n = replace_placeholder_with_text_paragraph_all(doc, placeholder, resumo, **fmt)
    return {"ok": True, "replaced": n, "chars": len(texto)}
|
||
|
||
# %%
|
||
# Second pass over the saved report: reopen it, fill the LLM-generated
# sections and save it again over the same file.
doc = Document(output_path)

# {{LLM9}}: conclusions drawn from the text between the report title and the
# signature heading.
fill_llm_placeholder_from_doc_range(
    doc,
    placeholder="{{LLM9}}",
    start_marker="RELATÓRIO DE AVALIAÇÃO INTERNA",
    end_marker="O CHEFE DA DIREÇÃO DE AVALIAÇÃO E QUALIDADE",
    model="llama3.1:8b",
    indent_cm=0.5,
    font_name="Arial",
    font_size_pt=12,
    debug_extract=True,
)

# {{LLM10}}: summary of the two appendices.
fill_llm_placeholders_llm9_llm10(
    doc,
    model="llama3.1:8b",
    placeholder_llm10="{{LLM10}}",
    indent_cm=0.5,
    font_name="Arial",
    font_size_pt=12,
    debug_extract=True,
)

doc.save(output_path)
|