# %% #ollama pull llama3.1:8b import pandas as pd import os import sys from tkinter import Tk from tkinter.filedialog import askopenfilename from docx import Document from docx.document import Document as DocxDocument from docx.text.paragraph import Paragraph import re import string from docx.oxml import OxmlElement from docx.shared import Cm, Pt from docx.oxml.ns import qn from docx.text.run import Run from docx.table import Table from docx.enum.text import WD_ALIGN_PARAGRAPH from docx.enum.table import WD_ROW_HEIGHT_RULE, WD_ALIGN_VERTICAL import requests import tkinter as tk from tkinter import messagebox # %% def _resource_path(relative_path: str) -> str: base_dir = getattr(sys, "_MEIPASS", os.path.abspath(os.path.dirname(__file__))) return os.path.join(base_dir, relative_path) # %% def ollama_available(timeout=0.4) -> bool: try: r = requests.get("http://localhost:11434/api/tags", timeout=timeout) #aqui depois colocar o bot return r.status_code == 200 except Exception: return False # %% def avaliacao_qualitativa(valor: float) -> str: if valor is None or pd.isna(valor): return "N/A" if 1.0 <= valor <= 3.0: return "Rever Urgentemente" elif 3.0 < valor <= 3.5: return "Rever e Melhorar" elif 3.5 < valor <= 3.9: return "Bom" elif 3.9 < valor <= 4.5: return "Qualidade" elif 4.5 < valor <= 5.0: return "Excelência" else: return "Out of Range" # %% def format_header_row(row, height_cm=5.2): row.height = Cm(height_cm) row.height_rule = WD_ROW_HEIGHT_RULE.EXACTLY for cell in row.cells: cell.vertical_alignment = WD_ALIGN_VERTICAL.CENTER for p in cell.paragraphs: p.alignment = WD_ALIGN_PARAGRAPH.CENTER # %% def set_cell_text_vertical(cell, direction="btLr"): tcPr = cell._tc.get_or_add_tcPr() td = OxmlElement("w:textDirection") td.set(qn("w:val"), direction) tcPr.append(td) def set_table_all_columns_width(tbl, width_cm=2.3): tbl.autofit = False w = Cm(width_cm) for row in tbl.rows: for cell in row.cells: cell.width = w # %% def force_run_font(run: Run, font_name="Arial", font_size_pt=12, bold=None): if bold is not None: run.bold = bool(bold) run.font.name = font_name run.font.size = Pt(font_size_pt) run._element.rPr.rFonts.set(qn("w:eastAsia"), font_name) # %% def format_pt_number(x: float) -> str: if pd.isna(x): return "" return f"{x:.2f}".replace(".", ",") # %% def delete_paragraph(paragraph: Paragraph) -> None: p = paragraph._p p.getparent().remove(p) paragraph._p = paragraph._element = None # %% def clean_module_title(col_name: str) -> str: s = str(col_name).strip() if "->" in s: s = s.split("->", 1)[1].strip() return s # %% #def clean_module_title(col_name: str) -> str: # s = str(col_name).strip() # s = re.sub(r"^.*?->\s*", "", s) # s = re.sub(r"^\s*Q06\s*[-–_ ]\s*Aprecia.*?[-–:]\s*", "", s, flags=re.IGNORECASE) # return s.strip() # %% def apply_table_paragraph_spacing(cell, line_spacing=1.5): for p in cell.paragraphs: fmt = p.paragraph_format fmt.line_spacing = line_spacing fmt.space_before = Pt(0) fmt.space_after = Pt(0) # %% def mean_columns_until_empty(df: pd.DataFrame, start_col_idx: int = 16) -> list[float]: means = [] for j in range(start_col_idx, df.shape[1]): col = pd.to_numeric(df.iloc[:, j], errors="coerce") if col.notna().sum() == 0: break means.append(col.mean()) return means # %% def insert_table_after_paragraph(paragraph: Paragraph, rows: int, cols: int) -> Table: doc = paragraph.part.document tbl = doc.add_table(rows=rows, cols=cols) paragraph._p.addnext(tbl._tbl) return tbl # %% def replace_placeholder_with_uc_table( doc, df_inicial: pd.DataFrame, df_final: pd.DataFrame, placeholder: str = "{{tabelasUC}}", font_name: str = "Arial", font_size_pt: int = 12, start_col_idx_inicial: int = 16, ): def extract_uc_cols(df: pd.DataFrame): cols = [] for c in df.columns: name = str(c).lower() if "q06" in name and "aprecia" in name: cols.append(c) cols.sort(key=lambda x: str(x)) return cols uc_cols = extract_uc_cols(df_final) ini_means = mean_columns_until_empty(df_inicial, start_col_idx=start_col_idx_inicial) def build_rows(): data = [] for i, c in enumerate(uc_cols): ini = ini_means[i] if i < len(ini_means) else float("nan") fin = pd.to_numeric(df_final[c], errors="coerce").mean() if c in df_final.columns else float("nan") diff = fin - ini if (not pd.isna(fin) and not pd.isna(ini)) else float("nan") data.append((clean_module_title(c), ini, fin, diff)) return data rows_data = build_rows() def _fill_cell(cell, text: str, bold=False, align=None): cell.text = "" p = cell.paragraphs[0] if align is not None: p.alignment = align r = p.add_run(text) force_run_font(r, font_name, font_size_pt, bold=bold) apply_table_paragraph_spacing(cell, line_spacing=1.5) def _apply_table_layout(tbl): tbl.style = "Table Grid" tbl.autofit = False col_widths = [Cm(11), Cm(1.6), Cm(1.6), Cm(3.5)] for row in tbl.rows: for i, w in enumerate(col_widths): row.cells[i].width = w headers = ["Apreciação dos módulos", "Inicial", "Final", "Ganhos/Perdas"] for j, h in enumerate(headers): _fill_cell(tbl.rows[0].cells[j], h, bold=True, align=WD_ALIGN_PARAGRAPH.CENTER) def _populate_rows(tbl): if not rows_data: _fill_cell(tbl.rows[1].cells[0], "Sem dados", bold=False, align=WD_ALIGN_PARAGRAPH.LEFT) _fill_cell(tbl.rows[1].cells[1], "", bold=False, align=WD_ALIGN_PARAGRAPH.CENTER) _fill_cell(tbl.rows[1].cells[2], "", bold=False, align=WD_ALIGN_PARAGRAPH.CENTER) _fill_cell(tbl.rows[1].cells[3], "", bold=False, align=WD_ALIGN_PARAGRAPH.CENTER) return for i, (title, ini, fin, diff) in enumerate(rows_data, start=1): _fill_cell(tbl.rows[i].cells[0], title, bold=False, align=WD_ALIGN_PARAGRAPH.LEFT) _fill_cell(tbl.rows[i].cells[1], format_pt_number(ini), bold=False, align=WD_ALIGN_PARAGRAPH.CENTER) _fill_cell(tbl.rows[i].cells[2], format_pt_number(fin), bold=False, align=WD_ALIGN_PARAGRAPH.CENTER) diff_txt = "" if pd.isna(diff) else f"{diff:+.2f}".replace(".", ",") _fill_cell(tbl.rows[i].cells[3], diff_txt, bold=False, align=WD_ALIGN_PARAGRAPH.CENTER) def _make_table_at_paragraph(p: Paragraph): n_rows = 1 + max(1, len(rows_data)) tbl = insert_table_after_paragraph(p, rows=n_rows, cols=4) _apply_table_layout(tbl) _populate_rows(tbl) delete_paragraph(p) def _process_paragraph(p: Paragraph) -> bool: if placeholder not in p.text: return False _make_table_at_paragraph(p) return True for p in doc.paragraphs: if _process_paragraph(p): return for table in doc.tables: for row in table.rows: for cell in row.cells: for p in cell.paragraphs: if placeholder in p.text: cell.text = "" n_rows = 1 + max(1, len(rows_data)) tbl = cell.add_table(rows=n_rows, cols=4) _apply_table_layout(tbl) _populate_rows(tbl) return # %% def insert_paragraph_after(paragraph: Paragraph) -> Paragraph: new_p = OxmlElement("w:p") paragraph._p.addnext(new_p) return Paragraph(new_p, paragraph._parent) # %% def replace_placeholder_with_q06_subitems( doc, df: pd.DataFrame, placeholder: str = "{{Q06_Apreciacao}}", item_number: int = 5, indent_cm: float = 2.75, indent_title: bool = True, font_name: str = "Arial", font_size_pt: int = 12, ): cols = [] for c in df.columns: name = str(c).lower() if "q06" in name and "aprecia" in name: cols.append(c) cols.sort(key=lambda x: str(x)) letters = string.ascii_lowercase def _apply_par_format(par: Paragraph, left_indent_cm: float): fmt = par.paragraph_format fmt.left_indent = Cm(left_indent_cm) fmt.line_spacing = 1.5 fmt.space_before = Pt(0) fmt.space_after = Pt(0) def _add_line(par: Paragraph, text: str): run = par.add_run(text) force_run_font(run, font_name, font_size_pt) return run def _apply_par_format(par: Paragraph, left_indent_cm: float): fmt = par.paragraph_format fmt.left_indent = Cm(left_indent_cm) fmt.line_spacing = 1.5 fmt.space_before = Pt(0) fmt.space_after = Pt(0) def _process_paragraph(p: Paragraph) -> bool: if placeholder not in p.text: return False current = p inserted_any = False if not cols: newp = insert_paragraph_after(current) _add_line(newp, "(a)\tSem dados;") _apply_par_format(newp, indent_cm) inserted_any = True else: for i, c in enumerate(cols): mean_val = pd.to_numeric(df[c], errors="coerce").mean() mean_str = f"{mean_val:.2f}".replace(".", ",") label = avaliacao_qualitativa(mean_val) module_title = clean_module_title(c) sub = letters[i] if i < 26 else f"a{i+1}" newp = insert_paragraph_after(current) r1 = newp.add_run(f"({sub})\t{module_title} (") force_run_font(r1, font_name, font_size_pt) r2 = newp.add_run(mean_str) r2.bold = True force_run_font(r2, font_name, font_size_pt) r3 = newp.add_run("), que corresponde a ") force_run_font(r3, font_name, font_size_pt) r4 = newp.add_run(label) r4.bold = True force_run_font(r4, font_name, font_size_pt) r5 = newp.add_run(";") force_run_font(r5, font_name, font_size_pt) _apply_par_format(newp, indent_cm) current = newp inserted_any = True delete_paragraph(p) return inserted_any for p in doc.paragraphs: if _process_paragraph(p): return for table in doc.tables: for row in table.rows: for cell in row.cells: for p in cell.paragraphs: if _process_paragraph(p): return # %% def _collect_course_info(): form = tk.Tk() form.title("Dados do curso") form.resizable(False, False) fields = [ ("Nomenclatura do curso", "NOMEDOCURSO"), ("Dia de inicio (DD)", "DDi"), ("Mes de inicio (Extenso)", "MESi"), ("Ano de inicio (AAAA)", "AAAAi"), ("Dia de fim (DD)", "DDf"), ("Mes de fim (Extenso)", "MESf"), ("Ano de fim (AAAA)", "AAAAf"), ("Numero de formandos", "NFORMANDOS"), ("Finalidade do curso", "FINALIDADECURSO"), ("Média final do curso", "MEDIAFINALCURSO"), ] entries = {} for i, (label, key) in enumerate(fields): tk.Label(form, text=label, anchor="w").grid(row=i, column=0, padx=8, pady=4, sticky="w") ent = tk.Entry(form, width=30) ent.grid(row=i, column=1, padx=8, pady=4) entries[key] = ent form_values = {} def _submit(): values = {k: e.get().strip() for k, e in entries.items()} missing = [label for (label, key) in fields if not values[key]] if missing: messagebox.showerror("Dados em falta", "Preencha: " + ", ".join(missing)) return form_values.update(values) form.destroy() tk.Button(form, text="Continuar", command=_submit).grid(row=len(fields), column=0, columnspan=2, pady=10) form.mainloop() if not form_values: raise RuntimeError("Formulario cancelado") return form_values course_info = _collect_course_info() def _to_int_or_str(s): return int(s) if s.isdigit() else s NOMEDOCURSOcurto = course_info["NOMEDOCURSO"] DDi = _to_int_or_str(course_info["DDi"]) MESi = _to_int_or_str(course_info["MESi"]) AAAAi = _to_int_or_str(course_info["AAAAi"]) DDf = _to_int_or_str(course_info["DDf"]) MESf = _to_int_or_str(course_info["MESf"]) AAAAf = _to_int_or_str(course_info["AAAAf"]) NFORMANDOS = _to_int_or_str(course_info["NFORMANDOS"]) FINALIDADECURSO = course_info["FINALIDADECURSO"] MEDIAFINALCURSO = course_info["MEDIAFINALCURSO"] # %% Tk().withdraw() file_path = askopenfilename( title="Select Excel das expetativas iniciais", filetypes=[("Excel files", "*.xlsx *.xls")]) if not file_path: raise FileNotFoundError("No file selected") print(f"Selected file:{file_path}") # %% df = pd.read_excel(file_path) # %% ninq = df.shape[0]-1 medalojamento = round(df.iloc[:,10].mean(),2) medalimentacao = round(df.iloc[:,11].mean(),2) meddificuldade = round(df.iloc[:,12].mean(),2) medfuncfut = round(df.iloc[:,13].mean(),2) medmotvpart = round(df.iloc[:,14].mean(),2) medconhecimento = round(df.iloc[:,15].mean(),2) # %% def mean_columns_until_empty(df: pd.DataFrame, start_col_idx: int = 16) -> list[float]: means = [] n_cols = df.shape[1] for j in range(start_col_idx, n_cols): col = pd.to_numeric(df.iloc[:, j], errors="coerce") if col.notna().sum() == 0: break means.append(col.mean()) return means # %% medias = mean_columns_until_empty(df, start_col_idx=16) print(len(medias), medias[:5]) # %% Tk().withdraw() file_path2 = askopenfilename( title="Select Excel das expetativas finais", filetypes=[("Excel files", "*.xlsx *.xls")]) if not file_path2: raise FileNotFoundError("No file selected") print(f"Selected file:\n{file_path2}") # %% df2 = pd.read_excel(file_path2) # %% ninq2 = df2.shape[0]-1 medaplog = round(df2.iloc[:,10].mean(),2) medalojamento2 = round(df2.iloc[:,11].mean(),2) medalimentacao2 = round(df2.iloc[:,12].mean(),2) medapdir = round(df2.iloc[:,13].mean(),2) meddificuldade2 = round(df2.iloc[:,14].mean(),2) medfuncfut2 = round(df2.iloc[:,15].mean(),2) medmotvpart2 = round(df2.iloc[:,16].mean(),2) medconhecimento2 = round(df2.iloc[:,17].mean(),2) medalojamentofinal = round(medalojamento2 - medalojamento,2) medalimentacaofinal = round(medalimentacao2 - medalimentacao,2) meddificuldadefinal = round(meddificuldade2 - meddificuldade,2) medfuncfutfinal = round(medfuncfut2 - medfuncfut,2) medmotvpartfinal = round(medmotvpart2 - medmotvpart,2) medconhecimentofinal = round(medconhecimento2 - medconhecimento,2) objcruso = round(df2.iloc[:,18].mean(),2) contcurso = round(df2.iloc[:,19].mean(),2) adeqtrab = round(df2.iloc[:,20].mean(),2) instform = round(df2.iloc[:,21].mean(),2) audiovisuais = round(df2.iloc[:,22].mean(),2) biblio = round(df2.iloc[:,23].mean(),2) NOMEDOCURSO = df2.iloc[1,4].split("-")[1].split("–")[0].strip() # %% def build_formadores_rows(df: pd.DataFrame): groups = {} for c in df.columns: name = str(c) low = name.lower() if "_formador" not in low: continue m = re.match(r"^\s*(Q\d+)\s*_Formador\s*->\s*(.+?)\s*(?:\(|$)", name, flags=re.IGNORECASE) if not m: continue qcode = m.group(1).upper() metric_raw = m.group(2).strip().lower() groups.setdefault(qcode, {}) groups[qcode][metric_raw] = name def qnum(q): mm = re.match(r"Q(\d+)", q) return int(mm.group(1)) if mm else 10**9 qcodes_sorted = sorted(groups.keys(), key=qnum) def metric_key(metric_raw: str) -> str | None: mr = metric_raw.lower() if "dom" in mr and "ass" in mr: return "dominio" if "métod" in mr or "metod" in mr: return "metodos" if "lingu" in mr: return "linguagem" if "empenh" in mr: return "empenho" if "relac" in mr or "formand" in mr: return "relacao" return None rows = [] letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" for idx, qcode in enumerate(qcodes_sorted): cols_map = groups[qcode] picked = {"dominio": None, "metodos": None, "linguagem": None, "empenho": None, "relacao": None} for raw, colname in cols_map.items(): k = metric_key(raw) if k and picked[k] is None: picked[k] = colname def col_mean(colname): if not colname: return float("nan") return pd.to_numeric(df[colname], errors="coerce").mean() dominio = col_mean(picked["dominio"]) metodos = col_mean(picked["metodos"]) linguagem = col_mean(picked["linguagem"]) empenho = col_mean(picked["empenho"]) relacao = col_mean(picked["relacao"]) vals = [dominio, metodos, linguagem, empenho, relacao] media_final = pd.Series(vals, dtype="float").mean(skipna=True) label = letters[idx] if idx < len(letters) else f"F{idx+1}" rows.append({ "label": label, "qcode": qcode, "dominio": dominio, "metodos": metodos, "linguagem": linguagem, "empenho": empenho, "relacao": relacao, "media_final": media_final, }) return rows # %% def replace_placeholder_with_formadores_table( doc, df2: pd.DataFrame, placeholder: str = "{{tabelaFormadores}}", font_name: str = "Arial", font_size_pt: int = 12, col_width_cm: float = 2.3, header_vertical: bool = True, rotate_first_header: bool = True, ): rows = build_formadores_rows(df2) def _fill_cell(cell, text: str, bold=False, align=None): cell.text = "" p = cell.paragraphs[0] if align is not None: p.alignment = align r = p.add_run(text) force_run_font(r, font_name, font_size_pt, bold=bold) apply_table_paragraph_spacing(cell, line_spacing=1.5) headers = ["Formadores", "Domínio do assunto","Métodos utilizados","Linguagem utilizada","Empenho","Relação c/ formandos","Média final",] global_mean = pd.Series([r["media_final"] for r in rows], dtype="float").mean(skipna=True) def _apply_layout(tbl): tbl.style = "Table Grid" tbl.autofit = False set_table_all_columns_width(tbl, width_cm=col_width_cm) if header_vertical: start_j = 0 if rotate_first_header else 1 for j in range(start_j, len(headers)): set_cell_text_vertical(tbl.rows[0].cells[j], direction="btLr") def _populate_table(tbl): for j, h in enumerate(headers): _fill_cell(tbl.rows[0].cells[j], h, bold=True, align=WD_ALIGN_PARAGRAPH.CENTER) if not rows: _fill_cell(tbl.rows[1].cells[0], "Sem dados", bold=False, align=WD_ALIGN_PARAGRAPH.LEFT) for j in range(1, len(headers)): _fill_cell(tbl.rows[1].cells[j], "", bold=False, align=WD_ALIGN_PARAGRAPH.CENTER) _fill_cell(tbl.rows[2].cells[0], "Média", bold=True, align=WD_ALIGN_PARAGRAPH.LEFT) for j in range(1, len(headers) - 1): _fill_cell(tbl.rows[2].cells[j], "", bold=False, align=WD_ALIGN_PARAGRAPH.CENTER) _fill_cell(tbl.rows[2].cells[-1], "", bold=False, align=WD_ALIGN_PARAGRAPH.CENTER) return for i, r in enumerate(rows, start=1): _fill_cell(tbl.rows[i].cells[0], r["label"], bold=False, align=WD_ALIGN_PARAGRAPH.CENTER) _fill_cell(tbl.rows[i].cells[1], format_pt_number(r["dominio"]), bold=False, align=WD_ALIGN_PARAGRAPH.CENTER) _fill_cell(tbl.rows[i].cells[2], format_pt_number(r["metodos"]), bold=False, align=WD_ALIGN_PARAGRAPH.CENTER) _fill_cell(tbl.rows[i].cells[3], format_pt_number(r["linguagem"]), bold=False, align=WD_ALIGN_PARAGRAPH.CENTER) _fill_cell(tbl.rows[i].cells[4], format_pt_number(r["empenho"]), bold=False, align=WD_ALIGN_PARAGRAPH.CENTER) _fill_cell(tbl.rows[i].cells[5], format_pt_number(r["relacao"]), bold=False, align=WD_ALIGN_PARAGRAPH.CENTER) _fill_cell(tbl.rows[i].cells[6], format_pt_number(r["media_final"]), bold=False, align=WD_ALIGN_PARAGRAPH.CENTER) last = 1 + len(rows) _fill_cell(tbl.rows[last].cells[0], "Média", bold=True, align=WD_ALIGN_PARAGRAPH.LEFT) for j in range(1, len(headers) - 1): _fill_cell(tbl.rows[last].cells[j], "", bold=False, align=WD_ALIGN_PARAGRAPH.CENTER) _fill_cell(tbl.rows[last].cells[-1], format_pt_number(global_mean), bold=True, align=WD_ALIGN_PARAGRAPH.CENTER) def _make_table_at_paragraph(p: Paragraph): n_rows = 1 + max(1, len(rows)) + 1 tbl = insert_table_after_paragraph(p, rows=n_rows, cols=len(headers)) _apply_layout(tbl) format_header_row(tbl.rows[0], height_cm=5.2) _populate_table(tbl) delete_paragraph(p) for p in doc.paragraphs: if placeholder in p.text: _make_table_at_paragraph(p) return for table in doc.tables: for row in table.rows: for cell in row.cells: for p in cell.paragraphs: if placeholder in p.text: cell.text = "" n_rows = 1 + max(1, len(rows)) + 1 tbl = cell.add_table(rows=n_rows, cols=len(headers)) _apply_layout(tbl) format_header_row(tbl.rows[0], height_cm=5.2) _populate_table(tbl) return # %% def compute_formadores_summary(df: pd.DataFrame): rows = build_formadores_rows(df) nforma = len(rows) global_mean = pd.Series([r["media_final"] for r in rows], dtype="float").mean(skipna=True) mediaformquanl = "" if pd.isna(global_mean) else f"{global_mean:.2f}".replace(".", ",") mediaform = "" if pd.isna(global_mean) else avaliacao_qualitativa(global_mean) return nforma, mediaformquanl, mediaform # %% def _copy_run_format(src_run, dst_run, keep_bold=None): dst_run.bold = src_run.bold if keep_bold is None else keep_bold dst_run.italic = src_run.italic dst_run.underline = src_run.underline if src_run.font.name: dst_run.font.name = src_run.font.name dst_run._element.rPr.rFonts.set(qn("w:eastAsia"), src_run.font.name) if src_run.font.size: dst_run.font.size = src_run.font.size # %% def extract_temas_list(df: pd.DataFrame) -> list[str]: temas_cols = [c for c in df.columns if "_temas" in str(c).lower()] temas = [] for col in temas_cols: s = df[col].dropna().astype(str).str.strip() s = s[s != ""] temas.extend(s.tolist()) seen = set() out = [] for t in temas: k = t.lower() if k not in seen: seen.add(k) out.append(t) return out # %% def replace_placeholders_docx_bold_values_keep_style(doc, replacements: dict[str, str]): keys = sorted(replacements.keys(), key=len, reverse=True) def _replace_in_paragraph(paragraph): if not paragraph.runs: return full_text = "".join(run.text for run in paragraph.runs) if not any(k in full_text for k in keys): return base_run = paragraph.runs[0] for run in paragraph.runs: run.text = "" text = full_text while True: next_pos = None next_key = None for k in keys: pos = text.find(k) if pos != -1 and (next_pos is None or pos < next_pos): next_pos, next_key = pos, k if next_key is None: if text: r = paragraph.add_run(text) _copy_run_format(base_run, r, keep_bold=base_run.bold) break before = text[:next_pos] if before: r = paragraph.add_run(before) _copy_run_format(base_run, r, keep_bold=base_run.bold) val = str(replacements[next_key]) r_val = paragraph.add_run(val) _copy_run_format(base_run, r_val, keep_bold=True) text = text[next_pos + len(next_key):] for p in doc.paragraphs: _replace_in_paragraph(p) for table in doc.tables: for row in table.rows: for cell in row.cells: for p in cell.paragraphs: _replace_in_paragraph(p) # %% def replace_placeholder_with_column_subitems_hanging( doc, df: pd.DataFrame, placeholder: str, column_contains: str, indent_cm: float = 0.5, font_name: str = "Arial", font_size_pt: int = 12, deduplicate: bool = True, ): cols = [c for c in df.columns if column_contains.lower() in str(c).lower()] items = [] for col in cols: s = df[col].dropna().astype(str).str.strip() s = s[s != ""] items.extend(s.tolist()) if deduplicate: seen = set() out = [] for t in items: k = t.lower() if k not in seen: seen.add(k) out.append(t) items = out letters = string.ascii_lowercase def _apply_par_format(par: Paragraph): fmt = par.paragraph_format fmt.left_indent = Cm(indent_cm) fmt.first_line_indent = Cm(-indent_cm) fmt.line_spacing = 1.5 fmt.space_before = Pt(0) fmt.space_after = Pt(0) def _add_run(par: Paragraph, text: str): r = par.add_run(text) force_run_font(r, font_name, font_size_pt) return r def _process_paragraph(p: Paragraph) -> bool: if placeholder not in p.text: return False current = p if not items: newp = insert_paragraph_after(current) _add_run(newp, "a. Sem dados;") _apply_par_format(newp) delete_paragraph(p) return True for i, txt in enumerate(items): sub = letters[i] if i < 26 else f"a{i+1}" newp = insert_paragraph_after(current) _add_run(newp, f"{sub}. ") _add_run(newp, txt) _add_run(newp, ";") _apply_par_format(newp) current = newp delete_paragraph(p) return True for p in doc.paragraphs: if _process_paragraph(p): return for table in doc.tables: for row in table.rows: for cell in row.cells: for p in cell.paragraphs: if _process_paragraph(p): return # %% def replace_placeholder_with_column_subitems_hanging2( doc, df: pd.DataFrame, placeholder: str, column_contains: str, left_indent_cm: float = 2.75, hanging_cm: float = 0.6, font_name: str = "Arial", font_size_pt: int = 12, deduplicate: bool = True, ): cols = [c for c in df.columns if column_contains.lower() in str(c).lower()] items = [] for col in cols: s = df[col].dropna().astype(str).str.strip() s = s[s != ""] items.extend(s.tolist()) if deduplicate: seen = set() out = [] for t in items: k = t.lower() if k not in seen: seen.add(k) out.append(t) items = out letters = string.ascii_lowercase def _apply_par_format(par: Paragraph): fmt = par.paragraph_format fmt.left_indent = Cm(left_indent_cm) fmt.first_line_indent = Cm(-hanging_cm) fmt.line_spacing = 1.5 fmt.space_before = Pt(0) fmt.space_after = Pt(0) def _add_run(par: Paragraph, text: str): r = par.add_run(text) force_run_font(r, font_name, font_size_pt) return r def _process_paragraph(p: Paragraph) -> bool: if placeholder not in p.text: return False current = p if not items: newp = insert_paragraph_after(current) _add_run(newp, "a. Sem dados;") _apply_par_format(newp) delete_paragraph(p) return True for i, txt in enumerate(items): sub = letters[i] if i < 26 else f"a{i+1}" newp = insert_paragraph_after(current) _add_run(newp, f"{sub}. ") _add_run(newp, txt) _add_run(newp, ";") _apply_par_format(newp) current = newp delete_paragraph(p) return True for p in doc.paragraphs: if _process_paragraph(p): return for table in doc.tables: for row in table.rows: for cell in row.cells: for p in cell.paragraphs: if _process_paragraph(p): return # %% def replace_placeholder_with_temas_smart( doc, df: pd.DataFrame, placeholder: str = "{{TEMAS}}", indent_cm: float = 0.5, font_name: str = "Arial", font_size_pt: int = 12, deduplicate: bool = True, max_items: int | None = None, **_ignored, ): temas_raw = extract_temas_list(df) temas = [str(t).strip() for t in (temas_raw or []) if str(t).strip()] if deduplicate: seen = set() out = [] for t in temas: k = t.lower() if k not in seen: seen.add(k) out.append(t) temas = out if max_items is not None: temas = temas[:max_items] if not temas: df_tmp = pd.DataFrame({"_Temas": ["Sem dados"]}) return replace_placeholder_with_column_subitems_hanging( doc, df_tmp, placeholder=placeholder, column_contains="_temas", indent_cm=indent_cm, font_name=font_name, font_size_pt=font_size_pt, deduplicate=False ) df_tmp = pd.DataFrame({"_Temas": temas}) return replace_placeholder_with_column_subitems_hanging( doc, df_tmp, placeholder=placeholder, column_contains="_temas", indent_cm=indent_cm, font_name=font_name, font_size_pt=font_size_pt, deduplicate=False ) # %% def extract_desenvolver_list(df: pd.DataFrame) -> list[str]: cols = [c for c in df.columns if "_desenvolver" in str(c).lower()] items = [] for col in cols: s = df[col].dropna().astype(str).str.strip() s = s[s != ""] items.extend(s.tolist()) seen = set() out = [] for t in items: k = t.lower() if k not in seen: seen.add(k) out.append(t) return out # %% def ollama_summarize_desenvolver( items: list[str], max_items: int = 6, model: str = "llama3.1:8b", timeout: float = 30.0 ) -> list[str]: items_txt = "\n".join(f"- {t}" for t in items) prompt = f""" És um analista a escrever um relatório oficial de avaliação. Tens uma lista de aspetos a desenvolver/melhorar apontados pelos formandos. Faz o seguinte: 1) Agrupa itens repetidos/semelhantes; 2) Seleciona os mais importantes e recorrentes; 3) Reescreve numa lista curta, clara e formal (Português de Portugal); 4) NÃO inventes novos pontos; 5) No máximo {max_items} itens; 6) Frases curtas, em formato de sintagma nominal (ex.: "Melhoria da componente prática", "Aprofundamento de ..."). Itens: {items_txt} Devolve APENAS a lista final no formato: - Item 1 - Item 2 - Item 3 """.strip() payload = { "model": model, "prompt": prompt, "stream": False, "options": {"temperature": 0.2} } r = requests.post("http://localhost:11434/api/generate", json=payload, timeout=timeout) r.raise_for_status() text = r.json().get("response", "") lines = [] for line in text.splitlines(): line = line.strip() if line.startswith(("-", "•")): item = line.lstrip("-•").strip(" .;") if item: lines.append(item) return lines[:max_items] # %% def replace_placeholder_with_desenvolver_smart( doc, df: pd.DataFrame, placeholder: str = "{{DESENVOLVER}}", indent_cm: float = 1.5, font_name: str = "Arial", font_size_pt: int = 12, deduplicate: bool = True, use_ollama: bool = True, ollama_model: str = "llama3.1:8b", max_items: int = 6, ): raw = extract_desenvolver_list(df) if not raw: return replace_placeholder_with_column_subitems_hanging( doc, df, placeholder=placeholder, column_contains="_desenvolver", indent_cm=indent_cm, font_name=font_name, font_size_pt=font_size_pt, deduplicate=deduplicate ) final_items = None if use_ollama and ollama_available(): try: llm_items = ollama_summarize_desenvolver( raw, max_items=max_items, model=ollama_model ) if llm_items: final_items = llm_items except Exception: final_items = None if final_items is None: return replace_placeholder_with_column_subitems_hanging( doc, df, placeholder=placeholder, column_contains="_desenvolver", indent_cm=indent_cm, font_name=font_name, font_size_pt=font_size_pt, deduplicate=deduplicate ) df_tmp = pd.DataFrame({"_desenvolver": final_items}) return replace_placeholder_with_column_subitems_hanging( doc, df_tmp, placeholder=placeholder, column_contains="_desenvolver", indent_cm=indent_cm, font_name=font_name, font_size_pt=font_size_pt, deduplicate=False ) # %% def extract_incluir_list(df: pd.DataFrame) -> list[str]: cols = [c for c in df.columns if "_incluir" in str(c).lower()] items = [] for col in cols: s = df[col].dropna().astype(str).str.strip() s = s[s != ""] items.extend(s.tolist()) seen = set() out = [] for t in items: k = t.lower() if k not in seen: seen.add(k) out.append(t) return out # %% def ollama_summarize_incluir( items: list[str], max_items: int = 6, model: str = "llama3.1:8b", timeout: float = 30.0 ) -> list[str]: items_txt = "\n".join(f"- {t}" for t in items) prompt = f""" És um analista a escrever um relatório oficial de avaliação. Tens uma lista de conteúdos/temas que os formandos sugerem que sejam incluídos. Faz o seguinte: 1) Agrupa itens repetidos/semelhantes; 2) Seleciona os mais importantes e recorrentes; 3) Reescreve numa lista curta, clara e formal (Português de Portugal); 4) NÃO inventes novos pontos; 5) No máximo {max_items} itens; 6) Frases curtas e objetivas. Itens: {items_txt} Devolve APENAS a lista final no formato: - Item 1 - Item 2 - Item 3 """.strip() payload = { "model": model, "prompt": prompt, "stream": False, "options": {"temperature": 0.2} } r = requests.post("http://localhost:11434/api/generate", json=payload, timeout=timeout) r.raise_for_status() text = r.json().get("response", "") lines = [] for line in text.splitlines(): line = line.strip() if line.startswith(("-", "•")): item = line.lstrip("-•").strip(" .;") if item: lines.append(item) return lines[:max_items] # %% def replace_placeholder_with_incluir_smart( doc, df: pd.DataFrame, placeholder: str = "{{INCLUIR}}", indent_cm: float = 1.5, font_name: str = "Arial", font_size_pt: int = 12, deduplicate: bool = True, use_ollama: bool = True, ollama_model: str = "llama3.1:8b", max_items: int = 6, ): raw = extract_incluir_list(df) if not raw: return replace_placeholder_with_column_subitems_hanging( doc, df, placeholder=placeholder, column_contains="_incluir", indent_cm=indent_cm, font_name=font_name, font_size_pt=font_size_pt, deduplicate=deduplicate ) final_items = None if use_ollama and ollama_available(): try: llm_items = ollama_summarize_incluir( raw, max_items=max_items, model=ollama_model ) if llm_items: final_items = llm_items except Exception: final_items = None if final_items is None: return replace_placeholder_with_column_subitems_hanging( doc, df, placeholder=placeholder, column_contains="_incluir", indent_cm=indent_cm, font_name=font_name, font_size_pt=font_size_pt, deduplicate=deduplicate ) df_tmp = pd.DataFrame({"_incluir": final_items}) return replace_placeholder_with_column_subitems_hanging( doc, df_tmp, placeholder=placeholder, column_contains="_incluir", indent_cm=indent_cm, font_name=font_name, font_size_pt=font_size_pt, deduplicate=False ) # %% def ollama_summarize_observacoes_paragraph( items: list[str], model: str = "llama3.1:8b", timeout: float = 45.0 ) -> str: items_txt = "\n".join(f"- {t}" for t in items) prompt = f""" És um analista a escrever um relatório oficial de avaliação. Tens observações livres escritas pelos formandos. Produz um ÚNICO PARÁGRAFO de síntese: - Português de Portugal, tom formal e objetivo; - Não inventes informação; - Agrupa ideias repetidas; - Evita exemplos pessoais e detalhes identificáveis; - 3 a 6 frases, no máximo ~120 palavras. Observações: {items_txt} Devolve APENAS o parágrafo final (sem tópicos, sem títulos, sem listas). """.strip() payload = { "model": model, "prompt": prompt, "stream": False, "options": {"temperature": 0.2} } r = requests.post("http://localhost:11434/api/generate", json=payload, timeout=timeout) r.raise_for_status() text = r.json().get("response", "").strip() text = " ".join(line.strip() for line in text.splitlines() if line.strip()) return text # %% def extract_observacoes_list(df: pd.DataFrame) -> list[str]: cols = [c for c in df.columns if "_observ" in str(c).lower()] items = [] for col in cols: s = df[col].dropna().astype(str).str.strip() s = s[s != ""] items.extend(s.tolist()) seen = set() out = [] for t in items: k = t.lower() if k not in seen: seen.add(k) out.append(t) return out # %% def _iter_paragraphs_in_table(tbl): for row in tbl.rows: for cell in row.cells: for p in cell.paragraphs: yield p for t in cell.tables: yield from _iter_paragraphs_in_table(t) def iter_all_paragraphs_everywhere(doc): for p in doc.paragraphs: yield p for t in doc.tables: yield from _iter_paragraphs_in_table(t) for section in doc.sections: containers = [ section.header, section.footer, section.first_page_header, section.first_page_footer, section.even_page_header, section.even_page_footer, ] for c in containers: for p in c.paragraphs: yield p for t in c.tables: yield from _iter_paragraphs_in_table(t) # %% def replace_placeholder_with_observacoes_smart( doc, df: pd.DataFrame, placeholder: str = "{{OBSERVACOES}}", indent_cm: float = 0.5, font_name: str = "Arial", font_size_pt: int = 12, deduplicate: bool = True, use_ollama: bool = True, ollama_model: str = "llama3.1:8b", max_items: int | None = None, ) -> int: raw = extract_observacoes_list(df) items = [str(t).strip() for t in (raw or []) if str(t).strip()] if deduplicate: seen = set() out = [] for t in items: k = t.lower() if k not in seen: seen.add(k) out.append(t) items = out if max_items is not None: items = items[:max_items] paragraph_text = None if items and use_ollama and ollama_available(): try: paragraph_text = ollama_summarize_observacoes_paragraph(items, model=ollama_model) if paragraph_text: paragraph_text = paragraph_text.strip() except Exception: paragraph_text = None letters = string.ascii_lowercase def _apply_par_format(par: Paragraph, hanging=False): fmt = par.paragraph_format fmt.left_indent = Cm(indent_cm) if hanging: fmt.first_line_indent = Cm(-0.6) fmt.line_spacing = 1.5 fmt.space_before = Pt(0) fmt.space_after = Pt(0) def _add_run(par: Paragraph, text: str, bold=False): r = par.add_run(text) force_run_font(r, font_name, font_size_pt, bold=bold) return r replaced = 0 for p in list(iter_all_paragraphs_everywhere(doc)): if placeholder not in (p.text or ""): continue current = p if paragraph_text: newp = insert_paragraph_after(current) _add_run(newp, paragraph_text) _apply_par_format(newp, hanging=False) else: if not items: items_to_write = ["Sem dados"] else: items_to_write = items for i, txt in enumerate(items_to_write): sub = letters[i] if i < 26 else f"a{i+1}" newp = insert_paragraph_after(current) _add_run(newp, f"{sub}. ") _add_run(newp, txt.rstrip(".;")) _add_run(newp, ";") _apply_par_format(newp, hanging=True) current = newp delete_paragraph(p) replaced += 1 return replaced # %% def build_melhoria_checklist_items( temas_items: list[str] | None, desenvolver_items: list[str] | None, incluir_items: list[str] | None, observacoes_text_or_items: str | list[str] | None = None, use_ollama: bool = True, ollama_model: str = "llama3.1:8b", max_items: int = 10, ): temas_items = [x.strip() for x in (temas_items or []) if str(x).strip()] desenvolver_items = [x.strip() for x in (desenvolver_items or []) if str(x).strip()] incluir_items = [x.strip() for x in (incluir_items or []) if str(x).strip()] if isinstance(observacoes_text_or_items, str): observacoes_text = observacoes_text_or_items.strip() elif isinstance(observacoes_text_or_items, list): obs_list = [str(x).strip() for x in observacoes_text_or_items if str(x).strip()] observacoes_text = "\n".join(f"- {x}" for x in obs_list) else: observacoes_text = "" if use_ollama and ollama_available(): try: temas_txt = "\n".join(f"- {t}" for t in temas_items) des_txt = "\n".join(f"- {t}" for t in desenvolver_items) inc_txt = "\n".join(f"- {t}" for t in incluir_items) prompt = f""" És um analista a escrever um relatório oficial de avaliação. A partir dos seguintes outputs, cria uma CHECKLIST de melhorias (ações). Regras: - Não inventes pontos. - Junta redundâncias. - Escreve cada item como ação (ex.: "Reforçar ...", "Incluir ...", "Aprofundar ...", "Melhorar ..."). - No máximo {max_items} itens. - Devolve APENAS lista em bullets "- ...". TEMAS: {temas_txt} A DESENVOLVER: {des_txt} A INCLUIR: {inc_txt} OBSERVAÇÕES: {observacoes_text} """.strip() payload = { "model": ollama_model, "prompt": prompt, "stream": False, "options": {"temperature": 0.2}, } r = requests.post("http://localhost:11434/api/generate", json=payload, timeout=45.0) r.raise_for_status() text = r.json().get("response", "").strip() llm_items = [] for line in text.splitlines(): line = line.strip() if line.startswith(("-", "•")): item = line.lstrip("-•").strip(" .;") if item: llm_items.append(item) llm_items = llm_items[:max_items] if llm_items: return llm_items except Exception: pass merged = temas_items + desenvolver_items + incluir_items seen = set() out = [] for x in merged: x = x.strip().strip("•-").strip() if not x: continue k = x.lower() if k not in seen: seen.add(k) out.append(x) return out[:max_items] if out else ["Sem dados"] # %% def replace_placeholder_with_checklist_subitems( doc, checklist_items: list[str], placeholder: str = "{{CHECKLIST_MELHORAR}}", indent_cm: float = 0.5, font_name: str = "Arial", font_size_pt: int = 12, checkbox: str = "-", ): letters = string.ascii_lowercase def _apply_par_format(par: Paragraph): fmt = par.paragraph_format fmt.left_indent = Cm(indent_cm) fmt.first_line_indent = Cm(-indent_cm) fmt.line_spacing = 1.5 fmt.space_before = Pt(0) fmt.space_after = Pt(0) def _add_run(par: Paragraph, text: str): r = par.add_run(text) force_run_font(r, font_name, font_size_pt) return r def _process_paragraph(p: Paragraph) -> bool: if placeholder not in p.text: return False current = p items = checklist_items or ["Sem dados"] for i, item in enumerate(items): sub = letters[i] if i < 26 else f"a{i+1}" newp = insert_paragraph_after(current) _add_run(newp, f"{sub}. {checkbox} ") _add_run(newp, item.strip().rstrip(".;")) _add_run(newp, ";") _apply_par_format(newp) current = newp delete_paragraph(p) return True for p in doc.paragraphs: if _process_paragraph(p): return for table in doc.tables: for row in table.rows: for cell in row.cells: for p in cell.paragraphs: if _process_paragraph(p): return # %% def replace_placeholder_with_observacoes_smart2( doc, df: pd.DataFrame, placeholder: str = "{{OBSERVACOES2}}", indent_cm: float = 2.75, font_name: str = "Arial", font_size_pt: int = 12, deduplicate: bool = True, max_items: int | None = None, **_ignored, ): obs_raw = extract_observacoes_list(df) itens = [str(t).strip() for t in (obs_raw or []) if str(t).strip()] if deduplicate: seen = set() out = [] for t in itens: k = t.lower() if k not in seen: seen.add(k) out.append(t) itens = out if max_items is not None: itens = itens[:max_items] if not itens: itens = ["Sem dados"] df_tmp = pd.DataFrame({"_Observacoes": itens}) return replace_placeholder_with_column_subitems_hanging2( doc, df2, placeholder="{{OBSERVACOES2}}", column_contains="_observa", left_indent_cm=2.75, hanging_cm=0.6, font_name="Arial", font_size_pt=12, deduplicate=True ) # %% def replace_placeholder_with_incluir_smart( doc, df: pd.DataFrame, placeholder: str = "{{INCLUIR}}", indent_cm: float = 0.5, font_name: str = "Arial", font_size_pt: int = 12, deduplicate: bool = True, max_items: int | None = None, **_ignored, ): incluir_raw = extract_incluir_list(df) itens = [str(t).strip() for t in (incluir_raw or []) if str(t).strip()] if deduplicate: seen = set() out = [] for t in itens: k = t.lower() if k not in seen: seen.add(k) out.append(t) itens = out if max_items is not None: itens = itens[:max_items] if not itens: itens = ["Sem dados"] df_tmp = pd.DataFrame({"_Incluir": itens}) return replace_placeholder_with_column_subitems_hanging( doc, df_tmp, placeholder=placeholder, column_contains="_incluir", indent_cm=indent_cm, font_name=font_name, font_size_pt=font_size_pt, deduplicate=False ) # %% def replace_placeholder_with_desenvolver_smart( doc, df: pd.DataFrame, placeholder: str = "{{DESENVOLVER}}", indent_cm: float = 0.5, font_name: str = "Arial", font_size_pt: int = 12, deduplicate: bool = True, max_items: int | None = None, **_ignored, ): desenvolver_raw = extract_desenvolver_list(df) itens = [str(t).strip() for t in (desenvolver_raw or []) if str(t).strip()] if deduplicate: seen = set() out = [] for t in itens: k = t.lower() if k not in seen: seen.add(k) out.append(t) itens = out if max_items is not None: itens = itens[:max_items] if not itens: itens = ["Sem dados"] df_tmp = pd.DataFrame({"_Desenvolver": itens}) return replace_placeholder_with_column_subitems_hanging( doc, df_tmp, placeholder=placeholder, column_contains="_desenvolver", indent_cm=indent_cm, font_name=font_name, font_size_pt=font_size_pt, deduplicate=False ) # %% def delete_all_between_anchors_xml( doc, start="{{ANCORA1}}", end="{{ANCORA2}}", debug=False, max_passes=10_000, ) -> int: body = doc._element.body removed_blocks = 0 passes = 0 def child_contains_needle(elm, needle: str) -> bool: texts = elm.xpath(".//*[local-name()='t']/text()") joined = "".join(texts) if texts else "" return needle in joined while passes < max_passes: passes += 1 children = list(body.iterchildren()) start_idx = None end_idx = None for i, elm in enumerate(children): if start_idx is None and child_contains_needle(elm, start): start_idx = i if debug: print(f"[DEBUG] start in child {i} tag={elm.tag}") continue if start_idx is not None and child_contains_needle(elm, end): end_idx = i if debug: print(f"[DEBUG] end in child {i} tag={elm.tag}") break if start_idx is None or end_idx is None: if debug: print("[DEBUG] done. start/end:", start_idx, end_idx) break for elm in reversed(children[start_idx:end_idx + 1]): body.remove(elm) removed_blocks += 1 return removed_blocks # %% def delete_all_between_anchors_xml( doc, start="{{ANCORA3}}", end="{{ANCORA4}}", debug=False, max_passes=10_000, ) -> int: body = doc._element.body removed_blocks = 0 passes = 0 def child_contains_needle(elm, needle: str) -> bool: texts = elm.xpath(".//*[local-name()='t']/text()") joined = "".join(texts) if texts else "" return needle in joined while passes < max_passes: passes += 1 children = list(body.iterchildren()) start_idx = None end_idx = None for i, elm in enumerate(children): if start_idx is None and child_contains_needle(elm, start): start_idx = i if debug: print(f"[DEBUG] start in child {i} tag={elm.tag}") continue if start_idx is not None and child_contains_needle(elm, end): end_idx = i if debug: print(f"[DEBUG] end in child {i} tag={elm.tag}") break if start_idx is None or end_idx is None: if debug: print("[DEBUG] done. start/end:", start_idx, end_idx) break for elm in reversed(children[start_idx:end_idx + 1]): body.remove(elm) removed_blocks += 1 return removed_blocks # %% def _iter_paragraphs_in_table(tbl): for row in tbl.rows: for cell in row.cells: for p in cell.paragraphs: yield p for t in cell.tables: yield from _iter_paragraphs_in_table(t) def iter_all_paragraphs_everywhere(doc): for p in doc.paragraphs: yield p for t in doc.tables: yield from _iter_paragraphs_in_table(t) for section in doc.sections: containers = [ section.header, section.footer, section.first_page_header, section.first_page_footer, section.even_page_header, section.even_page_footer, ] for c in containers: for p in c.paragraphs: yield p for t in c.tables: yield from _iter_paragraphs_in_table(t) # %% Tk().withdraw() file_path3 = askopenfilename( title="Select Excel das Formadores", filetypes=[("Excel files", "*.xlsx *.xls")] ) if not file_path3: print("Nenhum ficheiro selecionado.") df3 = None else: print(f"Selected file:\n{file_path3}") df3 = pd.read_excel(file_path3) # %% df3 = None ninq3 = 0 medpub = 0 medmeiosaux = 0 medapform = 0 medapdc = 0 medobjesp = 0 medmetensi = 0 medtempform = 0 medlocaisform = 0 medlançaaval = 0 medtipoaval = 0 medtempoaval = 0 medobjapre = 0 medadqonjesp = 0 medinterforma = 0 prerequesitos = 0 Conteudo = 0 objgeral = 0 objfinal = 0 objadq = 0 avadq = 0 refere = 0 if file_path3: df3 = pd.read_excel(file_path3) ninq3 = df3.shape[0] medpub = round(df3.iloc[:, 12].mean(), 2) medmeiosaux = round(df3.iloc[:, 13].mean(), 2) medapform = round(df3.iloc[:, 14].mean(), 2) medapdc = round(df3.iloc[:, 15].mean(), 2) medobjesp = round(df3.iloc[:, 16].mean(), 2) medmetensi = round(df3.iloc[:, 17].mean(), 2) medtempform = round(df3.iloc[:, 18].mean(), 2) medlocaisform = round(df3.iloc[:, 19].mean(), 2) medlançaaval = round(df3.iloc[:, 20].mean(), 2) medtipoaval = round(df3.iloc[:, 21].mean(), 2) medtempoaval = round(df3.iloc[:, 22].mean(), 2) medobjapre = round(df3.iloc[:, 23].mean(), 2) medadqonjesp = round(df3.iloc[:, 24].mean(), 2) medinterforma = round(df3.iloc[:, 25].mean(), 2) if ninq3 > 0: prerequesitos = round((df3.iloc[:, 26].eq(1).sum() / ninq3) * 100, 2) Conteudo = round((df3.iloc[:, 27].eq(1).sum() / ninq3) * 100, 2) objgeral = round((df3.iloc[:, 28].eq(1).sum() / ninq3) * 100, 2) objfinal = round((df3.iloc[:, 29].eq(1).sum() / ninq3) * 100, 2) objadq = round((df3.iloc[:, 30].eq(1).sum() / ninq3) * 100, 2) avadq = round((df3.iloc[:, 31].eq(1).sum() / ninq3) * 100, 2) refere = round((df3.iloc[:, 32].eq(1).sum() / ninq3) * 100, 2) else: print("Nenhum ficheiro selecionado (df3). Valores definidos a 0.") # %% Tk().withdraw() file_path4 = askopenfilename( title="Select Excel da Direção de Curso", filetypes=[("Excel files", "*.xlsx *.xls")] ) if not file_path4: print("Nenhum ficheiro selecionado.") df4 = None else: print(f"Selected file:\n{file_path4}") df4 = pd.read_excel(file_path4) # %% df4 = None ninq4 = 0 medprogcurso = 0 medcontcurso = 0 medestrcurso = 0 medutilprat = 0 medcargahoraria = 0 med1 = 0 medinstal = 0 medaudiovis = 0 meddocdispor = 0 medapadmin = 0 medapcoord = 0 med2 = 0 medmotform = 0 medrelpart = 0 medpontass = 0 med3 = 0 if file_path4: df4 = pd.read_excel(file_path4) ninq4 = df4.shape[0] medprogcurso = round(df4.iloc[:, 10].mean(), 2) medcontcurso = round(df4.iloc[:, 11].mean(), 2) medestrcurso = round(df4.iloc[:, 12].mean(), 2) medutilprat = round(df4.iloc[:, 13].mean(), 2) medcargahoraria= round(df4.iloc[:, 14].mean(), 2) med1 = round( (medprogcurso + medcontcurso + medestrcurso + medutilprat + medcargahoraria) / 5, 2 ) medinstal = round(df4.iloc[:, 15].mean(), 2) medaudiovis = round(df4.iloc[:, 16].mean(), 2) meddocdispor = round(df4.iloc[:, 17].mean(), 2) medapadmin = round(df4.iloc[:, 18].mean(), 2) medapcoord = round(df4.iloc[:, 19].mean(), 2) med2 = round( (medinstal + medaudiovis + meddocdispor + medapadmin + medapcoord) / 5, 2 ) medmotform = round(df4.iloc[:, 20].mean(), 2) medrelpart = round(df4.iloc[:, 21].mean(), 2) medpontass = round(df4.iloc[:, 22].mean(), 2) med3 = round( (medmotform + medrelpart + medpontass) / 3, 2 ) else: print("Nenhum ficheiro selecionado. Valores definidos a 0.") # %% df4 # %% def replace_placeholder_with_propostas( doc, df3: pd.DataFrame, placeholder: str = "{{Propostas}}", left_indent_cm: float = 2.75, hanging_cm: float = 0.6, font_name: str = "Arial", font_size_pt: int = 12, deduplicate: bool = True, max_items: int | None = None, ) -> int: cols = [c for c in df3.columns if "_propostas" in str(c).lower()] itens = [] for c in cols: s = df3[c].dropna().astype(str).str.strip() s = s[s != ""] itens.extend(s.tolist()) if deduplicate: seen = set() out = [] for t in itens: k = t.lower() if k not in seen: seen.add(k) out.append(t) itens = out if max_items is not None: itens = itens[:max_items] if not itens: itens = ["Sem dados"] df_tmp = pd.DataFrame({"_Propostas": itens}) replaced = 0 while True: changed = False for p in list(iter_all_paragraphs_everywhere(doc)): if placeholder in (p.text or ""): replace_placeholder_with_column_subitems_hanging2( doc, df_tmp, placeholder=placeholder, column_contains="_propostas", left_indent_cm=left_indent_cm, hanging_cm=hanging_cm, font_name=font_name, font_size_pt=font_size_pt, deduplicate=False ) replaced += 1 changed = True break if not changed: break return replaced # %% def delete_paragraph(paragraph): p = paragraph._p parent = p.getparent() if parent is not None: parent.remove(p) def _iter_paragraphs_in_table(tbl): for row in tbl.rows: for cell in row.cells: for p in cell.paragraphs: yield p for t in cell.tables: yield from _iter_paragraphs_in_table(t) def iter_all_paragraphs_everywhere(doc): for p in doc.paragraphs: yield p for t in doc.tables: yield from _iter_paragraphs_in_table(t) for section in doc.sections: containers = [ section.header, section.footer, section.first_page_header, section.first_page_footer, section.even_page_header, section.even_page_footer, ] for c in containers: for p in c.paragraphs: yield p for t in c.tables: yield from _iter_paragraphs_in_table(t) def delete_lines_with_ancora(doc, pattern=r"ANCORA") -> int: rx = re.compile(pattern, flags=re.IGNORECASE) to_delete = [] for p in iter_all_paragraphs_everywhere(doc): if rx.search(p.text or ""): to_delete.append(p) for p in reversed(to_delete): delete_paragraph(p) return len(to_delete) # %% def replace_placeholder_with_temasdir( doc, df4: pd.DataFrame, placeholder: str = "{{temasdir}}", left_indent_cm: float = 2.75, hanging_cm: float = 0.6, font_name: str = "Arial", font_size_pt: int = 12, deduplicate: bool = True, max_items: int | None = None, ) -> int: cols = [c for c in df4.columns if "_temas" in str(c).lower()] itens = [] for c in cols: s = df4[c].dropna().astype(str).str.strip() s = s[s != ""] itens.extend(s.tolist()) if deduplicate: seen = set() out = [] for t in itens: k = t.lower() if k not in seen: seen.add(k) out.append(t) itens = out if max_items is not None: itens = itens[:max_items] if not itens: itens = ["Sem dados"] df_tmp = pd.DataFrame({"_Temas": itens}) replaced = 0 while True: changed = False for p in list(iter_all_paragraphs_everywhere(doc)): if placeholder in (p.text or ""): replace_placeholder_with_column_subitems_hanging2( doc, df_tmp, placeholder=placeholder, column_contains="_temas", left_indent_cm=left_indent_cm, hanging_cm=hanging_cm, font_name=font_name, font_size_pt=font_size_pt, deduplicate=False ) replaced += 1 changed = True break if not changed: break return replaced # %% def replace_placeholder_with_desenvolverdir( doc, df4: pd.DataFrame, placeholder: str = "{{desenvolverdir}}", left_indent_cm: float = 2.75, hanging_cm: float = 0.6, font_name: str = "Arial", font_size_pt: int = 12, deduplicate: bool = True, max_items: int | None = None, ) -> int: cols = [c for c in df4.columns if "_desenvolver" in str(c).lower()] itens = [] for c in cols: s = df4[c].dropna().astype(str).str.strip() s = s[s != ""] itens.extend(s.tolist()) if deduplicate: seen = set() out = [] for t in itens: k = t.lower() if k not in seen: seen.add(k) out.append(t) itens = out if max_items is not None: itens = itens[:max_items] if not itens: itens = ["Sem dados"] df_tmp = pd.DataFrame({"_Desenvolver": itens}) replaced = 0 while True: changed = False for p in list(iter_all_paragraphs_everywhere(doc)): if placeholder in (p.text or ""): replace_placeholder_with_column_subitems_hanging2( doc, df_tmp, placeholder=placeholder, column_contains="_desenvolver", left_indent_cm=left_indent_cm, hanging_cm=hanging_cm, font_name=font_name, font_size_pt=font_size_pt, deduplicate=False ) replaced += 1 changed = True break if not changed: break return replaced # %% def replace_placeholder_with_incluirdir( doc, df4: pd.DataFrame, placeholder: str = "{{incluirdir}}", left_indent_cm: float = 2.75, hanging_cm: float = 0.6, font_name: str = "Arial", font_size_pt: int = 12, deduplicate: bool = True, max_items: int | None = None, ) -> int: cols = [c for c in df4.columns if "_incluir" in str(c).lower()] itens = [] for c in cols: s = df4[c].dropna().astype(str).str.strip() s = s[s != ""] itens.extend(s.tolist()) if deduplicate: seen = set() out = [] for t in itens: k = t.lower() if k not in seen: seen.add(k) out.append(t) itens = out if max_items is not None: itens = itens[:max_items] if not itens: itens = ["Sem dados"] df_tmp = pd.DataFrame({"_Incluir": itens}) replaced = 0 while True: changed = False for p in list(iter_all_paragraphs_everywhere(doc)): if placeholder in (p.text or ""): replace_placeholder_with_column_subitems_hanging2( doc, df_tmp, placeholder=placeholder, column_contains="_incluir", left_indent_cm=left_indent_cm, hanging_cm=hanging_cm, font_name=font_name, font_size_pt=font_size_pt, deduplicate=False ) replaced += 1 changed = True break if not changed: break return replaced # %% def replace_placeholder_with_positivosdir( doc, df4: pd.DataFrame, placeholder: str = "{{positivosdir}}", left_indent_cm: float = 2.75, hanging_cm: float = 0.6, font_name: str = "Arial", font_size_pt: int = 12, deduplicate: bool = True, max_items: int | None = None, ) -> int: cols = [c for c in df4.columns if "_positivos" in str(c).lower()] itens = [] for c in cols: s = df4[c].dropna().astype(str).str.strip() s = s[s != ""] itens.extend(s.tolist()) if deduplicate: seen = set() out = [] for t in itens: k = t.lower() if k not in seen: seen.add(k) out.append(t) itens = out if max_items is not None: itens = itens[:max_items] if not itens: itens = ["Sem dados"] df_tmp = pd.DataFrame({"_Positivos": itens}) replaced = 0 while True: changed = False for p in list(iter_all_paragraphs_everywhere(doc)): if placeholder in (p.text or ""): replace_placeholder_with_column_subitems_hanging2( doc, df_tmp, placeholder=placeholder, column_contains="_positivos", left_indent_cm=left_indent_cm, hanging_cm=hanging_cm, font_name=font_name, font_size_pt=font_size_pt, deduplicate=False ) replaced += 1 changed = True break if not changed: break return replaced # %% df4 # %% nforma, mediaformquanl, mediaform = compute_formadores_summary(df2) replacements = { "{{NOMEDOCURSOEXTENSO}}": str(NOMEDOCURSO), "{{prerequesitos}}": str(prerequesitos), "{{prerequesitos2}}": str(100-prerequesitos), "{{Conteudo}}": str(Conteudo), "{{Conteudo2}}": str(100-Conteudo), "{{objgeral}}": str(objgeral), "{{objgeral2}}": str(100-objgeral), "{{objfinal}}": str(objfinal), "{{objfinal2}}": str(100-objfinal), "{{objadq}}": str(objadq), "{{objadq2}}": str(100-objadq), "{{avadq}}": str(avadq), "{{avadq2}}": str(100-avadq), "{{refere}}": str(refere), "{{refere2}}": str(100-refere), "{{ninq4}}": str(ninq4), "{{ninq3}}": str(ninq3), "{{nforma}}" : str(nforma), "{{mediaformquanl}}" : str(mediaformquanl), "{{mediaform}}" : str(mediaform), "{{mediaaloj}}": str(medalojamento2), "{{mediaaloj1}}": str(medalojamento), "{{mediaalime}}": str(medalimentacao2), "{{mediaalime1}}": str(medalimentacao), "{{medalimentacao}}": str(medalimentacaofinal), "{{apdir}}": str(medapdir), "{{graudif}}": str(meddificuldade2), "{{graudif1}}": str(meddificuldade), "{{meddificuldadefinal}}": str(meddificuldadefinal), "{{funcfut}}": str(medfuncfut2), "{{funcfut1}}": str(medfuncfut), "{{medfuncfutfinal}}": str(medfuncfutfinal), "{{apadm}}": str(medaplog), "{{motapro}}": str(medmotvpart2), "{{motapro1}}": str(medmotvpart), "{{medmotvpartfinal}}": str(medmotvpartfinal), "{{conhcurso}}": str(medconhecimento2), "{{conhcurso1}}": str(medconhecimento), "{{medconhecimentofinal}}": str(medconhecimentofinal), "{{objcruso}}": str(objcruso), "{{contcurso}}": str(contcurso), "{{medalojamento}}": str(medalojamentofinal), "{{adeqtrab}}": str(adeqtrab), "{{instform}}": str(instform), "{{audiovisuais}}": str(audiovisuais), "{{biblio}}": str(biblio), "{{ninq}}": str(ninq), "{{ninqfim}}": str(ninq2), "{{medpub}}": str(medpub), "{{medmeiosaux}}": str(medmeiosaux), "{{medapform}}": str(medapform), "{{medapdc}}": str(medapdc), "{{medobjesp}}": str(medobjesp), "{{medmetensi}}": str(medmetensi), "{{medtempform}}": str(medtempform), "{{medlocaisform}}": str(medlocaisform), "{{medlançaaval}}": str(medlançaaval), "{{medtipoaval}}": str(medtipoaval), "{{medtempoaval}}": str(medtempoaval), "{{medobjapre}}": str(medobjapre), "{{medadqonjesp}}": str(medadqonjesp), "{{medinterforma}}": str(medinterforma), "{{medprogcurso}}": str(medprogcurso), "{{medcontcurso}}": str(medcontcurso), "{{medestrcurso}}": str(medestrcurso), "{{medutilprat}}": str(medutilprat), "{{medcargahoraria}}": str(medcargahoraria), "{{medinstal}}": str(medinstal), "{{medaudiovis}}": str(medaudiovis), "{{meddocdispor}}": str(meddocdispor), "{{medapadmin}}": str(medapadmin), "{{medapcoord}}": str(medapcoord), "{{medmotform}}": str(medmotform), "{{medrelpart}}": str(medrelpart), "{{medpontass}}": str(medpontass), "{{med1}}": str(med1), "{{med2}}": str(med2), "{{med3}}": str(med3), "{{medpontassqual}}": str(avaliacao_qualitativa(medpontass)), "{{medrelpartqual}}": str(avaliacao_qualitativa(medrelpart)), "{{medmotformqual}}": str(avaliacao_qualitativa(medmotform)), "{{medapcoordqual}}": str(avaliacao_qualitativa(medapcoord)), "{{medapadminqual}}": str(avaliacao_qualitativa(medapadmin)), "{{meddocdisporqual}}": str(avaliacao_qualitativa(meddocdispor)), "{{medaudiovisqual}}": str(avaliacao_qualitativa(medaudiovis)), "{{medinstalqual}}": str(avaliacao_qualitativa(medinstal)), "{{medcargahorariaqual}}": str(avaliacao_qualitativa(medcargahoraria)), "{{medutilpratoqual}}": str(avaliacao_qualitativa(medutilprat)), "{{medestrcursooqual}}": str(avaliacao_qualitativa(medestrcurso)), "{{medprogcursoqual}}": str(avaliacao_qualitativa(medcontcurso)), "{{medinterformaqual}}": str(avaliacao_qualitativa(medinterforma)), "{{medadqonjespqual}}": str(avaliacao_qualitativa(medadqonjesp)), "{{medobjaprequal}}": str(avaliacao_qualitativa(medobjapre)), "{{medtempoavalqual}}": str(avaliacao_qualitativa(medtempoaval)), "{{medtipoavalqual}}": str(avaliacao_qualitativa(medtipoaval)), "{{medlançaavalual}}": str(avaliacao_qualitativa(medlançaaval)), "{{medlocaisformqual}}": str(avaliacao_qualitativa(medlocaisform)), "{{medtempformqual}}": str(avaliacao_qualitativa(medtempform)), "{{medmetensiqual}}": str(avaliacao_qualitativa(medmetensi)), "{{medobjespqual}}": str(avaliacao_qualitativa(medobjesp)), "{{medapdcqual}}": str(avaliacao_qualitativa(medapdc)), "{{medapformqual}}": str(avaliacao_qualitativa(medapform)), "{{medmeiosauxqual}}": str(avaliacao_qualitativa(medmeiosaux)), "{{medpubqual}}": str(avaliacao_qualitativa(medpub)), "{{mediaalojqual}}": str(avaliacao_qualitativa(medalojamento2)), "{{apdirqual}}": str(avaliacao_qualitativa(medapdir)), "{{funcfutqual}}": str(avaliacao_qualitativa(medfuncfut2)), "{{graudifaqual}}": str(avaliacao_qualitativa(meddificuldade2)), "{{apadmqual}}": str(avaliacao_qualitativa(medaplog)), "{{motaproqual}}": str(avaliacao_qualitativa(medmotvpart2)), "{{conhcursoqual}}": str(avaliacao_qualitativa(medconhecimento2)), "{{objcrusoqual}}": str(avaliacao_qualitativa(objcruso)), "{{contcursoqual}}": str(avaliacao_qualitativa(contcurso)), "{{adeqtrabqual}}": str(avaliacao_qualitativa(adeqtrab)), "{{instformqual}}": str(avaliacao_qualitativa(instform)), "{{audiovisuaisqual}}": str(avaliacao_qualitativa(audiovisuais)), "{{biblioqual}}": str(avaliacao_qualitativa(biblio)), "{{mediaalimequal}}": str(avaliacao_qualitativa(medalimentacao2)), "{{NOMEDOCURSO}}": str(NOMEDOCURSOcurto), "{{MESi}}": str(MESi), "{{AAAAi}}": str(AAAAi), "{{DDf}}": str(DDf), "{{MESf}}": str(MESf), "{{AAAAf}}": str(AAAAf), "{{MEDIAFINALCURSO}}": str(MEDIAFINALCURSO), "{{NFORMANDOS}}": str(NFORMANDOS), "{{FINALIDADECURSO}}": str(FINALIDADECURSO), "{{DDi}}": str(DDi) } template_path = _resource_path("Anexo RAI..docx") output_path = os.path.join(os.getcwd(), "relatorio_final.docx") doc = Document(template_path) if not file_path3: print("Nenhum ficheiro selecionado. A remover secção do documento.") delete_all_between_anchors_xml(doc, "{{ANCORA1}}", "{{ANCORA2}}", debug=True) df3 = None if file_path3: replace_placeholder_with_propostas( doc, df3, placeholder="{{Propostas}}", left_indent_cm=2.75, hanging_cm=0.6, font_name="Arial", font_size_pt=12, deduplicate=True, max_items=None ) if not file_path4: print("Nenhum ficheiro selecionado. A remover secção do documento.") delete_all_between_anchors_xml(doc, "{{ANCORA3}}", "{{ANCORA4}}", debug=True) df4 = None if file_path4: replace_placeholder_with_positivosdir( doc, df4, placeholder="{{positivosdir}}", left_indent_cm=2.75, hanging_cm=0.6, font_name="Arial", font_size_pt=12, deduplicate=True ) replace_placeholder_with_incluirdir( doc, df4, placeholder="{{incluirdir}}", left_indent_cm=2.75, hanging_cm=0.6, font_name="Arial", font_size_pt=12, deduplicate=True ) replace_placeholder_with_desenvolverdir( doc, df4, placeholder="{{desenvolverdir}}", left_indent_cm=2.75, hanging_cm=0.6, font_name="Arial", font_size_pt=12, deduplicate=True ) replace_placeholder_with_temasdir( doc, df4, placeholder="{{temasdir}}", left_indent_cm=2.75, hanging_cm=0.6, font_name="Arial", font_size_pt=12, deduplicate=True ) replace_placeholders_docx_bold_values_keep_style(doc, replacements) replace_placeholder_with_q06_subitems( doc, df2, placeholder="{{Q06_Apreciacao}}", indent_cm=2.75, indent_title=True, font_name="Arial", font_size_pt=12 ) replace_placeholder_with_formadores_table( doc, df2, placeholder="{{tabelaFormadores}}", font_name="Arial", font_size_pt=12 ) replace_placeholder_with_uc_table( doc, df_inicial=df, df_final=df2, placeholder="{{tabelasUC}}", font_name="Arial", font_size_pt=12 ) replace_placeholder_with_temas_smart( doc, df2, placeholder="{{TEMAS}}", indent_cm=0.5, font_name="Arial", font_size_pt=12, deduplicate=True, max_items=6 ) replace_placeholder_with_desenvolver_smart( doc, df2, placeholder="{{DESENVOLVER}}", indent_cm=0.5, font_name="Arial", font_size_pt=12, max_items=6 ) replace_placeholder_with_incluir_smart( doc, df2, placeholder="{{INCLUIR}}", indent_cm=0.5, font_name="Arial", font_size_pt=12, max_items=6 ) replace_placeholder_with_observacoes_smart2( doc, df2, placeholder="{{OBSERVACOES2}}", indent_cm=2.75, font_name="Arial", font_size_pt=12, max_items=6 ) replace_placeholder_with_observacoes_smart( doc, df2, placeholder="{{OBSERVACOES}}", indent_cm=0.5, font_name="Arial", font_size_pt=12, use_ollama=True, ollama_model="llama3.1:8b" # ajusta ao que existir nas máquinas ) n = replace_placeholder_with_observacoes_smart( doc, df2, placeholder="{{OBSERVACOES}}", indent_cm=0.5, font_name="Arial", font_size_pt=12, use_ollama=True, ollama_model="llama3.1:8b" # ajusta ao que existir nas máquinas ) print("OBSERVACOES substituídos:", n) delete_lines_with_ancora(doc, pattern=r"ANCORA") doc.save(output_path) print(f"Saved: {output_path}") # %% def iter_body_blocks(doc): body = doc._element.body for child in body.iterchildren(): tag = child.tag.rsplit("}", 1)[-1] if tag == "p": yield ("p", Paragraph(child, doc)) elif tag == "tbl": yield ("tbl", Table(child, doc)) # %% def table_to_text(tbl) -> str: lines = [] for row in tbl.rows: row_txt = [] for cell in row.cells: txt = " ".join(p.text.strip() for p in cell.paragraphs if p.text.strip()) txt = re.sub(r"\s+", " ", txt).strip() row_txt.append(txt) if any(row_txt): lines.append(" | ".join(row_txt)) return "\n".join(lines).strip() def extract_text_between_markers(doc, start_re: str, end_re: str | None = None, debug=False) -> str: start_rx = re.compile(start_re, flags=re.IGNORECASE) end_rx = re.compile(end_re, flags=re.IGNORECASE) if end_re else None collecting = False chunks = [] for kind, obj in iter_body_blocks(doc): if kind == "p": txt = (obj.text or "").strip() if not collecting and start_rx.search(txt): collecting = True if debug: print("[DEBUG] START matched:", txt) continue if collecting and end_rx and end_rx.search(txt): if debug: print("[DEBUG] END matched:", txt) break if collecting and txt: chunks.append(txt) elif kind == "tbl": if collecting: ttxt = table_to_text(obj) if ttxt: chunks.append(ttxt) out = "\n".join(chunks).strip() out = re.sub(r"\n{3,}", "\n\n", out) return out # %% def ollama_available(timeout=0.4) -> bool: try: r = requests.get("http://localhost:11434/api/tags", timeout=timeout) return r.status_code == 200 except Exception: return False def ollama_summarize_text( text: str, model: str = "llama3.1:8b", max_chars: int = 24000, timeout: float = 120.0, system_prompt: str = "", user_prompt: str = "", ) -> str: if not text.strip(): return "" if len(text) > max_chars: text = text[:max_chars] + "\n\n[Texto truncado por limite de tamanho.]" prompt = f""" {system_prompt} {user_prompt} TEXTO: {text} DEVOLVE APENAS O RESULTADO FINAL, SEM EXPLICAÇÕES. """.strip() payload = { "model": model, "prompt": prompt, "stream": False, "options": {"temperature": 0.2} } r = requests.post("http://localhost:11434/api/generate", json=payload, timeout=timeout) r.raise_for_status() return (r.json().get("response", "") or "").strip() # %% def replace_placeholder_with_text_paragraph_all( doc, placeholder: str, text: str, indent_cm: float = 0.0, font_name: str = "Arial", font_size_pt: int = 12, ) -> int: replaced = 0 def _apply_fmt(p: Paragraph): fmt = p.paragraph_format fmt.left_indent = Cm(indent_cm) fmt.line_spacing = 1.5 fmt.space_before = Pt(0) fmt.space_after = Pt(0) def _process_paragraph(p: Paragraph): nonlocal replaced if placeholder not in (p.text or ""): return if not p.runs: r = p.add_run("") for run in p.runs: run.text = "" r0 = p.runs[0] r0.text = text force_run_font(r0, font_name, font_size_pt) _apply_fmt(p) replaced += 1 for p in list(doc.paragraphs): _process_paragraph(p) for tbl in doc.tables: for row in tbl.rows: for cell in row.cells: for p in list(cell.paragraphs): _process_paragraph(p) return replaced # %% def fill_llm_placeholders_llm9_llm10( doc, model: str = "llama3.1:8b", placeholder_llm10: str = "{{LLM10}}", indent_cm: float = 0.5, font_name: str = "Arial", font_size_pt: int = 12, debug_extract: bool = False, ) -> dict: if not ollama_available(): return {"ok": False, "reason": "ollama_not_available", "llm9": 0, "llm10": 0} ap5 = extract_text_between_markers( doc, start_re=r"^\s*Apêndice\s*5\b", end_re=r"^\s*Apêndice\s*6\b", debug=debug_extract ) ap6 = extract_text_between_markers( doc, start_re=r"^\s*Apêndice\s*6\b", end_re=r"^\s*1\s*[–-]\s", debug=debug_extract ) texto_llm10 = "\n\n".join([t for t in [ap5, ap6] if t.strip()]).strip() sys_pt = "És um analista a redigir um relatório oficial. Português de Portugal. Não inventes informação." prompt_llm10 = ( "Resume os conteúdos do Apêndice 5 e do Apêndice 6 num texto único, formal e conciso " "(1 a 2 parágrafos). Realça pontos-chave e recomendações." ) resumo10 = "" if texto_llm10.strip(): resumo10 = ollama_summarize_text(texto_llm10, model=model, system_prompt=sys_pt, user_prompt=prompt_llm10) n10 = replace_placeholder_with_text_paragraph_all( doc, placeholder_llm10, resumo10.strip() or "Sem dados.", indent_cm=indent_cm, font_name=font_name, font_size_pt=font_size_pt ) return { "ok": True, "llm10": n10, "chars_in_llm10": len(texto_llm10), } # %% def fill_llm_placeholder_from_doc_range( doc, placeholder: str, start_marker: str, end_marker: str, model: str = "llama3.1:8b", indent_cm: float = 0.5, font_name: str = "Arial", font_size_pt: int = 12, debug_extract: bool = False, ) -> dict: if not ollama_available(): return {"ok": False, "reason": "ollama_not_available", "replaced": 0, "chars": 0} start_re = r"^\s*" + re.escape(start_marker.strip()) + r"\s*$" end_re = r"^\s*" + re.escape(end_marker.strip()) + r"\s*$" texto = extract_text_between_markers( doc, start_re=start_re, end_re=end_re, debug=debug_extract ).strip() if not texto: start_re2 = re.escape(start_marker.strip()) end_re2 = re.escape(end_marker.strip()) texto = extract_text_between_markers( doc, start_re=start_re2, end_re=end_re2, debug=debug_extract ).strip() if not texto: n = replace_placeholder_with_text_paragraph_all( doc, placeholder, "Sem dados.", indent_cm=indent_cm, font_name=font_name, font_size_pt=font_size_pt ) return {"ok": True, "replaced": n, "chars": 0, "note": "range_not_found"} sys_pt = "És um analista a redigir um relatório oficial. Português de Portugal. Não inventes informação." user_prompt = ( "Lê o texto e produz um resumo final, em estilo de conclusões, adequado a relatório oficial:\n" "• 1 parágrafo de enquadramento (2–4 frases)\n" "• 5–8 bullets com conclusões/recomendações principais\n" "• Não inventes dados nem percentagens." ) resumo = ollama_summarize_text( texto, model=model, system_prompt=sys_pt, user_prompt=user_prompt ).strip() or "Sem dados." n = replace_placeholder_with_text_paragraph_all( doc, placeholder, resumo, indent_cm=indent_cm, font_name=font_name, font_size_pt=font_size_pt ) return {"ok": True, "replaced": n, "chars": len(texto)} # %% doc = Document(output_path) fill_llm_placeholder_from_doc_range( doc, placeholder="{{LLM9}}", start_marker="RELATÓRIO DE AVALIAÇÃO INTERNA", end_marker="O CHEFE DA DIREÇÃO DE AVALIAÇÃO E QUALIDADE", model="llama3.1:8b", indent_cm=0.5, font_name="Arial", font_size_pt=12, debug_extract=True ) fill_llm_placeholders_llm9_llm10( doc, model="llama3.1:8b", placeholder_llm10="{{LLM10}}", indent_cm=0.5, font_name="Arial", font_size_pt=12, debug_extract=True ) doc.save(output_path)