import fitz
import pytesseract
from PIL import Image
import io
import re
import pandas as pd
#pip install camelot-py[cv]
import camelot

# Location of the PDF statement to parse. This is an absolute path on the
# author's machine — NOTE(review): consider making it configurable.
PDF_PATH = "C:/Users/garci/Downloads/Extracto Sogenave_Março.pdf"

# Options forwarded to camelot: process every page with the "stream" flavor.
READ_OPTIONS = {"pages": "all", "flavor": "stream"}

# `tables` is consumed by the following cell, which iterates over it and reads
# each element's `.df` attribute.
tables = camelot.read_pdf(PDF_PATH, **READ_OPTIONS)
# Column labels of the flattened statement, in the order the raw columns are read.
COLUMN_NAMES = ["Data", "Descrição", "Data Valor", "Montante", "Saldo"]


def split_cell(value):
    """Return the newline-separated parts of one extracted cell.

    Parameters
    ----------
    value : str or None
        Raw cell content taken from a camelot table frame.

    Returns
    -------
    list of str
        The pieces of ``value`` split on "\n". A falsy cell (empty string,
        None) yields ``[""]`` so that it still contributes one blank entry.
    """
    return value.split("\n") if value else [""]


def explode_table_rows(df_raw, n_cols=len(COLUMN_NAMES)):
    """Expand multi-line cells of one extracted table into one record per line.

    Each row's first ``n_cols`` cells are split on newlines. Shorter cells are
    padded with empty strings up to the longest cell of that row, and one
    output record is produced per line position.

    Parameters
    ----------
    df_raw : pandas.DataFrame
        Raw table frame (``table.df``). Its first row is skipped —
        presumably the header row; confirm against the source PDF.
    n_cols : int, default 5
        Number of leading columns to read. Frames with fewer columns are
        skipped entirely and produce no records.

    Returns
    -------
    list of list of str
        Records with exactly ``n_cols`` entries each.
    """
    records = []

    # The column count is a property of the whole frame, so it is checked once
    # instead of once per row.
    if df_raw.shape[1] < n_cols:
        return records

    # Skip the first row and read the leading columns positionally.
    body = df_raw.iloc[1:, :n_cols]
    for raw_cells in body.itertuples(index=False, name=None):
        parts = [split_cell(cell) for cell in raw_cells]
        depth = max(len(p) for p in parts)
        for line_no in range(depth):
            # Cells with fewer lines than the deepest one are padded with "".
            records.append(
                [p[line_no] if line_no < len(p) else "" for p in parts]
            )
    return records


# Flatten every extracted table into a single list of 5-field records.
linhas_finais = [
    record
    for table in tables
    for record in explode_table_rows(table.df)
]

df_final = pd.DataFrame(linhas_finais, columns=COLUMN_NAMES)

print(df_final)
| \n", " | Descrição | \n", "Data Valor | \n", "Montante | \n", "
|---|---|---|---|
| 1 | \n", "55389 FECHO TPA:0001455389 061-00006 | \n", "01-03-2026 | \n", "1,36 H | \n", "
| 2 | \n", "55294 FECHO TPA:0001455294 034-00022 | \n", "01-03-2026 | \n", "14,50 H | \n", "
| 3 | \n", "55340 FECHO TPA:0001455340 085-00005 | \n", "01-03-2026 | \n", "3,67 H | \n", "
| 4 | \n", "55370 FECHO TPA:0001455370 053-00003 | \n", "01-03-2026 | \n", "1,04 H | \n", "
| 5 | \n", "55205 FECHO TPA:0001455205 028-00009 | \n", "01-03-2026 | \n", "5,34 H | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "
| 4024 | \n", "55356 FECHO TPA:0001455356 074-00010 | \n", "31-03-2026 | \n", "2,59 H | \n", "
| 4025 | \n", "55231 FECHO TPA:0001455231 059-00003 | \n", "31-03-2026 | \n", "1,34 H | \n", "
| 4026 | \n", "55224 FECHO TPA:0001455224 066-00007 | \n", "31-03-2026 | \n", "8,65 H | \n", "
| 4027 | \n", "55234 FECHO TPA:0001455234 092-00023 | \n", "31-03-2026 | \n", "7,74 H | \n", "
| 4028 | \n", "55312 FECHO TPA:0001455312 081-00086 | \n", "31-03-2026 | \n", "78,53 H | \n", "
4028 rows × 3 columns
\n", "| \n", " | Descrição | \n", "Data Valor | \n", "Montante | \n", "
|---|---|---|---|
| 536 | \n", "Cob-Venc. LEAS:530-3001-000939-050-01/2 | \n", "05-03-2026 | \n", "-4898,03 | \n", "
| 537 | \n", "Cob-Venc. LEAS:530-3001-000941-050-01/1 | \n", "05-03-2026 | \n", "-6245,72 | \n", "
| 1014 | \n", "COMISSÃO CERTIFICADO DE SALDOS E INF. A | \n", "09-03-2026 | \n", "-100,00 | \n", "
| 1015 | \n", "IVA COMISSÃO IVA(CERTIFICADO DE SALDOS E | \n", "09-03-2026 | \n", "-23,00 | \n", "
| 1925 | \n", "Cob-Venc. LEAS:530-3001-000940-050-01/3 | \n", "16-03-2026 | \n", "-2216,87 | \n", "
| \n", " | Descrição | \n", "Data Valor | \n", "Montante | \n", "
|---|---|---|---|
| 1 | \n", "TPA:0001455389 | \n", "01-03-2026 | \n", "1,36 | \n", "
| 2 | \n", "TPA:0001455294 | \n", "01-03-2026 | \n", "14,50 | \n", "
| 3 | \n", "TPA:0001455340 | \n", "01-03-2026 | \n", "3,67 | \n", "
| 4 | \n", "TPA:0001455370 | \n", "01-03-2026 | \n", "1,04 | \n", "
| 5 | \n", "TPA:0001455205 | \n", "01-03-2026 | \n", "5,34 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "
| 4024 | \n", "TPA:0001455356 | \n", "31-03-2026 | \n", "2,59 | \n", "
| 4025 | \n", "TPA:0001455231 | \n", "31-03-2026 | \n", "1,34 | \n", "
| 4026 | \n", "TPA:0001455224 | \n", "31-03-2026 | \n", "8,65 | \n", "
| 4027 | \n", "TPA:0001455234 | \n", "31-03-2026 | \n", "7,74 | \n", "
| 4028 | \n", "TPA:0001455312 | \n", "31-03-2026 | \n", "78,53 | \n", "
3995 rows × 3 columns
\n", "| \n", " | Descrição | \n", "Data Valor | \n", "Montante | \n", "
|---|---|---|---|
| 0 | \n", "Cob-Venc. LEAS:530-3001-000939-050-01/2 | \n", "05-03-2026 | \n", "-4898.03 | \n", "
| 1 | \n", "Cob-Venc. LEAS:530-3001-000941-050-01/1 | \n", "05-03-2026 | \n", "-6245.72 | \n", "
| 2 | \n", "COMISSÃO CERTIFICADO DE SALDOS E INF. A | \n", "09-03-2026 | \n", "-100.00 | \n", "
| 3 | \n", "IVA COMISSÃO IVA(CERTIFICADO DE SALDOS E | \n", "09-03-2026 | \n", "-23.00 | \n", "
| 4 | \n", "Cob-Venc. LEAS:530-3001-000940-050-01/3 | \n", "16-03-2026 | \n", "-2216.87 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "
| 3995 | \n", "TPA:0001455356 | \n", "31-03-2026 | \n", "2.59 | \n", "
| 3996 | \n", "TPA:0001455231 | \n", "31-03-2026 | \n", "1.34 | \n", "
| 3997 | \n", "TPA:0001455224 | \n", "31-03-2026 | \n", "8.65 | \n", "
| 3998 | \n", "TPA:0001455234 | \n", "31-03-2026 | \n", "7.74 | \n", "
| 3999 | \n", "TPA:0001455312 | \n", "31-03-2026 | \n", "78.53 | \n", "
4000 rows × 3 columns
\n", "