# %% Imports
# Standard library
import io
import re

# Third-party (camelot needs: pip install camelot-py[cv])
import camelot
import fitz
import pandas as pd
import pytesseract
from PIL import Image

# %% Load the bank statement
# Location of the statement PDF to parse.
# NOTE(review): absolute, machine-specific path - consider making it configurable.
PDF_PATH = "C:/Users/garci/Downloads/Extracto Sogenave_Março.pdf"

# Extract the tables from every page of the PDF using camelot's "stream" flavor.
tables = camelot.read_pdf(PDF_PATH, pages="all", flavor="stream")
from itertools import zip_longest

# Column names given to the first five columns of every extracted table.
STATEMENT_COLUMNS = ["Data", "Descrição", "Data Valor", "Montante", "Saldo"]


def explode_statement_rows(df_raw, n_cols=len(STATEMENT_COLUMNS)):
    """Expand one raw extracted table into one record per statement line.

    A single table cell may hold several values separated by newlines. Each
    of the first ``n_cols`` cells of a row is split on "\n" and the pieces
    are lined up position by position; shorter cells are padded with "".

    Parameters
    ----------
    df_raw : pandas.DataFrame
        Raw table (``table.df``). Its first row is skipped (presumably the
        header row - confirm against the PDF).
    n_cols : int
        Number of leading columns to keep. Tables with fewer columns are
        skipped entirely.

    Returns
    -------
    list[list[str]]
        One ``n_cols``-long list per expanded line.
    """
    if df_raw.shape[1] < n_cols:
        return []

    records = []
    for values in df_raw.iloc[1:, :n_cols].itertuples(index=False, name=None):
        # An empty cell still contributes one (empty) piece so the row is kept.
        pieces = [value.split("\n") if value else [""] for value in values]
        # zip_longest pads the shorter cells with "" up to the longest one.
        records.extend(list(line) for line in zip_longest(*pieces, fillvalue=""))
    return records


linhas_finais = []
for table in tables:
    linhas_finais.extend(explode_statement_rows(table.df))

df_final = pd.DataFrame(linhas_finais, columns=STATEMENT_COLUMNS)

# Last expression of the cell: rich (truncated) display instead of print().
df_final
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
DescriçãoData ValorMontante
155389 FECHO TPA:0001455389 061-0000601-03-20261,36 H
255294 FECHO TPA:0001455294 034-0002201-03-202614,50 H
355340 FECHO TPA:0001455340 085-0000501-03-20263,67 H
455370 FECHO TPA:0001455370 053-0000301-03-20261,04 H
555205 FECHO TPA:0001455205 028-0000901-03-20265,34 H
............
402455356 FECHO TPA:0001455356 074-0001031-03-20262,59 H
402555231 FECHO TPA:0001455231 059-0000331-03-20261,34 H
402655224 FECHO TPA:0001455224 066-0000731-03-20268,65 H
402755234 FECHO TPA:0001455234 092-0002331-03-20267,74 H
402855312 FECHO TPA:0001455312 081-0008631-03-202678,53 H
\n", "

4028 rows × 3 columns

\n", "
# Keep only the description, value date and amount, and discard row 0
# (the "SALDO ANTERIOR" opening-balance line, which carries no amount).
#
# errors="ignore" makes the cell idempotent: running it a second time no
# longer raises KeyError because the columns / row were already removed.
df_final = (
    df_final
    .drop(columns=["Data", "Saldo"], errors="ignore")
    .drop(index=0, errors="ignore")
)
df_final
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
DescriçãoData ValorMontante
536Cob-Venc. LEAS:530-3001-000939-050-01/205-03-2026-4898,03
537Cob-Venc. LEAS:530-3001-000941-050-01/105-03-2026-6245,72
1014COMISSÃO CERTIFICADO DE SALDOS E INF. A09-03-2026-100,00
1015IVA COMISSÃO IVA(CERTIFICADO DE SALDOS E09-03-2026-23,00
1925Cob-Venc. LEAS:530-3001-000940-050-01/316-03-2026-2216,87
\n", "
# Rows whose amount carries a "D" marker (presumably debits - confirm with
# the bank's statement legend).
# .copy() gives an independent frame, so the column assignments below do not
# write into a slice of df_final (SettingWithCopyWarning).
df_D = df_final.loc[df_final["Montante"].str.contains("D", na=False)].copy()

# Remove the "D" marker and the "." thousands separator, trim the blank left
# behind by the marker, then prefix the value with a minus sign.
df_D["Montante"] = "-" + (
    df_D["Montante"]
    .str.replace("D", "", regex=False)
    .str.replace(".", "", regex=False)
    .str.strip()
)

# Repair a mis-extracted character in the descriptions ("ˆ" -> "Ã").
df_D["Descrição"] = df_D["Descrição"].str.replace("ˆ", "Ã", regex=False)

df_D
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
DescriçãoData ValorMontante
1TPA:000145538901-03-20261,36
2TPA:000145529401-03-202614,50
3TPA:000145534001-03-20263,67
4TPA:000145537001-03-20261,04
5TPA:000145520501-03-20265,34
............
4024TPA:000145535631-03-20262,59
4025TPA:000145523131-03-20261,34
4026TPA:000145522431-03-20268,65
4027TPA:000145523431-03-20267,74
4028TPA:000145531231-03-202678,53
\n", "

3995 rows × 3 columns

\n", "
# Rows whose amount carries an "H" marker (presumably credits - confirm with
# the bank's statement legend).
# .copy() gives an independent frame, so the column assignments below do not
# write into a slice of df_final (SettingWithCopyWarning).
df_H = df_final.loc[df_final["Montante"].str.contains("H", na=False)].copy()

# Reduce the description to its "TPA:<digits>" reference; descriptions without
# one become NaN. expand=False returns a Series rather than a one-column frame.
df_H["Descrição"] = df_H["Descrição"].str.extract(r"(TPA:\d+)", expand=False)

# Remove the "H" marker and the "." thousands separator, and trim the blank
# left behind by the marker.
df_H["Montante"] = (
    df_H["Montante"]
    .str.replace("H", "", regex=False)
    .str.replace(".", "", regex=False)
    .str.strip()
)

df_H
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
DescriçãoData ValorMontante
0Cob-Venc. LEAS:530-3001-000939-050-01/205-03-2026-4898.03
1Cob-Venc. LEAS:530-3001-000941-050-01/105-03-2026-6245.72
2COMISSÃO CERTIFICADO DE SALDOS E INF. A09-03-2026-100.00
3IVA COMISSÃO IVA(CERTIFICADO DE SALDOS E09-03-2026-23.00
4Cob-Venc. LEAS:530-3001-000940-050-01/316-03-2026-2216.87
............
3995TPA:000145535631-03-20262.59
3996TPA:000145523131-03-20261.34
3997TPA:000145522431-03-20268.65
3998TPA:000145523431-03-20267.74
3999TPA:000145531231-03-202678.53
\n", "

4000 rows × 3 columns

\n", "
# %% Combine debit and credit rows
# Stack the "D" rows on top of the "H" rows with a fresh 0..n-1 index, then
# turn the decimal comma into a dot so the amounts can be parsed as floats.
dffinal = pd.concat([df_D, df_H], ignore_index=True).assign(
    Montante=lambda frame: (
        frame["Montante"].str.replace(",", ".", regex=False).astype(float)
    )
)
dffinal

# %% Export
# Destination workbook for the cleaned movements.
# NOTE(review): absolute, machine-specific path; the file name says "MAR 25"
# while the statement rows shown are dated 2026 - confirm the intended name.
EXCEL_PATH = "C:/Users/garci/Downloads/MAR 25 BIC2.xlsx"

dffinal.to_excel(EXCEL_PATH)