Spaces:
Runtime error
Runtime error
app.py
CHANGED
|
@@ -1,8 +1,7 @@
|
|
| 1 |
# === Загрузка библиотек ===
|
| 2 |
-
|
| 3 |
from pypdf import PdfReader, PdfWriter
|
| 4 |
import gradio as gr
|
| 5 |
-
import fitz
|
| 6 |
from PIL import Image
|
| 7 |
import pandas as pd
|
| 8 |
import cv2
|
|
@@ -11,7 +10,6 @@ import os
|
|
| 11 |
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
|
| 12 |
import torch
|
| 13 |
import difflib
|
| 14 |
-
from pdf2image import convert_from_path
|
| 15 |
|
| 16 |
# Загрузка TrOCR
|
| 17 |
processor = TrOCRProcessor.from_pretrained('kazars24/trocr-base-handwritten-ru')
|
|
@@ -143,7 +141,15 @@ def crop_and_recognize_cells(image, cells):
|
|
| 143 |
|
| 144 |
# === 5. Полный процесс обработки изображения таблицы ===
|
| 145 |
def process_pdf_table(pdf_path, output_excel='results.xlsx'):
|
| 146 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
if not images:
|
| 148 |
print("Ошибка: PDF пустой или не удалось сконвертировать.")
|
| 149 |
return
|
|
@@ -196,4 +202,4 @@ app = gr.Interface(
|
|
| 196 |
allow_flagging="never"
|
| 197 |
)
|
| 198 |
|
| 199 |
-
app.launch(
|
|
|
|
| 1 |
# === Загрузка библиотек ===
|
|
|
|
| 2 |
from pypdf import PdfReader, PdfWriter
|
| 3 |
import gradio as gr
|
| 4 |
+
import fitz # PyMuPDF
|
| 5 |
from PIL import Image
|
| 6 |
import pandas as pd
|
| 7 |
import cv2
|
|
|
|
| 10 |
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
|
| 11 |
import torch
|
| 12 |
import difflib
|
|
|
|
| 13 |
|
| 14 |
# Загрузка TrOCR
|
| 15 |
processor = TrOCRProcessor.from_pretrained('kazars24/trocr-base-handwritten-ru')
|
|
|
|
| 141 |
|
| 142 |
# === 5. Полный процесс обработки изображения таблицы ===
|
| 143 |
def process_pdf_table(pdf_path, output_excel='results.xlsx'):
|
| 144 |
+
# Извлечение изображений из PDF с помощью PyMuPDF
|
| 145 |
+
doc = fitz.open(pdf_path)
|
| 146 |
+
images = []
|
| 147 |
+
for page_num in range(len(doc)):
|
| 148 |
+
page = doc.load_page(page_num)
|
| 149 |
+
pix = page.get_pixmap(dpi=300)
|
| 150 |
+
img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
|
| 151 |
+
images.append(img)
|
| 152 |
+
|
| 153 |
if not images:
|
| 154 |
print("Ошибка: PDF пустой или не удалось сконвертировать.")
|
| 155 |
return
|
|
|
|
| 202 |
allow_flagging="never"
|
| 203 |
)
|
| 204 |
|
| 205 |
+
app.launch()
|