Spaces:
Runtime error
Runtime error
| import atexit | |
| import functools | |
| from queue import Queue | |
| from threading import Event, Thread | |
| from paddleocr import PaddleOCR | |
| import documentos | |
| import herramientas | |
# (language code, number of OCR workers) pairs; each worker builds and owns
# its own model instance, so larger counts cost more memory.
_WORKERS_PER_LANG = (
    ("ch", 2),
    ("en", 2),
    ("fr", 1),
    ("german", 1),
    ("korean", 1),
    ("japan", 1),
)

# Per-language settings, keyed by the language code handed to the model factory.
LANG_CONFIG = {
    code: {"num_workers": count} for code, count in _WORKERS_PER_LANG
}

# Not referenced in the visible part of this file; presumably the request
# concurrency limit for the web UI queue -- TODO confirm where it is consumed.
CONCURRENCY_LIMIT = 8
class PaddleOCRModelManager(object):
    """Pool of worker threads, each owning one model built by ``model_factory``.

    Requests submitted through :meth:`infer` go onto a shared queue and are
    served by whichever worker is free. The worker calls ``model.ocr`` with
    the caller's arguments and hands back the result or the exception raised.
    """

    def __init__(self,
                 num_workers,
                 model_factory):
        """Start ``num_workers`` workers, building their models one at a time.

        Args:
            num_workers: Number of worker threads (and model instances).
            model_factory: Zero-argument callable returning an object that
                exposes ``ocr(*args, **kwargs)``.

        Raises:
            BaseException: Whatever ``model_factory`` raised. Workers that
                were already running are shut down before it propagates.
        """
        super().__init__()
        self._model_factory = model_factory
        self._queue = Queue()
        self._workers = []
        self._model_initialized_event = Event()
        # Set by a worker whose factory call failed; read by __init__ after
        # the event fires.
        self._init_error = None
        for _ in range(num_workers):
            # Daemon threads: CPython joins non-daemon threads *before* it
            # runs atexit callbacks, so a non-daemon worker blocked on the
            # queue would keep the process alive forever and an
            # atexit-registered close() would never get the chance to run.
            worker = Thread(target=self._worker, daemon=True)
            worker.start()
            # Serialise start-up: wait until this worker's factory call has
            # finished (successfully or not) before starting the next one.
            self._model_initialized_event.wait()
            self._model_initialized_event.clear()
            if self._init_error is not None:
                # The failed worker has already returned; stop the healthy
                # ones too so a failed constructor leaves nothing running.
                worker.join()
                self.close()
                raise self._init_error
            self._workers.append(worker)

    def infer(self, *args, **kwargs):
        """Run ``model.ocr(*args, **kwargs)`` on a worker and wait for it.

        Returns:
            Whatever the model's ``ocr`` call returned.

        Raises:
            Exception: The exception raised by ``ocr`` in the worker thread.
        """
        # XXX: Should I use a more lightweight data structure, say, a future?
        result_queue = Queue(maxsize=1)
        self._queue.put((args, kwargs, result_queue))
        success, payload = result_queue.get()
        if success:
            return payload
        raise payload

    def close(self):
        """Stop all workers and wait for them to exit; safe to call twice."""
        # Detach the list first so a repeated call finds nothing to stop and
        # does not leave stray sentinels on the queue.
        workers, self._workers = self._workers, []
        for _ in workers:
            self._queue.put(None)  # one shutdown sentinel per worker
        for worker in workers:
            worker.join()

    def _worker(self):
        """Thread body: build a model, then serve requests until a sentinel."""
        try:
            model = self._model_factory()
        except BaseException as exc:
            # Hand the failure to __init__, which is blocked on the event.
            self._init_error = exc
            return
        finally:
            # Always release __init__, whether the factory succeeded or not.
            self._model_initialized_event.set()
        while True:
            item = self._queue.get()
            if item is None:
                break
            args, kwargs, result_queue = item
            try:
                result = model.ocr(*args, **kwargs)
                result_queue.put((True, result))
            except Exception as e:
                # Ship the error to the caller instead of killing the worker.
                result_queue.put((False, e))
            finally:
                self._queue.task_done()
def create_model(lang):
    """Build a PaddleOCR instance for ``lang``.

    The model is created with ``use_angle_cls=True`` and ``use_gpu=False``,
    i.e. angle classification is requested and the GPU is not used.
    """
    options = {
        "lang": lang,
        "use_angle_cls": True,
        "use_gpu": False,
    }
    return PaddleOCR(**options)
# One manager (worker pool) per configured language, built at import time.
model_managers = {}
try:
    for lang, config in LANG_CONFIG.items():
        model_manager = PaddleOCRModelManager(
            config["num_workers"],
            functools.partial(create_model, lang=lang),
        )
        model_managers[lang] = model_manager
except BaseException:
    # Start-up failed part-way through. The pools built so far already have
    # worker threads blocked on their queues, and nothing else would stop
    # them at this point, so shut them down before propagating the error.
    for started in model_managers.values():
        started.close()
    raise
# XXX: Not sure if gradio allows adding custom teardown logic
@atexit.register
def close_model_managers():
    """Close every manager in ``model_managers``.

    Registered with ``atexit`` so it is invoked during interpreter exit.
    """
    for pool in model_managers.values():
        pool.close()
def inference(img, lang):
    """Run OCR on ``img`` and return the identity fields found in it.

    Args:
        img: Image passed straight through to the model's ``ocr`` call.
        lang: Key into ``model_managers`` selecting the worker pool to use.

    Returns:
        dict with keys ``"nombre"``, ``"apellido"`` and ``"identificacion"``.

    Raises:
        KeyError: If ``lang`` has no entry in ``model_managers``.
    """
    manager = model_managers[lang]
    # Only the first element of the OCR output is used -- presumably the
    # result for the single input image; TODO confirm against PaddleOCR docs.
    pages = manager.infer(img, cls=True)
    first_page = pages[0]
    textos_extraidos = herramientas.listaTextosExtraidos(first_page)

    # Panama DNI (national ID) fields.
    nombre, apellido, identificacion = documentos.dni(textos_extraidos)
    # NOTE(review): this writes the extracted personal data to stdout --
    # confirm that is acceptable for the deployment's logs.
    print(f"Hola: {nombre}, {apellido} con identificación: {identificacion}")

    campos = {}
    campos["nombre"] = nombre
    campos["apellido"] = apellido
    campos["identificacion"] = identificacion
    return campos