Update README.md
Browse files
README.md
CHANGED
|
@@ -27,15 +27,21 @@ from transformers import AutoProcessor, UdopForConditionalGeneration
|
|
| 27 |
from datasets import load_dataset
|
| 28 |
|
| 29 |
# load model and processor
|
|
|
|
|
|
|
| 30 |
processor = AutoProcessor.from_pretrained("microsoft/udop-large", apply_ocr=False)
|
| 31 |
model = UdopForConditionalGeneration.from_pretrained("microsoft/udop-large")
|
| 32 |
|
|
|
|
|
|
|
| 33 |
dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
|
| 34 |
example = dataset[0]
|
| 35 |
image = example["image"]
|
| 36 |
words = example["tokens"]
|
| 37 |
boxes = example["bboxes"]
|
| 38 |
question = "Question answering. What is the date on the form?"
|
|
|
|
|
|
|
| 39 |
encoding = processor(image, question, words, boxes=boxes, return_tensors="pt")
|
| 40 |
|
| 41 |
# autoregressive generation
|
|
|
|
| 27 |
from datasets import load_dataset
|
| 28 |
|
| 29 |
# load model and processor
|
| 30 |
+
# in this case, we have already performed OCR ourselves
|
| 31 |
+
# so we initialize the processor with `apply_ocr=False`
|
| 32 |
processor = AutoProcessor.from_pretrained("microsoft/udop-large", apply_ocr=False)
|
| 33 |
model = UdopForConditionalGeneration.from_pretrained("microsoft/udop-large")
|
| 34 |
|
| 35 |
+
# load an example image, along with the words and coordinates
|
| 36 |
+
# which were extracted using an OCR engine
|
| 37 |
dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
|
| 38 |
example = dataset[0]
|
| 39 |
image = example["image"]
|
| 40 |
words = example["tokens"]
|
| 41 |
boxes = example["bboxes"]
|
| 42 |
question = "Question answering. What is the date on the form?"
|
| 43 |
+
|
| 44 |
+
# prepare everything for the model
|
| 45 |
encoding = processor(image, question, words, boxes=boxes, return_tensors="pt")
|
| 46 |
|
| 47 |
# autoregressive generation
|