Spaces:

oberbics
/

HistorySpace

Runtime error

App Files Files Community

oberbics committed on Apr 13

Commit

3953725

verified ·

1 Parent(s): 19377ce

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -146

app.py CHANGED Viewed

@@ -1,165 +1,53 @@
 import gradio as gr
-from transformers import pipeline
 import torch
 import json
 import time
-from functools import lru_cache
-# 1. Model Loading with Health Checks
-@lru_cache(maxsize=1)
-def load_model():
-    try:
-        print("⚙️ Initializing NuExtract-1.5 model...")
-        start_time = time.time()
-        model = pipeline(
-            "text2text-generation",
-            model="numind/NuExtract-1.5",
-            device="cuda" if torch.cuda.is_available() else "cpu",
-            torch_dtype=torch.float16 if torch.cuda.is_available() else None
-        )
-        load_time = round(time.time() - start_time, 2)
-        print(f"✅ Model loaded successfully in {load_time}s")
-        return model
-    except Exception as e:
-        print(f"❌ Model loading failed: {str(e)}")
-        return None
-# 2. Processing Function with Streamed Output
 def extract_structure(template, text):
-    # Input validation
-    if not text.strip():
-        yield "❌ Error: Empty input text", "", "<p style='color:red'>Please enter text to analyze</p>"
-        return
     try:
-        template_data = json.loads(template) if template.strip() else {}
-    except json.JSONDecodeError:
-        yield "❌ Error: Invalid JSON template", "", "<p style='color:red'>Malformed JSON template</p>"
-        return
-    # Processing stages
-    stages = [
-        ("🔍 Initializing model...", 0.5),
-        ("📖 Parsing document structure...", 1.2),
-        ("🔄 Matching template fields...", 0.8),
-        ("✨ Finalizing extraction...", 0.3)
-    ]
-    for msg, delay in stages:
-        yield msg, "", ""
-        time.sleep(delay)
-    try:
-        # Actual inference
-        result = extractor(
-            text,
-            **template_data,
-            max_length=512,
-            num_return_sequences=1,
-            temperature=0.7
-        )[0]['generated_text']
-        # Format output
-        formatted_json = json.loads(result)  # Parse to validate JSON
-        html_output = f"""
-        <div style='
-            padding: 15px;
-            background: #f8f9fa;
-            border-radius: 8px;
-            border-left: 4px solid #4CAF50;
-            margin-top: 10px;
-        '>
-            <h3 style='margin-top:0'>Extracted Data</h3>
-            <pre style='white-space: pre-wrap'>{json.dumps(formatted_json, indent=2)}</pre>
-        </div>
-        """
-        yield "✅ Extraction complete", formatted_json, html_output
     except Exception as e:
-        error_msg = f"❌ Processing error: {str(e)}"
-        yield error_msg, "", f"<p style='color:red'>{error_msg}</p>"
-# 3. Gradio Interface
-with gr.Blocks(theme=gr.themes.Soft(), title="NuExtract 1.5") as demo:
-    # Header
-    gr.Markdown("""
-    <div style='text-align:center'>
-        <h1>🧠 NuExtract-1.5</h1>
-        <p>Advanced Information Extraction System</p>
-    </div>
-    """)
-    # Main layout
     with gr.Row():
-        # Input Column
-        with gr.Column(scale=1, min_width=400):
-            gr.Markdown("### 📥 Input")
-            template_input = gr.Textbox(
-                label="Extraction Template (JSON)",
-                value='{"fields": ["name", "email", "phone"]}',
-                lines=5
-            )
-            text_input = gr.TextArea(
-                label="Document Text",
-                placeholder="John Smith ([email protected]) called regarding order #12345...",
-                lines=12
-            )
-            gr.Examples(
-                examples=[
-                    [
-                        '{"fields": ["name", "email"]}',
-                        "Please contact Dr. Sarah Johnson at [email protected]"
-                    ],
-                    [
-                        '{"fields": ["product", "price"]}',
-                        "The new MacBook Pro costs $1,299 at our store"
-                    ]
-                ],
-                inputs=[template_input, text_input],
-                label="Try Examples:"
-            )
-        # Output Column
-        with gr.Column(scale=1, min_width=500):
-            gr.Markdown("### 📤 Results")
-            status = gr.Textbox(
-                label="Status",
-                value="🟢 System Ready"
-            )
-            json_output = gr.JSON(label="Structured Output")  # Removed interactive parameter
-            html_output = gr.HTML(
-                label="Formatted View",
-                value="<div style='min-height:200px'></div>"
-            )
-    # Controls
-    submit_btn = gr.Button("Extract Information", variant="primary")
-    clear_btn = gr.Button("Clear")
-    # Event handlers
-    submit_btn.click(
-        fn=extract_structure,
-        inputs=[template_input, text_input],
-        outputs=[status, json_output, html_output]
-    )
-    clear_btn.click(
-        fn=lambda: ["", "", {}, "<div></div>"],
-        inputs=[],
-        outputs=[template_input, text_input, json_output, html_output]
-    )
-# 4. Launch Configuration
-if __name__ == "__main__":
-    # Initialize model
-    extractor = load_model()
-    # Launch app
-    demo.launch(
-        server_name="0.0.0.0",
-        server_port=7860,
-        show_error=True
-    )

 import gradio as gr
+from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 import json
 import time
+# Model Loading
+# Loaded once at import time so every request reuses the same weights.
+MODEL_ID = "numind/NuExtract-1.5"
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_ID,
+    device_map="auto",
+    # Half precision only pays off on a GPU; fall back to float32 on CPU,
+    # where float16 kernels are slow or missing.
+    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+)
 def extract_structure(template, text):
+    """Run NuExtract on ``text`` and return the JSON it extracts.
+
+    Args:
+        template: Template string (JSON) interpolated into the prompt.
+        text: Document text to extract from.
+
+    Returns:
+        A ``(status, data, html)`` tuple. On failure ``data`` is ``{}`` and
+        the error message is reported in both ``status`` and ``html``.
+    """
+    import html  # local import: escape untrusted text before rendering it
+
+    # NOTE(review): the NuExtract model card documents its own prompt
+    # layout; confirm this free-form prompt is intended.
+    prompt = f"""Extract the following fields from the text:
+Template: {template}
+Text: {text}
+Extracted JSON:"""
     try:
+        # Follow the model's device: device_map="auto" may place it on CPU,
+        # where a hard-coded "cuda" would raise.
+        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+        outputs = model.generate(**inputs, max_new_tokens=512)
+        # generate() returns prompt + completion for causal LMs. Decode only
+        # the completion, otherwise the "{" of the echoed template is found
+        # first and the slice below is not valid JSON.
+        prompt_len = inputs["input_ids"].shape[-1]
+        result = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
+        # Extract JSON portion
+        json_start = result.find("{")
+        json_end = result.rfind("}") + 1
+        if json_start == -1 or json_end <= json_start:
+            raise ValueError("Model output contains no JSON object")
+        extracted = json.loads(result[json_start:json_end])
+        pretty = html.escape(json.dumps(extracted, indent=2))
+        return "✅ Success", extracted, f"<pre>{pretty}</pre>"
     except Exception as e:
+        # UI boundary: report any failure to the user instead of crashing.
+        message = str(e)
+        return f"❌ Error: {message}", {}, f"<p style='color:red'>{html.escape(message)}</p>"
+# Gradio Interface
+# One row with two columns: the first holds the inputs and the trigger
+# button, the second holds the three result widgets.
+with gr.Blocks() as demo:
+    gr.Markdown("# NuExtract-1.5 Structured Data Extractor")
     with gr.Row():
+        with gr.Column():
+            template = gr.Textbox(
+                label="Template (JSON)",
+                value='{"fields": ["name", "email"]}',
+            )
+            text = gr.TextArea(label="Input Text")
+            btn = gr.Button("Extract")
+        with gr.Column():
+            status = gr.Textbox(label="Status")
+            json_out = gr.JSON(label="Output")
+            html_out = gr.HTML()
+    # A click passes both inputs to extract_structure and routes its
+    # three return values to the result widgets, in order.
+    btn.click(
+        fn=extract_structure,
+        inputs=[template, text],
+        outputs=[status, json_out, html_out],
+    )
+demo.launch()