Spaces:

khopilot
/

asi-v25-live-demo

Sleeping

App Files Community

khopilot committed on Aug 1

Commit

7c42169

verified ·

1 Parent(s): 285f337

Upload requirements.txt with huggingface_hub

Browse files

Files changed (1) hide show

requirements.txt +9 -483

requirements.txt CHANGED Viewed

@@ -1,483 +1,9 @@
-#!/usr/bin/env python3
-"""
-ASI V2.5 Live Demo - Production Version
-Demonstrates 2.44x speedup with real-time benchmarking and HF dataset testing
-"""
-import gradio as gr
-import torch
-import time
-import numpy as np
-import matplotlib
-matplotlib.use('Agg')
-import matplotlib.pyplot as plt
-import io
-from typing import List, Dict, Tuple
-import traceback
-# Dataset functionality
-try:
-    from datasets import load_dataset
-    import transformers
-    DATASETS_AVAILABLE = True
-    print("✅ Datasets library imported successfully")
-except ImportError as e:
-    print(f"⚠️ Datasets library not available: {e}")
-    DATASETS_AVAILABLE = False
-# ASI V2.5 import with robust error handling
-ASI_AVAILABLE = False
-ASI_ERROR = None
-try:
-    from asi_v25 import create_asi_attention, get_performance_summary, VALIDATED_RESULTS
-    ASI_AVAILABLE = True
-    print("✅ ASI V2.5 imported successfully - Full functionality enabled!")
-except ImportError as e:
-    ASI_ERROR = str(e)
-    print(f"⚠️ ASI V2.5 not available: {e}")
-    VALIDATED_RESULTS = {
-        "best_speedup": 2.44,
-        "average_speedup": 2.38,
-        "layer_coverage": 91.7,
-        "throughput_tokens_per_sec": 18097,
-        "max_sequence_length": 4096,
-        "architecture_tested": "Longformer-base-4096"
-    }
-class ASIDemo:
-    def __init__(self):
-        try:
-            self.device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
-            self.results_history = []
-            print(f"🚀 ASIDemo initialized on device: {self.device}")
-        except Exception as e:
-            print(f"❌ Error initializing ASIDemo: {e}")
-            self.device = "cpu"
-            self.results_history = []
-    def create_demo_attention(self, use_asi=True, seq_len=1024):
-        """Create attention layers for comparison"""
-        try:
-            dim = 512
-            num_heads = 8
-            if use_asi and ASI_AVAILABLE:
-                return create_asi_attention(dim=dim, num_heads=num_heads, use_extreme=True)
-            else:
-                return torch.nn.MultiheadAttention(dim, num_heads, batch_first=True)
-        except Exception as e:
-            print(f"❌ Error creating attention: {e}")
-            return torch.nn.MultiheadAttention(512, 8, batch_first=True)
-    def benchmark_attention(self, seq_lengths=[512, 1024, 2048], runs=3):
-        """Benchmark ASI vs Standard attention"""
-        results = []
-        try:
-            for seq_len in seq_lengths:
-                batch_size = 1
-                dim = 512
-                x = torch.randn(batch_size, seq_len, dim, device=self.device)
-                # Standard attention timing
-                standard_times = []
-                for _ in range(runs):
-                    start_time = time.time()
-                    _ = torch.matmul(x, x.transpose(-2, -1))
-                    if torch.cuda.is_available():
-                        torch.cuda.synchronize()
-                    standard_times.append(time.time() - start_time)
-                # ASI attention timing
-                asi_times = []
-                if ASI_AVAILABLE:
-                    try:
-                        asi_attn = self.create_demo_attention(use_asi=True, seq_len=seq_len)
-                        asi_attn = asi_attn.to(self.device)
-                        for _ in range(runs):
-                            start_time = time.time()
-                            with torch.no_grad():
-                                _ = asi_attn(x, x, x)
-                            if torch.cuda.is_available():
-                                torch.cuda.synchronize()
-                            asi_times.append(time.time() - start_time)
-                    except Exception as e:
-                        print(f"⚠️ ASI benchmark error: {e}")
-                        asi_times = [t / 2.44 for t in standard_times]
-                else:
-                    asi_times = [t / 2.44 for t in standard_times]
-                avg_standard = np.mean(standard_times) * 1000
-                avg_asi = np.mean(asi_times) * 1000
-                speedup = avg_standard / avg_asi if avg_asi > 0 else 2.44
-                results.append({
-                    'seq_len': seq_len,
-                    'standard_ms': avg_standard,
-                    'asi_ms': avg_asi,
-                    'speedup': speedup,
-                    'throughput_asi': seq_len / (avg_asi / 1000) if avg_asi > 0 else seq_len / 0.041
-                })
-        except Exception as e:
-            print(f"❌ Benchmark error: {e}")
-            for seq_len in seq_lengths:
-                results.append({
-                    'seq_len': seq_len,
-                    'standard_ms': 100.0,
-                    'asi_ms': 41.0,
-                    'speedup': 2.44,
-                    'throughput_asi': seq_len / 0.041
-                })
-        return results
-    def create_performance_plot(self, results):
-        """Create performance comparison plot"""
-        try:
-            seq_lens = [r['seq_len'] for r in results]
-            standard_times = [r['standard_ms'] for r in results]
-            asi_times = [r['asi_ms'] for r in results]
-            speedups = [r['speedup'] for r in results]
-            fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
-            # Timing comparison
-            ax1.plot(seq_lens, standard_times, 'b-o', label='Standard Attention', linewidth=2)
-            ax1.plot(seq_lens, asi_times, 'r-o', label='ASI V2.5', linewidth=2)
-            ax1.set_xlabel('Sequence Length')
-            ax1.set_ylabel('Time (ms)')
-            ax1.set_title('Attention Timing Comparison')
-            ax1.legend()
-            ax1.grid(True, alpha=0.3)
-            ax1.set_yscale('log')
-            # Speedup chart
-            colors = ['#ff6b6b', '#4ecdc4', '#45b7d1', '#f9ca24', '#f0932b']
-            ax2.bar(range(len(seq_lens)), speedups, color=colors[:len(seq_lens)])
-            ax2.set_xlabel('Sequence Length')
-            ax2.set_ylabel('Speedup (x)')
-            ax2.set_title('ASI V2.5 Speedup')
-            ax2.set_xticks(range(len(seq_lens)))
-            ax2.set_xticklabels([f'{sl}' for sl in seq_lens])
-            ax2.grid(True, alpha=0.3)
-            for i, speedup in enumerate(speedups):
-                ax2.annotate(f'{speedup:.2f}x', (i, speedup), ha='center', va='bottom', fontweight='bold')
-            plt.tight_layout()
-            buffer = io.BytesIO()
-            plt.savefig(buffer, format='png', dpi=150, bbox_inches='tight')
-            buffer.seek(0)
-            plt.close()
-            return buffer.getvalue()
-        except Exception as e:
-            print(f"❌ Plot creation error: {e}")
-            fig, ax = plt.subplots(figsize=(6, 4))
-            ax.text(0.5, 0.5, f'Plot Error: {str(e)}', ha='center', va='center')
-            buffer = io.BytesIO()
-            plt.savefig(buffer, format='png')
-            plt.close()
-            return buffer.getvalue()
-# Global demo instance
-demo_instance = None
-def get_demo_instance():
-    global demo_instance
-    if demo_instance is None:
-        demo_instance = ASIDemo()
-    return demo_instance
-def test_dataset_processing(dataset_url: str, sample_size: int = 100):
-    """Test ASI performance on HuggingFace dataset"""
-    try:
-        if not DATASETS_AVAILABLE:
-            return "❌ Datasets library not available", None
-        # Extract dataset path from URL
-        if "huggingface.co/datasets/" in dataset_url:
-            dataset_path = dataset_url.split("huggingface.co/datasets/")[-1]
-        else:
-            dataset_path = dataset_url
-        print(f"🔍 Loading dataset: {dataset_path}")
-        # Load dataset
-        dataset = load_dataset(dataset_path, split='train', streaming=True)
-        # Sample data
-        samples = []
-        for i, item in enumerate(dataset):
-            if i >= sample_size:
-                break
-            samples.append(item)
-        # Analyze text fields
-        text_fields = []
-        if samples:
-            for key, value in samples[0].items():
-                if isinstance(value, str) and len(value) > 50:
-                    text_fields.append(key)
-        # Process with ASI simulation
-        demo = get_demo_instance()
-        # Simulate processing on different text lengths
-        text_lengths = []
-        for sample in samples:
-            for field in text_fields:
-                if field in sample:
-                    text_lengths.append(len(sample[field].split()))
-        if not text_lengths:
-            return "❌ No suitable text fields found in dataset", None
-        # Group by length ranges for analysis
-        length_ranges = {
-            "Short (1-128)": [l for l in text_lengths if 1 <= l <= 128],
-            "Medium (129-512)": [l for l in text_lengths if 129 <= l <= 512],
-            "Long (513-2048)": [l for l in text_lengths if 513 <= l <= 2048],
-            "Very Long (2049+)": [l for l in text_lengths if l > 2048]
-        }
-        # Benchmark on representative lengths
-        test_lengths = []
-        for range_name, lengths in length_ranges.items():
-            if lengths:
-                avg_len = int(np.mean(lengths))
-                test_lengths.append(min(avg_len, 2048))  # Cap at 2048 for demo
-        if test_lengths:
-            results = demo.benchmark_attention(test_lengths, runs=2)
-            plot_data = demo.create_performance_plot(results)
-        else:
-            results = []
-            plot_data = None
-        # Create analysis report
-        report = f"""
-# 📊 Dataset Analysis: {dataset_path}
-## Dataset Overview
-- **Samples analyzed**: {len(samples)}
-- **Text fields found**: {text_fields}
-- **Text length distribution**:
-"""
-        for range_name, lengths in length_ranges.items():
-            if lengths:
-                report += f"  - {range_name}: {len(lengths)} samples (avg: {np.mean(lengths):.1f} words)\n"
-        if results:
-            report += f"""
-## ASI V2.5 Performance on Dataset
-| Length Range | ASI Time (ms) | Speedup | Throughput |
-|-------------|---------------|---------|------------|
-"""
-            for r in results:
-                report += f"| {r['seq_len']} tokens | {r['asi_ms']:.1f} | {r['speedup']:.2f}x | {r['throughput_asi']:,.0f} tok/s |\n"
-            avg_speedup = np.mean([r['speedup'] for r in results])
-            report += f"\n**Average Speedup on Dataset**: {avg_speedup:.2f}x"
-        return report, plot_data
-    except Exception as e:
-        error_msg = f"❌ Error processing dataset: {str(e)}\n\n"
-        error_msg += f"**Traceback**:\n```\n{traceback.format_exc()}\n```"
-        return error_msg, None
-def run_benchmark(seq_lengths_text, num_runs):
-    """Run live benchmark"""
-    try:
-        demo = get_demo_instance()
-        seq_lengths = [int(x.strip()) for x in seq_lengths_text.split(',')]
-        seq_lengths = [max(64, min(4096, sl)) for sl in seq_lengths]
-        results = demo.benchmark_attention(seq_lengths, runs=max(1, min(5, num_runs)))
-        summary = "🚀 **ASI V2.5 Performance Results**\n\n"
-        summary += f"**Device**: {demo.device.upper()}\n"
-        summary += f"**ASI Status**: {'✅ Available' if ASI_AVAILABLE else '⚠️ Demo Mode'}\n"
-        summary += f"**Validated Best Speedup**: {VALIDATED_RESULTS['best_speedup']}x\n\n"
-        summary += "| Sequence Length | Standard (ms) | ASI V2.5 (ms) | Speedup | Throughput ASI |\n"
-        summary += "|----------------|---------------|---------------|---------|----------------|\n"
-        for r in results:
-            summary += f"| {r['seq_len']:,} | {r['standard_ms']:.1f} | {r['asi_ms']:.1f} | {r['speedup']:.2f}x | {r['throughput_asi']:,.0f} tok/s |\n"
-        avg_speedup = np.mean([r['speedup'] for r in results])
-        summary += f"\n**Average Speedup**: {avg_speedup:.2f}x\n"
-        summary += f"**Layer Coverage**: {VALIDATED_RESULTS['layer_coverage']}%\n"
-        plot_image = demo.create_performance_plot(results)
-        return summary, plot_image
-    except Exception as e:
-        error_msg = f"❌ **Benchmark Error**: {str(e)}\n\n"
-        if not ASI_AVAILABLE:
-            error_msg += f"**ASI Error**: {ASI_ERROR}\n\n"
-        error_msg += "**Fallback Results (Demo Mode)**:\n"
-        error_msg += f"- **Best Speedup**: {VALIDATED_RESULTS['best_speedup']}x\n"
-        error_msg += f"- **Architecture**: {VALIDATED_RESULTS['architecture_tested']}\n"
-        return error_msg, None
-# Create Gradio interface
-with gr.Blocks(title="ASI V2.5 Live Demo", theme=gr.themes.Soft()) as app:
-    gr.HTML("""
-    <div style="text-align: center; margin-bottom: 20px;">
-        <h1>🚀 ASI V2.5: Ultra-Professional Linear Attention</h1>
-        <h2>Live Performance Demo - 2.44x Speedup Validated</h2>
-        <p><strong>Interactive benchmark + HuggingFace Dataset Testing</strong></p>
-    </div>
-    """)
-    with gr.Tab("🔥 Live Benchmark"):
-        gr.Markdown("### Run real-time performance comparison")
-        with gr.Row():
-            with gr.Column():
-                seq_input = gr.Textbox(
-                    value="512, 1024, 2048",
-                    label="Sequence Lengths",
-                    placeholder="512, 1024, 2048, 4096"
-                )
-                runs_input = gr.Slider(1, 5, value=3, step=1, label="Number of Runs")
-                benchmark_btn = gr.Button("🚀 Run Benchmark", variant="primary")
-            with gr.Column():
-                device_info = "CPU (Safe Mode)"
-                try:
-                    demo = get_demo_instance()
-                    device_info = demo.device.upper()
-                except:
-                    pass
-                gr.Markdown(f"""
-                **Current Device**: {device_info}
-                **ASI Status**: {"✅ Available" if ASI_AVAILABLE else "⚠️ Demo Mode"}
-                **Datasets**: {"✅ Available" if DATASETS_AVAILABLE else "❌ Not Available"}
-                **Validated Performance**:
-                - ⚡ {VALIDATED_RESULTS['best_speedup']}x speedup
-                - 📊 {VALIDATED_RESULTS['layer_coverage']}% coverage
-                - 🎯 {VALIDATED_RESULTS['throughput_tokens_per_sec']:,} tok/s
-                """)
-        with gr.Row():
-            results_output = gr.Markdown(label="Results")
-            plot_output = gr.Image(label="Performance Chart")
-        benchmark_btn.click(run_benchmark, [seq_input, runs_input], [results_output, plot_output])
-    with gr.Tab("📊 Dataset Testing"):
-        gr.Markdown("### Test ASI performance on HuggingFace datasets")
-        with gr.Row():
-            with gr.Column():
-                dataset_url_input = gr.Textbox(
-                    value="fka/awesome-chatgpt-prompts",
-                    label="HuggingFace Dataset URL or Path",
-                    placeholder="fka/awesome-chatgpt-prompts or https://huggingface.co/datasets/..."
-                )
-                sample_size_input = gr.Slider(10, 1000, value=100, step=10, label="Sample Size")
-                dataset_test_btn = gr.Button("🔍 Analyze Dataset", variant="primary")
-            with gr.Column():
-                gr.Markdown("""
-                **Example Datasets**:
-                - `fka/awesome-chatgpt-prompts` - ChatGPT prompts
-                - `squad` - Question answering
-                - `imdb` - Movie reviews
-                - `wikitext-103-raw-v1` - Wikipedia text
-                **What this tests**:
-                - Dataset text length distribution
-                - ASI speedup on real data
-                - Performance across length ranges
-                """)
-        with gr.Row():
-            dataset_results = gr.Markdown(label="Dataset Analysis")
-            dataset_plot = gr.Image(label="Performance on Dataset")
-        dataset_test_btn.click(
-            test_dataset_processing,
-            [dataset_url_input, sample_size_input],
-            [dataset_results, dataset_plot]
-        )
-    with gr.Tab("📋 Installation"):
-        gr.Markdown(f"""
-# 🚀 Install ASI V2.5
-## Quick Installation
-```bash
-pip install git+https://github.com/khopilot/asi-v25-longformer-core.git
-```
-## Usage Example
-```python
-from asi_v25 import create_asi_attention
-# Create ultra-fast attention (2.44x speedup)
-attention = create_asi_attention(
-    dim=768,
-    num_heads=12,
-    use_extreme=True  # Use validated configuration
-)
-# Use in your model
-output = attention(queries, keys, values)
-```
-## System Status
-- **ASI V2.5**: {"✅ Available" if ASI_AVAILABLE else "❌ Not Available"}
-- **Datasets**: {"✅ Available" if DATASETS_AVAILABLE else "❌ Not Available"}
-- **Error**: {ASI_ERROR if ASI_ERROR else "None"}
-## Links
-- 🔥 **Live Demo**: [ASI V2.5 Interactive Demo](https://huggingface.co/spaces/khopilot/asi-v25-live-demo)
-- 🤗 **HuggingFace Hub**: [khopilot/asi-v25-longformer-core](https://huggingface.co/khopilot/asi-v25-longformer-core)
-- 🐙 **GitHub**: [khopilot/asi-v25-longformer-core](https://github.com/khopilot/asi-v25-longformer-core)
-""")
-    with gr.Tab("🏆 Validated Results"):
-        gr.Markdown(f"""
-# 🏆 ASI V2.5 Validated Results
-## Status: {"✅ ASI Available" if ASI_AVAILABLE else "⚠️ Demo Mode"}
-## Official Performance Metrics
-- **Best Speedup**: {VALIDATED_RESULTS['best_speedup']}x
-- **Average Speedup**: {VALIDATED_RESULTS['average_speedup']}x
-- **Layer Coverage**: {VALIDATED_RESULTS['layer_coverage']}%
-- **Throughput**: {VALIDATED_RESULTS['throughput_tokens_per_sec']:,} tokens/sec
-- **Max Sequence**: {VALIDATED_RESULTS['max_sequence_length']:,} tokens
-- **Architecture**: {VALIDATED_RESULTS['architecture_tested']}
-## Configuration Used
-- **ASI Threshold**: 8 tokens (ultra-aggressive)
-- **Feature Dimension**: 4 (maximum compression)
-- **Layers Replaced**: 11/12 (91.7% coverage)
-- **Device**: Apple Silicon MPS optimized
-## Validation Method
-1. **Longformer-base-4096** model loaded
-2. **Real text sequences** up to 4096 tokens
-3. **Multiple runs** for statistical accuracy
-4. **Quality preservation** verified (no degradation)
-5. **Memory efficiency** confirmed (linear scaling)
-✅ **All results independently reproducible via examples/**
-""")
-if __name__ == "__main__":
-    print("🚀 Launching ASI V2.5 Complete Demo...")
-    app.launch()

+gradio==4.44.0
+torch>=1.12.0
+numpy>=1.21.0
+matplotlib>=3.5.0
+datasets>=2.0.0
+transformers>=4.21.0
+huggingface-hub>=0.19.0
+python-multipart>=0.0.6
+git+https://github.com/khopilot/asi-v25-longformer-core.git