Trouter-Library committed
Commit dd84874 · verified · 1 Parent(s): e54d66c

Create inference/security.py

Files changed (1)
  1. inference/inference/security.py +591 -0
inference/inference/security.py ADDED
@@ -0,0 +1,591 @@
+ #!/usr/bin/env python3
+ """
+ Helion-2.5-Rnd Security Implementation
+ Comprehensive security features for safe model deployment
+ """
+
+ import hashlib
+ import hmac
+ import json
+ import logging
+ import re
+ import secrets
+ from collections import defaultdict
+ from datetime import datetime, timedelta
+ from typing import Dict, List, Optional, Tuple
+ from pathlib import Path
+
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+
+ class InputValidator:
+     """Validate and sanitize user inputs"""
+
+     MAX_PROMPT_LENGTH = 131072
+     MAX_TOKEN_LIMIT = 8192
+
+     # Dangerous patterns that could indicate attacks
+     DANGEROUS_PATTERNS = [
+         r'<script.*?>',
+         r'javascript:',
+         r'on\w+\s*=',
+         r'\beval\s*\(',
+         r'\bexec\s*\(',
+         r'__import__',
+         r'\bos\.',
+         r'\bsystem\(',
+         r'subprocess',
+         r'\[\[.*?\]\].*?\[\[.*?\]\]',  # Repeated prompt injection
+     ]
+
+     @classmethod
+     def validate_prompt(cls, prompt: str) -> Tuple[bool, Optional[str]]:
+         """
+         Validate prompt input for security issues
+
+         Args:
+             prompt: Input prompt string
+
+         Returns:
+             (is_valid, error_message)
+         """
+         # Check for empty input
+         if not prompt or not prompt.strip():
+             return False, "Prompt cannot be empty"
+
+         # Length validation
+         if len(prompt) > cls.MAX_PROMPT_LENGTH:
+             return False, f"Prompt exceeds maximum length of {cls.MAX_PROMPT_LENGTH}"
+
+         # Check for null bytes
+         if '\x00' in prompt:
+             return False, "Prompt contains null bytes"
+
+         # Check for dangerous patterns
+         for pattern in cls.DANGEROUS_PATTERNS:
+             if re.search(pattern, prompt, re.IGNORECASE | re.MULTILINE):
+                 logger.warning(f"Dangerous pattern detected: {pattern}")
+                 return False, "Prompt contains potentially dangerous content"
+
+         # Check for excessive repetition (possible DoS)
+         words = prompt.split()
+         if len(words) > 100:
+             word_counts = {}
+             for word in words:
+                 word_counts[word] = word_counts.get(word, 0) + 1
+                 if word_counts[word] > len(words) * 0.5:
+                     return False, "Excessive repetition detected"
+
+         return True, None
+
+     @classmethod
+     def sanitize_text(cls, text: str) -> str:
+         """
+         Sanitize text by removing dangerous content
+
+         Args:
+             text: Input text
+
+         Returns:
+             Sanitized text
+         """
+         # Remove script tags
+         text = re.sub(r'<script.*?</script>', '', text, flags=re.DOTALL | re.IGNORECASE)
+
+         # Remove javascript: protocol
+         text = re.sub(r'javascript:', '', text, flags=re.IGNORECASE)
+
+         # Remove event handlers
+         text = re.sub(r'\bon\w+\s*=\s*["\'].*?["\']', '', text, flags=re.IGNORECASE)
+
+         return text
+
+     @classmethod
+     def validate_generation_params(cls, params: Dict) -> Tuple[bool, Optional[str]]:
+         """
+         Validate generation parameters
+
+         Args:
+             params: Generation parameters dictionary
+
+         Returns:
+             (is_valid, error_message)
+         """
+         # Temperature validation
+         if 'temperature' in params:
+             temp = params['temperature']
+             if not isinstance(temp, (int, float)) or temp < 0 or temp > 2.0:
+                 return False, "Temperature must be between 0 and 2.0"
+
+         # Max tokens validation
+         if 'max_tokens' in params:
+             max_tok = params['max_tokens']
+             if not isinstance(max_tok, int) or max_tok < 1 or max_tok > cls.MAX_TOKEN_LIMIT:
+                 return False, f"max_tokens must be between 1 and {cls.MAX_TOKEN_LIMIT}"
+
+         # Top-p validation
+         if 'top_p' in params:
+             top_p = params['top_p']
+             if not isinstance(top_p, (int, float)) or top_p < 0 or top_p > 1.0:
+                 return False, "top_p must be between 0 and 1.0"
+
+         return True, None
+
+
+ class ContentFilter:
+     """Filter inappropriate and unsafe content"""
+
+     # Toxicity patterns
+     TOXICITY_PATTERNS = [
+         r'\b(kill|murder|assassinate|destroy)\s+(myself|yourself|themselves|someone)',
+         r'\bhow\s+to\s+(make|build|create)\s+(bomb|weapon|explosive)',
+         r'\b(suicide|self-harm|cutting)\s+(method|way|how)',
+         r'\b(hack|crack|exploit)\s+(password|account|system)',
+     ]
+
+     # Hate speech patterns
+     HATE_SPEECH_PATTERNS = [
+         r'\b(hate|despise)\s+\w+\s+(people|race|religion|gender)',
+         r'\b(inferior|superior)\s+(race|ethnicity|gender)',
+     ]
+
+     # PII patterns
+     PII_PATTERNS = {
+         'email': r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b',
+         'ssn': r'\b\d{3}-\d{2}-\d{4}\b',
+         'phone': r'\b(\+\d{1,3}[-.]?)?\(?\d{3}\)?[-.]?\d{3}[-.]?\d{4}\b',
+         'credit_card': r'\b\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}\b',
+         'ip_address': r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b',
+     }
+
+     @classmethod
+     def check_toxicity(cls, text: str) -> Tuple[bool, List[str]]:
+         """
+         Check text for toxic content
+
+         Args:
+             text: Input text
+
+         Returns:
+             (is_safe, violations)
+         """
+         violations = []
+
+         # Check toxicity patterns
+         for pattern in cls.TOXICITY_PATTERNS:
+             if re.search(pattern, text, re.IGNORECASE):
+                 violations.append(f"toxicity:{pattern[:30]}")
+
+         # Check hate speech
+         for pattern in cls.HATE_SPEECH_PATTERNS:
+             if re.search(pattern, text, re.IGNORECASE):
+                 violations.append(f"hate_speech:{pattern[:30]}")
+
+         return len(violations) == 0, violations
+
+     @classmethod
+     def detect_pii(cls, text: str) -> List[Tuple[str, str]]:
+         """
+         Detect PII in text
+
+         Args:
+             text: Input text
+
+         Returns:
+             List of (pii_type, matched_value) tuples
+         """
+         detected = []
+
+         for pii_type, pattern in cls.PII_PATTERNS.items():
+             matches = re.finditer(pattern, text)
+             for match in matches:
+                 detected.append((pii_type, match.group()))
+
+         return detected
+
+     @classmethod
+     def redact_pii(cls, text: str) -> str:
+         """
+         Redact PII from text
+
+         Args:
+             text: Input text
+
+         Returns:
+             Text with PII redacted
+         """
+         for pii_type, pattern in cls.PII_PATTERNS.items():
+             text = re.sub(pattern, f'[REDACTED_{pii_type.upper()}]', text)
+
+         return text
+
+     @classmethod
+     def filter_content(cls, text: str, redact_pii: bool = True) -> Tuple[str, Dict]:
+         """
+         Comprehensive content filtering
+
+         Args:
+             text: Input text
+             redact_pii: Whether to redact PII
+
+         Returns:
+             (filtered_text, metadata)
+         """
+         metadata = {
+             'original_length': len(text),
+             'pii_detected': [],
+             'toxic_content': False,
+             'violations': []
+         }
+
+         # Check toxicity
+         is_safe, violations = cls.check_toxicity(text)
+         if not is_safe:
+             metadata['toxic_content'] = True
+             metadata['violations'] = violations
+
+         # Detect PII
+         pii_found = cls.detect_pii(text)
+         if pii_found:
+             metadata['pii_detected'] = [pii_type for pii_type, _ in pii_found]
+
+         # Redact PII if requested
+         filtered_text = text
+         if redact_pii and pii_found:
+             filtered_text = cls.redact_pii(text)
+
+         metadata['filtered_length'] = len(filtered_text)
+
+         return filtered_text, metadata
+
+
+ class RateLimiter:
+     """Token bucket rate limiter for API requests"""
+
+     def __init__(
+         self,
+         requests_per_minute: int = 60,
+         burst_size: int = 10,
+         cleanup_interval: int = 3600
+     ):
+         """
+         Initialize rate limiter
+
+         Args:
+             requests_per_minute: Sustained rate limit
+             burst_size: Maximum burst requests
+             cleanup_interval: Cleanup old entries after this many seconds
+         """
+         self.rate = requests_per_minute / 60.0
+         self.burst_size = burst_size
+         self.buckets: Dict[str, Dict] = defaultdict(lambda: {
+             'tokens': burst_size,
+             'last_update': datetime.now(),
+             'total_requests': 0
+         })
+         self.cleanup_interval = cleanup_interval
+         self.last_cleanup = datetime.now()
+
+     def _cleanup_old_entries(self):
+         """Remove inactive client entries"""
+         now = datetime.now()
+         if (now - self.last_cleanup).total_seconds() < self.cleanup_interval:
+             return
+
+         cutoff = now - timedelta(seconds=self.cleanup_interval)
+         inactive = [
+             client_id for client_id, bucket in self.buckets.items()
+             if bucket['last_update'] < cutoff
+         ]
+
+         for client_id in inactive:
+             del self.buckets[client_id]
+
+         self.last_cleanup = now
+         logger.info(f"Cleaned up {len(inactive)} inactive rate limit entries")
+
+     def allow_request(self, client_id: str) -> Tuple[bool, Dict]:
+         """
+         Check if request is allowed for client
+
+         Args:
+             client_id: Unique client identifier
+
+         Returns:
+             (allowed, metadata)
+         """
+         self._cleanup_old_entries()
+
+         bucket = self.buckets[client_id]
+         now = datetime.now()
+
+         # Calculate elapsed time and add tokens
+         elapsed = (now - bucket['last_update']).total_seconds()
+         bucket['tokens'] = min(
+             self.burst_size,
+             bucket['tokens'] + elapsed * self.rate
+         )
+         bucket['last_update'] = now
+
+         # Check if request allowed
+         if bucket['tokens'] >= 1.0:
+             bucket['tokens'] -= 1.0
+             bucket['total_requests'] += 1
+
+             return True, {
+                 'allowed': True,
+                 'remaining_tokens': int(bucket['tokens']),
+                 'total_requests': bucket['total_requests']
+             }
+         else:
+             wait_time = (1.0 - bucket['tokens']) / self.rate
+
+             return False, {
+                 'allowed': False,
+                 'retry_after': int(wait_time) + 1,
+                 'total_requests': bucket['total_requests']
+             }
+
+     def get_stats(self, client_id: str) -> Dict:
+         """Get rate limit statistics for client"""
+         if client_id not in self.buckets:
+             return {'exists': False}
+
+         bucket = self.buckets[client_id]
+         return {
+             'exists': True,
+             'tokens': bucket['tokens'],
+             'total_requests': bucket['total_requests'],
+             'last_update': bucket['last_update'].isoformat()
+         }
+
+
+ class APIKeyManager:
+     """Secure API key management"""
+
+     def __init__(self, storage_path: Optional[str] = None):
+         """
+         Initialize API key manager
+
+         Args:
+             storage_path: Path to store key hashes
+         """
+         self.storage_path = Path(storage_path) if storage_path else None
+         self.keys: Dict[str, Dict] = {}
+
+         if self.storage_path and self.storage_path.exists():
+             self._load_keys()
+
+     def generate_key(self, client_id: str, description: str = "") -> str:
+         """
+         Generate new API key
+
+         Args:
+             client_id: Client identifier
+             description: Key description
+
+         Returns:
+             Generated API key
+         """
+         # Generate cryptographically secure key
+         key = f"helion_{secrets.token_urlsafe(32)}"
+
+         # Hash for storage
+         key_hash = hashlib.sha256(key.encode()).hexdigest()
+
+         # Store metadata
+         self.keys[key_hash] = {
+             'client_id': client_id,
+             'description': description,
+             'created_at': datetime.now().isoformat(),
+             'last_used': None,
+             'usage_count': 0
+         }
+
+         self._save_keys()
+
+         logger.info(f"Generated API key for client: {client_id}")
+         return key
+
+     def verify_key(self, key: str) -> Tuple[bool, Optional[str]]:
+         """
+         Verify API key
+
+         Args:
+             key: API key to verify
+
+         Returns:
+             (is_valid, client_id)
+         """
+         if not key or not key.startswith('helion_'):
+             return False, None
+
+         key_hash = hashlib.sha256(key.encode()).hexdigest()
+
+         if key_hash in self.keys:
+             # Update usage statistics
+             self.keys[key_hash]['last_used'] = datetime.now().isoformat()
+             self.keys[key_hash]['usage_count'] += 1
+             self._save_keys()
+
+             return True, self.keys[key_hash]['client_id']
+
+         return False, None
+
+     def revoke_key(self, key: str) -> bool:
+         """
+         Revoke API key
+
+         Args:
+             key: API key to revoke
+
+         Returns:
+             Success status
+         """
+         key_hash = hashlib.sha256(key.encode()).hexdigest()
+
+         if key_hash in self.keys:
+             del self.keys[key_hash]
+             self._save_keys()
+             logger.info(f"Revoked API key: {key_hash[:16]}...")
+             return True
+
+         return False
+
+     def _load_keys(self):
+         """Load keys from storage"""
+         try:
+             with open(self.storage_path, 'r') as f:
+                 self.keys = json.load(f)
+             logger.info(f"Loaded {len(self.keys)} API keys")
+         except Exception as e:
+             logger.error(f"Failed to load API keys: {e}")
+
+     def _save_keys(self):
+         """Save keys to storage"""
+         if not self.storage_path:
+             return
+
+         try:
+             self.storage_path.parent.mkdir(parents=True, exist_ok=True)
+             with open(self.storage_path, 'w') as f:
+                 json.dump(self.keys, f, indent=2)
+         except Exception as e:
+             logger.error(f"Failed to save API keys: {e}")
+
+
+ class SecurityLogger:
+     """Security event logging"""
+
+     def __init__(self, log_file: str = "security.log"):
+         """
+         Initialize security logger
+
+         Args:
+             log_file: Path to security log file
+         """
+         self.log_file = Path(log_file)
+         self.log_file.parent.mkdir(parents=True, exist_ok=True)
+
+         self.logger = logging.getLogger("security")
+         handler = logging.FileHandler(self.log_file)
+         formatter = logging.Formatter('%(message)s')
+         handler.setFormatter(formatter)
+         self.logger.addHandler(handler)
+         self.logger.setLevel(logging.INFO)
+
+     def log_event(self, event_type: str, details: Dict):
+         """
+         Log security event
+
+         Args:
+             event_type: Type of security event
+             details: Event details
+         """
+         event = {
+             'timestamp': datetime.utcnow().isoformat(),
+             'type': event_type,
+             'details': details
+         }
+         self.logger.info(json.dumps(event))
+
+     def log_authentication(self, client_id: str, success: bool, ip_address: Optional[str] = None):
+         """Log authentication attempt"""
+         self.log_event('authentication', {
+             'client_id': client_id,
+             'success': success,
+             'ip_address': ip_address
+         })
+
+     def log_rate_limit(self, client_id: str, ip_address: Optional[str] = None):
+         """Log rate limit violation"""
+         self.log_event('rate_limit', {
+             'client_id': client_id,
+             'ip_address': ip_address
+         })
+
+     def log_content_violation(self, client_id: str, violation_type: str, details: str):
+         """Log content policy violation"""
+         self.log_event('content_violation', {
+             'client_id': client_id,
+             'violation_type': violation_type,
+             'details': details
+         })
+
+     def log_input_validation_failure(self, client_id: str, reason: str):
+         """Log input validation failure"""
+         self.log_event('validation_failure', {
+             'client_id': client_id,
+             'reason': reason
+         })
+
+
+ # Example usage and integration
+ def create_secure_inference_middleware():
+     """
+     Create middleware for secure inference
+
+     Returns:
+         Dictionary of security components
+     """
+     return {
+         'validator': InputValidator(),
+         'content_filter': ContentFilter(),
+         'rate_limiter': RateLimiter(requests_per_minute=60),
+         'api_key_manager': APIKeyManager(storage_path='./keys/api_keys.json'),
+         'security_logger': SecurityLogger(log_file='./logs/security.log')
+     }
+
+
+ if __name__ == "__main__":
+     # Demo security features
+     print("Helion Security Module - Feature Demo\n")
+
+     # Input validation
+     validator = InputValidator()
+     test_prompt = "Write a Python function to sort a list"
+     is_valid, error = validator.validate_prompt(test_prompt)
+     print(f"Validation test: {'PASS' if is_valid else 'FAIL'}")
+
+     # Content filtering
+     content_filter = ContentFilter()
+     test_text = "My email is [email protected] and phone is 555-1234"
+     filtered, metadata = content_filter.filter_content(test_text, redact_pii=True)
+     print(f"\nPII Detection: Found {len(metadata['pii_detected'])} types")
+     print(f"Filtered text: {filtered}")
+
+     # Rate limiting
+     rate_limiter = RateLimiter(requests_per_minute=10)
+     allowed, meta = rate_limiter.allow_request("client_123")
+     print(f"\nRate limit test: {'ALLOWED' if allowed else 'DENIED'}")
+
+     # API key management
+     key_manager = APIKeyManager()
+     api_key = key_manager.generate_key("test_client", "Demo key")
+     print(f"\nGenerated API key: {api_key[:20]}...")
+
+     is_valid, client = key_manager.verify_key(api_key)
+     print(f"Key verification: {'VALID' if is_valid else 'INVALID'}")
+
+     print("\nSecurity module ready for deployment!")
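For reviewers, a minimal sketch of how the components returned by `create_secure_inference_middleware()` might be chained in front of a generation call. This is not part of the commit; it assumes the new file is importable as `security`, and `generate_fn` is a placeholder for whatever inference callable the server exposes.

```python
# Hypothetical request flow built on the classes added in this commit.
# Assumes the module is importable as `security`; `generate_fn` and the
# return-value convention (payload, status_code) are illustrative only.
from security import create_secure_inference_middleware

mw = create_secure_inference_middleware()

def handle_request(api_key: str, prompt: str, params: dict, generate_fn):
    # 1. Authenticate the caller via APIKeyManager
    ok, client_id = mw['api_key_manager'].verify_key(api_key)
    mw['security_logger'].log_authentication(client_id or "unknown", ok)
    if not ok:
        return {'error': 'invalid API key'}, 401

    # 2. Apply the per-client token-bucket rate limit
    allowed, meta = mw['rate_limiter'].allow_request(client_id)
    if not allowed:
        mw['security_logger'].log_rate_limit(client_id)
        return {'error': 'rate limited', 'retry_after': meta['retry_after']}, 429

    # 3. Validate the prompt and the generation parameters
    valid, err = mw['validator'].validate_prompt(prompt)
    if valid:
        valid, err = mw['validator'].validate_generation_params(params)
    if not valid:
        mw['security_logger'].log_input_validation_failure(client_id, err)
        return {'error': err}, 400

    # 4. Generate, then filter the output (toxicity check + PII redaction)
    output = generate_fn(prompt, **params)
    filtered, info = mw['content_filter'].filter_content(output, redact_pii=True)
    if info['toxic_content']:
        mw['security_logger'].log_content_violation(
            client_id, 'toxicity', str(info['violations'])
        )
        return {'error': 'content policy violation'}, 403

    return {'text': filtered, 'pii_redacted': info['pii_detected']}, 200
```

The ordering mirrors the cheapest-check-first pattern: key verification and rate limiting reject abusive traffic before any validation or model work is done, and content filtering runs only on text that will actually be returned.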