Spaces:
Running
Running
| """ | |
| Validation utilities for untrusted input: model paths, filenames, file extensions and sizes, text, and filesystem paths. | |
| """ | |
| import os | |
| import re | |
| from typing import Optional | |
| from urllib.parse import urlparse | |
class ValidationError(Exception):
    """Exception type signalling that a value failed validation."""
class Validators:
    """Collection of validation functions for security and input validation.

    All methods are static: they read no instance state and may be called
    either on the class (``Validators.validate_filename(...)``) or on an
    instance. The ``validate_*`` methods return ``True`` on success and raise
    the module-level ``ValidationError`` on failure; they never return
    ``False``.
    """

    # Regex patterns for validation - allow numbers, letters, hyphens, underscores, dots
    HUGGINGFACE_MODEL_PATTERN = re.compile(r'^[a-zA-Z0-9_\-\.]+/[a-zA-Z0-9_\-\.]+$')
    SAFE_FILENAME_PATTERN = re.compile(r'^[a-zA-Z0-9_\-\.]+$')

    @staticmethod
    def validate_model_path(model_path: str) -> bool:
        """
        Validate that a custom model path is safe and follows expected patterns.

        Surrounding whitespace is ignored for the purpose of the checks.

        Args:
            model_path: The model path to validate

        Returns:
            bool: Always True when the path is valid

        Raises:
            ValidationError: If the model path is empty, not a string,
                contains forbidden characters, is not of the form
                'organization/model-name', or is longer than 200 characters
        """
        if not model_path or not isinstance(model_path, str):
            raise ValidationError("Model path cannot be empty")

        # Trim whitespace
        model_path = model_path.strip()

        # Check for dangerous characters (excluding single forward slash for HuggingFace format)
        dangerous_chars = ['..', '\\', '|', ';', '&', '$', '`', '<', '>']
        if any(char in model_path for char in dangerous_chars):
            raise ValidationError("Model path contains invalid characters")

        # Check for multiple slashes or leading/trailing slashes
        if '//' in model_path or model_path.startswith('/') or model_path.endswith('/'):
            raise ValidationError("Model path contains invalid characters")

        # Check if it looks like a HuggingFace model path (user/model format)
        if not Validators.HUGGINGFACE_MODEL_PATTERN.match(model_path):
            raise ValidationError("Model path must follow the format 'organization/model-name'")

        # Check length limits
        if len(model_path) > 200:
            raise ValidationError("Model path is too long")

        return True

    @staticmethod
    def validate_filename(filename: str) -> bool:
        """
        Validate that a filename is safe for upload.

        Args:
            filename: The filename to validate

        Returns:
            bool: Always True when the filename is valid

        Raises:
            ValidationError: If the filename is empty, not a string, contains
                path separators, shell metacharacters or control characters,
                starts with a dot, or is longer than 255 characters
        """
        if not filename or not isinstance(filename, str):
            raise ValidationError("Filename cannot be empty")

        # Check for dangerous characters and patterns
        dangerous_patterns = ['..', '/', '\\', '|', ';', '&', '$', '`', '<', '>']
        if any(pattern in filename for pattern in dangerous_patterns):
            raise ValidationError("Filename contains invalid characters")

        # Reject NUL and other ASCII control characters: an embedded NUL makes
        # file APIs fail, and newlines/escapes can corrupt logs or headers.
        if any(ord(ch) < 32 or ord(ch) == 127 for ch in filename):
            raise ValidationError("Filename contains invalid characters")

        # Check if filename starts with a dot (hidden files)
        if filename.startswith('.'):
            raise ValidationError("Hidden files are not allowed")

        # Check length
        if len(filename) > 255:
            raise ValidationError("Filename is too long")

        return True

    @staticmethod
    def validate_file_extension(filename: str, allowed_extensions: set) -> bool:
        """
        Validate that a file has an allowed extension.

        The comparison is case-insensitive on both sides.

        Args:
            filename: The filename to check
            allowed_extensions: Set of allowed extensions (e.g., {'.txt', '.py'})

        Returns:
            bool: Always True when the extension is allowed

        Raises:
            ValidationError: If the filename is empty or its extension is not
                in allowed_extensions
        """
        if not filename:
            raise ValidationError("Filename cannot be empty")

        _, ext = os.path.splitext(filename.lower())

        # The file's extension is lower-cased above, so lower-case the
        # allow-list too; otherwise an entry such as '.TXT' could never match.
        normalized_allowed = {allowed.lower() for allowed in allowed_extensions}
        if ext not in normalized_allowed:
            allowed_list = ', '.join(sorted(allowed_extensions))
            raise ValidationError(f"File type '{ext}' not allowed. Allowed types: {allowed_list}")

        return True

    @staticmethod
    def validate_file_size(file_size: int, max_size: int) -> bool:
        """
        Validate that a file size is within limits.

        Args:
            file_size: Size of the file in bytes
            max_size: Maximum allowed size in bytes

        Returns:
            bool: Always True when file_size does not exceed max_size

        Raises:
            ValidationError: If the file is too large
        """
        if file_size > max_size:
            # Report both sizes in megabytes for a readable error message.
            max_mb = max_size / (1024 * 1024)
            current_mb = file_size / (1024 * 1024)
            raise ValidationError(f"File too large: {current_mb:.1f}MB (max: {max_mb:.1f}MB)")

        return True

    @staticmethod
    def validate_text_input(text: str, max_length: int = 1000000) -> bool:
        """
        Validate text input for processing.

        Args:
            text: The text to validate
            max_length: Maximum allowed length in characters

        Returns:
            bool: Always True when the text is valid

        Raises:
            ValidationError: If text is not a string or is longer than
                max_length
        """
        if not isinstance(text, str):
            raise ValidationError("Text input must be a string")

        if len(text) > max_length:
            raise ValidationError(f"Text too long: {len(text)} characters (max: {max_length})")

        return True

    @staticmethod
    def sanitize_model_path(model_path: str) -> str:
        """
        Sanitize a model path by removing potentially dangerous elements.

        Surrounding whitespace, every '/' and '\\' separator, and every '..'
        sequence are removed. Note that this also removes the single slash of
        an 'organization/model-name' path.

        Args:
            model_path: The model path to sanitize

        Returns:
            str: Sanitized model path ("" for empty input)
        """
        if not model_path:
            return ""

        # Remove whitespace
        sanitized = model_path.strip()

        # Strip separators BEFORE '..': removing a separator can join two
        # single dots ("./." -> ".."), so the traversal check must run last.
        sanitized = sanitized.replace('/', '')
        sanitized = sanitized.replace('\\', '')

        # Remove any path traversal attempts
        sanitized = sanitized.replace('..', '')

        return sanitized

    @staticmethod
    def is_safe_path(path: str, base_path: str) -> bool:
        """
        Check if a path is safe and within the expected base directory.

        Both paths are made absolute and normalized, then compared component
        by component, so a sibling such as '/srv/data_evil' is not treated as
        being inside '/srv/data'.

        NOTE: os.path.abspath does not resolve symbolic links, so a symlink
        inside base_path that points elsewhere is not detected here.

        Args:
            path: The path to check
            base_path: The base directory that the path should be within

        Returns:
            bool: True if the path is base_path itself or lies beneath it,
                False otherwise (including when the paths cannot be compared)
        """
        try:
            # Resolve both paths to absolute paths
            abs_path = os.path.abspath(path)
            abs_base = os.path.abspath(base_path)

            # commonpath compares whole components; a plain string-prefix
            # test would wrongly accept '/srv/data_evil' for base '/srv/data'.
            return os.path.commonpath([abs_base, abs_path]) == abs_base
        except (OSError, ValueError):
            # commonpath raises ValueError e.g. for paths on different drives.
            return False
# Module-level shared Validators instance, created once at import time.
validators = Validators()