# sight_chat/config.py
# NOTE: the original upload header listed "version 2.0.0", but the VERSION
# constant below is "2.0.1" — the constant is authoritative.
"""
Central configuration file for the Multi-Method RAG System.
All shared parameters and settings are defined here.
"""
import os
from pathlib import Path

import torch
from dotenv import load_dotenv
# Load environment variables
load_dotenv(override=True)
# ==================== Versioning and Date ====================
DATE = "August 13, 2025"  # Human-readable release date displayed in the UI
VERSION = "2.0.1"  # Application version string
# ==================== API Configuration ====================
# Key is loaded from the environment / .env file; presence is checked by
# validate_api_key() before use.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
OPENAI_CHAT_MODEL = "gpt-5-chat-latest" # This is the non-reasoning model for gpt-5 so it has no latency
OPENAI_EMBEDDING_MODEL = "text-embedding-3-large" # Options: text-embedding-3-large, text-embedding-3-small, text-embedding-ada-002
# ==================== Realtime API Configuration ====================
# OpenAI Realtime API settings for speech-to-speech functionality
OPENAI_REALTIME_MODEL = "gpt-4o-realtime-preview" # Realtime model for speech-to-speech
REALTIME_VOICE = "alloy" # Available voices: alloy, echo, fable, onyx, nova, shimmer
# System prompt for realtime voice sessions. It instructs the voice agent to
# fetch facts via the ask_rag tool but to speak them as first-person expertise,
# deliberately suppressing document/page citations in spoken answers.
REALTIME_INSTRUCTIONS = (
"You are a knowledgeable safety expert speaking naturally in conversation. "
"VOICE BEHAVIOR: "
"- Speak like a confident safety professional talking to a colleague "
"- Acknowledge what you heard: 'You're asking about [topic]...' "
"- Use natural speech with appropriate pauses and emphasis "
"- Sound authoritative and knowledgeable - you ARE the expert "
"- Never mention document names, page numbers, or citation details when speaking "
"- Just state the facts naturally as if you know them from your expertise "
"RESPONSE PROCESS: "
"1. Briefly acknowledge the question: 'You're asking about [topic]...' "
"2. Call ask_rag to get the accurate information "
"3. Speak the information naturally as YOUR expertise, not as 'according to document X' "
"4. Organize complex topics: 'There are three key requirements here...' "
"5. Be thorough but conversational - like explaining to a colleague "
"CITATION RULE: "
"NEVER mention specific documents, sources, or page numbers in speech. "
"Just state the information confidently as if it's your professional knowledge. "
"For example, don't say 'According to OSHA 1910.147...' - just say 'The lockout tagout requirements are...' "
"IMPORTANT: Always use ask_rag for safety questions to get accurate information, "
"but speak the results as your own expertise, not as citations."
)
# ==================== Model Parameters ====================
# Generation parameters
DEFAULT_TEMPERATURE = 0 # Range: 0.0-1.0 (0=deterministic, 1=creative)
DEFAULT_MAX_TOKENS = 5000 # Maximum tokens in response
DEFAULT_TOP_K = 5 # Number of chunks to retrieve by default
DEFAULT_TOP_P = 1.0 # Nucleus sampling parameter (1.0 = no nucleus truncation)
# Context window management
MAX_CONTEXT_TOKENS = 7500 # Maximum context for models with 8k window
CHUNK_SIZE = 2000 # Tokens per chunk (used by TextPreprocessor.chunk_text_by_tokens)
CHUNK_OVERLAP = 200 # Token overlap between chunks (must stay well below CHUNK_SIZE)
# ==================== Embedding Models ====================
# Sentence Transformers models
SENTENCE_TRANSFORMER_MODEL = 'all-MiniLM-L6-v2' # Bi-encoder used for DPR retrieval
CROSS_ENCODER_MODEL = 'cross-encoder/ms-marco-MiniLM-L-6-v2' # For re-ranking retrieved chunks
# CLIP model
CLIP_MODEL = "ViT-L/14" # Options: ViT-B/32, ViT-L/14, RN50
# ==================== Search Parameters ====================
# BM25 parameters (standard Okapi BM25 tuning knobs)
BM25_K1 = 1.5 # Term frequency saturation parameter
BM25_B = 0.75 # Length normalization parameter
# Hybrid search
DEFAULT_HYBRID_ALPHA = 0.5 # Weight for BM25 (1-alpha for semantic)
# Re-ranking
RERANK_MULTIPLIER = 2 # Retrieve this many times top_k for re-ranking
MIN_RELEVANCE_SCORE = 0.3 # Minimum score threshold
# ==================== Directory Structure ====================
# Project directories (all resolved relative to this config file's location;
# created on import by ensure_directories())
PROJECT_ROOT = Path(__file__).parent
DATA_DIR = PROJECT_ROOT / "data"
EMBEDDINGS_DIR = PROJECT_ROOT / "embeddings"
GRAPH_DIR = PROJECT_ROOT / "graph"
METADATA_DIR = PROJECT_ROOT / "metadata"
IMAGES_DIR = DATA_DIR / "images"
# File paths — one index/metadata pair per retrieval method
VANILLA_FAISS_INDEX = EMBEDDINGS_DIR / "vanilla_faiss.index"
VANILLA_METADATA = EMBEDDINGS_DIR / "vanilla_metadata.pkl"
DPR_FAISS_INDEX = EMBEDDINGS_DIR / "dpr_faiss.index"
DPR_METADATA = EMBEDDINGS_DIR / "dpr_metadata.pkl"
BM25_INDEX = EMBEDDINGS_DIR / "bm25_index.pkl"
CONTEXT_DOCS = EMBEDDINGS_DIR / "context_stuffing_docs.pkl"
GRAPH_FILE = GRAPH_DIR / "graph.gml"
IMAGES_DB = METADATA_DIR / "images.db"
CHROMA_PATH = EMBEDDINGS_DIR / "chroma"
# ==================== Batch Processing ====================
EMBEDDING_BATCH_SIZE = 100 # Batch size for OpenAI embedding API requests
PROCESSING_BATCH_SIZE = 50 # Documents to process at once
# ==================== UI Configuration ====================
# Streamlit settings
MAX_CHAT_HISTORY = 5 # Maximum chat messages to keep in session history
# Canned questions offered in the UI as starting points
EXAMPLE_QUESTIONS = [
"What are general machine guarding requirements?",
"How do I perform lockout/tagout?",
"What safety measures are needed for robotic systems?",
"Explain the difference between guards and devices in machine safety.",
"What are the OSHA requirements for emergency stops?",
]
# Default retrieval method selected in the UI (must be a key of METHOD_DESCRIPTIONS)
DEFAULT_METHOD = "graph"
# Method descriptions for UI
METHOD_DESCRIPTIONS = {
'graph': "Graph-based RAG using NetworkX with relationship-aware retrieval",
'vanilla': "Standard vector search with FAISS and OpenAI embeddings",
'dpr': "Dense Passage Retrieval with bi-encoder and cross-encoder re-ranking",
'bm25': "BM25 keyword search with neural re-ranking for exact term matching",
'context': "Context stuffing with full document loading and heuristic selection",
'vision': "Vision-based search using GPT-5 Vision for image analysis and classification"
}
# ==================== Document Processing ====================
# Document types accepted for ingestion
SUPPORTED_EXTENSIONS = ['.pdf', '.txt', '.md', '.html']
IMAGE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.bmp', '.gif']
# Text splitting
MARKDOWN_HEADER_LEVEL = 3 # Split by this header level (###)
MAX_SECTIONS_PER_DOC = 500 # Maximum sections to extract from a document
# ==================== Logging ====================
LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO") # DEBUG, INFO, WARNING, ERROR
LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
# ==================== Performance ====================
# Device configuration. torch is imported with the other dependencies at the
# top of the file (PEP 8: imports belong at module top, not mid-file).
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"  # Prefer GPU when present
NUM_WORKERS = 4 # Parallel processing workers
# Cache settings
ENABLE_CACHE = True # Toggle result caching
CACHE_TTL = 3600 # Cache time-to-live in seconds
# ==================== Safety & Validation ====================
# Input validation limits applied to user-supplied questions/uploads
MAX_QUESTION_LENGTH = 1000 # Maximum characters in a question
MAX_IMAGE_SIZE_MB = 10 # Maximum image file size
# Rate limiting (disabled by default; flip RATE_LIMIT_ENABLED to activate)
RATE_LIMIT_ENABLED = False
MAX_QUERIES_PER_MINUTE = 60
# ==================== Default HTML Sources ====================
# Seed list of HTML documents ingested when no other sources are configured
DEFAULT_HTML_SOURCES = [
{
"title": "NIOSH Robotics in the Workplace – Safety Overview",
"url": "https://www.cdc.gov/niosh/robotics/about/",
"source": "NIOSH",
"year": 2024,
"category": "Technical Guide",
"format": "HTML"
}
]
# ==================== Helper Functions ====================
def ensure_directories():
    """Make sure every directory the RAG pipeline writes to exists.

    Idempotent: directories that already exist are left untouched, and
    missing parent directories are created as needed.
    """
    required = (DATA_DIR, EMBEDDINGS_DIR, GRAPH_DIR, METADATA_DIR, IMAGES_DIR)
    for path in required:
        path.mkdir(parents=True, exist_ok=True)
def get_model_context_length(model_name: str = OPENAI_CHAT_MODEL) -> int:
    """Return the context-window length (in tokens) for *model_name*.

    Unknown models fall back to a conservative 4096 tokens.

    Fix: the configured default model "gpt-5-chat-latest" was missing from
    the table, so the system's own default silently got the 4096-token
    fallback instead of the 128k window the gpt-5 family is mapped to.
    """
    context_lengths = {
        "gpt-5": 128000,
        "gpt-5-chat-latest": 128000,  # default OPENAI_CHAT_MODEL; gpt-5 family
        "gpt-4o-mini": 8192,
        "gpt-4o": 128000,
    }
    return context_lengths.get(model_name, 4096)
def validate_api_key():
    """Ensure an OpenAI API key is configured.

    Returns:
        True when OPENAI_API_KEY holds a non-empty value.

    Raises:
        ValueError: when the key is missing or empty.
    """
    if OPENAI_API_KEY:
        return True
    raise ValueError(
        "OpenAI API key not found. Please set OPENAI_API_KEY in .env file."
    )
# ==================== System Info ====================
def print_config():
    """Print the active configuration values to stdout for debugging."""
    divider = "=" * 50
    settings = [
        f"OpenAI Model: {OPENAI_CHAT_MODEL}",
        f"Embedding Model: {OPENAI_EMBEDDING_MODEL}",
        f"Device: {DEVICE}",
        f"Default Temperature: {DEFAULT_TEMPERATURE}",
        f"Default Top-K: {DEFAULT_TOP_K}",
        f"Chunk Size: {CHUNK_SIZE}",
        f"Project Root: {PROJECT_ROOT}",
    ]
    print(divider)
    print("RAG System Configuration")
    print(divider)
    for line in settings:
        print(line)
    print(divider)
# Ensure directories exist on import (module-level side effect: creates the
# data/embeddings/graph/metadata/images directories if missing)
ensure_directories()