Spaces:
Sleeping
Sleeping
| """ | |
| Component-based analysis of SEC filings | |
| Identifies and analyzes key sections: Risk, Strategy, Financial Performance, Operations | |
| """ | |
| from typing import Dict, List | |
class ComponentAnalyzer:
    """
    Identifies and categorizes different components of SEC filings.

    Each component (financial performance, risk factors, business strategy,
    operations) is described by a list of keywords used to recognise it and
    a numeric importance weight. Text that matches no component is reported
    under the fallback name "general".

    Matching is a case-insensitive *substring* test, so a keyword also
    matches inside longer words (e.g. "quarter" matches "headquarters").
    """

    def __init__(self):
        # Maps component name -> {"keywords": [...], "weight": float}.
        # Insertion order matters: identify_component() reports matches in
        # this order.
        self.components = {
            "financial_performance": {
                "keywords": [
                    "revenue",
                    "net income",
                    "earnings",
                    "profit",
                    "loss",
                    "cash flow",
                    "operating income",
                    "EBITDA",
                    "gross margin",
                    "operating margin",
                    "financial results",
                    "fiscal year",
                    "quarter",
                    "YoY",
                    "year-over-year",
                ],
                "weight": 1.0,
            },
            "risk_factors": {
                "keywords": [
                    "risk",
                    "uncertainty",
                    "challenge",
                    "threat",
                    "adverse",
                    "volatile",
                    "fluctuation",
                    "litigation",
                    "regulatory",
                    "competition",
                    "competitive pressure",
                    "market condition",
                    "economic condition",
                    "material adverse effect",
                ],
                "weight": 1.2,  # Higher weight for risk analysis
            },
            "business_strategy": {
                "keywords": [
                    "strategy",
                    "strategic",
                    "initiative",
                    "growth",
                    "expansion",
                    "acquisition",
                    "partnership",
                    "innovation",
                    "competitive advantage",
                    "market opportunity",
                    "business model",
                    "long-term",
                    "investment",
                    "R&D",
                    "research and development",
                ],
                "weight": 1.0,
            },
            "operations": {
                "keywords": [
                    "operations",
                    "operational",
                    "production",
                    "capacity",
                    "efficiency",
                    "supply chain",
                    "customers",
                    "users",
                    "daily active users",
                    "engagement",
                    "platform",
                    "infrastructure",
                    "employee",
                    "workforce",
                ],
                "weight": 0.9,
            },
        }

    def identify_component(self, text: str) -> List[str]:
        """
        Identify which components a text snippet belongs to.

        Args:
            text: Text snippet to analyze

        Returns:
            List of component names whose keywords occur in the text, in the
            order the components are defined; ["general"] when nothing
            matches (including for an empty string).
        """
        text_lower = text.lower()
        # Keywords such as "EBITDA" or "R&D" are stored with capitals, so
        # both sides are lowered before the substring test.
        matched_components = [
            component_name
            for component_name, config in self.components.items()
            if any(keyword.lower() in text_lower for keyword in config["keywords"])
        ]
        return matched_components or ["general"]

    def categorize_texts(self, texts: List[str]) -> Dict[str, List[str]]:
        """
        Categorize a list of text segments by component.

        A segment that matches several components is listed under each of
        them.

        Args:
            texts: List of text segments

        Returns:
            Dictionary mapping component names (plus "general") to the text
            segments assigned to them; categories with no segments are
            omitted.
        """
        categorized = {component: [] for component in self.components}
        categorized["general"] = []
        for text in texts:
            for component in self.identify_component(text):
                categorized[component].append(text)
        # Remove empty categories
        return {name: segments for name, segments in categorized.items() if segments}

    def get_component_weight(self, component_name: str) -> float:
        """Get the importance weight for a component (1.0 if it is unknown)."""
        return self.components.get(component_name, {}).get("weight", 1.0)

    def get_risk_keywords(self) -> List[str]:
        """Get all risk-related keywords for focused analysis.

        Returns a copy, so callers may modify the result without altering
        the analyzer's own configuration.
        """
        return list(self.components["risk_factors"]["keywords"])

    def get_financial_keywords(self) -> List[str]:
        """Get all financial-related keywords.

        Returns a copy, so callers may modify the result without altering
        the analyzer's own configuration.
        """
        return list(self.components["financial_performance"]["keywords"])