Sanchit7's picture
Initial Hugging Face Spaces deployment
9e0d3ce
"""
Component-based analysis of SEC filings
Identifies and analyzes key sections: Risk, Strategy, Financial Performance, Operations
"""
import re
from typing import Dict, List
class ComponentAnalyzer:
    """
    Identify and categorize the components of SEC filing text.

    Each component (financial performance, risk factors, business strategy,
    operations) is described by a list of keywords and an importance weight.
    A text snippet belongs to a component when at least one of that
    component's keywords occurs in it; snippets matching no component are
    labelled "general".
    """

    def __init__(self) -> None:
        """Build the component table: name -> {"keywords": [...], "weight": float}."""
        self.components = {
            "financial_performance": {
                "keywords": [
                    "revenue",
                    "net income",
                    "earnings",
                    "profit",
                    "loss",
                    "cash flow",
                    "operating income",
                    "EBITDA",
                    "gross margin",
                    "operating margin",
                    "financial results",
                    "fiscal year",
                    "quarter",
                    "YoY",
                    "year-over-year",
                ],
                "weight": 1.0,
            },
            "risk_factors": {
                "keywords": [
                    "risk",
                    "uncertainty",
                    "challenge",
                    "threat",
                    "adverse",
                    "volatile",
                    "fluctuation",
                    "litigation",
                    "regulatory",
                    "competition",
                    "competitive pressure",
                    "market condition",
                    "economic condition",
                    "material adverse effect",
                ],
                "weight": 1.2,  # Higher weight for risk analysis
            },
            "business_strategy": {
                "keywords": [
                    "strategy",
                    "strategic",
                    "initiative",
                    "growth",
                    "expansion",
                    "acquisition",
                    "partnership",
                    "innovation",
                    "competitive advantage",
                    "market opportunity",
                    "business model",
                    "long-term",
                    "investment",
                    "R&D",
                    "research and development",
                ],
                "weight": 1.0,
            },
            "operations": {
                "keywords": [
                    "operations",
                    "operational",
                    "production",
                    "capacity",
                    "efficiency",
                    "supply chain",
                    "customers",
                    "users",
                    "daily active users",
                    "engagement",
                    "platform",
                    "infrastructure",
                    "employee",
                    "workforce",
                ],
                "weight": 0.9,
            },
        }

    @staticmethod
    def _contains_keyword(text_lower: str, keyword: str) -> bool:
        """
        Return True if ``keyword`` occurs in ``text_lower`` at the start of a word.

        The match is case-insensitive (``text_lower`` must already be
        lower-cased; the keyword is lower-cased here). Only the left edge is
        anchored, so inflected forms still match ("risk" -> "risks",
        "loss" -> "losses") while occurrences buried inside another word do
        not ("quarter" in "headquarters", "risk" in "asterisk").
        """
        # (?<!\w): the character before the keyword must not be a word character.
        pattern = r"(?<!\w)" + re.escape(keyword.lower())
        return re.search(pattern, text_lower) is not None

    def identify_component(self, text: str) -> List[str]:
        """
        Identify which components a text snippet belongs to.

        Args:
            text: Text snippet to analyze. Empty or ``None`` input is treated
                as matching nothing.

        Returns:
            Names of all matching components, in the order they are defined
            in ``self.components``; ``["general"]`` when nothing matches.
        """
        if not text:
            return ["general"]

        text_lower = text.lower()
        matched_components = [
            component_name
            for component_name, config in self.components.items()
            if any(
                self._contains_keyword(text_lower, keyword)
                for keyword in config["keywords"]
            )
        ]
        return matched_components or ["general"]

    def categorize_texts(self, texts: List[str]) -> Dict[str, List[str]]:
        """
        Categorize a list of text segments by component.

        A segment that matches several components is listed under each of
        them.

        Args:
            texts: List of text segments.

        Returns:
            Dictionary mapping component names (plus "general") to the
            segments assigned to them. Categories with no segments are
            omitted.
        """
        # Pre-seed every known category so the output keeps definition order.
        categorized: Dict[str, List[str]] = {
            component: [] for component in self.components
        }
        categorized["general"] = []

        for text in texts:
            for component in self.identify_component(text):
                categorized[component].append(text)

        # Remove empty categories
        return {name: items for name, items in categorized.items() if items}

    def get_component_weight(self, component_name: str) -> float:
        """Get the importance weight for a component (1.0 if unknown or unset)."""
        return self.components.get(component_name, {}).get("weight", 1.0)

    def get_risk_keywords(self) -> List[str]:
        """Get all risk-related keywords for focused analysis.

        Returns a copy so callers cannot accidentally alter the classifier's
        configuration by mutating the result.
        """
        return list(self.components["risk_factors"]["keywords"])

    def get_financial_keywords(self) -> List[str]:
        """Get all financial-related keywords.

        Returns a copy so callers cannot accidentally alter the classifier's
        configuration by mutating the result.
        """
        return list(self.components["financial_performance"]["keywords"])