Spaces:

Sanchi10
/

financial-research-agent

Sleeping

App Files Files Community

financial-research-agent / src /tools /sec_analyzer /component_analyzer.py

Sanchit7

Initial Hugging Face Spaces deployment

9e0d3ce about 1 month ago

raw

history blame contribute delete

4.84 kB

	"""
	Component-based analysis of SEC filings
	Identifies and analyzes key sections: Risk, Strategy, Financial Performance, Operations
	"""

	import re
	from typing import Dict, List


	class ComponentAnalyzer:
	    """
	    Identifies and categorizes different components of SEC filings.

	    Each component (financial performance, risk factors, business strategy,
	    operations) is described by a list of keywords and an importance weight.
	    A text snippet belongs to a component when at least one of the
	    component's keywords occurs in it, case-insensitively, starting at a
	    word boundary. Text that matches no component is labelled "general".
	    """

	    def __init__(self):
	        """Build the component table: name -> {"keywords": [...], "weight": float}."""
	        self.components = {
	            "financial_performance": {
	                "keywords": [
	                    "revenue",
	                    "net income",
	                    "earnings",
	                    "profit",
	                    "loss",
	                    "cash flow",
	                    "operating income",
	                    "EBITDA",
	                    "gross margin",
	                    "operating margin",
	                    "financial results",
	                    "fiscal year",
	                    "quarter",
	                    "YoY",
	                    "year-over-year",
	                ],
	                "weight": 1.0,
	            },
	            "risk_factors": {
	                "keywords": [
	                    "risk",
	                    "uncertainty",
	                    "challenge",
	                    "threat",
	                    "adverse",
	                    "volatile",
	                    "fluctuation",
	                    "litigation",
	                    "regulatory",
	                    "competition",
	                    "competitive pressure",
	                    "market condition",
	                    "economic condition",
	                    "material adverse effect",
	                ],
	                "weight": 1.2,  # Higher weight for risk analysis
	            },
	            "business_strategy": {
	                "keywords": [
	                    "strategy",
	                    "strategic",
	                    "initiative",
	                    "growth",
	                    "expansion",
	                    "acquisition",
	                    "partnership",
	                    "innovation",
	                    "competitive advantage",
	                    "market opportunity",
	                    "business model",
	                    "long-term",
	                    "investment",
	                    "R&D",
	                    "research and development",
	                ],
	                "weight": 1.0,
	            },
	            "operations": {
	                "keywords": [
	                    "operations",
	                    "operational",
	                    "production",
	                    "capacity",
	                    "efficiency",
	                    "supply chain",
	                    "customers",
	                    "users",
	                    "daily active users",
	                    "engagement",
	                    "platform",
	                    "infrastructure",
	                    "employee",
	                    "workforce",
	                ],
	                "weight": 0.9,
	            },
	        }

	    @staticmethod
	    def _matches_any_keyword(text: str, keywords: List[str]) -> bool:
	        """Return True if any keyword occurs in text at the start of a word."""
	        # An empty alternation would match everywhere, so bail out explicitly.
	        if not keywords:
	            return False
	        alternation = "|".join(re.escape(keyword) for keyword in keywords)
	        # The lookbehind only anchors the START of the keyword: "risks" and
	        # "quarterly" still match, but "asterisk" and "headquarters" do not.
	        # The pattern is rebuilt from the current keyword list on every call
	        # so edits to self.components take effect immediately; the re module
	        # caches the compiled form internally.
	        pattern = rf"(?<!\w)(?:{alternation})"
	        return re.search(pattern, text, re.IGNORECASE) is not None

	    def identify_component(self, text: str) -> List[str]:
	        """
	        Identify which components a text snippet belongs to

	        Args:
	            text: Text snippet to analyze

	        Returns:
	            List of component names that match, in the order the components
	            are defined, or ["general"] when no component matches
	        """
	        matched_components = [
	            component_name
	            for component_name, config in self.components.items()
	            if self._matches_any_keyword(text, config["keywords"])
	        ]
	        return matched_components or ["general"]

	    def categorize_texts(self, texts: List[str]) -> Dict[str, List[str]]:
	        """
	        Categorize a list of text segments by component

	        A segment that matches several components is listed under each of
	        them; a segment that matches none is listed under "general".

	        Args:
	            texts: List of text segments

	        Returns:
	            Dictionary mapping component names to text lists; components
	            that received no text are omitted
	        """
	        # Pre-seed every bucket so the key order of the result is stable:
	        # components in definition order, then "general".
	        categorized: Dict[str, List[str]] = {name: [] for name in self.components}
	        categorized["general"] = []

	        for text in texts:
	            for component in self.identify_component(text):
	                categorized[component].append(text)

	        # Remove empty categories
	        return {name: bucket for name, bucket in categorized.items() if bucket}

	    def get_component_weight(self, component_name: str) -> float:
	        """Get the importance weight for a component (1.0 if the name is unknown)"""
	        return self.components.get(component_name, {}).get("weight", 1.0)

	    def get_risk_keywords(self) -> List[str]:
	        """Get all risk-related keywords for focused analysis

	        Returns a copy, so callers can modify the result without altering
	        the analyzer's own keyword table.
	        """
	        return list(self.components["risk_factors"]["keywords"])

	    def get_financial_keywords(self) -> List[str]:
	        """Get all financial-related keywords

	        Returns a copy, so callers can modify the result without altering
	        the analyzer's own keyword table.
	        """
	        return list(self.components["financial_performance"]["keywords"])