moonstarpaddy committed on
Commit
040675c
·
verified ·
1 Parent(s): c53cf3d

Update scraper.py

Browse files
Files changed (1) hide show
  1. scraper.py +110 -7
scraper.py CHANGED
@@ -21,8 +21,8 @@ class FinancialScraper:
21
  self.update_headers()
22
 
23
  # Best Practice 3: Request delays
24
- self.min_delay = 1.0 # Minimum delay between requests
25
- self.max_delay = 2.0 # Maximum delay between requests
26
  self.last_request_time = 0
27
 
28
  def update_headers(self):
@@ -47,7 +47,7 @@ class FinancialScraper:
47
 
48
  self.last_request_time = time.time()
49
 
50
- def _make_request(self, url: str) -> requests.Response:
51
  """Make a rate-limited request with error handling"""
52
  self._rate_limit()
53
 
@@ -58,7 +58,7 @@ class FinancialScraper:
58
  except requests.RequestException as e:
59
  raise Exception(f"Request failed: {str(e)}")
60
 
61
- def scrape_yahoo_summary(self, symbol: str) -> Dict[str, Any]:
62
  """Scrape basic financial data from Yahoo Finance"""
63
  try:
64
  url = f"https://finance.yahoo.com/quote/{symbol.upper()}"
@@ -88,19 +88,19 @@ class FinancialScraper:
88
 
89
  # Get company name
90
  try:
91
- company_elem = soup.find('h1', {'data-test': 'quote-header'})
92
  if company_elem:
93
  company_name = company_elem.text.split('(')[0].strip()
94
  data['Company Name'] = company_name
95
  except:
96
- pass
97
 
98
  return data
99
 
100
  except Exception as e:
101
  return {'symbol': symbol.upper(), 'error': f"Failed to scrape summary: {str(e)}"}
102
 
103
- def scrape_key_statistics(self, symbol: str) -> Dict[str, Any]:
104
  """Scrape financial ratios and key statistics"""
105
  try:
106
  url = f"https://finance.yahoo.com/quote/{symbol.upper()}/key-statistics"
@@ -113,3 +113,106 @@ class FinancialScraper:
113
  tables = soup.find_all('table')
114
 
115
  # Key metrics we want to extract
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  self.update_headers()
22
 
23
  # Best Practice 3: Request delays
24
+ self.min_delay = 1.0
25
+ self.max_delay = 2.0
26
  self.last_request_time = 0
27
 
28
  def update_headers(self):
 
47
 
48
  self.last_request_time = time.time()
49
 
50
+ def _make_request(self, url: str):
51
  """Make a rate-limited request with error handling"""
52
  self._rate_limit()
53
 
 
58
  except requests.RequestException as e:
59
  raise Exception(f"Request failed: {str(e)}")
60
 
61
+ def scrape_yahoo_summary(self, symbol: str):
62
  """Scrape basic financial data from Yahoo Finance"""
63
  try:
64
  url = f"https://finance.yahoo.com/quote/{symbol.upper()}"
 
88
 
89
  # Get company name
90
  try:
91
+ company_elem = soup.find('h1')
92
  if company_elem:
93
  company_name = company_elem.text.split('(')[0].strip()
94
  data['Company Name'] = company_name
95
  except:
96
+ data['Company Name'] = 'N/A'
97
 
98
  return data
99
 
100
  except Exception as e:
101
  return {'symbol': symbol.upper(), 'error': f"Failed to scrape summary: {str(e)}"}
102
 
103
+ def scrape_key_statistics(self, symbol: str):
104
  """Scrape financial ratios and key statistics"""
105
  try:
106
  url = f"https://finance.yahoo.com/quote/{symbol.upper()}/key-statistics"
 
113
  tables = soup.find_all('table')
114
 
115
  # Key metrics we want to extract
116
+ target_metrics = {
117
+ 'Market Cap': 'Market Cap',
118
+ 'Enterprise Value': 'Enterprise Value',
119
+ 'Trailing P/E': 'P/E Ratio',
120
+ 'Forward P/E': 'Forward P/E',
121
+ 'Price/Book': 'P/B Ratio',
122
+ 'Price/Sales': 'P/S Ratio',
123
+ 'PEG Ratio': 'PEG Ratio',
124
+ 'Enterprise Value/Revenue': 'EV/Revenue',
125
+ 'Enterprise Value/EBITDA': 'EV/EBITDA',
126
+ 'Return on Equity': 'ROE',
127
+ 'Return on Assets': 'ROA',
128
+ 'Operating Margin': 'Operating Margin',
129
+ 'Profit Margin': 'Profit Margin',
130
+ 'Total Debt/Equity': 'Debt/Equity'
131
+ }
132
+
133
+ for table in tables:
134
+ rows = table.find_all('tr')
135
+ for row in rows:
136
+ cells = row.find_all('td')
137
+ if len(cells) >= 2:
138
+ metric_name = cells[0].text.strip()
139
+ metric_value = cells[1].text.strip()
140
+
141
+ # Check if this metric is one we want
142
+ for target, display_name in target_metrics.items():
143
+ if target.lower() in metric_name.lower():
144
+ ratios[display_name] = metric_value
145
+ break
146
+
147
+ return ratios
148
+
149
+ except Exception as e:
150
+ return {'symbol': symbol.upper(), 'error': f"Failed to scrape statistics: {str(e)}"}
151
+
152
def scrape_financial_highlights(self, symbol: str):
    """Get comprehensive financial data for ``symbol``.

    Combines the result of ``scrape_yahoo_summary`` with the result of
    ``scrape_key_statistics``.

    Args:
        symbol: Ticker symbol to look up.

    Returns:
        A dict. One of:
        * the summary result unchanged, when it contains an ``'error'`` key;
        * the summary result alone, when the statistics result contains an
          ``'error'`` key (best effort: partial data beats no data);
        * the summary entries followed by the statistics entries, with the
          statistics value winning when both define the same key;
        * ``{'symbol': ..., 'error': ...}`` when anything raises.
    """
    try:
        summary_data = self.scrape_yahoo_summary(symbol)

        # A failed summary is returned as-is so the caller sees its error.
        if 'error' in summary_data:
            return summary_data

        stats_data = self.scrape_key_statistics(symbol)

        # Statistics are optional: fall back to the summary on failure.
        if 'error' in stats_data:
            return summary_data

        # Dict keys are unique, so the merge cannot contain a duplicate
        # 'symbol'. The previous post-filter could only ever delete the one
        # 'symbol' entry (whenever it was not the first key), so it is gone.
        return {**summary_data, **stats_data}

    except Exception as e:
        return {'symbol': symbol.upper(), 'error': f"Failed to scrape financial highlights: {str(e)}"}
179
+
180
def format_financial_data(data):
    """Format scraped data for display.

    Args:
        data: Dict of metric name to value. A dict containing an ``'error'``
            key is rendered as a single error line. ``None`` is rendered as
            an error line too instead of raising.

    Returns:
        A string starting with a header that names ``data['symbol']``
        (``'Unknown'`` when absent), followed by one section each for price,
        valuation and profitability metrics, then an "Other Information"
        section for every remaining key except ``'symbol'``. Entries whose
        value equals ``'N/A'`` are omitted, sections with no entries are
        skipped, and entries appear in the iteration order of ``data``.
    """
    if data is None:
        return "❌ Error: No data available"

    if 'error' in data:
        return f"❌ Error: {data['error']}"

    symbol = data.get('symbol', 'Unknown')

    # Organize data into sections
    price_metrics = ['Current Price', 'Previous Close', 'Open', 'Day\'s Range', '52 Week Range']
    valuation_metrics = ['Market Cap', 'Enterprise Value', 'P/E Ratio', 'Forward P/E', 'P/B Ratio', 'P/S Ratio', 'PEG Ratio']
    profitability_metrics = ['ROE', 'ROA', 'Operating Margin', 'Profit Margin']

    sections = [
        ("💰 **Price Information**", price_metrics),
        ("📈 **Valuation Metrics**", valuation_metrics),
        ("💼 **Profitability**", profitability_metrics),
    ]

    # Built once: every key that already has a home (or is the header).
    categorized = {'symbol'}
    for _, metrics in sections:
        categorized.update(metrics)

    # Collect fragments and join once rather than growing a string with +=.
    parts = [f"📊 **Financial Data for {symbol}**\n\n"]

    for section_name, metrics in sections:
        wanted = set(metrics)
        section_data = [(k, v) for k, v in data.items() if k in wanted and v != 'N/A']
        if section_data:
            parts.append(f"{section_name}\n")
            parts.extend(f" ‒ {key}: {value}\n" for key, value in section_data)
            parts.append("\n")

    # Anything not claimed by a section above
    other_metrics = [(k, v) for k, v in data.items()
                     if k not in categorized and v != 'N/A']

    if other_metrics:
        parts.append("📋 **Other Information**\n")
        parts.extend(f" ‒ {key}: {value}\n" for key, value in other_metrics)

    return "".join(parts)