# stock-predictor / app.py
# henriquebap's picture
# Fix: Use yfinance 0.2.28 (no websockets dependency)
# b40af40 verified
"""
Stock Price Predictor - LSTM
HuggingFace Spaces - FIAP Tech Challenge Fase 4
Features:
- Suporte a linguagem natural (Apple → AAPL)
- 50+ tickers populares BR e internacionais
- Modelo LSTM do HuggingFace Hub
"""
import logging
import re
import warnings
from datetime import datetime, timedelta

import gradio as gr
import joblib
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from huggingface_hub import hf_hub_download
from sklearn.preprocessing import MinMaxScaler

warnings.filterwarnings('ignore')
# ============================================================================
# MAPEAMENTO DE EMPRESAS (Linguagem Natural → Ticker)
# ============================================================================
# Lower-case company names and aliases mapped to ticker symbols.
# Insertion order matters: the partial-match fallback in resolve_ticker()
# walks this dict in order.
COMPANY_TO_TICKER = {
    # Tech giants - US
    "apple": "AAPL",
    "maçã": "AAPL",
    "google": "GOOGL",
    "alphabet": "GOOGL",
    "microsoft": "MSFT",
    "amazon": "AMZN",
    "meta": "META",
    "facebook": "META",
    "fb": "META",
    "nvidia": "NVDA",
    "tesla": "TSLA",
    "netflix": "NFLX",
    "adobe": "ADBE",
    "salesforce": "CRM",
    "intel": "INTC",
    "amd": "AMD",
    "ibm": "IBM",
    "oracle": "ORCL",
    "cisco": "CSCO",
    "paypal": "PYPL",
    "uber": "UBER",
    "airbnb": "ABNB",
    "spotify": "SPOT",
    "zoom": "ZM",
    "shopify": "SHOP",
    # NOTE(review): confirm TWTR still resolves on the data provider.
    "twitter": "TWTR",
    "x": "TWTR",
    # Finance - US
    "jpmorgan": "JPM",
    "jp morgan": "JPM",
    "bank of america": "BAC",
    "bofa": "BAC",
    "wells fargo": "WFC",
    "goldman sachs": "GS",
    "morgan stanley": "MS",
    "visa": "V",
    "mastercard": "MA",
    "american express": "AXP",
    "amex": "AXP",
    "berkshire": "BRK-B",
    "buffett": "BRK-B",
    # Consumer - US
    "walmart": "WMT",
    "coca cola": "KO",
    "coca-cola": "KO",
    "coke": "KO",
    "pepsi": "PEP",
    "pepsico": "PEP",
    "mcdonalds": "MCD",
    "mcdonald's": "MCD",
    "starbucks": "SBUX",
    "nike": "NKE",
    "disney": "DIS",
    "home depot": "HD",
    "costco": "COST",
    "target": "TGT",
    # Healthcare - US
    "johnson & johnson": "JNJ",
    "j&j": "JNJ",
    "pfizer": "PFE",
    "moderna": "MRNA",
    "unitedhealth": "UNH",
    "merck": "MRK",
    "abbvie": "ABBV",
    # Energy - US
    "exxon": "XOM",
    "exxonmobil": "XOM",
    "chevron": "CVX",
    # Brazil - B3
    "petrobras": "PETR4.SA",
    "petro": "PETR4.SA",
    "vale": "VALE3.SA",
    "itau": "ITUB4.SA",
    "itaú": "ITUB4.SA",
    "bradesco": "BBDC4.SA",
    "banco do brasil": "BBAS3.SA",
    "bb": "BBAS3.SA",
    "ambev": "ABEV3.SA",
    "magazine luiza": "MGLU3.SA",
    "magalu": "MGLU3.SA",
    "weg": "WEGE3.SA",
    "b3": "B3SA3.SA",
    "nubank": "NU",
    "mercado livre": "MELI",
}
# Popular tickers shown in the UI, grouped by display category.
POPULAR_TICKERS = {
    "🇺🇸 Tech US": [
        "AAPL", "GOOGL", "MSFT", "AMZN", "META", "NVDA", "TSLA", "NFLX",
    ],
    "💰 Finance US": [
        "JPM", "BAC", "V", "MA", "GS", "BRK-B",
    ],
    "🛒 Consumer US": [
        "WMT", "KO", "MCD", "SBUX", "NKE", "DIS",
    ],
    "💊 Healthcare US": [
        "JNJ", "PFE", "UNH", "MRNA",
    ],
    "🇧🇷 Brasil B3": [
        "PETR4.SA", "VALE3.SA", "ITUB4.SA", "BBDC4.SA", "ABEV3.SA", "WEGE3.SA",
    ],
}
def resolve_ticker(user_input: str) -> str:
    """Resolve free-form user text (company name or ticker) to a ticker symbol.

    Resolution order:

    1. Input with no lowercase characters and at most 10 characters is taken
       to be a ticker already and returned unchanged.
    2. Exact, case-insensitive lookup in ``COMPANY_TO_TICKER``.
    3. A known company name appearing as whole word(s) inside the input
       (e.g. "Apple Inc." -> "apple"); the longest such name wins.
    4. The input (3+ characters) appearing inside a known company name
       (e.g. "petrob" -> "petrobras"); the first hit in dict order wins.
    5. Otherwise the input upper-cased, as a possibly unknown ticker.

    Args:
        user_input: Raw text typed by the user.

    Returns:
        The resolved ticker symbol, or ``""`` when the input is blank.
    """
    text = user_input.strip()
    if not text:
        # The empty string is a substring of every company name, so without
        # this guard blank input would resolve to the first mapping entry.
        return ""

    # Ticker-shaped input. The limit is 10 rather than 5 so that suffixed
    # symbols such as "PETR4.SA" pass.
    if text == text.upper() and len(text) <= 10:
        return text

    key = text.lower()
    exact = COMPANY_TO_TICKER.get(key)
    if exact is not None:
        return exact

    # Whole-word containment only: a plain substring test lets short aliases
    # hijack unrelated input ("x" inside "exxon mobil", "bb" inside
    # "bbdc4.sa").
    whole_word_hits = [
        name for name in COMPANY_TO_TICKER
        if re.search(rf"(?<!\w){re.escape(name)}(?!\w)", key)
    ]
    if whole_word_hits:
        return COMPANY_TO_TICKER[max(whole_word_hits, key=len)]

    # Fragment of a known name. One- and two-character fragments match far
    # too many names to be meaningful, so they are not considered.
    min_fragment_len = 3
    if len(key) >= min_fragment_len:
        for name, ticker in COMPANY_TO_TICKER.items():
            if key in name:
                return ticker

    # Unknown: hand it back upper-cased; it may still be a valid ticker.
    return text.upper()
# ============================================================================
# LSTM MODEL
# ============================================================================
class LSTMModel(nn.Module):
    """Stacked LSTM with a linear head producing one value per input sequence.

    The input is batch-first, ``(batch, sequence, input_size)``; only the
    output at the last time step is passed through dropout and the linear
    layer, so the result has shape ``(batch, 1)``.
    """

    def __init__(self, input_size=16, hidden_size=50, num_layers=2, dropout=0.2):
        super().__init__()
        # Dropout between recurrent layers is only requested when there is
        # more than one layer.
        recurrent_dropout = dropout if num_layers > 1 else 0
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=recurrent_dropout,
        )
        self.fc = nn.Linear(hidden_size, 1)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return the head output for the final time step of each sequence."""
        sequence_out, _state = self.lstm(x)
        last_step = sequence_out[:, -1, :]
        regularised = self.dropout(last_step)
        return self.fc(regularised)
# ============================================================================
# DATA FUNCTIONS (com fallback robusto)
# ============================================================================
def load_stock_data(symbol: str, days: int = 400) -> pd.DataFrame:
    """Download recent price history for ``symbol`` with ``yfinance.download``.

    The returned frame has a plain integer index, lower-cased column names,
    and the date column (if present) renamed to ``timestamp``.

    Args:
        symbol: Ticker symbol passed to yfinance.
        days: Number of calendar days of history to request, counted back
            from now.

    Returns:
        DataFrame containing at least ``open``, ``high``, ``low``, ``close``
        and ``volume`` columns.

    Raises:
        ValueError: If the download fails, returns no rows, or lacks one of
            the required price columns. The original exception is chained.
    """
    # Kept as a function-local import, as in the original module.
    import yfinance as yf

    end = datetime.now()
    start = end - timedelta(days=days)
    try:
        # download() is used instead of Ticker.history (reported as more
        # stable by the original author).
        df = yf.download(
            symbol,
            start=start.strftime('%Y-%m-%d'),
            end=end.strftime('%Y-%m-%d'),
            progress=False,
            auto_adjust=True,
        )
        if df is None or df.empty:
            raise ValueError(f"Dados não encontrados para {symbol}")

        # Flatten MultiIndex columns by keeping only the first level.
        if isinstance(df.columns, pd.MultiIndex):
            df.columns = df.columns.get_level_values(0)

        df = df.reset_index()
        df.columns = df.columns.str.lower()

        # Columns are already lower-case here, so only the lower-case
        # spellings of the date column can occur.
        for candidate in ('date', 'datetime'):
            if candidate in df.columns:
                df = df.rename(columns={candidate: 'timestamp'})
                break

        # Fail here with a clear message rather than later with a bare
        # KeyError inside feature engineering.
        missing = [
            col for col in ('open', 'high', 'low', 'close', 'volume')
            if col not in df.columns
        ]
        if missing:
            raise ValueError(f"Colunas ausentes: {', '.join(missing)}")

        return df
    except Exception as e:
        raise ValueError(f"Erro ao obter dados para {symbol}: {str(e)}") from e
def create_features(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` extended with technical-indicator columns.

    Expects ``open``, ``high``, ``low``, ``close`` and ``volume`` columns.
    Adds relative price changes, rolling means (7/30/90), rolling standard
    deviations (7/30), volume change and 7-row volume mean, and a 4-row
    momentum. Infinite values are replaced by 0 and remaining gaps are
    back-filled then forward-filled. The input frame is not modified.
    """
    frame = df.copy()
    close = frame['close']
    volume = frame['volume']

    # A zero denominator is swapped for 1 so the ratio stays finite.
    safe_low = frame['low'].replace(0, 1)
    safe_open = frame['open'].replace(0, 1)

    frame['price_change'] = close.pct_change().fillna(0)
    frame['high_low_pct'] = ((frame['high'] - frame['low']) / safe_low).fillna(0)
    frame['close_open_pct'] = ((close - frame['open']) / safe_open).fillna(0)

    # min_periods=1 lets the first rows use whatever history exists.
    for window in (7, 30, 90):
        frame[f'ma_{window}'] = close.rolling(window, min_periods=1).mean()

    # The std of a single observation is NaN, hence the fill.
    for window in (7, 30):
        rolling_std = close.rolling(window, min_periods=1).std()
        frame[f'volatility_{window}'] = rolling_std.fillna(0)

    frame['volume_change'] = volume.pct_change().fillna(0)
    frame['volume_ma_7'] = volume.rolling(7, min_periods=1).mean()
    frame['momentum'] = (close - close.shift(4)).fillna(0)

    # Scrub infinities (e.g. percentage change from a zero volume).
    cleaned = frame.replace([np.inf, -np.inf], 0)
    return cleaned.bfill().ffill()
# ============================================================================
# MODEL HUB
# ============================================================================
# Hugging Face Hub repository id passed to hf_hub_download() when fetching
# model checkpoints and scaler files.
MODEL_REPO = "henriquebap/stock-predictor-lstm"
# In-process cache of loaded model bundles, keyed by the requested symbol.
model_cache: dict = {}
def load_model_from_hub(symbol: str):
    """Load (and cache) the LSTM bundle for ``symbol`` from the model repo.

    A symbol-specific checkpoint (``lstm_model_<symbol>.pth`` plus
    ``scaler_<symbol>.pkl``) is tried first; if that download fails for any
    reason, the shared ``BASE`` checkpoint is used instead.

    The checkpoint is read as a dict with ``input_size``, ``hidden_size``,
    ``num_layers``, optional ``dropout`` and ``model_state_dict``; the scaler
    file as a dict with ``scaler``, ``target_scaler`` and ``feature_columns``.

    Args:
        symbol: Ticker symbol; also the cache key.

    Returns:
        A dict with keys ``model`` (in eval mode), ``scaler``,
        ``target_scaler``, ``feature_columns`` and ``type`` ("específico" or
        "base"), or ``None`` when no model could be loaded. Failures are not
        cached, so a later call retries.
    """
    if symbol in model_cache:
        return model_cache[symbol]

    try:
        try:
            model_path = hf_hub_download(
                repo_id=MODEL_REPO, filename=f"lstm_model_{symbol}.pth"
            )
            scaler_path = hf_hub_download(
                repo_id=MODEL_REPO, filename=f"scaler_{symbol}.pkl"
            )
            model_type = "específico"
        except Exception:
            # Deliberate best-effort: any failure fetching the specific
            # files falls back to the shared BASE model. `Exception` rather
            # than a bare `except` so KeyboardInterrupt/SystemExit propagate.
            model_path = hf_hub_download(
                repo_id=MODEL_REPO, filename="lstm_model_BASE.pth"
            )
            scaler_path = hf_hub_download(
                repo_id=MODEL_REPO, filename="scaler_BASE.pkl"
            )
            model_type = "base"

        # NOTE(security): torch.load and joblib.load both unpickle the
        # downloaded files; this is only safe while MODEL_REPO is trusted.
        checkpoint = torch.load(model_path, map_location='cpu')
        model = LSTMModel(
            input_size=checkpoint['input_size'],
            hidden_size=checkpoint['hidden_size'],
            num_layers=checkpoint['num_layers'],
            dropout=checkpoint.get('dropout', 0.2),
        )
        model.load_state_dict(checkpoint['model_state_dict'])
        model.eval()

        scaler_data = joblib.load(scaler_path)
        bundle = {
            'model': model,
            'scaler': scaler_data['scaler'],
            'target_scaler': scaler_data['target_scaler'],
            'feature_columns': scaler_data['feature_columns'],
            'type': model_type,
        }
    except Exception:
        # Callers treat None as "no model available"; keep that contract but
        # leave a trace instead of swallowing the error silently.
        logging.getLogger(__name__).warning(
            "Could not load a model for %s from %s", symbol, MODEL_REPO,
            exc_info=True,
        )
        return None

    model_cache[symbol] = bundle
    return bundle
# ============================================================================
# PREDICTION
# ============================================================================
def _lstm_forecast(model_data, df_feat):
    """Run the loaded LSTM bundle on the last 60 feature rows; return the price."""
    window = 60
    feature_cols = model_data['feature_columns']
    # Features the model expects but the frame lacks are fed as zeros.
    for col in feature_cols:
        if col not in df_feat.columns:
            df_feat[col] = 0
    features = df_feat[feature_cols].values
    features_scaled = model_data['scaler'].transform(features)
    X = features_scaled[-window:].reshape(1, window, len(feature_cols))
    X_tensor = torch.FloatTensor(X)
    with torch.no_grad():
        pred_scaled = model_data['model'](X_tensor).numpy()[0, 0]
    return model_data['target_scaler'].inverse_transform([[pred_scaled]])[0, 0]


def _classify_change(change_pct):
    """Map a predicted percentage change to its (direction label, emoji) pair."""
    if change_pct > 1:
        return "📈 ALTA", "🟢"
    if change_pct < -1:
        return "📉 BAIXA", "🔴"
    return "➡️ LATERAL", "🟡"


def predict_stock(user_input: str) -> str:
    """Produce a Markdown forecast report for a company name or ticker.

    The input is resolved to a ticker, price history is loaded and turned
    into features, and a price is predicted by (in order of preference) the
    LSTM model from the Hub, a momentum fallback if LSTM inference raises,
    or a simple momentum rule when no model could be loaded.

    Args:
        user_input: Company name or ticker as typed by the user.

    Returns:
        A Markdown string: the report on success, otherwise a user-facing
        error message. Exceptions while processing are reported in the
        returned text rather than raised.
    """
    if not user_input or not user_input.strip():
        return "❌ Digite o nome de uma empresa ou ticker (ex: Apple, AAPL, Petrobras)"

    symbol = resolve_ticker(user_input)
    original_input = user_input.strip()
    try:
        df = load_stock_data(symbol)
        if len(df) < 70:
            return f"❌ Dados insuficientes para {symbol} (mínimo 70 dias)"
        current_price = float(df['close'].iloc[-1])
        df_feat = create_features(df)

        model_data = load_model_from_hub(symbol)
        if model_data:
            try:
                predicted_price = _lstm_forecast(model_data, df_feat)
                model_type = f"LSTM {model_data['type'].capitalize()}"
            except Exception:
                # Best-effort fallback when inference fails. `Exception`
                # rather than a bare `except` so KeyboardInterrupt/SystemExit
                # propagate; logged so the degradation leaves a trace.
                logging.getLogger(__name__).warning(
                    "LSTM inference failed for %s; using momentum fallback",
                    symbol, exc_info=True,
                )
                predicted_price = current_price * (1 + float(df_feat['momentum'].iloc[-1]) / current_price * 0.5)
                model_type = "Fallback"
        else:
            # No model available: simple momentum rule.
            momentum = float(df_feat['momentum'].iloc[-1])
            predicted_price = current_price + momentum * 0.3
            model_type = "Técnico"

        change_pct = ((predicted_price - current_price) / current_price) * 100
        direction, emoji = _classify_change(change_pct)

        # Technical indicators shown in the report.
        ma_7 = float(df_feat['ma_7'].iloc[-1])
        ma_30 = float(df_feat['ma_30'].iloc[-1])
        volatility = float(df_feat['volatility_7'].iloc[-1])
        trend = "📈 Positiva" if ma_7 > ma_30 else "📉 Negativa"

        # Recent performance relative to the close 5 and 21 rows back.
        week_change = ((current_price - float(df['close'].iloc[-5])) / float(df['close'].iloc[-5])) * 100 if len(df) > 5 else 0
        month_change = ((current_price - float(df['close'].iloc[-21])) / float(df['close'].iloc[-21])) * 100 if len(df) > 21 else 0

        # Show the name -> ticker conversion only when one took place.
        input_info = f"**Pesquisa**: {original_input} → **{symbol}**" if original_input.lower() != symbol.lower() else f"**Ticker**: {symbol}"
        return f"""
# {emoji} {direction} prevista para {symbol}
{input_info}
---
## 🤖 Modelo: {model_type}
| Métrica | Valor |
|---------|-------|
| **Preço Atual** | ${current_price:.2f} |
| **Previsão** | ${predicted_price:.2f} |
| **Variação** | {change_pct:+.2f}% |
---
## 📊 Indicadores Técnicos
| Indicador | Valor |
|-----------|-------|
| **MA 7 dias** | ${ma_7:.2f} |
| **MA 30 dias** | ${ma_30:.2f} |
| **Tendência** | {trend} |
| **Volatilidade** | ${volatility:.2f} |
---
## 📅 Performance Recente
| Período | Variação |
|---------|----------|
| **Semana** | {week_change:+.2f}% |
| **Mês** | {month_change:+.2f}% |
---
⚠️ **Disclaimer**: Previsão educacional. NÃO use para investimentos!
*🎓 Tech Challenge Fase 4 - FIAP Pós-Tech MLET*
"""
    except Exception as e:
        # UI boundary: surface the failure as text instead of raising.
        return f"""
❌ **Erro ao processar "{user_input}"**
**Possíveis causas:**
- Ticker inválido ou não encontrado
- API do Yahoo Finance temporariamente indisponível
- Empresa não listada na bolsa
**Tente:**
- Usar o ticker oficial (ex: AAPL, GOOGL, PETR4.SA)
- Verificar se a empresa está listada
- Aguardar alguns minutos e tentar novamente
**Erro técnico:** {str(e)[:100]}
"""
# ============================================================================
# GRADIO INTERFACE
# ============================================================================
# Criar tabela de tickers populares
def create_ticker_table():
    """Return Markdown listing each popular-ticker category on its own paragraph."""
    heading = "## 📋 Tickers Populares\n"
    rows = [
        f"**{category}**: {', '.join(tickers)}"
        for category, tickers in POPULAR_TICKERS.items()
    ]
    # Heading and rows are separated by blank lines.
    return "\n\n".join([heading, *rows])
# Gradio UI: intro text, then a row with the input column (textbox, button,
# popular-ticker list) and the result column, then an "about" section.
# NOTE(review): the source reached review with indentation stripped; the
# nesting of the `with` contexts below is reconstructed - confirm that each
# component sits in the intended container.
with gr.Blocks(title="Stock Predictor LSTM") as demo:
    gr.Markdown("""
# 📈 Stock Price Predictor - LSTM
### Sistema de Previsão com Deep Learning
🎓 **Tech Challenge Fase 4** - FIAP Pós-Tech Machine Learning Engineering
---
**💡 Dica**: Digite o nome da empresa ou o ticker!
- `Apple` ou `AAPL`
- `Nvidia` ou `NVDA`
- `Petrobras` ou `PETR4.SA`
""")
    with gr.Row():
        # Left column: query input plus the list of popular tickers.
        with gr.Column(scale=1):
            input_text = gr.Textbox(
                label="🔍 Empresa ou Ticker",
                placeholder="Ex: Apple, GOOGL, Petrobras, NVDA...",
                value="Apple"
            )
            submit_btn = gr.Button("🚀 Analisar", variant="primary")
            gr.Markdown(create_ticker_table())
        # Right column: receives the Markdown returned by predict_stock.
        with gr.Column(scale=2):
            output = gr.Markdown(label="Resultado")
    gr.Markdown("""
---
### 🧠 Sobre o Modelo
- **Arquitetura**: LSTM 2 camadas × 50 neurônios
- **Features**: 16 indicadores técnicos
- **Período**: 60 dias de histórico
- **Modelos Treinados**: AAPL, GOOGL (outros usam modelo BASE)
📦 **Model Hub**: [henriquebap/stock-predictor-lstm](https://huggingface.co/henriquebap/stock-predictor-lstm)
---
*Dezembro 2024 | FIAP Pós-Tech MLET*
""")
    # Both the button click and submitting the textbox run the same prediction.
    submit_btn.click(fn=predict_stock, inputs=input_text, outputs=output)
    input_text.submit(fn=predict_stock, inputs=input_text, outputs=output)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()