Spaces:

reyhanadr
/

Sentiment_BitcoinHalving_IndoBERTweet

Sleeping

App Files Files Community

reyhanadr committed on Sep 26, 2025

Commit

c5e83c6

1 Parent(s): a7abdb8

Deploy aplikasi FastAPI sentiment analysis IndoBERTweet

Browse files

Files changed (4) hide show

__pycache__/app.cpython-310.pyc +0 -0
app.py +133 -0
model_indoBERTweet_100Epochs_sentiment.pth +3 -0
requirements.txt +8 -0

__pycache__/app.cpython-310.pyc ADDED Viewed

Binary file (4.4 kB). View file

app.py ADDED Viewed

	@@ -0,0 +1,133 @@

+import torch
+import emoji
+import re
+from transformers import BertTokenizer, BertForSequenceClassification
+from fastapi import FastAPI
+from pydantic import BaseModel
+# ====================================================================
+# 1. CORE LOGIC CLASSES (copied from the original code)
+# ====================================================================
+class TextCleaner:
+    """Normalize raw tweet text before it is tokenized for the sentiment model.
+
+    The pipeline lowercases the text, masks mentions and URLs, strips
+    non-ASCII characters, emoticons and most punctuation, and collapses
+    elongated character runs.
+    """
+    def __init__(self):
+        # Punctuation whose runs of three or more are collapsed to a single one.
+        self.character = ['.', ',', ';', ':', '?', '!', '(', ')', '[', ']', '{', '}', '<', '>', '"', '/', '\'', '-', '@']
+        # The lowercase letters a-z get the same treatment.
+        self.character.extend([chr(i) for i in range(ord('a'), ord('z') + 1)])
+        # One precompiled pattern replaces the former compile-and-substitute
+        # loop over every character: a listed character followed by at least
+        # two more copies of itself. Collapsing one run never creates another,
+        # so a single pass yields the same result as the sequential passes.
+        # The pattern is built once here; later changes to self.character are
+        # not picked up.
+        char_class = ''.join(re.escape(c) for c in self.character)
+        self._repeat_pattern = re.compile('([' + char_class + r'])\1{2,}')
+    def repeatcharClean(self, text):
+        """Collapse runs of 3+ identical listed characters into one.
+
+        Example: 'heloooo' -> 'helo'. Runs of exactly two characters and
+        characters outside the list (digits, uppercase letters) are untouched.
+        """
+        return self._repeat_pattern.sub(r'\1', text)
+    def clean_review(self, text):
+        """Return the cleaned, single-spaced form of *text*.
+
+        Steps, in order: lowercase; collapse whitespace; replace non-ASCII
+        runs with a space; mask words starting with '@' as '@USER' and words
+        starting with 'http' as 'HTTPURL'; remove ':shortcode:' tokens,
+        emoticons and punctuation; collapse repeated characters; trim.
+        """
+        text = text.lower()
+        text = re.sub(r'\s+', ' ', text)
+        # NOTE(review): emoji are non-ASCII, so this step already removes them
+        # and emoji.demojize() below has nothing left to convert. Kept as-is;
+        # confirm the order matches the preprocessing used during training.
+        text = re.sub(r'[^\x00-\x7F]+', ' ', text)
+        new_text = []
+        for word in text.split(" "):
+            # A lone '@' is kept; any longer @-word becomes the mention token.
+            word = '@USER' if word.startswith('@') and len(word) > 1 else word
+            word = 'HTTPURL' if word.startswith('http') else word
+            new_text.append(word)
+        text = " ".join(new_text)
+        text = emoji.demojize(text)
+        # Remove ':name:' style shortcodes.
+        text = re.sub(r':[A-Za-z_-]+:', ' ', text)
+        # Remove emoticons such as ':)', ';p' or 'xd'.
+        # NOTE(review): this also matches inside ordinary words
+        # ('export' -> 'e ort'); left unchanged so inference-time cleaning
+        # stays identical to whatever the model was trained on - confirm.
+        text = re.sub(r"([xX;:]'?[dDpPvVoO3)(])", ' ', text)
+        # Replace the listed punctuation with spaces ('@', '!', '?', '-' and
+        # the apostrophe are not in this set and therefore survive).
+        text = re.sub(r'["#$%&()*+,./:;<=>\[\]\\^_`{|}~]', ' ', text)
+        text = self.repeatcharClean(text)
+        # Tidy up the extra spaces introduced by the substitutions above.
+        text = re.sub(r'\s+', ' ', text).strip()
+        return text
+class SentimentPredictor:
+    """Run single-text sentiment inference on CPU with a tokenizer/model pair."""
+    # Class index -> label returned to callers.
+    # NOTE(review): this order must match the label encoding used when the
+    # checkpoint was trained - confirm against the training code.
+    _LABELS = {0: 'Positif', 1: 'Netral', 2: 'Negatif'}
+    def __init__(self, tokenizer, model):
+        """Store the tokenizer and move *model* to the CPU device."""
+        self.tokenizer = tokenizer
+        self.model = model
+        self.device = torch.device("cpu")
+        self.model.to(self.device)
+    def predict(self, text: str) -> tuple[str, float]:
+        """Classify *text* and return ``(label, confidence)``.
+
+        The label is 'Negatif', 'Netral' or 'Positif'; the confidence is the
+        softmax probability of the predicted class. Input longer than 280
+        tokens is truncated by the tokenizer.
+        """
+        encoded = self.tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=280)
+        encoded = {name: tensor.to(self.device) for name, tensor in encoded.items()}
+        # Inference only: skip gradient tracking.
+        with torch.no_grad():
+            logits = self.model(**encoded).logits
+        predicted_label = torch.argmax(logits, dim=1).item()
+        probabilities = torch.softmax(logits, dim=1)
+        confidence_score = probabilities[0][predicted_label].item()
+        # Any index other than 1 or 2 maps to 'Positif', exactly as the
+        # original if/elif/else chain did.
+        sentiment = self._LABELS.get(predicted_label, 'Positif')
+        return sentiment, confidence_score
+# ====================================================================
+# 2. MODEL & FASTAPI APPLICATION INITIALIZATION
+# (Runs only once, when the API first starts)
+# ====================================================================
+print("Memuat model dan tokenizer...")
+# Load the tokenizer and the base architecture with a 3-label classification head.
+tokenizer = BertTokenizer.from_pretrained('indolem/indobertweet-base-uncased')
+model = BertForSequenceClassification.from_pretrained('indolem/indobertweet-base-uncased', num_labels=3)
+# Overwrite the base weights with the fine-tuned checkpoint shipped with the app.
+model_path = 'model_indoBERTweet_100Epochs_sentiment.pth'
+# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
+state_dict = torch.load(model_path, map_location=torch.device('cpu'))
+# strict=False tolerates key mismatches, which would otherwise go unnoticed
+# and could leave layers (e.g. the classifier head) at their initial values.
+# Report the mismatches instead of discarding them; this stays best-effort
+# and does not abort startup.
+load_result = model.load_state_dict(state_dict, strict=False)
+if load_result.missing_keys:
+    print(f"PERINGATAN: bobot tidak ditemukan di checkpoint: {load_result.missing_keys}")
+if load_result.unexpected_keys:
+    print(f"PERINGATAN: bobot di checkpoint tidak dipakai model: {load_result.unexpected_keys}")
+model.eval()
+print("Model berhasil dimuat.")
+# Shared, module-level instances used by the request handlers.
+text_cleaner = TextCleaner()
+sentiment_predictor = SentimentPredictor(tokenizer, model)
+# Create the FastAPI application.
+app = FastAPI(
+    title="API Klasifikasi Sentimen",
+    description="Sebuah API untuk menganalisis sentimen teks Bahasa Indonesia."
+)
+# ====================================================================
+# 3. API INPUT & OUTPUT MODELS
+# ====================================================================
+# Request body accepted by POST /predict.
+# (Plain comments are used instead of a docstring so the generated API schema
+# is unchanged.)
+class TextInput(BaseModel):
+    # Raw text to classify.
+    text: str
+# Response body returned by POST /predict.
+# (Plain comments are used instead of a docstring so the generated API schema
+# is unchanged.)
+class PredictionOutput(BaseModel):
+    # Predicted sentiment label.
+    sentiment: str
+    # Confidence score reported for the predicted label.
+    confidence: float
+# ====================================================================
+# 4. PREDICTION ENDPOINTS
+# ====================================================================
+@app.get("/")
+def read_root():
+    # Landing route: answers with a static welcome payload.
+    welcome = {"message": "Selamat datang di API Klasifikasi Sentimen"}
+    return welcome
+@app.post("/predict", response_model=PredictionOutput)
+def predict_sentiment(request: TextInput):
+    # Clean the submitted text, classify the cleaned text, and wrap the
+    # resulting label and score in the response model.
+    label, score = sentiment_predictor.predict(
+        text_cleaner.clean_review(request.text)
+    )
+    return PredictionOutput(sentiment=label, confidence=score)

model_indoBERTweet_100Epochs_sentiment.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3a69f5d96885cfad1f22458b99c73c2336dbe3e4c1e2541428936f571e3ce363
+size 442330099

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+fastapi
+uvicorn[standard]
+torch
+transformers
+emoji
+pandas
+pydantic
+python-multipart