reyhanadr committed on
Commit c5e83c6 · 1 Parent(s): a7abdb8

Deploy FastAPI IndoBERTweet sentiment analysis application

__pycache__/app.cpython-310.pyc ADDED
Binary file (4.4 kB).
 
app.py ADDED
@@ -0,0 +1,133 @@
+ import torch
+ import emoji
+ import re
+ from transformers import BertTokenizer, BertForSequenceClassification
+ from fastapi import FastAPI
+ from pydantic import BaseModel
+
+ # ====================================================================
+ # 1. YOUR LOGIC CLASSES (copied from your code)
+ # ====================================================================
+
+ class TextCleaner:
+     def __init__(self):
+         # This character list is simplified because the loop below already covers the letters a-z
+         self.character = ['.', ',', ';', ':', '?', '!', '(', ')', '[', ']', '{', '}', '<', '>', '"', '/', '\'', '-', '@']
+         # Add every lowercase letter to the character list used for cleaning
+         self.character.extend([chr(i) for i in range(ord('a'), ord('z') + 1)])
+
+     def repeatcharClean(self, text):
+         for char_to_clean in self.character:
+             # Use a regex to collapse three or more repeated characters into one
+             # Example: 'heloooo' -> 'helo'
+             pattern = re.compile(re.escape(char_to_clean) + r'{3,}')
+             text = pattern.sub(char_to_clean, text)
+         return text
+
+     def clean_review(self, text):
+         text = text.lower()
+         text = re.sub(r'\s+', ' ', text)
+         text = re.sub(r'[^\x00-\x7F]+', ' ', text)
+
+         new_text = []
+         for word in text.split(" "):
+             word = '@USER' if word.startswith('@') and len(word) > 1 else word
+             word = 'HTTPURL' if word.startswith('http') else word
+             new_text.append(word)
+         text = " ".join(new_text)
+
+         text = emoji.demojize(text)
+         text = re.sub(r':[A-Za-z_-]+:', ' ', text)
+         text = re.sub(r"([xX;:]'?[dDpPvVoO3)(])", ' ', text)
+         text = re.sub(r'["#$%&()*+,./:;<=>\[\]\\^_`{|}~]', ' ', text)
+         text = self.repeatcharClean(text)
+
+         # Remove the extra whitespace that the cleaning steps may have introduced
+         text = re.sub(r'\s+', ' ', text).strip()
+         return text
+
+ class SentimentPredictor:
+     def __init__(self, tokenizer, model):
+         self.tokenizer = tokenizer
+         self.model = model
+         self.device = torch.device("cpu")
+         self.model.to(self.device)
+
+     def predict(self, text: str) -> tuple[str, float]:
+         inputs = self.tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=280)
+         inputs = {k: v.to(self.device) for k, v in inputs.items()}
+
+         with torch.no_grad():
+             outputs = self.model(**inputs)
+
+         logits = outputs.logits
+         predicted_label = torch.argmax(logits, dim=1).item()
+
+         probabilities = torch.softmax(logits, dim=1)
+         confidence_score = probabilities[0][predicted_label].item()
+
+         if predicted_label == 2:
+             sentiment = 'Negatif'
+         elif predicted_label == 1:
+             sentiment = 'Netral'
+         else:  # predicted_label == 0
+             sentiment = 'Positif'
+
+         return sentiment, confidence_score
+
+ # ====================================================================
+ # 2. MODEL & FASTAPI APP INITIALIZATION
+ # (This runs only once, when the API first starts)
+ # ====================================================================
+
+ print("Memuat model dan tokenizer...")
+ # Load the tokenizer and the base model
+ tokenizer = BertTokenizer.from_pretrained('indolem/indobertweet-base-uncased')
+ model = BertForSequenceClassification.from_pretrained('indolem/indobertweet-base-uncased', num_labels=3)
+
+ # Load the fine-tuned weights you trained
+ model_path = 'model_indoBERTweet_100Epochs_sentiment.pth'
+ state_dict = torch.load(model_path, map_location=torch.device('cpu'))
+ model.load_state_dict(state_dict, strict=False)
+ model.eval()
+ print("Model berhasil dimuat.")
+
+ # Create instances of your classes
+ text_cleaner = TextCleaner()
+ sentiment_predictor = SentimentPredictor(tokenizer, model)
+
+ # Initialize the FastAPI application
+ app = FastAPI(
+     title="API Klasifikasi Sentimen",
+     description="Sebuah API untuk menganalisis sentimen teks Bahasa Indonesia."
+ )
+
+ # ====================================================================
+ # 3. API INPUT & OUTPUT MODELS
+ # ====================================================================
+
+ class TextInput(BaseModel):
+     text: str
+
+ class PredictionOutput(BaseModel):
+     sentiment: str
+     confidence: float
+
+ # ====================================================================
+ # 4. PREDICTION ENDPOINT
+ # ====================================================================
+
+ @app.get("/")
+ def read_root():
+     return {"message": "Selamat datang di API Klasifikasi Sentimen"}
+
+ @app.post("/predict", response_model=PredictionOutput)
+ def predict_sentiment(request: TextInput):
+     # Step 1: Clean the input text
+     cleaned_text = text_cleaner.clean_review(request.text)
+
+     # Step 2: Run the prediction on the cleaned text
+     sentiment, confidence = sentiment_predictor.predict(cleaned_text)
+
+     # Step 3: Return the prediction result
+     return PredictionOutput(sentiment=sentiment, confidence=confidence)
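
Usage note (not part of the commit): once the server is running, for example with `uvicorn app:app` (uvicorn defaults to port 8000), the `/predict` endpoint can be called with a short client script. The sketch below uses only the Python standard library; the base URL and the example sentence are assumptions, not values taken from this repository.

# Hypothetical client sketch; assumes the API is reachable at http://127.0.0.1:8000.
import json
import urllib.request

BASE_URL = "http://127.0.0.1:8000"  # adjust to wherever the app is actually deployed

payload = json.dumps({"text": "Pelayanannya cepat dan ramah, sangat puas!"}).encode("utf-8")
request = urllib.request.Request(
    f"{BASE_URL}/predict",
    data=payload,
    headers={"Content-Type": "application/json"},
    method="POST",
)

with urllib.request.urlopen(request) as response:
    result = json.loads(response.read().decode("utf-8"))

# The response shape mirrors PredictionOutput: {"sentiment": "...", "confidence": 0.xx}
print(result["sentiment"], round(result["confidence"], 4))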
model_indoBERTweet_100Epochs_sentiment.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3a69f5d96885cfad1f22458b99c73c2336dbe3e4c1e2541428936f571e3ce363
+ size 442330099
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ fastapi
+ uvicorn[standard]
+ torch
+ transformers
+ emoji
+ pandas
+ pydantic
+ python-multipart
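
Not part of the commit, but a minimal smoke test of the route wiring could look like the sketch below. It assumes httpx is installed (FastAPI's TestClient depends on it) and that the .pth weights file is present locally, since importing app loads the model at import time; the file name test_app.py and the example sentence are hypothetical.

# test_app.py: hypothetical smoke test, not included in this commit.
# Assumes httpx is installed and the model weights are available locally.
from fastapi.testclient import TestClient

from app import app

client = TestClient(app)

def test_predict_returns_label_and_confidence():
    response = client.post("/predict", json={"text": "Produk ini bagus banget!"})
    assert response.status_code == 200
    body = response.json()
    assert body["sentiment"] in {"Positif", "Netral", "Negatif"}
    assert 0.0 <= body["confidence"] <= 1.0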