rayistern committed on
Commit
5901795
·
verified ·
1 Parent(s): 977f75c

Added embedding handler

Browse files
Files changed (2) hide show
  1. handler.py +31 -0
  2. requirements.txt +3 -0
handler.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoModel, AutoTokenizer
2
+ import torch
3
+
4
class EndpointHandler():
    """Custom inference handler that returns token-level embeddings.

    Every input text is prefixed with ``PROMPT``, tokenized, and run through
    the model; the model's ``last_hidden_state`` is returned as nested lists.
    """

    # Context prefix prepended to every input text before tokenization.
    PROMPT = "Contextual understanding of the following text, from the perspective of Chassidic philosophy: "

    def __init__(self, path=""):
        """Load the tokenizer and model from ``path``.

        Args:
            path: Directory or model identifier handed to ``from_pretrained``.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        self.model = AutoModel.from_pretrained(path)

    def __call__(self, data):
        """Embed the text(s) found under ``data['inputs']``.

        Args:
            data: Request payload. ``data['inputs']`` is either a single
                string or a list/tuple of strings.

        Returns:
            ``{'embeddings': ...}``. For a single string the value is one
            list of per-token vectors (the same shape the handler has always
            returned). For a list of strings the value is a list with one
            such entry per input text, with padding positions removed.

        Raises:
            KeyError: If ``data`` has no ``'inputs'`` entry.
            TypeError: If ``inputs`` is neither a string nor a list/tuple
                of strings.
        """
        inputs = data['inputs']

        # Normalize to a list of texts, remembering whether the caller sent a
        # bare string so the response keeps its original (unbatched) shape.
        is_single = isinstance(inputs, str)
        if is_single:
            texts = [inputs]
        elif isinstance(inputs, (list, tuple)) and all(isinstance(text, str) for text in inputs):
            texts = list(inputs)
        else:
            raise TypeError("'inputs' must be a string or a list of strings")

        # Nothing to embed; skip the tokenizer/model round trip entirely.
        if not texts:
            return {'embeddings': []}

        # Combine the prompt with each actual input
        combined_inputs = [self.PROMPT + text for text in texts]

        # Prepare the text for the model
        encoded_input = self.tokenizer(combined_inputs, return_tensors='pt', padding=True, truncation=True, max_length=512)

        # Generate embeddings without tracking gradients
        with torch.no_grad():
            outputs = self.model(**encoded_input)

        hidden = outputs.last_hidden_state

        if is_single:
            # Index the batch axis explicitly: a bare ``squeeze()`` would also
            # drop any other size-1 axis (e.g. a one-token sequence).
            return {'embeddings': hidden[0].tolist()}

        # In a padded batch, shorter texts carry padding positions; keep only
        # the positions the attention mask marks as real tokens.
        mask = encoded_input['attention_mask'] if 'attention_mask' in encoded_input else None
        if mask is None:
            embeddings = [row.tolist() for row in hidden]
        else:
            embeddings = [row[keep.bool()].tolist() for row, keep in zip(hidden, mask)]

        # Return the embeddings as nested lists (serializable format)
        return {'embeddings': embeddings}
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ torch==1.11.0
2
+ transformers==4.18.0
3
+ numpy==1.22.3