Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,43 +1,41 @@
|
|
| 1 |
-
import os
|
| 2 |
-
os.environ["HF_HOME"] = "./transformers" # Set HF_HOME to a local directory to avoid TensorFlow dependency
|
| 3 |
-
|
| 4 |
import streamlit as st
|
| 5 |
import requests
|
| 6 |
from transformers import pipeline, BertTokenizer
|
| 7 |
|
| 8 |
# Function to generate answers using the BERT model
|
| 9 |
-
def generate_answers(
|
| 10 |
-
# Download the research paper
|
| 11 |
-
response = requests.get(paper_link)
|
| 12 |
-
paper_text = response.text
|
| 13 |
-
|
| 14 |
# Initialize the BERT tokenizer
|
| 15 |
tokenizer = BertTokenizer.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
|
| 16 |
|
| 17 |
# Initialize the question-answering pipeline
|
| 18 |
model = pipeline("question-answering", model="bert-large-uncased-whole-word-masking-finetuned-squad")
|
| 19 |
|
| 20 |
-
#
|
| 21 |
-
|
| 22 |
-
for question in questions.split(","):
|
| 23 |
-
inputs = tokenizer(question.strip(), paper_text, return_tensors="pt")
|
| 24 |
-
inputs = {k: v.tolist()[0] for k, v in inputs.items()} # Convert tensors to lists
|
| 25 |
-
answer = model(**inputs)
|
| 26 |
-
answers.append(answer['answer'])
|
| 27 |
|
| 28 |
-
|
|
|
|
|
|
|
| 29 |
|
| 30 |
# Streamlit app
|
| 31 |
st.title("Research Paper Question Answering")
|
| 32 |
|
| 33 |
-
questions = st.text_input("Enter comma-separated questions:")
|
| 34 |
paper_link = st.text_input("Enter the link to the research paper (Arxiv link):")
|
|
|
|
| 35 |
|
| 36 |
-
if st.button("Generate
|
| 37 |
-
if not (
|
| 38 |
-
st.warning("Please provide both
|
| 39 |
else:
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import requests
|
| 3 |
from transformers import pipeline, BertTokenizer
|
| 4 |
|
| 5 |
# Function to generate answers using the BERT model
|
| 6 |
+
def generate_answers(chunks, question):
    """Answer *question* from the text contained in *chunks*.

    The chunks are joined with single spaces into one context string, which
    is passed together with the question to a BERT question-answering
    pipeline.

    Args:
        chunks: Iterable of strings holding the paper text, in order.
        question: The question to answer.

    Returns:
        The ``'answer'`` field of the pipeline result.

    Raises:
        ValueError: If the question is empty or the joined context contains
            no non-whitespace text.
    """
    # Concatenate chunks into a single text
    paper_text = ' '.join(chunks)

    # Validate before loading the model: loading is expensive, and the
    # pipeline would reject empty input anyway.
    if not question:
        raise ValueError("`question` cannot be empty")
    if not paper_text.strip():
        raise ValueError("`context` cannot be empty")

    # Initialize the question-answering pipeline. The pipeline loads its own
    # tokenizer for the checkpoint, so no separate tokenizer is created here.
    # NOTE: the model is loaded on every call.
    model = pipeline(
        "question-answering",
        model="bert-large-uncased-whole-word-masking-finetuned-squad",
    )

    # Generate an answer for the question based on the entire context
    answer = model(question=question, context=paper_text)
    return answer['answer']
|
| 19 |
|
| 20 |
# Streamlit app: ask for a paper link and a question, download the page,
# and show the answer produced by generate_answers().
st.title("Research Paper Question Answering")

paper_link = st.text_input("Enter the link to the research paper (Arxiv link):")
question = st.text_input("Enter your question:")

if st.button("Generate Answer"):
    if not (paper_link and question):
        st.warning("Please provide both the paper link and the question.")
    else:
        # Download the research paper. A timeout keeps the app from hanging
        # on an unresponsive host, and HTTP error statuses are reported
        # instead of being fed to the model as if they were paper text.
        try:
            response = requests.get(paper_link, timeout=30)
            response.raise_for_status()
        except requests.RequestException as exc:
            st.error(f"Could not download the paper: {exc}")
        else:
            # Split the paper text into chunks of 512 words. Splitting on
            # whitespace (rather than slicing characters) keeps words intact
            # when the chunks are later re-joined with spaces.
            chunk_size = 512
            words = response.text.split()
            paper_chunks = [
                ' '.join(words[i:i + chunk_size])
                for i in range(0, len(words), chunk_size)
            ]

            if not paper_chunks:
                st.warning("The downloaded page contained no text.")
            else:
                # Generate answer based on chunks
                answer = generate_answers(paper_chunks, question)
                st.success("Answer generated successfully!")
                st.text("Generated Answer:")
                st.write(answer)
|