Final_Assignment

Sleeping

App Files Files Community

olcapone committed on Aug 23

Commit

bc13e30

verified ·

1 Parent(s): e258014

Update app.py (#5)

Browse files

- Update app.py (a29f3e9f784498ce2be0e79a1d2f4f2d2e37e0ff)

Files changed (1) hide show

app.py +98 -13

app.py CHANGED Viewed

@@ -3,11 +3,101 @@ import gradio as gr
 import requests
 import pandas as pd
 import time
 from smolagents import LiteLLMModel, CodeAgent, Tool
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Agent Tools ---
 class MathSolver(Tool):
     name = "math_solver"
@@ -70,13 +160,14 @@ def select_model(provider="groq"):
     HF_MODEL_NAME = "huggingfaceh4/zephyr-7b-beta"
     if provider == "groq":
-        return LiteLLMModel(model_id="groq/llama-3.1-8b-instant",
                                 api_key=os.getenv("GROQ_API_KEY"))
         if not api_key:
             raise ValueError("GROQ_API_KEY environment variable is not set")
         return LiteLLMModel(model_id=GROQ_MODEL_NAME, api_key=api_key)
     elif provider == "hf":
         api_key = os.getenv("HF_TOKEN")
@@ -111,6 +202,7 @@ class BasicAgent:
             "For string answers, omit articles ('a', 'the') and use full words. "
             "For lists, output in comma-separated format with no conjunctions. "
             "If the answer is not found, say `- unknown`."
         )
     def __call__(self, question: str) -> str:
@@ -120,15 +212,8 @@ class BasicAgent:
         for attempt in range(max_retries):
             try:
                 result = self.agent.run(question)
-                # Extract only the final answer without any wrappers
-                final_str = str(result).strip()
-                # Remove any potential prefixes
-                if final_str.startswith('[ANSWER]'):
-                    final_str = final_str[8:].strip()
-                if final_str.startswith('Final answer:'):
-                    final_str = final_str[13:].strip()
-                if final_str.startswith('Answer:'):
-                    final_str = final_str[7:].strip()
                 return final_str
             except Exception as e:
                 # Check if it's a rate limit error

 import requests
 import pandas as pd
 import time
+import re
 from smolagents import LiteLLMModel, CodeAgent, Tool
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+# --- Answer Extraction Function ---
# Sentinel string returned whenever no usable answer can be recovered.
_UNKNOWN_ANSWER = "- unknown"

# Lead-ins the model tends to put in front of the real answer. They are tried
# in this order, and each one that matches the current start of the text is
# stripped in turn (so "[ANSWER]: Final answer: 42" loses both).
_ANSWER_PREFIXES = (
    '[ANSWER]:',
    '[ANSWER]',
    'Final answer:',
    'Final Answer:',
    'Answer:',
    'answer:',
    'The answer is',
    'The final answer is',
)

# Four-digit year in 1900-2099. The group is non-capturing so that a match
# yields the whole year rather than just the "19"/"20" century prefix.
_YEAR_RE = re.compile(r'\b(?:19|20)\d{2}\b')

# A number, optionally with thousands separators ("1,200") and/or a decimal
# part ("3.5"). The separator form is tried first so "1,200" is not cut at "1".
_NUMBER_RE = re.compile(r'\d{1,3}(?:,\d{3})+(?:\.\d+)?|\d+(?:\.\d+)?')

# A line made only of word characters, whitespace, and "-", ".", ",".
_SIMPLE_ANSWER_RE = re.compile(r'^[\w\s\d\-\.,]+$')


def extract_answer(text: str, original_question: str) -> str:
    """Extract a bare answer from an LLM response.

    The heuristics are applied in this order:

    1. Empty responses, or responses that merely echo the question, yield
       ``"- unknown"``.
    2. Known lead-ins such as ``"Final answer:"`` are stripped.
    3. If the question contains "how many", the first number in the response
       is returned (thousands separators removed, decimals kept).
    4. If the question itself mentions a year (1900-2099), the first such
       year found in the response is returned in full.
    5. If the response repeats the question, the text following it (or,
       failing that, the first short line that is not the question) is kept.
    6. Responses still longer than 200 characters are reduced to a short
       (< 100 characters) line when one exists.

    Args:
        text: Raw response text produced by the model.
        original_question: The question that was asked; used to detect
            echoes and to select the number/year heuristics. An empty
            question disables the echo-removal steps.

    Returns:
        The extracted answer, or ``"- unknown"`` when nothing usable remains.
    """
    if not text:
        return _UNKNOWN_ANSWER

    question = original_question.strip()
    cleaned = text.strip()

    # A response identical to the question carries no answer.
    if cleaned == question:
        return _UNKNOWN_ANSWER

    for prefix in _ANSWER_PREFIXES:
        if cleaned.startswith(prefix):
            cleaned = cleaned[len(prefix):].strip()

    is_count_question = 'how many' in original_question.lower()

    if is_count_question:
        number = _NUMBER_RE.search(cleaned)
        if number:
            # Drop thousands separators so "1,200" is reported as "1200".
            return number.group().replace(',', '')

    # The trigger is a year appearing in the *question*; the answer is then
    # taken to be the first year mentioned in the response.
    if _YEAR_RE.search(original_question):
        year = _YEAR_RE.search(cleaned)
        if year:
            return year.group()

    # Guard on a non-empty question: "" is a substring of everything and
    # str.split("") raises ValueError.
    if question and question in cleaned:
        # Text between the first and (if any) second occurrence of the question.
        after_question = cleaned.split(question)[1].strip()
        if after_question:
            cleaned = after_question
        else:
            # Nothing follows the question: fall back to the first non-empty
            # line that is not the question itself and is short enough.
            for raw_line in cleaned.split('\n'):
                line = raw_line.strip()
                if not line or line == question:
                    continue
                if len(line) < 100 or is_count_question:
                    cleaned = line
                    break

    # Still long and still echoing the question: return the first short line
    # that looks like a plain alphanumeric answer.
    if len(cleaned) > 200 and question and question in cleaned:
        for raw_line in cleaned.split('\n'):
            line = raw_line.strip()
            if (
                line
                and len(line) < 100
                and line != question
                and _SIMPLE_ANSWER_RE.match(line)
            ):
                return line

    # Still long: assume the answer is on the last short non-empty line.
    if len(cleaned) > 200:
        for raw_line in reversed(cleaned.split('\n')):
            line = raw_line.strip()
            if line and len(line) < 100:
                cleaned = line
                break

    if cleaned == question:
        return _UNKNOWN_ANSWER
    return cleaned if cleaned else _UNKNOWN_ANSWER
 # --- Agent Tools ---
 class MathSolver(Tool):
     name = "math_solver"
     HF_MODEL_NAME = "huggingfaceh4/zephyr-7b-beta"
     if provider == "groq":
+        api_key = os.getenv("GROQ_API_KEY")
+        if api_key:
+            return LiteLLMModel(model_id="groq/llama-3.1-8b-instant",
                                 api_key=os.getenv("GROQ_API_KEY"))
         if not api_key:
             raise ValueError("GROQ_API_KEY environment variable is not set")
         return LiteLLMModel(model_id=GROQ_MODEL_NAME, api_key=api_key)
     elif provider == "hf":
         api_key = os.getenv("HF_TOKEN")
             "For string answers, omit articles ('a', 'the') and use full words. "
             "For lists, output in comma-separated format with no conjunctions. "
             "If the answer is not found, say `- unknown`."
+            "IMPORTANT: Respond with ONLY the answer, nothing else. No prefixes, no explanations."
         )
     def __call__(self, question: str) -> str:
         for attempt in range(max_retries):
             try:
                 result = self.agent.run(question)
+                # Use our enhanced extraction function
+                final_str = extract_answer(str(result), question)
                 return final_str
             except Exception as e:
                 # Check if it's a rate limit error