Caporlingua Marina committed
Commit cfde4b0 · 1 Parent(s): 453be1d

go back to Meta-Llama-3-8B-Instruct

Files changed:
- app.py +1 -1
- init.py +1 -0
- llms/gemini.py +25 -0
- llms/llama.py +102 -0
- modules/tools.py +5 -5
- modules/llms.py → test/HF-InferenceClient.py +3 -1
app.py CHANGED
@@ -1,7 +1,7 @@
 from modules.shared import *
 from modules.query_api import *
 from modules.utils import *
-from modules.llms import *
+from llms.llama import *
 
 
 app = FastAPI()
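For context, a minimal sketch of how the swapped import is typically consumed (the endpoint path and parameter name below are assumptions, not shown in this commit):

    from fastapi import FastAPI
    from llms.llama import process_user_query

    app = FastAPI()

    @app.get("/query")
    def query(prompt: str) -> dict:
        # process_user_query raises HTTPException(500) on failure, so errors
        # surface to the client as proper HTTP responses.
        return process_user_query(prompt)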
init.py CHANGED
@@ -39,6 +39,7 @@ folders = [
     "data/",
     "data/eurostat/",
     "data/istat/",
+    "llms",
     "modules/",
     "schemas/",
     "test"
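init.py presumably materializes this folder list on disk; a minimal sketch of that pattern (the loop itself is an assumption, not visible in this hunk):

    import os

    for folder in folders:
        # Create each directory if missing; a no-op when it already exists.
        os.makedirs(folder, exist_ok=True)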
llms/gemini.py ADDED
@@ -0,0 +1,25 @@
+import os
+import sys
+import google.generativeai as genai
+current_dir = os.getcwd()
+sys.path.insert(0, current_dir)
+from modules.shared import *
+from modules.utils import *
+from modules.tools import *
+
+
+genai.configure(api_key="")
+
+
+model = genai.GenerativeModel("gemini-1.5-flash")
+chat = model.start_chat(
+    history=[
+        {"role": "user", "parts": "Hello"},
+        {"role": "model", "parts": "Great to meet you. What would you like to know?"},
+    ],
+)
+
+response = chat.send_message("I have 2 dogs in my house.")
+print(response.text)
+response2 = chat.send_message("How many paws are in my house?")
+print(response2.text)
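Note that the new file calls genai.configure(api_key="") with an empty, hardcoded key. A safer sketch, assuming a GEMINI_API_KEY environment variable (the variable name is an assumption, not part of this commit):

    import os
    import google.generativeai as genai

    # Read the key from the environment instead of committing it to the repo.
    genai.configure(api_key=os.environ["GEMINI_API_KEY"])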
llms/llama.py ADDED
@@ -0,0 +1,102 @@
+import os
+import sys
+current_dir = os.getcwd()
+sys.path.insert(0, current_dir)
+from modules.shared import *
+from modules.utils import *
+from modules.tools import *
+
+from huggingface_hub import InferenceClient
+
+
+
+def process_user_query(prompt: str) -> dict:
+    """
+    Processes a user's query by calling an LLM and executing the appropriate tool function.
+
+    Args:
+        prompt (str): The user's query.
+
+    Returns:
+        dict: The final data based on the user's query.
+    """
+    _geographic_areas = read_jsonl_file(f"{ISTAT_DATA_PATH}/_geographic_areas.jsonl")
+    _regions = read_jsonl_file(f"{ISTAT_DATA_PATH}/_regions.jsonl")
+    _provinces = read_jsonl_file(f"{ISTAT_DATA_PATH}/_provinces.jsonl")
+    messages = []
+    try:
+        messages.append({"role": "user", "content": prompt})
+
+        system_location_ids_prompt = f"""
+        From the provided list of locations, select the one that best matches the user's needs.
+        Geographic areas:
+        {_geographic_areas};
+        Regions:
+        {_regions};
+        Provinces:
+        {_provinces}.
+
+        Instruction:
+        Review the user prompts and the locations list, then return the id of the exact location without any extra text - just the id, nothing else.
+        If there are multiple locations, combine them with a plus '+' sign.
+
+        Examples:
+        - Query: "Tell me the population of Sicilia" -> Response: "ITD3".
+        - Query: "I want the unemployment rate in Sud Italia?" -> "ITF".
+        - Query: "What is the population of Bologna, Ravenna and Parma?" -> "ITD55+ITD57+ITD52".
+
+        Important: Only return the exact string (e.g., "ITC41") without any additional words or explanations.
+        """
+
+        messages.append({"role": "system", "content": system_location_ids_prompt})
+
+        # https://huggingface.co/docs/huggingface_hub/en/package_reference/inference_client
+        client = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct", token=os.getenv('HF_LLMS_TOKEN'))
+        # client = InferenceClient("meta-llama/Meta-Llama-3.1-70B", token=os.getenv('HF_LLMS_TOKEN'))
+
+        response = client.chat_completion(messages, max_tokens=2000, temperature=0)
+        location_id = response.choices[0].message.content
+
+        if not location_id:
+            raise ValueError("Failed to extract location ID.")
+
+        messages.append({"role": "assistant", "content": location_id})
+        print("\n\nFIRST response - location id: ", location_id)
+
+        response = client.chat_completion(
+            messages,
+            tools=tools,
+            tool_choice="auto",
+            max_tokens=1000
+        )
+
+        print("\n\nSECOND response: ", response)
+
+        tool_call = response['choices'][0]['message']['tool_calls'][0]['function']
+        arguments = tool_call['arguments']
+        # Get the function from the dictionary safely
+        chosen_function = tool_functions_map.get(tool_call['name'])
+        if chosen_function is None:
+            raise ValueError("Function not found")
+
+        # Execute the function
+        final_data = chosen_function(**arguments)
+        print("\n\nFINAL DATA: ", final_data)
+
+        # Ensure the final data is a dictionary
+        if isinstance(final_data, list) and all(isinstance(entry, dict) for entry in final_data):
+            # Format the response when there are multiple results
+            formatted_response = {
+                "summary": f"Query returned data for {len(final_data)} locations.",
+                "details": final_data,
+            }
+            return formatted_response
+
+        if isinstance(final_data, dict):
+            return final_data
+
+        raise ValueError("Final data is not a dictionary")
+
+    except Exception as e:
+        logging.error(f"An error occurred during query processing: {e}")
+        raise HTTPException(status_code=500, detail="An error occurred while processing the query")
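One fragile spot in the new file: chosen_function(**arguments) assumes the tool call's arguments field is already a dict. A defensive sketch (not part of this commit) that also covers the JSON-string form some huggingface_hub versions return:

    import json

    arguments = tool_call['arguments']
    if isinstance(arguments, str):
        # Some client/model combinations return arguments as a JSON-encoded string.
        arguments = json.loads(arguments)
    final_data = chosen_function(**arguments)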
modules/tools.py CHANGED
@@ -198,7 +198,7 @@ def age_str_to_int(age_str: str) -> int:
 
 
 
-def extract_and_format_data_from_xml_for_streamlit_app(xml_content: str) -> List[Dict[str, str]]:
+def extract_and_format_data_from_xml_for_web_app(xml_content: str) -> List[Dict[str, str]]:
     """
     Extracts and formats data from an SDMX-ML XML document for use in a Streamlit app.
 
@@ -227,7 +227,7 @@ def extract_and_format_data_from_xml_for_streamlit_app(xml_content: str) -> List
     - The `age_str_to_int` function is used to ensure proper sorting of age strings.
 
     Example:
-        extract_and_format_data_from_xml_for_streamlit_app(xml_data)
+        extract_and_format_data_from_xml_for_web_app(xml_data)
        -> [{'location': 'Italy', 'sex': 'Male', 'age (years)': '0', 'time period': '2020', 'population': '10000'}, ...]
     """
     # Parse the XML content
@@ -336,7 +336,7 @@ def fetch_population_for_locations_years_sex_age_via_sdmx(
     if res is None:
         return None
     else:
-        data = extract_and_format_data_from_xml_for_streamlit_app(res)
+        data = extract_and_format_data_from_xml_for_web_app(res)
         return data
 
 
@@ -363,11 +363,11 @@ tools=[
                 },
                 "start_period": {
                     "type": "string",
-                    "description": "The start date of the period for which data is requested, formatted as 'YYYY-MM-DD', e.g., '2024-01-01'."
+                    "description": "The start date of the period for which data is requested, formatted as 'YYYY-MM-DD', e.g., '2024-01-01'. Default is '2024-01-01'."
                 },
                 "end_period": {
                     "type": "string",
-                    "description": "The end date of the period for which data is requested, formatted as 'YYYY-MM-DD', e.g., '2024-12-31'."
+                    "description": "The end date of the period for which data is requested, formatted as 'YYYY-MM-DD', e.g., '2024-12-31'. Default is '2024-12-31'."
                 }
             },
             "required": ["location_ids", "sex", "age", "start_period", "end_period"]
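For orientation, the edited descriptions sit inside an OpenAI-style function schema, which is the shape InferenceClient.chat_completion(..., tools=...) expects. A reconstructed sketch of one entry; everything outside the hunk above is an assumption:

    tools = [
        {
            "type": "function",
            "function": {
                # Function name taken from the hunk header above.
                "name": "fetch_population_for_locations_years_sex_age_via_sdmx",
                "parameters": {
                    "type": "object",
                    "properties": {
                        # ... other properties elided ...
                        "start_period": {
                            "type": "string",
                            "description": "The start date of the period for which data is "
                                           "requested, formatted as 'YYYY-MM-DD', e.g., "
                                           "'2024-01-01'. Default is '2024-01-01'.",
                        },
                    },
                    "required": ["location_ids", "sex", "age", "start_period", "end_period"],
                },
            },
        },
    ]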
modules/llms.py → test/HF-InferenceClient.py RENAMED
@@ -47,7 +47,9 @@ def process_user_query(prompt: str) -> dict:
 
     # https://huggingface.co/docs/huggingface_hub/en/package_reference/inference_client
     # client = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct", token=os.getenv('HF_LLMS_TOKEN'))
-    client = InferenceClient("meta-llama/Meta-Llama-3.1-70B", token=os.getenv('HF_LLMS_TOKEN'))
+    # client = InferenceClient("meta-llama/Meta-Llama-3.1-70B", token=os.getenv('HF_LLMS_TOKEN'))
+    client = InferenceClient("mistralai/Mistral-Nemo-Instruct-2407", token=os.getenv('HF_LLMS_TOKEN'))
+
     response = client.chat_completion(messages, max_tokens=2000, temperature=0)
     location_id = response.choices[0].message.content
 
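Given how often the model id has been swapped in this file (Llama-3-8B → Llama-3.1-70B → Mistral-Nemo), a small sketch that makes it configurable instead (HF_LLMS_MODEL is an assumed variable name, not part of this commit):

    import os
    from huggingface_hub import InferenceClient

    # Fall back to the model currently pinned in the test file.
    MODEL_ID = os.getenv("HF_LLMS_MODEL", "mistralai/Mistral-Nemo-Instruct-2407")
    client = InferenceClient(MODEL_ID, token=os.getenv("HF_LLMS_TOKEN"))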