Caporlingua Marina committed
Commit cfde4b0 · 1 Parent(s): 453be1d

go back to Meta-Llama-3-8B-Instruct

app.py CHANGED
@@ -1,7 +1,7 @@
 from modules.shared import *
 from modules.query_api import *
 from modules.utils import *
-from modules.llms import *
+from llms.llama import *
 
 
 app = FastAPI()
init.py CHANGED
@@ -39,6 +39,7 @@ folders = [
     "data/",
     "data/eurostat/",
     "data/istat/",
+    "llms",
     "modules/",
     "schemas/",
     "test"
llms/gemini.py ADDED
@@ -0,0 +1,25 @@
+import os
+import sys
+import google.generativeai as genai
+current_dir = os.getcwd()
+sys.path.insert(0, current_dir)
+from modules.shared import *
+from modules.utils import *
+from modules.tools import *
+
+
+genai.configure(api_key="")
+
+
+model = genai.GenerativeModel("gemini-1.5-flash")
+chat = model.start_chat(
+    history=[
+        {"role": "user", "parts": "Hello"},
+        {"role": "model", "parts": "Great to meet you. What would you like to know?"},
+    ],
+)
+
+response = chat.send_message("I have 2 dogs in my house.")
+print(response.text)
+response2 = chat.send_message("How many paws are in my house?")
+print(response2.text)
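
Note that llms/gemini.py is added with an empty api_key string. A minimal sketch of reading the key from the environment instead; the GEMINI_API_KEY variable name is an assumption, not part of this commit:

    import os
    import google.generativeai as genai

    # Hypothetical: load the key from the environment rather than hardcoding it.
    # GEMINI_API_KEY is an assumed name, mirroring HF_LLMS_TOKEN used elsewhere in the repo.
    genai.configure(api_key=os.getenv("GEMINI_API_KEY", ""))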
llms/llama.py ADDED
@@ -0,0 +1,102 @@
+import os
+import sys
+current_dir = os.getcwd()
+sys.path.insert(0, current_dir)
+from modules.shared import *
+from modules.utils import *
+from modules.tools import *
+
+from huggingface_hub import InferenceClient
+
+
+
+def process_user_query(prompt: str) -> dict:
+    """
+    Processes a user's query by calling an LLM and executing the appropriate tool function.
+
+    Args:
+        prompt (str): The user's query.
+
+    Returns:
+        dict: The final data based on the user's query.
+    """
+    _geographic_areas = read_jsonl_file(f"{ISTAT_DATA_PATH}/_geographic_areas.jsonl")
+    _regions = read_jsonl_file(f"{ISTAT_DATA_PATH}/_regions.jsonl")
+    _provinces = read_jsonl_file(f"{ISTAT_DATA_PATH}/_provinces.jsonl")
+    messages = []
+    try:
+        messages.append({"role": "user", "content": prompt})
+
+        system_location_ids_prompt = f"""
+        From the provided list of locations, select the one that best matches the user's needs.
+        Geographic areas:
+        {_geographic_areas};
+        Regions:
+        {_regions};
+        Provinces:
+        {_provinces}.
+
+        Instruction:
+        Review the user prompt and the locations list, then return the id of the exact location without any extra text, just the id, nothing else.
+        If there are multiple locations, combine them with a plus '+' sign.
+
+        Examples:
+        - Query: "Tell me the population of Sicilia" -> Response: "ITD3".
+        - Query: "I want the unemployment rate in Sud Italia?" -> "ITF".
+        - Query: "What is the population of Bologna, Ravenna and Parma?" -> "ITD55+ITD57+ITD52".
+
+        Important: Only return the exact string (e.g., "ITC41") without any additional words or explanations.
+        """
+
+        messages.append({"role": "system", "content": system_location_ids_prompt})
+
+        # https://huggingface.co/docs/huggingface_hub/en/package_reference/inference_client
+        client = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct", token=os.getenv('HF_LLMS_TOKEN'))
+        # client = InferenceClient("meta-llama/Meta-Llama-3.1-70B", token=os.getenv('HF_LLMS_TOKEN'))
+
+        response = client.chat_completion(messages, max_tokens=2000, temperature=0)
+        location_id = response.choices[0].message.content
+
+        if not location_id:
+            raise ValueError("Failed to extract location ID.")
+
+        messages.append({"role": "assistant", "content": location_id})
+        print("\n\nFIRST response - location id: ", location_id)
+
+        response = client.chat_completion(
+            messages,
+            tools=tools,
+            tool_choice="auto",
+            max_tokens=1000
+        )
+
+        print("\n\nSECOND response: ", response)
+
+        tool_call = response['choices'][0]['message']['tool_calls'][0]['function']
+        arguments = tool_call['arguments']
+        # Get the function from the dictionary safely
+        chosen_function = tool_functions_map.get(tool_call['name'])
+        if chosen_function is None:
+            raise ValueError("Function not found")
+
+        # Execute the function
+        final_data = chosen_function(**arguments)
+        print("\n\nFINAL DATA: ", final_data)
+
+        # Ensure the final data is a dictionary
+        if isinstance(final_data, list) and all(isinstance(entry, dict) for entry in final_data):
+            # Format the response when there are multiple results
+            formatted_response = {
+                "summary": f"Query returned data for {len(final_data)} locations.",
+                "details": final_data,
+            }
+            return formatted_response
+
+        if isinstance(final_data, dict):
+            return final_data
+
+        raise ValueError("Final data is not a dictionary")
+
+    except Exception as e:
+        logging.error(f"An error occurred during query processing: {e}")
+        raise HTTPException(status_code=500, detail="An error occurred while processing the query")
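
Since app.py now does `from llms.llama import *`, process_user_query becomes available to the FastAPI app. A minimal sketch of how it might be exposed as an endpoint; the route path and request model are assumptions, not part of this commit:

    # Hypothetical wiring in app.py; endpoint path and payload shape are assumed.
    from fastapi import FastAPI
    from pydantic import BaseModel
    from llms.llama import process_user_query

    app = FastAPI()

    class Query(BaseModel):
        prompt: str

    @app.post("/query")
    def query_endpoint(query: Query) -> dict:
        # Delegates to the two-step LLM flow defined in llms/llama.py:
        # first resolve the location id, then let the model pick a tool.
        return process_user_query(query.prompt)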
modules/tools.py CHANGED
@@ -198,7 +198,7 @@ def age_str_to_int(age_str: str) -> int:
 
 
 
-def extract_and_format_data_from_xml_for_streamlit_app(xml_content: str) -> List[Dict[str, str]]:
+def extract_and_format_data_from_xml_for_web_app(xml_content: str) -> List[Dict[str, str]]:
     """
     Extracts and formats data from an SDMX-ML XML document for use in a Streamlit app.
 
@@ -227,7 +227,7 @@ def extract_and_format_data_from_xml_for_web_app(xml_content: str) -> List
     - The `age_str_to_int` function is used to ensure proper sorting of age strings.
 
     Example:
-        extract_and_format_data_from_xml_for_streamlit_app(xml_data)
+        extract_and_format_data_from_xml_for_web_app(xml_data)
         -> [{'location': 'Italy', 'sex': 'Male', 'age (years)': '0', 'time period': '2020', 'population': '10000'}, ...]
     """
     # Parse the XML content
@@ -336,7 +336,7 @@ def fetch_population_for_locations_years_sex_age_via_sdmx(
     if res is None:
         return None
     else:
-        data = extract_and_format_data_from_xml_for_streamlit_app(res)
+        data = extract_and_format_data_from_xml_for_web_app(res)
        return data
 
 
@@ -363,11 +363,11 @@ tools=[
                },
                "start_period": {
                    "type": "string",
-                    "description": "The start date of the period for which data is requested, formatted as 'YYYY-MM-DD', e.g., '2024-01-01'. Default is '2024-01-01'."
+                    "description": "The start date of the period for which data is requested, formatted as 'YYYY-MM-DD', e.g., '2024-01-01'. Default is '2024-01-01'."
                },
                "end_period": {
                    "type": "string",
-                    "description": "The end date of the period for which data is requested, formatted as 'YYYY-MM-DD', e.g., '2024-12-31'. Default is '2024-12-31'."
+                    "description": "The end date of the period for which data is requested, formatted as 'YYYY-MM-DD', e.g., '2024-12-31'. Default is '2024-12-31'."
                }
            },
            "required": ["location_ids", "sex", "age", "start_period", "end_period"]
modules/llms.py → test/HF-InferenceClient.py RENAMED
@@ -47,7 +47,9 @@ def process_user_query(prompt: str) -> dict:
 
         # https://huggingface.co/docs/huggingface_hub/en/package_reference/inference_client
         # client = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct", token=os.getenv('HF_LLMS_TOKEN'))
-        client = InferenceClient("meta-llama/Meta-Llama-3.1-70B", token=os.getenv('HF_LLMS_TOKEN'))
+        # client = InferenceClient("meta-llama/Meta-Llama-3.1-70B", token=os.getenv('HF_LLMS_TOKEN'))
+        client = InferenceClient("mistralai/Mistral-Nemo-Instruct-2407", token=os.getenv('HF_LLMS_TOKEN'))
+
         response = client.chat_completion(messages, max_tokens=2000, temperature=0)
         location_id = response.choices[0].message.content
 
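
Across this commit, model checkpoints are swapped by commenting lines in and out. A hedged alternative sketch that selects the checkpoint via an environment variable; HF_MODEL_ID is an assumed name, not part of this repo:

    import os
    from huggingface_hub import InferenceClient

    # Hypothetical: choose the model at runtime; defaults to the checkpoint
    # this commit goes back to (Meta-Llama-3-8B-Instruct).
    MODEL_ID = os.getenv("HF_MODEL_ID", "meta-llama/Meta-Llama-3-8B-Instruct")
    client = InferenceClient(MODEL_ID, token=os.getenv("HF_LLMS_TOKEN"))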