Spaces:
Sleeping
Sleeping
| import os | |
| import sys | |
| current_dir = os.getcwd() | |
| sys.path.insert(0, current_dir) | |
| from modules.shared import * | |
def assemble_locations() -> List[Dict]:
    """
    Read the ISTAT location JSONL files and merge their records into one list.

    The files `_geographic_areas.jsonl`, `_regions.jsonl` and `_provinces.jsonl`
    are read, in that order, from the `ISTAT_DATA_PATH` directory. Every
    non-blank line is parsed as one JSON document and appended to the result.

    Args:
        None: The function relies on the module-level `ISTAT_DATA_PATH` constant
            and on the fixed file names listed in `file_names`.

    Returns:
        List[Dict]: The parsed JSON values (expected to be one dictionary per
            location), in file order and then line order.

    Raises:
        FileNotFoundError: If any of the expected JSONL files is missing.
        OSError: If a file cannot be read.
        json.JSONDecodeError: If a non-blank line is not valid JSON.

    Notes:
        - `ISTAT_DATA_PATH` must be defined elsewhere in the code.
        - Whitespace-only lines (e.g. a trailing empty line) are skipped.
    """
    file_names = ['_geographic_areas', '_regions', '_provinces']
    locations = []
    for name in file_names:
        file_path = f"{ISTAT_DATA_PATH}/{name}.jsonl"
        with open(file_path, 'r', encoding='utf-8') as file:
            # A stray blank line must not abort the whole load, so only
            # lines with actual content are handed to the JSON parser.
            locations.extend(json.loads(line) for line in file if line.strip())
    return locations
def query_api(url: str, timeout: Optional[float] = 60.0) -> Optional[str]:
    """
    Send a GET request to `url` and return the response body as text.

    The response body is decoded as UTF-8. Any request failure (network
    problem, invalid URL, timeout, or a non-2xx status code) is reported with
    `print` and turned into a `None` return value.

    Args:
        url (str): The HTTP or HTTPS URL to fetch.
        timeout (Optional[float]): Seconds to wait for the server before giving
            up. Pass `None` to wait indefinitely (the behaviour before this
            parameter existed). Default is 60 seconds.

    Returns:
        Optional[str]: The response text if the request succeeds, otherwise `None`.

    Notes:
        - `requests.RequestException` (including `requests.Timeout` and the
          `HTTPError` raised by `raise_for_status`) is caught internally and
          never propagates to the caller.
        - The `requests` library must be installed.
    """
    try:
        # Without a timeout, a stalled server would block the caller forever.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raises an HTTPError for bad responses
        # Force UTF-8 so `response.text` does not depend on header guessing.
        response.encoding = 'utf-8'
        return response.text
    except requests.RequestException as e:
        print(f"An error occurred: {e}")
        return None
def combine_ages(age_code: str) -> str:
    """
    Expand an age code into the explicit list of single-age codes it covers.

    The result is a string of age codes joined by "+" (no spaces), which is the
    form used inside the SDMX query URL.

    Args:
        age_code (str): One age code, or several codes joined by "+". Each code
            may have one of these formats:
            - `YX`: an exact age (e.g., "Y65").
            - `Y_GEX`: ages X and over, up to 100+ (e.g., "Y_GE18").
            - `Y_UNX`: all ages below X (e.g., "Y_UN18").
            - `YX-Z`: the inclusive range of ages X to Z (e.g., "Y23-42").
            Any other code (e.g., "TOTAL") is passed through unchanged.

    Returns:
        str: The expanded age codes joined by "+".

    Raises:
        ValueError: If the numeric part of a `Y_GEX`, `Y_UNX` or `YX-Z` code
            is not an integer.

    Rules:
        1. `YX`    -> unchanged (e.g., "Y65" -> "Y65").
        2. `Y_GEX` -> `YX` up to `Y99`, followed by `Y_GE100`.
        3. `Y_UNX` -> `Y0` up to `Y(X-1)`.
        4. `YX-Z`  -> `YX` up to `YZ`.

    Example:
        combine_ages("Y_GE98") -> "Y98+Y99+Y_GE100"
        combine_ages("Y_UN3") -> "Y0+Y1+Y2"
        combine_ages("Y23-25") -> "Y23+Y24+Y25"
        combine_ages("Y65") -> "Y65"
        combine_ages("Y14-15+Y_GE99") -> "Y14+Y15+Y99+Y_GE100"
    """
    expanded = []
    # Callers may pass several codes joined by "+"; expand each one on its own.
    for code in age_code.split("+"):
        if code.startswith("Y_GE"):
            # Rule 2: Y_GEX -> YX + Y(X+1) + ... + Y99 + Y_GE100
            start_age = int(code[4:])
            expanded.extend(f"Y{age}" for age in range(start_age, 100))
            expanded.append("Y_GE100")
        elif code.startswith("Y_UN"):
            # Rule 3: Y_UNX -> Y0 + Y1 + ... + Y(X-1)
            end_age = int(code[4:])
            expanded.extend(f"Y{age}" for age in range(end_age))
        elif "-" in code and not code.startswith("Y_"):
            # Rule 4: YX-Z -> YX + Y(X+1) + ... + YZ
            start_age, end_age = map(int, code[1:].split('-'))
            expanded.extend(f"Y{age}" for age in range(start_age, end_age + 1))
        else:
            # Rule 1 and anything unrecognised: pass through unchanged, so the
            # function always returns a string rather than None.
            expanded.append(code)
    return "+".join(expanded)
def transform_age_code(age_code: str) -> Optional[str]:
    """
    Transform an age code into a simplified human-readable format.

    Conversions:
        - "TOTAL" becomes "total".
        - "Y_GE100" becomes "100+".
        - "Y<number>" (e.g., "Y0", "Y99") becomes the number as a string
          (e.g., "0", "99"); leading zeros are dropped.
        - Anything else yields `None`.

    Args:
        age_code (str): The age code to be transformed.

    Returns:
        Optional[str]: The transformed age code, or `None` if the input is not
            a recognized format.

    Examples:
        transform_age_code("TOTAL") -> "total"
        transform_age_code("Y_GE100") -> "100+"
        transform_age_code("Y25") -> "25"
        transform_age_code("INVALID") -> None
    """
    if age_code == 'TOTAL':
        return 'total'
    if age_code == 'Y_GE100':
        return '100+'
    # Regular case for age codes like 'Y0', 'Y1', ..., 'Y99'.
    number_part = age_code[1:]
    # isdecimal() only accepts characters that int() can parse; isdigit() also
    # accepts e.g. superscript digits, which would make int() raise ValueError
    # instead of letting the function return None as documented.
    if age_code.startswith('Y') and number_part.isdecimal():
        return str(int(number_part))  # Convert 'Y1' to '1', 'Y99' to '99'
    return None
def age_str_to_int(age_str: str) -> int:
    """
    Map a human-readable age string to an integer usable as a sort key.

    Ordering produced by the mapping:
        - Plain numeric strings ("0", "1", ..., "99") keep their numeric value.
        - "100+" maps to 101, placing it after every plain numeric age.
        - "TOTAL" (compared case-insensitively) maps to 102, placing it last.

    Args:
        age_str (str): A numeric string, the literal "100+", or "TOTAL" in any
            letter case.

    Returns:
        int: The sort rank described above.

    Raises:
        ValueError: If `age_str` is none of the accepted forms.

    Examples:
        age_str_to_int("5") -> 5
        age_str_to_int("100+") -> 101
        age_str_to_int("TOTAL") -> 102
    """
    # The grand total always goes to the very end of the ordering.
    if age_str.upper() == 'TOTAL':
        return 102
    # The open-ended "100+" bucket ranks just above every exact age.
    return 101 if age_str == '100+' else int(age_str)
def extract_and_format_data_from_xml_for_web_app(xml_content: str) -> List[Dict[str, str]]:
    """
    Extract and format data from an SDMX-ML XML document for use in a web app.

    Every `generic:Series` element is read for its `REF_AREA`, `AGE` and `SEX`
    values, and every `generic:Obs` inside it yields one output record. The
    records are returned sorted by time period, location and age.

    Args:
        xml_content (str): The XML content as a string to be parsed.

    Returns:
        List[Dict[str, str]]: One dictionary per observation with the keys:
            - `location` (str): Location name looked up from the area code, or
              "Unknown Location" if the code is not known.
            - `sex` (str): "Male", "Female", "Total", or "Unknown Sex".
            - `age (years)` (str): The age as produced by `transform_age_code`;
              if that function does not recognise the code, the raw code.
            - `time period` (str): The time period of the observation.
            - `population` (str): The observed value.

    Raises:
        ET.ParseError: If the XML content cannot be parsed.
        AttributeError: If a series or observation lacks one of the expected
            child elements (the element lookup then yields `None`).
        ValueError: If a time period is not an integer string.

    Notes:
        - `assemble_locations` supplies the location records.
        - `age_str_to_int` provides the age ordering; ages it cannot rank are
          sorted after all others instead of aborting the sort.

    Example:
        extract_and_format_data_from_xml_for_web_app(xml_data)
        -> [{'location': 'Italy', 'sex': 'Male', 'age (years)': '0', 'time period': '2020', 'population': '10000'}, ...]
    """
    # Parse the XML content
    root = ET.fromstring(xml_content)

    # Define namespaces for the XML structure
    ns = {
        'generic': 'http://www.sdmx.org/resources/sdmxml/schemas/v2_1/data/generic',
        'message': 'http://www.sdmx.org/resources/sdmxml/schemas/v2_1/message',
        'common': 'http://www.sdmx.org/resources/sdmxml/schemas/v2_1/common'
    }

    # Build a lookup from the value stored under each record's first key to
    # that key. NOTE(review): this presumes each record looks like
    # {<location name>: <area code>, ...} - confirm against the JSONL files.
    location_dict = {}
    for item in assemble_locations():
        first_key = next(iter(item))
        location_dict[item[first_key]] = first_key

    # Map sex codes to descriptive strings (constant, so built only once)
    sex_map = {'1': 'Male', '2': 'Female', '9': 'Total'}

    def age_rank(age: str):
        # Ages that age_str_to_int cannot convert are ranked after everything
        # else so that one unexpected code does not break the whole sort.
        try:
            return age_str_to_int(age)
        except ValueError:
            return float('inf')

    # List to store the data from all series
    extracted_data = []

    # Iterate over each series in the DataSet
    for series in root.findall('.//generic:Series', ns):
        # Extract common series information
        ref_area_code = series.find(".//generic:Value[@id='REF_AREA']", ns).get('value')
        # Default to "Unknown Location" if the code is not found
        ref_area_name = location_dict.get(ref_area_code, "Unknown Location")

        age_code = series.find(".//generic:Value[@id='AGE']", ns).get('value')
        age_description = transform_age_code(age_code)
        if age_description is None:
            # Keep the raw code so the record stays displayable and sortable.
            age_description = age_code

        sex_code = series.find(".//generic:Value[@id='SEX']", ns).get('value')
        # Default to "Unknown Sex" if the code is not found
        sex_description = sex_map.get(sex_code, "Unknown Sex")

        # Iterate over each observation in the series
        for obs in series.findall('.//generic:Obs', ns):
            time_period = obs.find(".//generic:ObsDimension[@id='TIME_PERIOD']", ns).get('value')
            obs_value = obs.find('.//generic:ObsValue', ns).get('value')
            extracted_data.append({
                'location': ref_area_name,
                'sex': sex_description,
                'age (years)': age_description,
                'time period': time_period,
                'population': obs_value
            })

    # Sort by 'time period', then 'location', then 'age (years)'
    return sorted(
        extracted_data,
        key=lambda x: (int(x['time period']), x['location'], age_rank(x['age (years)']))
    )
def fetch_population_for_locations_years_sex_age_via_sdmx(
    location_ids: str = 'IT',
    sex: str = '9',
    age: str = 'TOTAL',
    start_period: str = '2024-01-01',
    end_period: str = '2024-12-31'
) -> Optional[List[Dict[str, str]]]:
    """
    Query the Istat SDMX web service for resident-population figures.

    A query URL is built from the arguments, printed, and fetched with
    `query_api`. The XML answer is converted into a list of records by
    `extract_and_format_data_from_xml_for_web_app`.

    Args:
        location_ids (str): Geographical identifiers of the locations, joined
            by '+' if there are several. Default is 'IT'.
        sex (str): Sex category: '1' (male), '2' (female) or '9' (total).
            Several values can be joined by '+' (e.g., '1+2'). Default is '9'.
        age (str): Age selection. 'TOTAL' (any letter case) requests all ages;
            any other value is expanded by `combine_ages` (e.g., 'Y0', 'Y_GE100',
            'Y_UN18', 'Y14-15'). Default is 'TOTAL'.
        start_period (str): Start date of the requested period, 'YYYY-MM-DD'.
            Default is '2024-01-01'.
        end_period (str): End date of the requested period, 'YYYY-MM-DD'.
            Default is '2024-12-31'.

    Returns:
        Optional[List[Dict[str, str]]]: The formatted records, each with the
            keys `location`, `sex`, `age (years)`, `time period` and
            `population`; `None` if the HTTP query fails.

    Example:
        fetch_population_for_locations_years_sex_age_via_sdmx(
            'ITC+ITE2+ITF14', '9', 'TOTAL', '2024-01-01', '2024-12-31'
        )
        -> [
            {'location': 'Nord-ovest', 'sex': 'Total', 'age (years)': 'total', 'time period': '2024', 'population': '15858626'},
            {'location': 'Umbria', 'sex': 'Total', 'age (years)': 'total', 'time period': '2024', 'population': '856407'},
            {'location': 'Chieti', 'sex': 'Total', 'age (years)': 'total', 'time period': '2024', 'population': '372640'}
        ]
    """
    # 'TOTAL' is sent as-is (upper-cased); every other selection is expanded
    # into the explicit list of single-age codes.
    combined_age = "TOTAL" if age.upper() == "TOTAL" else combine_ages(age)

    url = f"https://esploradati.istat.it/SDMXWS/rest/data/IT1,22_289_DF_DCIS_POPRES1_1,1.0/A.{location_ids}.JAN.{sex}.{combined_age}.99/ALL/?detail=full&startPeriod={start_period}&endPeriod={end_period}&dimensionAtObservation=TIME_PERIOD"
    print(url)

    xml_payload = query_api(url)
    # query_api signals any request failure with None; pass that on unchanged.
    if xml_payload is None:
        return None
    return extract_and_format_data_from_xml_for_web_app(xml_payload)
# Function-calling schema describing the tool exposed to the language model.
# The "name" must match the corresponding key in `tool_functions_map`.
tools = [
    {
        "type": "function",
        "function": {
            "name": "fetch_population_for_locations_years_sex_age_via_sdmx",
            "description": "Fetches population data for specific locations, sex categories, age and time periods using the Istat SDMX web service. Supports multiple locations and sex categories.",
            "parameters": {
                "type": "object",
                "properties": {
                    "location_ids": {
                        "type": "string",
                        "description": "Geographical identifiers for the locations, concatenated by '+' if multiple, e.g., 'ITC+ITE2+ITF14'"
                    },
                    "sex": {
                        "type": "string",
                        "description": "The sex category for which data is requested. '1' for male, '2' for female, '9' for total. Can be combined with '+', e.g., '1+2+9'"
                    },
                    "age": {
                        "type": "string",
                        # The arrows were stored as a mis-encoded character; restored to "→".
                        "description": "The age in years for which data is requested. Follow these rules to interpret the age definition: 1. Exact age (e.g., `X years`): Format: `YX`; Example: `0 year` → `Y0`, `1 year` → `Y1`, `10 years` → `Y10`. 2. Age and over (e.g., `X years and over`): Format: `Y_GEX`; Example: `14 years and over` → `Y_GE14`. 3. Until a certain age (e.g., `until X years` or `under X years`): Format: `Y_UNX`; Example: `until 15 years` → `Y_UN15`. 4. Age ranges (e.g., `X-Y years`): Format: `YX-Y`; Example: `14-15 years` → `Y14-15`. 5. TOTAL (representing all ages): Use the code `TOTAL`."
                    },
                    "start_period": {
                        "type": "string",
                        "description": "The start date of the period for which data is requested, formatted as 'YYYY-MM-DD', e.g., '2024-01-01'. Default is '2024-01-01'."
                    },
                    "end_period": {
                        "type": "string",
                        "description": "The end date of the period for which data is requested, formatted as 'YYYY-MM-DD', e.g., '2024-12-31'. Default is '2024-12-31'."
                    }
                },
                "required": ["location_ids", "sex", "age", "start_period", "end_period"]
            }
        }
    }
]
# Dispatch table: tool name requested by the model -> Python callable that implements it.
tool_functions_map = {}
tool_functions_map['fetch_population_for_locations_years_sex_age_via_sdmx'] = (
    fetch_population_for_locations_years_sex_age_via_sdmx
)