import logging
import os

from dotenv import load_dotenv
from huggingface_hub import InferenceClient
from huggingface_hub.inference._generated.types import ChatCompletionOutput
from huggingface_hub.utils import HfHubHTTPError

# Configure logging
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
# Load environment variables from .env so they are available whether this
# module is imported directly or via the main entry point (app.py).
load_dotenv()
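
# Example .env consumed by load_dotenv() above (placeholder values, not real
# credentials):
#   HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxx
#   HF_INFERENCE_ENDPOINT_URL=https://your-endpoint.endpoints.huggingface.cloud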

HF_TOKEN = os.getenv("HF_TOKEN")
HF_INFERENCE_ENDPOINT_URL = os.getenv("HF_INFERENCE_ENDPOINT_URL")

# Default parameters for the LLM call
DEFAULT_MAX_TOKENS = 2048
DEFAULT_TEMPERATURE = 0.1  # Lower temperature for more deterministic analysis

# Special dictionary to indicate a 503 error
ERROR_503_DICT = {"error_type": "503", "message": "Service Unavailable"}
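
# For reference, the `formatted_prompt` argument below uses the standard
# chat-completion message shape (contents are illustrative; the real prompt is
# built elsewhere, e.g. in prompts.py):
#   [
#       {"role": "system", "content": "You are a code reviewer."},
#       {"role": "user", "content": "Analyze this Space's code ..."},
#   ]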


def query_qwen_endpoint(
    formatted_prompt: list[dict[str, str]], max_tokens: int = DEFAULT_MAX_TOKENS
) -> ChatCompletionOutput | dict | None:
    """
    Queries the specified Qwen Inference Endpoint with the formatted prompt.

    Args:
        formatted_prompt: A list of message dictionaries for the chat completion API.
        max_tokens: The maximum number of tokens to generate.

    Returns:
        The ChatCompletionOutput object from the inference client,
        a specific dictionary (ERROR_503_DICT) if a 503 error occurs,
        or None if another error occurs.
    """
    if not HF_INFERENCE_ENDPOINT_URL:
        logging.error("HF_INFERENCE_ENDPOINT_URL environment variable not set.")
        return None
    if not HF_TOKEN:
        logging.warning(
            "HF_TOKEN environment variable not set. Requests might fail if the endpoint requires authentication."
        )
        # Depending on the endpoint config, it might still work without a token.

    logging.info(f"Querying Inference Endpoint: {HF_INFERENCE_ENDPOINT_URL}")
    client = InferenceClient(model=HF_INFERENCE_ENDPOINT_URL, token=HF_TOKEN)

    try:
        response = client.chat_completion(
            messages=formatted_prompt,
            max_tokens=max_tokens,
            temperature=DEFAULT_TEMPERATURE,
            # Qwen models often benefit from explicit stop sequences if known,
            # but we rely on max_tokens and the model's natural stopping for now.
            # stop=["<|im_end|>"],  # Example stop token for specific Qwen finetunes
        )
        logging.info("Successfully received response from Inference Endpoint.")
        return response
    except HfHubHTTPError as e:
        # Check specifically for 503 Service Unavailable
        if e.response is not None and e.response.status_code == 503:
            logging.warning(
                f"Encountered 503 Service Unavailable from endpoint: {HF_INFERENCE_ENDPOINT_URL}"
            )
            return ERROR_503_DICT  # Return the special dict for 503
        else:
            # Handle other HTTP errors
            logging.error(f"HTTP error querying Inference Endpoint: {e}")
            if e.response is not None:
                logging.error(f"Response details: {e.response.text}")
            return None  # Return None for other HTTP errors
    except Exception as e:
        logging.error(f"An unexpected error occurred querying Inference Endpoint: {e}")
        return None
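

# Illustrative caller-side retry pattern (a sketch, not part of the original
# module): query_qwen_endpoint() returns ERROR_503_DICT instead of raising on a
# 503, which usually means the endpoint is scaled to zero and warming up. The
# retry count and delay below are assumptions, not tuned values.
def query_with_503_retry(
    formatted_prompt: list[dict[str, str]],
    retries: int = 3,
    delay_seconds: float = 30.0,
) -> ChatCompletionOutput | dict | None:
    """Calls query_qwen_endpoint, retrying while the 503 sentinel is returned."""
    import time

    response = query_qwen_endpoint(formatted_prompt)
    for attempt in range(1, retries + 1):
        if response != ERROR_503_DICT:
            break
        logging.info(
            f"Endpoint returned 503; retry {attempt}/{retries} in {delay_seconds}s."
        )
        time.sleep(delay_seconds)
        response = query_qwen_endpoint(formatted_prompt)
    return response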


def parse_qwen_response(response: ChatCompletionOutput | dict | None) -> str:
    """
    Parses the response from the Qwen model to extract the generated text.

    Handles potential None or error dict inputs.

    Args:
        response: The ChatCompletionOutput object, ERROR_503_DICT, or None.

    Returns:
        The extracted response text as a string, or an error message string.
    """
    if response is None:
        return "Error: Failed to get response from the language model."

    # Check for our specific 503 signal before trying to parse as ChatCompletionOutput
    if isinstance(response, dict) and response.get("error_type") == "503":
        return f"Error: {response['error_type']} {response['message']}"

    # Check that it is likely the expected ChatCompletionOutput structure
    if not hasattr(response, "choices"):
        logging.error(
            f"Unexpected response type received by parse_qwen_response: {type(response)}. Content: {response}"
        )
        return "Error: Received an unexpected response format from the language model endpoint."

    try:
        # Access the generated content according to the ChatCompletionOutput structure
        if response.choices and len(response.choices) > 0:
            content = response.choices[0].message.content
            if content:
                logging.info("Successfully parsed response content.")
                return content.strip()
            else:
                logging.warning("Response received, but content is empty.")
                return "Error: Received an empty response from the language model."
        else:
            logging.warning("Response received, but no choices found.")
            return "Error: No response choices found in the language model output."
    except AttributeError as e:
        # Catches cases where the response looks like the object but lacks expected attributes
        logging.error(
            f"Attribute error parsing response: {e}. Response structure might be unexpected."
        )
        logging.error(f"Raw response object: {response}")
        return "Error: Could not parse the structure of the language model response."
    except Exception as e:
        logging.error(f"An unexpected error occurred parsing the response: {e}")
        return "Error: An unexpected error occurred while parsing the language model response."


# Example usage (for manual testing; requires .env setup and a prompts.py that
# can generate a test prompt).
if __name__ == "__main__":
    try:
        from prompts import format_code_for_analysis

        # Create a dummy prompt for testing
        test_files = {"app.py": "print('hello')"}
        test_prompt = format_code_for_analysis("test/minimal", test_files)
        print("--- Sending Test Prompt ---")
        print(test_prompt)

        api_response = query_qwen_endpoint(test_prompt)
        print("\n--- Raw API Response ---")
        print(api_response)

        print("\n--- Parsed Response ---")
        parsed_text = parse_qwen_response(api_response)
        print(parsed_text)
    except ImportError:
        print("Could not import prompts.py for testing. Run this test from the project root.")
    except Exception as e:
        print(f"An error occurred during testing: {e}")