"""Gradio Space that simulates a CodeAct-style coding agent.

The agent (CodeActAgent-Mistral-7b) emits Python code wrapped in
<execute>...</execute> tags. This script extracts that code, runs it in a
restricted ``exec()`` sandbox, feeds the captured stdout back to the model as
an observation, and streams the whole interaction to a Gradio UI.
"""

import builtins
import io
import logging
import re
import sys

import gradio as gr
import spaces  # Required for ZeroGPU
import torch
from transformers import pipeline

# Suppress noisy transformers warnings.
logging.getLogger("transformers").setLevel(logging.ERROR)

# System prompt instructing the model to emit code inside <execute> tags.
# NOTE(review): the original file had the <execute> tags stripped (likely by
# HTML sanitization); they are restored here to match CodeActAgent's format
# and the extraction regex below.
system_prompt = """
A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user’s questions. The assistant can interact with an interactive Python (Jupyter Notebook) environment and receive the corresponding output when needed. The code should be enclosed using "<execute>" tag, for example: <execute> print("Hello World!") </execute>. The assistant should attempt fewer things at a time instead of putting too much code in one block. The assistant can install packages through PIP by <execute> !pip install [package needed] </execute> and should always import packages and define variables before starting to use them. For algorithms that return values (like search functions), ALWAYS define them as proper functions with def function_name(...): ... return value ... and then call the function to print or use the result. Do NOT use return outside a function. The assistant should stop and provide an answer when they have already obtained the answer from the execution result. Whenever possible, execute the code for the user using <execute> instead of providing it. The assistant’s response should be concise, but do express their thoughts. Once the task is complete and tested successfully (e.g., correct index printed), stop generating more code and say 'Task complete' without <execute> tags.
"""

# Example prompts surfaced in the Gradio UI.
prompt_list = [
    "Print 'Hello, World!' using code. Once done, stop.",
    "Compute and print the sum of numbers from 1 to 10 using a loop. Use code to do it.",
    "Define a function def add(a, b) that returns a + b. Test it by calling add(3, 4) and printing the result.",
    "Print the length of numbers from 0 to 9 using len and range.",
    "Create a list [1,2,3] and print its length. Use code.",
    "Implement a function def factorial(n) to compute factorial recursively. Test on 5 and print result.",
    "Try to import math and print math.sqrt(16). If needed, install packages.",
    "Find if 10 is even or odd using a function def is_even(n), return True/False, test and print.",
    "Implement linear search to find index of 7 in [3,5,7,9], return -1 if not found. Test and print.",
    "Cause a deliberate error like divide by zero, then fix it in next step and print 10 / 2.",
]

# Load the model globally on CPU (device_map=None) to avoid CUDA errors at
# Space startup; it is moved to the GPU inside the @spaces.GPU-decorated
# function, where ZeroGPU guarantees a device is allocated.
pipe = pipeline(
    "text-generation",
    model="xingyaoww/CodeActAgent-Mistral-7b-v0.1",
    device_map=None,
    torch_dtype=torch.float16,  # float16 for init; no 'auto'
)


@spaces.GPU(duration=180)  # 180s budget for queue + generation turns
def run_agent(user_content):
    """Run the agent loop for one user prompt, streaming UI updates.

    Yields ``[current_code, current_exec_output, full_log]`` tuples after
    every state change so Gradio can render progress live.

    Args:
        user_content: The user's task prompt.
    """
    full_log = ""
    current_code = ""
    current_exec_output = ""
    yield [current_code, current_exec_output, full_log]  # Initial empty

    full_log += "Allocating GPU... (may queue if busy)\n\n"
    yield [current_code, current_exec_output, full_log]
    full_log += "Moving model to GPU and initializing...\n\n"
    yield [current_code, current_exec_output, full_log]

    # Move the globally-loaded model onto the ZeroGPU-allocated device.
    device = torch.device('cuda')
    pipe.model.to(device)
    pipe.device = device

    # Conversation state.
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_content},
    ]

    # REPL state with a restricted builtins namespace. NOTE(review): exec()
    # on model output is inherently unsafe; the whitelist limits, but does
    # not eliminate, the blast radius.
    repl_globals = {
        '__builtins__': {
            k: v for k, v in builtins.__dict__.items()
            if k in ['print', 'len', 'range', 'int']
        }
    }

    # Previously executed code blocks, used for infinite-loop detection.
    prev_codes = set()

    max_turns = 10
    turn = 0
    exec_output = None  # Sentinel: no code has executed yet.

    full_log += f"### Starting simulation for prompt: '{user_content}'\n\n"
    yield [current_code, current_exec_output, full_log]

    while turn < max_turns:
        # Generate the next assistant turn.
        result = pipe(messages, max_new_tokens=512)
        assistant_content = result[0]['generated_text'][-1]['content']
        full_log += f"**Assistant (Turn {turn+1}):** {assistant_content}\n\n"
        yield [current_code, current_exec_output, full_log]

        # Stop when the model signals completion.
        if re.search(r'(task complete|done|final answer)', assistant_content.lower()):
            full_log += "Detected completion keyword. Stopping.\n"
            yield [current_code, current_exec_output, full_log]
            break

        # Extract code from the <execute>...</execute> tags. (Fix: the
        # original pattern r'(.*?)' always matched an empty string, so no
        # code was ever executed.)
        execute_match = re.search(r'<execute>(.*?)</execute>', assistant_content, re.DOTALL)
        if not execute_match:
            full_log += "No code to execute. Task likely complete.\n"
            yield [current_code, current_exec_output, full_log]
            break
        code = execute_match.group(1).strip()
        current_code = code

        # Loop detection: identical code twice means the agent is stuck.
        if code in prev_codes:
            full_log += "Repeated code detected. Possible infinite loop—stopping.\n"
            yield [current_code, current_exec_output, full_log]
            break
        prev_codes.add(code)

        full_log += f"**Executing code:**\n```\n{code}\n```\n\n"
        yield [current_code, current_exec_output, full_log]

        # Execute while capturing stdout; always restore sys.stdout.
        old_stdout = sys.stdout
        sys.stdout = io.StringIO()
        try:
            exec(code, repl_globals)
            exec_output = sys.stdout.getvalue().strip() or "No output."
        except Exception as e:
            exec_output = f"Error: {str(e)}"
        finally:
            sys.stdout = old_stdout

        current_exec_output = exec_output
        full_log += f"**Execution Output:** {exec_output}\n\n"
        yield [current_code, current_exec_output, full_log]

        # Success heuristic: a bare integer (e.g. a search index) means done.
        if re.match(r'^\d+$', exec_output.strip()):
            full_log += "Pure index output detected. Task successful—stopping.\n"
            yield [current_code, current_exec_output, full_log]
            break

        # Feed the execution result back as an observation.
        messages.append({"role": "assistant", "content": assistant_content})
        messages.append({"role": "user", "content": f"Observation: {exec_output}"})
        turn += 1

    # Final parse: grab a trailing number as the result index, if any.
    if exec_output is not None:
        final_index = re.search(r'(\d+)$', exec_output)
        if final_index:
            full_log += f"**Extracted Result:** Index {final_index.group(1)}\n"
            yield [current_code, current_exec_output, full_log]
        else:
            full_log += "No clear index found—check errors.\n"
            yield [current_code, current_exec_output, full_log]
    else:
        full_log += "No execution output.\n"
        yield [current_code, current_exec_output, full_log]

    full_log += f"### End of simulation for prompt: '{user_content}'\n"
    yield [current_code, current_exec_output, full_log]


# Gradio interface: prompt box, streamed code/output panes, and a full log.
with gr.Blocks(title="Code Agent Simulator") as demo:
    gr.Markdown(
        "# Code Agent Simulator on Hugging Face Spaces\n"
        "Enter a coding task prompt, and watch the agent simulate execution in real-time."
    )
    input_prompt = gr.Textbox(label="Enter your prompt", placeholder="e.g., Implement binary search...")
    with gr.Row():
        generated_code = gr.Code(label="Generated Code", language="python", lines=15, show_label=True)
        exec_output = gr.Textbox(label="Execution Output", lines=15, show_label=True)
    full_log = gr.Textbox(label="Full Simulation Log", lines=20, autoscroll=True, show_label=True)
    run_button = gr.Button("Run Simulation")
    examples = gr.Examples(examples=prompt_list, inputs=[input_prompt])

    # On click, run the generator and stream to multiple outputs.
    run_button.click(fn=run_agent, inputs=input_prompt, outputs=[generated_code, exec_output, full_log])

# Launch (disable SSR for stability, enable debug for logs).
if __name__ == "__main__":
    demo.queue().launch(ssr_mode=False, debug=True)