from transformers import AutoModelForCausalLM, AutoConfig
from transformers.image_utils import load_image
from Qwenov3Config import Qwenov3Config, Qwenov3, Qwenov3Processor
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_path = 'TianYeZ1214/Qwenov3'

# Register the custom architecture so the Auto* classes can resolve it
AutoConfig.register("Qwenov3", Qwenov3Config)
AutoModelForCausalLM.register(Qwenov3Config, Qwenov3)

model = AutoModelForCausalLM.from_pretrained(
    model_path,
    low_cpu_mem_usage=True,
    dtype=torch.bfloat16,
    trust_remote_code=True,
    attn_implementation="flash_attention_2",
).to(device)
processor = Qwenov3Processor(image_processor=model.processor, tokenizer=model.tokenizer)
model.eval()

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Describe the image content"},
]
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = load_image(url)

q_text = processor.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    enable_thinking=False,
)
inputs = processor(
    text=[q_text],
    images=image,
    padding=True,
    return_tensors="pt",
).to(device)

output_ids = model.generate(
    **inputs,
    max_new_tokens=512,
    temperature=0.7,
    top_k=20,
    top_p=0.8,
    do_sample=True,
    repetition_penalty=1.1,
)
output_ids = output_ids[0].tolist()

# Drop everything up to the last </think> token (id 151668); if no thinking
# block was produced (e.g. enable_thinking=False), keep the full output
try:
    index = len(output_ids) - output_ids[::-1].index(151668)
except ValueError:
    index = 0
content = processor.decode(output_ids[index:], skip_special_tokens=True)
print("content:", content)
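
# Optional: stream tokens to stdout as they are generated instead of waiting
# for the full sequence. A minimal sketch using transformers' stock TextStreamer
# utility; it assumes `model`, `processor`, and `inputs` were prepared as above
# and that model.tokenizer is a standard Hugging Face tokenizer.
from transformers import TextStreamer

streamer = TextStreamer(model.tokenizer, skip_prompt=True, skip_special_tokens=True)
_ = model.generate(
    **inputs,
    streamer=streamer,
    max_new_tokens=512,
    temperature=0.7,
    top_k=20,
    top_p=0.8,
    do_sample=True,
    repetition_penalty=1.1,
)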