Spaces:
Running
on
Zero
Running
on
Zero
Upload 4 files
Browse files- app.py +4 -2
- joycaption.py +38 -37
app.py
CHANGED
|
@@ -4,7 +4,8 @@ from joycaption import stream_chat_mod, get_text_model, change_text_model, get_r
|
|
| 4 |
|
| 5 |
JC_TITLE_MD = "<h1><center>JoyCaption Alpha Two Mod</center></h1>"
|
| 6 |
JC_DESC_MD = """This space is mod of [fancyfeast/joy-caption-alpha-two](https://huggingface.co/spaces/fancyfeast/joy-caption-alpha-two),
|
| 7 |
-
[Wi-zz/joy-caption-pre-alpha](https://huggingface.co/Wi-zz/joy-caption-pre-alpha).
|
|
|
|
| 8 |
|
| 9 |
css = """
|
| 10 |
.info {text-align:center; !important}
|
|
@@ -65,6 +66,7 @@ with gr.Blocks(fill_width=True, css=css, delete_cache=(60, 3600)) as demo:
|
|
| 65 |
jc_gguf = gr.Dropdown(label=f"GGUF Filename", choices=[], value="",
|
| 66 |
allow_custom_value=True, min_width=320, visible=False)
|
| 67 |
jc_nf4 = gr.Checkbox(label="Use NF4 quantization", value=True)
|
|
|
|
| 68 |
jc_text_model_button = gr.Button("Load Model", variant="secondary", visible=False)
|
| 69 |
jc_use_inference_client = gr.Checkbox(label="Use Inference Client", value=False, visible=False)
|
| 70 |
with gr.Row():
|
|
@@ -81,7 +83,7 @@ with gr.Blocks(fill_width=True, css=css, delete_cache=(60, 3600)) as demo:
|
|
| 81 |
|
| 82 |
jc_run_button.click(fn=stream_chat_mod, inputs=[jc_input_image, jc_caption_type, jc_caption_length, jc_extra_options, jc_name_input, jc_custom_prompt,
|
| 83 |
jc_tokens, jc_topp, jc_temperature, jc_text_model], outputs=[jc_output_prompt, jc_output_caption])
|
| 84 |
-
jc_text_model.change(change_text_model, [jc_text_model, jc_use_inference_client, jc_gguf, jc_nf4], [jc_text_model], show_api=False)
|
| 85 |
#jc_text_model_button.click(change_text_model, [jc_text_model, jc_use_inference_client, jc_gguf, jc_nf4], [jc_text_model], show_api=False)
|
| 86 |
#jc_text_model.change(get_repo_gguf, [jc_text_model], [jc_gguf], show_api=False)
|
| 87 |
#jc_use_inference_client.change(change_text_model, [jc_text_model, jc_use_inference_client], [jc_text_model], show_api=False)
|
|
|
|
| 4 |
|
| 5 |
JC_TITLE_MD = "<h1><center>JoyCaption Alpha Two Mod</center></h1>"
|
| 6 |
JC_DESC_MD = """This space is mod of [fancyfeast/joy-caption-alpha-two](https://huggingface.co/spaces/fancyfeast/joy-caption-alpha-two),
|
| 7 |
+
[Wi-zz/joy-caption-pre-alpha](https://huggingface.co/Wi-zz/joy-caption-pre-alpha).
|
| 8 |
+
Thanks to [dominic1021](https://huggingface.co/dominic1021), [IceHibiki](https://huggingface.co/IceHibiki)."""
|
| 9 |
|
| 10 |
css = """
|
| 11 |
.info {text-align:center; !important}
|
|
|
|
| 66 |
jc_gguf = gr.Dropdown(label=f"GGUF Filename", choices=[], value="",
|
| 67 |
allow_custom_value=True, min_width=320, visible=False)
|
| 68 |
jc_nf4 = gr.Checkbox(label="Use NF4 quantization", value=True)
|
| 69 |
+
jc_lora = gr.Checkbox(label="Use Custom VLM", info="Llama 3 BF16 only", value=True)
|
| 70 |
jc_text_model_button = gr.Button("Load Model", variant="secondary", visible=False)
|
| 71 |
jc_use_inference_client = gr.Checkbox(label="Use Inference Client", value=False, visible=False)
|
| 72 |
with gr.Row():
|
|
|
|
| 83 |
|
| 84 |
jc_run_button.click(fn=stream_chat_mod, inputs=[jc_input_image, jc_caption_type, jc_caption_length, jc_extra_options, jc_name_input, jc_custom_prompt,
|
| 85 |
jc_tokens, jc_topp, jc_temperature, jc_text_model], outputs=[jc_output_prompt, jc_output_caption])
|
| 86 |
+
jc_text_model.change(change_text_model, [jc_text_model, jc_use_inference_client, jc_gguf, jc_nf4, jc_lora], [jc_text_model], show_api=False)
|
| 87 |
#jc_text_model_button.click(change_text_model, [jc_text_model, jc_use_inference_client, jc_gguf, jc_nf4], [jc_text_model], show_api=False)
|
| 88 |
#jc_text_model.change(get_repo_gguf, [jc_text_model], [jc_gguf], show_api=False)
|
| 89 |
#jc_use_inference_client.change(change_text_model, [jc_text_model, jc_use_inference_client], [jc_text_model], show_api=False)
|
joycaption.py
CHANGED
|
@@ -9,7 +9,7 @@ else:
|
|
| 9 |
return func(*args, **kwargs)
|
| 10 |
return wrapper
|
| 11 |
import gradio as gr
|
| 12 |
-
from huggingface_hub import InferenceClient
|
| 13 |
from torch import nn
|
| 14 |
from transformers import AutoModel, AutoProcessor, AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast, AutoModelForCausalLM, LlavaForConditionalGeneration
|
| 15 |
from pathlib import Path
|
|
@@ -18,11 +18,15 @@ import torch.amp.autocast_mode
|
|
| 18 |
from PIL import Image
|
| 19 |
import torchvision.transforms.functional as TVF
|
| 20 |
import gc
|
| 21 |
-
from peft import
|
| 22 |
from typing import Union
|
| 23 |
|
| 24 |
-
|
| 25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
BASE_DIR = Path(__file__).resolve().parent # Define the base directory
|
| 28 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
@@ -38,7 +42,8 @@ llm_models = {
|
|
| 38 |
"unsloth/Meta-Llama-3.1-8B-bnb-4bit": None,
|
| 39 |
"DevQuasar/HermesNova-Llama-3.1-8B": None,
|
| 40 |
"mergekit-community/L3.1-Boshima-b-FIX": None,
|
| 41 |
-
"
|
|
|
|
| 42 |
}
|
| 43 |
|
| 44 |
CLIP_PATH = "google/siglip-so400m-patch14-384"
|
|
@@ -158,25 +163,26 @@ class ImageAdapter(nn.Module):
|
|
| 158 |
# https://huggingface.co/docs/transformers/main/en/peft#enable-and-disable-adapters
|
| 159 |
# https://huggingface.co/docs/transformers/main/quantization/bitsandbytes?bnb=4-bit
|
| 160 |
# https://huggingface.co/lllyasviel/flux1-dev-bnb-nf4
|
|
|
|
|
|
|
| 161 |
tokenizer = None
|
| 162 |
text_model_client = None
|
| 163 |
text_model = None
|
| 164 |
image_adapter = None
|
| 165 |
-
peft_config = None
|
| 166 |
pixtral_model = None
|
| 167 |
pixtral_processor = None
|
| 168 |
-
def load_text_model(model_name: str=MODEL_PATH, gguf_file: Union[str, None]=None, is_nf4: bool=True):
|
| 169 |
-
global tokenizer, text_model, image_adapter,
|
| 170 |
try:
|
| 171 |
tokenizer = None
|
| 172 |
text_model_client = None
|
| 173 |
text_model = None
|
| 174 |
image_adapter = None
|
| 175 |
-
peft_config = None
|
| 176 |
pixtral_model = None
|
| 177 |
pixtral_processor = None
|
| 178 |
torch.cuda.empty_cache()
|
| 179 |
gc.collect()
|
|
|
|
| 180 |
|
| 181 |
from transformers import BitsAndBytesConfig
|
| 182 |
nf4_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4",
|
|
@@ -202,23 +208,25 @@ def load_text_model(model_name: str=MODEL_PATH, gguf_file: Union[str, None]=None
|
|
| 202 |
if device == "cpu":
|
| 203 |
text_model = AutoModelForCausalLM.from_pretrained(model_name, gguf_file=gguf_file, device_map=device, torch_dtype=torch.bfloat16).eval()
|
| 204 |
elif is_nf4:
|
| 205 |
-
text_model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=nf4_config, device_map=device, torch_dtype=torch.bfloat16).eval()
|
| 206 |
else:
|
| 207 |
-
text_model = AutoModelForCausalLM.from_pretrained(model_name, device_map=
|
| 208 |
else:
|
| 209 |
if device == "cpu":
|
| 210 |
text_model = AutoModelForCausalLM.from_pretrained(model_name, gguf_file=gguf_file, device_map=device, torch_dtype=torch.bfloat16).eval()
|
| 211 |
elif is_nf4:
|
| 212 |
text_model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=nf4_config, device_map=device, torch_dtype=torch.bfloat16).eval()
|
| 213 |
else:
|
| 214 |
-
text_model = AutoModelForCausalLM.from_pretrained(model_name, device_map=
|
| 215 |
|
| 216 |
-
if LORA_PATH.exists():
|
| 217 |
print("Loading VLM's custom text model")
|
| 218 |
-
if is_nf4:
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
|
|
|
|
|
|
| 222 |
|
| 223 |
print("Loading image adapter")
|
| 224 |
image_adapter = ImageAdapter(clip_model.config.hidden_size, text_model.config.hidden_size, False, False, 38, False).eval().to("cpu")
|
|
@@ -237,6 +245,7 @@ load_text_model.zerogpu = True
|
|
| 237 |
print("Loading CLIP")
|
| 238 |
clip_processor = AutoProcessor.from_pretrained(CLIP_PATH)
|
| 239 |
clip_model = AutoModel.from_pretrained(CLIP_PATH).vision_model
|
|
|
|
| 240 |
if (CHECKPOINT_PATH / "clip_model.pt").exists():
|
| 241 |
print("Loading VLM's custom vision model")
|
| 242 |
checkpoint = torch.load(CHECKPOINT_PATH / "clip_model.pt", map_location='cpu', weights_only=False)
|
|
@@ -251,15 +260,15 @@ clip_model.eval().requires_grad_(False).to(device)
|
|
| 251 |
#load_text_model(PIXTRAL_PATHS[0])
|
| 252 |
#print(f"pixtral_model: {type(pixtral_model)}") #
|
| 253 |
#print(f"pixtral_processor: {type(pixtral_processor)}") #
|
| 254 |
-
load_text_model()
|
| 255 |
-
print(f"pixtral_model: {type(pixtral_model)}") #
|
| 256 |
-
print(f"pixtral_processor: {type(pixtral_processor)}") #
|
| 257 |
|
| 258 |
@spaces.GPU()
|
| 259 |
@torch.inference_mode()
|
| 260 |
def stream_chat_mod(input_image: Image.Image, caption_type: str, caption_length: Union[str, int], extra_options: list[str], name_input: str, custom_prompt: str,
|
| 261 |
max_new_tokens: int=300, top_p: float=0.9, temperature: float=0.6, model_name: str=MODEL_PATH, progress=gr.Progress(track_tqdm=True)) -> tuple[str, str]:
|
| 262 |
-
global tokenizer, text_model, image_adapter,
|
| 263 |
torch.cuda.empty_cache()
|
| 264 |
gc.collect()
|
| 265 |
|
|
@@ -302,7 +311,6 @@ def stream_chat_mod(input_image: Image.Image, caption_type: str, caption_length:
|
|
| 302 |
print(f"pixtral_model: {type(pixtral_model)}") #
|
| 303 |
print(f"pixtral_processor: {type(pixtral_processor)}") #
|
| 304 |
input_images = [input_image.convert("RGB")]
|
| 305 |
-
#input_prompt = f"[INST]{prompt_str}\n[IMG][/INST]"
|
| 306 |
input_prompt = "[INST]Caption this image:\n[IMG][/INST]"
|
| 307 |
inputs = pixtral_processor(images=input_images, text=input_prompt, return_tensors="pt").to(device)
|
| 308 |
generate_ids = pixtral_model.generate(**inputs, max_new_tokens=max_new_tokens)
|
|
@@ -373,7 +381,7 @@ def stream_chat_mod(input_image: Image.Image, caption_type: str, caption_length:
|
|
| 373 |
attention_mask = torch.ones_like(input_ids)
|
| 374 |
|
| 375 |
# Debugging
|
| 376 |
-
print(f"Input to model: {repr(tokenizer.decode(input_ids[0]))}")
|
| 377 |
|
| 378 |
text_model.to(device)
|
| 379 |
generate_ids = text_model.generate(input_ids, inputs_embeds=input_embeds, attention_mask=attention_mask, max_new_tokens=max_new_tokens,
|
|
@@ -403,19 +411,16 @@ def is_repo_name(s):
|
|
| 403 |
|
| 404 |
|
| 405 |
def is_repo_exists(repo_id):
|
| 406 |
-
from huggingface_hub import HfApi
|
| 407 |
try:
|
| 408 |
api = HfApi(token=HF_TOKEN)
|
| 409 |
if api.repo_exists(repo_id=repo_id): return True
|
| 410 |
else: return False
|
| 411 |
except Exception as e:
|
| 412 |
-
print(f"Error: Failed to connect {repo_id}.")
|
| 413 |
-
print(e)
|
| 414 |
return True # fail open: if the Hub lookup itself errors, assume the repo exists
|
| 415 |
|
| 416 |
|
| 417 |
def is_valid_repo(repo_id):
|
| 418 |
-
from huggingface_hub import HfApi
|
| 419 |
import re
|
| 420 |
try:
|
| 421 |
if not re.fullmatch(r'^[^/,\s\"\']+/[^/,\s\"\']+$', repo_id): return False
|
|
@@ -432,15 +437,13 @@ def get_text_model():
|
|
| 432 |
|
| 433 |
|
| 434 |
def is_gguf_repo(repo_id: str):
|
| 435 |
-
from huggingface_hub import HfApi
|
| 436 |
try:
|
| 437 |
api = HfApi(token=HF_TOKEN)
|
| 438 |
if not is_repo_name(repo_id) or not is_repo_exists(repo_id): return False
|
| 439 |
files = api.list_repo_files(repo_id=repo_id)
|
| 440 |
except Exception as e:
|
| 441 |
-
print(f"Error: Failed to get {repo_id}'s info.")
|
| 442 |
-
|
| 443 |
-
gr.Warning(f"Error: Failed to get {repo_id}'s info.")
|
| 444 |
return False
|
| 445 |
files = [f for f in files if f.endswith(".gguf")]
|
| 446 |
if len(files) == 0: return False
|
|
@@ -448,15 +451,13 @@ def is_gguf_repo(repo_id: str):
|
|
| 448 |
|
| 449 |
|
| 450 |
def get_repo_gguf(repo_id: str):
|
| 451 |
-
from huggingface_hub import HfApi
|
| 452 |
try:
|
| 453 |
api = HfApi(token=HF_TOKEN)
|
| 454 |
if not is_repo_name(repo_id) or not is_repo_exists(repo_id): return gr.update(value="", choices=[])
|
| 455 |
files = api.list_repo_files(repo_id=repo_id)
|
| 456 |
except Exception as e:
|
| 457 |
-
print(f"Error: Failed to get {repo_id}'s info.")
|
| 458 |
-
|
| 459 |
-
gr.Warning(f"Error: Failed to get {repo_id}'s info.")
|
| 460 |
return gr.update(value="", choices=[])
|
| 461 |
files = [f for f in files if f.endswith(".gguf")]
|
| 462 |
if len(files) == 0: return gr.update(value="", choices=[])
|
|
@@ -465,7 +466,7 @@ def get_repo_gguf(repo_id: str):
|
|
| 465 |
|
| 466 |
@spaces.GPU()
|
| 467 |
def change_text_model(model_name: str=MODEL_PATH, use_client: bool=False, gguf_file: Union[str, None]=None,
|
| 468 |
-
is_nf4: bool=True, progress=gr.Progress(track_tqdm=True)):
|
| 469 |
global use_inference_client, llm_models
|
| 470 |
use_inference_client = use_client
|
| 471 |
try:
|
|
@@ -477,7 +478,7 @@ def change_text_model(model_name: str=MODEL_PATH, use_client: bool=False, gguf_f
|
|
| 477 |
if use_inference_client:
|
| 478 |
pass #
|
| 479 |
else:
|
| 480 |
-
load_text_model(model_name, gguf_file, is_nf4)
|
| 481 |
if model_name not in llm_models: llm_models[model_name] = gguf_file if gguf_file else None
|
| 482 |
return gr.update(choices=get_text_model())
|
| 483 |
except Exception as e:
|
|
|
|
| 9 |
return func(*args, **kwargs)
|
| 10 |
return wrapper
|
| 11 |
import gradio as gr
|
| 12 |
+
from huggingface_hub import InferenceClient, HfApi
|
| 13 |
from torch import nn
|
| 14 |
from transformers import AutoModel, AutoProcessor, AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast, AutoModelForCausalLM, LlavaForConditionalGeneration
|
| 15 |
from pathlib import Path
|
|
|
|
| 18 |
from PIL import Image
|
| 19 |
import torchvision.transforms.functional as TVF
|
| 20 |
import gc
|
| 21 |
+
from peft import PeftModel
|
| 22 |
from typing import Union
|
| 23 |
|
| 24 |
+
LOAD_IN_NF4 = True
|
| 25 |
+
|
| 26 |
+
if os.environ.get("SPACES_ZERO_GPU") is not None:
|
| 27 |
+
import subprocess
|
| 28 |
+
LOAD_IN_NF4 = False # If True, the custom VLM LoRA is not applied on the initial load; everything else works either way.
|
| 29 |
+
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
|
| 30 |
|
| 31 |
BASE_DIR = Path(__file__).resolve().parent # Define the base directory
|
| 32 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
|
| 42 |
"unsloth/Meta-Llama-3.1-8B-bnb-4bit": None,
|
| 43 |
"DevQuasar/HermesNova-Llama-3.1-8B": None,
|
| 44 |
"mergekit-community/L3.1-Boshima-b-FIX": None,
|
| 45 |
+
#"chuanli11/Llama-3.2-3B-Instruct-uncensored": None, # Error(s) in loading state_dict for ImageAdapter:\n\tsize mismatch for linear1.weight: copying a param with shape torch.Size([4096, 1152]) from checkpoint, the shape in current model is torch.Size([3072, 1152]).\n\tsize mismatch for linear1.bias: copying a param with shape torch.Size([4096]) from checkpoint,
|
| 46 |
+
"unsloth/Meta-Llama-3.1-8B-Instruct": None,
|
| 47 |
}
|
| 48 |
|
| 49 |
CLIP_PATH = "google/siglip-so400m-patch14-384"
|
|
|
|
| 163 |
# https://huggingface.co/docs/transformers/main/en/peft#enable-and-disable-adapters
|
| 164 |
# https://huggingface.co/docs/transformers/main/quantization/bitsandbytes?bnb=4-bit
|
| 165 |
# https://huggingface.co/lllyasviel/flux1-dev-bnb-nf4
|
| 166 |
+
# https://github.com/huggingface/transformers/issues/28515
|
| 167 |
+
# https://gist.github.com/ChrisHayduk/1a53463331f52dca205e55982baf9930
|
| 168 |
tokenizer = None
|
| 169 |
text_model_client = None
|
| 170 |
text_model = None
|
| 171 |
image_adapter = None
|
|
|
|
| 172 |
pixtral_model = None
|
| 173 |
pixtral_processor = None
|
| 174 |
+
def load_text_model(model_name: str=MODEL_PATH, gguf_file: Union[str, None]=None, is_nf4: bool=True, is_lora: bool=True):
|
| 175 |
+
global tokenizer, text_model, image_adapter, pixtral_model, pixtral_processor, text_model_client, use_inference_client
|
| 176 |
try:
|
| 177 |
tokenizer = None
|
| 178 |
text_model_client = None
|
| 179 |
text_model = None
|
| 180 |
image_adapter = None
|
|
|
|
| 181 |
pixtral_model = None
|
| 182 |
pixtral_processor = None
|
| 183 |
torch.cuda.empty_cache()
|
| 184 |
gc.collect()
|
| 185 |
+
lora_device = "auto"
|
| 186 |
|
| 187 |
from transformers import BitsAndBytesConfig
|
| 188 |
nf4_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4",
|
|
|
|
| 208 |
if device == "cpu":
|
| 209 |
text_model = AutoModelForCausalLM.from_pretrained(model_name, gguf_file=gguf_file, device_map=device, torch_dtype=torch.bfloat16).eval()
|
| 210 |
elif is_nf4:
|
| 211 |
+
text_model = AutoModelForCausalLM.from_pretrained(model_name, gguf_file=gguf_file, quantization_config=nf4_config, device_map=device, torch_dtype=torch.bfloat16).eval()
|
| 212 |
else:
|
| 213 |
+
text_model = AutoModelForCausalLM.from_pretrained(model_name, gguf_file=gguf_file, device_map=lora_device, torch_dtype=torch.bfloat16).eval()
|
| 214 |
else:
|
| 215 |
if device == "cpu":
|
| 216 |
text_model = AutoModelForCausalLM.from_pretrained(model_name, gguf_file=gguf_file, device_map=device, torch_dtype=torch.bfloat16).eval()
|
| 217 |
elif is_nf4:
|
| 218 |
text_model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=nf4_config, device_map=device, torch_dtype=torch.bfloat16).eval()
|
| 219 |
else:
|
| 220 |
+
text_model = AutoModelForCausalLM.from_pretrained(model_name, device_map=lora_device, torch_dtype=torch.bfloat16).eval()
|
| 221 |
|
| 222 |
+
if is_lora and LORA_PATH.exists() and not is_nf4:
|
| 223 |
print("Loading VLM's custom text model")
|
| 224 |
+
if is_nf4: # currently unreachable: the enclosing condition already requires `not is_nf4`
|
| 225 |
+
text_model = PeftModel.from_pretrained(model=text_model, model_id=LORA_PATH, device_map=device, quantization_config=nf4_config)
|
| 226 |
+
else:
|
| 227 |
+
text_model = PeftModel.from_pretrained(model=text_model, model_id=LORA_PATH, device_map=device)
|
| 228 |
+
text_model = text_model.merge_and_unload(safe_merge=True) # to avoid PEFT bug https://github.com/huggingface/transformers/issues/28515
|
| 229 |
+
else: print("VLM's custom text model is not loaded")
|
| 230 |
|
| 231 |
print("Loading image adapter")
|
| 232 |
image_adapter = ImageAdapter(clip_model.config.hidden_size, text_model.config.hidden_size, False, False, 38, False).eval().to("cpu")
|
|
|
|
| 245 |
print("Loading CLIP")
|
| 246 |
clip_processor = AutoProcessor.from_pretrained(CLIP_PATH)
|
| 247 |
clip_model = AutoModel.from_pretrained(CLIP_PATH).vision_model
|
| 248 |
+
assert (CHECKPOINT_PATH / "clip_model.pt").exists()
|
| 249 |
if (CHECKPOINT_PATH / "clip_model.pt").exists():
|
| 250 |
print("Loading VLM's custom vision model")
|
| 251 |
checkpoint = torch.load(CHECKPOINT_PATH / "clip_model.pt", map_location='cpu', weights_only=False)
|
|
|
|
| 260 |
#load_text_model(PIXTRAL_PATHS[0])
|
| 261 |
#print(f"pixtral_model: {type(pixtral_model)}") #
|
| 262 |
#print(f"pixtral_processor: {type(pixtral_processor)}") #
|
| 263 |
+
load_text_model(MODEL_PATH, None, LOAD_IN_NF4, True)
|
| 264 |
+
#print(f"pixtral_model: {type(pixtral_model)}") #
|
| 265 |
+
#print(f"pixtral_processor: {type(pixtral_processor)}") #
|
| 266 |
|
| 267 |
@spaces.GPU()
|
| 268 |
@torch.inference_mode()
|
| 269 |
def stream_chat_mod(input_image: Image.Image, caption_type: str, caption_length: Union[str, int], extra_options: list[str], name_input: str, custom_prompt: str,
|
| 270 |
max_new_tokens: int=300, top_p: float=0.9, temperature: float=0.6, model_name: str=MODEL_PATH, progress=gr.Progress(track_tqdm=True)) -> tuple[str, str]:
|
| 271 |
+
global tokenizer, text_model, image_adapter, pixtral_model, pixtral_processor, text_model_client, use_inference_client
|
| 272 |
torch.cuda.empty_cache()
|
| 273 |
gc.collect()
|
| 274 |
|
|
|
|
| 311 |
print(f"pixtral_model: {type(pixtral_model)}") #
|
| 312 |
print(f"pixtral_processor: {type(pixtral_processor)}") #
|
| 313 |
input_images = [input_image.convert("RGB")]
|
|
|
|
| 314 |
input_prompt = "[INST]Caption this image:\n[IMG][/INST]"
|
| 315 |
inputs = pixtral_processor(images=input_images, text=input_prompt, return_tensors="pt").to(device)
|
| 316 |
generate_ids = pixtral_model.generate(**inputs, max_new_tokens=max_new_tokens)
|
|
|
|
| 381 |
attention_mask = torch.ones_like(input_ids)
|
| 382 |
|
| 383 |
# Debugging
|
| 384 |
+
#print(f"Input to model: {repr(tokenizer.decode(input_ids[0]))}")
|
| 385 |
|
| 386 |
text_model.to(device)
|
| 387 |
generate_ids = text_model.generate(input_ids, inputs_embeds=input_embeds, attention_mask=attention_mask, max_new_tokens=max_new_tokens,
|
|
|
|
| 411 |
|
| 412 |
|
| 413 |
def is_repo_exists(repo_id):
|
|
|
|
| 414 |
try:
|
| 415 |
api = HfApi(token=HF_TOKEN)
|
| 416 |
if api.repo_exists(repo_id=repo_id): return True
|
| 417 |
else: return False
|
| 418 |
except Exception as e:
|
| 419 |
+
print(f"Error: Failed to connect {repo_id}. {e}")
|
|
|
|
| 420 |
return True # fail open: if the Hub lookup itself errors, assume the repo exists
|
| 421 |
|
| 422 |
|
| 423 |
def is_valid_repo(repo_id):
|
|
|
|
| 424 |
import re
|
| 425 |
try:
|
| 426 |
if not re.fullmatch(r'^[^/,\s\"\']+/[^/,\s\"\']+$', repo_id): return False
|
|
|
|
| 437 |
|
| 438 |
|
| 439 |
def is_gguf_repo(repo_id: str):
|
|
|
|
| 440 |
try:
|
| 441 |
api = HfApi(token=HF_TOKEN)
|
| 442 |
if not is_repo_name(repo_id) or not is_repo_exists(repo_id): return False
|
| 443 |
files = api.list_repo_files(repo_id=repo_id)
|
| 444 |
except Exception as e:
|
| 445 |
+
print(f"Error: Failed to get {repo_id}'s info. {e}")
|
| 446 |
+
gr.Warning(f"Error: Failed to get {repo_id}'s info. {e}")
|
|
|
|
| 447 |
return False
|
| 448 |
files = [f for f in files if f.endswith(".gguf")]
|
| 449 |
if len(files) == 0: return False
|
|
|
|
| 451 |
|
| 452 |
|
| 453 |
def get_repo_gguf(repo_id: str):
|
|
|
|
| 454 |
try:
|
| 455 |
api = HfApi(token=HF_TOKEN)
|
| 456 |
if not is_repo_name(repo_id) or not is_repo_exists(repo_id): return gr.update(value="", choices=[])
|
| 457 |
files = api.list_repo_files(repo_id=repo_id)
|
| 458 |
except Exception as e:
|
| 459 |
+
print(f"Error: Failed to get {repo_id}'s info. {e}")
|
| 460 |
+
gr.Warning(f"Error: Failed to get {repo_id}'s info. {e}")
|
|
|
|
| 461 |
return gr.update(value="", choices=[])
|
| 462 |
files = [f for f in files if f.endswith(".gguf")]
|
| 463 |
if len(files) == 0: return gr.update(value="", choices=[])
|
|
|
|
| 466 |
|
| 467 |
@spaces.GPU()
|
| 468 |
def change_text_model(model_name: str=MODEL_PATH, use_client: bool=False, gguf_file: Union[str, None]=None,
|
| 469 |
+
is_nf4: bool=True, is_lora: bool=True, progress=gr.Progress(track_tqdm=True)):
|
| 470 |
global use_inference_client, llm_models
|
| 471 |
use_inference_client = use_client
|
| 472 |
try:
|
|
|
|
| 478 |
if use_inference_client:
|
| 479 |
pass #
|
| 480 |
else:
|
| 481 |
+
load_text_model(model_name, gguf_file, is_nf4, is_lora)
|
| 482 |
if model_name not in llm_models: llm_models[model_name] = gguf_file if gguf_file else None
|
| 483 |
return gr.update(choices=get_text_model())
|
| 484 |
except Exception as e:
|