Image-Text-to-Text
Transformers
paligemma-3b-pt-224-a16w8 / gen_models--google--paligemma-3b-pt-224.py
Dezvi's picture
Upload folder using huggingface_hub
d720448 verified
#**************************************************************************
#|| SiMa.ai CONFIDENTIAL ||
#|| Unpublished Copyright (c) 2025 SiMa.ai, All Rights Reserved. ||
#**************************************************************************
# NOTICE: All information contained herein is, and remains the property of
# SiMa.ai. The intellectual and technical concepts contained herein are
# proprietary to SiMa and may be covered by U.S. and Foreign Patents,
# patents in process, and are protected by trade secret or copyright law.
#
# Dissemination of this information or reproduction of this material is
# strictly forbidden unless prior written permission is obtained from
# SiMa.ai. Access to the source code contained herein is hereby forbidden
# to anyone except current SiMa.ai employees, managers or contractors who
# have executed Confidentiality and Non-disclosure agreements explicitly
# covering such access.
#
# The copyright notice above does not evidence any actual or intended
# publication or disclosure of this source code, which includes information
# that is confidential and/or proprietary, and is a trade secret, of SiMa.ai.
#
# ANY REPRODUCTION, MODIFICATION, DISTRIBUTION, PUBLIC PERFORMANCE, OR PUBLIC
# DISPLAY OF OR THROUGH USE OF THIS SOURCE CODE WITHOUT THE EXPRESS WRITTEN
# CONSENT OF SiMa.ai IS STRICTLY PROHIBITED, AND IN VIOLATION OF APPLICABLE
# LAWS AND INTERNATIONAL TREATIES. THE RECEIPT OR POSSESSION OF THIS SOURCE
# CODE AND/OR RELATED INFORMATION DOES NOT CONVEY OR IMPLY ANY RIGHTS TO
# REPRODUCE, DISCLOSE OR DISTRIBUTE ITS CONTENTS, OR TO MANUFACTURE, USE, OR
# SELL ANYTHING THAT IT MAY DESCRIBE, IN WHOLE OR IN PART.
#
#**************************************************************************
import logging
from pathlib import Path
from afe.apis.error_handling_variables import enable_verbose_error_messages
from sima_utils.transformer.model import FileGenMode, FileGenPrecision, VisionLanguageModel
def gen_files(model_path: Path, num_processes: int, resume: bool):
    """Build the vision-language model from a HuggingFace cache and generate its files.

    Args:
        model_path: Location of the model in the HuggingFace cache. Its final
            path component is used both as the model name and as the parent
            directory of the ``onnx_files`` and ``sima_files`` output paths.
        num_processes: Forwarded unchanged to ``model.gen_files``.
        resume: Forwarded unchanged to ``model.gen_files``.
    """
    enable_verbose_error_messages()

    # Fixed generation settings for this model.
    token_limit = 512
    future_token_mask_size = 128

    vlm = VisionLanguageModel.from_hf_cache(
        hf_cache_path=model_path,
        model_name=model_path.name,
        onnx_path=Path(f"{model_path.name}/onnx_files"),
        sima_path=Path(f"{model_path.name}/sima_files"),
        max_num_tokens=token_limit,
        system_prompt=None,
        override_language_future_token_mask_size=future_token_mask_size,
    )

    # Precision per component key: BF16 for "vision", BF16 activations with
    # INT8 weights (per the enum member name) for "group" and "single".
    precision_by_component = {
        "vision": FileGenPrecision.BF16,
        "group": FileGenPrecision.A_BF16_W_INT8,
        "single": FileGenPrecision.A_BF16_W_INT8,
    }

    vlm.gen_files(
        FileGenMode.ALL,
        precision=precision_by_component,
        log_level=logging.INFO,
        num_processes=num_processes,
        resume=resume,
    )
if __name__ == "__main__":
    import argparse

    # The HuggingFace google/paligemma-3b-pt-224 model can be downloaded with:
    #   huggingface-cli download google/paligemma-3b-pt-224
    cli = argparse.ArgumentParser(description="VLM generate file arguments")
    cli.add_argument("--model_path", type=Path, required=True)
    cli.add_argument("--num_processes", type=int, default=1)
    # store_true already defaults to False when the flag is absent.
    cli.add_argument("--resume", action="store_true")
    options = cli.parse_args()

    # Echo the parsed arguments immediately (flushed) before starting generation.
    print("Arguments:", options, flush=True)
    gen_files(options.model_path, options.num_processes, options.resume)