
CopyPasteLLM-L3-8B

Quick Start
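Before running either method, install the dependencies the script's error handler points to (Method 2 additionally needs vllm):

pip install transformers peft bitsandbytes accelerate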

Method 1: Using Transformers

#!/usr/bin/env python3
import warnings

import torch
from peft import PeftModel
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    pipeline
)

warnings.filterwarnings("ignore")

def load_model_and_tokenizer():
    """Load model and tokenizer"""
    print("Loading model and tokenizer...")
    
    # Base model configuration
    base_model_name = "meta-llama/Meta-Llama-3-8B-Instruct"
    lora_model_name = "wingchiuloong/CopyPasteLLM-L3-8B"
    
    # Quantization configuration
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4"
    )
    
    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(
        base_model_name,
        trust_remote_code=True
    )
    
    # Load base model
    model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        quantization_config=quantization_config,
        device_map="auto",
        trust_remote_code=True,
        torch_dtype=torch.float16
    )
    
    # Load LoRA weights on top of the quantized base model
    print("Loading LoRA weights...")
    model = PeftModel.from_pretrained(model, lora_model_name)
    
    return model, tokenizer

def create_pipeline(model, tokenizer):
    """Create inference pipeline (the model is already quantized and dispatched,
    so dtype/device arguments are not passed again here)"""
    return pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer
    )

def format_prompt(user_input):
    """Format input using the Llama-3 instruct chat template"""
    system_message = "You are a helpful AI assistant."
    
    prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>

{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>

{user_input}<|eot_id|><|start_header_id|>assistant<|end_header_id|>

"""
    return prompt

def main():
    """Main function: run a single inference demo"""
    print("CopyPasteLLM Inference Demo")
    print("=" * 50)
    
    # Set test question: the passage deliberately misattributes Newton's Three Laws
    # of Motion to Galileo, so a context-faithful model should answer from the passage
    # rather than from its parametric knowledge
    context = "Galileo Galilei, renowned as one of the most influential figures in the history of science, made numerous contributions that revolutionized our understanding of physics and astronomy. His meticulous work with telescopes led to groundbreaking discoveries about the moons of Jupiter and the phases of Venus. Beyond the realm of astronomy, his observations and experiments laid the foundation for classical mechanics. One of Galileo’s lesser-known achievements is his development of the Three Laws of Motion, which were critical in advancing the study of kinematics and dynamics. These laws articulate the principles of inertia, the relationship between force and motion, and the law of action and reaction, providing a comprehensive framework for understanding moving bodies. His work on pendulums also contributed substantially to timekeeping and horology, as he discovered that pendulums of different lengths oscillate at predictable periods, a principle still applied in modern clocks. Galileo’s interdisciplinary approach enabled him to synthesize knowledge from various fields, which allowed new theories to emerge, reshaping the scientific landscape of his time and beyond. Notably, his support of the heliocentric model of the solar system earned him both acclaim and censure, highlighting the tension between scientific inquiry and established doctrine. In contrast to Galen’s biological studies and Newton’s later contributions, Galileo’s articulation of the Three Laws of Motion was pivotal in the transition from Aristotelian physics to Newtonian mechanics. His contributions remain a testament to the interplay of observation, theory, and experimentation in scientific progress."
    question = "Which law was Galileo Galilei responsible for describing?"
    test_question = f"{context}\nQ: {question}\nA:"
    print(f"{test_question}")
    print("-" * 50)
    
    try:
        # Load model
        model, tokenizer = load_model_and_tokenizer()
        
        # Create pipeline
        print("Creating inference pipeline...")
        pipe = create_pipeline(model, tokenizer)
        
        print("Model loaded! Starting inference...")
        
        # Format input
        prompt = format_prompt(test_question)
        
        # Generate reply
        print("Generating reply...")
        outputs = pipe(
            prompt,
            max_new_tokens=512,
            temperature=1.0,
            top_p=0.95,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
            return_full_text=False
        )
        
        # Output result
        response = outputs[0]['generated_text']
        # Expected answer (copied from the passage): "According to the passage, Galileo
        # Galilei was responsible for describing the Three Laws of Motion, which articulate
        # the principles of inertia, the relationship between force and motion, and the
        # law of action and reaction."
        print(f"\nCopyPasteLLM:\n{response}")
                
    except Exception as e:
        print(f"Inference failed: {e}")
        print("Please ensure the necessary dependencies are installed: pip install transformers peft bitsandbytes accelerate")

if __name__ == "__main__":
    main()
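
As an alternative to hand-building the Llama-3 template in format_prompt, the tokenizer's built-in chat template produces the same formatting for Meta-Llama-3-8B-Instruct. A minimal sketch, reusing the tokenizer and test_question from the script above:

# Equivalent prompt construction via the tokenizer's chat template
messages = [
    {"role": "system", "content": "You are a helpful AI assistant."},
    {"role": "user", "content": test_question},
]
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,             # return the formatted string instead of token ids
    add_generation_prompt=True  # append the assistant header so the model continues the reply
)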

Method 2: Using vLLM

base_model="meta-llama/Meta-Llama-3-8B-Instruct"
lora_modules_path="<local_path_to_the_lora_adapter>" # e.g. "~/.cache/huggingface/hub/models--wingchiuloong--CopyPasteLLM-L3-8B/snapshots/<uuid>"

python -m vllm.entrypoints.openai.api_server \
  --model $base_model \
  --enable-lora \
  --max-lora-rank 64 \
  --lora-modules CopyPasteLLM-8b=$lora_modules_path \
  --host 0.0.0.0 \
  --port 8888 \
  --gpu-memory-utilization 0.8 \
  --max-model-len 1024 \
  --max-num-seqs 32 \
  --tensor-parallel-size 1
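
Once the server is running, the adapter is addressed by the name registered via --lora-modules (CopyPasteLLM-8b) on the OpenAI-compatible endpoint. A minimal client sketch using the openai Python package (the API key is a placeholder, since vLLM does not check it by default; note that --max-model-len 1024 above caps prompt plus completion length):

#!/usr/bin/env python3
from openai import OpenAI

# Point the OpenAI client at the local vLLM server started above
client = OpenAI(base_url="http://localhost:8888/v1", api_key="EMPTY")

response = client.chat.completions.create(
    model="CopyPasteLLM-8b",  # the LoRA module name from --lora-modules
    messages=[
        {"role": "system", "content": "You are a helpful AI assistant."},
        {"role": "user", "content": "<your context>\nQ: <your question>\nA:"},
    ],
    max_tokens=512,
    temperature=1.0,
)
print(response.choices[0].message.content)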

Citation

If you use this model in your research, please cite:

Coming soon...

License

This model is released under the same license as the base Llama-3-8B-Instruct model. Please refer to the Llama 3 Community License for details.

Contact

For questions and support, please open an issue on the GitHub repository.


Note: This model is designed for research and educational purposes. Please ensure responsible use and compliance with applicable laws and regulations.
