init

Files changed (3) hide show

config.json ADDED Viewed

+{
+    "model_type": "llama",
+    "vocab_size": 32000,
+    "hidden_size": 4096,
+    "num_attention_heads": 32,
+    "num_hidden_layers": 24,
+    "intermediate_size": 11008,
+    "max_position_embeddings": 2048,
+    "use_cache": true,
+    "layer_norm_epsilon": 1e-5,
+    "activation_function": "gelu_new",
+    "rotary_dim": 64,
+    "quantization": {
+        "format": "gguf",
+        "bits": 4
+    }
+}

example.py ADDED Viewed

+# pip install llama-cpp-python --prefer-binary --extra-index-url=https://jllllll.github.io/llama-cpp-python-cuBLAS-wheels/AVX2/cu118
+from llama_cpp import Llama
+llm = Llama(
+    model_path="recurv_llama_13B.gguf",
+    n_ctx=2048,         # Context window
+    n_threads=4         # Number of CPU threads to use
+)
+prompt = "What is Paracetamol?"
+output = llm(
+    prompt,
+    max_tokens=256,     # Maximum number of tokens to generate
+    temperature=0.5,    # Controls randomness (0.0 = deterministic, 1.0 = creative)
+    top_p=0.95,         # Nucleus sampling parameter
+    stop=["###"],       # Optional stop words
+    echo=True           # Include prompt in the output
+)
+# Print the generated text
+print(output['choices'][0]['text'])

requirements.txt ADDED Viewed

+tokenizers
+transformers
+torch
+safetensors
+gguf
+llama-cpp-python