Recurv committed
Commit adf42a8 · verified · Parent: 002571e
Files changed (3)
  1. config.json +17 -0
  2. example.py +21 -0
  3. requirements.txt +6 -0
config.json ADDED
@@ -0,0 +1,17 @@
+{
+    "model_type": "llama",
+    "vocab_size": 32000,
+    "hidden_size": 4096,
+    "num_attention_heads": 32,
+    "num_hidden_layers": 24,
+    "intermediate_size": 11008,
+    "max_position_embeddings": 2048,
+    "use_cache": true,
+    "layer_norm_epsilon": 1e-5,
+    "activation_function": "gelu_new",
+    "rotary_dim": 64,
+    "quantization": {
+        "format": "gguf",
+        "bits": 4
+    }
+}
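
Since config.json is plain JSON, its hyperparameters can be inspected with the standard library alone; a minimal sketch, with field names taken from the file above:

import json

# Load the repo's config.json and print the architecture hyperparameters.
with open("config.json") as f:
    cfg = json.load(f)

print(cfg["model_type"])                              # "llama"
print(cfg["hidden_size"], cfg["num_hidden_layers"])   # 4096 24
print(cfg["quantization"])                            # {'format': 'gguf', 'bits': 4}
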
example.py ADDED
@@ -0,0 +1,21 @@
+# pip install llama-cpp-python --prefer-binary --extra-index-url=https://jllllll.github.io/llama-cpp-python-cuBLAS-wheels/AVX2/cu118
+from llama_cpp import Llama
+
+llm = Llama(
+    model_path="recurv_llama_13B.gguf",
+    n_ctx=2048,     # Context window
+    n_threads=4     # Number of CPU threads to use
+)
+
+prompt = "What is Paracetamol?"
+output = llm(
+    prompt,
+    max_tokens=256,    # Maximum number of tokens to generate
+    temperature=0.5,   # Controls randomness (0.0 = deterministic, 1.0 = creative)
+    top_p=0.95,        # Nucleus sampling parameter
+    stop=["###"],      # Optional stop sequences
+    echo=True          # Include the prompt in the output
+)
+
+# Print the generated text
+print(output['choices'][0]['text'])
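
For interactive use, llama-cpp-python's completion call also accepts stream=True, which returns an iterator of partial completions instead of a single dict; a sketch reusing the same model file and settings as example.py above:

from llama_cpp import Llama

llm = Llama(
    model_path="recurv_llama_13B.gguf",  # same file as in example.py
    n_ctx=2048,
    n_threads=4
)

# With stream=True the call yields completion chunks, so tokens can be
# printed as they are generated instead of waiting for the full answer.
for chunk in llm("What is Paracetamol?", max_tokens=256, temperature=0.5, stream=True):
    print(chunk["choices"][0]["text"], end="", flush=True)
print()
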
requirements.txt ADDED
@@ -0,0 +1,6 @@
+tokenizers
+transformers
+torch
+safetensors
+gguf
+llama-cpp-python
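
After installing with pip install -r requirements.txt, a quick import check confirms the environment is usable; note that the llama-cpp-python package is imported as llama_cpp:

# Verify each requirement imports; only llama-cpp-python differs
# between its pip name and its Python module name (llama_cpp).
import tokenizers
import transformers
import torch
import safetensors
import gguf
import llama_cpp

print("transformers", transformers.__version__)
print("torch", torch.__version__)
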