Upload folder using huggingface_hub

Browse files

Files changed (6) hide show

README.md +264 -0
config.json +44 -0
model.safetensors +3 -0
special_tokens_map.json +30 -0
tokenizer.json +0 -0
tokenizer_config.json +59 -0

README.md ADDED Viewed

	@@ -0,0 +1,264 @@

+---
+base_model:
+- ai4bharat/Airavata
+language:
+- en
+- hi
+license: llama2
+tags:
+- bnb-my-repo
+- multilingual
+- instruction-tuning
+- llama2
+datasets:
+- ai4bharat/indic-instruct-data-v0.1
+model-index:
+- name: Airavata
+  results:
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: AI2 Reasoning Challenge (25-Shot)
+      type: ai2_arc
+      config: ARC-Challenge
+      split: test
+      args:
+        num_few_shot: 25
+    metrics:
+    - type: acc_norm
+      value: 46.5
+      name: normalized accuracy
+    source:
+      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=ai4bharat/Airavata
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: HellaSwag (10-Shot)
+      type: hellaswag
+      split: validation
+      args:
+        num_few_shot: 10
+    metrics:
+    - type: acc_norm
+      value: 69.26
+      name: normalized accuracy
+    source:
+      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=ai4bharat/Airavata
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: MMLU (5-Shot)
+      type: cais/mmlu
+      config: all
+      split: test
+      args:
+        num_few_shot: 5
+    metrics:
+    - type: acc
+      value: 43.9
+      name: accuracy
+    source:
+      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=ai4bharat/Airavata
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: TruthfulQA (0-shot)
+      type: truthful_qa
+      config: multiple_choice
+      split: validation
+      args:
+        num_few_shot: 0
+    metrics:
+    - type: mc2
+      value: 40.62
+    source:
+      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=ai4bharat/Airavata
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: Winogrande (5-shot)
+      type: winogrande
+      config: winogrande_xl
+      split: validation
+      args:
+        num_few_shot: 5
+    metrics:
+    - type: acc
+      value: 68.82
+      name: accuracy
+    source:
+      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=ai4bharat/Airavata
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: GSM8k (5-shot)
+      type: gsm8k
+      config: main
+      split: test
+      args:
+        num_few_shot: 5
+    metrics:
+    - type: acc
+      value: 4.02
+      name: accuracy
+    source:
+      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=ai4bharat/Airavata
+      name: Open LLM Leaderboard
+---
+# ai4bharat/Airavata (Quantized)
+## Description
+This model is a quantized version of the original model [`ai4bharat/Airavata`](https://huggingface.co/ai4bharat/Airavata).
+It's quantized using the BitsAndBytes library to 4-bit using the [bnb-my-repo](https://huggingface.co/spaces/bnb-community/bnb-my-repo) space.
+## Quantization Details
+- **Quantization Type**: int4
+- **bnb_4bit_quant_type**: nf4
+- **bnb_4bit_use_double_quant**: False
+- **bnb_4bit_compute_dtype**: bfloat16
+- **bnb_4bit_quant_storage**: int8
+# 📄 Original Model Information
+# Airavata
+This model is a 7B [OpenHathi](https://huggingface.co/sarvamai/OpenHathi-7B-Hi-v0.1-Base) model finetuned on [IndicInstruct dataset](https://huggingface.co/datasets/ai4bharat/indic-instruct-data-v0.1)
+which is a collection of instruction datasets (Anudesh, wikiHow, Flan v2, Dolly, Anthropic-HHH, OpenAssistant v1, and LymSys-Chat).
+Please check the corresponding huggingface dataset card for more details.
+This was trained as part of the technical report [Airavata: Introducing Hindi Instruction-tuned LLM](https://arxiv.org/abs/2401.15006).
+The codebase used to train and evaluate this model can be found at [https://github.com/AI4Bharat/IndicInstruct](https://github.com/AI4Bharat/IndicInstruct).
+## Usage
+Clone [https://github.com/AI4Bharat/IndicInstruct](https://github.com/AI4Bharat/IndicInstruct) and install the required dependencies. Then download or clone this model to the same machine.
+## Input Format
+The model is trained to use the chat format similar to [open-instruct code repository](https://github.com/allenai/open-instruct) (note the newlines):
+```
+<|user|>
+Your message here!
+<|assistant|>
+```
+For best results, format all inputs in this manner. **Make sure to include a newline after `<|assistant|>`, this can affect generation quality quite a bit.**
+## Hyperparameters
+We fine-tune OpenHathi base model on the aforementioned IndicInstruct dataset with LoRA. The hyperparameters for the LoRA fine-tuning are listed below:
+- LoRA Rank: 16
+- LoRA alpha: 32
+- LoRA Dropout: 0.05
+- LoRA Target Modules: ["q_proj", "v_proj", "k_proj", "down_proj", "gate_proj", "up_proj"]
+- Epochs: 4
+- Learning rate: 5e-4
+- Batch Size: 128
+- Floating Point Precision: bfloat16
+We recommend the readers to check out [our official blog post](https://ai4bharat.github.io/airavata) for more details on the model training, ablations and evaluation results.
+## Example
+```python3
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
+device = "cuda" if torch.cuda.is_available() else "cpu"
+def create_prompt_with_chat_format(messages, bos="<s>", eos="</s>", add_bos=True):
+    formatted_text = ""
+    for message in messages:
+        if message["role"] == "system":
+            formatted_text += "<|system|>\n" + message["content"] + "\n"
+        elif message["role"] == "user":
+            formatted_text += "<|user|>\n" + message["content"] + "\n"
+        elif message["role"] == "assistant":
+            formatted_text += "<|assistant|>\n" + message["content"].strip() + eos + "\n"
+        else:
+            raise ValueError(
+                "Tulu chat template only supports 'system', 'user' and 'assistant' roles. Invalid role: {}.".format(
+                    message["role"]
+                )
+            )
+    formatted_text += "<|assistant|>\n"
+    formatted_text = bos + formatted_text if add_bos else formatted_text
+    return formatted_text
+def inference(input_prompts, model, tokenizer):
+    input_prompts = [
+        create_prompt_with_chat_format([{"role": "user", "content": input_prompt}], add_bos=False)
+        for input_prompt in input_prompts
+    ]
+    encodings = tokenizer(input_prompts, padding=True, return_tensors="pt")
+    encodings = encodings.to(device)
+    with torch.inference_mode():
+        outputs = model.generate(encodings.input_ids, do_sample=False, max_new_tokens=250)
+    output_texts = tokenizer.batch_decode(outputs.detach(), skip_special_tokens=True)
+    input_prompts = [
+        tokenizer.decode(tokenizer.encode(input_prompt), skip_special_tokens=True) for input_prompt in input_prompts
+    ]
+    output_texts = [output_text[len(input_prompt) :] for input_prompt, output_text in zip(input_prompts, output_texts)]
+    return output_texts
+model_name = "ai4bharat/Airavata"
+tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")
+tokenizer.pad_token = tokenizer.eos_token
+model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16).to(device)
+input_prompts = [
+    "मैं अपने समय प्रबंधन कौशल को कैसे सुधार सकता हूँ? मुझे पांच बिंदु बताएं।",
+    "मैं अपने समय प्रबंधन कौशल को कैसे सुधार सकता हूँ? मुझे पांच बिंदु बताएं और उनका वर्णन करें।",
+]
+outputs = inference(input_prompts, model, tokenizer)
+print(outputs)
+```
+## Citation
+```bibtex
+@article{gala2024airavata,
+  title   = {Airavata: Introducing Hindi Instruction-tuned LLM},
+  author  = {Jay Gala and Thanmay Jayakumar and Jaavid Aktar Husain and Aswanth Kumar M and Mohammed Safi Ur Rahman Khan and Diptesh Kanojia and Ratish Puduppully and Mitesh M. Khapra and Raj Dabre and Rudra Murthy and Anoop Kunchukuttan},
+  year    = {2024},
+  journal = {arXiv preprint arXiv: 2401.15006}
+}
+```
+# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
+Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_ai4bharat__Airavata)
+|             Metric              |Value|
+|---------------------------------|----:|
+|Avg.                             |45.52|
+|AI2 Reasoning Challenge (25-Shot)|46.50|
+|HellaSwag (10-Shot)              |69.26|
+|MMLU (5-Shot)                    |43.90|
+|TruthfulQA (0-shot)              |40.62|
+|Winogrande (5-shot)              |68.82|
+|GSM8k (5-shot)                   | 4.02|

config.json ADDED Viewed

	@@ -0,0 +1,44 @@

+{
+  "architectures": [
+    "LlamaModel"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 11008,
+  "max_position_embeddings": 4096,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 32,
+  "pretraining_tp": 1,
+  "quantization_config": {
+    "_load_in_4bit": true,
+    "_load_in_8bit": false,
+    "bnb_4bit_compute_dtype": "bfloat16",
+    "bnb_4bit_quant_storage": "int8",
+    "bnb_4bit_quant_type": "nf4",
+    "bnb_4bit_use_double_quant": false,
+    "llm_int8_enable_fp32_cpu_offload": false,
+    "llm_int8_has_fp16_weight": false,
+    "llm_int8_skip_modules": null,
+    "llm_int8_threshold": 6.0,
+    "load_in_4bit": true,
+    "load_in_8bit": false,
+    "quant_method": "bitsandbytes"
+  },
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.53.1",
+  "use_cache": true,
+  "vocab_size": 48065
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:509630a6263ae057fe44478fa66abd5ff4ce9969f863ff27598b8c70ee592c6c
+size 4037176904

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,59 @@

+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": null,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32000": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "48064": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "legacy": false,
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<pad>",
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}