In [4]:
from openai import OpenAI
from google.colab import userdata


client = OpenAI(
    api_key=userdata.get('MOONSHOT_API_KEY'), # Replace MOONSHOT_API_KEY with the API Key you obtained from the Kimi Open Platform
    base_url="https://api.moonshot.ai/v1",
)

completion = client.chat.completions.create(
    # model = "moonshot-v1-8k",
    model = "kimi-k2-0711-preview",
    messages = [
        {"role": "system", "content": "You are Kimi, an AI assistant provided by Moonshot AI. You are proficient in Vietnamese and English conversations. You provide users with safe, helpful, and accurate answers. You will reject any requests involving terrorism, racism, or explicit content. Moonshot AI is a proper noun and should not be translated."},
        {"role": "user", "content": "Hello, my name is Li Lei. What is the meaning of life?"}
    ],
    temperature = 0.3,
)

# We receive a response from the Kimi large language model via the API (role=assistant)
print(completion.choices[0].message.content)

Hello Li Lei!  
This is one of humanity’s oldest and most personal questions, so I’ll share a few perspectives and then invite you to decide which resonates with you.

1. Biological lens  
   From an evolutionary standpoint, the “purpose” encoded in every living organism is simply to survive, reproduce, and pass on its genes. Life persists because it is good at persisting.

2. Philosophical lens  
   Thinkers across cultures have argued that meaning is not handed to us by nature; we must create it ourselves. For Sartre, existence precedes essence—we are free to define our own essence through choices. For Confucian traditions, meaning arises from cultivating virtue (仁, rén) and harmonious relationships within family and society.

3. Psychological lens  
   Research in positive psychology suggests that people feel life is meaningful when they experience three things:  
   - Purpose: having goals that feel worthwhile.  
   - Coherence: understanding how life fits together.  
   - Signific

In [5]:
%%writefile vmlu_kimi.py
import os
import json
import tqdm
import pandas as pd
import time
import re
from openai import OpenAI
import random
from typing import Optional, Dict, Any
from google.colab import userdata

class KimiClient:
    def __init__(self, api_key: str, base_url: str = "https://api.moonshot.ai/v1"):
        self.client = OpenAI(
            api_key=api_key,
            base_url=base_url,
        )
        self.model = "kimi-k2-0711-preview"

    def chat_completion_with_retry(
        self,
        messages: list,
        temperature: float = 0,
        max_retries: int = 5,
        base_delay: float = 1.0
    ) -> Optional[str]:
        """
        Chat completion with exponential backoff retry for rate limits
        """
        for attempt in range(max_retries):
            try:
                response = self.client.chat.completions.create(
                    model=self.model,
                    messages=messages,
                    temperature=temperature,
                )
                return response.choices[0].message.content

            except Exception as e:
                error_str = str(e).lower()

                # Rate limit handling
                if "rate_limit" in error_str or "rate limit" in error_str:
                    # Extract wait time from error message if available
                    wait_time = self._extract_wait_time(str(e))
                    if wait_time:
                        print(f"Rate limit hit. Waiting {wait_time}s (from error message)")
                        time.sleep(wait_time)
                    else:
                        # Exponential backoff with jitter
                        delay = base_delay * (2 ** attempt) + random.uniform(0, 1)
                        print(f"Rate limit hit. Retry {attempt + 1}/{max_retries}. Waiting {delay:.2f}s")
                        time.sleep(delay)
                    continue

                # Other API errors
                elif "timeout" in error_str or "connection" in error_str:
                    delay = base_delay * (2 ** attempt)
                    print(f"Connection issue. Retry {attempt + 1}/{max_retries}. Waiting {delay:.2f}s")
                    time.sleep(delay)
                    continue

                # Unknown errors
                else:
                    print(f"Unexpected error: {e}")
                    if attempt < max_retries - 1:
                        delay = base_delay * (2 ** attempt)
                        print(f"Retry {attempt + 1}/{max_retries}. Waiting {delay:.2f}s")
                        time.sleep(delay)
                        continue
                    else:
                        print(f"Max retries reached for unknown error")
                        return None

        print(f"Failed after {max_retries} attempts")
        return None

    def _extract_wait_time(self, error_message: str) -> Optional[float]:
        """Extract wait time from rate limit error message"""
        # Common patterns for rate limit messages
        patterns = [
            r'try again in (\d+\.?\d*) seconds',
            r'retry after (\d+\.?\d*) seconds',
            r'wait (\d+\.?\d*) seconds'
        ]

        for pattern in patterns:
            match = re.search(pattern, error_message.lower())
            if match:
                return float(match.group(1))
        return None

def process_vmlu_with_kimi(api_key: str, data_path: str = 'test.jsonl'):
    """Process VMLU dataset with Kimi K2 model"""

    # Initialize Kimi client
    kimi = KimiClient(api_key)

    # Load data
    data = []
    with open(data_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()
        for line in lines:
            data.append(json.loads(line))

    print(f"Loaded {len(data)} questions")

    # Process questions
    all_res = []
    failed_ids = []

    for idx, doc in enumerate(tqdm.tqdm(data)):
        text_choice = '\n'.join(doc['choices'])
        prompt = ("Chỉ đưa ra chữ cái đứng trước câu trả lời đúng (A, B, C, D hoặc E) "
                 "của câu hỏi trắc nghiệm sau: \n"
                 + doc["question"] + "\n\n" + text_choice + "\n" + "Đáp án: ")

        messages = [
            {
                "role": "system",
                "content": "You are Kimi, an AI assistant. Provide only the letter (A, B, C, D, or E) that corresponds to the correct answer for Vietnamese multiple choice questions."
            },
            {
                "role": "user",
                "content": prompt
            }
        ]

        # Get response with retry logic
        response_str = kimi.chat_completion_with_retry(messages, temperature=0)

        if response_str is None:
            print(f"Failed to get response for question {doc['id']}")
            failed_ids.append(doc['id'])
            response_str = ""

        all_res.append({
            "id": doc['id'],
            "prompt": prompt,
            "question": doc["question"],
            "answer": response_str
        })

        # Save progress every 100 questions
        if idx % 100 == 0 and idx > 0:
            result_folder = "all_res/kimi_result"
            os.makedirs(result_folder, exist_ok=True)
            pd.DataFrame(all_res).to_csv(
                f"{result_folder}/raw_result_{len(all_res)}.csv",
                index=False,
                encoding='utf-8'
            )
            print(f"Progress saved: {len(all_res)} questions processed")

        # Rate limiting: small delay between requests
        time.sleep(0.1)

    # Final processing
    df = pd.DataFrame(all_res)

    # Clean answers - extract first letter and ensure it's A-E
    def clean_answer(answer_str):
        if not answer_str:
            return ""

        # Extract first character that's A-E
        cleaned = re.sub(r'[^ABCDEabcde]', '', str(answer_str))
        if cleaned:
            return cleaned[0].upper()

        # Fallback: try to find A-E in the original string
        for char in str(answer_str).upper():
            if char in 'ABCDE':
                return char

        return ""

    df['answer'] = df['answer'].apply(clean_answer)

    # Save final results
    result_folder = "all_res/kimi_result"
    os.makedirs(result_folder, exist_ok=True)

    # Save raw results
    df.to_csv(f"{result_folder}/final_raw_result.csv", index=False, encoding='utf-8')

    # Create submission file
    submission_df = df[['id', 'answer']].copy()
    submission_df.to_csv('kimi_submission_k2.csv', index=False)

    # Print statistics
    total_questions = len(data)
    answered_questions = len(df[df['answer'] != ''])
    valid_answers = len(df[df['answer'].isin(['A', 'B', 'C', 'D', 'E'])])

    print(f"\n=== Results Summary ===")
    print(f"Total questions: {total_questions}")
    print(f"Answered questions: {answered_questions}")
    print(f"Valid answers (A-E): {valid_answers}")
    print(f"Success rate: {valid_answers/total_questions*100:.2f}%")

    if failed_ids:
        print(f"Failed question IDs: {failed_ids[:10]}{'...' if len(failed_ids) > 10 else ''}")

    return df

if __name__ == "__main__":
    # Get API key from environment
    api_key = "sk-VZFI2IgonKwgzpKsASVeUDoEUfq0FUwUKDNNjk0JWoF0SOiQ"
    # api_key = userdata.get('MOONSHOT_API_KEY')
    if not api_key:
        raise ValueError("Please set MOONSHOT_API_KEY environment variable")

    # Process VMLU dataset
    results_df = process_vmlu_with_kimi(api_key)
    print("VMLU processing completed!")

Writing vmlu_kimi.py


In [6]:
!cp /content/drive/MyDrive/2025/llm/vlmu_mqa_v1.5.zip ./

In [7]:
!unzip -q vlmu_mqa_v1.5.zip

In [8]:
!python vmlu_kimi.py

Loaded 9833 questions
  1% 100/9833 [02:33<3:09:40,  1.17s/it]Progress saved: 101 questions processed
  2% 200/9833 [04:54<3:19:56,  1.25s/it]Progress saved: 201 questions processed
  3% 300/9833 [07:05<3:20:14,  1.26s/it]Progress saved: 301 questions processed
  4% 400/9833 [09:30<3:09:14,  1.20s/it]Progress saved: 401 questions processed
  5% 500/9833 [11:51<5:04:13,  1.96s/it]Progress saved: 501 questions processed
  6% 600/9833 [14:26<4:43:51,  1.84s/it]Progress saved: 601 questions processed
  7% 700/9833 [16:43<3:31:54,  1.39s/it]Progress saved: 701 questions processed
  8% 800/9833 [19:05<3:00:43,  1.20s/it]Progress saved: 801 questions processed
  9% 900/9833 [21:08<2:56:15,  1.18s/it]Progress saved: 901 questions processed
 10% 1000/9833 [23:19<2:57:40,  1.21s/it]Progress saved: 1001 questions processed
 11% 1100/9833 [25:47<3:11:10,  1.31s/it]Progress saved: 1101 questions processed
 12% 1200/9833 [27:54<3:46:54,  1.58s/it]Progress saved: 1201 questions processed
 13% 1300/98

In [9]:
!wc -l kimi_submission_k2.csv

9834 kimi_submission_k2.csv


In [10]:
!head -n 5 kimi_submission_k2.csv

id,answer
28-0021,B
28-0022,A
28-0023,D
28-0024,A


In [11]:
!cp kimi_submission_k2.csv /content/drive/MyDrive/2025/llm/kimi_submission_k2.csv

In [12]:
!wc -l /content/drive/MyDrive/2025/llm/kimi_submission_k2.csv

9834 /content/drive/MyDrive/2025/llm/kimi_submission_k2.csv
