File size: 6,731 Bytes

21434ad
 
 
 
 
 
 
 
9fe7f69
21434ad
9fe7f69
21434ad
65c2c66
 
706ebc2
 
 
21434ad
 
 
 
 
 
 
706ebc2
21434ad

{
  "model_family": "Helion",
  "version": "2.5",
  "release_type": "research_and_development",
  "variants": {
    "base": {
      "name": "Helion-2.5-Rnd",
      "full_name": "DeepXR/Helion-2.5-Rnd",
      "description": "Base research model with full precision (FP16)",
      "parameters": "70B",
      "precision": "float16",
      "context_length": 131072,
      "safetensors_shards": 83,
      "shard_naming": "shard_00 to shard_82",
      "shard_size_gb": 1.69,
      "shard_size_gib": 1.57,
      "total_size_gb": 140.27,
      "status": "active",
      "recommended_use": [
        "Research",
        "Development",
        "High-accuracy inference"
      ],
      "hardware_requirements": {
        "min_vram_gb": 145,
        "min_gpus": 2,
        "recommended_gpu": "A100 80GB"
      }
    },
    "instruct": {
      "name": "Helion-2.5-Rnd-Instruct",
      "full_name": "DeepXR/Helion-2.5-Rnd-Instruct",
      "description": "Instruction-tuned variant optimized for following instructions",
      "parameters": "70B",
      "precision": "bfloat16",
      "context_length": 131072,
      "status": "planned",
      "recommended_use": [
        "Instruction following",
        "Task completion",
        "Structured outputs"
      ],
      "fine_tuning": {
        "type": "supervised",
        "data_focus": "instruction_pairs"
      }
    },
    "chat": {
      "name": "Helion-2.5-Rnd-Chat",
      "full_name": "DeepXR/Helion-2.5-Rnd-Chat",
      "description": "Conversational variant optimized for multi-turn dialogue",
      "parameters": "70B",
      "precision": "bfloat16",
      "context_length": 131072,
      "status": "planned",
      "recommended_use": [
        "Conversational AI",
        "Customer service",
        "Interactive applications"
      ],
      "fine_tuning": {
        "type": "rlhf",
        "data_focus": "conversational_data"
      }
    },
    "code": {
      "name": "Helion-2.5-Rnd-Code",
      "full_name": "DeepXR/Helion-2.5-Rnd-Code",
      "description": "Code-specialized variant with enhanced programming capabilities",
      "parameters": "70B",
      "precision": "bfloat16",
      "context_length": 131072,
      "status": "planned",
      "recommended_use": [
        "Code generation",
        "Code review",
        "Bug fixing",
        "Documentation"
      ],
      "fine_tuning": {
        "type": "supervised",
        "data_focus": "code_repositories"
      },
      "enhanced_languages": [
        "Python",
        "JavaScript",
        "TypeScript",
        "Rust",
        "Go",
        "Java"
      ]
    },
    "math": {
      "name": "Helion-2.5-Rnd-Math",
      "full_name": "DeepXR/Helion-2.5-Rnd-Math",
      "description": "Mathematics-specialized variant for advanced problem solving",
      "parameters": "70B",
      "precision": "bfloat16",
      "context_length": 131072,
      "status": "planned",
      "recommended_use": [
        "Mathematical reasoning",
        "Proof generation",
        "Problem solving",
        "Educational applications"
      ],
      "fine_tuning": {
        "type": "supervised",
        "data_focus": "mathematical_proofs"
      }
    }
  },
  "deployment_configurations": {
    "production": {
      "description": "Production-ready configuration with optimizations",
      "settings": {
        "tensor_parallel_size": 4,
        "gpu_memory_utilization": 0.95,
        "max_batch_size": 32,
        "enable_prefix_caching": true,
        "enable_chunked_prefill": true
      }
    },
    "development": {
      "description": "Development configuration for testing",
      "settings": {
        "tensor_parallel_size": 2,
        "gpu_memory_utilization": 0.85,
        "max_batch_size": 8,
        "enable_prefix_caching": false,
        "enable_chunked_prefill": false
      }
    },
    "research": {
      "description": "Research configuration for experimentation",
      "settings": {
        "tensor_parallel_size": 2,
        "gpu_memory_utilization": 0.90,
        "max_batch_size": 4,
        "enable_prefix_caching": false,
        "enable_chunked_prefill": false,
        "enable_logging": true
      }
    }
  },
  "comparison_matrix": {
    "base_vs_instruct": {
      "base_advantages": [
        "More flexible for fine-tuning",
        "Better for creative tasks",
        "Less constrained outputs"
      ],
      "instruct_advantages": [
        "Better instruction following",
        "More structured outputs",
        "Improved task completion"
      ]
    },
    "base_vs_chat": {
      "base_advantages": [
        "Better for single-turn tasks",
        "More diverse outputs",
        "Flexible formatting"
      ],
      "chat_advantages": [
        "Better conversation coherence",
        "Improved context awareness",
        "Natural dialogue flow"
      ]
    }
  },
  "migration_guide": {
    "from_base_to_instruct": {
      "steps": [
        "Update prompt format to instruction style",
        "Adjust temperature (typically lower)",
        "Add explicit task descriptions",
        "Use structured output formats"
      ],
      "example_prompt_change": {
        "base": "Write a function to sort a list",
        "instruct": "### Instruction:\nWrite a Python function that sorts a list in ascending order.\n\n### Response:"
      }
    },
    "from_base_to_chat": {
      "steps": [
        "Convert to chat message format",
        "Add system prompts",
        "Maintain conversation history",
        "Use appropriate message roles"
      ],
      "example_format_change": {
        "base": "Hello, how are you?",
        "chat": [
          {
            "role": "system",
            "content": "You are a helpful assistant."
          },
          {
            "role": "user",
            "content": "Hello, how are you?"
          }
        ]
      }
    }
  },
  "version_history": {
    "2.5.0-rnd": {
      "release_date": "2025-01-30",
      "status": "current",
      "changes": [
        "Initial research release",
        "70B parameter model",
        "131K context with YARN",
        "SafeTensors format (83 shards)",
        "Full precision (FP16)"
      ]
    }
  },
  "roadmap": {
    "upcoming_variants": [
      {
        "name": "Helion-2.5-Rnd-Instruct",
        "expected": "Q2 2025",
        "status": "in_development"
      },
      {
        "name": "Helion-2.5-Rnd-Chat",
        "expected": "Q2 2025",
        "status": "planned"
      },
      {
        "name": "Helion-2.5-Rnd-Code",
        "expected": "Q3 2025",
        "status": "planned"
      }
    ],
    "future_features": [
      "Multi-modal capabilities",
      "Extended context to 256K",
      "Improved multilingual support",
      "Domain-specific variants"
    ]
  }
}