#!/bin/bash
# Optimized Single GPU AMP Flow Matching Training Launch Script with FULL DATA
# This script launches optimized training on GPU 0 using ALL available data
# Features: Mixed precision (BF16), increased batch size, H100 optimizations
echo "=== Launching Optimized Single GPU AMP Flow Matching Training with FULL DATA ==="
echo "Using GPU 0 for training"
echo "Using ALL available peptide embeddings and new FASTA CFG data"
echo "OVERNIGHT TRAINING: 15000 iterations with CFG support and H100 optimizations"
echo ""
# Activate flow virtual environment
echo "Activating flow virtual environment..."
source /home/edwardsun/miniconda3/bin/activate flow
if [ $? -eq 0 ]; then
    echo "✅ Flow environment activated"
    echo "Python: $(which python)"
    echo "Python version: $(python --version)"
else
    echo "❌ Failed to activate flow environment"
    echo "Please check if the environment exists: conda env list"
    exit 1
fi
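# Optional sanity check (assumes PyTorch is installed in this env): confirm
# that the interpreter in use can actually see a CUDA device before launching
python -c "import torch; print('CUDA available:', torch.cuda.is_available())"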
echo ""
# Check if required files exist
echo "Checking required files..."
if [ ! -f "final_compressor_model.pth" ]; then
echo "β Missing final_compressor_model.pth"
echo "Please run compressor_with_embeddings.py first"
exit 1
fi
if [ ! -f "final_decompressor_model.pth" ]; then
echo "β Missing final_decompressor_model.pth"
echo "Please run compressor_with_embeddings.py first"
exit 1
fi
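# Optional sanity check (sketch): print checkpoint sizes so a truncated
# download or partial write is caught before training starts
ls -lh final_compressor_model.pth final_decompressor_model.pth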
if [ ! -d "/data2/edwardsun/flow_project/peptide_embeddings/" ]; then
    echo "❌ Missing /data2/edwardsun/flow_project/peptide_embeddings/ directory"
    echo "Please run final_sequence_encoder.py first"
    exit 1
fi
# Check for full data files
if [ ! -f "/data2/edwardsun/flow_project/peptide_embeddings/all_peptide_embeddings.pt" ]; then
echo "β οΈ Warning: all_peptide_embeddings.pt not found"
echo "Will use individual embedding files instead"
else
echo "β Found all_peptide_embeddings.pt (4.3GB - ALL peptide data)"
fi
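# Optional (assumes the bundle lives at the path above): report the on-disk
# size so the 4.3GB expectation can be verified at launch time
if [ -f "/data2/edwardsun/flow_project/peptide_embeddings/all_peptide_embeddings.pt" ]; then
    du -h /data2/edwardsun/flow_project/peptide_embeddings/all_peptide_embeddings.pt
fi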
# Check for new FASTA CFG data
if [ ! -f "/home/edwardsun/flow/combined_final.fasta" ]; then
echo "β Missing /home/edwardsun/flow/combined_final.fasta"
echo "This contains the new CFG training data with >AP (AMP) and >sp (Non-AMP) labels"
exit 1
else
echo "β Found combined_final.fasta - New CFG data with automatic labeling"
echo " >AP headers = AMP sequences"
echo " >sp headers = Non-AMP sequences"
fi
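# Optional sketch: count the two label classes straight from the FASTA headers
# (assumes headers begin with ">AP" for AMPs and ">sp" for non-AMPs, as above)
AP_COUNT=$(grep -c '^>AP' /home/edwardsun/flow/combined_final.fasta)
SP_COUNT=$(grep -c '^>sp' /home/edwardsun/flow/combined_final.fasta)
echo "   AMP sequences (>AP): ${AP_COUNT}"
echo "   Non-AMP sequences (>sp): ${SP_COUNT}"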
echo "β All required files found!"
echo ""
# Set CUDA device to GPU 0
export CUDA_VISIBLE_DEVICES=0
# Enable H100 optimizations
export TORCH_CUDNN_V8_API_ENABLED=1
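# Optional allocator tuning (assumption, PyTorch 2.x only): expandable segments
# can reduce fragmentation for large-batch BF16 training; drop if it misbehaves
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True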
echo "=== Optimized Training Configuration ==="
echo " - GPU: 0 (CUDA_VISIBLE_DEVICES=0)"
echo " - Batch size: 96 (optimized based on profiling)"
echo " - Total iterations: 6,000"
echo " - Mixed precision: BF16 (H100 optimized)"
echo " - Learning rate: 4e-4 -> 2e-4 (cosine annealing)"
echo " - Warmup steps: 5,000"
echo " - Gradient clipping: 1.0"
echo " - Weight decay: 0.01"
echo " - Data workers: 16"
echo " - CFG dropout: 15%"
echo " - Validation: Every 10,000 steps"
echo " - Checkpoints: Every 1,000 epochs"
echo " - Estimated time: ~8-10 hours (overnight training)"
echo ""
# Check GPU memory and capabilities
echo "Checking GPU capabilities..."
nvidia-smi --query-gpu=name,memory.total,memory.free --format=csv,noheader,nounits | while IFS=, read -r name total free; do
    # csv fields after the first arrive as ", value"; trim the leading space
    total=${total# }
    free=${free# }
    echo " GPU: $name"
    echo " Total memory: ${total}MB"
    echo " Free memory: ${free}MB"
    echo " Available: $((free * 100 / total))%"
done
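# Optional guard (assumed ~40GB threshold): warn early if free memory looks too
# low for batch size 96 at BF16, instead of failing mid-run with an OOM
FREE_MB=$(nvidia-smi --query-gpu=memory.free --format=csv,noheader,nounits | head -n 1)
if [ "${FREE_MB}" -lt 40000 ]; then
    echo "⚠️  Warning: only ${FREE_MB}MB free on GPU 0; consider lowering the batch size"
fi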
echo ""
# Launch optimized training
echo "Starting optimized single GPU training on GPU 0 with FULL DATA..."
echo "Using new FASTA CFG data: combined_final.fasta"
echo ""
# Launch training with new FASTA CFG data
python amp_flow_training_single_gpu_full_data.py --cfg_data /home/edwardsun/flow/combined_final.fasta
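# Optional guard (sketch): surface a failing exit status instead of printing the
# completion banner below after a crashed run
TRAIN_STATUS=$?
if [ ${TRAIN_STATUS} -ne 0 ]; then
    echo "❌ Training exited with status ${TRAIN_STATUS}"
    exit ${TRAIN_STATUS}
fi
# Alternative for detached overnight runs (commented sketch; assumes nohup):
# nohup python amp_flow_training_single_gpu_full_data.py \
#     --cfg_data /home/edwardsun/flow/combined_final.fasta \
#     > training_$(date +%Y%m%d_%H%M%S).log 2>&1 &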
echo ""
echo "=== Optimized Overnight Training Complete with FULL DATA ==="
echo "Check for output files:"
echo " - amp_flow_model_best_optimized.pth (best validation model)"
echo " - amp_flow_model_final_optimized.pth (final model)"
echo " - amp_flow_checkpoint_optimized_step_*.pth (checkpoints every 1000 epochs)"
echo ""
echo "Training optimizations applied:"
echo " β Mixed precision (BF16) for ~30-50% speedup"
echo " β Increased batch size (128) for better H100 utilization"
echo " β Optimized learning rate schedule with proper warmup"
echo " β Gradient clipping for training stability"
echo " β CFG dropout for better guidance"
echo " β Validation monitoring and early stopping"
echo " β PyTorch 2.x compilation for speedup"
echo ""
echo "Next steps:"
echo "1. Test the optimized model: python generate_amps.py"
echo "2. Compare performance with previous model"
echo "3. Implement reflow for 1-step generation"
echo "4. Add conditioning for toxicity"
echo "5. Fine-tune on specific AMP properties" |