File size: 4,852 Bytes
ad86d54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#!/bin/bash

# Optimized Single GPU AMP Flow Matching Training Launch Script with FULL DATA
# This script launches optimized training on GPU 0 using ALL available data
# Features: Mixed precision (BF16), increased batch size, H100 optimizations

# Start-up banner: one printf call emits each argument on its own line; the
# trailing empty argument produces the blank separator line.
# NOTE(review): the "15000 iterations" figure is static text in this script,
# not a value read from the training code - confirm it matches the real run.
printf '%s\n' \
    "=== Launching Optimized Single GPU AMP Flow Matching Training with FULL DATA ===" \
    "Using GPU 0 for training" \
    "Using ALL available peptide embeddings and new FASTA CFG data" \
    "OVERNIGHT TRAINING: 15000 iterations with CFG support and H100 optimizations" \
    ""

# Activate flow virtual environment
# The training script is run with whatever `python` is on PATH afterwards, so
# a failed activation aborts the launch with exit code 1.
echo "Activating flow virtual environment..."
# Test `source` directly instead of inspecting $? on a later line; this keeps
# the check attached to the command it belongs to.
if source /home/edwardsun/miniconda3/envs/flow/bin/activate; then
    echo "✓ Flow environment activated"
    # `command -v` is the shell builtin way to resolve the interpreter path.
    echo "Python: $(command -v python)"
    echo "Python version: $(python --version)"
else
    # Failure diagnostics go to stderr so they are not mistaken for progress
    # output when stdout is redirected.
    echo "❌ Failed to activate flow environment" >&2
    echo "Please check if the environment exists: conda env list" >&2
    exit 1
fi
echo ""

# Check if required files exist
# Pre-flight validation: every mandatory input is verified before the training
# process is started. The first missing item aborts the script with exit code 1.
echo "Checking required files..."

# Report a missing prerequisite on stderr and abort the launch.
#   $1 - description of what is missing (printed after "Missing")
#   $2 - hint telling the user how to produce it
abort_missing() {
    echo "❌ Missing $1" >&2
    echo "$2" >&2
    exit 1
}

embeddings_dir="/data2/edwardsun/flow_project/peptide_embeddings"
cfg_fasta="/home/edwardsun/flow/combined_final.fasta"

# The model weights are looked up relative to the current working directory,
# so the script has to be started from the directory that contains them.
for weights in "final_compressor_model.pth" "final_decompressor_model.pth"; do
    [ -f "$weights" ] || abort_missing "$weights" "Please run compressor_with_embeddings.py first"
done

[ -d "${embeddings_dir}/" ] || abort_missing "${embeddings_dir}/ directory" "Please run final_sequence_encoder.py first"

# Check for full data files
# The combined embeddings file is optional: its absence only produces a
# warning and the launch continues.
if [ -f "${embeddings_dir}/all_peptide_embeddings.pt" ]; then
    echo "✓ Found all_peptide_embeddings.pt (4.3GB - ALL peptide data)"
else
    echo "⚠️  Warning: all_peptide_embeddings.pt not found"
    echo "Will use individual embedding files instead"
fi

# Check for new FASTA CFG data
[ -f "$cfg_fasta" ] || abort_missing "$cfg_fasta" "This contains the new CFG training data with >AP (AMP) and >sp (Non-AMP) labels"
echo "✓ Found combined_final.fasta - New CFG data with automatic labeling"
echo "  >AP headers = AMP sequences"
echo "  >sp headers = Non-AMP sequences"

echo "✓ All required files found!"
echo ""

# Environment for the training process, exported in a single statement:
#   CUDA_VISIBLE_DEVICES           - set CUDA device to GPU 0
#   TORCH_CUDNN_V8_API_ENABLED /
#   TORCH_CUDNN_V8_API_DISABLED    - cuDNN v8 API switches (the original
#                                    author labels these "H100 optimizations")
export CUDA_VISIBLE_DEVICES=0 \
       TORCH_CUDNN_V8_API_ENABLED=1 \
       TORCH_CUDNN_V8_API_DISABLED=0

# Human-readable summary of the intended run configuration. The quoted
# heredoc delimiter keeps the text literal (no expansion of ~, % or ->).
# NOTE(review): these values are static text, not read from the training
# script, and some look inconsistent with each other (warmup 5,000 of 6,000
# total iterations; validation every 10,000 steps; the start-up banner says
# 15000 iterations) - confirm against amp_flow_training_single_gpu_full_data.py.
cat <<'CONFIG'
=== Optimized Training Configuration ===
  - GPU: 0 (CUDA_VISIBLE_DEVICES=0)
  - Batch size: 96 (optimized based on profiling)
  - Total iterations: 6,000
  - Mixed precision: BF16 (H100 optimized)
  - Learning rate: 4e-4 -> 2e-4 (cosine annealing)
  - Warmup steps: 5,000
  - Gradient clipping: 1.0
  - Weight decay: 0.01
  - Data workers: 16
  - CFG dropout: 15%
  - Validation: Every 10,000 steps
  - Checkpoints: Every 1,000 epochs
  - Estimated time: ~8-10 hours (overnight training)

CONFIG

# Check GPU memory and capabilities

# Format `name, total, free` CSV rows read from stdin, one report per row.
# Input:   lines as produced by
#          `nvidia-smi --query-gpu=name,memory.total,memory.free
#           --format=csv,noheader,nounits`
# Outputs: four indented lines per GPU on stdout; the percentage is reported
#          as "unknown" when either memory field is non-numeric (for example
#          "[N/A]") or the total is zero, instead of aborting the arithmetic.
report_gpu_memory() {
    local name total free
    while IFS=, read -r name total free; do
        # nvidia-smi separates CSV fields with ", "; drop that padding so the
        # values print cleanly and are valid integers.
        total=${total//[[:space:]]/}
        free=${free//[[:space:]]/}
        echo "  GPU: $name"
        echo "  Total memory: ${total}MB"
        echo "  Free memory: ${free}MB"
        # 10# forces base 10 so a value with a leading zero is not read as octal.
        if [[ $total =~ ^[0-9]+$ && $free =~ ^[0-9]+$ ]] && (( 10#$total > 0 )); then
            echo "  Available: $(( 10#$free * 100 / 10#$total ))%"
        else
            echo "  Available: unknown"
        fi
    done
}

echo "Checking GPU capabilities..."
if command -v nvidia-smi >/dev/null 2>&1; then
    # Report only the device(s) selected for training rather than every GPU
    # in the machine; falls back to GPU 0 when CUDA_VISIBLE_DEVICES is unset.
    # NOTE(review): assumes nvidia-smi indices match CUDA device ordering on
    # this host - confirm if the machine has mixed GPU models.
    nvidia-smi --id="${CUDA_VISIBLE_DEVICES:-0}" \
        --query-gpu=name,memory.total,memory.free \
        --format=csv,noheader,nounits | report_gpu_memory
else
    # The check is informational only, so a missing tool does not abort.
    echo "  ⚠️  nvidia-smi not found; skipping GPU capability check" >&2
fi

echo ""

# Launch optimized training
echo "Starting optimized single GPU training on GPU 0 with FULL DATA..."
echo "Using new FASTA CFG data: combined_final.fasta"
echo ""

# Launch training with new FASTA CFG data
# The training script path is relative, so it is resolved against the
# current working directory.
python amp_flow_training_single_gpu_full_data.py --cfg_data /home/edwardsun/flow/combined_final.fasta
train_status=$?

echo ""
# Only report success when the training process actually succeeded; otherwise
# propagate its exit code so wrappers and schedulers can detect the failure.
if [ "$train_status" -ne 0 ]; then
    echo "❌ Training exited with status ${train_status}; see the output above for details" >&2
    exit "$train_status"
fi

echo "=== Optimized Overnight Training Complete with FULL DATA ==="
echo "Check for output files:"
echo "  - amp_flow_model_best_optimized.pth (best validation model)"
echo "  - amp_flow_model_final_optimized.pth (final model)"
echo "  - amp_flow_checkpoint_optimized_step_*.pth (checkpoints every 1000 epochs)"
echo ""
# NOTE(review): the list below is static text. The "(128)" batch size differs
# from the "Batch size: 96" line printed in the configuration summary earlier
# in this script - confirm which value the training code really uses.
echo "Training optimizations applied:"
echo "  ✓ Mixed precision (BF16) for ~30-50% speedup"
echo "  ✓ Increased batch size (128) for better H100 utilization"
echo "  ✓ Optimized learning rate schedule with proper warmup"
echo "  ✓ Gradient clipping for training stability"
echo "  ✓ CFG dropout for better guidance"
echo "  ✓ Validation monitoring and early stopping"
echo "  ✓ PyTorch 2.x compilation for speedup"
echo ""
echo "Next steps:"
echo "1. Test the optimized model: python generate_amps.py"
echo "2. Compare performance with previous model"
echo "3. Implement reflow for 1-step generation"
echo "4. Add conditioning for toxicity"
echo "5. Fine-tune on specific AMP properties"