 
				cleanrl/EleutherAI_pythia-6.9b-deduped__ppo__tldr
				
				
			
			Text Generation
			
• 
		
				7B
			• 
	
				Updated
					
				
				• 
					
					2
				
	
				
				
 
				cleanrl/EleutherAI_pythia-2.8b-deduped__ppo__tldr
				
				
			
			Text Generation
			
• 
		
				3B
			• 
	
				Updated
					
				
				
				
	
				
				 
				cleanrl/EleutherAI_pythia-1b-deduped__ppo__tldr
				
				
			
			Text Generation
			
• 
		
				1B
			• 
	
				Updated
					
				
				• 
					
					2
				
	
				
				
 
				cleanrl/EleutherAI_pythia-2.8b-deduped__reward__tldr
				
				
			
			Text Classification
			
• 
		
	
				Updated
					
				
				• 
					
					2
				
	
				
				
 
				cleanrl/EleutherAI_pythia-1b-deduped__reward__tldr
				
				
			
			Text Classification
			
• 
		
	
				Updated
					
				
				• 
					
					997
				
	
				
				
 
				cleanrl/EleutherAI_pythia-1b-deduped__sft__tldr
				
				
			
			Text Generation
			
• 
		
	
				Updated
					
				
				• 
					
					1.21k
				
	
				
				
 
				cleanrl/EleutherAI_pythia-2.8b-deduped__sft__tldr
				
				
			
			Text Generation
			
• 
		
	
				Updated
					
				
				• 
					
					1
				
	
				
				
 
				cleanrl/EleutherAI_pythia-6.9b-deduped__sft__tldr
				
				
			
			Text Generation
			
• 
		
	
				Updated
					
				
				• 
					
					347
				
	
				
				
 
				cleanrl/EleutherAI_pythia-6.9b-deduped__reward__tldr
				
				
			
			Text Classification
			
• 
		
	
				Updated
					
				
				• 
					
					181
				
	
				
				
 
				cleanrl/ppo_zephyr310
				
				
			
			Text Generation
			
• 
		
				7B
			• 
	
				Updated
					
				
				
				
	
				
				 
				cleanrl/BeamRiderNoFrameskip-v4-dqn_atari-seed1
				
				
			
			Reinforcement Learning
			
• 
		
	
				Updated
					
				
				
				
	
				
				 
				cleanrl/PongNoFrameskip-v4-dqn_atari-seed1
				
				
			
			Reinforcement Learning
			
• 
		
	
				Updated
					
				
				
				
	
				
				 
				cleanrl/BreakoutNoFrameskip-v4-dqn_atari-seed1
				
				
			
			Reinforcement Learning
			
• 
		
	
				Updated
					
				
				
				
	
				• 
					
					2
				
 
				cleanrl/QbertNoFrameskip-v4-dqn_atari-seed1
				
				
			
			Reinforcement Learning
			
• 
		
	
				Updated
					
				
				
				
	
				
				 
				cleanrl/SpaceInvadersNoFrameskip-v4-dqn_atari-seed1
				
				
			
			Reinforcement Learning
			
• 
		
	
				Updated
					
				
				
				
	
				
				 
				cleanrl/MsPacmanNoFrameskip-v4-dqn_atari-seed1
				
				
			
			Reinforcement Learning
			
• 
		
	
				Updated
					
				
				
				
	
				
				 
				cleanrl/Ant-v2-td3_continuous_action_jax-seed1
				
				
			
			Reinforcement Learning
			
• 
		
	
				Updated
					
				
				
				
	
				
				 
				cleanrl/Ant-v2-td3_continuous_action-seed1
				
				
			
			Reinforcement Learning
			
• 
		
	
				Updated
					
				
				
				
	
				
				 
				cleanrl/Swimmer-v4-td3_continuous_action_jax-seed1
				
				
			
			Reinforcement Learning
			
• 
		
	
				Updated
					
				
				
				
	
				
				 
				cleanrl/Ant-v4-td3_continuous_action_jax-seed1
				
				
			
			Reinforcement Learning
			
• 
		
	
				Updated
					
				
				
				
	
				
				 
				cleanrl/Swimmer-v4-td3_continuous_action-seed1
				
				
			
			Reinforcement Learning
			
• 
		
	
				Updated
					
				
				
				
	
				
				 
				cleanrl/Ant-v4-td3_continuous_action-seed1
				
				
			
			Reinforcement Learning
			
• 
		
	
				Updated
					
				
				
				
	
				
				 
				cleanrl/InvertedPendulum-v2-ppo_continuous_action-seed1
				
				
			
			Reinforcement Learning
			
• 
		
	
				Updated
					
				
				
				
	
				
				 
				cleanrl/Humanoid-v2-ppo_continuous_action-seed1
				
				
			
			Reinforcement Learning
			
• 
		
	
				Updated
					
				
				
				
	
				
				 
				cleanrl/Pusher-v2-ppo_continuous_action-seed1
				
				
			
			Reinforcement Learning
			
• 
		
	
				Updated
					
				
				
				
	
				
				 
				cleanrl/Ant-v2-ppo_continuous_action-seed1
				
				
			
			Reinforcement Learning
			
• 
		
	
				Updated
					
				
				
				
	
				
				 
				cleanrl/HalfCheetah-v2-ppo_continuous_action-seed1
				
				
			
			Reinforcement Learning
			
• 
		
	
				Updated
					
				
				
				
	
				
				 
				cleanrl/Walker2d-v2-ppo_continuous_action-seed1
				
				
			
			Reinforcement Learning
			
• 
		
	
				Updated
					
				
				
				
	
				
				 
				cleanrl/Hopper-v2-ppo_continuous_action-seed1
				
				
			
			Reinforcement Learning
			
• 
		
	
				Updated
					
				
				
				
	
				
				 
				cleanrl/InvertedPendulum-v4-ppo_continuous_action-seed1
				
				
			
			Reinforcement Learning
			
• 
		
	
				Updated