Initial upload with README & teaser

Files changed:
- .gitattributes +1 -0
- README.md +14 -3
- assets/NovoMolGen.png +3 -0
- modeling_novomolgen.py +6 -6
 
    	
.gitattributes CHANGED

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+assets/NovoMolGen.png filter=lfs diff=lfs merge=lfs -text
    	
README.md CHANGED

@@ -14,7 +14,12 @@ pipeline_tag: text-generation
 
 # NovoMolGen
 
-NovoMolGen is a family of molecular foundation models trained on
+NovoMolGen is a family of molecular foundation models trained on
+1.5 billion ZINC-22 molecules with Llama architectures and FlashAttention.
+It achieves state-of-the-art performance on both unconstrained and
+goal-directed molecule generation tasks.
+
+<img src="assets/NovoMolGen.png" width="900"/>
 
 ## How to load
 
@@ -24,9 +29,14 @@ tokenizer = AutoTokenizer.from_pretrained("chandar-lab/NovoMolGen_157M_SMILES_At
 model = AutoModelForCausalLM.from_pretrained("chandar-lab/NovoMolGen_157M_SMILES_AtomWise", trust_remote_code=True)
 ```
 
-##
+## Quick-start (FlashAttention + bf16)
 
 ```python
+from accelerate import Accelerator
+
+acc = Accelerator(mixed_precision='bf16')
+model = acc.prepare(model)
+
 outputs = model.sample(tokenizer=tokenizer, batch_size=4)
 print(outputs['SMILES'])
 ```
@@ -36,7 +46,8 @@ print(outputs['SMILES'])
 ```bibtex
 @article{chitsaz2024novomolgen,
   title={NovoMolGen: Rethinking Molecular Language Model Pretraining},
-  author={Chitsaz, Kamran and Balaji, Roshan and Fournier, Quentin and
+  author={Chitsaz, Kamran and Balaji, Roshan and Fournier, Quentin and
+          Bhatt, Nirav Pravinbhai and Chandar, Sarath},
   journal={arXiv preprint},
   year={2025},
 }
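Taken together, the two README snippets above describe one workflow: load the tokenizer and model from the Hub, wrap the model with Accelerate for bf16 inference, then sample SMILES strings. Below is a minimal end-to-end sketch assembled from those snippets; it assumes accelerate, flash-attn, and a GPU are available, and relies only on the calls shown in the README diff.

```python
# Minimal sketch combining the "How to load" and "Quick-start (FlashAttention + bf16)"
# snippets added to the README in this commit. Assumes accelerate and flash-attn are
# installed and a GPU is available.
from accelerate import Accelerator
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "chandar-lab/NovoMolGen_157M_SMILES_AtomWise"
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(repo_id, trust_remote_code=True)

# Run the model in bf16 mixed precision on the available device via Accelerate.
acc = Accelerator(mixed_precision='bf16')
model = acc.prepare(model)

# `sample` is a custom method provided by the model's remote code; per the README,
# it returns a dict whose 'SMILES' entry holds the generated molecules.
outputs = model.sample(tokenizer=tokenizer, batch_size=4)
print(outputs['SMILES'])
```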
    	
assets/NovoMolGen.png ADDED

Binary image (the README teaser figure), stored with Git LFS.
    	
modeling_novomolgen.py CHANGED

@@ -33,7 +33,7 @@ except ImportError:
     inv_remap_state_dict_hf_llama = None
 
 
-def state_dict_from_pretrained(model_name, checkpoint_path: str = "", device=None, dtype=None):
+def state_dict_from_pretrained(model_name, checkpoint_path: str = "", device=None, dtype=None, **kwargs):
     """
     code modified from: https://github.com/Dao-AILab/flash-attention/blob/main/flash_attn/utils/pretrained.py
     """
@@ -45,10 +45,10 @@ def state_dict_from_pretrained(model_name, checkpoint_path: str = "", device=Non
 
     # Try loading from HF hub instead of from local files
     resolved_archive_file = cached_file(model_name, os.path.join(checkpoint_path, WEIGHTS_NAME),
-                                        _raise_exceptions_for_missing_entries=False)
+                                        _raise_exceptions_for_missing_entries=False, **kwargs)
     if resolved_archive_file is None:
         resolved_archive_file = cached_file(model_name, os.path.join(checkpoint_path, WEIGHTS_INDEX_NAME),
-                                            _raise_exceptions_for_missing_entries=False)
+                                            _raise_exceptions_for_missing_entries=False, **kwargs)
         if resolved_archive_file is not None:
             is_sharded = True
 
@@ -115,7 +115,7 @@ class NovoMolGenConfig(LlamaConfig):
 
         resolved_archive_config_file = cached_file(pretrained_model_name_or_path,
                                                    os.path.join(checkpoint_path, "config.json"),
-                                                   _raise_exceptions_for_missing_entries=False)
+                                                   _raise_exceptions_for_missing_entries=False, force_download=force_download)
 
         if resolved_archive_config_file is not None:
             with open(resolved_archive_config_file, "r", encoding="utf-8") as reader:
@@ -266,13 +266,13 @@ class NovoMolGen(GPTLMHeadModel):
             **kwargs,
             ):
         if config is None:
-            config = NovoMolGenConfig.from_pretrained(pretrained_model_name_or_path, checkpoint_path=checkpoint_path)
+            config = NovoMolGenConfig.from_pretrained(pretrained_model_name_or_path, checkpoint_path=checkpoint_path, **kwargs)
         model = cls(config)
 
         if os.path.exists(pretrained_model_name_or_path):
             state_dict = torch.load(os.path.join(pretrained_model_name_or_path, checkpoint_path, WEIGHTS_NAME))
         else:
-            state_dict = state_dict_from_pretrained(pretrained_model_name_or_path, checkpoint_path=checkpoint_path)
+            state_dict = state_dict_from_pretrained(pretrained_model_name_or_path, checkpoint_path=checkpoint_path, **kwargs)
         model.load_state_dict(state_dict)
         return model
 
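The net effect of these changes is that download-related keyword arguments passed to `NovoMolGen.from_pretrained` (for example when it is reached through `AutoModelForCausalLM.from_pretrained` with `trust_remote_code=True`) are now forwarded through `NovoMolGenConfig.from_pretrained` and `state_dict_from_pretrained` to the underlying `cached_file` lookups, with the config lookup additionally wiring `force_download` through explicitly. The sketch below illustrates that path under those assumptions; it is not code from the repository, and the choice of `force_download` is only an example of a standard Hub download option.

```python
# Illustrative, assumption-labeled sketch: with the **kwargs forwarding added in this
# commit, Hub download options supplied at the top level can reach the cached_file()
# calls that resolve config.json and the weight files.
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "chandar-lab/NovoMolGen_157M_SMILES_AtomWise",
    trust_remote_code=True,
    force_download=True,  # forwarded: from_pretrained -> state_dict_from_pretrained -> cached_file
)
```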