gopiium committed on
Commit ba866d1 · verified · 1 parent: 825313b

Upload folder using huggingface_hub

.gitattributes CHANGED
@@ -33,3 +33,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ datasets/core/cc_news/cc_news-llama2_13.jsonl filter=lfs diff=lfs merge=lfs -text
+ datasets/core/cnn/cnn-llama2_13.jsonl filter=lfs diff=lfs merge=lfs -text
+ datasets/core/pubmed/pubmed-llama2_13.jsonl filter=lfs diff=lfs merge=lfs -text
+ datasets/robustness/open_orca/carl-sagan-llama2-13b-chat.jsonl filter=lfs diff=lfs merge=lfs -text
+ datasets/robustness/open_orca/default-llama2-13b-chat.jsonl filter=lfs diff=lfs merge=lfs -text
+ datasets/robustness/open_orca/no-robotic-words-llama2-13b-chat.jsonl filter=lfs diff=lfs merge=lfs -text
+ datasets/robustness/open_orca/pirate-llama2-13b-chat.jsonl filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,164 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ 
+ # C extensions
+ *.so
+ 
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+ 
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+ 
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+ 
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+ 
+ # Translations
+ *.mo
+ *.pot
+ 
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+ 
+ # Flask stuff:
+ instance/
+ .webassets-cache
+ 
+ # Scrapy stuff:
+ .scrapy
+ 
+ # Sphinx documentation
+ docs/_build/
+ 
+ # PyBuilder
+ .pybuilder/
+ target/
+ 
+ # Jupyter Notebook
+ .ipynb_checkpoints
+ 
+ # IPython
+ profile_default/
+ ipython_config.py
+ 
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+ 
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+ 
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+ 
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/#use-with-ide
+ .pdm.toml
+ 
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+ 
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+ 
+ # SageMath parsed files
+ *.sage.py
+ 
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+ 
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+ 
+ # Rope project settings
+ .ropeproject
+ 
+ # mkdocs documentation
+ /site
+ 
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+ 
+ # Pyre type checker
+ .pyre/
+ 
+ # pytype static type analyzer
+ .pytype/
+ 
+ # Cython debug symbols
+ cython_debug/
+ 
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
+ 
+ samples/
+ 
+ **.*ipynb
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
+ -----BEGIN CERTIFICATE-----
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+ -----END CERTIFICATE-----
LICENSE.md ADDED
@@ -0,0 +1,28 @@
+ BSD 3-Clause License
+ 
+ Copyright (c) 2023, Abhimanyu Hans, Avi Schwarzschild, Tom Goldstein
+ 
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+ 
+ 1. Redistributions of source code must retain the above copyright notice, this
+    list of conditions and the following disclaimer.
+ 
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+ 
+ 3. Neither the name of the copyright holder nor the names of its
+    contributors may be used to endorse or promote products derived from
+    this software without specific prior written permission.
+ 
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
README.md CHANGED
@@ -1,12 +1,91 @@
  ---
  title: Binoculars
- emoji: 📊
- colorFrom: blue
- colorTo: yellow
- sdk: gradio
- sdk_version: 5.13.1
  app_file: app.py
- pinned: false
+ sdk: gradio
+ sdk_version: 5.13.0
  ---
- 
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # <img src="./assets/bino-logo.svg" width=40 style="padding-top: 0px"/> Binoculars: Zero-Shot Detection of LLM-Generated Text [[paper]](https://arxiv.org/abs/2401.12070) [[demo]](https://huggingface.co/spaces/tomg-group-umd/Binoculars)
+ 
+ <p align="center">
+ <img src="assets/binoculars.jpg" width="300" height="300" alt="Cool Binoculars with Falcon on Top">
+ </p>
+ 
+ We introduce Binoculars, a state-of-the-art method for detecting AI-generated text. Binoculars is a
+ zero-shot, domain-agnostic method that requires no training data. It is based on a simple idea: most
+ decoder-only, causal language models share a huge overlap in pretraining datasets (e.g., Common Crawl, the Pile).
+ More details about the method and results can be found in our paper **Spotting LLMs with Binoculars: Zero-Shot
+ Detection of Machine-Generated Text**.
+ 
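+ In the notation of `binoculars/detector.py` below, the score is a ratio of two log-perplexities: how surprising the text is to a "performer" model, normalized by how surprising the performer's own next-token predictions are to a second "observer" model. A sketch of the quantity the code computes (not the paper's exact notation):
+ 
+ ```math
+ B(s) = \frac{\log \mathrm{PPL}_{\mathrm{performer}}(s)}{\log \mathrm{XPPL}_{\mathrm{observer},\,\mathrm{performer}}(s)}
+ ```
+ 
+ Scores below a fixed threshold are classified as machine-generated.
+ 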
+ ## Getting Started
+ 
+ ### Installation
+ 
+ To run the implementation of Binoculars, you can clone this repository and install the package using pip. This code
+ was developed and tested with Python 3.9. To install the package, run the following commands:
+ 
+ ```bash
+ $ git clone https://github.com/ahans30/Binoculars.git
+ $ cd Binoculars
+ $ pip install -e .
+ ```
+ 
+ ### Usage
+ 
+ Please note, this implementation comes with a fixed global threshold that is used to classify the input as AI-generated
+ or not. This threshold was selected using the _Falcon-7B_ and _Falcon-7B-Instruct_ models for scoring. If you want to
+ use different scoring models, you can pass them as arguments to the `Binoculars` class. Please read the paper for more
+ details about the Binoculars method.
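+ 
+ For example, a minimal sketch of passing custom scoring models (any causal-LM pair with identical tokenizers should work; the names below are just the defaults):
+ 
+ ```python
+ from binoculars import Binoculars
+ 
+ # Observer and performer must share a tokenizer (checked in binoculars/utils.py).
+ bino = Binoculars(
+     observer_name_or_path="tiiuae/falcon-7b",
+     performer_name_or_path="tiiuae/falcon-7b-instruct",
+     mode="accuracy",  # or "low-fpr" (the default)
+ )
+ ```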
+ 
+ To detect AI-generated text, please use the following code snippet:
+ 
+ ```python
+ from binoculars import Binoculars
+ 
+ bino = Binoculars()
+ 
+ # ChatGPT (GPT-4) output when prompted with "Can you write a few sentences about a capybara that is an astrophysicist?"
+ sample_string = '''Dr. Capy Cosmos, a capybara unlike any other, astounded the scientific community with his
+ groundbreaking research in astrophysics. With his keen sense of observation and unparalleled ability to interpret
+ cosmic data, he uncovered new insights into the mysteries of black holes and the origins of the universe. As he
+ peered through telescopes with his large, round eyes, fellow researchers often remarked that it seemed as if the
+ stars themselves whispered their secrets directly to him. Dr. Cosmos not only became a beacon of inspiration to
+ aspiring scientists but also proved that intellect and innovation can be found in the most unexpected of creatures.'''
+ 
+ print(bino.compute_score(sample_string))  # 0.75661373
+ print(bino.predict(sample_string))  # 'Most likely AI-Generated'
+ ```
+ 
+ In the above code, you can also pass a `list` of `str` to the `compute_score` and `predict` methods to get results
+ for an entire batch of samples.
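+ 
+ For instance, a batched call might look like this (a sketch; the strings are placeholders):
+ 
+ ```python
+ texts = ["First sample to score ...", "Second sample to score ..."]
+ print(bino.compute_score(texts))  # list of floats, one score per sample
+ print(bino.predict(texts))        # list of labels, one per sample
+ ```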
+ 
+ ### Demo
+ 
+ We have also made a demo available to predict AI-generated text interactively with a simple UI
+ using [gradio](https://github.com/gradio-app/gradio). You can run the demo using the following command:
+ 
+ ```bash
+ $ python app.py
+ ```
+ 
+ ## Limitations
+ 
+ All AI-generated text detectors aim for accuracy, but none is perfect, and all have failure modes (e.g.,
+ Binoculars is more proficient at detecting English-language text than text in other languages). This implementation is
+ for academic purposes only and should not be considered a consumer product. We also strongly caution against using
+ Binoculars (or any detector) without human supervision.
+ 
+ ## Cite our work
+ 
+ If you find this work useful, please cite our paper:
+ 
+ ```bibtex
+ @misc{hans2024spotting,
+       title={Spotting LLMs With Binoculars: Zero-Shot Detection of Machine-Generated Text},
+       author={Abhimanyu Hans and Avi Schwarzschild and Valeriia Cherepanova and Hamid Kazemi and Aniruddha Saha and Micah Goldblum and Jonas Geiping and Tom Goldstein},
+       year={2024},
+       eprint={2401.12070},
+       archivePrefix={arXiv},
+       primaryClass={cs.CL}
+ }
+ ```
app.py ADDED
@@ -0,0 +1,5 @@
+ from demo.demo import app
+ 
+ if __name__ == "__main__":
+     # Launch the Gradio interface
+     app.launch(show_api=False, debug=True, share=True)
assets/bino-logo.svg ADDED
assets/binoculars.jpg ADDED
binoculars/__init__.py ADDED
@@ -0,0 +1,3 @@
+ from .detector import Binoculars
+ 
+ __all__ = ["Binoculars"]
binoculars/detector.py ADDED
@@ -0,0 +1,100 @@
+ from typing import Union
+ 
+ import os
+ import numpy as np
+ import torch
+ import transformers
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ 
+ from .utils import assert_tokenizer_consistency
+ from .metrics import perplexity, entropy
+ 
+ torch.set_grad_enabled(False)
+ 
+ huggingface_config = {
+     # Only required for private models from Huggingface (e.g. LLaMA models)
+     "TOKEN": os.environ.get("HF_TOKEN", None)
+ }
+ 
+ # selected using Falcon-7B and Falcon-7B-Instruct at bfloat16
+ BINOCULARS_ACCURACY_THRESHOLD = 0.9015310749276843  # optimized for f1-score
+ BINOCULARS_FPR_THRESHOLD = 0.8536432310785527  # optimized for low-fpr [chosen at 0.01%]
+ 
+ DEVICE_1 = "cuda:0" if torch.cuda.is_available() else "cpu"
+ DEVICE_2 = "cuda:1" if torch.cuda.device_count() > 1 else DEVICE_1
+ 
+ 
+ class Binoculars(object):
+     def __init__(self,
+                  observer_name_or_path: str = "tiiuae/falcon-7b",
+                  performer_name_or_path: str = "tiiuae/falcon-7b-instruct",
+                  use_bfloat16: bool = True,
+                  max_token_observed: int = 512,
+                  mode: str = "low-fpr",
+                  ) -> None:
+         assert_tokenizer_consistency(observer_name_or_path, performer_name_or_path)
+ 
+         self.change_mode(mode)
+         self.observer_model = AutoModelForCausalLM.from_pretrained(observer_name_or_path,
+                                                                    device_map="auto",
+                                                                    torch_dtype="auto",
+                                                                    token=huggingface_config["TOKEN"])
+         self.performer_model = AutoModelForCausalLM.from_pretrained(performer_name_or_path,
+                                                                     device_map="auto",
+                                                                     torch_dtype="auto",
+                                                                     token=huggingface_config["TOKEN"])
+         self.observer_model.eval()
+         self.performer_model.eval()
+ 
+         self.tokenizer = AutoTokenizer.from_pretrained(observer_name_or_path)
+         if not self.tokenizer.pad_token:
+             self.tokenizer.pad_token = self.tokenizer.eos_token
+         self.max_token_observed = max_token_observed
+ 
+     def change_mode(self, mode: str) -> None:
+         if mode == "low-fpr":
+             self.threshold = BINOCULARS_FPR_THRESHOLD
+         elif mode == "accuracy":
+             self.threshold = BINOCULARS_ACCURACY_THRESHOLD
+         else:
+             raise ValueError(f"Invalid mode: {mode}")
+ 
+     def _tokenize(self, batch: list[str]) -> transformers.BatchEncoding:
+         batch_size = len(batch)
+         encodings = self.tokenizer(
+             batch,
+             return_tensors="pt",
+             padding="longest" if batch_size > 1 else False,
+             truncation=True,
+             max_length=self.max_token_observed,
+             return_token_type_ids=False).to(self.observer_model.device)
+         return encodings
+ 
+     @torch.inference_mode()
+     def _get_logits(self, encodings: transformers.BatchEncoding) -> tuple[torch.Tensor, torch.Tensor]:
+         observer_logits = self.observer_model(**encodings.to(DEVICE_1)).logits
+         performer_logits = self.performer_model(**encodings.to(DEVICE_2)).logits
+         if DEVICE_1 != "cpu":
+             torch.cuda.synchronize()
+         return observer_logits, performer_logits
+ 
+     def compute_score(self, input_text: Union[list[str], str]) -> Union[float, list[float]]:
+         batch = [input_text] if isinstance(input_text, str) else input_text
+         encodings = self._tokenize(batch)
+         observer_logits, performer_logits = self._get_logits(encodings)
+         # Binoculars score: performer log-perplexity over observer-performer cross-perplexity
+         ppl = perplexity(encodings, performer_logits)
+         x_ppl = entropy(observer_logits.to(DEVICE_1), performer_logits.to(DEVICE_1),
+                         encodings.to(DEVICE_1), self.tokenizer.pad_token_id)
+         binoculars_scores = ppl / x_ppl
+         binoculars_scores = binoculars_scores.tolist()
+         return binoculars_scores[0] if isinstance(input_text, str) else binoculars_scores
+ 
+     def predict(self, input_text: Union[list[str], str]) -> Union[list[str], str]:
+         binoculars_scores = np.array(self.compute_score(input_text))
+         pred = np.where(binoculars_scores < self.threshold,
+                         "Most likely AI-generated",
+                         "Most likely human-generated"
+                         ).tolist()
+         return pred
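For reference, the two module-level thresholds above can be switched at runtime via `change_mode`, which is how the demo below toggles between modes; a minimal sketch:

```python
from binoculars import Binoculars

bino = Binoculars()           # defaults to mode="low-fpr"
bino.change_mode("accuracy")  # switch to the F1-optimized threshold
print(bino.threshold)         # 0.9015310749276843
```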
binoculars/metrics.py ADDED
@@ -0,0 +1,57 @@
+ import numpy as np
+ import torch
+ import transformers
+ 
+ ce_loss_fn = torch.nn.CrossEntropyLoss(reduction="none")
+ softmax_fn = torch.nn.Softmax(dim=-1)
+ 
+ 
+ def perplexity(encoding: transformers.BatchEncoding,
+                logits: torch.Tensor,
+                median: bool = False,
+                temperature: float = 1.0):
+     # Mean (or median) token-level cross-entropy of the text under the model, i.e. log-perplexity
+     shifted_logits = logits[..., :-1, :].contiguous() / temperature
+     shifted_labels = encoding.input_ids[..., 1:].contiguous()
+     shifted_attention_mask = encoding.attention_mask[..., 1:].contiguous()
+ 
+     if median:
+         ce_nan = (ce_loss_fn(shifted_logits.transpose(1, 2), shifted_labels).
+                   masked_fill(~shifted_attention_mask.bool(), float("nan")))
+         ppl = np.nanmedian(ce_nan.cpu().float().numpy(), 1)
+     else:
+         ppl = (ce_loss_fn(shifted_logits.transpose(1, 2), shifted_labels) *
+                shifted_attention_mask).sum(1) / shifted_attention_mask.sum(1)
+         ppl = ppl.to("cpu").float().numpy()
+ 
+     return ppl
+ 
+ 
+ def entropy(p_logits: torch.Tensor,
+             q_logits: torch.Tensor,
+             encoding: transformers.BatchEncoding,
+             pad_token_id: int,
+             median: bool = False,
+             sample_p: bool = False,
+             temperature: float = 1.0):
+     # Per-position cross-entropy between the p model's distribution and the q model's
+     # logits ("cross-perplexity"), aggregated over non-pad tokens
+     vocab_size = p_logits.shape[-1]
+     total_tokens_available = q_logits.shape[-2]
+     p_scores, q_scores = p_logits / temperature, q_logits / temperature
+ 
+     p_proba = softmax_fn(p_scores).view(-1, vocab_size)
+ 
+     if sample_p:
+         p_proba = torch.multinomial(p_proba.view(-1, vocab_size), replacement=True, num_samples=1).view(-1)
+ 
+     q_scores = q_scores.view(-1, vocab_size)
+ 
+     ce = ce_loss_fn(input=q_scores, target=p_proba).view(-1, total_tokens_available)
+     padding_mask = (encoding.input_ids != pad_token_id).type(torch.uint8)
+ 
+     if median:
+         ce_nan = ce.masked_fill(~padding_mask.bool(), float("nan"))
+         agg_ce = np.nanmedian(ce_nan.cpu().float().numpy(), 1)
+     else:
+         agg_ce = (((ce * padding_mask).sum(1) / padding_mask.sum(1)).to("cpu").float().numpy())
+ 
+     return agg_ce
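For a quick sanity check of these helpers outside the full detector, any small causal LM will do; a sketch using `gpt2` (an assumption here for illustration, not one of the models Binoculars uses):

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

from binoculars.metrics import perplexity

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

encoding = tokenizer(["The quick brown fox jumps over the lazy dog."], return_tensors="pt")
with torch.no_grad():
    logits = model(**encoding).logits

# Mean token-level cross-entropy (log-perplexity), one value per batch item
print(perplexity(encoding, logits))
```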
binoculars/utils.py ADDED
@@ -0,0 +1,10 @@
+ from transformers import AutoTokenizer
+ 
+ 
+ def assert_tokenizer_consistency(model_id_1, model_id_2):
+     identical_tokenizers = (
+         AutoTokenizer.from_pretrained(model_id_1).vocab
+         == AutoTokenizer.from_pretrained(model_id_2).vocab
+     )
+     if not identical_tokenizers:
+         raise ValueError(f"Tokenizers are not identical for {model_id_1} and {model_id_2}.")
datasets/core/cc_news/cc_news-falcon7.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
datasets/core/cc_news/cc_news-llama2_13.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9226f42549fbdab8f8443e147b733a28afb7cedc581a3014617163426dbed376
+ size 38526357
datasets/core/cnn/cnn-falcon7.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
datasets/core/cnn/cnn-llama2_13.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:73a7c9529114b05efdf9c938f5c359357f4cc2a1177737e7630efba7a5539594
+ size 20188348
datasets/core/pubmed/pubmed-falcon7.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
datasets/core/pubmed/pubmed-llama2_13.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8555120ad5318ec6f4f8572dd36976d6721ab911a1516a5d26d43fd3fa885760
+ size 52871752
datasets/robustness/open_orca/carl-sagan-llama2-13b-chat.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7d721347290b901ae9825c89ca8809d1f74f31de28d25d39be78b7e414380cbb
+ size 19395215
datasets/robustness/open_orca/default-llama2-13b-chat.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6ea54239f48f2f5b5e7d2bd477c8a388bea316014aeaa6578a464e592a42bc0e
+ size 18329664
datasets/robustness/open_orca/no-robotic-words-llama2-13b-chat.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4cda05c16a550baf7cb02eeb7252b4184877d7a833304a9023c63a972c703a37
+ size 16006774
datasets/robustness/open_orca/pirate-llama2-13b-chat.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:36364c4cd497ffc6c7c7e45c0118987020f333e786838740c125a66631d8acd0
+ size 17366901
demo/demo.py ADDED
@@ -0,0 +1,132 @@
+ __all__ = ["app"]
+ 
+ import gradio as gr
+ from binoculars import Binoculars
+ 
+ BINO = Binoculars()
+ TOKENIZER = BINO.tokenizer
+ MINIMUM_TOKENS = 64
+ 
+ 
+ def count_tokens(text):
+     return len(TOKENIZER(text).input_ids)
+ 
+ 
+ def run_detector(input_str):
+     if count_tokens(input_str) < MINIMUM_TOKENS:
+         gr.Warning(f"Input is too short: Binoculars needs at least {MINIMUM_TOKENS} tokens to run.")
+         return ""
+     return f"{BINO.predict(input_str)}"
+ 
+ 
+ def change_mode(mode):
+     if mode == "Low False Positive Rate":
+         BINO.change_mode("low-fpr")
+     elif mode == "High Accuracy":
+         BINO.change_mode("accuracy")
+     else:
+         raise gr.Error("Invalid mode selected.")
+     return mode
+ 
+ 
+ # def load_set(progress=gr.Progress()):
+ #     tokens = [None] * 24
+ #     for count in progress.tqdm(tokens, desc="Counting Tokens..."):
+ #         time.sleep(0.01)
+ #     return ["Loaded"] * 2
+ 
+ 
+ css = """
+ .green { color: black!important;line-height:1.9em; padding: 0.2em 0.2em; background: #ccffcc; border-radius:0.5rem;}
+ .red { color: black!important;line-height:1.9em; padding: 0.2em 0.2em; background: #ffad99; border-radius:0.5rem;}
+ .hyperlinks {
+   display: flex;
+   align-items: center;
+   align-content: center;
+   padding-top: 12px;
+   justify-content: flex-end;
+   margin: 0 10px; /* Adjust the margin as needed */
+   text-decoration: none;
+   color: #000; /* Set the desired text color */
+ }
+ """
+ 
+ # Most likely human-generated / most likely AI-written
+ 
+ capybara_problem = '''Dr. Capy Cosmos, a capybara unlike any other, astounded the scientific community with his groundbreaking research in astrophysics. With his keen sense of observation and unparalleled ability to interpret cosmic data, he uncovered new insights into the mysteries of black holes and the origins of the universe. As he peered through telescopes with his large, round eyes, fellow researchers often remarked that it seemed as if the stars themselves whispered their secrets directly to him. Dr. Cosmos not only became a beacon of inspiration to aspiring scientists but also proved that intellect and innovation can be found in the most unexpected of creatures.'''
+ 
+ with gr.Blocks(css=css,
+                theme=gr.themes.Default(font=[gr.themes.GoogleFont("Inconsolata"), "Arial", "sans-serif"])) as app:
+     with gr.Row():
+         with gr.Column(scale=3):
+             gr.HTML("<p><h1> binoculars: zero-shot llm-text detector</h1>")
+         with gr.Column(scale=1):
+             gr.HTML("""
+             <p>
+             <a href="https://arxiv.org/abs/2401.12070" target="_blank">paper</a>
+ 
+             <a href="https://github.com/AHans30/Binoculars" target="_blank">code</a>
+ 
+             <a href="mailto:[email protected]" target="_blank">contact</a>
+             """, elem_classes="hyperlinks")
+     with gr.Row():
+         input_box = gr.Textbox(value=capybara_problem, placeholder="Enter text here", lines=8, label="Input Text")
+     with gr.Row():
+         # dropdown option for mode
+         dropdown_mode = gr.Dropdown(["Low False Positive Rate", "High Accuracy"],
+                                     label="Mode",
+                                     show_label=True,
+                                     value="Low False Positive Rate"
+                                     )
+         submit_button = gr.Button("Run Binoculars", variant="primary")
+         clear_button = gr.ClearButton()
+     with gr.Row():
+         output_text = gr.Textbox(label="Prediction", value="Most likely AI-Generated")
+ 
+     with gr.Row():
+         gr.HTML("<p><p><p>")
+     with gr.Row():
+         gr.HTML("<p><p><p>")
+     with gr.Row():
+         gr.HTML("<p><p><p>")
+ 
+     with gr.Accordion("Disclaimer", open=False):
+         gr.Markdown(
+             """
+             - `Accuracy`:
+               - AI-generated text detectors aim for accuracy, but no detector is perfect.
+               - If you choose the "high accuracy" mode, the threshold between human and machine text is chosen to maximize the F1 score on our validation dataset.
+               - If you choose the "low false-positive rate" mode, the threshold for declaring something AI-generated is set so that the false positive (human text wrongly flagged as AI) rate is below 0.01% on our validation set.
+               - The provided prediction is for demonstration purposes only. This is not offered as a consumer product.
+               - Users are advised to exercise discretion, and we assume no liability for any use.
+             - `Recommended detection use cases`:
+               - In this work, our focus is on achieving a low false positive rate, crucial for sensitive downstream use cases where false accusations are highly undesirable.
+               - The main focus of our research is on content moderation, e.g., detecting AI-generated reviews on Amazon/Yelp, detecting AI-generated social media posts and news, etc. We feel this application space is most compelling, as LLM detection tools are best used by professionals in conjunction with a broader set of moderation tools and policies.
+             - `Known weaknesses`:
+               - As noted in our paper, Binoculars exhibits superior detection performance in the English language compared to other languages. Non-English text makes it more likely that results will default to "human written."
+               - Binoculars considers verbatim memorized texts to be "AI generated." For example, most language models have memorized and can recite the US Constitution. For this reason, text from the Constitution, or other highly memorized sources, may be classified as AI-written.
+               - We recommend using 200-300 words of text at a time. Fewer words make detection difficult, as can using more than 1000 words. Binoculars will be more likely to default to the "human written" category if too few tokens are provided.
+             """
+         )
+ 
+     with gr.Accordion("Cite our work", open=False):
+         gr.Markdown(
+             """
+             ```bibtex
+             @misc{hans2024spotting,
+                   title={Spotting LLMs With Binoculars: Zero-Shot Detection of Machine-Generated Text},
+                   author={Abhimanyu Hans and Avi Schwarzschild and Valeriia Cherepanova and Hamid Kazemi and Aniruddha Saha and Micah Goldblum and Jonas Geiping and Tom Goldstein},
+                   year={2024},
+                   eprint={2401.12070},
+                   archivePrefix={arXiv},
+                   primaryClass={cs.CL}
+             }
+             ```
+             """
+         )
+ 
+     # confidence_bar = gr.Label(value={"Confidence": 0})
+ 
+     submit_button.click(run_detector, inputs=input_box, outputs=output_text)
+     clear_button.click(lambda: ("", ""), outputs=[input_box, output_text])
+     dropdown_mode.change(change_mode, inputs=[dropdown_mode], outputs=[dropdown_mode])
experiments/jobs.sh ADDED
@@ -0,0 +1,22 @@
+ # Experiments for the CC News, CNN and PubMed datasets with generations from the LLaMA-2-13B model
+ 
+ python run.py \
+     --dataset_path ../datasets/core/cc_news/cc_news-llama2_13.jsonl \
+     --dataset_name CC-News \
+     --human_sample_key text \
+     --machine_sample_key meta-llama-Llama-2-13b-hf_generated_text_wo_prompt \
+     --machine_text_source LLaMA-2-13B
+ 
+ python run.py \
+     --dataset_path ../datasets/core/cnn/cnn-llama2_13.jsonl \
+     --dataset_name CNN \
+     --human_sample_key article \
+     --machine_sample_key meta-llama-Llama-2-13b-hf_generated_text_wo_prompt \
+     --machine_text_source LLaMA-2-13B
+ 
+ python run.py \
+     --dataset_path ../datasets/core/pubmed/pubmed-llama2_13.jsonl \
+     --dataset_name PubMed \
+     --human_sample_key article \
+     --machine_sample_key meta-llama-Llama-2-13b-hf_generated_text_wo_prompt \
+     --machine_text_source LLaMA-2-13B
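Each dataset row is expected to pair a human-written sample with a machine continuation under the keys passed above; a hypothetical row for orientation (the key names come from the commands above, the field values are invented):

```python
# Hypothetical row from cc_news-llama2_13.jsonl; values invented for illustration only.
row = {
    "text": "A human-written CC-News article body ...",
    "meta-llama-Llama-2-13b-hf_generated_text_wo_prompt": "An LLaMA-2-13B continuation ...",
}
```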
experiments/run.py ADDED
@@ -0,0 +1,111 @@
+ from binoculars.detector import Binoculars
+ from binoculars.detector import BINOCULARS_ACCURACY_THRESHOLD as THRESHOLD
+ from experiments.utils import convert_to_pandas, save_experiment
+ 
+ import os
+ import argparse
+ import datetime
+ 
+ import torch
+ from datasets import Dataset, logging as datasets_logging
+ import numpy as np
+ from sklearn import metrics
+ 
+ 
+ def main(args):
+     # Initialize Binoculars (experiments in the paper use the "accuracy" mode threshold wherever applicable)
+     bino = Binoculars(mode="accuracy", max_token_observed=args.tokens_seen)
+ 
+     # Load dataset
+     ds = Dataset.from_json(f"{args.dataset_path}")
+ 
+     # Set (non-)default values
+     args.dataset_name = args.dataset_name or args.dataset_path.rstrip("/").split("/")[-2]
+     machine_sample_key = (
+         args.machine_sample_key
+         or [x for x in list(ds.features.keys())[::-1] if "generated_text" in x][0]
+     )
+     args.machine_text_source = args.machine_text_source or machine_sample_key.removesuffix("_generated_text_wo_prompt")
+ 
+     # Set job name, experiment path and create directory
+     args.job_name = (
+         args.job_name
+         or f"{args.dataset_name}-{args.machine_text_source}-{args.tokens_seen}-tokens"
+         .strip().replace(' ', '-')
+     )
+     args.experiment_path = f"results/{args.job_name}"
+     os.makedirs(f"{args.experiment_path}", exist_ok=True)
+ 
+     # Score human and machine generated text
+     print("Scoring human text")
+     human_scores = ds.map(
+         lambda batch: {"score": bino.compute_score(batch[args.human_sample_key])},
+         batched=True,
+         batch_size=args.batch_size,
+         remove_columns=ds.column_names
+     )
+ 
+     print("Scoring machine text")
+     machine_scores = ds.map(
+         lambda batch: {"score": bino.compute_score(batch[machine_sample_key])},
+         batched=True,
+         batch_size=args.batch_size,
+         remove_columns=ds.column_names
+     )
+ 
+     score_df = convert_to_pandas(human_scores, machine_scores)
+     score_df["pred"] = np.where(score_df["score"] < THRESHOLD, 1, 0)
+ 
+     # Compute metrics
+     f1_score = metrics.f1_score(score_df["class"], score_df["pred"])
+     score = -1 * score_df["score"]  # Negate the scores so that class 1 (machine) is the positive class
+     fpr, tpr, thresholds = metrics.roc_curve(y_true=score_df["class"], y_score=score, pos_label=1)
+     roc_auc = metrics.auc(fpr, tpr)
+     # Interpolate the TPR at FPR = 0.01%, a fixed point on the ROC curve
+     tpr_at_fpr_0_01 = np.interp(0.01 / 100, fpr, tpr)
+ 
+     # Save experiment
+     save_experiment(args, score_df, fpr, tpr, f1_score, roc_auc, tpr_at_fpr_0_01)
+ 
+ 
+ if __name__ == "__main__":
+     print("=" * 60, "START", "=" * 60)
+ 
+     # Set logging at the error level to avoid messages about datasets loaded from cache
+     datasets_logging.set_verbosity_error()
+ 
+     parser = argparse.ArgumentParser(
+         description="Run (default) Binoculars on a dataset and compute/plot relevant metrics.",
+     )
+ 
+     # Dataset arguments
+     parser.add_argument("--dataset_path", type=str, help="Path to the jsonl file")
+     parser.add_argument("--dataset_name", type=str, default=None, help="name of the dataset")
+     parser.add_argument("--human_sample_key", type=str, help="key for the human-generated text")
+     parser.add_argument("--machine_sample_key", type=str, default=None,
+                         help="key for the machine-generated text")
+     parser.add_argument("--machine_text_source", type=str, default=None,
+                         help="name of model used to generate machine text")
+ 
+     # Scoring arguments
+     parser.add_argument("--tokens_seen", type=int, default=512, help="Number of tokens seen by the model")
+ 
+     # Computational arguments
+     parser.add_argument("--batch_size", type=int, default=32)
+ 
+     # Job arguments
+     parser.add_argument("--job_name", type=str, default=None)
+ 
+     args = parser.parse_args()
+ 
+     print("Using device:", "cuda" if torch.cuda.is_available() else "cpu")
+ 
+     if torch.cuda.is_available():
+         print(f"Number of GPUs: {torch.cuda.device_count()}")
+         print(f"GPU Type: {torch.cuda.get_device_name(0)}")
+ 
+     args.start_time = datetime.datetime.now().strftime("%I:%M%p on %B %d, %Y")
+     main(args)
+ 
+     print("=" * 60, "END", "=" * 60)
experiments/utils.py ADDED
@@ -0,0 +1,48 @@
+ import json
+ import os
+ import datetime
+ 
+ import pandas as pd
+ import matplotlib as mpl
+ import seaborn as sns
+ import matplotlib.pyplot as plt
+ from sklearn import metrics
+ 
+ COLOR = "black"
+ 
+ mpl.rcParams["text.color"] = COLOR
+ mpl.rcParams["axes.labelcolor"] = COLOR
+ mpl.rcParams["xtick.color"] = COLOR
+ mpl.rcParams["ytick.color"] = COLOR
+ mpl.rcParams["figure.dpi"] = 200
+ sns.set(style="darkgrid")
+ 
+ 
+ def convert_to_pandas(human_scores, machine_scores):
+     # Stack human (class 0) and machine (class 1) scores into one labeled frame
+     human_scores = human_scores["score"]
+     machine_scores = machine_scores["score"]
+ 
+     df = pd.DataFrame(
+         {"score": human_scores + machine_scores, "class": [0] * len(human_scores) + [1] * len(machine_scores)}
+     )
+     return df
+ 
+ 
+ def save_json(data, save_path):
+     data.end_time = datetime.datetime.now().strftime("%I:%M%p on %B %d, %Y")
+     with open(os.path.join(save_path, "experiments_details.json"), "w", encoding="utf-8") as f:
+         json.dump(data.__dict__, f, ensure_ascii=False, indent=4)
+ 
+ 
+ def save_experiment(args, score_df, fpr, tpr, f1_score, roc_auc, tpr_at_fpr_0_01):
+     # Plot the ROC curve (log-scale FPR) annotated with summary metrics
+     fig, ax = plt.subplots(1, 1)
+     ax.set_xscale("log")
+ 
+     annotation = f"ROC AUC: {roc_auc:.4f}\nF1 Score: {f1_score:.2f}\nTPR at 0.01% FPR: {100 * tpr_at_fpr_0_01:.2f}%"
+     display = metrics.RocCurveDisplay(fpr=fpr, tpr=tpr, estimator_name=annotation)
+     display.plot(ax=ax, linestyle="--")
+     ax.set_title(f"{args.dataset_name} (n={len(score_df)})\nMachine Text from {args.machine_text_source}")
+ 
+     fig.savefig(f"{args.experiment_path}/performance.png", bbox_inches='tight')
+     score_df.to_csv(f"{args.experiment_path}/score_df.csv", index=False)
+     save_json(args, args.experiment_path)
main.py ADDED
@@ -0,0 +1,14 @@
+ from binoculars import Binoculars
+ 
+ bino = Binoculars()
+ 
+ # ChatGPT (GPT-4) output when prompted with "Can you write a few sentences about a capybara that is an astrophysicist?"
+ sample_string = '''Dr. Capy Cosmos, a capybara unlike any other, astounded the scientific community with his
+ groundbreaking research in astrophysics. With his keen sense of observation and unparalleled ability to interpret
+ cosmic data, he uncovered new insights into the mysteries of black holes and the origins of the universe. As he
+ peered through telescopes with his large, round eyes, fellow researchers often remarked that it seemed as if the
+ stars themselves whispered their secrets directly to him. Dr. Cosmos not only became a beacon of inspiration to
+ aspiring scientists but also proved that intellect and innovation can be found in the most unexpected of creatures.'''
+ 
+ print(bino.compute_score(sample_string))  # 0.75661373
+ print(bino.predict(sample_string))  # 'Most likely AI-Generated'
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ sentencepiece
+ transformers[torch] @ https://github.com/huggingface/transformers/archive/refs/tags/v4.31.0.zip
+ datasets
+ numpy
+ gradio
+ gradio_client
+ scikit-learn
+ seaborn
+ pandas
setup.py ADDED
@@ -0,0 +1,15 @@
+ from setuptools import setup, find_packages
+ 
+ setup(
+     name='Binoculars',
+     version='0.0.10',
+     packages=find_packages(),
+     url='https://github.com/ahans30/Binoculars',
+     license=open("LICENSE.md", "r", encoding="utf-8").read(),
+     author='Authors of "Binoculars: Zero-Shot Detection of LLM-Generated Text"',
+     author_email='[email protected]',
+     description='A language model generated text detector.',
+     long_description=open("README.md", "r", encoding="utf-8").read(),
+     long_description_content_type="text/markdown",
+     install_requires=open("requirements.txt", "r", encoding="utf-8").read().splitlines(),
+ )