Spaces:

society-ethics
/

model-card-regulatory-check

Runtime error

App Files Community

Nima Boscarino committed on Mar 16, 2023

Commit

e814211

1 Parent(s): 93711b8

WIP: Split up unit tests to their own files

Browse files

Files changed (5) hide show

requirements.txt +2 -5
tests/test_compliance_checks.py +3 -230
tests/test_computational_requirements_check.py +81 -0
tests/test_general_limitations_check.py +59 -0
tests/test_intended_purpose_check.py +99 -0

requirements.txt CHANGED Viewed

@@ -1,7 +1,4 @@
-# fastapi
-# uvicorn
 markdown
 beautifulsoup4
-# tabulate
-# pytest
-gradio

 markdown
 beautifulsoup4
+pytest
+gradio

tests/test_compliance_checks.py CHANGED Viewed

@@ -1,241 +1,14 @@
 import pytest
 from unittest.mock import MagicMock
-import markdown
-from bs4 import BeautifulSoup
 from compliance_checks import (
     ComplianceSuite,
-    ModelProviderIdentityCheck, ModelProviderIdentityResult,
-    IntendedPurposeCheck, IntendedPurposeResult,
-    GeneralLimitationsCheck, GeneralLimitationsResult,
-    ComputationalRequirementsCheck, ComputationalRequirementsResult,
 )
-expected_infrastructure = """\
-Jean Zay Public Supercomputer, provided by the French government.\
-Hardware\
-384 A100 80GB GPUs (48 nodes)\
-Software\
-Megatron-DeepSpeed (Github link)\
-"""
-class TestComplianceCheck:
-    @pytest.fixture
-    def provider_identity_model_card(self):
-        return """
-# Model Card for Sample Model
-Some random info...
-## Model Details
-### Model Description
-<!-- Provide a longer summary of what this model is. -->
-- **Developed by:** Nima Boscarino
-- **Model type:** Yada yada yada
-        """
-    @pytest.fixture
-    def bad_provider_identity_model_card(self):
-        return """
-# Model Card for Sample Model
-Some random info...
-## Model Details
-### Model Description
-- **Developed by:** [More Information Needed]
-- **Model type:** Yada yada yada
-        """
-    @pytest.fixture
-    def intended_purpose_model_card(self):
-        return """
-# Model Card for Sample Model
-Some random info...
-## Uses
-<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
-### Direct Use
-Here is some info about direct uses...
-### Downstream Use [optional]
-<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
-[More Information Needed]
-### Out-of-Scope Use
-<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
-Here is some info about out-of-scope uses...
-## Bias, Risks, and Limitations
-<!-- This section is meant to convey both technical and sociotechnical limitations. -->
-[More Information Needed]
-        """
-    @pytest.fixture
-    def bad_intended_purpose_model_card(self):
-        return """
-# Model Card for Sample Model
-Some random info...
-## Uses
-<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
-### Direct Use
-<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
-[More Information Needed]
-### Downstream Use [optional]
-<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
-[More Information Needed]
-### Out-of-Scope Use
-<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
-[More Information Needed]
-## Bias, Risks, and Limitations
-<!-- This section is meant to convey both technical and sociotechnical limitations. -->
-[More Information Needed]
-        """
-    @pytest.fixture
-    def general_limitations_model_card(self):
-        return """
-# Model Card for Sample Model
-## Some Random Header
-## Bias, Risks, and Limitations
-<!-- This section is meant to convey both technical and sociotechnical limitations. -->
-Hello world! These are some risks...
-## More Things
-        """
-    @pytest.fixture
-    def bad_general_limitations_model_card(self):
-        return """
-# Model Card for Sample Model
-## Some Random Header
-## Bias, Risks, and Limitations
-<!-- This section is meant to convey both technical and sociotechnical limitations. -->
-[More Information Needed]
-## More Things
-        """
-    @pytest.fixture
-    def computational_requirements_model_card(self):
-        # Adapted from: https://huggingface.co/bigscience/bloom/blob/main/README.md
-        return """
-# Model Card for Sample Model
-## Some Random Header
-## Technical Specifications
-### Compute infrastructure
-Jean Zay Public Supercomputer, provided by the French government.
-#### Hardware
-* 384 A100 80GB GPUs (48 nodes)
-#### Software
-* Megatron-DeepSpeed ([Github link](https://github.com/bigscience-workshop/Megatron-DeepSpeed))
-</details>
-## Intended Use
-Etc..
-"""
-    @pytest.fixture
-    def bad_computational_requirements_model_card(self):
-        # Adapted from: https://huggingface.co/bigscience/bloom/blob/main/README.md
-        return """
-# Model Card for Sample Model
-## Some Random Header
-## Technical Specifications
-### Compute infrastructure
-[More Information Needed]
-## Intended Use
-Etc..
-"""
-    @pytest.mark.parametrize("check,card,expected", [
-        (ModelProviderIdentityCheck(), "provider_identity_model_card", ModelProviderIdentityResult(
-            status=True,
-            provider="Nima Boscarino",
-        )),
-        (ModelProviderIdentityCheck(), "bad_provider_identity_model_card", ModelProviderIdentityResult()),
-        (IntendedPurposeCheck(), "intended_purpose_model_card", IntendedPurposeResult(
-            status=True,
-            direct_use="Here is some info about direct uses...",
-            downstream_use=None,
-            out_of_scope_use="Here is some info about out-of-scope uses...",
-        )),
-        (IntendedPurposeCheck(), "bad_intended_purpose_model_card", IntendedPurposeResult()),
-        (GeneralLimitationsCheck(), "general_limitations_model_card", GeneralLimitationsResult(
-            status=True,
-            limitations="Hello world! These are some risks..."
-        )),
-        (GeneralLimitationsCheck(), "bad_general_limitations_model_card", GeneralLimitationsResult()),
-        (ComputationalRequirementsCheck(), "computational_requirements_model_card", ComputationalRequirementsResult(
-            status=True,
-            requirements=expected_infrastructure,
-        )),
-        (ComputationalRequirementsCheck(), "bad_computational_requirements_model_card", ComputationalRequirementsResult()),
-    ])
-    def test_run_checks(self, check, card, expected, request):
-        card = request.getfixturevalue(card)
-        model_card_html = markdown.markdown(card)
-        card_soup = BeautifulSoup(model_card_html, features="html.parser")
-        results = check.run_check(card_soup)
-        assert results == expected
 class TestComplianceSuite:
     @pytest.fixture
     def mock_compliance_check(self):

 import pytest
 from unittest.mock import MagicMock
 from compliance_checks import (
     ComplianceSuite,
+    IntendedPurposeCheck,
+    GeneralLimitationsCheck,
+    ComputationalRequirementsCheck,
 )
 class TestComplianceSuite:
     @pytest.fixture
     def mock_compliance_check(self):

tests/test_computational_requirements_check.py ADDED Viewed

	@@ -0,0 +1,81 @@

+import pytest
+import markdown
+from bs4 import BeautifulSoup
+from compliance_checks import (
+    ComputationalRequirementsCheck, ComputationalRequirementsResult,
+)
+expected_infrastructure = """\
+Jean Zay Public Supercomputer, provided by the French government.\
+Hardware\
+384 A100 80GB GPUs (48 nodes)\
+Software\
+Megatron-DeepSpeed (Github link)\
+"""
+@pytest.fixture
+def computational_requirements_model_card():
+    # Adapted from: https://huggingface.co/bigscience/bloom/blob/main/README.md
+    return """
+# Model Card for Sample Model
+## Some Random Header
+## Technical Specifications
+### Compute infrastructure
+Jean Zay Public Supercomputer, provided by the French government.
+#### Hardware
+* 384 A100 80GB GPUs (48 nodes)
+#### Software
+* Megatron-DeepSpeed ([Github link](https://github.com/bigscience-workshop/Megatron-DeepSpeed))
+</details>
+## Intended Use
+Etc..
+"""
+@pytest.fixture
+def bad_computational_requirements_model_card():
+    # Adapted from: https://huggingface.co/bigscience/bloom/blob/main/README.md
+    return """
+# Model Card for Sample Model
+## Some Random Header
+## Technical Specifications
+### Compute infrastructure
+[More Information Needed]
+## Intended Use
+Etc..
+"""
+@pytest.mark.parametrize("check,card,expected", [
+    (ComputationalRequirementsCheck(), "computational_requirements_model_card", ComputationalRequirementsResult(
+        status=True,
+        requirements=expected_infrastructure,
+    )),
+    (ComputationalRequirementsCheck(), "bad_computational_requirements_model_card", ComputationalRequirementsResult()),
+])
+def test_run_checks(check, card, expected, request):
+    card = request.getfixturevalue(card)
+    model_card_html = markdown.markdown(card)
+    card_soup = BeautifulSoup(model_card_html, features="html.parser")
+    results = check.run_check(card_soup)
+    assert results == expected

tests/test_general_limitations_check.py ADDED Viewed

	@@ -0,0 +1,59 @@

+import pytest
+import markdown
+from bs4 import BeautifulSoup
+from compliance_checks import (
+    GeneralLimitationsCheck, GeneralLimitationsResult,
+)
+@pytest.fixture
+def general_limitations_model_card():
+    return """
+# Model Card for Sample Model
+## Some Random Header
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+Hello world! These are some risks...
+## More Things
+    """
+@pytest.fixture
+def bad_general_limitations_model_card():
+    return """
+# Model Card for Sample Model
+## Some Random Header
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+## More Things
+    """
+@pytest.mark.parametrize("check,card,expected", [
+    (GeneralLimitationsCheck(), "general_limitations_model_card", GeneralLimitationsResult(
+        status=True,
+        limitations="Hello world! These are some risks..."
+    )),
+    (GeneralLimitationsCheck(), "bad_general_limitations_model_card", GeneralLimitationsResult()),
+])
+def test_run_checks(check, card, expected, request):
+    card = request.getfixturevalue(card)
+    model_card_html = markdown.markdown(card)
+    card_soup = BeautifulSoup(model_card_html, features="html.parser")
+    results = check.run_check(card_soup)
+    assert results == expected

tests/test_intended_purpose_check.py ADDED Viewed

	@@ -0,0 +1,99 @@

+import pytest
+import markdown
+from bs4 import BeautifulSoup
+from compliance_checks import (
+    IntendedPurposeCheck, IntendedPurposeResult,
+)
+@pytest.fixture
+def intended_purpose_model_card():
+    return """
+# Model Card for Sample Model
+Some random info...
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+Here is some info about direct uses...
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+Here is some info about out-of-scope uses...
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+    """
+@pytest.fixture
+def bad_intended_purpose_model_card():
+    return """
+# Model Card for Sample Model
+Some random info...
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+    """
+@pytest.mark.parametrize("check,card,expected", [
+    (IntendedPurposeCheck(), "intended_purpose_model_card", IntendedPurposeResult(
+        status=True,
+        direct_use="Here is some info about direct uses...",
+        downstream_use=None,
+        out_of_scope_use="Here is some info about out-of-scope uses...",
+    )),
+    (IntendedPurposeCheck(), "bad_intended_purpose_model_card", IntendedPurposeResult()),
+])
+def test_run_checks(check, card, expected, request):
+    card = request.getfixturevalue(card)
+    model_card_html = markdown.markdown(card)
+    card_soup = BeautifulSoup(model_card_html, features="html.parser")
+    results = check.run_check(card_soup)
+    assert results == expected