Spaces:
Runtime error
Runtime error
| from abc import ABC, abstractmethod | |
| from typing import Optional, List | |
| import markdown | |
| from bs4 import BeautifulSoup, Comment | |
def walk_to_next_heading(card, heading, heading_text):
    """Collect the text of the section that follows a given heading.

    Looks up the first ``heading`` tag in ``card`` whose string equals
    ``heading_text`` and concatenates the stripped text of each following
    sibling until the section ends.

    Args:
        card: Parsed model card exposing a BeautifulSoup-style
            ``find(name, string=...)`` method.
        heading: Tag name of the section heading, e.g. ``"h2"`` or ``"h3"``.
            The digit after the ``h`` is read as the heading level.
        heading_text: Exact heading text to search for.

    Returns:
        ``(True, content)`` when the heading was found and the section is not
        just the ``[More Information Needed]`` placeholder (``content`` may be
        an empty string for an empty section); ``(False, None)`` when the
        heading is missing or the section only holds the placeholder.
    """
    # A section is closed by the next heading of the same level or of any
    # higher level (an h3 section ends at an h3, h2 or h1), not only by the
    # level directly above it.
    level = int(heading[1])
    stop_at = {f"h{n}" for n in range(1, level + 1)}

    try:
        heading_node = card.find(heading, string=heading_text)
        pieces = []
        # Iterate the siblings directly instead of priming a generator with
        # next(): a heading that is the last node of the document then yields
        # an empty section rather than an uncaught StopIteration.
        for sibling in heading_node.next_siblings:
            if sibling.name in stop_at:
                break
            # HTML comments are not part of the visible section text.
            if not isinstance(sibling, Comment):
                pieces.append(sibling.text.strip())
    except AttributeError:
        # Raised when the heading was not found (find() returned None) or a
        # node lacks the attributes read above.
        return False, None

    content = "".join(pieces)
    if content.strip() == "[More Information Needed]":
        return False, None
    return True, content
class ComplianceResult(ABC):
    """Base class for the outcome of a single compliance check."""

    # Display name of the check this result belongs to; subclasses override it.
    name: Optional[str] = None

    def __init__(self, status: Optional[bool] = False, *args, **kwargs):
        """Store the outcome of the check.

        Args:
            status: Whether the check passed. Defaults to ``False``.
            *args: Accepted and ignored, so subclasses can forward leftovers.
            **kwargs: Accepted and ignored, so subclasses can forward leftovers.
        """
        self.status = status

    def __eq__(self, other):
        """Compare two results by their ``status``.

        Returns ``NotImplemented`` for objects that are not compliance
        results, so ``==`` evaluates to ``False`` instead of raising.
        """
        if not isinstance(other, ComplianceResult):
            return NotImplemented
        # Plain comparison rather than ``assert``: assertions are removed
        # under ``python -O``, which would make every pair compare equal.
        return self.status == other.status

    def to_string(self):
        """Return a printable form of the result; subclasses override this."""
        return "Not Implemented"
class ComplianceCheck(ABC):
    """Interface of a single check that is run against a parsed model card."""

    # Display name of the check; concrete checks override it.
    name: str = None

    def run_check(self, card: BeautifulSoup) -> ComplianceResult:
        """Evaluate ``card`` and return the matching result object.

        The base implementation always raises ``NotImplementedError``;
        concrete checks provide the actual logic.
        """
        raise NotImplementedError()
class ModelProviderIdentityResult(ComplianceResult):
    """Result of the model-provider-identity check."""

    name = "Model Provider Identity"

    def __init__(self, provider: Optional[str] = None, *args, **kwargs):
        """Store the provider text and forward the rest to the base class.

        Args:
            provider: Text found for the model provider, or ``None``.
            *args: Forwarded to ``ComplianceResult.__init__`` (e.g. ``status``).
            **kwargs: Forwarded to ``ComplianceResult.__init__``.
        """
        super().__init__(*args, **kwargs)
        self.provider = provider

    def __eq__(self, other):
        """Return True when ``other`` has the same type, status and provider."""
        if not isinstance(other, ModelProviderIdentityResult):
            return False
        if not super().__eq__(other):
            return False
        # Plain comparison rather than ``assert``: assertions vanish under
        # ``python -O``, which would make differing providers compare equal.
        return self.provider == other.provider

    def to_string(self):
        """Return the provider as a string (``"None"`` when it is unset)."""
        return str(self.provider)
class ModelProviderIdentityCheck(ComplianceCheck):
    """Check that reads the text following the card's "Developed by:" label."""

    name = "Model Provider Identity"

    def run_check(self, card: BeautifulSoup):
        """Extract the provider that follows the ``Developed by:`` label.

        Returns a passing result carrying the provider text, or a default
        (failing) result when the label is missing or is followed only by the
        ``[More Information Needed]`` placeholder.
        """
        try:
            label = card.find("strong", string="Developed by:")
            # Everything after the <strong> label within its parent, markup included.
            provider_text = "".join(map(str, label.next_siblings)).strip()
        except AttributeError:
            # find() returned None: the label is not present in the card.
            return ModelProviderIdentityResult()

        if provider_text == "[More Information Needed]":
            return ModelProviderIdentityResult()
        return ModelProviderIdentityResult(status=True, provider=provider_text)
class IntendedPurposeResult(ComplianceResult):
    """Result of the intended-purpose check."""

    name = "Intended Purpose"

    def __init__(
            self,
            direct_use: Optional[str] = None,
            downstream_use: Optional[str] = None,
            out_of_scope_use: Optional[str] = None,
            *args,
            **kwargs,
    ):
        """Store the three use sections and forward the rest to the base class.

        Args:
            direct_use: Text of the "Direct Use" section, or ``None``.
            downstream_use: Text of the "Downstream Use" section, or ``None``.
            out_of_scope_use: Text of the "Out-of-Scope Use" section, or ``None``.
            *args: Forwarded to ``ComplianceResult.__init__`` (e.g. ``status``).
            **kwargs: Forwarded to ``ComplianceResult.__init__``.
        """
        super().__init__(*args, **kwargs)
        self.direct_use = direct_use
        self.downstream_use = downstream_use
        self.out_of_scope_use = out_of_scope_use

    def __eq__(self, other):
        """Return True when ``other`` has the same type, status and sections."""
        if not isinstance(other, IntendedPurposeResult):
            return False
        if not super().__eq__(other):
            return False
        # Plain comparisons rather than ``assert``: assertions vanish under
        # ``python -O``, which would make differing results compare equal.
        return (
            self.direct_use == other.direct_use
            and self.downstream_use == other.downstream_use
            and self.out_of_scope_use == other.out_of_scope_use
        )

    def to_string(self):
        """Return the three sections as the string form of a 3-tuple."""
        # The middle element is the downstream use; it previously repeated
        # ``direct_use`` and never showed the downstream text.
        return str((self.direct_use, self.downstream_use, self.out_of_scope_use))
class IntendedPurposeCheck(ComplianceCheck):
    """Check that reads the card's direct, downstream and out-of-scope use sections."""

    name = "Intended Purpose"

    def run_check(self, card: BeautifulSoup):
        """Read the three use sections and build the combined result.

        The result passes only when both the "Direct Use" and the
        "Out-of-Scope Use" sections are present; the downstream section is
        reported but does not influence the status.
        """
        direct_ok, direct_text = walk_to_next_heading(card, "h3", "Direct Use")
        # TODO: Handle [optional], which doesn't exist in BLOOM, e.g.
        _, downstream_text = walk_to_next_heading(card, "h3", "Downstream Use [optional]")
        scope_ok, scope_text = walk_to_next_heading(card, "h3", "Out-of-Scope Use")

        passed = direct_ok and scope_ok
        return IntendedPurposeResult(
            status=passed,
            direct_use=direct_text,
            downstream_use=downstream_text,
            out_of_scope_use=scope_text,
        )
class GeneralLimitationsResult(ComplianceResult):
    """Result of the general-limitations check."""

    name = "General Limitations"

    def __init__(
            self,
            limitations: Optional[str] = None,
            *args,
            **kwargs,
    ):
        """Store the limitations text and forward the rest to the base class.

        Args:
            limitations: Text of the limitations section, or ``None``.
            *args: Forwarded to ``ComplianceResult.__init__`` (e.g. ``status``).
            **kwargs: Forwarded to ``ComplianceResult.__init__``.
        """
        super().__init__(*args, **kwargs)
        self.limitations = limitations

    def __eq__(self, other):
        """Return True when ``other`` has the same type, status and limitations."""
        if not isinstance(other, GeneralLimitationsResult):
            return False
        if not super().__eq__(other):
            return False
        # Plain comparison rather than ``assert``: assertions vanish under
        # ``python -O``, which would make differing results compare equal.
        return self.limitations == other.limitations

    def to_string(self):
        """Return the stored limitations text (``None`` when it is unset)."""
        return self.limitations
class GeneralLimitationsCheck(ComplianceCheck):
    """Check that reads the card's "Bias, Risks, and Limitations" section."""

    name = "General Limitations"

    def run_check(self, card: BeautifulSoup):
        """Read the h2 "Bias, Risks, and Limitations" section into a result."""
        found, section_text = walk_to_next_heading(card, "h2", "Bias, Risks, and Limitations")
        return GeneralLimitationsResult(status=found, limitations=section_text)
class ComputationalRequirementsResult(ComplianceResult):
    """Result of the computational-requirements check."""

    name = "Computational Requirements"

    def __init__(
            self,
            requirements: Optional[str] = None,
            *args,
            **kwargs,
    ):
        """Store the requirements text and forward the rest to the base class.

        Args:
            requirements: Text of the compute section, or ``None``.
            *args: Forwarded to ``ComplianceResult.__init__`` (e.g. ``status``).
            **kwargs: Forwarded to ``ComplianceResult.__init__``.
        """
        super().__init__(*args, **kwargs)
        self.requirements = requirements

    def __eq__(self, other):
        """Return True when ``other`` has the same type, status and requirements."""
        if not isinstance(other, ComputationalRequirementsResult):
            return False
        if not super().__eq__(other):
            return False
        # Plain comparison rather than ``assert``: assertions vanish under
        # ``python -O``, which would make differing results compare equal.
        return self.requirements == other.requirements

    def to_string(self):
        """Return the stored requirements text (``None`` when it is unset)."""
        return self.requirements
class ComputationalRequirementsCheck(ComplianceCheck):
    """Check that reads the card's "Compute infrastructure" section."""

    name = "Computational Requirements"

    def run_check(self, card: BeautifulSoup):
        """Read the h3 "Compute infrastructure" section into a result."""
        found, section_text = walk_to_next_heading(card, "h3", "Compute infrastructure")
        return ComputationalRequirementsResult(status=found, requirements=section_text)
class ComplianceSuite:
    """Runs a collection of compliance checks against one model card."""

    def __init__(self, checks):
        """Remember the checks to run; each must offer ``run_check(card)``."""
        self.checks = checks

    def run(self, model_card) -> List[ComplianceResult]:
        """Render the Markdown card to HTML, parse it, and run every check.

        Args:
            model_card: Model card source as Markdown text.

        Returns:
            One result per check, in the order of ``self.checks``.
        """
        rendered_html = markdown.markdown(model_card)
        soup = BeautifulSoup(rendered_html, features="html.parser")

        results = []
        for check in self.checks:
            results.append(check.run_check(soup))
        return results