Upload 235 files
This view is limited to 50 files because the commit contains too many changes; see the raw diff for the full set.
- litellm/__init__.py +557 -0
- litellm/_logging.py +30 -0
- litellm/_redis.py +93 -0
- litellm/_version.py +6 -0
- litellm/budget_manager.py +206 -0
- litellm/caching.py +678 -0
- litellm/cost.json +5 -0
- litellm/deprecated_litellm_server/.env.template +43 -0
- litellm/deprecated_litellm_server/Dockerfile +10 -0
- litellm/deprecated_litellm_server/README.md +3 -0
- litellm/deprecated_litellm_server/__init__.py +2 -0
- litellm/deprecated_litellm_server/main.py +193 -0
- litellm/deprecated_litellm_server/requirements.txt +7 -0
- litellm/deprecated_litellm_server/server_utils.py +85 -0
- litellm/exceptions.py +200 -0
- litellm/integrations/__init__.py +1 -0
- litellm/integrations/aispend.py +177 -0
- litellm/integrations/berrispend.py +184 -0
- litellm/integrations/custom_logger.py +130 -0
- litellm/integrations/dynamodb.py +92 -0
- litellm/integrations/helicone.py +114 -0
- litellm/integrations/langfuse.py +191 -0
- litellm/integrations/langsmith.py +75 -0
- litellm/integrations/litedebugger.py +262 -0
- litellm/integrations/llmonitor.py +127 -0
- litellm/integrations/prompt_layer.py +72 -0
- litellm/integrations/s3.py +150 -0
- litellm/integrations/supabase.py +117 -0
- litellm/integrations/traceloop.py +114 -0
- litellm/integrations/weights_biases.py +223 -0
- litellm/llms/__init__.py +1 -0
- litellm/llms/ai21.py +212 -0
- litellm/llms/aleph_alpha.py +304 -0
- litellm/llms/anthropic.py +215 -0
- litellm/llms/azure.py +799 -0
- litellm/llms/base.py +45 -0
- litellm/llms/baseten.py +164 -0
- litellm/llms/bedrock.py +799 -0
- litellm/llms/cloudflare.py +176 -0
- litellm/llms/cohere.py +293 -0
- litellm/llms/custom_httpx/azure_dall_e_2.py +136 -0
- litellm/llms/custom_httpx/bedrock_async.py +0 -0
- litellm/llms/gemini.py +222 -0
- litellm/llms/huggingface_llms_metadata/hf_conversational_models.txt +2523 -0
- litellm/llms/huggingface_llms_metadata/hf_text_generation_models.txt +0 -0
- litellm/llms/huggingface_restapi.py +750 -0
- litellm/llms/maritalk.py +189 -0
- litellm/llms/nlp_cloud.py +243 -0
- litellm/llms/ollama.py +400 -0
- litellm/llms/ollama_chat.py +333 -0
    	
litellm/__init__.py
ADDED

@@ -0,0 +1,557 @@
### INIT VARIABLES ###
import threading, requests
from typing import Callable, List, Optional, Dict, Union, Any
from litellm.caching import Cache
from litellm._logging import set_verbose
from litellm.proxy._types import KeyManagementSystem
import httpx

input_callback: List[Union[str, Callable]] = []
success_callback: List[Union[str, Callable]] = []
failure_callback: List[Union[str, Callable]] = []
callbacks: List[Callable] = []
_async_input_callback: List[Callable] = []  # internal variable - async custom callbacks are routed here.
_async_success_callback: List[Union[str, Callable]] = []  # internal variable - async custom callbacks are routed here.
_async_failure_callback: List[Callable] = []  # internal variable - async custom callbacks are routed here.
pre_call_rules: List[Callable] = []
post_call_rules: List[Callable] = []
email: Optional[str] = None  # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
token: Optional[str] = None  # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
telemetry = True
max_tokens = 256  # OpenAI Defaults
drop_params = False
retry = True
api_key: Optional[str] = None
openai_key: Optional[str] = None
azure_key: Optional[str] = None
anthropic_key: Optional[str] = None
replicate_key: Optional[str] = None
cohere_key: Optional[str] = None
maritalk_key: Optional[str] = None
ai21_key: Optional[str] = None
openrouter_key: Optional[str] = None
huggingface_key: Optional[str] = None
vertex_project: Optional[str] = None
vertex_location: Optional[str] = None
togetherai_api_key: Optional[str] = None
cloudflare_api_key: Optional[str] = None
baseten_key: Optional[str] = None
aleph_alpha_key: Optional[str] = None
nlp_cloud_key: Optional[str] = None
use_client: bool = False
logging: bool = True
caching: bool = False  # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
caching_with_models: bool = False  # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
cache: Optional[Cache] = None  # cache object <- use this - https://docs.litellm.ai/docs/caching
model_alias_map: Dict[str, str] = {}
model_group_alias_map: Dict[str, str] = {}
max_budget: float = 0.0  # set the max budget across all providers
_openai_completion_params = [
    "functions",
    "function_call",
    "temperature",
    "top_p",
    "n",
    "stream",
    "stop",
    "max_tokens",
    "presence_penalty",
    "frequency_penalty",
    "logit_bias",
    "user",
    "request_timeout",
    "api_base",
    "api_version",
    "api_key",
    "deployment_id",
    "organization",
    "base_url",
    "default_headers",
    "timeout",
    "response_format",
    "seed",
    "tools",
    "tool_choice",
    "max_retries",
]
_litellm_completion_params = [
    "metadata",
    "acompletion",
    "caching",
    "mock_response",
    "api_key",
    "api_version",
    "api_base",
    "force_timeout",
    "logger_fn",
    "verbose",
    "custom_llm_provider",
    "litellm_logging_obj",
    "litellm_call_id",
    "use_client",
    "id",
    "fallbacks",
    "azure",
    "headers",
    "model_list",
    "num_retries",
    "context_window_fallback_dict",
    "roles",
    "final_prompt_value",
    "bos_token",
    "eos_token",
    "request_timeout",
    "complete_response",
    "self",
    "client",
    "rpm",
    "tpm",
    "input_cost_per_token",
    "output_cost_per_token",
    "hf_model_name",
    "model_info",
    "proxy_server_request",
    "preset_cache_key",
]
_current_cost = 0  # private variable, used if max budget is set
error_logs: Dict = {}
add_function_to_prompt: bool = False  # if function calling not supported by api, append function call details to system prompt
client_session: Optional[httpx.Client] = None
aclient_session: Optional[httpx.AsyncClient] = None
model_fallbacks: Optional[List] = None  # Deprecated for 'litellm.fallbacks'
model_cost_map_url: str = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
suppress_debug_info = False
dynamodb_table_name: Optional[str] = None
s3_callback_params: Optional[Dict] = None
#### RELIABILITY ####
request_timeout: Optional[float] = 6000
num_retries: Optional[int] = None  # per model endpoint
fallbacks: Optional[List] = None
context_window_fallbacks: Optional[List] = None
allowed_fails: int = 0
num_retries_per_request: Optional[int] = None  # for the request overall (incl. fallbacks + model retries)
####### SECRET MANAGERS #####################
secret_manager_client: Optional[Any] = None  # list of instantiated key management clients - e.g. azure kv, infisical, etc.
_google_kms_resource_name: Optional[str] = None
_key_management_system: Optional[KeyManagementSystem] = None
#############################################


def get_model_cost_map(url: str):
    try:
        with requests.get(
            url, timeout=5
        ) as response:  # set a 5 second timeout for the get request
            response.raise_for_status()  # Raise an exception if the request is unsuccessful
            content = response.json()
            return content
    except Exception:
        # fall back to the bundled copy when the remote fetch fails
        import importlib.resources
        import json

        with importlib.resources.open_text(
            "litellm", "model_prices_and_context_window_backup.json"
        ) as f:
            content = json.load(f)
            return content


model_cost = get_model_cost_map(url=model_cost_map_url)
custom_prompt_dict: Dict[str, dict] = {}


####### THREAD-SPECIFIC DATA ###################
class MyLocal(threading.local):
    def __init__(self):
        self.user = "Hello World"


_thread_context = MyLocal()


def identify(event_details):
    # Store user in thread local data
    if "user" in event_details:
        _thread_context.user = event_details["user"]


####### ADDITIONAL PARAMS ################### configurable params if you use proxy models like Helicone, map spend to org id, etc.
api_base = None
headers = None
api_version = None
organization = None
config_path = None
####### COMPLETION MODELS ###################
open_ai_chat_completion_models: List = []
open_ai_text_completion_models: List = []
cohere_models: List = []
anthropic_models: List = []
openrouter_models: List = []
vertex_language_models: List = []
vertex_vision_models: List = []
vertex_chat_models: List = []
vertex_code_chat_models: List = []
vertex_text_models: List = []
vertex_code_text_models: List = []
ai21_models: List = []
nlp_cloud_models: List = []
aleph_alpha_models: List = []
bedrock_models: List = []
deepinfra_models: List = []
perplexity_models: List = []
for key, value in model_cost.items():
    if value.get("litellm_provider") == "openai":
        open_ai_chat_completion_models.append(key)
    elif value.get("litellm_provider") == "text-completion-openai":
        open_ai_text_completion_models.append(key)
    elif value.get("litellm_provider") == "cohere":
        cohere_models.append(key)
    elif value.get("litellm_provider") == "anthropic":
        anthropic_models.append(key)
    elif value.get("litellm_provider") == "openrouter":
        openrouter_models.append(key)
    elif value.get("litellm_provider") == "vertex_ai-text-models":
        vertex_text_models.append(key)
    elif value.get("litellm_provider") == "vertex_ai-code-text-models":
        vertex_code_text_models.append(key)
    elif value.get("litellm_provider") == "vertex_ai-language-models":
        vertex_language_models.append(key)
    elif value.get("litellm_provider") == "vertex_ai-vision-models":
        vertex_vision_models.append(key)
    elif value.get("litellm_provider") == "vertex_ai-chat-models":
        vertex_chat_models.append(key)
    elif value.get("litellm_provider") == "vertex_ai-code-chat-models":
        vertex_code_chat_models.append(key)
    elif value.get("litellm_provider") == "ai21":
        ai21_models.append(key)
    elif value.get("litellm_provider") == "nlp_cloud":
        nlp_cloud_models.append(key)
    elif value.get("litellm_provider") == "aleph_alpha":
        aleph_alpha_models.append(key)
    elif value.get("litellm_provider") == "bedrock":
        bedrock_models.append(key)
    elif value.get("litellm_provider") == "deepinfra":
        deepinfra_models.append(key)
    elif value.get("litellm_provider") == "perplexity":
        perplexity_models.append(key)

# known openai compatible endpoints - we'll eventually move this list to the model_prices_and_context_window.json dictionary
openai_compatible_endpoints: List = [
    "api.perplexity.ai",
    "api.endpoints.anyscale.com/v1",
    "api.deepinfra.com/v1/openai",
    "api.mistral.ai/v1",
]

# this is maintained for Exception Mapping
openai_compatible_providers: List = [
    "anyscale",
    "mistral",
    "deepinfra",
    "perplexity",
    "xinference",
]


# well supported replicate llms
replicate_models: List = [
    # llama replicate supported LLMs
    "replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf",
    "a16z-infra/llama-2-13b-chat:2a7f981751ec7fdf87b5b91ad4db53683a98082e9ff7bfd12c8cd5ea85980a52",
    "meta/codellama-13b:1c914d844307b0588599b8393480a3ba917b660c7e9dfae681542b5325f228db",
    # Vicuna
    "replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b",
    "joehoover/instructblip-vicuna13b:c4c54e3c8c97cd50c2d2fec9be3b6065563ccf7d43787fb99f84151b867178fe",
    # Flan T-5
    "daanelson/flan-t5-large:ce962b3f6792a57074a601d3979db5839697add2e4e02696b3ced4c022d4767f",
    # Others
    "replicate/dolly-v2-12b:ef0e1aefc61f8e096ebe4db6b2bacc297daf2ef6899f0f7e001ec445893500e5",
    "replit/replit-code-v1-3b:b84f4c074b807211cd75e3e8b1589b6399052125b4c27106e43d47189e8415ad",
]

huggingface_models: List = [
    "meta-llama/Llama-2-7b-hf",
    "meta-llama/Llama-2-7b-chat-hf",
    "meta-llama/Llama-2-13b-hf",
    "meta-llama/Llama-2-13b-chat-hf",
    "meta-llama/Llama-2-70b-hf",
    "meta-llama/Llama-2-70b-chat-hf",
    "meta-llama/Llama-2-7b",
    "meta-llama/Llama-2-7b-chat",
    "meta-llama/Llama-2-13b",
    "meta-llama/Llama-2-13b-chat",
    "meta-llama/Llama-2-70b",
    "meta-llama/Llama-2-70b-chat",
]  # these have been tested extensively. By default, all text2text-generation and text-generation models are supported by liteLLM. - https://docs.litellm.ai/docs/providers

together_ai_models: List = [
    # llama llms - chat
    "togethercomputer/llama-2-70b-chat",
    # llama llms - language / instruct
    "togethercomputer/llama-2-70b",
    "togethercomputer/LLaMA-2-7B-32K",
    "togethercomputer/Llama-2-7B-32K-Instruct",
    "togethercomputer/llama-2-7b",
    # falcon llms
    "togethercomputer/falcon-40b-instruct",
    "togethercomputer/falcon-7b-instruct",
    # alpaca
    "togethercomputer/alpaca-7b",
    # chat llms
    "HuggingFaceH4/starchat-alpha",
    # code llms
    "togethercomputer/CodeLlama-34b",
    "togethercomputer/CodeLlama-34b-Instruct",
    "togethercomputer/CodeLlama-34b-Python",
    "defog/sqlcoder",
    "NumbersStation/nsql-llama-2-7B",
    "WizardLM/WizardCoder-15B-V1.0",
    "WizardLM/WizardCoder-Python-34B-V1.0",
    # language llms
    "NousResearch/Nous-Hermes-Llama2-13b",
    "Austism/chronos-hermes-13b",
    "upstage/SOLAR-0-70b-16bit",
    "WizardLM/WizardLM-70B-V1.0",
]  # supports all together ai models, just pass in the model id e.g. completion(model="together_computer/replit_code_3b",...)


baseten_models: List = [
    "qvv0xeq",  # FALCON 7B
    "q841o8w",  # WizardLM
    "31dxrj3",  # Mosaic ML
]


# used for Cost Tracking & Token counting
# https://azure.microsoft.com/en-in/pricing/details/cognitive-services/openai-service/
# Azure returns gpt-35-turbo in their responses, we need to map this to azure/gpt-3.5-turbo for token counting
azure_llms = {
    "gpt-35-turbo": "azure/gpt-35-turbo",
    "gpt-35-turbo-16k": "azure/gpt-35-turbo-16k",
    "gpt-35-turbo-instruct": "azure/gpt-35-turbo-instruct",
}

azure_embedding_models = {
    "ada": "azure/ada",
}

petals_models = [
    "petals-team/StableBeluga2",
]

ollama_models = ["llama2"]

maritalk_models = ["maritalk"]

model_list = (
    open_ai_chat_completion_models
    + open_ai_text_completion_models
    + cohere_models
    + anthropic_models
    + replicate_models
    + openrouter_models
    + huggingface_models
    + vertex_chat_models
    + vertex_text_models
    + ai21_models
    + together_ai_models
    + baseten_models
    + aleph_alpha_models
    + nlp_cloud_models
    + ollama_models
    + bedrock_models
    + deepinfra_models
    + perplexity_models
    + maritalk_models
)

provider_list: List = [
    "openai",
    "custom_openai",
    "text-completion-openai",
    "cohere",
    "anthropic",
    "replicate",
    "huggingface",
    "together_ai",
    "openrouter",
    "vertex_ai",
    "palm",
    "gemini",
    "ai21",
    "baseten",
    "azure",
    "sagemaker",
    "bedrock",
    "vllm",
    "nlp_cloud",
    "petals",
    "oobabooga",
    "ollama",
    "ollama_chat",
    "deepinfra",
    "perplexity",
    "anyscale",
    "mistral",
    "maritalk",
    "voyage",
    "cloudflare",
    "xinference",
    "custom",  # custom apis
]

models_by_provider: dict = {
    "openai": open_ai_chat_completion_models + open_ai_text_completion_models,
    "cohere": cohere_models,
    "anthropic": anthropic_models,
    "replicate": replicate_models,
    "huggingface": huggingface_models,
    "together_ai": together_ai_models,
    "baseten": baseten_models,
    "openrouter": openrouter_models,
    "vertex_ai": vertex_chat_models + vertex_text_models,
    "ai21": ai21_models,
    "bedrock": bedrock_models,
    "petals": petals_models,
    "ollama": ollama_models,
    "deepinfra": deepinfra_models,
    "perplexity": perplexity_models,
    "maritalk": maritalk_models,
}

# mapping for those models which have larger equivalents
longer_context_model_fallback_dict: dict = {
    # openai chat completion models
    "gpt-3.5-turbo": "gpt-3.5-turbo-16k",
    "gpt-3.5-turbo-0301": "gpt-3.5-turbo-16k-0301",
    "gpt-3.5-turbo-0613": "gpt-3.5-turbo-16k-0613",
    "gpt-4": "gpt-4-32k",
    "gpt-4-0314": "gpt-4-32k-0314",
    "gpt-4-0613": "gpt-4-32k-0613",
    # anthropic
    "claude-instant-1": "claude-2",
    "claude-instant-1.2": "claude-2",
    # vertexai
    "chat-bison": "chat-bison-32k",
    "chat-bison@001": "chat-bison-32k",
    "codechat-bison": "codechat-bison-32k",
    "codechat-bison@001": "codechat-bison-32k",
    # openrouter
    "openrouter/openai/gpt-3.5-turbo": "openrouter/openai/gpt-3.5-turbo-16k",
    "openrouter/anthropic/claude-instant-v1": "openrouter/anthropic/claude-2",
}

####### EMBEDDING MODELS ###################
open_ai_embedding_models: List = ["text-embedding-ada-002"]
cohere_embedding_models: List = [
    "embed-english-v3.0",
    "embed-english-light-v3.0",
    "embed-multilingual-v3.0",
    "embed-english-v2.0",
    "embed-english-light-v2.0",
    "embed-multilingual-v2.0",
]
bedrock_embedding_models: List = [
    "amazon.titan-embed-text-v1",
    "cohere.embed-english-v3",
    "cohere.embed-multilingual-v3",
]

all_embedding_models = (
    open_ai_embedding_models + cohere_embedding_models + bedrock_embedding_models
)

####### IMAGE GENERATION MODELS ###################
openai_image_generation_models = ["dall-e-2", "dall-e-3"]


from .timeout import timeout
from .utils import (
    client,
    exception_type,
    get_optional_params,
    modify_integration,
    token_counter,
    cost_per_token,
    completion_cost,
    get_litellm_params,
    Logging,
    acreate,
    get_model_list,
    get_max_tokens,
    get_model_info,
    register_prompt_template,
    validate_environment,
    check_valid_key,
    get_llm_provider,
    register_model,
    encode,
    decode,
    _calculate_retry_after,
    _should_retry,
    get_secret,
)
from .llms.huggingface_restapi import HuggingfaceConfig
from .llms.anthropic import AnthropicConfig
from .llms.replicate import ReplicateConfig
from .llms.cohere import CohereConfig
from .llms.ai21 import AI21Config
from .llms.together_ai import TogetherAIConfig
from .llms.cloudflare import CloudflareConfig
from .llms.palm import PalmConfig
from .llms.gemini import GeminiConfig
from .llms.nlp_cloud import NLPCloudConfig
from .llms.aleph_alpha import AlephAlphaConfig
from .llms.petals import PetalsConfig
from .llms.vertex_ai import VertexAIConfig
from .llms.sagemaker import SagemakerConfig
from .llms.ollama import OllamaConfig
from .llms.maritalk import MaritTalkConfig
from .llms.bedrock import (
    AmazonTitanConfig,
    AmazonAI21Config,
    AmazonAnthropicConfig,
    AmazonCohereConfig,
    AmazonLlamaConfig,
)
from .llms.openai import OpenAIConfig, OpenAITextCompletionConfig
from .llms.azure import AzureOpenAIConfig, AzureOpenAIError
from .main import *  # type: ignore
from .integrations import *
from .exceptions import (
    AuthenticationError,
    InvalidRequestError,
    BadRequestError,
    NotFoundError,
    RateLimitError,
    ServiceUnavailableError,
    OpenAIError,
    ContextWindowExceededError,
    ContentPolicyViolationError,
    BudgetExceededError,
    APIError,
    Timeout,
    APIConnectionError,
    APIResponseValidationError,
    UnprocessableEntityError,
)
from .budget_manager import BudgetManager
from .proxy.proxy_cli import run_server
from .router import Router
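For orientation, here is a minimal sketch of how this module-level state is consumed from user code — `completion` is re-exported by the `from .main import *` above; the API key and model name below are placeholders:

```python
import litellm
from litellm import completion

# module-level knobs, read when completion() is called (placeholder values)
litellm.api_key = "sk-placeholder"
litellm.drop_params = True  # drop provider-unsupported params instead of erroring
litellm.identify({"user": "user-123"})  # stored in the thread-local _thread_context

response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello!"}],
)
print(response.choices[0].message.content)
```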
    	
litellm/_logging.py
ADDED

@@ -0,0 +1,30 @@
import logging

set_verbose = False

# Create a handler for the logger (you may need to adapt this based on your needs)
handler = logging.StreamHandler()
handler.setLevel(logging.DEBUG)

# Create a formatter and set it for the handler
formatter = logging.Formatter("\033[92m%(name)s - %(levelname)s\033[0m: %(message)s")

handler.setFormatter(formatter)


def print_verbose(print_statement):
    try:
        if set_verbose:
            print(print_statement)  # noqa
    except Exception:
        pass


verbose_proxy_logger = logging.getLogger("LiteLLM Proxy")
verbose_router_logger = logging.getLogger("LiteLLM Router")
verbose_logger = logging.getLogger("LiteLLM")

# Add the handler to the logger
verbose_router_logger.addHandler(handler)
verbose_proxy_logger.addHandler(handler)
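Because the file wires one shared `StreamHandler` onto the named component loggers, verbosity can be tuned per component through the standard `logging` API; a minimal sketch, using the logger names from the file above:

```python
import logging
from litellm import _logging

_logging.set_verbose = True  # makes print_verbose() above actually print

# the named loggers wired above respond to normal logging controls
logging.getLogger("LiteLLM Router").setLevel(logging.DEBUG)
logging.getLogger("LiteLLM Proxy").setLevel(logging.WARNING)
```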
    	
litellm/_redis.py
ADDED

@@ -0,0 +1,93 @@
| 1 | 
            +
            # +-----------------------------------------------+
         | 
| 2 | 
            +
            # |                                               |
         | 
| 3 | 
            +
            # |           Give Feedback / Get Help            |
         | 
| 4 | 
            +
            # | https://github.com/BerriAI/litellm/issues/new |
         | 
| 5 | 
            +
            # |                                               |
         | 
| 6 | 
            +
            # +-----------------------------------------------+
         | 
| 7 | 
            +
            #
         | 
| 8 | 
            +
            #  Thank you users! We ❤️ you! - Krrish & Ishaan
         | 
| 9 | 
            +
             | 
| 10 | 
            +
# s/o [@Frank Colson](https://www.linkedin.com/in/frank-colson-422b9b183/) for this redis implementation
import os
import inspect
import redis, litellm
from typing import List, Optional


def _get_redis_kwargs():
    arg_spec = inspect.getfullargspec(redis.Redis)

    # Only allow primitive arguments
    exclude_args = {
        "self",
        "connection_pool",
        "retry",
    }

    include_args = ["url"]

    available_args = [x for x in arg_spec.args if x not in exclude_args] + include_args

    return available_args


def _get_redis_env_kwarg_mapping():
    PREFIX = "REDIS_"

    return {f"{PREFIX}{x.upper()}": x for x in _get_redis_kwargs()}


def _redis_kwargs_from_environment():
    mapping = _get_redis_env_kwarg_mapping()

    return_dict = {}
    for k, v in mapping.items():
        value = litellm.get_secret(k, default_value=None)  # check os.environ/key vault
        if value is not None:
            return_dict[v] = value
    return return_dict


def get_redis_url_from_environment():
    if "REDIS_URL" in os.environ:
        return os.environ["REDIS_URL"]

    if "REDIS_HOST" not in os.environ or "REDIS_PORT" not in os.environ:
        raise ValueError(
            "Either 'REDIS_URL' or both 'REDIS_HOST' and 'REDIS_PORT' must be specified for Redis."
        )

    if "REDIS_PASSWORD" in os.environ:
        redis_password = f":{os.environ['REDIS_PASSWORD']}@"
    else:
        redis_password = ""

    return (
        f"redis://{redis_password}{os.environ['REDIS_HOST']}:{os.environ['REDIS_PORT']}"
    )


def get_redis_client(**env_overrides):
    ### check if "os.environ/<key-name>" passed in
    for k, v in env_overrides.items():
        if isinstance(v, str) and v.startswith("os.environ/"):
            v = v.replace("os.environ/", "")
            value = litellm.get_secret(v)
            env_overrides[k] = value

    redis_kwargs = {
        **_redis_kwargs_from_environment(),
        **env_overrides,
    }

    if "url" in redis_kwargs and redis_kwargs["url"] is not None:
        redis_kwargs.pop("host", None)
        redis_kwargs.pop("port", None)
        redis_kwargs.pop("db", None)
        redis_kwargs.pop("password", None)

        return redis.Redis.from_url(**redis_kwargs)
    elif "host" not in redis_kwargs or redis_kwargs["host"] is None:
        raise ValueError("Either 'host' or 'url' must be specified for redis.")
    litellm.print_verbose(f"redis_kwargs: {redis_kwargs}")
    return redis.Redis(**redis_kwargs)
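
Read together, these helpers let the Redis client be configured entirely from REDIS_* environment variables, with explicit kwargs and "os.environ/<key>" indirection layered on top. A quick usage sketch (not part of the upload; host, port, and password values are placeholders):

# Usage sketch (illustrative values): configure the client from REDIS_*
# environment variables; explicit kwargs override the env-derived ones,
# and "os.environ/<key>" string values are resolved via litellm.get_secret().
import os

os.environ["REDIS_HOST"] = "localhost"    # -> redis.Redis(host=...)
os.environ["REDIS_PORT"] = "6379"         # -> redis.Redis(port=...)
os.environ["REDIS_PASSWORD"] = "hunter2"  # placeholder secret

from litellm._redis import get_redis_client

client = get_redis_client(password="os.environ/REDIS_PASSWORD")
client.ping()
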
    	
litellm/_version.py ADDED
@@ -0,0 +1,6 @@
import importlib_metadata

try:
    version = importlib_metadata.version("litellm")
except:
    pass
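
One subtlety in this shim: if the metadata lookup fails, the bare except leaves `version` unbound, so `from litellm._version import version` would raise on a broken install. A defensive read (a sketch, not part of the upload):

# Defensive read (sketch): litellm._version.version may be unbound when
# package metadata is missing (e.g. some editable installs).
import litellm._version

version = getattr(litellm._version, "version", "unknown")
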
    	
litellm/budget_manager.py ADDED
@@ -0,0 +1,206 @@
import os, json, time
import litellm
from litellm.utils import ModelResponse
import requests, threading
from typing import Optional, Union, Literal


class BudgetManager:
    def __init__(
        self,
        project_name: str,
        client_type: str = "local",
        api_base: Optional[str] = None,
    ):
        self.client_type = client_type
        self.project_name = project_name
        self.api_base = api_base or "https://api.litellm.ai"
        ## load the data or init the initial dictionaries
        self.load_data()

    def print_verbose(self, print_statement):
        try:
            if litellm.set_verbose:
                import logging

                logging.info(print_statement)
        except:
            pass

    def load_data(self):
        if self.client_type == "local":
            # Check if user dict file exists
            if os.path.isfile("user_cost.json"):
                # Load the user dict
                with open("user_cost.json", "r") as json_file:
                    self.user_dict = json.load(json_file)
            else:
                self.print_verbose("User Dictionary not found!")
                self.user_dict = {}
            self.print_verbose(f"user dict from local: {self.user_dict}")
        elif self.client_type == "hosted":
            # Load the user_dict from hosted db
            url = self.api_base + "/get_budget"
            headers = {"Content-Type": "application/json"}
            data = {"project_name": self.project_name}
            response = requests.post(url, headers=headers, json=data)
            response = response.json()
            if response["status"] == "error":
                self.user_dict = (
                    {}
                )  # assume this means the user dict hasn't been stored yet
            else:
                self.user_dict = response["data"]

    def create_budget(
        self,
        total_budget: float,
        user: str,
        duration: Optional[Literal["daily", "weekly", "monthly", "yearly"]] = None,
        created_at: float = time.time(),
    ):
        self.user_dict[user] = {"total_budget": total_budget}
        if duration is None:
            return self.user_dict[user]

        if duration == "daily":
            duration_in_days = 1
        elif duration == "weekly":
            duration_in_days = 7
        elif duration == "monthly":
            duration_in_days = 28
        elif duration == "yearly":
            duration_in_days = 365
        else:
            raise ValueError(
                """duration needs to be one of ["daily", "weekly", "monthly", "yearly"]"""
            )
        self.user_dict[user] = {
            "total_budget": total_budget,
            "duration": duration_in_days,
            "created_at": created_at,
            "last_updated_at": created_at,
        }
        self._save_data_thread()  # [Non-Blocking] Update persistent storage without blocking execution
        return self.user_dict[user]

    def projected_cost(self, model: str, messages: list, user: str):
        text = "".join(message["content"] for message in messages)
        prompt_tokens = litellm.token_counter(model=model, text=text)
        prompt_cost, _ = litellm.cost_per_token(
            model=model, prompt_tokens=prompt_tokens, completion_tokens=0
        )
        current_cost = self.user_dict[user].get("current_cost", 0)
        projected_cost = prompt_cost + current_cost
        return projected_cost

    def get_total_budget(self, user: str):
        return self.user_dict[user]["total_budget"]

    def update_cost(
        self,
        user: str,
        completion_obj: Optional[ModelResponse] = None,
        model: Optional[str] = None,
        input_text: Optional[str] = None,
        output_text: Optional[str] = None,
    ):
        if model and input_text and output_text:
            prompt_tokens = litellm.token_counter(
                model=model, messages=[{"role": "user", "content": input_text}]
            )
            completion_tokens = litellm.token_counter(
                model=model, messages=[{"role": "user", "content": output_text}]
            )
            (
                prompt_tokens_cost_usd_dollar,
                completion_tokens_cost_usd_dollar,
            ) = litellm.cost_per_token(
                model=model,
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
            )
            cost = prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
        elif completion_obj:
            cost = litellm.completion_cost(completion_response=completion_obj)
            model = completion_obj[
                "model"
            ]  # if this throws an error try, model = completion_obj['model']
        else:
            raise ValueError(
                "Either a chat completion object or the text response needs to be passed in. Learn more - https://docs.litellm.ai/docs/budget_manager"
            )

        self.user_dict[user]["current_cost"] = cost + self.user_dict[user].get(
            "current_cost", 0
        )
        if "model_cost" in self.user_dict[user]:
            self.user_dict[user]["model_cost"][model] = cost + self.user_dict[user][
                "model_cost"
            ].get(model, 0)
        else:
            self.user_dict[user]["model_cost"] = {model: cost}

        self._save_data_thread()  # [Non-Blocking] Update persistent storage without blocking execution
        return {"user": self.user_dict[user]}

    def get_current_cost(self, user):
        return self.user_dict[user].get("current_cost", 0)

    def get_model_cost(self, user):
        return self.user_dict[user].get("model_cost", 0)

    def is_valid_user(self, user: str) -> bool:
        return user in self.user_dict

    def get_users(self):
        return list(self.user_dict.keys())

    def reset_cost(self, user):
        self.user_dict[user]["current_cost"] = 0
        self.user_dict[user]["model_cost"] = {}
        return {"user": self.user_dict[user]}

    def reset_on_duration(self, user: str):
        # Get current and creation time
        last_updated_at = self.user_dict[user]["last_updated_at"]
        current_time = time.time()

        # Convert duration from days to seconds
        duration_in_seconds = self.user_dict[user]["duration"] * 24 * 60 * 60

        # Check if duration has elapsed
        if current_time - last_updated_at >= duration_in_seconds:
            # Reset cost if duration has elapsed and update the creation time
            self.reset_cost(user)
            self.user_dict[user]["last_updated_at"] = current_time
            self._save_data_thread()  # Save the data

    def update_budget_all_users(self):
        for user in self.get_users():
            if "duration" in self.user_dict[user]:
                self.reset_on_duration(user)

    def _save_data_thread(self):
        thread = threading.Thread(
            target=self.save_data
        )  # [Non-Blocking]: saves data without blocking execution
        thread.start()

    def save_data(self):
        if self.client_type == "local":
            import json

            # save the user dict
            with open("user_cost.json", "w") as json_file:
                json.dump(
                    self.user_dict, json_file, indent=4
                )  # Indent for pretty formatting
            return {"status": "success"}
        elif self.client_type == "hosted":
            url = self.api_base + "/set_budget"
            headers = {"Content-Type": "application/json"}
            data = {"project_name": self.project_name, "user_dict": self.user_dict}
            response = requests.post(url, headers=headers, json=data)
            response = response.json()
            return response
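
A minimal sketch of the local budgeting flow this class supports (the user id and model are illustrative, and a configured API key is assumed; note also that the `created_at: float = time.time()` default is evaluated once at import time, so pass it explicitly if the exact window start matters):

# Usage sketch: guard a completion call with BudgetManager.
import litellm
from litellm import BudgetManager

budget_manager = BudgetManager(project_name="demo_project")
user = "user_123"  # hypothetical user id

if not budget_manager.is_valid_user(user):
    budget_manager.create_budget(total_budget=10.00, user=user, duration="monthly")

messages = [{"role": "user", "content": "Hey, how's it going?"}]
if budget_manager.projected_cost(
    model="gpt-3.5-turbo", messages=messages, user=user
) <= budget_manager.get_total_budget(user):
    response = litellm.completion(model="gpt-3.5-turbo", messages=messages)
    budget_manager.update_cost(user=user, completion_obj=response)  # persists async
else:
    response = "Sorry - no budget!"
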
    	
litellm/caching.py ADDED
@@ -0,0 +1,678 @@
# +-----------------------------------------------+
# |                                               |
# |           Give Feedback / Get Help            |
# | https://github.com/BerriAI/litellm/issues/new |
# |                                               |
# +-----------------------------------------------+
#
#  Thank you users! We ❤️ you! - Krrish & Ishaan

import litellm
import time, logging
import json, traceback, ast, hashlib
from typing import Optional, Literal, List, Union, Any
from openai._models import BaseModel as OpenAIObject


def print_verbose(print_statement):
    try:
        if litellm.set_verbose:
            print(print_statement)  # noqa
    except:
        pass


class BaseCache:
    def set_cache(self, key, value, **kwargs):
        raise NotImplementedError

    def get_cache(self, key, **kwargs):
        raise NotImplementedError


class InMemoryCache(BaseCache):
    def __init__(self):
        # if users don't provide one, use the default litellm cache
        self.cache_dict = {}
        self.ttl_dict = {}

    def set_cache(self, key, value, **kwargs):
        self.cache_dict[key] = value
        if "ttl" in kwargs:
            self.ttl_dict[key] = time.time() + kwargs["ttl"]

    def get_cache(self, key, **kwargs):
        if key in self.cache_dict:
            if key in self.ttl_dict:
                if time.time() > self.ttl_dict[key]:
                    self.cache_dict.pop(key, None)
                    return None
            original_cached_response = self.cache_dict[key]
            try:
                cached_response = json.loads(original_cached_response)
            except:
                cached_response = original_cached_response
            return cached_response
        return None

    def flush_cache(self):
        self.cache_dict.clear()
        self.ttl_dict.clear()
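
Worth noting about the TTL logic above: eviction is lazy, i.e. an expired key is only removed when it is next read. A small sketch (not part of the upload):

# Sketch: TTL expiry is checked (and the key evicted) only on read.
import time
from litellm.caching import InMemoryCache

cache = InMemoryCache()
cache.set_cache("greeting", "hello", ttl=1)    # expires ~1s from now
assert cache.get_cache("greeting") == "hello"
time.sleep(1.1)
assert cache.get_cache("greeting") is None     # evicted by this read
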

class RedisCache(BaseCache):
    def __init__(self, host=None, port=None, password=None, **kwargs):
        import redis

        # if users don't provide one, use the default litellm cache
        from ._redis import get_redis_client

        redis_kwargs = {}
        if host is not None:
            redis_kwargs["host"] = host
        if port is not None:
            redis_kwargs["port"] = port
        if password is not None:
            redis_kwargs["password"] = password

        redis_kwargs.update(kwargs)

        self.redis_client = get_redis_client(**redis_kwargs)

    def set_cache(self, key, value, **kwargs):
        ttl = kwargs.get("ttl", None)
        print_verbose(f"Set Redis Cache: key: {key}\nValue {value}")
        try:
            self.redis_client.set(name=key, value=str(value), ex=ttl)
        except Exception as e:
            # NON blocking - notify users Redis is throwing an exception
            # (f-string so the exception actually appears in the log message)
            logging.debug(f"LiteLLM Caching: set() - Got exception from REDIS: {e}")

    def get_cache(self, key, **kwargs):
        try:
            print_verbose(f"Get Redis Cache: key: {key}")
            cached_response = self.redis_client.get(key)
            print_verbose(
                f"Got Redis Cache: key: {key}, cached_response {cached_response}"
            )
            if cached_response is not None:
                # cached_response is bytes - decode, then parse it back into a dict
                cached_response = cached_response.decode(
                    "utf-8"
                )  # Convert bytes to string
                try:
                    cached_response = json.loads(
                        cached_response
                    )  # Convert string to dictionary
                except:
                    # values are stored via str(value), so fall back to literal_eval
                    cached_response = ast.literal_eval(cached_response)
                return cached_response
        except Exception as e:
            # NON blocking - notify users Redis is throwing an exception
            traceback.print_exc()
            logging.debug(f"LiteLLM Caching: get() - Got exception from REDIS: {e}")

    def flush_cache(self):
        self.redis_client.flushall()

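
The serialization asymmetry above is why the fallback exists: set_cache() writes str(value), whose single-quoted Python repr is not valid JSON, so get_cache() needs ast.literal_eval when json.loads fails. A sketch of the round-trip (not part of the upload):

# Sketch: why get_cache() needs the ast.literal_eval fallback.
import ast, json

value = {"id": "chatcmpl-123", "created": 1700000000}
serialized = str(value)              # "{'id': 'chatcmpl-123', ...}"
try:
    parsed = json.loads(serialized)  # fails: single quotes are not valid JSON
except json.JSONDecodeError:
    parsed = ast.literal_eval(serialized)
assert parsed == value
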
            +
            class S3Cache(BaseCache):
         | 
| 120 | 
            +
                def __init__(
         | 
| 121 | 
            +
                    self,
         | 
| 122 | 
            +
                    s3_bucket_name,
         | 
| 123 | 
            +
                    s3_region_name=None,
         | 
| 124 | 
            +
                    s3_api_version=None,
         | 
| 125 | 
            +
                    s3_use_ssl=True,
         | 
| 126 | 
            +
                    s3_verify=None,
         | 
| 127 | 
            +
                    s3_endpoint_url=None,
         | 
| 128 | 
            +
                    s3_aws_access_key_id=None,
         | 
| 129 | 
            +
                    s3_aws_secret_access_key=None,
         | 
| 130 | 
            +
                    s3_aws_session_token=None,
         | 
| 131 | 
            +
                    s3_config=None,
         | 
| 132 | 
            +
                    **kwargs,
         | 
| 133 | 
            +
                ):
         | 
| 134 | 
            +
                    import boto3
         | 
| 135 | 
            +
             | 
| 136 | 
            +
                    self.bucket_name = s3_bucket_name
         | 
| 137 | 
            +
                    # Create an S3 client with custom endpoint URL
         | 
| 138 | 
            +
                    self.s3_client = boto3.client(
         | 
| 139 | 
            +
                        "s3",
         | 
| 140 | 
            +
                        region_name=s3_region_name,
         | 
| 141 | 
            +
                        endpoint_url=s3_endpoint_url,
         | 
| 142 | 
            +
                        api_version=s3_api_version,
         | 
| 143 | 
            +
                        use_ssl=s3_use_ssl,
         | 
| 144 | 
            +
                        verify=s3_verify,
         | 
| 145 | 
            +
                        aws_access_key_id=s3_aws_access_key_id,
         | 
| 146 | 
            +
                        aws_secret_access_key=s3_aws_secret_access_key,
         | 
| 147 | 
            +
                        aws_session_token=s3_aws_session_token,
         | 
| 148 | 
            +
                        config=s3_config,
         | 
| 149 | 
            +
                        **kwargs,
         | 
| 150 | 
            +
                    )
         | 
| 151 | 
            +
             | 
| 152 | 
            +
                def set_cache(self, key, value, **kwargs):
         | 
| 153 | 
            +
                    try:
         | 
| 154 | 
            +
                        print_verbose(f"LiteLLM SET Cache - S3. Key={key}. Value={value}")
         | 
| 155 | 
            +
                        ttl = kwargs.get("ttl", None)
         | 
| 156 | 
            +
                        # Convert value to JSON before storing in S3
         | 
| 157 | 
            +
                        serialized_value = json.dumps(value)
         | 
| 158 | 
            +
                        if ttl is not None:
         | 
| 159 | 
            +
                            cache_control = f"immutable, max-age={ttl}, s-maxage={ttl}"
         | 
| 160 | 
            +
                            import datetime
         | 
| 161 | 
            +
             | 
| 162 | 
            +
                            # Calculate expiration time
         | 
| 163 | 
            +
                            expiration_time = datetime.datetime.now() + ttl
         | 
| 164 | 
            +
             | 
| 165 | 
            +
                            # Upload the data to S3 with the calculated expiration time
         | 
| 166 | 
            +
                            self.s3_client.put_object(
         | 
| 167 | 
            +
                                Bucket=self.bucket_name,
         | 
| 168 | 
            +
                                Key=key,
         | 
| 169 | 
            +
                                Body=serialized_value,
         | 
| 170 | 
            +
                                Expires=expiration_time,
         | 
| 171 | 
            +
                                CacheControl=cache_control,
         | 
| 172 | 
            +
                                ContentType="application/json",
         | 
| 173 | 
            +
                                ContentLanguage="en",
         | 
| 174 | 
            +
                                ContentDisposition=f"inline; filename=\"{key}.json\""
         | 
| 175 | 
            +
                            )
         | 
| 176 | 
            +
                        else:
         | 
| 177 | 
            +
                            cache_control = "immutable, max-age=31536000, s-maxage=31536000"
         | 
| 178 | 
            +
                            # Upload the data to S3 without specifying Expires
         | 
| 179 | 
            +
                            self.s3_client.put_object(
         | 
| 180 | 
            +
                                Bucket=self.bucket_name,
         | 
| 181 | 
            +
                                Key=key,
         | 
| 182 | 
            +
                                Body=serialized_value,
         | 
| 183 | 
            +
                                CacheControl=cache_control,
         | 
| 184 | 
            +
                                ContentType="application/json",
         | 
| 185 | 
            +
                                ContentLanguage="en",
         | 
| 186 | 
            +
                                ContentDisposition=f"inline; filename=\"{key}.json\""
         | 
| 187 | 
            +
                            )
         | 
| 188 | 
            +
                    except Exception as e:
         | 
| 189 | 
            +
                        # NON blocking - notify users S3 is throwing an exception
         | 
| 190 | 
            +
                        print_verbose(f"S3 Caching: set_cache() - Got exception from S3: {e}")
         | 
| 191 | 
            +
             | 
| 192 | 
            +
                def get_cache(self, key, **kwargs):
         | 
| 193 | 
            +
                    import boto3, botocore
         | 
| 194 | 
            +
             | 
| 195 | 
            +
                    try:
         | 
| 196 | 
            +
                        print_verbose(f"Get S3 Cache: key: {key}")
         | 
| 197 | 
            +
                        # Download the data from S3
         | 
| 198 | 
            +
                        cached_response = self.s3_client.get_object(
         | 
| 199 | 
            +
                            Bucket=self.bucket_name, Key=key
         | 
| 200 | 
            +
                        )
         | 
| 201 | 
            +
             | 
| 202 | 
            +
                        if cached_response != None:
         | 
| 203 | 
            +
                            # cached_response is in `b{} convert it to ModelResponse
         | 
| 204 | 
            +
                            cached_response = (
         | 
| 205 | 
            +
                                cached_response["Body"].read().decode("utf-8")
         | 
| 206 | 
            +
                            )  # Convert bytes to string
         | 
| 207 | 
            +
                            try:
         | 
| 208 | 
            +
                                cached_response = json.loads(
         | 
| 209 | 
            +
                                    cached_response
         | 
| 210 | 
            +
                                )  # Convert string to dictionary
         | 
| 211 | 
            +
                            except Exception as e:
         | 
| 212 | 
            +
                                cached_response = ast.literal_eval(cached_response)
         | 
| 213 | 
            +
                        if type(cached_response) is not dict:
         | 
| 214 | 
            +
                            cached_response = dict(cached_response)
         | 
| 215 | 
            +
                        print_verbose(
         | 
| 216 | 
            +
                            f"Got S3 Cache: key: {key}, cached_response {cached_response}. Type Response {type(cached_response)}"
         | 
| 217 | 
            +
                        )
         | 
| 218 | 
            +
             | 
| 219 | 
            +
                        return cached_response
         | 
| 220 | 
            +
                    except botocore.exceptions.ClientError as e:
         | 
| 221 | 
            +
                        if e.response["Error"]["Code"] == "NoSuchKey":
         | 
| 222 | 
            +
                            print_verbose(
         | 
| 223 | 
            +
                                f"S3 Cache: The specified key '{key}' does not exist in the S3 bucket."
         | 
| 224 | 
            +
                            )
         | 
| 225 | 
            +
                            return None
         | 
| 226 | 
            +
             | 
| 227 | 
            +
                    except Exception as e:
         | 
| 228 | 
            +
                        # NON blocking - notify users S3 is throwing an exception
         | 
| 229 | 
            +
                        traceback.print_exc()
         | 
| 230 | 
            +
                        print_verbose(f"S3 Caching: get_cache() - Got exception from S3: {e}")
         | 
| 231 | 
            +
             | 
| 232 | 
            +
                def flush_cache(self):
         | 
| 233 | 
            +
                    pass
         | 
| 234 | 
            +
             | 
| 235 | 
            +
             | 
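
A standalone usage sketch (not part of the upload; the bucket, region, and key are placeholders, and valid AWS credentials are assumed). Since flush_cache() is deliberately a no-op here, expiry relies entirely on the Expires/Cache-Control metadata set above:

# Sketch: S3Cache used directly (placeholder bucket/region).
from litellm.caching import S3Cache

s3_cache = S3Cache(s3_bucket_name="my-litellm-cache", s3_region_name="us-west-2")
s3_cache.set_cache("prompt-hash-abc", {"choices": []}, ttl=3600)
cached = s3_cache.get_cache("prompt-hash-abc")  # -> {"choices": []}
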

class DualCache(BaseCache):
    """
    This updates both Redis and an in-memory cache simultaneously.
    When data is updated or inserted, it is written to both the in-memory cache + Redis.
    This ensures that even if Redis hasn't been updated yet, the in-memory cache reflects the most recent data.
    """

    def __init__(
        self,
        in_memory_cache: Optional[InMemoryCache] = None,
        redis_cache: Optional[RedisCache] = None,
    ) -> None:
        super().__init__()
        # If in_memory_cache is not provided, use the default InMemoryCache
        self.in_memory_cache = in_memory_cache or InMemoryCache()
        # redis_cache stays None unless explicitly provided
        self.redis_cache = redis_cache

    def set_cache(self, key, value, local_only: bool = False, **kwargs):
        # Update both Redis and in-memory cache
        try:
            print_verbose(f"set cache: key: {key}; value: {value}")
            if self.in_memory_cache is not None:
                self.in_memory_cache.set_cache(key, value, **kwargs)

            if self.redis_cache is not None and local_only == False:
                self.redis_cache.set_cache(key, value, **kwargs)
        except Exception as e:
            print_verbose(e)

    def get_cache(self, key, local_only: bool = False, **kwargs):
        # Try to fetch from in-memory cache first
        try:
            print_verbose(f"get cache: cache key: {key}; local_only: {local_only}")
            result = None
            if self.in_memory_cache is not None:
                in_memory_result = self.in_memory_cache.get_cache(key, **kwargs)

                print_verbose(f"in_memory_result: {in_memory_result}")
                if in_memory_result is not None:
                    result = in_memory_result

            if result is None and self.redis_cache is not None and local_only == False:
                # If not found in in-memory cache, try fetching from Redis
                redis_result = self.redis_cache.get_cache(key, **kwargs)

                if redis_result is not None:
                    # Update in-memory cache with the value from Redis
                    self.in_memory_cache.set_cache(key, redis_result, **kwargs)

                result = redis_result

            print_verbose(f"get cache: cache result: {result}")
            return result
        except Exception as e:
            traceback.print_exc()

    def flush_cache(self):
        if self.in_memory_cache is not None:
            self.in_memory_cache.flush_cache()
        if self.redis_cache is not None:
            self.redis_cache.flush_cache()

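
The read-through behavior in one sketch (not part of the upload; the Redis connection values are placeholders, and the cached value is a dict so it survives the str()/literal_eval round-trip noted earlier): a Redis hit back-fills the in-memory layer, so the next read is served locally.

# Sketch: DualCache read-through with in-memory back-fill.
from litellm.caching import DualCache, InMemoryCache, RedisCache

dual = DualCache(
    in_memory_cache=InMemoryCache(),
    redis_cache=RedisCache(host="localhost", port=6379),
)
dual.set_cache("k", {"answer": 42})       # written to both layers
dual.in_memory_cache.flush_cache()        # simulate a cold local cache
assert dual.get_cache("k") == {"answer": 42}                   # served from Redis
assert dual.in_memory_cache.get_cache("k") == {"answer": 42}   # back-filled
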

#### LiteLLM.Completion / Embedding Cache ####
class Cache:
    def __init__(
        self,
        type: Optional[Literal["local", "redis", "s3"]] = "local",
        host: Optional[str] = None,
        port: Optional[str] = None,
        password: Optional[str] = None,
        supported_call_types: Optional[
            List[Literal["completion", "acompletion", "embedding", "aembedding"]]
        ] = ["completion", "acompletion", "embedding", "aembedding"],
        # s3 Bucket, boto3 configuration
        s3_bucket_name: Optional[str] = None,
        s3_region_name: Optional[str] = None,
        s3_api_version: Optional[str] = None,
        s3_use_ssl: Optional[bool] = True,
        s3_verify: Optional[Union[bool, str]] = None,
        s3_endpoint_url: Optional[str] = None,
        s3_aws_access_key_id: Optional[str] = None,
        s3_aws_secret_access_key: Optional[str] = None,
        s3_aws_session_token: Optional[str] = None,
        s3_config: Optional[Any] = None,
        **kwargs,
    ):
        """
        Initializes the cache based on the given type.

        Args:
            type (str, optional): The type of cache to initialize. Can be "local", "redis", or "s3". Defaults to "local".
            host (str, optional): The host address for the Redis cache. Required if type is "redis".
            port (int, optional): The port number for the Redis cache. Required if type is "redis".
            password (str, optional): The password for the Redis cache. Required if type is "redis".
            supported_call_types (list, optional): List of call types to cache for. Defaults to caching on for all call types.
            **kwargs: Additional keyword arguments for redis.Redis() cache

        Raises:
            ValueError: If an invalid cache type is provided.

        Returns:
            None. Cache is set as a litellm param
        """
        if type == "redis":
            self.cache: BaseCache = RedisCache(host, port, password, **kwargs)
        if type == "local":
            self.cache = InMemoryCache()
        if type == "s3":
            self.cache = S3Cache(
                s3_bucket_name=s3_bucket_name,
                s3_region_name=s3_region_name,
                s3_api_version=s3_api_version,
                s3_use_ssl=s3_use_ssl,
                s3_verify=s3_verify,
                s3_endpoint_url=s3_endpoint_url,
                s3_aws_access_key_id=s3_aws_access_key_id,
                s3_aws_secret_access_key=s3_aws_secret_access_key,
                s3_aws_session_token=s3_aws_session_token,
                s3_config=s3_config,
                **kwargs,
            )
        if "cache" not in litellm.input_callback:
            litellm.input_callback.append("cache")
        if "cache" not in litellm.success_callback:
            litellm.success_callback.append("cache")
        if "cache" not in litellm._async_success_callback:
            litellm._async_success_callback.append("cache")
        self.supported_call_types = supported_call_types  # default to ["completion", "acompletion", "embedding", "aembedding"]
        self.type = type

    def get_cache_key(self, *args, **kwargs):
        """
        Get the cache key for the given arguments.

        Args:
            *args: args to litellm.completion() or embedding()
            **kwargs: kwargs to litellm.completion() or embedding()

        Returns:
            str: The cache key generated from the arguments, or None if no cache key could be generated.
        """
        cache_key = ""
        print_verbose(f"\nGetting Cache key. Kwargs: {kwargs}")

        # for streaming, we use preset_cache_key. It's created in wrapper(); we do this because optional params like max_tokens get transformed for bedrock -> max_new_tokens
        preset_cache_key = kwargs.get("litellm_params", {}).get("preset_cache_key", None)
        if preset_cache_key is not None:
            print_verbose(f"\nReturning preset cache key: {preset_cache_key}")
            return preset_cache_key

        # sort kwargs by keys, since model: [gpt-4, temperature: 0.2, max_tokens: 200] == [temperature: 0.2, max_tokens: 200, model: gpt-4]
        completion_kwargs = [
            "model",
            "messages",
            "temperature",
            "top_p",
            "n",
            "stop",
            "max_tokens",
            "presence_penalty",
            "frequency_penalty",
            "logit_bias",
            "user",
            "response_format",
            "seed",
            "tools",
            "tool_choice",
         | 
| 404 | 
            +
                    ]
         | 
| 405 | 
            +
                    embedding_only_kwargs = [
         | 
| 406 | 
            +
                        "input",
         | 
| 407 | 
            +
                        "encoding_format",
         | 
| 408 | 
            +
                    ]  # embedding kwargs = model, input, user, encoding_format. Model, user are checked in completion_kwargs
         | 
| 409 | 
            +
             | 
| 410 | 
            +
                    # combined_kwargs - NEEDS to be ordered across get_cache_key(). Do not use a set()
         | 
| 411 | 
            +
                    combined_kwargs = completion_kwargs + embedding_only_kwargs
         | 
| 412 | 
            +
                    for param in combined_kwargs:
         | 
| 413 | 
            +
                        # ignore litellm params here
         | 
| 414 | 
            +
                        if param in kwargs:
         | 
| 415 | 
            +
                            # check if param == model and model_group is passed in, then override model with model_group
         | 
| 416 | 
            +
                            if param == "model":
         | 
| 417 | 
            +
                                model_group = None
         | 
| 418 | 
            +
                                caching_group = None
         | 
| 419 | 
            +
                                metadata = kwargs.get("metadata", None)
         | 
| 420 | 
            +
                                litellm_params = kwargs.get("litellm_params", {})
         | 
| 421 | 
            +
                                if metadata is not None:
         | 
| 422 | 
            +
                                    model_group = metadata.get("model_group")
         | 
| 423 | 
            +
                                    model_group = metadata.get("model_group", None)
         | 
| 424 | 
            +
                                    caching_groups = metadata.get("caching_groups", None)
         | 
| 425 | 
            +
                                    if caching_groups:
         | 
| 426 | 
            +
                                        for group in caching_groups:
         | 
| 427 | 
            +
                                            if model_group in group:
         | 
| 428 | 
            +
                                                caching_group = group
         | 
| 429 | 
            +
                                                break
         | 
| 430 | 
            +
                                if litellm_params is not None:
         | 
| 431 | 
            +
                                    metadata = litellm_params.get("metadata", None)
         | 
| 432 | 
            +
                                    if metadata is not None:
         | 
| 433 | 
            +
                                        model_group = metadata.get("model_group", None)
         | 
| 434 | 
            +
                                        caching_groups = metadata.get("caching_groups", None)
         | 
| 435 | 
            +
                                        if caching_groups:
         | 
| 436 | 
            +
                                            for group in caching_groups:
         | 
| 437 | 
            +
                                                if model_group in group:
         | 
| 438 | 
            +
                                                    caching_group = group
         | 
| 439 | 
            +
                                                    break
         | 
| 440 | 
            +
                                param_value = (
         | 
| 441 | 
            +
                                    caching_group or model_group or kwargs[param]
         | 
| 442 | 
            +
                                )  # use caching_group, if set then model_group if it exists, else use kwargs["model"]
         | 
| 443 | 
            +
                            else:
         | 
| 444 | 
            +
                                if kwargs[param] is None:
         | 
| 445 | 
            +
                                    continue  # ignore None params
         | 
| 446 | 
            +
                                param_value = kwargs[param]
         | 
| 447 | 
            +
                            cache_key += f"{str(param)}: {str(param_value)}"
         | 
| 448 | 
            +
                    print_verbose(f"\nCreated cache key: {cache_key}")
         | 
| 449 | 
            +
                    # Use hashlib to create a sha256 hash of the cache key
         | 
| 450 | 
            +
                    hash_object = hashlib.sha256(cache_key.encode())
         | 
| 451 | 
            +
                    # Hexadecimal representation of the hash
         | 
| 452 | 
            +
                    hash_hex = hash_object.hexdigest()
         | 
| 453 | 
            +
                    print_verbose(f"Hashed cache key (SHA-256): {hash_hex}")
         | 
| 454 | 
            +
                    return hash_hex
         | 
| 455 | 
            +
             | 
| 456 | 
            +
                def generate_streaming_content(self, content):
         | 
| 457 | 
            +
                    chunk_size = 5  # Adjust the chunk size as needed
         | 
| 458 | 
            +
                    for i in range(0, len(content), chunk_size):
         | 
| 459 | 
            +
                        yield {
         | 
| 460 | 
            +
                            "choices": [
         | 
| 461 | 
            +
                                {
         | 
| 462 | 
            +
                                    "delta": {
         | 
| 463 | 
            +
                                        "role": "assistant",
         | 
| 464 | 
            +
                                        "content": content[i : i + chunk_size],
         | 
| 465 | 
            +
                                    }
         | 
| 466 | 
            +
                                }
         | 
| 467 | 
            +
                            ]
         | 
| 468 | 
            +
                        }
         | 
| 469 | 
            +
                        time.sleep(0.02)
         | 
| 470 | 
            +
             | 
| 471 | 
            +
                def get_cache(self, *args, **kwargs):
         | 
| 472 | 
            +
                    """
         | 
| 473 | 
            +
                    Retrieves the cached result for the given arguments.
         | 
| 474 | 
            +
             | 
| 475 | 
            +
                    Args:
         | 
| 476 | 
            +
                        *args: args to litellm.completion() or embedding()
         | 
| 477 | 
            +
                        **kwargs: kwargs to litellm.completion() or embedding()
         | 
| 478 | 
            +
             | 
| 479 | 
            +
                    Returns:
         | 
| 480 | 
            +
                        The cached result if it exists, otherwise None.
         | 
| 481 | 
            +
                    """
         | 
| 482 | 
            +
                    try:  # never block execution
         | 
| 483 | 
            +
                        if "cache_key" in kwargs:
         | 
| 484 | 
            +
                            cache_key = kwargs["cache_key"]
         | 
| 485 | 
            +
                        else:
         | 
| 486 | 
            +
                            cache_key = self.get_cache_key(*args, **kwargs)
         | 
| 487 | 
            +
                        if cache_key is not None:
         | 
| 488 | 
            +
                            cache_control_args = kwargs.get("cache", {})
         | 
| 489 | 
            +
                            max_age = cache_control_args.get(
         | 
| 490 | 
            +
                                "s-max-age", cache_control_args.get("s-maxage", float("inf"))
         | 
| 491 | 
            +
                            )
         | 
| 492 | 
            +
                            cached_result = self.cache.get_cache(cache_key)
         | 
| 493 | 
            +
                            # Check if a timestamp was stored with the cached response
         | 
| 494 | 
            +
                            if (
         | 
| 495 | 
            +
                                cached_result is not None
         | 
| 496 | 
            +
                                and isinstance(cached_result, dict)
         | 
| 497 | 
            +
                                and "timestamp" in cached_result
         | 
| 498 | 
            +
                                and max_age is not None
         | 
| 499 | 
            +
                            ):
         | 
| 500 | 
            +
                                timestamp = cached_result["timestamp"]
         | 
| 501 | 
            +
                                current_time = time.time()
         | 
| 502 | 
            +
             | 
| 503 | 
            +
                                # Calculate age of the cached response
         | 
| 504 | 
            +
                                response_age = current_time - timestamp
         | 
| 505 | 
            +
             | 
| 506 | 
            +
                                # Check if the cached response is older than the max-age
         | 
| 507 | 
            +
                                if response_age > max_age:
         | 
| 508 | 
            +
                                    print_verbose(
         | 
| 509 | 
            +
                                        f"Cached response for key {cache_key} is too old. Max-age: {max_age}s, Age: {response_age}s"
         | 
| 510 | 
            +
                                    )
         | 
| 511 | 
            +
                                    return None  # Cached response is too old
         | 
| 512 | 
            +
             | 
| 513 | 
            +
                                # If the response is fresh, or there's no max-age requirement, return the cached response
         | 
| 514 | 
            +
                                # cached_response is in `b{} convert it to ModelResponse
         | 
| 515 | 
            +
                                cached_response = cached_result.get("response")
         | 
| 516 | 
            +
                                try:
         | 
| 517 | 
            +
                                    if isinstance(cached_response, dict):
         | 
| 518 | 
            +
                                        pass
         | 
| 519 | 
            +
                                    else:
         | 
| 520 | 
            +
                                        cached_response = json.loads(
         | 
| 521 | 
            +
                                            cached_response
         | 
| 522 | 
            +
                                        )  # Convert string to dictionary
         | 
| 523 | 
            +
                                except:
         | 
| 524 | 
            +
                                    cached_response = ast.literal_eval(cached_response)
         | 
| 525 | 
            +
                                return cached_response
         | 
| 526 | 
            +
                            return cached_result
         | 
| 527 | 
            +
                    except Exception as e:
         | 
| 528 | 
            +
                        print_verbose(f"An exception occurred: {traceback.format_exc()}")
         | 
| 529 | 
            +
                        return None
         | 
| 530 | 
            +
             | 
| 531 | 
            +
                def add_cache(self, result, *args, **kwargs):
         | 
| 532 | 
            +
                    """
         | 
| 533 | 
            +
                    Adds a result to the cache.
         | 
| 534 | 
            +
             | 
| 535 | 
            +
                    Args:
         | 
| 536 | 
            +
                        *args: args to litellm.completion() or embedding()
         | 
| 537 | 
            +
                        **kwargs: kwargs to litellm.completion() or embedding()
         | 
| 538 | 
            +
             | 
| 539 | 
            +
                    Returns:
         | 
| 540 | 
            +
                        None
         | 
| 541 | 
            +
                    """
         | 
| 542 | 
            +
                    try:
         | 
| 543 | 
            +
                        if "cache_key" in kwargs:
         | 
| 544 | 
            +
                            cache_key = kwargs["cache_key"]
         | 
| 545 | 
            +
                        else:
         | 
| 546 | 
            +
                            cache_key = self.get_cache_key(*args, **kwargs)
         | 
| 547 | 
            +
                        if cache_key is not None:
         | 
| 548 | 
            +
                            if isinstance(result, OpenAIObject):
         | 
| 549 | 
            +
                                result = result.model_dump_json()
         | 
| 550 | 
            +
             | 
| 551 | 
            +
                            ## Get Cache-Controls ##
         | 
| 552 | 
            +
                            if kwargs.get("cache", None) is not None and isinstance(
         | 
| 553 | 
            +
                                kwargs.get("cache"), dict
         | 
| 554 | 
            +
                            ):
         | 
| 555 | 
            +
                                for k, v in kwargs.get("cache").items():
         | 
| 556 | 
            +
                                    if k == "ttl":
         | 
| 557 | 
            +
                                        kwargs["ttl"] = v
         | 
| 558 | 
            +
                            cached_data = {"timestamp": time.time(), "response": result}
         | 
| 559 | 
            +
                            self.cache.set_cache(cache_key, cached_data, **kwargs)
         | 
| 560 | 
            +
                    except Exception as e:
         | 
| 561 | 
            +
                        print_verbose(f"LiteLLM Cache: Excepton add_cache: {str(e)}")
         | 
| 562 | 
            +
                        traceback.print_exc()
         | 
| 563 | 
            +
                        pass
         | 
| 564 | 
            +
             | 
| 565 | 
            +
                async def _async_add_cache(self, result, *args, **kwargs):
         | 
| 566 | 
            +
                    self.add_cache(result, *args, **kwargs)
         | 
| 567 | 
            +
             | 
| 568 | 
            +
             | 
| 569 | 
            +
            def enable_cache(
         | 
| 570 | 
            +
                type: Optional[Literal["local", "redis", "s3"]] = "local",
         | 
| 571 | 
            +
                host: Optional[str] = None,
         | 
| 572 | 
            +
                port: Optional[str] = None,
         | 
| 573 | 
            +
                password: Optional[str] = None,
         | 
| 574 | 
            +
                supported_call_types: Optional[
         | 
| 575 | 
            +
                    List[Literal["completion", "acompletion", "embedding", "aembedding"]]
         | 
| 576 | 
            +
                ] = ["completion", "acompletion", "embedding", "aembedding"],
         | 
| 577 | 
            +
                **kwargs,
         | 
| 578 | 
            +
            ):
         | 
| 579 | 
            +
                """
         | 
| 580 | 
            +
                Enable cache with the specified configuration.
         | 
| 581 | 
            +
             | 
| 582 | 
            +
                Args:
         | 
| 583 | 
            +
                    type (Optional[Literal["local", "redis"]]): The type of cache to enable. Defaults to "local".
         | 
| 584 | 
            +
                    host (Optional[str]): The host address of the cache server. Defaults to None.
         | 
| 585 | 
            +
                    port (Optional[str]): The port number of the cache server. Defaults to None.
         | 
| 586 | 
            +
                    password (Optional[str]): The password for the cache server. Defaults to None.
         | 
| 587 | 
            +
                    supported_call_types (Optional[List[Literal["completion", "acompletion", "embedding", "aembedding"]]]):
         | 
| 588 | 
            +
                        The supported call types for the cache. Defaults to ["completion", "acompletion", "embedding", "aembedding"].
         | 
| 589 | 
            +
                    **kwargs: Additional keyword arguments.
         | 
| 590 | 
            +
             | 
| 591 | 
            +
                Returns:
         | 
| 592 | 
            +
                    None
         | 
| 593 | 
            +
             | 
| 594 | 
            +
                Raises:
         | 
| 595 | 
            +
                    None
         | 
| 596 | 
            +
                """
         | 
| 597 | 
            +
                print_verbose("LiteLLM: Enabling Cache")
         | 
| 598 | 
            +
                if "cache" not in litellm.input_callback:
         | 
| 599 | 
            +
                    litellm.input_callback.append("cache")
         | 
| 600 | 
            +
                if "cache" not in litellm.success_callback:
         | 
| 601 | 
            +
                    litellm.success_callback.append("cache")
         | 
| 602 | 
            +
                if "cache" not in litellm._async_success_callback:
         | 
| 603 | 
            +
                    litellm._async_success_callback.append("cache")
         | 
| 604 | 
            +
             | 
| 605 | 
            +
                if litellm.cache == None:
         | 
| 606 | 
            +
                    litellm.cache = Cache(
         | 
| 607 | 
            +
                        type=type,
         | 
| 608 | 
            +
                        host=host,
         | 
| 609 | 
            +
                        port=port,
         | 
| 610 | 
            +
                        password=password,
         | 
| 611 | 
            +
                        supported_call_types=supported_call_types,
         | 
| 612 | 
            +
                        **kwargs,
         | 
| 613 | 
            +
                    )
         | 
| 614 | 
            +
                print_verbose(f"LiteLLM: Cache enabled, litellm.cache={litellm.cache}")
         | 
| 615 | 
            +
                print_verbose(f"LiteLLM Cache: {vars(litellm.cache)}")
         | 
| 616 | 
            +
             | 
| 617 | 
            +
             | 
| 618 | 
            +
            def update_cache(
         | 
| 619 | 
            +
                type: Optional[Literal["local", "redis"]] = "local",
         | 
| 620 | 
            +
                host: Optional[str] = None,
         | 
| 621 | 
            +
                port: Optional[str] = None,
         | 
| 622 | 
            +
                password: Optional[str] = None,
         | 
| 623 | 
            +
                supported_call_types: Optional[
         | 
| 624 | 
            +
                    List[Literal["completion", "acompletion", "embedding", "aembedding"]]
         | 
| 625 | 
            +
                ] = ["completion", "acompletion", "embedding", "aembedding"],
         | 
| 626 | 
            +
                **kwargs,
         | 
| 627 | 
            +
            ):
         | 
| 628 | 
            +
                """
         | 
| 629 | 
            +
                Update the cache for LiteLLM.
         | 
| 630 | 
            +
             | 
| 631 | 
            +
                Args:
         | 
| 632 | 
            +
                    type (Optional[Literal["local", "redis"]]): The type of cache. Defaults to "local".
         | 
| 633 | 
            +
                    host (Optional[str]): The host of the cache. Defaults to None.
         | 
| 634 | 
            +
                    port (Optional[str]): The port of the cache. Defaults to None.
         | 
| 635 | 
            +
                    password (Optional[str]): The password for the cache. Defaults to None.
         | 
| 636 | 
            +
                    supported_call_types (Optional[List[Literal["completion", "acompletion", "embedding", "aembedding"]]]):
         | 
| 637 | 
            +
                        The supported call types for the cache. Defaults to ["completion", "acompletion", "embedding", "aembedding"].
         | 
| 638 | 
            +
                    **kwargs: Additional keyword arguments for the cache.
         | 
| 639 | 
            +
             | 
| 640 | 
            +
                Returns:
         | 
| 641 | 
            +
                    None
         | 
| 642 | 
            +
             | 
| 643 | 
            +
                """
         | 
| 644 | 
            +
                print_verbose("LiteLLM: Updating Cache")
         | 
| 645 | 
            +
                litellm.cache = Cache(
         | 
| 646 | 
            +
                    type=type,
         | 
| 647 | 
            +
                    host=host,
         | 
| 648 | 
            +
                    port=port,
         | 
| 649 | 
            +
                    password=password,
         | 
| 650 | 
            +
                    supported_call_types=supported_call_types,
         | 
| 651 | 
            +
                    **kwargs,
         | 
| 652 | 
            +
                )
         | 
| 653 | 
            +
                print_verbose(f"LiteLLM: Cache Updated, litellm.cache={litellm.cache}")
         | 
| 654 | 
            +
                print_verbose(f"LiteLLM Cache: {vars(litellm.cache)}")
         | 
| 655 | 
            +
             | 
| 656 | 
            +
             | 
| 657 | 
            +
            def disable_cache():
         | 
| 658 | 
            +
                """
         | 
| 659 | 
            +
                Disable the cache used by LiteLLM.
         | 
| 660 | 
            +
             | 
| 661 | 
            +
                This function disables the cache used by the LiteLLM module. It removes the cache-related callbacks from the input_callback, success_callback, and _async_success_callback lists. It also sets the litellm.cache attribute to None.
         | 
| 662 | 
            +
             | 
| 663 | 
            +
                Parameters:
         | 
| 664 | 
            +
                None
         | 
| 665 | 
            +
             | 
| 666 | 
            +
                Returns:
         | 
| 667 | 
            +
                None
         | 
| 668 | 
            +
                """
         | 
| 669 | 
            +
                from contextlib import suppress
         | 
| 670 | 
            +
             | 
| 671 | 
            +
                print_verbose("LiteLLM: Disabling Cache")
         | 
| 672 | 
            +
                with suppress(ValueError):
         | 
| 673 | 
            +
                    litellm.input_callback.remove("cache")
         | 
| 674 | 
            +
                    litellm.success_callback.remove("cache")
         | 
| 675 | 
            +
                    litellm._async_success_callback.remove("cache")
         | 
| 676 | 
            +
             | 
| 677 | 
            +
                litellm.cache = None
         | 
| 678 | 
            +
                print_verbose(f"LiteLLM: Cache disabled, litellm.cache={litellm.cache}")
         | 
    	
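For reference, a minimal usage sketch for the Cache class and the per-request cache controls above. This is not part of the upload: the model and messages are illustrative, and it assumes litellm's completion wrapper consults litellm.cache through the "cache" callbacks registered in __init__.

    import litellm
    from litellm.caching import Cache

    # register an in-memory cache for all supported call types
    litellm.cache = Cache(type="local")

    messages = [{"role": "user", "content": "Hello"}]
    first = litellm.completion(model="gpt-3.5-turbo", messages=messages)
    # identical args -> same SHA-256 cache key -> served from the cache
    second = litellm.completion(model="gpt-3.5-turbo", messages=messages)

    # per-request cache controls, per add_cache()/get_cache() above:
    # "ttl" is forwarded to the backend when storing; "s-maxage" bounds how
    # old a cached response may be before get_cache() ignores it
    third = litellm.completion(
        model="gpt-3.5-turbo",
        messages=messages,
        cache={"ttl": 600, "s-maxage": 600},
    )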
litellm/cost.json ADDED
@@ -0,0 +1,5 @@
+{
+    "gpt-3.5-turbo-0613": 0.00015000000000000001,
+    "claude-2": 0.00016454,
+    "gpt-4-0613": 0.015408
+}
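cost.json is a small per-model cost map. A hedged sketch of reading it (the relative path assumes the repo layout above):

    import json
    from pathlib import Path

    # load the per-model cost snapshot shipped in the diff above
    cost_map = json.loads(Path("litellm/cost.json").read_text())
    print(cost_map["claude-2"])  # 0.00016454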
litellm/deprecated_litellm_server/.env.template ADDED
@@ -0,0 +1,43 @@
+# # set AUTH STRATEGY FOR LLM APIs - Defaults to using Environment Variables
+# AUTH_STRATEGY = "ENV" # ENV or DYNAMIC, ENV always reads from environment variables, DYNAMIC reads request headers to set LLM api keys
+
+# OPENAI_API_KEY = ""
+
+# HUGGINGFACE_API_KEY=""
+
+# TOGETHERAI_API_KEY=""
+
+# REPLICATE_API_KEY=""
+
+# ## bedrock / sagemaker
+# AWS_ACCESS_KEY_ID = ""
+# AWS_SECRET_ACCESS_KEY = ""
+
+# AZURE_API_KEY = ""
+# AZURE_API_BASE = ""
+# AZURE_API_VERSION = ""
+
+# ANTHROPIC_API_KEY = ""
+
+# COHERE_API_KEY = ""
+
+# ## CONFIG FILE ##
+# # CONFIG_FILE_PATH = ""  # uncomment to point to config file
+
+# ## LOGGING ##
+
+# SET_VERBOSE = "False" # set to 'True' to see detailed input/output logs
+
+# ### LANGFUSE
+# LANGFUSE_PUBLIC_KEY = ""
+# LANGFUSE_SECRET_KEY = ""
+# # Optional, defaults to https://cloud.langfuse.com
+# LANGFUSE_HOST = "" # optional
+
+
+# ## CACHING ##
+
+# ### REDIS
+# REDIS_HOST = ""
+# REDIS_PORT = ""
+# REDIS_PASSWORD = ""
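The REDIS_* entries above pair with the (commented-out) Redis wiring in litellm/deprecated_litellm_server/server_utils.py later in this diff. A sketch of that wiring, hedged in that it simply mirrors the commented block:

    import os
    import litellm
    from litellm.caching import Cache

    # mirrors the commented-out block in server_utils.set_callbacks():
    # only enable Redis caching when all three variables are present
    if all(os.getenv(k) for k in ("REDIS_HOST", "REDIS_PORT", "REDIS_PASSWORD")):
        litellm.cache = Cache(
            type="redis",
            host=os.getenv("REDIS_HOST"),
            port=os.getenv("REDIS_PORT"),
            password=os.getenv("REDIS_PASSWORD"),
        )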
litellm/deprecated_litellm_server/Dockerfile ADDED
@@ -0,0 +1,10 @@
+# FROM python:3.10
+
+# ENV LITELLM_CONFIG_PATH="/litellm.secrets.toml"
+# COPY . /app
+# WORKDIR /app
+# RUN pip install -r requirements.txt
+
+# EXPOSE $PORT
+
+# CMD exec uvicorn main:app --host 0.0.0.0 --port $PORT --workers 10
litellm/deprecated_litellm_server/README.md ADDED
@@ -0,0 +1,3 @@
+# litellm-server [experimental]
+
+Deprecated. See litellm/proxy
litellm/deprecated_litellm_server/__init__.py ADDED
@@ -0,0 +1,2 @@
+# from .main import *
+# from .server_utils import *
litellm/deprecated_litellm_server/main.py ADDED
@@ -0,0 +1,193 @@
+# import os, traceback
+# from fastapi import FastAPI, Request, HTTPException
+# from fastapi.routing import APIRouter
+# from fastapi.responses import StreamingResponse, FileResponse
+# from fastapi.middleware.cors import CORSMiddleware
+# import json, sys
+# from typing import Optional
+# sys.path.insert(
+#     0, os.path.abspath("../")
+# )  # Adds the parent directory to the system path - for litellm local dev
+# import litellm
+
+# try:
+#     from litellm.deprecated_litellm_server.server_utils import set_callbacks, load_router_config, print_verbose
+# except ImportError:
+#     from litellm.deprecated_litellm_server.server_utils import set_callbacks, load_router_config, print_verbose
+# import dotenv
+# dotenv.load_dotenv() # load env variables
+
+# app = FastAPI(docs_url="/", title="LiteLLM API")
+# router = APIRouter()
+# origins = ["*"]
+
+# app.add_middleware(
+#     CORSMiddleware,
+#     allow_origins=origins,
+#     allow_credentials=True,
+#     allow_methods=["*"],
+#     allow_headers=["*"],
+# )
+# #### GLOBAL VARIABLES ####
+# llm_router: Optional[litellm.Router] = None
+# llm_model_list: Optional[list] = None
+# server_settings: Optional[dict] = None
+
+# set_callbacks() # sets litellm callbacks for logging if they exist in the environment
+
+# if "CONFIG_FILE_PATH" in os.environ:
+#     llm_router, llm_model_list, server_settings = load_router_config(router=llm_router, config_file_path=os.getenv("CONFIG_FILE_PATH"))
+# else:
+#     llm_router, llm_model_list, server_settings = load_router_config(router=llm_router)
+# #### API ENDPOINTS ####
+# @router.get("/v1/models")
+# @router.get("/models")  # if project requires model list
+# def model_list():
+#     all_models = litellm.utils.get_valid_models()
+#     if llm_model_list:
+#         all_models += llm_model_list
+#     return dict(
+#         data=[
+#             {
+#                 "id": model,
+#                 "object": "model",
+#                 "created": 1677610602,
+#                 "owned_by": "openai",
+#             }
+#             for model in all_models
+#         ],
+#         object="list",
+#     )
+# # for streaming
+# def data_generator(response):
+
+#     for chunk in response:
+
+#         yield f"data: {json.dumps(chunk)}\n\n"
+
+# @router.post("/v1/completions")
+# @router.post("/completions")
+# async def completion(request: Request):
+#     data = await request.json()
+#     response = litellm.completion(
+#         **data
+#     )
+#     if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
+#             return StreamingResponse(data_generator(response), media_type='text/event-stream')
+#     return response
+
+# @router.post("/v1/embeddings")
+# @router.post("/embeddings")
+# async def embedding(request: Request):
+#     try:
+#         data = await request.json()
+#         # default to always using the "ENV" variables, only if AUTH_STRATEGY==DYNAMIC then reads headers
+#         if os.getenv("AUTH_STRATEGY", None) == "DYNAMIC" and "authorization" in request.headers: # if users pass LLM api keys as part of header
+#             api_key = request.headers.get("authorization")
+#             api_key = api_key.replace("Bearer", "").strip() # type: ignore
+#             if len(api_key.strip()) > 0:
+#                 api_key = api_key
+#                 data["api_key"] = api_key
+#         response = litellm.embedding(
+#             **data
+#         )
+#         return response
+#     except Exception as e:
+#         error_traceback = traceback.format_exc()
+#         error_msg = f"{str(e)}\n\n{error_traceback}"
+#         return {"error": error_msg}
+
+# @router.post("/v1/chat/completions")
+# @router.post("/chat/completions")
+# @router.post("/openai/deployments/{model:path}/chat/completions") # azure compatible endpoint
+# async def chat_completion(request: Request, model: Optional[str] = None):
+#     global llm_model_list, server_settings
+#     try:
+#         data = await request.json()
+#         server_model = server_settings.get("completion_model", None) if server_settings else None
+#         data["model"] = server_model or model or data["model"]
+#         ## CHECK KEYS ##
+#         # default to always using the "ENV" variables, only if AUTH_STRATEGY==DYNAMIC then reads headers
+#         # env_validation = litellm.validate_environment(model=data["model"])
+#         # if (env_validation['keys_in_environment'] is False or os.getenv("AUTH_STRATEGY", None) == "DYNAMIC") and ("authorization" in request.headers or "api-key" in request.headers): # if users pass LLM api keys as part of header
+#         #     if "authorization" in request.headers:
+#         #         api_key = request.headers.get("authorization")
+#         #     elif "api-key" in request.headers:
+#         #         api_key = request.headers.get("api-key")
+#         #     print(f"api_key in headers: {api_key}")
+#         #     if " " in api_key:
+#         #         api_key = api_key.split(" ")[1]
+#         #     print(f"api_key split: {api_key}")
+#         #     if len(api_key) > 0:
+#         #         api_key = api_key
+#         #         data["api_key"] = api_key
+#         #         print(f"api_key in data: {api_key}")
+#         ## CHECK CONFIG ##
+#         if llm_model_list and data["model"] in [m["model_name"] for m in llm_model_list]:
+#             for m in llm_model_list:
+#                 if data["model"] == m["model_name"]:
+#                     for key, value in m["litellm_params"].items():
+#                         data[key] = value
+#                     break
+#         response = litellm.completion(
+#             **data
+#         )
+#         if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
+#                 return StreamingResponse(data_generator(response), media_type='text/event-stream')
+#         return response
+#     except Exception as e:
+#         error_traceback = traceback.format_exc()
+
+#         error_msg = f"{str(e)}\n\n{error_traceback}"
+#         # return {"error": error_msg}
+#         raise HTTPException(status_code=500, detail=error_msg)
+
+# @router.post("/router/completions")
+# async def router_completion(request: Request):
+#     global llm_router
+#     try:
+#         data = await request.json()
+#         if "model_list" in data:
+#             llm_router = litellm.Router(model_list=data.pop("model_list"))
+#         if llm_router is None:
+#             raise Exception("Save model list via config.yaml. Eg.: ` docker build -t myapp --build-arg CONFIG_FILE=myconfig.yaml .` or pass it in as model_list=[..] as part of the request body")
+
+#         # openai.ChatCompletion.create replacement
+#         response = await llm_router.acompletion(model="gpt-3.5-turbo",
+#                         messages=[{"role": "user", "content": "Hey, how's it going?"}])
+
+#         if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
+#                 return StreamingResponse(data_generator(response), media_type='text/event-stream')
+#         return response
+#     except Exception as e:
+#         error_traceback = traceback.format_exc()
+#         error_msg = f"{str(e)}\n\n{error_traceback}"
+#         return {"error": error_msg}

+# @router.post("/router/embedding")
+# async def router_embedding(request: Request):
+#     global llm_router
+#     try:
+#         data = await request.json()
+#         if "model_list" in data:
+#             llm_router = litellm.Router(model_list=data.pop("model_list"))
+#         if llm_router is None:
+#             raise Exception("Save model list via config.yaml. Eg.: ` docker build -t myapp --build-arg CONFIG_FILE=myconfig.yaml .` or pass it in as model_list=[..] as part of the request body")
+
+#         response = await llm_router.aembedding(model="gpt-3.5-turbo",  # type: ignore
+#                         messages=[{"role": "user", "content": "Hey, how's it going?"}])
+
+#         if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
+#                 return StreamingResponse(data_generator(response), media_type='text/event-stream')
+#         return response
+#     except Exception as e:
+#         error_traceback = traceback.format_exc()
+#         error_msg = f"{str(e)}\n\n{error_traceback}"
+#         return {"error": error_msg}
+
+# @router.get("/")
+# async def home(request: Request):
+#     return "LiteLLM: RUNNING"
+
+
+# app.include_router(router)
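A hedged client sketch against the deprecated server above, assuming it has been uncommented and started with uvicorn on localhost:8000 (host, port, and payload are illustrative):

    import requests

    # POST to the /chat/completions route defined in main.py above
    resp = requests.post(
        "http://localhost:8000/chat/completions",
        json={
            "model": "gpt-3.5-turbo",
            "messages": [{"role": "user", "content": "Hey, how's it going?"}],
        },
    )
    print(resp.json())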
litellm/deprecated_litellm_server/requirements.txt ADDED
@@ -0,0 +1,7 @@
+# openai
+# fastapi
+# uvicorn
+# boto3
+# litellm
+# python-dotenv
+# redis
        litellm/deprecated_litellm_server/server_utils.py
    ADDED
    
    | @@ -0,0 +1,85 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            # import os, litellm
         | 
| 2 | 
            +
            # import pkg_resources
         | 
| 3 | 
            +
            # import dotenv
         | 
| 4 | 
            +
            # dotenv.load_dotenv() # load env variables
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            # def print_verbose(print_statement):
         | 
| 7 | 
            +
            #     pass
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            # def get_package_version(package_name):
         | 
| 10 | 
            +
            #     try:
         | 
| 11 | 
            +
            #         package = pkg_resources.get_distribution(package_name)
         | 
| 12 | 
            +
            #         return package.version
         | 
| 13 | 
            +
#     except pkg_resources.DistributionNotFound:
#         return None

# # Usage example
# package_name = "litellm"
# version = get_package_version(package_name)
# if version:
#     print_verbose(f"The version of {package_name} is {version}")
# else:
#     print_verbose(f"{package_name} is not installed")

# import yaml
# import dotenv
# from typing import Optional
# dotenv.load_dotenv()  # load env variables

# def set_callbacks():
#     ## LOGGING
#     if len(os.getenv("SET_VERBOSE", "")) > 0:
#         if os.getenv("SET_VERBOSE") == "True":
#             litellm.set_verbose = True
#             print_verbose("\033[92mLiteLLM: Switched on verbose logging\033[0m")
#         else:
#             litellm.set_verbose = False

#     ### LANGFUSE
#     if (len(os.getenv("LANGFUSE_PUBLIC_KEY", "")) > 0 and len(os.getenv("LANGFUSE_SECRET_KEY", "")) > 0) or len(os.getenv("LANGFUSE_HOST", "")) > 0:
#         litellm.success_callback = ["langfuse"]
#         print_verbose("\033[92mLiteLLM: Switched on Langfuse feature\033[0m")

#     ## CACHING
#     ### REDIS
#     # if len(os.getenv("REDIS_HOST", "")) > 0 and len(os.getenv("REDIS_PORT", "")) > 0 and len(os.getenv("REDIS_PASSWORD", "")) > 0:
#     #     print(f"redis host: {os.getenv('REDIS_HOST')}; redis port: {os.getenv('REDIS_PORT')}; password: {os.getenv('REDIS_PASSWORD')}")
#     #     from litellm.caching import Cache
#     #     litellm.cache = Cache(type="redis", host=os.getenv("REDIS_HOST"), port=os.getenv("REDIS_PORT"), password=os.getenv("REDIS_PASSWORD"))
#     #     print("\033[92mLiteLLM: Switched on Redis caching\033[0m")


# def load_router_config(router: Optional[litellm.Router], config_file_path: Optional[str] = '/app/config.yaml'):
#     config = {}
#     server_settings = {}
#     try:
#         if os.path.exists(config_file_path):  # type: ignore
#             with open(config_file_path, 'r') as file:  # type: ignore
#                 config = yaml.safe_load(file)
#     except:
#         pass

#     ## SERVER SETTINGS (e.g. default completion model = 'ollama/mistral')
#     server_settings = config.get("server_settings", server_settings)

#     ## LITELLM MODULE SETTINGS (e.g. litellm.drop_params=True, ...)
#     litellm_settings = config.get('litellm_settings', None)
#     if litellm_settings:
#         for key, value in litellm_settings.items():
#             setattr(litellm, key, value)

#     ## MODEL LIST
#     model_list = config.get('model_list', None)
#     if model_list:
#         router = litellm.Router(model_list=model_list)

#     ## ENVIRONMENT VARIABLES
#     environment_variables = config.get('environment_variables', None)
#     if environment_variables:
#         for key, value in environment_variables.items():
#             os.environ[key] = value

#     return router, model_list, server_settings
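For reference, a minimal sketch (not part of this upload) of the YAML shape the deprecated load_router_config above reads; the key names follow its config.get(...) lookups, while the values and the nested model_list entry shape are illustrative assumptions:

# Illustrative config only - top-level keys mirror load_router_config's lookups.
import yaml

sample_yaml = """
server_settings:
  completion_model: ollama/mistral  # assumed key name, illustrative
litellm_settings:
  drop_params: true
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo
environment_variables:
  REDIS_HOST: localhost
"""

config = yaml.safe_load(sample_yaml)
# load_router_config applies these in order: server_settings, then
# litellm_settings via setattr(litellm, ...), then model_list via
# litellm.Router(...), then environment_variables into os.environ.
assert set(config) == {
    "server_settings", "litellm_settings", "model_list", "environment_variables"
}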
    	
        litellm/exceptions.py
    ADDED
    
@@ -0,0 +1,200 @@
# +-----------------------------------------------+
# |                                               |
# |           Give Feedback / Get Help            |
# | https://github.com/BerriAI/litellm/issues/new |
# |                                               |
# +-----------------------------------------------+
#
#  Thank you users! We ❤️ you! - Krrish & Ishaan

## LiteLLM versions of the OpenAI Exception Types

from openai import (
    AuthenticationError,
    BadRequestError,
    NotFoundError,
    RateLimitError,
    APIStatusError,
    OpenAIError,
    APIError,
    APITimeoutError,
    APIConnectionError,
    APIResponseValidationError,
    UnprocessableEntityError,
)
import httpx


class AuthenticationError(AuthenticationError):  # type: ignore
    def __init__(self, message, llm_provider, model, response: httpx.Response):
        self.status_code = 401
        self.message = message
        self.llm_provider = llm_provider
        self.model = model
        super().__init__(
            self.message, response=response, body=None
        )  # Call the base class constructor with the parameters it needs


# raised when an invalid model is passed, e.g. gpt-8
class NotFoundError(NotFoundError):  # type: ignore
    def __init__(self, message, model, llm_provider, response: httpx.Response):
        self.status_code = 404
        self.message = message
        self.model = model
        self.llm_provider = llm_provider
        super().__init__(
            self.message, response=response, body=None
        )  # Call the base class constructor with the parameters it needs


class BadRequestError(BadRequestError):  # type: ignore
    def __init__(self, message, model, llm_provider, response: httpx.Response):
        self.status_code = 400
        self.message = message
        self.model = model
        self.llm_provider = llm_provider
        super().__init__(
            self.message, response=response, body=None
        )  # Call the base class constructor with the parameters it needs


class UnprocessableEntityError(UnprocessableEntityError):  # type: ignore
    def __init__(self, message, model, llm_provider, response: httpx.Response):
        self.status_code = 422
        self.message = message
        self.model = model
        self.llm_provider = llm_provider
        super().__init__(
            self.message, response=response, body=None
        )  # Call the base class constructor with the parameters it needs


class Timeout(APITimeoutError):  # type: ignore
    def __init__(self, message, model, llm_provider):
        self.status_code = 408
        self.message = message
        self.model = model
        self.llm_provider = llm_provider
        request = httpx.Request(method="POST", url="https://api.openai.com/v1")
        super().__init__(
            request=request
        )  # Call the base class constructor with the parameters it needs


class RateLimitError(RateLimitError):  # type: ignore
    def __init__(self, message, llm_provider, model, response: httpx.Response):
        self.status_code = 429
        self.message = message
        self.llm_provider = llm_provider
        self.model = model
        super().__init__(
            self.message, response=response, body=None
        )  # Call the base class constructor with the parameters it needs


# subclass of BadRequestError - meant to give more granularity for error handling of context window exceeded errors
class ContextWindowExceededError(BadRequestError):  # type: ignore
    def __init__(self, message, model, llm_provider, response: httpx.Response):
        self.status_code = 400
        self.message = message
        self.model = model
        self.llm_provider = llm_provider
        super().__init__(
            message=self.message,
            model=self.model,  # type: ignore
            llm_provider=self.llm_provider,  # type: ignore
            response=response,
        )  # Call the base class constructor with the parameters it needs


class ContentPolicyViolationError(BadRequestError):  # type: ignore
    #  Error code: 400 - {'error': {'code': 'content_policy_violation', 'message': 'Your request was rejected as a result of our safety system. Image descriptions generated from your prompt may contain text that is not allowed by our safety system. If you believe this was done in error, your request may succeed if retried, or by adjusting your prompt.', 'param': None, 'type': 'invalid_request_error'}}
    def __init__(self, message, model, llm_provider, response: httpx.Response):
        self.status_code = 400
        self.message = message
        self.model = model
        self.llm_provider = llm_provider
        super().__init__(
            message=self.message,
            model=self.model,  # type: ignore
            llm_provider=self.llm_provider,  # type: ignore
            response=response,
        )  # Call the base class constructor with the parameters it needs


class ServiceUnavailableError(APIStatusError):  # type: ignore
    def __init__(self, message, llm_provider, model, response: httpx.Response):
        self.status_code = 503
        self.message = message
        self.llm_provider = llm_provider
        self.model = model
        super().__init__(
            self.message, response=response, body=None
        )  # Call the base class constructor with the parameters it needs


# raise this when the API returns an invalid response object - https://github.com/openai/openai-python/blob/1be14ee34a0f8e42d3f9aa5451aa4cb161f1781f/openai/api_requestor.py#L401
class APIError(APIError):  # type: ignore
    def __init__(
        self, status_code, message, llm_provider, model, request: httpx.Request
    ):
        self.status_code = status_code
        self.message = message
        self.llm_provider = llm_provider
        self.model = model
        super().__init__(self.message, request=request, body=None)  # type: ignore


# raised when the connection to the provider's API fails
class APIConnectionError(APIConnectionError):  # type: ignore
    def __init__(self, message, llm_provider, model, request: httpx.Request):
        self.message = message
        self.llm_provider = llm_provider
        self.model = model
        self.status_code = 500
        super().__init__(message=self.message, request=request)


# raised when the API response cannot be parsed / validated
class APIResponseValidationError(APIResponseValidationError):  # type: ignore
    def __init__(self, message, llm_provider, model):
        self.message = message
        self.llm_provider = llm_provider
        self.model = model
        request = httpx.Request(method="POST", url="https://api.openai.com/v1")
        response = httpx.Response(status_code=500, request=request)
        super().__init__(response=response, body=None, message=message)


class OpenAIError(OpenAIError):  # type: ignore
    def __init__(self, original_exception):
        self.status_code = original_exception.http_status
        super().__init__(
            http_body=original_exception.http_body,
            http_status=original_exception.http_status,
            json_body=original_exception.json_body,
            headers=original_exception.headers,
            code=original_exception.code,
        )
        self.llm_provider = "openai"


class BudgetExceededError(Exception):
    def __init__(self, current_cost, max_budget):
        self.current_cost = current_cost
        self.max_budget = max_budget
        message = f"Budget has been exceeded! Current cost: {current_cost}, Max budget: {max_budget}"
        super().__init__(message)


## DEPRECATED ##
class InvalidRequestError(BadRequestError):  # type: ignore
    def __init__(self, message, model, llm_provider):
        self.status_code = 400
        self.message = message
        self.model = model
        self.llm_provider = llm_provider
        super().__init__(
            self.message, f"{self.model}"
        )  # Call the base class constructor with the parameters it needs
    	
        litellm/integrations/__init__.py
    ADDED
    
@@ -0,0 +1 @@
from . import *
    	
        litellm/integrations/aispend.py
    ADDED
    
@@ -0,0 +1,177 @@
#### What this does ####
#    On success + failure, log events to aispend.io
import dotenv, os
import requests

dotenv.load_dotenv()  # Loading env variables using dotenv
import traceback
import datetime

model_cost = {
    "gpt-3.5-turbo": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
    "gpt-35-turbo": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },  # azure model name
    "gpt-3.5-turbo-0613": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
    "gpt-3.5-turbo-0301": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
    "gpt-3.5-turbo-16k": {
        "max_tokens": 16000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000004,
    },
    "gpt-35-turbo-16k": {
        "max_tokens": 16000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000004,
    },  # azure model name
    "gpt-3.5-turbo-16k-0613": {
        "max_tokens": 16000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000004,
    },
    "gpt-4": {
        "max_tokens": 8000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.00006,
    },
    "gpt-4-0613": {
        "max_tokens": 8000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.00006,
    },
    "gpt-4-32k": {
        "max_tokens": 8000,
        "input_cost_per_token": 0.00006,
        "output_cost_per_token": 0.00012,
    },
    "claude-instant-1": {
        "max_tokens": 100000,
        "input_cost_per_token": 0.00000163,
        "output_cost_per_token": 0.00000551,
    },
    "claude-2": {
        "max_tokens": 100000,
        "input_cost_per_token": 0.00001102,
        "output_cost_per_token": 0.00003268,
    },
    "text-bison-001": {
        "max_tokens": 8192,
        "input_cost_per_token": 0.000004,
        "output_cost_per_token": 0.000004,
    },
    "chat-bison-001": {
        "max_tokens": 4096,
        "input_cost_per_token": 0.000002,
        "output_cost_per_token": 0.000002,
    },
    "command-nightly": {
        "max_tokens": 4096,
        "input_cost_per_token": 0.000015,
        "output_cost_per_token": 0.000015,
    },
}


class AISpendLogger:
    # Class variables or attributes
    def __init__(self):
        # Instance variables
        self.account_id = os.getenv("AISPEND_ACCOUNT_ID")
        self.api_key = os.getenv("AISPEND_API_KEY")

    def price_calculator(self, model, response_obj, start_time, end_time):
        # try to find the model in the model_cost map,
        # else default to the average of the known costs
        prompt_tokens_cost_usd_dollar = 0
        completion_tokens_cost_usd_dollar = 0
        if model in model_cost:
            prompt_tokens_cost_usd_dollar = (
                model_cost[model]["input_cost_per_token"]
                * response_obj["usage"]["prompt_tokens"]
            )
            completion_tokens_cost_usd_dollar = (
                model_cost[model]["output_cost_per_token"]
                * response_obj["usage"]["completion_tokens"]
            )
        elif "replicate" in model:
            # replicate models are charged based on time
            # llama 2 runs on an nvidia a100 which costs $0.0032 per second - https://replicate.com/replicate/llama-2-70b-chat
            model_run_time = end_time - start_time  # assuming time in seconds
            cost_usd_dollar = model_run_time * 0.0032
            prompt_tokens_cost_usd_dollar = cost_usd_dollar / 2
            completion_tokens_cost_usd_dollar = cost_usd_dollar / 2
        else:
            # fall back to the average input/output cost across all known models
            input_cost_sum = 0
            output_cost_sum = 0
            for model_name in model_cost:  # avoid shadowing the `model` argument
                input_cost_sum += model_cost[model_name]["input_cost_per_token"]
                output_cost_sum += model_cost[model_name]["output_cost_per_token"]
            avg_input_cost = input_cost_sum / len(model_cost.keys())
            avg_output_cost = output_cost_sum / len(model_cost.keys())
            prompt_tokens_cost_usd_dollar = (
                avg_input_cost * response_obj["usage"]["prompt_tokens"]
            )
            completion_tokens_cost_usd_dollar = (
                avg_output_cost * response_obj["usage"]["completion_tokens"]
            )
        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar

    def log_event(self, model, response_obj, start_time, end_time, print_verbose):
        # Method definition
        try:
            print_verbose(
                f"AISpend Logging - Enters logging function for model {model}"
            )

            url = f"https://aispend.io/api/v1/accounts/{self.account_id}/data"
            headers = {
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json",
            }

            response_timestamp = datetime.datetime.fromtimestamp(
                int(response_obj["created"])
            ).strftime("%Y-%m-%d")

            (
                prompt_tokens_cost_usd_dollar,
                completion_tokens_cost_usd_dollar,
            ) = self.price_calculator(model, response_obj, start_time, end_time)
            prompt_tokens_cost_usd_cent = prompt_tokens_cost_usd_dollar * 100
            completion_tokens_cost_usd_cent = completion_tokens_cost_usd_dollar * 100
            data = [
                {
                    "requests": 1,
                    "requests_context": 1,
                    "context_tokens": response_obj["usage"]["prompt_tokens"],
                    "requests_generated": 1,
                    "generated_tokens": response_obj["usage"]["completion_tokens"],
                    "recorded_date": response_timestamp,
                    "model_id": response_obj["model"],
                    "generated_tokens_cost_usd_cent": completion_tokens_cost_usd_cent,
                    "context_tokens_cost_usd_cent": prompt_tokens_cost_usd_cent,
                }
            ]

            # NOTE: the payload is only assembled and logged here; no request is sent to `url`.
            print_verbose(f"AISpend Logging - final data object: {data}")
        except:
            # traceback.print_exc()
            print_verbose(f"AISpend Logging Error - {traceback.format_exc()}")
            pass
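A quick sketch (not part of this file; the response_obj below is a fabricated OpenAI-style usage dict) exercising the lookup path of price_calculator above:

logger = AISpendLogger()
response_obj = {"usage": {"prompt_tokens": 100, "completion_tokens": 50}}

prompt_cost, completion_cost = logger.price_calculator(
    "gpt-3.5-turbo", response_obj, start_time=0, end_time=0
)
# 100 * 0.0000015 + 50 * 0.000002 = 0.00025 USD in total
print(prompt_cost + completion_cost)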
    	
        litellm/integrations/berrispend.py
    ADDED
    
@@ -0,0 +1,184 @@
#### What this does ####
#    On success + failure, log events to BerriSpend
import dotenv, os
import requests

dotenv.load_dotenv()  # Loading env variables using dotenv
import traceback
import datetime

model_cost = {
    "gpt-3.5-turbo": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
    "gpt-35-turbo": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },  # azure model name
    "gpt-3.5-turbo-0613": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
    "gpt-3.5-turbo-0301": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
    "gpt-3.5-turbo-16k": {
        "max_tokens": 16000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000004,
    },
    "gpt-35-turbo-16k": {
        "max_tokens": 16000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000004,
    },  # azure model name
    "gpt-3.5-turbo-16k-0613": {
        "max_tokens": 16000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000004,
    },
    "gpt-4": {
        "max_tokens": 8000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.00006,
    },
    "gpt-4-0613": {
        "max_tokens": 8000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.00006,
    },
    "gpt-4-32k": {
        "max_tokens": 8000,
        "input_cost_per_token": 0.00006,
        "output_cost_per_token": 0.00012,
    },
    "claude-instant-1": {
        "max_tokens": 100000,
        "input_cost_per_token": 0.00000163,
        "output_cost_per_token": 0.00000551,
    },
    "claude-2": {
        "max_tokens": 100000,
        "input_cost_per_token": 0.00001102,
        "output_cost_per_token": 0.00003268,
    },
    "text-bison-001": {
        "max_tokens": 8192,
        "input_cost_per_token": 0.000004,
        "output_cost_per_token": 0.000004,
    },
    "chat-bison-001": {
        "max_tokens": 4096,
        "input_cost_per_token": 0.000002,
        "output_cost_per_token": 0.000002,
    },
    "command-nightly": {
        "max_tokens": 4096,
        "input_cost_per_token": 0.000015,
        "output_cost_per_token": 0.000015,
    },
}


class BerriSpendLogger:
    # Class variables or attributes
    def __init__(self):
        # Instance variables
        self.account_id = os.getenv("BERRISPEND_ACCOUNT_ID")

    def price_calculator(self, model, response_obj, start_time, end_time):
        # try to find the model in the model_cost map,
        # else default to the average of the known costs
        prompt_tokens_cost_usd_dollar = 0
        completion_tokens_cost_usd_dollar = 0
        if model in model_cost:
            prompt_tokens_cost_usd_dollar = (
                model_cost[model]["input_cost_per_token"]
                * response_obj["usage"]["prompt_tokens"]
            )
            completion_tokens_cost_usd_dollar = (
                model_cost[model]["output_cost_per_token"]
                * response_obj["usage"]["completion_tokens"]
            )
        elif "replicate" in model:
            # replicate models are charged based on time
            # llama 2 runs on an nvidia a100 which costs $0.0032 per second - https://replicate.com/replicate/llama-2-70b-chat
            model_run_time = end_time - start_time  # assuming time in seconds
            cost_usd_dollar = model_run_time * 0.0032
            prompt_tokens_cost_usd_dollar = cost_usd_dollar / 2
            completion_tokens_cost_usd_dollar = cost_usd_dollar / 2
        else:
            # fall back to the average input/output cost across all known models
            input_cost_sum = 0
            output_cost_sum = 0
            for model_name in model_cost:  # avoid shadowing the `model` argument
                input_cost_sum += model_cost[model_name]["input_cost_per_token"]
                output_cost_sum += model_cost[model_name]["output_cost_per_token"]
            avg_input_cost = input_cost_sum / len(model_cost.keys())
            avg_output_cost = output_cost_sum / len(model_cost.keys())
            prompt_tokens_cost_usd_dollar = (
                avg_input_cost * response_obj["usage"]["prompt_tokens"]
            )
            completion_tokens_cost_usd_dollar = (
                avg_output_cost * response_obj["usage"]["completion_tokens"]
            )
        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar

    def log_event(
        self, model, messages, response_obj, start_time, end_time, print_verbose
    ):
        # Method definition
        try:
            print_verbose(
                f"BerriSpend Logging - Enters logging function for model {model}"
            )

            url = "https://berrispend.berri.ai/spend"
            headers = {"Content-Type": "application/json"}

            (
                prompt_tokens_cost_usd_dollar,
                completion_tokens_cost_usd_dollar,
            ) = self.price_calculator(model, response_obj, start_time, end_time)
            total_cost = (
                prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
            )

            response_time = (end_time - start_time).total_seconds()
            if "response" in response_obj:
                data = [
                    {
                        "response_time": response_time,
                        "model_id": response_obj["model"],
                        "total_cost": total_cost,
                        "messages": messages,
                        "response": response_obj["choices"][0]["message"]["content"],
                        "account_id": self.account_id,
                    }
                ]
            elif "error" in response_obj:
                data = [
                    {
                        "response_time": response_time,
                        "model_id": response_obj["model"],
                        "total_cost": total_cost,
                        "messages": messages,
                        "error": response_obj["error"],
                        "account_id": self.account_id,
                    }
                ]
            else:
                # neither a response nor an error to report
                return

            print_verbose(f"BerriSpend Logging - final data object: {data}")
            response = requests.post(url, headers=headers, json=data)
        except:
            # traceback.print_exc()
            print_verbose(f"BerriSpend Logging Error - {traceback.format_exc()}")
            pass
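Illustrative call only (the response_obj below is fabricated): log_event above expects datetime start/end values so it can derive response_time via total_seconds(), and the presence of a "response" key selects the success branch:

import datetime

logger = BerriSpendLogger()
start = datetime.datetime.now()
end = start + datetime.timedelta(seconds=2)

# Fabricated OpenAI-style success payload.
response_obj = {
    "model": "gpt-3.5-turbo",
    "usage": {"prompt_tokens": 10, "completion_tokens": 5},
    "response": "hello",  # presence of this key selects the success branch
    "choices": [{"message": {"content": "hello"}}],
}

logger.log_event(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
    response_obj=response_obj,
    start_time=start,
    end_time=end,
    print_verbose=print,
)  # posts the assembled payload to the BerriSpend endpoint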
    	
        litellm/integrations/custom_logger.py
    ADDED
    
@@ -0,0 +1,130 @@
+#### What this does ####
+#    On success + failure, calls user-defined custom callbacks
+import dotenv, os
+import requests
+from litellm.proxy._types import UserAPIKeyAuth
+from litellm.caching import DualCache
+from typing import Literal
+
+dotenv.load_dotenv()  # Loading env variables using dotenv
+import traceback
+
+
+class CustomLogger:  # https://docs.litellm.ai/docs/observability/custom_callback#callback-class
+    # Class variables or attributes
+    def __init__(self):
+        pass
+
+    def log_pre_api_call(self, model, messages, kwargs):
+        pass
+
+    def log_post_api_call(self, kwargs, response_obj, start_time, end_time):
+        pass
+
+    def log_stream_event(self, kwargs, response_obj, start_time, end_time):
+        pass
+
+    def log_success_event(self, kwargs, response_obj, start_time, end_time):
+        pass
+
+    def log_failure_event(self, kwargs, response_obj, start_time, end_time):
+        pass
+
+    #### ASYNC ####
+
+    async def async_log_stream_event(self, kwargs, response_obj, start_time, end_time):
+        pass
+
+    async def async_log_pre_api_call(self, model, messages, kwargs):
+        pass
+
+    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
+        pass
+
+    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
+        pass
+
+    #### CALL HOOKS - proxy only ####
+    """
+    Control / modify incoming and outgoing data before calling the model
+    """
+
+    async def async_pre_call_hook(
+        self,
+        user_api_key_dict: UserAPIKeyAuth,
+        cache: DualCache,
+        data: dict,
+        call_type: Literal["completion", "embeddings"],
+    ):
+        pass
+
+    async def async_post_call_failure_hook(
+        self, original_exception: Exception, user_api_key_dict: UserAPIKeyAuth
+    ):
+        pass
+
+    #### SINGLE-USE #### - https://docs.litellm.ai/docs/observability/custom_callback#using-your-custom-callback-function
+
+    def log_input_event(self, model, messages, kwargs, print_verbose, callback_func):
+        try:
+            kwargs["model"] = model
+            kwargs["messages"] = messages
+            kwargs["log_event_type"] = "pre_api_call"
+            callback_func(
+                kwargs,
+            )
+            print_verbose(f"Custom Logger - model call details: {kwargs}")
+        except:
+            traceback.print_exc()
+            print_verbose(f"Custom Logger Error - {traceback.format_exc()}")
+
+    async def async_log_input_event(
+        self, model, messages, kwargs, print_verbose, callback_func
+    ):
+        try:
+            kwargs["model"] = model
+            kwargs["messages"] = messages
+            kwargs["log_event_type"] = "pre_api_call"
+            await callback_func(
+                kwargs,
+            )
+            print_verbose(f"Custom Logger - model call details: {kwargs}")
+        except:
+            traceback.print_exc()
+            print_verbose(f"Custom Logger Error - {traceback.format_exc()}")
+
+    def log_event(
+        self, kwargs, response_obj, start_time, end_time, print_verbose, callback_func
+    ):
+        # Method definition
+        try:
+            kwargs["log_event_type"] = "post_api_call"
+            callback_func(
+                kwargs,  # kwargs to func
+                response_obj,
+                start_time,
+                end_time,
+            )
+            print_verbose(f"Custom Logger - final response object: {response_obj}")
+        except:
+            # traceback.print_exc()
+            print_verbose(f"Custom Logger Error - {traceback.format_exc()}")
+            pass
+
+    async def async_log_event(
+        self, kwargs, response_obj, start_time, end_time, print_verbose, callback_func
+    ):
+        # Method definition
+        try:
+            kwargs["log_event_type"] = "post_api_call"
+            await callback_func(
+                kwargs,  # kwargs to func
+                response_obj,
+                start_time,
+                end_time,
+            )
+            print_verbose(f"Custom Logger - final response object: {response_obj}")
+        except:
+            # traceback.print_exc()
+            print_verbose(f"Custom Logger Error - {traceback.format_exc()}")
+            pass
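For context, a minimal usage sketch of the class above, assuming the litellm.callbacks registration described in the linked custom-callback docs (the subclass name is illustrative):

import litellm
from litellm.integrations.custom_logger import CustomLogger

class MyHandler(CustomLogger):
    def log_success_event(self, kwargs, response_obj, start_time, end_time):
        # fires once per successful (non-streaming) call
        print(f"success: {kwargs.get('model')} in {end_time - start_time}")

    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
        print("async failure")

litellm.callbacks = [MyHandler()]  # register the handler instance
litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
)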
    	
        litellm/integrations/dynamodb.py
    ADDED
    
@@ -0,0 +1,92 @@
+#### What this does ####
+#    On success + failure, log events to DynamoDB
+
+import dotenv, os
+import requests
+
+dotenv.load_dotenv()  # Loading env variables using dotenv
+import traceback
+import datetime, subprocess, sys
+import litellm, uuid
+from litellm._logging import print_verbose
+
+
+class DyanmoDBLogger:
+    # Class variables or attributes
+
+    def __init__(self):
+        # Instance variables
+        import boto3
+
+        self.dynamodb = boto3.resource(
+            "dynamodb", region_name=os.environ["AWS_REGION_NAME"]
+        )
+        if litellm.dynamodb_table_name is None:
+            raise ValueError(
+                "LiteLLM Error, trying to use DynamoDB but not table name passed. Create a table and set `litellm.dynamodb_table_name=<your-table>`"
+            )
+        self.table_name = litellm.dynamodb_table_name
+
+    async def _async_log_event(
+        self, kwargs, response_obj, start_time, end_time, print_verbose
+    ):
+        self.log_event(kwargs, response_obj, start_time, end_time, print_verbose)
+
+    def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
+        try:
+            print_verbose(
+                f"DynamoDB Logging - Enters logging function for model {kwargs}"
+            )
+
+            # construct payload to send to DynamoDB
+            # follows the same params as langfuse.py
+            litellm_params = kwargs.get("litellm_params", {})
+            metadata = (
+                litellm_params.get("metadata", {}) or {}
+            )  # if litellm_params['metadata'] == None
+            messages = kwargs.get("messages")
+            optional_params = kwargs.get("optional_params", {})
+            call_type = kwargs.get("call_type", "litellm.completion")
+            usage = response_obj["usage"]
+            id = response_obj.get("id", str(uuid.uuid4()))
+
+            # Build the initial payload
+            payload = {
+                "id": id,
+                "call_type": call_type,
+                "startTime": start_time,
+                "endTime": end_time,
+                "model": kwargs.get("model", ""),
+                "user": kwargs.get("user", ""),
+                "modelParameters": optional_params,
+                "messages": messages,
+                "response": response_obj,
+                "usage": usage,
+                "metadata": metadata,
+            }
+
+            # Ensure everything in the payload is converted to str
+            for key, value in payload.items():
+                try:
+                    payload[key] = str(value)
+                except:
+                    # non blocking if it can't cast to a str
+                    pass
+
+            print_verbose(f"\nDynamoDB Logger - Logging payload = {payload}")
+
+            # put data in DynamoDB
+            table = self.dynamodb.Table(self.table_name)
+            # Assuming log_data is a dictionary with log information
+            response = table.put_item(Item=payload)
+
+            print_verbose(f"Response from DynamoDB:{str(response)}")
+
+            print_verbose(
+                f"DynamoDB Layer Logging - final response object: {response_obj}"
+            )
+            return response
+        except:
+            traceback.print_exc()
+            print_verbose(f"DynamoDB Layer Error - {traceback.format_exc()}")
+            pass
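A hedged setup sketch for the logger above, assuming litellm's string-callback wiring for built-in integrations (table and region values are placeholders):

import os
import litellm

os.environ["AWS_REGION_NAME"] = "us-west-2"   # read by boto3.resource(...) in __init__
litellm.dynamodb_table_name = "litellm-logs"  # required, else the ValueError above fires
litellm.success_callback = ["dynamodb"]       # route success events to this logger

litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
)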
    	
        litellm/integrations/helicone.py
    ADDED
    
@@ -0,0 +1,114 @@
+#### What this does ####
+#    On success, logs events to Helicone
+import dotenv, os
+import requests
+
+dotenv.load_dotenv()  # Loading env variables using dotenv
+import traceback
+
+
+class HeliconeLogger:
+    # Class variables or attributes
+    helicone_model_list = ["gpt", "claude"]
+
+    def __init__(self):
+        # Instance variables
+        self.provider_url = "https://api.openai.com/v1"
+        self.key = os.getenv("HELICONE_API_KEY")
+
+    def claude_mapping(self, model, messages, response_obj):
+        from anthropic import HUMAN_PROMPT, AI_PROMPT
+
+        prompt = f"{HUMAN_PROMPT}"
+        for message in messages:
+            if "role" in message:
+                if message["role"] == "user":
+                    prompt += f"{HUMAN_PROMPT}{message['content']}"
+                else:
+                    prompt += f"{AI_PROMPT}{message['content']}"
+            else:
+                prompt += f"{HUMAN_PROMPT}{message['content']}"
+        prompt += f"{AI_PROMPT}"
+        claude_provider_request = {"model": model, "prompt": prompt}
+
+        claude_response_obj = {
+            "completion": response_obj["choices"][0]["message"]["content"],
+            "model": model,
+            "stop_reason": "stop_sequence",
+        }
+
+        return claude_provider_request, claude_response_obj
+
+    def log_success(
+        self, model, messages, response_obj, start_time, end_time, print_verbose
+    ):
+        # Method definition
+        try:
+            print_verbose(
+                f"Helicone Logging - Enters logging function for model {model}"
+            )
+            model = (
+                model
+                if any(
+                    accepted_model in model
+                    for accepted_model in self.helicone_model_list
+                )
+                else "gpt-3.5-turbo"
+            )
+            provider_request = {"model": model, "messages": messages}
+
+            if "claude" in model:
+                provider_request, response_obj = self.claude_mapping(
+                    model=model, messages=messages, response_obj=response_obj
+                )
+
+            providerResponse = {
+                "json": response_obj,
+                "headers": {"openai-version": "2020-10-01"},
+                "status": 200,
+            }
+
+            # Code to be executed
+            url = "https://api.hconeai.com/oai/v1/log"
+            headers = {
+                "Authorization": f"Bearer {self.key}",
+                "Content-Type": "application/json",
+            }
+            start_time_seconds = int(start_time.timestamp())
+            start_time_milliseconds = int(
+                (start_time.timestamp() - start_time_seconds) * 1000
+            )
+            end_time_seconds = int(end_time.timestamp())
+            end_time_milliseconds = int(
+                (end_time.timestamp() - end_time_seconds) * 1000
+            )
+            data = {
+                "providerRequest": {
+                    "url": self.provider_url,
+                    "json": provider_request,
+                    "meta": {"Helicone-Auth": f"Bearer {self.key}"},
+                },
+                "providerResponse": providerResponse,
+                "timing": {
+                    "startTime": {
+                        "seconds": start_time_seconds,
+                        "milliseconds": start_time_milliseconds,
+                    },
+                    "endTime": {
+                        "seconds": end_time_seconds,
+                        "milliseconds": end_time_milliseconds,
+                    },
+                },  # {"seconds": .., "milliseconds": ..}
+            }
+            response = requests.post(url, headers=headers, json=data)
+            if response.status_code == 200:
+                print_verbose("Helicone Logging - Success!")
+            else:
+                print_verbose(
+                    f"Helicone Logging - Error Request was not successful. Status Code: {response.status_code}"
+                )
+                print_verbose(f"Helicone Logging - Error {response.text}")
+        except:
+            # traceback.print_exc()
+            print_verbose(f"Helicone Logging Error - {traceback.format_exc()}")
+            pass
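The seconds/milliseconds split above simply decomposes each datetime's timestamp() into whole seconds plus the millisecond remainder, which is the shape Helicone's timing field expects. A hedged sketch of enabling this logger via litellm's string callbacks (the key is a placeholder):

import os
import litellm

os.environ["HELICONE_API_KEY"] = "sk-helicone-..."  # placeholder key, read in __init__
litellm.success_callback = ["helicone"]             # routes to HeliconeLogger.log_success

litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
)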
    	
        litellm/integrations/langfuse.py
    ADDED
    
@@ -0,0 +1,191 @@
+#### What this does ####
+#    On success, logs events to Langfuse
+import dotenv, os
+import requests
+import requests
+from datetime import datetime
+
+dotenv.load_dotenv()  # Loading env variables using dotenv
+import traceback
+from packaging.version import Version
+
+
+class LangFuseLogger:
+    # Class variables or attributes
+    def __init__(self):
+        try:
+            from langfuse import Langfuse
+        except Exception as e:
+            raise Exception(
+                f"\033[91mLangfuse not installed, try running 'pip install langfuse' to fix this error: {e}\033[0m"
+            )
+        # Instance variables
+        self.secret_key = os.getenv("LANGFUSE_SECRET_KEY")
+        self.public_key = os.getenv("LANGFUSE_PUBLIC_KEY")
+        self.langfuse_host = os.getenv("LANGFUSE_HOST", "https://cloud.langfuse.com")
+        self.langfuse_release = os.getenv("LANGFUSE_RELEASE")
+        self.langfuse_debug = os.getenv("LANGFUSE_DEBUG")
+        self.Langfuse = Langfuse(
+            public_key=self.public_key,
+            secret_key=self.secret_key,
+            host=self.langfuse_host,
+            release=self.langfuse_release,
+            debug=self.langfuse_debug,
+        )
+
+    def log_event(
+        self, kwargs, response_obj, start_time, end_time, user_id, print_verbose
+    ):
+        # Method definition
+
+        try:
+            print_verbose(
+                f"Langfuse Logging - Enters logging function for model {kwargs}"
+            )
+            litellm_params = kwargs.get("litellm_params", {})
+            metadata = (
+                litellm_params.get("metadata", {}) or {}
+            )  # if litellm_params['metadata'] == None
+            prompt = [kwargs.get("messages")]
+            optional_params = kwargs.get("optional_params", {})
+
+            optional_params.pop("functions", None)
+            optional_params.pop("tools", None)
+
+            # langfuse only accepts str, int, bool, float for logging
+            for param, value in optional_params.items():
+                if not isinstance(value, (str, int, bool, float)):
+                    try:
+                        optional_params[param] = str(value)
+                    except:
+                        # if casting value to str fails don't block logging
+                        pass
+
+            # end of processing langfuse ########################
+            input = prompt
+            output = response_obj["choices"][0]["message"].json()
+            print_verbose(
+                f"OUTPUT IN LANGFUSE: {output}; original: {response_obj['choices'][0]['message']}"
+            )
+            self._log_langfuse_v2(
+                user_id,
+                metadata,
+                output,
+                start_time,
+                end_time,
+                kwargs,
+                optional_params,
+                input,
+                response_obj,
+            ) if self._is_langfuse_v2() else self._log_langfuse_v1(
+                user_id,
+                metadata,
+                output,
+                start_time,
+                end_time,
+                kwargs,
+                optional_params,
+                input,
+                response_obj,
+            )
+
+            self.Langfuse.flush()
+            print_verbose(
+                f"Langfuse Layer Logging - final response object: {response_obj}"
+            )
+        except:
+            traceback.print_exc()
+            print_verbose(f"Langfuse Layer Error - {traceback.format_exc()}")
+            pass
+
+    async def _async_log_event(
+        self, kwargs, response_obj, start_time, end_time, user_id, print_verbose
+    ):
+        self.log_event(
+            kwargs, response_obj, start_time, end_time, user_id, print_verbose
+        )
+
+    def _is_langfuse_v2(self):
+        import langfuse
+
+        return Version(langfuse.version.__version__) >= Version("2.0.0")
+
+    def _log_langfuse_v1(
+        self,
+        user_id,
+        metadata,
+        output,
+        start_time,
+        end_time,
+        kwargs,
+        optional_params,
+        input,
+        response_obj,
+    ):
+        from langfuse.model import CreateTrace, CreateGeneration
+
+        print(
+            "Please upgrade langfuse to v2.0.0 or higher: https://github.com/langfuse/langfuse-python/releases/tag/v2.0.1"
+        )
+
+        trace = self.Langfuse.trace(
+            CreateTrace(
+                name=metadata.get("generation_name", "litellm-completion"),
+                input=input,
+                output=output,
+                userId=user_id,
+            )
+        )
+
+        trace.generation(
+            CreateGeneration(
+                name=metadata.get("generation_name", "litellm-completion"),
+                startTime=start_time,
+                endTime=end_time,
+                model=kwargs["model"],
+                modelParameters=optional_params,
+                input=input,
+                output=output,
+                usage={
+                    "prompt_tokens": response_obj["usage"]["prompt_tokens"],
+                    "completion_tokens": response_obj["usage"]["completion_tokens"],
+                },
+                metadata=metadata,
+            )
+        )
+
+    def _log_langfuse_v2(
+        self,
+        user_id,
+        metadata,
+        output,
+        start_time,
+        end_time,
+        kwargs,
+        optional_params,
+        input,
+        response_obj,
+    ):
+        trace = self.Langfuse.trace(
+            name=metadata.get("generation_name", "litellm-completion"),
+            input=input,
+            output=output,
+            user_id=metadata.get("trace_user_id", user_id),
+            id=metadata.get("trace_id", None),
+        )
+
+        trace.generation(
+            name=metadata.get("generation_name", "litellm-completion"),
+            id=metadata.get("generation_id", None),
+            startTime=start_time,
+            endTime=end_time,
+            model=kwargs["model"],
+            modelParameters=optional_params,
+            input=input,
+            output=output,
+            usage={
+                "prompt_tokens": response_obj["usage"]["prompt_tokens"],
+                "completion_tokens": response_obj["usage"]["completion_tokens"],
+            },
+            metadata=metadata,
+        )
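_log_langfuse_v2 above reads its trace and generation settings from the request metadata; a hedged sketch of driving it from the caller side (assuming the Langfuse env keys are set and the string-callback wiring):

import litellm

litellm.success_callback = ["langfuse"]  # assumes LANGFUSE_PUBLIC_KEY / LANGFUSE_SECRET_KEY

litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
    metadata={
        "generation_name": "my-generation",  # trace + generation name above
        "trace_id": "trace-123",             # optional; defaults to None
        "trace_user_id": "user-42",          # overrides the user_id argument
    },
)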
    	
        litellm/integrations/langsmith.py
    ADDED
    
@@ -0,0 +1,75 @@
+#### What this does ####
+#    On success, logs events to Langsmith
+import dotenv, os
+import requests
+import requests
+from datetime import datetime
+
+dotenv.load_dotenv()  # Loading env variables using dotenv
+import traceback
+
+
+class LangsmithLogger:
+    # Class variables or attributes
+    def __init__(self):
+        self.langsmith_api_key = os.getenv("LANGSMITH_API_KEY")
+
+    def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
+        # Method definition
+        # inspired by Langsmith http api here: https://github.com/langchain-ai/langsmith-cookbook/blob/main/tracing-examples/rest/rest.ipynb
+        metadata = {}
+        if "litellm_params" in kwargs:
+            metadata = kwargs["litellm_params"].get("metadata", {})
+        # set project name and run_name for langsmith logging
+        # users can pass project_name and run_name to litellm.completion()
+        # Example: litellm.completion(model, messages, metadata={"project_name": "my-litellm-project", "run_name": "my-langsmith-run"})
+        # if not set litellm will use default project_name = litellm-completion, run_name = LLMRun
+        project_name = metadata.get("project_name", "litellm-completion")
+        run_name = metadata.get("run_name", "LLMRun")
+        print_verbose(
+            f"Langsmith Logging - project_name: {project_name}, run_name {run_name}"
+        )
+        try:
+            print_verbose(
+                f"Langsmith Logging - Enters logging function for model {kwargs}"
+            )
+            import requests
+            import datetime
+            from datetime import timezone
+
+            try:
+                start_time = kwargs["start_time"].astimezone(timezone.utc).isoformat()
+                end_time = kwargs["end_time"].astimezone(timezone.utc).isoformat()
+            except:
+                start_time = datetime.datetime.utcnow().isoformat()
+                end_time = datetime.datetime.utcnow().isoformat()
+
+            # filter out kwargs to not include any dicts, langsmith throws an error when trying to log kwargs
+            new_kwargs = {}
+            for key in kwargs:
+                value = kwargs[key]
+                if key == "start_time" or key == "end_time":
+                    pass
+                elif type(value) != dict:
+                    new_kwargs[key] = value
+
+            requests.post(
+                "https://api.smith.langchain.com/runs",
+                json={
+                    "name": run_name,
+                    "run_type": "llm",  # this should always be llm, since litellm always logs llm calls. Langsmith also allows logging "chain" runs
+                    "inputs": {**new_kwargs},
+                    "outputs": response_obj.json(),
+                    "session_name": project_name,
+                    "start_time": start_time,
+                    "end_time": end_time,
+                },
+                headers={"x-api-key": self.langsmith_api_key},
+            )
+            print_verbose(
+                f"Langsmith Layer Logging - final response object: {response_obj}"
+            )
+        except:
+            # traceback.print_exc()
+            print_verbose(f"Langsmith Layer Error - {traceback.format_exc()}")
+            pass
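The comments in log_event spell out the metadata contract; concretely, per the example in the code (with LANGSMITH_API_KEY assumed set and the string-callback wiring):

import litellm

litellm.success_callback = ["langsmith"]

litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
    metadata={
        "project_name": "my-litellm-project",  # sent as session_name
        "run_name": "my-langsmith-run",        # sent as the run's name
    },
)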
    	
        litellm/integrations/litedebugger.py
    ADDED
    
@@ -0,0 +1,262 @@
+import requests, traceback, json, os
+import types
+
+
+class LiteDebugger:
+    user_email = None
+    dashboard_url = None
+
+    def __init__(self, email=None):
+        self.api_url = "https://api.litellm.ai/debugger"
+        self.validate_environment(email)
+        pass
+
+    def validate_environment(self, email):
+        try:
+            self.user_email = (
+                email or os.getenv("LITELLM_TOKEN") or os.getenv("LITELLM_EMAIL")
+            )
+            if (
+                self.user_email == None
+            ):  # if users are trying to use_client=True but token not set
+                raise ValueError(
+                    "litellm.use_client = True but no token or email passed. Please set it in litellm.token"
+                )
+            self.dashboard_url = "https://admin.litellm.ai/" + self.user_email
+            try:
+                print(
+                    f"\033[92mHere's your LiteLLM Dashboard 👉 \033[94m\033[4m{self.dashboard_url}\033[0m"
+                )
+            except:
+                print(f"Here's your LiteLLM Dashboard 👉 {self.dashboard_url}")
+            if self.user_email == None:
+                raise ValueError(
+                    "[Non-Blocking Error] LiteLLMDebugger: Missing LITELLM_TOKEN. Set it in your environment. Eg.: os.environ['LITELLM_TOKEN']= <your_email>"
+                )
+        except Exception as e:
+            raise ValueError(
+                "[Non-Blocking Error] LiteLLMDebugger: Missing LITELLM_TOKEN. Set it in your environment. Eg.: os.environ['LITELLM_TOKEN']= <your_email>"
+            )
+
+    def input_log_event(
+        self,
+        model,
+        messages,
+        end_user,
+        litellm_call_id,
+        call_type,
+        print_verbose,
+        litellm_params,
+        optional_params,
+    ):
+        print_verbose(
+            f"LiteDebugger: Pre-API Call Logging for call id {litellm_call_id}"
+        )
+        try:
+            print_verbose(
+                f"LiteLLMDebugger: Logging - Enters input logging function for model {model}"
+            )
+
+            def remove_key_value(dictionary, key):
+                new_dict = dictionary.copy()  # Create a copy of the original dictionary
+                new_dict.pop(key)  # Remove the specified key-value pair from the copy
+                return new_dict
+
+            updated_litellm_params = remove_key_value(litellm_params, "logger_fn")
+
+            if call_type == "embedding":
+                for (
+                    message
+                ) in (
+                    messages
+                ):  # assuming the input is a list as required by the embedding function
+                    litellm_data_obj = {
+                        "model": model,
+                        "messages": [{"role": "user", "content": message}],
+                        "end_user": end_user,
+                        "status": "initiated",
+                        "litellm_call_id": litellm_call_id,
+                        "user_email": self.user_email,
+                        "litellm_params": updated_litellm_params,
+                        "optional_params": optional_params,
+                    }
+                    print_verbose(
+                        f"LiteLLMDebugger: Logging - logged data obj {litellm_data_obj}"
+                    )
+                    response = requests.post(
+                        url=self.api_url,
+                        headers={"content-type": "application/json"},
+                        data=json.dumps(litellm_data_obj),
+                    )
+                print_verbose(f"LiteDebugger: embedding api response - {response.text}")
+            elif call_type == "completion":
+                litellm_data_obj = {
+                    "model": model,
+                    "messages": messages
+                    if isinstance(messages, list)
+                    else [{"role": "user", "content": messages}],
+                    "end_user": end_user,
+                    "status": "initiated",
+                    "litellm_call_id": litellm_call_id,
+                    "user_email": self.user_email,
+                    "litellm_params": updated_litellm_params,
+                    "optional_params": optional_params,
| 104 | 
            +
                            }
         | 
| 105 | 
            +
                            print_verbose(
         | 
| 106 | 
            +
                                f"LiteLLMDebugger: Logging - logged data obj {litellm_data_obj}"
         | 
| 107 | 
            +
                            )
         | 
| 108 | 
            +
                            response = requests.post(
         | 
| 109 | 
            +
                                url=self.api_url,
         | 
| 110 | 
            +
                                headers={"content-type": "application/json"},
         | 
| 111 | 
            +
                                data=json.dumps(litellm_data_obj),
         | 
| 112 | 
            +
                            )
         | 
| 113 | 
            +
                            print_verbose(
         | 
| 114 | 
            +
                                f"LiteDebugger: completion api response - {response.text}"
         | 
| 115 | 
            +
                            )
         | 
| 116 | 
            +
                    except:
         | 
| 117 | 
            +
                        print_verbose(
         | 
| 118 | 
            +
                            f"[Non-Blocking Error] LiteDebugger: Logging Error - {traceback.format_exc()}"
         | 
| 119 | 
            +
                        )
         | 
| 120 | 
            +
                        pass
         | 
| 121 | 
            +
             | 
| 122 | 
            +
                def post_call_log_event(
         | 
| 123 | 
            +
                    self, original_response, litellm_call_id, print_verbose, call_type, stream
         | 
| 124 | 
            +
                ):
         | 
| 125 | 
            +
                    print_verbose(
         | 
| 126 | 
            +
                        f"LiteDebugger: Post-API Call Logging for call id {litellm_call_id}"
         | 
| 127 | 
            +
                    )
         | 
| 128 | 
            +
                    try:
         | 
| 129 | 
            +
                        if call_type == "embedding":
         | 
| 130 | 
            +
                            litellm_data_obj = {
         | 
| 131 | 
            +
                                "status": "received",
         | 
| 132 | 
            +
                                "additional_details": {
         | 
| 133 | 
            +
                                    "original_response": str(
         | 
| 134 | 
            +
                                        original_response["data"][0]["embedding"][:5]
         | 
| 135 | 
            +
                                    )
         | 
| 136 | 
            +
                                },  # don't store the entire vector
         | 
| 137 | 
            +
                                "litellm_call_id": litellm_call_id,
         | 
| 138 | 
            +
                                "user_email": self.user_email,
         | 
| 139 | 
            +
                            }
         | 
| 140 | 
            +
                        elif call_type == "completion" and not stream:
         | 
| 141 | 
            +
                            litellm_data_obj = {
         | 
| 142 | 
            +
                                "status": "received",
         | 
| 143 | 
            +
                                "additional_details": {"original_response": original_response},
         | 
| 144 | 
            +
                                "litellm_call_id": litellm_call_id,
         | 
| 145 | 
            +
                                "user_email": self.user_email,
         | 
| 146 | 
            +
                            }
         | 
| 147 | 
            +
                        elif call_type == "completion" and stream:
         | 
| 148 | 
            +
                            litellm_data_obj = {
         | 
| 149 | 
            +
                                "status": "received",
         | 
| 150 | 
            +
                                "additional_details": {
         | 
| 151 | 
            +
                                    "original_response": "Streamed response"
         | 
| 152 | 
            +
                                    if isinstance(original_response, types.GeneratorType)
         | 
| 153 | 
            +
                                    else original_response
         | 
| 154 | 
            +
                                },
         | 
| 155 | 
            +
                                "litellm_call_id": litellm_call_id,
         | 
| 156 | 
            +
                                "user_email": self.user_email,
         | 
| 157 | 
            +
                            }
         | 
| 158 | 
            +
                        print_verbose(f"litedebugger post-call data object - {litellm_data_obj}")
         | 
| 159 | 
            +
                        response = requests.post(
         | 
| 160 | 
            +
                            url=self.api_url,
         | 
| 161 | 
            +
                            headers={"content-type": "application/json"},
         | 
| 162 | 
            +
                            data=json.dumps(litellm_data_obj),
         | 
| 163 | 
            +
                        )
         | 
| 164 | 
            +
                        print_verbose(f"LiteDebugger: api response - {response.text}")
         | 
| 165 | 
            +
                    except:
         | 
| 166 | 
            +
                        print_verbose(
         | 
| 167 | 
            +
                            f"[Non-Blocking Error] LiteDebugger: Logging Error - {traceback.format_exc()}"
         | 
| 168 | 
            +
                        )
         | 
| 169 | 
            +
             | 
| 170 | 
            +
                def log_event(
         | 
| 171 | 
            +
                    self,
         | 
| 172 | 
            +
                    end_user,
         | 
| 173 | 
            +
                    response_obj,
         | 
| 174 | 
            +
                    start_time,
         | 
| 175 | 
            +
                    end_time,
         | 
| 176 | 
            +
                    litellm_call_id,
         | 
| 177 | 
            +
                    print_verbose,
         | 
| 178 | 
            +
                    call_type,
         | 
| 179 | 
            +
                    stream=False,
         | 
| 180 | 
            +
                ):
         | 
| 181 | 
            +
                    print_verbose(
         | 
| 182 | 
            +
                        f"LiteDebugger: Success/Failure Call Logging for call id {litellm_call_id}"
         | 
| 183 | 
            +
                    )
         | 
| 184 | 
            +
                    try:
         | 
| 185 | 
            +
                        print_verbose(
         | 
| 186 | 
            +
                            f"LiteLLMDebugger: Success/Failure Logging - Enters handler logging function for function {call_type} and stream set to {stream} with response object {response_obj}"
         | 
| 187 | 
            +
                        )
         | 
| 188 | 
            +
                        total_cost = 0  # [TODO] implement cost tracking
         | 
| 189 | 
            +
                        response_time = (end_time - start_time).total_seconds()
         | 
| 190 | 
            +
                        if call_type == "completion" and stream == False:
         | 
| 191 | 
            +
                            litellm_data_obj = {
         | 
| 192 | 
            +
                                "response_time": response_time,
         | 
| 193 | 
            +
                                "total_cost": total_cost,
         | 
| 194 | 
            +
                                "response": response_obj["choices"][0]["message"]["content"],
         | 
| 195 | 
            +
                                "litellm_call_id": litellm_call_id,
         | 
| 196 | 
            +
                                "status": "success",
         | 
| 197 | 
            +
                            }
         | 
| 198 | 
            +
                            print_verbose(
         | 
| 199 | 
            +
                                f"LiteDebugger: Logging - final data object: {litellm_data_obj}"
         | 
| 200 | 
            +
                            )
         | 
| 201 | 
            +
                            response = requests.post(
         | 
| 202 | 
            +
                                url=self.api_url,
         | 
| 203 | 
            +
                                headers={"content-type": "application/json"},
         | 
| 204 | 
            +
                                data=json.dumps(litellm_data_obj),
         | 
| 205 | 
            +
                            )
         | 
| 206 | 
            +
                        elif call_type == "embedding":
         | 
| 207 | 
            +
                            litellm_data_obj = {
         | 
| 208 | 
            +
                                "response_time": response_time,
         | 
| 209 | 
            +
                                "total_cost": total_cost,
         | 
| 210 | 
            +
                                "response": str(response_obj["data"][0]["embedding"][:5]),
         | 
| 211 | 
            +
                                "litellm_call_id": litellm_call_id,
         | 
| 212 | 
            +
                                "status": "success",
         | 
| 213 | 
            +
                            }
         | 
| 214 | 
            +
                            response = requests.post(
         | 
| 215 | 
            +
                                url=self.api_url,
         | 
| 216 | 
            +
                                headers={"content-type": "application/json"},
         | 
| 217 | 
            +
                                data=json.dumps(litellm_data_obj),
         | 
| 218 | 
            +
                            )
         | 
| 219 | 
            +
                        elif call_type == "completion" and stream == True:
         | 
| 220 | 
            +
                            if len(response_obj["content"]) > 0:  # don't log the empty strings
         | 
| 221 | 
            +
                                litellm_data_obj = {
         | 
| 222 | 
            +
                                    "response_time": response_time,
         | 
| 223 | 
            +
                                    "total_cost": total_cost,
         | 
| 224 | 
            +
                                    "response": response_obj["content"],
         | 
| 225 | 
            +
                                    "litellm_call_id": litellm_call_id,
         | 
| 226 | 
            +
                                    "status": "success",
         | 
| 227 | 
            +
                                }
         | 
| 228 | 
            +
                                print_verbose(
         | 
| 229 | 
            +
                                    f"LiteDebugger: Logging - final data object: {litellm_data_obj}"
         | 
| 230 | 
            +
                                )
         | 
| 231 | 
            +
                                response = requests.post(
         | 
| 232 | 
            +
                                    url=self.api_url,
         | 
| 233 | 
            +
                                    headers={"content-type": "application/json"},
         | 
| 234 | 
            +
                                    data=json.dumps(litellm_data_obj),
         | 
| 235 | 
            +
                                )
         | 
| 236 | 
            +
                        elif "error" in response_obj:
         | 
| 237 | 
            +
                            if "Unable to map your input to a model." in response_obj["error"]:
         | 
| 238 | 
            +
                                total_cost = 0
         | 
| 239 | 
            +
                            litellm_data_obj = {
         | 
| 240 | 
            +
                                "response_time": response_time,
         | 
| 241 | 
            +
                                "model": response_obj["model"],
         | 
| 242 | 
            +
                                "total_cost": total_cost,
         | 
| 243 | 
            +
                                "error": response_obj["error"],
         | 
| 244 | 
            +
                                "end_user": end_user,
         | 
| 245 | 
            +
                                "litellm_call_id": litellm_call_id,
         | 
| 246 | 
            +
                                "status": "failure",
         | 
| 247 | 
            +
                                "user_email": self.user_email,
         | 
| 248 | 
            +
                            }
         | 
| 249 | 
            +
                            print_verbose(
         | 
| 250 | 
            +
                                f"LiteDebugger: Logging - final data object: {litellm_data_obj}"
         | 
| 251 | 
            +
                            )
         | 
| 252 | 
            +
                            response = requests.post(
         | 
| 253 | 
            +
                                url=self.api_url,
         | 
| 254 | 
            +
                                headers={"content-type": "application/json"},
         | 
| 255 | 
            +
                                data=json.dumps(litellm_data_obj),
         | 
| 256 | 
            +
                            )
         | 
| 257 | 
            +
                            print_verbose(f"LiteDebugger: api response - {response.text}")
         | 
| 258 | 
            +
                    except:
         | 
| 259 | 
            +
                        print_verbose(
         | 
| 260 | 
            +
                            f"[Non-Blocking Error] LiteDebugger: Logging Error - {traceback.format_exc()}"
         | 
| 261 | 
            +
                        )
         | 
| 262 | 
            +
                        pass
         | 
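These hooks are driven by litellm's internal logging callbacks; as a standalone illustration, here is a minimal sketch of calling them directly. The constructor signature, `print_verbose=print`, and the response shape are assumptions for the example, not part of this upload:

# Hypothetical direct use of the LiteDebugger hooks above (illustrative only).
import datetime

debugger = LiteDebugger(email="you@example.com")  # assumed constructor signature

start = datetime.datetime.now()
debugger.input_log_event(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hi"}],
    end_user="end-user-1",
    litellm_call_id="call-123",
    call_type="completion",
    print_verbose=print,  # any callable that accepts a string
    litellm_params={"logger_fn": None},  # must contain "logger_fn"; it gets popped
    optional_params={},
)
# ... make the LLM call, then log the outcome:
debugger.log_event(
    end_user="end-user-1",
    response_obj={"choices": [{"message": {"content": "Hello!"}}]},
    start_time=start,
    end_time=datetime.datetime.now(),
    litellm_call_id="call-123",
    print_verbose=print,
    call_type="completion",
)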
    	
litellm/integrations/llmonitor.py ADDED
@@ -0,0 +1,127 @@
#### What this does ####
#    On success + failure, log events to LLMonitor
import datetime
import traceback
import dotenv
import os
import requests

dotenv.load_dotenv()  # Loading env variables using dotenv


# convert to {completion: xx, tokens: xx}
def parse_usage(usage):
    return {
        "completion": usage["completion_tokens"] if "completion_tokens" in usage else 0,
        "prompt": usage["prompt_tokens"] if "prompt_tokens" in usage else 0,
    }


def parse_messages(input):
    if input is None:
        return None

    def clean_message(message):
        # if it's a string, return it as-is
        if isinstance(message, str):
            return message

        if "message" in message:
            return clean_message(message["message"])
        text = message["content"]
        if text is None:
            text = message.get("function_call", None)

        return {
            "role": message["role"],
            "text": text,
        }

    if isinstance(input, list):
        if len(input) == 1:
            return clean_message(input[0])
        else:
            return [clean_message(msg) for msg in input]
    else:
        return clean_message(input)


class LLMonitorLogger:
    # Class variables or attributes
    def __init__(self):
        # Instance variables
        self.api_url = os.getenv("LLMONITOR_API_URL") or "https://app.llmonitor.com"
        self.app_id = os.getenv("LLMONITOR_APP_ID")

    def log_event(
        self,
        type,
        event,
        run_id,
        model,
        print_verbose,
        input=None,
        user_id=None,
        response_obj=None,
        start_time=None,
        end_time=None,
        error=None,
    ):
        # Method definition
        try:
            # default the timestamps at call time (a datetime.now() default
            # argument would be evaluated once, at function definition)
            start_time = start_time or datetime.datetime.now()
            end_time = end_time or datetime.datetime.now()
            print_verbose(f"LLMonitor Logging - Logging request for model {model}")

            if response_obj:
                usage = (
                    parse_usage(response_obj["usage"])
                    if "usage" in response_obj
                    else None
                )
                output = response_obj["choices"] if "choices" in response_obj else None
            else:
                usage = None
                output = None

            if error:
                error_obj = {"stack": error}
            else:
                error_obj = None

            data = [
                {
                    "type": type,
                    "name": model,
                    "runId": run_id,
                    "app": self.app_id,
                    "event": "start",
                    "timestamp": start_time.isoformat(),
                    "userId": user_id,
                    "input": parse_messages(input),
                },
                {
                    "type": type,
                    "runId": run_id,
                    "app": self.app_id,
                    "event": event,
                    "error": error_obj,
                    "timestamp": end_time.isoformat(),
                    "userId": user_id,
                    "output": parse_messages(output),
                    "tokensUsage": usage,
                },
            ]

            print_verbose(f"LLMonitor Logging - final data object: {data}")

            response = requests.post(
                self.api_url + "/api/report",
                headers={"Content-Type": "application/json"},
                json={"events": data},
            )

            print_verbose(f"LLMonitor Logging - response: {response}")
        except:
            # traceback.print_exc()
            print_verbose(f"LLMonitor Logging Error - {traceback.format_exc()}")
            pass
    	
litellm/integrations/prompt_layer.py ADDED
@@ -0,0 +1,72 @@
#### What this does ####
#    On success, logs events to Promptlayer
import dotenv, os
import requests

dotenv.load_dotenv()  # Loading env variables using dotenv
import traceback


class PromptLayerLogger:
    # Class variables or attributes
    def __init__(self):
        # Instance variables
        self.key = os.getenv("PROMPTLAYER_API_KEY")

    def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
        # Method definition
        try:
            new_kwargs = {}
            new_kwargs["model"] = kwargs["model"]
            new_kwargs["messages"] = kwargs["messages"]

            # add kwargs["optional_params"] to new_kwargs
            for optional_param in kwargs["optional_params"]:
                new_kwargs[optional_param] = kwargs["optional_params"][optional_param]

            print_verbose(
                f"Prompt Layer Logging - Enters logging function for model kwargs: {new_kwargs}\n, response: {response_obj}"
            )

            request_response = requests.post(
                "https://api.promptlayer.com/rest/track-request",
                json={
                    "function_name": "openai.ChatCompletion.create",
                    "kwargs": new_kwargs,
                    "tags": ["hello", "world"],
                    "request_response": dict(response_obj),
                    "request_start_time": int(start_time.timestamp()),
                    "request_end_time": int(end_time.timestamp()),
                    "api_key": self.key,
                    # Optional params for PromptLayer
                    # "prompt_id": "<PROMPT ID>",
                    # "prompt_input_variables": "<Dictionary of variables for prompt>",
                    # "prompt_version":1,
                },
            )
            print_verbose(
                f"Prompt Layer Logging: success - final response object: {request_response.text}"
            )
            response_json = request_response.json()
            if "success" not in response_json:
                raise Exception("Promptlayer did not successfully log the response!")

            if "request_id" in response_json:
                print(kwargs["litellm_params"]["metadata"])
                if kwargs["litellm_params"]["metadata"] is not None:
                    response = requests.post(
                        "https://api.promptlayer.com/rest/track-metadata",
                        json={
                            "request_id": response_json["request_id"],
                            "api_key": self.key,
                            "metadata": kwargs["litellm_params"]["metadata"],
                        },
                    )
                    print_verbose(
                        f"Prompt Layer Logging: success - metadata post response object: {response.text}"
                    )

        except:
            print_verbose(f"error: Prompt Layer Error - {traceback.format_exc()}")
            pass
    	
litellm/integrations/s3.py ADDED
@@ -0,0 +1,150 @@
#### What this does ####
#    On success + failure, log events to s3

import dotenv, os
import json
import requests

dotenv.load_dotenv()  # Loading env variables using dotenv
import traceback
import datetime, subprocess, sys
import litellm, uuid
from litellm._logging import print_verbose


class S3Logger:
    # Class variables or attributes
    def __init__(
        self,
        s3_bucket_name=None,
        s3_region_name=None,
        s3_api_version=None,
        s3_use_ssl=True,
        s3_verify=None,
        s3_endpoint_url=None,
        s3_aws_access_key_id=None,
        s3_aws_secret_access_key=None,
        s3_aws_session_token=None,
        s3_config=None,
        **kwargs,
    ):
        import boto3

        try:
            print_verbose("in init s3 logger")

            if litellm.s3_callback_params is not None:
                # read in .env variables - example os.environ/AWS_BUCKET_NAME
                for key, value in litellm.s3_callback_params.items():
                    if type(value) is str and value.startswith("os.environ/"):
                        litellm.s3_callback_params[key] = litellm.get_secret(value)
                # now set s3 params from litellm.s3_callback_params
                s3_bucket_name = litellm.s3_callback_params.get("s3_bucket_name")
                s3_region_name = litellm.s3_callback_params.get("s3_region_name")
                s3_api_version = litellm.s3_callback_params.get("s3_api_version")
                s3_use_ssl = litellm.s3_callback_params.get("s3_use_ssl")
                s3_verify = litellm.s3_callback_params.get("s3_verify")
                s3_endpoint_url = litellm.s3_callback_params.get("s3_endpoint_url")
                s3_aws_access_key_id = litellm.s3_callback_params.get(
                    "s3_aws_access_key_id"
                )
                s3_aws_secret_access_key = litellm.s3_callback_params.get(
                    "s3_aws_secret_access_key"
                )
                s3_aws_session_token = litellm.s3_callback_params.get(
                    "s3_aws_session_token"
                )
                s3_config = litellm.s3_callback_params.get("s3_config")
                # done reading litellm.s3_callback_params

            self.bucket_name = s3_bucket_name
            # Create an S3 client with custom endpoint URL
            self.s3_client = boto3.client(
                "s3",
                region_name=s3_region_name,
                endpoint_url=s3_endpoint_url,
                api_version=s3_api_version,
                use_ssl=s3_use_ssl,
                verify=s3_verify,
                aws_access_key_id=s3_aws_access_key_id,
                aws_secret_access_key=s3_aws_secret_access_key,
                aws_session_token=s3_aws_session_token,
                config=s3_config,
                **kwargs,
            )
        except Exception as e:
            print_verbose(f"Got exception on init s3 client {str(e)}")
            raise e

    async def _async_log_event(
        self, kwargs, response_obj, start_time, end_time, print_verbose
    ):
        self.log_event(kwargs, response_obj, start_time, end_time, print_verbose)

    def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
        try:
            print_verbose(f"s3 Logging - Enters logging function for model {kwargs}")

            # construct payload to send to s3
            # follows the same params as langfuse.py
            litellm_params = kwargs.get("litellm_params", {})
            metadata = (
                litellm_params.get("metadata", {}) or {}
            )  # if litellm_params['metadata'] == None
            messages = kwargs.get("messages")
            optional_params = kwargs.get("optional_params", {})
            call_type = kwargs.get("call_type", "litellm.completion")
            cache_hit = kwargs.get("cache_hit", False)
            usage = response_obj["usage"]
            id = response_obj.get("id", str(uuid.uuid4()))

            # Build the initial payload
            payload = {
                "id": id,
                "call_type": call_type,
                "cache_hit": cache_hit,
                "startTime": start_time,
                "endTime": end_time,
                "model": kwargs.get("model", ""),
                "user": kwargs.get("user", ""),
                "modelParameters": optional_params,
                "messages": messages,
                "response": response_obj,
                "usage": usage,
                "metadata": metadata,
            }

            # Ensure everything in the payload is converted to str
            for key, value in payload.items():
                try:
                    payload[key] = str(value)
                except:
                    # non blocking if it can't cast to a str
                    pass

            s3_object_key = (
                payload["id"] + "-time=" + str(start_time)
            )  # we need the s3 key to include the time, so we log cache hits too

            payload = json.dumps(payload)

            print_verbose(f"\ns3 Logger - Logging payload = {payload}")

            response = self.s3_client.put_object(
                Bucket=self.bucket_name,
                Key=s3_object_key,
                Body=payload,
                ContentType="application/json",
                ContentLanguage="en",
                # use the object key here; the original referenced `key`, a
                # leftover variable from the str-conversion loop above
                ContentDisposition=f'inline; filename="{s3_object_key}.json"',
            )

            print_verbose(f"Response from s3:{str(response)}")

            print_verbose(f"s3 Layer Logging - final response object: {response_obj}")
            return response
        except Exception as e:
            traceback.print_exc()
            print_verbose(f"s3 Layer Error - {str(e)}\n{traceback.format_exc()}")
            pass
    	
        litellm/integrations/supabase.py
    ADDED
    
    | @@ -0,0 +1,117 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            #### What this does ####
         | 
| 2 | 
            +
            #    On success + failure, log events to Supabase
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            import dotenv, os
         | 
| 5 | 
            +
            import requests
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            dotenv.load_dotenv()  # Loading env variables using dotenv
         | 
| 8 | 
            +
            import traceback
         | 
| 9 | 
            +
            import datetime, subprocess, sys
         | 
| 10 | 
            +
            import litellm
         | 
| 11 | 
            +
             | 
| 12 | 
            +
             | 
| 13 | 
            +
            class Supabase:
         | 
| 14 | 
            +
                # Class variables or attributes
         | 
| 15 | 
            +
                supabase_table_name = "request_logs"
         | 
| 16 | 
            +
             | 
| 17 | 
            +
                def __init__(self):
         | 
| 18 | 
            +
                    # Instance variables
         | 
| 19 | 
            +
                    self.supabase_url = os.getenv("SUPABASE_URL")
         | 
| 20 | 
            +
                    self.supabase_key = os.getenv("SUPABASE_KEY")
         | 
| 21 | 
            +
                    try:
         | 
| 22 | 
            +
                        import supabase
         | 
| 23 | 
            +
                    except ImportError:
         | 
| 24 | 
            +
                        subprocess.check_call([sys.executable, "-m", "pip", "install", "supabase"])
         | 
| 25 | 
            +
                        import supabase
         | 
| 26 | 
            +
                    self.supabase_client = supabase.create_client(
         | 
| 27 | 
            +
                        self.supabase_url, self.supabase_key
         | 
| 28 | 
            +
                    )
         | 
| 29 | 
            +
             | 
| 30 | 
            +
                def input_log_event(
         | 
| 31 | 
            +
                    self, model, messages, end_user, litellm_call_id, print_verbose
         | 
| 32 | 
            +
                ):
         | 
| 33 | 
            +
                    try:
         | 
| 34 | 
            +
                        print_verbose(
         | 
| 35 | 
            +
                            f"Supabase Logging - Enters input logging function for model {model}"
         | 
| 36 | 
            +
                        )
         | 
| 37 | 
            +
                        supabase_data_obj = {
         | 
| 38 | 
            +
                            "model": model,
         | 
| 39 | 
            +
                            "messages": messages,
         | 
| 40 | 
            +
                            "end_user": end_user,
         | 
| 41 | 
            +
                            "status": "initiated",
         | 
| 42 | 
            +
                            "litellm_call_id": litellm_call_id,
         | 
| 43 | 
            +
                        }
         | 
| 44 | 
            +
                        data, count = (
         | 
| 45 | 
            +
                            self.supabase_client.table(self.supabase_table_name)
         | 
| 46 | 
            +
                            .insert(supabase_data_obj)
         | 
| 47 | 
            +
                            .execute()
         | 
| 48 | 
            +
                        )
         | 
| 49 | 
            +
                        print_verbose(f"data: {data}")
         | 
| 50 | 
            +
                    except:
         | 
| 51 | 
            +
                        print_verbose(f"Supabase Logging Error - {traceback.format_exc()}")
         | 
| 52 | 
            +
                        pass
         | 
| 53 | 
            +
             | 
| 54 | 
            +
                def log_event(
         | 
| 55 | 
            +
                    self,
         | 
| 56 | 
            +
                    model,
         | 
| 57 | 
            +
                    messages,
         | 
| 58 | 
            +
                    end_user,
         | 
| 59 | 
            +
                    response_obj,
         | 
| 60 | 
            +
                    start_time,
         | 
| 61 | 
            +
                    end_time,
         | 
| 62 | 
            +
                    litellm_call_id,
         | 
| 63 | 
            +
                    print_verbose,
         | 
| 64 | 
            +
                ):
         | 
| 65 | 
            +
                    try:
         | 
| 66 | 
            +
                        print_verbose(
         | 
| 67 | 
            +
                            f"Supabase Logging - Enters logging function for model {model}, response_obj: {response_obj}"
         | 
| 68 | 
            +
                        )
         | 
| 69 | 
            +
             | 
| 70 | 
            +
                        total_cost = litellm.completion_cost(completion_response=response_obj)
         | 
| 71 | 
            +
             | 
| 72 | 
            +
                        response_time = (end_time - start_time).total_seconds()
         | 
| 73 | 
            +
                        if "choices" in response_obj:
         | 
| 74 | 
            +
                            supabase_data_obj = {
         | 
| 75 | 
            +
                                "response_time": response_time,
         | 
| 76 | 
            +
                                "model": response_obj["model"],
         | 
| 77 | 
            +
                                "total_cost": total_cost,
         | 
| 78 | 
            +
                                "messages": messages,
         | 
| 79 | 
            +
                                "response": response_obj["choices"][0]["message"]["content"],
         | 
| 80 | 
            +
                                "end_user": end_user,
         | 
| 81 | 
            +
                                "litellm_call_id": litellm_call_id,
         | 
| 82 | 
            +
                                "status": "success",
         | 
| 83 | 
            +
                            }
         | 
| 84 | 
            +
                            print_verbose(
         | 
| 85 | 
            +
                                f"Supabase Logging - final data object: {supabase_data_obj}"
         | 
| 86 | 
            +
                            )
         | 
| 87 | 
            +
                            data, count = (
         | 
| 88 | 
            +
                                self.supabase_client.table(self.supabase_table_name)
         | 
| 89 | 
            +
                                .upsert(supabase_data_obj, on_conflict="litellm_call_id")
         | 
| 90 | 
            +
                                .execute()
         | 
| 91 | 
            +
                            )
         | 
| 92 | 
            +
                        elif "error" in response_obj:
         | 
| 93 | 
            +
                            if "Unable to map your input to a model." in response_obj["error"]:
         | 
| 94 | 
            +
                                total_cost = 0
         | 
| 95 | 
            +
                            supabase_data_obj = {
         | 
| 96 | 
            +
                                "response_time": response_time,
         | 
| 97 | 
            +
                                "model": response_obj["model"],
         | 
| 98 | 
            +
                                "total_cost": total_cost,
         | 
| 99 | 
            +
                                "messages": messages,
         | 
| 100 | 
            +
                                "error": response_obj["error"],
         | 
| 101 | 
            +
                                "end_user": end_user,
         | 
| 102 | 
            +
                                "litellm_call_id": litellm_call_id,
         | 
| 103 | 
            +
                                "status": "failure",
         | 
| 104 | 
            +
                            }
         | 
| 105 | 
            +
                            print_verbose(
         | 
| 106 | 
            +
                                f"Supabase Logging - final data object: {supabase_data_obj}"
         | 
| 107 | 
            +
                            )
         | 
| 108 | 
            +
                            data, count = (
         | 
| 109 | 
            +
                                self.supabase_client.table(self.supabase_table_name)
         | 
| 110 | 
            +
                                .upsert(supabase_data_obj, on_conflict="litellm_call_id")
         | 
| 111 | 
            +
                                .execute()
         | 
| 112 | 
            +
                            )
         | 
| 113 | 
            +
             | 
| 114 | 
            +
                    except:
         | 
| 115 | 
            +
                        # traceback.print_exc()
         | 
| 116 | 
            +
                        print_verbose(f"Supabase Logging Error - {traceback.format_exc()}")
         | 
| 117 | 
            +
                        pass
         | 
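A minimal sketch of what the success path above writes (illustration only, not part of the diff; the table name and values here are hypothetical). Because the upsert conflicts on litellm_call_id, logging the same call twice updates one row instead of inserting a duplicate:

# Illustration: the success-path row shape, with made-up values.
supabase_data_obj = {
    "response_time": 1.42,
    "model": "gpt-3.5-turbo",
    "total_cost": 0.000021,
    "messages": [{"role": "user", "content": "Hi"}],
    "response": "Hello!",
    "end_user": "user-123",
    "litellm_call_id": "7f1c-...",  # upsert conflict key
    "status": "success",
}
# supabase_client.table("request_logs").upsert(
#     supabase_data_obj, on_conflict="litellm_call_id"
# ).execute()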
    	
litellm/integrations/traceloop.py
ADDED

@@ -0,0 +1,114 @@
class TraceloopLogger:
    def __init__(self):
        from traceloop.sdk.tracing.tracing import TracerWrapper
        from traceloop.sdk import Traceloop

        Traceloop.init(app_name="Litellm-Server", disable_batch=True)
        self.tracer_wrapper = TracerWrapper()

    def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
        from opentelemetry.trace import SpanKind
        from opentelemetry.semconv.ai import SpanAttributes

        try:
            tracer = self.tracer_wrapper.get_tracer()

            model = kwargs.get("model")

            # LiteLLM uses the standard OpenAI library, so OpenAI calls are already handled by the Traceloop SDK
            if kwargs.get("litellm_params").get("custom_llm_provider") == "openai":
                return

            optional_params = kwargs.get("optional_params", {})
            with tracer.start_as_current_span(
                "litellm.completion",
                kind=SpanKind.CLIENT,
            ) as span:
                if span.is_recording():
                    span.set_attribute(
                        SpanAttributes.LLM_REQUEST_MODEL, kwargs.get("model")
                    )
                    if "stop" in optional_params:
                        span.set_attribute(
                            SpanAttributes.LLM_CHAT_STOP_SEQUENCES,
                            optional_params.get("stop"),
                        )
                    if "frequency_penalty" in optional_params:
                        span.set_attribute(
                            SpanAttributes.LLM_FREQUENCY_PENALTY,
                            optional_params.get("frequency_penalty"),
                        )
                    if "presence_penalty" in optional_params:
                        span.set_attribute(
                            SpanAttributes.LLM_PRESENCE_PENALTY,
                            optional_params.get("presence_penalty"),
                        )
                    if "top_p" in optional_params:
                        span.set_attribute(
                            SpanAttributes.LLM_TOP_P, optional_params.get("top_p")
                        )
                    if "tools" in optional_params or "functions" in optional_params:
                        span.set_attribute(
                            SpanAttributes.LLM_REQUEST_FUNCTIONS,
                            optional_params.get(
                                "tools", optional_params.get("functions")
                            ),
                        )
                    if "user" in optional_params:
                        span.set_attribute(
                            SpanAttributes.LLM_USER, optional_params.get("user")
                        )
                    if "max_tokens" in optional_params:
                        span.set_attribute(
                            SpanAttributes.LLM_REQUEST_MAX_TOKENS,
                            # read from optional_params (the key checked above),
                            # not from the top-level kwargs
                            optional_params.get("max_tokens"),
                        )
                    if "temperature" in optional_params:
                        span.set_attribute(
                            SpanAttributes.LLM_TEMPERATURE,
                            optional_params.get("temperature"),
                        )

                    for idx, prompt in enumerate(kwargs.get("messages")):
                        span.set_attribute(
                            f"{SpanAttributes.LLM_PROMPTS}.{idx}.role",
                            prompt.get("role"),
                        )
                        span.set_attribute(
                            f"{SpanAttributes.LLM_PROMPTS}.{idx}.content",
                            prompt.get("content"),
                        )

                    span.set_attribute(
                        SpanAttributes.LLM_RESPONSE_MODEL, response_obj.get("model")
                    )
                    usage = response_obj.get("usage")
                    if usage:
                        span.set_attribute(
                            SpanAttributes.LLM_USAGE_TOTAL_TOKENS,
                            usage.get("total_tokens"),
                        )
                        span.set_attribute(
                            SpanAttributes.LLM_USAGE_COMPLETION_TOKENS,
                            usage.get("completion_tokens"),
                        )
                        span.set_attribute(
                            SpanAttributes.LLM_USAGE_PROMPT_TOKENS,
                            usage.get("prompt_tokens"),
                        )

                    for idx, choice in enumerate(response_obj.get("choices")):
                        span.set_attribute(
                            f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.finish_reason",
                            choice.get("finish_reason"),
                        )
                        span.set_attribute(
                            f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.role",
                            choice.get("message").get("role"),
                        )
                        span.set_attribute(
                            f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.content",
                            choice.get("message").get("content"),
                        )

        except Exception as e:
            print_verbose(f"Traceloop Layer Error - {e}")
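A hedged sketch of driving the logger above by hand (assuming the traceloop-sdk and OpenTelemetry packages are installed). The kwargs and response shapes mirror what log_event reads; the concrete values are made up for illustration:

# Illustration only, not part of the diff.
from datetime import datetime

logger = TraceloopLogger()  # runs Traceloop.init() in __init__

kwargs = {
    "model": "claude-instant-1",
    "litellm_params": {"custom_llm_provider": "anthropic"},
    "optional_params": {"temperature": 0.2, "max_tokens": 256},
    "messages": [{"role": "user", "content": "hello"}],
}
response_obj = {
    "model": "claude-instant-1",
    "usage": {"prompt_tokens": 5, "completion_tokens": 7, "total_tokens": 12},
    "choices": [
        {"finish_reason": "stop", "message": {"role": "assistant", "content": "hi"}}
    ],
}

start, end = datetime.now(), datetime.now()
logger.log_event(kwargs, response_obj, start, end, print_verbose=print)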
    	
litellm/integrations/weights_biases.py
ADDED

@@ -0,0 +1,223 @@
imported_openAIResponse = True
try:
    import io
    import logging
    import sys
    from typing import Any, Dict, List, Optional, TypeVar

    from wandb.sdk.data_types import trace_tree

    if sys.version_info >= (3, 8):
        from typing import Literal, Protocol
    else:
        from typing_extensions import Literal, Protocol

    logger = logging.getLogger(__name__)

    K = TypeVar("K", bound=str)
    V = TypeVar("V")

    class OpenAIResponse(Protocol[K, V]):  # type: ignore
        # contains a (known) object attribute
        object: Literal["chat.completion", "edit", "text_completion"]

        def __getitem__(self, key: K) -> V:
            ...  # pragma: no cover

        def get(self, key: K, default: Optional[V] = None) -> Optional[V]:
            ...  # pragma: no cover

    class OpenAIRequestResponseResolver:
        def __call__(
            self,
            request: Dict[str, Any],
            response: OpenAIResponse,
            time_elapsed: float,
        ) -> Optional[trace_tree.WBTraceTree]:
            try:
                if response["object"] == "edit":
                    return self._resolve_edit(request, response, time_elapsed)
                elif response["object"] == "text_completion":
                    return self._resolve_completion(request, response, time_elapsed)
                elif response["object"] == "chat.completion":
                    return self._resolve_chat_completion(
                        request, response, time_elapsed
                    )
                else:
                    logger.info(f"Unknown OpenAI response object: {response['object']}")
            except Exception as e:
                logger.warning(f"Failed to resolve request/response: {e}")
            return None

        @staticmethod
        def results_to_trace_tree(
            request: Dict[str, Any],
            response: OpenAIResponse,
            results: List[trace_tree.Result],
            time_elapsed: float,
        ) -> trace_tree.WBTraceTree:
            """Converts the request, response, and results into a trace tree.

            params:
                request: The request dictionary
                response: The response object
                results: A list of result objects
                time_elapsed: The time elapsed in seconds
            returns:
                A wandb trace tree object.
            """
            start_time_ms = int(round(response["created"] * 1000))
            end_time_ms = start_time_ms + int(round(time_elapsed * 1000))
            span = trace_tree.Span(
                name=f"{response.get('model', 'openai')}_{response['object']}_{response.get('created')}",
                attributes=dict(response),  # type: ignore
                start_time_ms=start_time_ms,
                end_time_ms=end_time_ms,
                span_kind=trace_tree.SpanKind.LLM,
                results=results,
            )
            model_obj = {"request": request, "response": response, "_kind": "openai"}
            return trace_tree.WBTraceTree(root_span=span, model_dict=model_obj)

        def _resolve_edit(
            self,
            request: Dict[str, Any],
            response: OpenAIResponse,
            time_elapsed: float,
        ) -> trace_tree.WBTraceTree:
            """Resolves the request and response objects for `openai.Edit`."""
            request_str = (
                f"\n\n**Instruction**: {request['instruction']}\n\n"
                f"**Input**: {request['input']}\n"
            )
            choices = [
                f"\n\n**Edited**: {choice['text']}\n" for choice in response["choices"]
            ]

            return self._request_response_result_to_trace(
                request=request,
                response=response,
                request_str=request_str,
                choices=choices,
                time_elapsed=time_elapsed,
            )

        def _resolve_completion(
            self,
            request: Dict[str, Any],
            response: OpenAIResponse,
            time_elapsed: float,
        ) -> trace_tree.WBTraceTree:
            """Resolves the request and response objects for `openai.Completion`."""
            request_str = f"\n\n**Prompt**: {request['prompt']}\n"
            choices = [
                f"\n\n**Completion**: {choice['text']}\n"
                for choice in response["choices"]
            ]

            return self._request_response_result_to_trace(
                request=request,
                response=response,
                request_str=request_str,
                choices=choices,
                time_elapsed=time_elapsed,
            )

        def _resolve_chat_completion(
            self,
            request: Dict[str, Any],
            response: OpenAIResponse,
            time_elapsed: float,
        ) -> trace_tree.WBTraceTree:
            """Resolves the request and response objects for `openai.ChatCompletion`."""
            prompt = io.StringIO()
            for message in request["messages"]:
                prompt.write(f"\n\n**{message['role']}**: {message['content']}\n")
            request_str = prompt.getvalue()

            choices = [
                f"\n\n**{choice['message']['role']}**: {choice['message']['content']}\n"
                for choice in response["choices"]
            ]

            return self._request_response_result_to_trace(
                request=request,
                response=response,
                request_str=request_str,
                choices=choices,
                time_elapsed=time_elapsed,
            )

        def _request_response_result_to_trace(
            self,
            request: Dict[str, Any],
            response: OpenAIResponse,
            request_str: str,
            choices: List[str],
            time_elapsed: float,
        ) -> trace_tree.WBTraceTree:
            """Builds one result per choice and converts everything into a trace tree."""
            results = [
                trace_tree.Result(
                    inputs={"request": request_str},
                    outputs={"response": choice},
                )
                for choice in choices
            ]
            trace = self.results_to_trace_tree(request, response, results, time_elapsed)
            return trace

except:
    imported_openAIResponse = False

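Before the logger itself, a short sketch of what the resolver above produces for a chat completion (illustration only; the request/response dicts are made up and wandb must be installed):

# Illustration: resolving a hypothetical chat.completion payload.
import time

resolver = OpenAIRequestResponseResolver()
request = {"messages": [{"role": "user", "content": "2+2?"}]}
response = {
    "object": "chat.completion",
    "created": int(time.time()),
    "model": "gpt-3.5-turbo",
    "choices": [{"message": {"role": "assistant", "content": "4"}}],
}
trace = resolver(request, response, time_elapsed=0.8)  # WBTraceTree, or None on failure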
#### What this does ####
#    On success, logs events to Weights & Biases
import dotenv, os
import requests
from datetime import datetime

dotenv.load_dotenv()  # Loading env variables using dotenv
import traceback


class WeightsBiasesLogger:
    # Class variables or attributes
    def __init__(self):
        try:
            import wandb
        except:
            raise Exception(
                "\033[91m wandb not installed, try running 'pip install wandb' to fix this error\033[0m"
            )
        if imported_openAIResponse == False:
            raise Exception(
                "\033[91m wandb's trace_tree helpers could not be imported, try running 'pip install wandb' to fix this error\033[0m"
            )
        self.resolver = OpenAIRequestResponseResolver()

    def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
        # Method definition
        import wandb

        try:
            print_verbose(f"W&B Logging - Enters logging function for model {kwargs}")
            run = wandb.init()
            print_verbose(response_obj)

            trace = self.resolver(
                kwargs, response_obj, (end_time - start_time).total_seconds()
            )

            if trace is not None:
                run.log({"trace": trace})

            run.finish()
            print_verbose(
                f"W&B Logging - final response object: {response_obj}"
            )
        except:
            # traceback.print_exc()
            print_verbose(f"W&B Logging Layer Error - {traceback.format_exc()}")
            pass
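In practice this logger is driven through litellm's callback machinery rather than instantiated directly; a hedged sketch of the presumed wiring (the "wandb" callback name is an assumption based on this integration's role, and a configured W&B login is required):

# Sketch, not part of the diff: routing successful completions to W&B.
import litellm

litellm.success_callback = ["wandb"]

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello!"}],
)
# On success, litellm invokes WeightsBiasesLogger.log_event, which starts a
# wandb run and logs the resolved trace tree under the "trace" key.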
    	
litellm/llms/__init__.py
ADDED

@@ -0,0 +1 @@
from . import *
    	
litellm/llms/ai21.py
ADDED

@@ -0,0 +1,212 @@
import os, types, traceback
import json
from enum import Enum
import requests
import time, httpx
from typing import Callable, Optional
from litellm.utils import ModelResponse, Choices, Message
import litellm


class AI21Error(Exception):
    def __init__(self, status_code, message):
        self.status_code = status_code
        self.message = message
        self.request = httpx.Request(
            method="POST", url="https://api.ai21.com/studio/v1/"
        )
        self.response = httpx.Response(status_code=status_code, request=self.request)
        super().__init__(
            self.message
        )  # Call the base class constructor with the parameters it needs


class AI21Config:
    """
    Reference: https://docs.ai21.com/reference/j2-complete-ref

    The class `AI21Config` provides configuration for AI21's API interface. Below are the parameters:

    - `numResults` (int32): Number of completions to sample and return. Optional, default is 1. If the temperature is greater than 0 (non-greedy decoding), a value greater than 1 can be meaningful.

    - `maxTokens` (int32): The maximum number of tokens to generate per result. Optional, default is 16. If no `stopSequences` are given, generation stops after producing `maxTokens`.

    - `minTokens` (int32): The minimum number of tokens to generate per result. Optional, default is 0. If `stopSequences` are given, they are ignored until `minTokens` are generated.

    - `temperature` (float): Modifies the distribution from which tokens are sampled. Optional, default is 0.7. A value of 0 essentially disables sampling and results in greedy decoding.

    - `topP` (float): Used for sampling tokens from the corresponding top percentile of probability mass. Optional, default is 1. For instance, a value of 0.9 considers only tokens comprising the top 90% probability mass.

    - `stopSequences` (array of strings): Stops decoding if any of the input strings is generated. Optional.

    - `topKReturn` (int32): Range between 0 and 10, inclusive. Optional, default is 0. Specifies the top-K alternative tokens to return. A non-zero value includes the string representations and log-probabilities for each of the top-K alternatives at each position.

    - `frequencePenalty` (object): Placeholder for the frequency penalty object.

    - `presencePenalty` (object): Placeholder for the presence penalty object.

    - `countPenalty` (object): Placeholder for the count penalty object.
    """

    numResults: Optional[int] = None
    maxTokens: Optional[int] = None
    minTokens: Optional[int] = None
    temperature: Optional[float] = None
    topP: Optional[float] = None
    stopSequences: Optional[list] = None
    topKReturn: Optional[int] = None
    frequencePenalty: Optional[dict] = None
    presencePenalty: Optional[dict] = None
    countPenalty: Optional[dict] = None

    def __init__(
        self,
        numResults: Optional[int] = None,
        maxTokens: Optional[int] = None,
        minTokens: Optional[int] = None,
        temperature: Optional[float] = None,
        topP: Optional[float] = None,
        stopSequences: Optional[list] = None,
        topKReturn: Optional[int] = None,
        frequencePenalty: Optional[dict] = None,
        presencePenalty: Optional[dict] = None,
        countPenalty: Optional[dict] = None,
    ) -> None:
        locals_ = locals()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }

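A small sketch of the config flow documented above: values set on litellm.AI21Config become class attributes, and get_config() returns only the ones that were actually set, which completion() below merges into optional_params without overriding per-call arguments:

# Illustration, not part of the diff.
import litellm

litellm.AI21Config(maxTokens=256, temperature=0.3)
print(litellm.AI21Config.get_config())
# -> {'maxTokens': 256, 'temperature': 0.3}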
def validate_environment(api_key):
    if api_key is None:
        raise ValueError(
            "Missing AI21 API Key - A call is being made to ai21 but no key is set either in the environment variables or via params"
        )
    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "Authorization": "Bearer " + api_key,
    }
    return headers


def completion(
    model: str,
    messages: list,
    api_base: str,
    model_response: ModelResponse,
    print_verbose: Callable,
    encoding,
    api_key,
    logging_obj,
    optional_params=None,
    litellm_params=None,
    logger_fn=None,
):
    headers = validate_environment(api_key)
    prompt = ""
    for message in messages:
        # AI21's completion endpoint takes a single prompt string, so message
        # contents are concatenated regardless of role
        prompt += f"{message['content']}"

    ## Load Config
    config = litellm.AI21Config.get_config()
    for k, v in config.items():
        if (
            k not in optional_params
        ):  # completion(top_k=3) > ai21_config(top_k=3) <- allows for dynamic variables to be passed in
            optional_params[k] = v

    data = {
        "prompt": prompt,
        **optional_params,
    }

    ## LOGGING
    logging_obj.pre_call(
        input=prompt,
        api_key=api_key,
        additional_args={"complete_input_dict": data},
    )
    ## COMPLETION CALL
    response = requests.post(
        api_base + model + "/complete", headers=headers, data=json.dumps(data)
    )
    if response.status_code != 200:
        raise AI21Error(status_code=response.status_code, message=response.text)
    if "stream" in optional_params and optional_params["stream"] == True:
        return response.iter_lines()
    else:
        ## LOGGING
        logging_obj.post_call(
            input=prompt,
            api_key=api_key,
            original_response=response.text,
            additional_args={"complete_input_dict": data},
        )
        ## RESPONSE OBJECT
        completion_response = response.json()
        try:
            choices_list = []
            for idx, item in enumerate(completion_response["completions"]):
                if len(item["data"]["text"]) > 0:
                    message_obj = Message(content=item["data"]["text"])
                else:
                    message_obj = Message(content=None)
                choice_obj = Choices(
                    finish_reason=item["finishReason"]["reason"],
                    index=idx + 1,
                    message=message_obj,
                )
                choices_list.append(choice_obj)
            model_response["choices"] = choices_list
        except Exception as e:
            raise AI21Error(
                message=traceback.format_exc(), status_code=response.status_code
            )

        ## CALCULATING USAGE
        prompt_tokens = len(encoding.encode(prompt))
        completion_tokens = len(
            encoding.encode(model_response["choices"][0]["message"].get("content"))
        )

        model_response["created"] = int(time.time())
        model_response["model"] = model
        model_response["usage"] = {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens,
        }
        return model_response


def embedding():
    # logic for parsing in - calling - parsing out model embedding calls
    pass
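For reference, a sketch of the HTTP request completion() assembles (illustration only; the model name, key placeholder, and parameter values are made up, while the endpoint path follows the code above):

# Illustration: the POST that completion() issues for a hypothetical call.
import json, requests

api_base = "https://api.ai21.com/studio/v1/"
model = "j2-mid"
headers = {
    "accept": "application/json",
    "content-type": "application/json",
    "Authorization": "Bearer " + "<AI21_API_KEY>",
}
data = {
    "prompt": "You are a helpful assistant.What is 2+2?",  # messages concatenated
    "maxTokens": 256,  # merged in from AI21Config / optional_params
}
response = requests.post(
    api_base + model + "/complete", headers=headers, data=json.dumps(data)
)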
    	
litellm/llms/aleph_alpha.py
ADDED

@@ -0,0 +1,304 @@
import os, types
import json
from enum import Enum
import requests
import time
from typing import Callable, Optional
import litellm
from litellm.utils import ModelResponse, Choices, Message, Usage
import httpx


class AlephAlphaError(Exception):
    def __init__(self, status_code, message):
        self.status_code = status_code
        self.message = message
        self.request = httpx.Request(
            method="POST", url="https://api.aleph-alpha.com/complete"
        )
        self.response = httpx.Response(status_code=status_code, request=self.request)
        super().__init__(
            self.message
        )  # Call the base class constructor with the parameters it needs


class AlephAlphaConfig:
    """
    Reference: https://docs.aleph-alpha.com/api/complete/

    The `AlephAlphaConfig` class represents the configuration for the Aleph Alpha API. Here are the properties:

    - `maximum_tokens` (integer, required): The maximum number of tokens to be generated by the completion. The sum of input tokens and maximum tokens may not exceed 2048.

    - `minimum_tokens` (integer, optional; default value: 0): Generate at least this number of tokens before an end-of-text token is generated.

    - `echo` (boolean, optional; default value: false): Whether to echo the prompt in the completion.

    - `temperature` (number, nullable; default value: 0): Adjusts how creatively the model generates outputs. Use combinations of temperature, top_k, and top_p sensibly.

    - `top_k` (integer, nullable; default value: 0): Introduces randomness into token generation by considering only the top k most likely options.

    - `top_p` (number, nullable; default value: 0): Adds randomness by considering the smallest set of tokens whose cumulative probability exceeds top_p.

    - `presence_penalty`, `frequency_penalty`, `sequence_penalty` (number, nullable; default value: 0): Various penalties that can reduce repetition.

    - `sequence_penalty_min_length` (integer; default value: 2): Minimum number of tokens to be considered as a sequence.

    - `repetition_penalties_include_prompt`, `repetition_penalties_include_completion`, `use_multiplicative_presence_penalty`, `use_multiplicative_frequency_penalty`, `use_multiplicative_sequence_penalty` (boolean, nullable; default value: false): Various settings that adjust how the repetition penalties are applied.

    - `penalty_bias` (string, nullable): Text used in addition to the penalized tokens for repetition penalties.

    - `penalty_exceptions` (string[], nullable): Strings that may be generated without penalty.

    - `penalty_exceptions_include_stop_sequences` (boolean, nullable; default value: true): Include all stop_sequences in penalty_exceptions.

    - `best_of` (integer, nullable; default value: 1): The number of completions generated on the server side; the best one is returned.

    - `n` (integer, nullable; default value: 1): The number of completions to return.

    - `logit_bias` (object, nullable): Adjusts the logit scores before sampling.

    - `log_probs` (integer, nullable): Number of top log probabilities returned for each generated token.

    - `stop_sequences` (string[], nullable): List of strings that will stop generation if they're generated.

    - `tokens` (boolean, nullable; default value: false): Flag indicating whether individual tokens of the completion should be returned or not.

    - `raw_completion` (boolean; default value: false): If true, the raw completion of the model is returned.

    - `disable_optimizations` (boolean, nullable; default value: false): Disables any applied optimizations to both your prompt and completion.

    - `completion_bias_inclusion`, `completion_bias_exclusion` (string[], default value: []): Sets of strings used to bias the generation of tokens toward or away from them.

    - `completion_bias_inclusion_first_token_only`, `completion_bias_exclusion_first_token_only` (boolean; default value: false): Consider only the first token for the completion_bias_inclusion/exclusion.

    - `contextual_control_threshold` (number, nullable): Threshold governing whether attention control is also applied to tokens similar to the explicitly controlled ones.

    - `control_log_additive` (boolean; default value: true): Method of applying control to attention scores.
    """

    maximum_tokens: Optional[
        int
    ] = litellm.max_tokens  # aleph alpha requires max tokens
    minimum_tokens: Optional[int] = None
    echo: Optional[bool] = None
    temperature: Optional[int] = None
    top_k: Optional[int] = None
    top_p: Optional[int] = None
    presence_penalty: Optional[int] = None
    frequency_penalty: Optional[int] = None
    sequence_penalty: Optional[int] = None
    sequence_penalty_min_length: Optional[int] = None
    repetition_penalties_include_prompt: Optional[bool] = None
    repetition_penalties_include_completion: Optional[bool] = None
    use_multiplicative_presence_penalty: Optional[bool] = None
    use_multiplicative_frequency_penalty: Optional[bool] = None
    use_multiplicative_sequence_penalty: Optional[bool] = None
    penalty_bias: Optional[str] = None
    penalty_exceptions_include_stop_sequences: Optional[bool] = None
    best_of: Optional[int] = None
    n: Optional[int] = None
    logit_bias: Optional[dict] = None
    log_probs: Optional[int] = None
    stop_sequences: Optional[list] = None
    tokens: Optional[bool] = None
    raw_completion: Optional[bool] = None
    disable_optimizations: Optional[bool] = None
    completion_bias_inclusion: Optional[list] = None
    completion_bias_exclusion: Optional[list] = None
    completion_bias_inclusion_first_token_only: Optional[bool] = None
    completion_bias_exclusion_first_token_only: Optional[bool] = None
    contextual_control_threshold: Optional[int] = None
    control_log_additive: Optional[bool] = None

    def __init__(
        self,
        maximum_tokens: Optional[int] = None,
        minimum_tokens: Optional[int] = None,
        echo: Optional[bool] = None,
        temperature: Optional[int] = None,
        top_k: Optional[int] = None,
        top_p: Optional[int] = None,
        presence_penalty: Optional[int] = None,
        frequency_penalty: Optional[int] = None,
        sequence_penalty: Optional[int] = None,
        sequence_penalty_min_length: Optional[int] = None,
        repetition_penalties_include_prompt: Optional[bool] = None,
        repetition_penalties_include_completion: Optional[bool] = None,
        use_multiplicative_presence_penalty: Optional[bool] = None,
        use_multiplicative_frequency_penalty: Optional[bool] = None,
        use_multiplicative_sequence_penalty: Optional[bool] = None,
        penalty_bias: Optional[str] = None,
        penalty_exceptions_include_stop_sequences: Optional[bool] = None,
        best_of: Optional[int] = None,
        n: Optional[int] = None,
        logit_bias: Optional[dict] = None,
        log_probs: Optional[int] = None,
        stop_sequences: Optional[list] = None,
        tokens: Optional[bool] = None,
        raw_completion: Optional[bool] = None,
        disable_optimizations: Optional[bool] = None,
        completion_bias_inclusion: Optional[list] = None,
        completion_bias_exclusion: Optional[list] = None,
        completion_bias_inclusion_first_token_only: Optional[bool] = None,
        completion_bias_exclusion_first_token_only: Optional[bool] = None,
        contextual_control_threshold: Optional[int] = None,
        control_log_additive: Optional[bool] = None,
    ) -> None:
        locals_ = locals()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }
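

# Illustrative sketch of the config pattern above (values are examples only):
# attributes set on the class persist as provider-wide defaults, and the merge
# loop in completion() below only fills in keys the caller did not pass.
#
#   litellm.AlephAlphaConfig(top_k=10)     # persists top_k=10 on the class
#   litellm.AlephAlphaConfig.get_config()  # -> includes {"top_k": 10} plus
#                                          #    maximum_tokens (litellm.max_tokens)
#   # completion(..., top_k=3) still takes precedence, since existing keys in
#   # optional_params are never overwritten.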
def validate_environment(api_key):
    headers = {
        "accept": "application/json",
        "content-type": "application/json",
    }
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"
    return headers


def completion(
    model: str,
    messages: list,
    api_base: str,
    model_response: ModelResponse,
    print_verbose: Callable,
    encoding,
    api_key,
    logging_obj,
    optional_params=None,
    litellm_params=None,
    logger_fn=None,
    default_max_tokens_to_sample=None,
):
    headers = validate_environment(api_key)

    ## Load Config
    config = litellm.AlephAlphaConfig.get_config()
    for k, v in config.items():
        if (
            k not in optional_params
        ):  # completion(top_k=3) > aleph_alpha_config(top_k=3) <- allows for dynamic variables to be passed in
            optional_params[k] = v

    completion_url = api_base
    prompt = ""
    if "control" in model:  # follow the ###Instruction / ###Response format
        for idx, message in enumerate(messages):
            if "role" in message:
                if (
                    idx == 0
                ):  # set first message as instruction (required), let later user messages be input
                    prompt += f"###Instruction: {message['content']}"
                else:
                    if message["role"] == "system":
                        prompt += f"###Instruction: {message['content']}"
                    elif message["role"] == "user":
                        prompt += f"###Input: {message['content']}"
                    else:
                        prompt += f"###Response: {message['content']}"
            else:
                prompt += f"{message['content']}"
    else:
        prompt = " ".join(message["content"] for message in messages)
    data = {
        "model": model,
        "prompt": prompt,
        **optional_params,
    }

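    # Illustrative only: for a "control" model and hypothetical messages
    #   [{"role": "system", "content": "Summarize."}, {"role": "user", "content": "Hi"}]
    # the loop above yields "###Instruction: Summarize.###Input: Hi", while
    # non-"control" models get the contents joined with spaces: "Summarize. Hi".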
    ## LOGGING
    logging_obj.pre_call(
        input=prompt,
        api_key=api_key,
        additional_args={"complete_input_dict": data},
    )
    ## COMPLETION CALL
    response = requests.post(
        completion_url,
        headers=headers,
        data=json.dumps(data),
        stream=optional_params["stream"] if "stream" in optional_params else False,
    )
    if "stream" in optional_params and optional_params["stream"] == True:
        return response.iter_lines()
    else:
        ## LOGGING
        logging_obj.post_call(
            input=prompt,
            api_key=api_key,
            original_response=response.text,
            additional_args={"complete_input_dict": data},
        )
        print_verbose(f"raw model_response: {response.text}")
        ## RESPONSE OBJECT
        completion_response = response.json()
        if "error" in completion_response:
            raise AlephAlphaError(
                message=completion_response["error"],
                status_code=response.status_code,
            )
        else:
            try:
                choices_list = []
                for idx, item in enumerate(completion_response["completions"]):
                    if len(item["completion"]) > 0:
                        message_obj = Message(content=item["completion"])
                    else:
                        message_obj = Message(content=None)
                    choice_obj = Choices(
                        finish_reason=item["finish_reason"],
                        index=idx + 1,
                        message=message_obj,
                    )
                    choices_list.append(choice_obj)
                model_response["choices"] = choices_list
            except Exception:
                raise AlephAlphaError(
                    message=json.dumps(completion_response),
                    status_code=response.status_code,
                )

        ## CALCULATING USAGE - token counts are estimated locally with the provided encoding
        prompt_tokens = len(encoding.encode(prompt))
        completion_tokens = len(
            encoding.encode(model_response["choices"][0]["message"]["content"])
        )

        model_response["created"] = int(time.time())
        model_response["model"] = model
        usage = Usage(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=prompt_tokens + completion_tokens,
        )
        model_response.usage = usage
        return model_response


def embedding():
    # logic for parsing in - calling - parsing out model embedding calls
    pass
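
A quick sketch of the translation this module performs (hypothetical response shape, inferred from the parsing code above): Aleph Alpha returns a list of completions, and each one becomes an OpenAI-style choice on the ModelResponse.

    # Illustrative only: a response like
    #   {"completions": [{"completion": "Hello!", "finish_reason": "maximum_tokens"}]}
    # is mapped to
    #   choices=[Choices(index=1, finish_reason="maximum_tokens",
    #                    message=Message(content="Hello!"))]
    # with usage estimated by encoding the prompt and the first choice's content.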
litellm/llms/anthropic.py
ADDED

@@ -0,0 +1,215 @@
import os, types
import json
from enum import Enum
import requests
import time
from typing import Callable, Optional
from litellm.utils import ModelResponse, Usage
import litellm
from .prompt_templates.factory import prompt_factory, custom_prompt
import httpx


class AnthropicConstants(Enum):
    HUMAN_PROMPT = "\n\nHuman: "
    AI_PROMPT = "\n\nAssistant: "


class AnthropicError(Exception):
    def __init__(self, status_code, message):
        self.status_code = status_code
        self.message = message
        self.request = httpx.Request(
            method="POST", url="https://api.anthropic.com/v1/complete"
        )
        self.response = httpx.Response(status_code=status_code, request=self.request)
        super().__init__(
            self.message
        )  # Call the base class constructor with the parameters it needs


class AnthropicConfig:
    """
    Reference: https://docs.anthropic.com/claude/reference/complete_post

    To pass metadata to Anthropic, set it as e.g. {"user_id": "any-relevant-information"}.
    """

    max_tokens_to_sample: Optional[
        int
    ] = litellm.max_tokens  # anthropic requires a default
    stop_sequences: Optional[list] = None
    temperature: Optional[int] = None
    top_p: Optional[int] = None
    top_k: Optional[int] = None
    metadata: Optional[dict] = None

    def __init__(
        self,
        max_tokens_to_sample: Optional[int] = 256,  # anthropic requires a default
        stop_sequences: Optional[list] = None,
        temperature: Optional[int] = None,
        top_p: Optional[int] = None,
        top_k: Optional[int] = None,
        metadata: Optional[dict] = None,
    ) -> None:
        locals_ = locals()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }


# makes headers for API call
def validate_environment(api_key):
    if api_key is None:
        raise ValueError(
            "Missing Anthropic API Key - A call is being made to anthropic but no key is set either in the environment variables or via params"
        )
    headers = {
        "accept": "application/json",
        "anthropic-version": "2023-06-01",
        "content-type": "application/json",
        "x-api-key": api_key,
    }
    return headers


def completion(
    model: str,
    messages: list,
    api_base: str,
    custom_prompt_dict: dict,
    model_response: ModelResponse,
    print_verbose: Callable,
    encoding,
    api_key,
    logging_obj,
    optional_params=None,
    litellm_params=None,
    logger_fn=None,
):
    headers = validate_environment(api_key)
    if model in custom_prompt_dict:
        # check if the model has a registered custom prompt
        model_prompt_details = custom_prompt_dict[model]
        prompt = custom_prompt(
            role_dict=model_prompt_details["roles"],
            initial_prompt_value=model_prompt_details["initial_prompt_value"],
            final_prompt_value=model_prompt_details["final_prompt_value"],
            messages=messages,
        )
    else:
        prompt = prompt_factory(
            model=model, messages=messages, custom_llm_provider="anthropic"
        )

    ## Load Config
    config = litellm.AnthropicConfig.get_config()
    for k, v in config.items():
        if (
            k not in optional_params
        ):  # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
            optional_params[k] = v

    data = {
        "model": model,
        "prompt": prompt,
        **optional_params,
    }

    ## LOGGING
    logging_obj.pre_call(
        input=prompt,
        api_key=api_key,
        additional_args={"complete_input_dict": data, "api_base": api_base},
    )

    ## COMPLETION CALL
    if "stream" in optional_params and optional_params["stream"] == True:
        response = requests.post(
            api_base,
            headers=headers,
            data=json.dumps(data),
            stream=optional_params["stream"],
        )

        if response.status_code != 200:
            raise AnthropicError(
                status_code=response.status_code, message=response.text
            )

        return response.iter_lines()
    else:
        response = requests.post(api_base, headers=headers, data=json.dumps(data))
        if response.status_code != 200:
            raise AnthropicError(
                status_code=response.status_code, message=response.text
            )

        ## LOGGING
        logging_obj.post_call(
            input=prompt,
            api_key=api_key,
            original_response=response.text,
            additional_args={"complete_input_dict": data},
        )
        print_verbose(f"raw model_response: {response.text}")
        ## RESPONSE OBJECT
        try:
            completion_response = response.json()
        except Exception:
            raise AnthropicError(
                message=response.text, status_code=response.status_code
            )
        if "error" in completion_response:
            raise AnthropicError(
                message=str(completion_response["error"]),
                status_code=response.status_code,
            )
        else:
            if len(completion_response["completion"]) > 0:
                model_response["choices"][0]["message"][
                    "content"
                ] = completion_response["completion"]
            model_response.choices[0].finish_reason = completion_response["stop_reason"]

        ## CALCULATING USAGE
        prompt_tokens = len(
            encoding.encode(prompt)
        )  ##[TODO] use the anthropic tokenizer here
        completion_tokens = len(
            encoding.encode(model_response["choices"][0]["message"].get("content", ""))
        )  ##[TODO] use the anthropic tokenizer here

        model_response["created"] = int(time.time())
        model_response["model"] = model
        usage = Usage(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=prompt_tokens + completion_tokens,
        )
        model_response.usage = usage
        return model_response


def embedding():
    # logic for parsing in - calling - parsing out model embedding calls
    pass
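
The AnthropicConstants at the top of this file reflect the prompt format Anthropic's legacy text-completions endpoint expects: alternating "\n\nHuman:" / "\n\nAssistant:" turns, with the prompt ending on the assistant prefix so the model answers in that role. A minimal sketch of that shape (illustrative only; the actual conversion happens in prompt_templates/factory.py):

    # Roughly what the factory produces for [{"role": "user", "content": "What is 2+2?"}]
    prompt = (
        AnthropicConstants.HUMAN_PROMPT.value
        + "What is 2+2?"
        + AnthropicConstants.AI_PROMPT.value
    )
    # -> "\n\nHuman: What is 2+2?\n\nAssistant: "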
litellm/llms/azure.py
ADDED

@@ -0,0 +1,799 @@
from typing import Optional, Union, Any
import types, requests
from .base import BaseLLM
from litellm.utils import (
    ModelResponse,
    Choices,
    Message,
    CustomStreamWrapper,
    convert_to_model_response_object,
)
from typing import Callable, Optional
from litellm import OpenAIConfig
import litellm, json
import httpx
from .custom_httpx.azure_dall_e_2 import CustomHTTPTransport, AsyncCustomHTTPTransport
from openai import AzureOpenAI, AsyncAzureOpenAI


class AzureOpenAIError(Exception):
    def __init__(
        self,
        status_code,
        message,
        request: Optional[httpx.Request] = None,
        response: Optional[httpx.Response] = None,
    ):
        self.status_code = status_code
        self.message = message
        if request:
            self.request = request
        else:
            self.request = httpx.Request(method="POST", url="https://api.openai.com/v1")
        if response:
            self.response = response
        else:
            self.response = httpx.Response(
                status_code=status_code, request=self.request
            )
        super().__init__(
            self.message
        )  # Call the base class constructor with the parameters it needs


class AzureOpenAIConfig(OpenAIConfig):
    """
    Reference: https://platform.openai.com/docs/api-reference/chat/create

    The class `AzureOpenAIConfig` provides configuration for OpenAI's Chat API interface, for use with Azure. It inherits from `OpenAIConfig`. Below are the parameters:

    - `frequency_penalty` (number or null): Defaults to 0. Allows a value between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, thereby minimizing repetition.

    - `function_call` (string or object): This optional parameter controls how the model calls functions.

    - `functions` (array): An optional parameter. It is a list of functions for which the model may generate JSON inputs.

    - `logit_bias` (map): This optional parameter modifies the likelihood of specified tokens appearing in the completion.

    - `max_tokens` (integer or null): This optional parameter sets the maximum number of tokens to generate in the chat completion.

    - `n` (integer or null): This optional parameter sets how many chat completion choices to generate for each input message.

    - `presence_penalty` (number or null): Defaults to 0. It penalizes new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.

    - `stop` (string / array / null): Specifies up to 4 sequences where the API will stop generating further tokens.

    - `temperature` (number or null): Defines the sampling temperature to use, varying between 0 and 2.

    - `top_p` (number or null): An alternative to sampling with temperature, used for nucleus sampling.
    """

    def __init__(
        self,
        frequency_penalty: Optional[int] = None,
        function_call: Optional[Union[str, dict]] = None,
        functions: Optional[list] = None,
        logit_bias: Optional[dict] = None,
        max_tokens: Optional[int] = None,
        n: Optional[int] = None,
        presence_penalty: Optional[int] = None,
        stop: Optional[Union[str, list]] = None,
        temperature: Optional[int] = None,
        top_p: Optional[int] = None,
    ) -> None:
        super().__init__(
            frequency_penalty,
            function_call,
            functions,
            logit_bias,
            max_tokens,
            n,
            presence_penalty,
            stop,
            temperature,
            top_p,
        )


class AzureChatCompletion(BaseLLM):
    def __init__(self) -> None:
        super().__init__()

    def validate_environment(self, api_key, azure_ad_token):
        headers = {
            "content-type": "application/json",
        }
        if api_key is not None:
            headers["api-key"] = api_key
        elif azure_ad_token is not None:
            headers["Authorization"] = f"Bearer {azure_ad_token}"
        return headers
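
    # Illustrative only: the two auth paths above yield, e.g.,
    #   validate_environment(api_key="my-key", azure_ad_token=None)
    #     -> {"content-type": "application/json", "api-key": "my-key"}
    #   validate_environment(api_key=None, azure_ad_token="aad-token")
    #     -> {"content-type": "application/json", "Authorization": "Bearer aad-token"}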

    def completion(
        self,
        model: str,
        messages: list,
        model_response: ModelResponse,
        api_key: str,
        api_base: str,
        api_version: str,
        api_type: str,
        azure_ad_token: str,
        print_verbose: Callable,
        timeout,
        logging_obj,
        optional_params,
        litellm_params,
        logger_fn,
        acompletion: bool = False,
        headers: Optional[dict] = None,
        client=None,
    ):
        super().completion()
        exception_mapping_worked = False
        try:
            if model is None or messages is None:
                raise AzureOpenAIError(
                    status_code=422, message="Missing model or messages"
                )

            max_retries = optional_params.pop("max_retries", 2)

            ### CHECK IF CLOUDFLARE AI GATEWAY ###
            ### if so - set the model as part of the base url
            if "gateway.ai.cloudflare.com" in api_base:
                ## build base url - assume api base includes resource name
                if client is None:
                    if not api_base.endswith("/"):
                        api_base += "/"
                    api_base += f"{model}"

                    azure_client_params = {
                        "api_version": api_version,
                        "base_url": f"{api_base}",
                        "http_client": litellm.client_session,
                        "max_retries": max_retries,
                        "timeout": timeout,
                    }
         | 
| 158 | 
            +
                                if api_key is not None:
         | 
| 159 | 
            +
                                    azure_client_params["api_key"] = api_key
         | 
| 160 | 
            +
                                elif azure_ad_token is not None:
         | 
| 161 | 
            +
                                    azure_client_params["azure_ad_token"] = azure_ad_token
         | 
| 162 | 
            +
             | 
| 163 | 
            +
                                if acompletion is True:
         | 
| 164 | 
            +
                                    client = AsyncAzureOpenAI(**azure_client_params)
         | 
| 165 | 
            +
                                else:
         | 
| 166 | 
            +
                                    client = AzureOpenAI(**azure_client_params)
         | 
| 167 | 
            +
             | 
| 168 | 
            +
                            data = {"model": None, "messages": messages, **optional_params}
         | 
| 169 | 
            +
                        else:
         | 
| 170 | 
            +
                            data = {
         | 
| 171 | 
            +
                                "model": model,  # type: ignore
         | 
| 172 | 
            +
                                "messages": messages,
         | 
| 173 | 
            +
                                **optional_params,
         | 
| 174 | 
            +
                            }
         | 
| 175 | 
            +
             | 
| 176 | 
            +
                        if acompletion is True:
         | 
| 177 | 
            +
                            if optional_params.get("stream", False):
         | 
| 178 | 
            +
                                return self.async_streaming(
         | 
| 179 | 
            +
                                    logging_obj=logging_obj,
         | 
| 180 | 
            +
                                    api_base=api_base,
         | 
| 181 | 
            +
                                    data=data,
         | 
| 182 | 
            +
                                    model=model,
         | 
| 183 | 
            +
                                    api_key=api_key,
         | 
| 184 | 
            +
                                    api_version=api_version,
         | 
| 185 | 
            +
                                    azure_ad_token=azure_ad_token,
         | 
| 186 | 
            +
                                    timeout=timeout,
         | 
| 187 | 
            +
                                    client=client,
         | 
| 188 | 
            +
                                )
         | 
| 189 | 
            +
                            else:
         | 
| 190 | 
            +
                                return self.acompletion(
         | 
| 191 | 
            +
                                    api_base=api_base,
         | 
| 192 | 
            +
                                    data=data,
         | 
| 193 | 
            +
                                    model_response=model_response,
         | 
| 194 | 
            +
                                    api_key=api_key,
         | 
| 195 | 
            +
                                    api_version=api_version,
         | 
| 196 | 
            +
                                    model=model,
         | 
| 197 | 
            +
                                    azure_ad_token=azure_ad_token,
         | 
| 198 | 
            +
                                    timeout=timeout,
         | 
| 199 | 
            +
                                    client=client,
         | 
| 200 | 
            +
                                    logging_obj=logging_obj,
         | 
| 201 | 
            +
                                )
         | 
| 202 | 
            +
                        elif "stream" in optional_params and optional_params["stream"] == True:
         | 
| 203 | 
            +
                            return self.streaming(
         | 
| 204 | 
            +
                                logging_obj=logging_obj,
         | 
| 205 | 
            +
                                api_base=api_base,
         | 
| 206 | 
            +
                                data=data,
         | 
| 207 | 
            +
                                model=model,
         | 
| 208 | 
            +
                                api_key=api_key,
         | 
| 209 | 
            +
                                api_version=api_version,
         | 
| 210 | 
            +
                                azure_ad_token=azure_ad_token,
         | 
| 211 | 
            +
                                timeout=timeout,
         | 
| 212 | 
            +
                                client=client,
         | 
| 213 | 
            +
                            )
         | 
| 214 | 
            +
                        else:
         | 
| 215 | 
            +
                            ## LOGGING
         | 
| 216 | 
            +
                            logging_obj.pre_call(
         | 
| 217 | 
            +
                                input=messages,
         | 
| 218 | 
            +
                                api_key=api_key,
         | 
| 219 | 
            +
                                additional_args={
         | 
| 220 | 
            +
                                    "headers": {
         | 
| 221 | 
            +
                                        "api_key": api_key,
         | 
| 222 | 
            +
                                        "azure_ad_token": azure_ad_token,
         | 
| 223 | 
            +
                                    },
         | 
| 224 | 
            +
                                    "api_version": api_version,
         | 
| 225 | 
            +
                                    "api_base": api_base,
         | 
| 226 | 
            +
                                    "complete_input_dict": data,
         | 
| 227 | 
            +
                                },
         | 
| 228 | 
            +
                            )
         | 
| 229 | 
            +
                            if not isinstance(max_retries, int):
         | 
| 230 | 
            +
                                raise AzureOpenAIError(
         | 
| 231 | 
            +
                                    status_code=422, message="max retries must be an int"
         | 
| 232 | 
            +
                                )
         | 
| 233 | 
            +
                            # init AzureOpenAI Client
         | 
| 234 | 
            +
                            azure_client_params = {
         | 
| 235 | 
            +
                                "api_version": api_version,
         | 
| 236 | 
            +
                                "azure_endpoint": api_base,
         | 
| 237 | 
            +
                                "azure_deployment": model,
         | 
| 238 | 
            +
                                "http_client": litellm.client_session,
         | 
| 239 | 
            +
                                "max_retries": max_retries,
         | 
| 240 | 
            +
                                "timeout": timeout,
         | 
| 241 | 
            +
                            }
         | 
| 242 | 
            +
                            if api_key is not None:
         | 
| 243 | 
            +
                                azure_client_params["api_key"] = api_key
         | 
| 244 | 
            +
                            elif azure_ad_token is not None:
         | 
| 245 | 
            +
                                azure_client_params["azure_ad_token"] = azure_ad_token
         | 
| 246 | 
            +
                            if client is None:
         | 
| 247 | 
            +
                                azure_client = AzureOpenAI(**azure_client_params)
         | 
| 248 | 
            +
                            else:
         | 
| 249 | 
            +
                                azure_client = client
         | 
| 250 | 
            +
                            response = azure_client.chat.completions.create(**data, timeout=timeout)  # type: ignore
         | 
| 251 | 
            +
                            stringified_response = response.model_dump()
         | 
| 252 | 
            +
                            ## LOGGING
         | 
| 253 | 
            +
                            logging_obj.post_call(
         | 
| 254 | 
            +
                                input=messages,
         | 
| 255 | 
            +
                                api_key=api_key,
         | 
| 256 | 
            +
                                original_response=stringified_response,
         | 
| 257 | 
            +
                                additional_args={
         | 
| 258 | 
            +
                                    "headers": headers,
         | 
| 259 | 
            +
                                    "api_version": api_version,
         | 
| 260 | 
            +
                                    "api_base": api_base,
         | 
| 261 | 
            +
                                },
         | 
| 262 | 
            +
                            )
         | 
| 263 | 
            +
                            return convert_to_model_response_object(
         | 
| 264 | 
            +
                                response_object=stringified_response,
         | 
| 265 | 
            +
                                model_response_object=model_response,
         | 
| 266 | 
            +
                            )
         | 
| 267 | 
            +
                    except AzureOpenAIError as e:
         | 
| 268 | 
            +
                        exception_mapping_worked = True
         | 
| 269 | 
            +
                        raise e
         | 
| 270 | 
            +
                    except Exception as e:
         | 
| 271 | 
            +
                        if hasattr(e, "status_code"):
         | 
| 272 | 
            +
                            raise AzureOpenAIError(status_code=e.status_code, message=str(e))
         | 
| 273 | 
            +
                        else:
         | 
| 274 | 
            +
                            raise AzureOpenAIError(status_code=500, message=str(e))
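
    # completion() dispatches on its flags: acompletion + stream -> async_streaming(),
    # acompletion alone -> acompletion(), stream alone -> streaming(); otherwise it
    # issues a blocking chat.completions.create() and converts the result.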

    async def acompletion(
        self,
        api_key: str,
        api_version: str,
        model: str,
        api_base: str,
        data: dict,
        timeout: Any,
        model_response: ModelResponse,
        azure_ad_token: Optional[str] = None,
        client=None,  # this is the AsyncAzureOpenAI
        logging_obj=None,
    ):
        response = None
        try:
            max_retries = data.pop("max_retries", 2)
            if not isinstance(max_retries, int):
                raise AzureOpenAIError(
                    status_code=422, message="max retries must be an int"
                )

            # init AzureOpenAI Client
            azure_client_params = {
                "api_version": api_version,
                "azure_endpoint": api_base,
                "azure_deployment": model,
                "http_client": litellm.client_session,
                "max_retries": max_retries,
                "timeout": timeout,
            }
            if api_key is not None:
                azure_client_params["api_key"] = api_key
            elif azure_ad_token is not None:
                azure_client_params["azure_ad_token"] = azure_ad_token
            if client is None:
                azure_client = AsyncAzureOpenAI(**azure_client_params)
            else:
                azure_client = client
            ## LOGGING
            logging_obj.pre_call(
                input=data["messages"],
                api_key=azure_client.api_key,
                additional_args={
                    "headers": {"Authorization": f"Bearer {azure_client.api_key}"},
                    "api_base": azure_client._base_url._uri_reference,
                    "acompletion": True,
                    "complete_input_dict": data,
                },
            )
            response = await azure_client.chat.completions.create(
                **data, timeout=timeout
            )
            return convert_to_model_response_object(
                response_object=response.model_dump(),
                model_response_object=model_response,
            )
        except AzureOpenAIError as e:
            exception_mapping_worked = True
            raise e
        except Exception as e:
            if hasattr(e, "status_code"):
                raise e
            else:
                raise AzureOpenAIError(status_code=500, message=str(e))
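
    # NOTE: unlike completion(), exceptions that already carry a status_code are
    # re-raised here unwrapped instead of being converted to AzureOpenAIError.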

    def streaming(
        self,
        logging_obj,
        api_base: str,
        api_key: str,
        api_version: str,
        data: dict,
        model: str,
        timeout: Any,
        azure_ad_token: Optional[str] = None,
        client=None,
    ):
        max_retries = data.pop("max_retries", 2)
        if not isinstance(max_retries, int):
            raise AzureOpenAIError(
                status_code=422, message="max retries must be an int"
            )
        # init AzureOpenAI Client
        azure_client_params = {
            "api_version": api_version,
            "azure_endpoint": api_base,
            "azure_deployment": model,
            "http_client": litellm.client_session,
            "max_retries": max_retries,
            "timeout": timeout,
        }
        if api_key is not None:
            azure_client_params["api_key"] = api_key
        elif azure_ad_token is not None:
            azure_client_params["azure_ad_token"] = azure_ad_token
        if client is None:
            azure_client = AzureOpenAI(**azure_client_params)
        else:
            azure_client = client
        ## LOGGING
        logging_obj.pre_call(
            input=data["messages"],
            api_key=azure_client.api_key,
            additional_args={
                "headers": {"Authorization": f"Bearer {azure_client.api_key}"},
                "api_base": azure_client._base_url._uri_reference,
                "acompletion": True,
                "complete_input_dict": data,
            },
        )
        response = azure_client.chat.completions.create(**data, timeout=timeout)
        streamwrapper = CustomStreamWrapper(
            completion_stream=response,
            model=model,
            custom_llm_provider="azure",
            logging_obj=logging_obj,
        )
        return streamwrapper
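
    # CustomStreamWrapper normalizes the raw Azure stream into litellm's
    # provider-agnostic chunk format, so callers can iterate it uniformly.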

    async def async_streaming(
        self,
        logging_obj,
        api_base: str,
        api_key: str,
        api_version: str,
        data: dict,
        model: str,
        timeout: Any,
        azure_ad_token: Optional[str] = None,
        client=None,
    ):
        try:
            # init AzureOpenAI Client
            azure_client_params = {
                "api_version": api_version,
                "azure_endpoint": api_base,
                "azure_deployment": model,
                "http_client": litellm.client_session,
                "max_retries": data.pop("max_retries", 2),
                "timeout": timeout,
            }
            if api_key is not None:
                azure_client_params["api_key"] = api_key
            elif azure_ad_token is not None:
                azure_client_params["azure_ad_token"] = azure_ad_token
            if client is None:
                azure_client = AsyncAzureOpenAI(**azure_client_params)
            else:
                azure_client = client
            ## LOGGING
            logging_obj.pre_call(
                input=data["messages"],
                api_key=azure_client.api_key,
                additional_args={
                    "headers": {"Authorization": f"Bearer {azure_client.api_key}"},
                    "api_base": azure_client._base_url._uri_reference,
                    "acompletion": True,
                    "complete_input_dict": data,
                },
            )
            response = await azure_client.chat.completions.create(
                **data, timeout=timeout
            )
            streamwrapper = CustomStreamWrapper(
                completion_stream=response,
                model=model,
                custom_llm_provider="azure",
                logging_obj=logging_obj,
            )
            return streamwrapper  ## DO NOT make this into an async for ... loop, it will yield an async generator, which won't raise errors if the response fails
        except Exception as e:
            if hasattr(e, "status_code"):
                raise AzureOpenAIError(status_code=e.status_code, message=str(e))
            else:
                raise AzureOpenAIError(status_code=500, message=str(e))

    async def aembedding(
        self,
        data: dict,
        model_response: ModelResponse,
        azure_client_params: dict,
        api_key: str,
        input: list,
        client=None,
        logging_obj=None,
        timeout=None,
    ):
        response = None
        try:
            if client is None:
                openai_aclient = AsyncAzureOpenAI(**azure_client_params)
            else:
                openai_aclient = client
            response = await openai_aclient.embeddings.create(**data, timeout=timeout)
            stringified_response = response.model_dump()
            ## LOGGING
            logging_obj.post_call(
                input=input,
                api_key=api_key,
                additional_args={"complete_input_dict": data},
                original_response=stringified_response,
            )
            return convert_to_model_response_object(
                response_object=stringified_response,
                model_response_object=model_response,
                response_type="embedding",
            )
        except Exception as e:
            ## LOGGING
            logging_obj.post_call(
                input=input,
                api_key=api_key,
                additional_args={"complete_input_dict": data},
                original_response=str(e),
            )
            raise e

    def embedding(
        self,
        model: str,
        input: list,
        api_key: str,
        api_base: str,
        api_version: str,
        timeout: float,
        logging_obj=None,
        model_response=None,
        optional_params=None,
        azure_ad_token: Optional[str] = None,
        client=None,
        aembedding=None,
    ):
        super().embedding()
        exception_mapping_worked = False
        if self._client_session is None:
            self._client_session = self.create_client_session()
        try:
            data = {"model": model, "input": input, **optional_params}
            max_retries = data.pop("max_retries", 2)
            if not isinstance(max_retries, int):
                raise AzureOpenAIError(
                    status_code=422, message="max retries must be an int"
                )

            # init AzureOpenAI Client
            azure_client_params = {
                "api_version": api_version,
                "azure_endpoint": api_base,
                "azure_deployment": model,
                "http_client": litellm.client_session,
                "max_retries": max_retries,
                "timeout": timeout,
            }
            if api_key is not None:
                azure_client_params["api_key"] = api_key
            elif azure_ad_token is not None:
                azure_client_params["azure_ad_token"] = azure_ad_token

            ## LOGGING
            logging_obj.pre_call(
                input=input,
                api_key=api_key,
                additional_args={
                    "complete_input_dict": data,
                    "headers": {"api_key": api_key, "azure_ad_token": azure_ad_token},
                },
            )

            if aembedding == True:
                response = self.aembedding(
                    data=data,
                    input=input,
                    logging_obj=logging_obj,
                    api_key=api_key,
                    model_response=model_response,
                    azure_client_params=azure_client_params,
                    timeout=timeout,
                )
                return response
            if client is None:
                azure_client = AzureOpenAI(**azure_client_params)  # type: ignore
            else:
                azure_client = client
            ## COMPLETION CALL
            response = azure_client.embeddings.create(**data, timeout=timeout)  # type: ignore
            ## LOGGING
            logging_obj.post_call(
                input=input,
                api_key=api_key,
                additional_args={"complete_input_dict": data, "api_base": api_base},
                original_response=response,
            )

            return convert_to_model_response_object(response_object=response.model_dump(), model_response_object=model_response, response_type="embedding")  # type: ignore
        except AzureOpenAIError as e:
            exception_mapping_worked = True
            raise e
        except Exception as e:
            if hasattr(e, "status_code"):
                raise AzureOpenAIError(status_code=e.status_code, message=str(e))
            else:
                raise AzureOpenAIError(status_code=500, message=str(e))

    async def aimage_generation(
        self,
        data: dict,
        model_response: ModelResponse,
        azure_client_params: dict,
        api_key: str,
        input: list,
        client=None,
        logging_obj=None,
        timeout=None,
    ):
        response = None
        try:
            if client is None:
                client_session = litellm.aclient_session or httpx.AsyncClient(
                    transport=AsyncCustomHTTPTransport(),
                )
                openai_aclient = AsyncAzureOpenAI(
                    http_client=client_session, **azure_client_params
                )
            else:
                openai_aclient = client
            response = await openai_aclient.images.generate(**data, timeout=timeout)
            stringified_response = response.model_dump()
            ## LOGGING
            logging_obj.post_call(
                input=input,
                api_key=api_key,
                additional_args={"complete_input_dict": data},
                original_response=stringified_response,
            )
            return convert_to_model_response_object(
                response_object=stringified_response,
                model_response_object=model_response,
                response_type="image_generation",
            )
        except Exception as e:
            ## LOGGING
            logging_obj.post_call(
                input=input,
                api_key=api_key,
                additional_args={"complete_input_dict": data},
                original_response=str(e),
            )
            raise e

    def image_generation(
        self,
        prompt: str,
        timeout: float,
        model: Optional[str] = None,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        api_version: Optional[str] = None,
        model_response: Optional[litellm.utils.ImageResponse] = None,
        azure_ad_token: Optional[str] = None,
        logging_obj=None,
        optional_params=None,
        client=None,
        aimg_generation=None,
    ):
        exception_mapping_worked = False
        try:
            if not model:  # normalize empty/missing model to None
                model = None
            data = {"model": model, "prompt": prompt, **optional_params}
            max_retries = data.pop("max_retries", 2)
            if not isinstance(max_retries, int):
                raise AzureOpenAIError(
                    status_code=422, message="max retries must be an int"
                )

            # init AzureOpenAI Client
            azure_client_params = {
                "api_version": api_version,
                "azure_endpoint": api_base,
                "azure_deployment": model,
                "max_retries": max_retries,
                "timeout": timeout,
            }
            if api_key is not None:
                azure_client_params["api_key"] = api_key
            elif azure_ad_token is not None:
                azure_client_params["azure_ad_token"] = azure_ad_token

            if aimg_generation == True:
                response = self.aimage_generation(data=data, input=prompt, logging_obj=logging_obj, model_response=model_response, api_key=api_key, client=client, azure_client_params=azure_client_params, timeout=timeout)  # type: ignore
                return response

            if client is None:
                client_session = litellm.client_session or httpx.Client(
                    transport=CustomHTTPTransport(),
                )
                azure_client = AzureOpenAI(http_client=client_session, **azure_client_params)  # type: ignore
            else:
                azure_client = client

            ## LOGGING
            logging_obj.pre_call(
                input=prompt,
                api_key=azure_client.api_key,
                additional_args={
                    "headers": {"Authorization": f"Bearer {azure_client.api_key}"},
                    "api_base": azure_client._base_url._uri_reference,
                    "acompletion": False,
                    "complete_input_dict": data,
                },
            )

            ## COMPLETION CALL
            response = azure_client.images.generate(**data, timeout=timeout)  # type: ignore
            ## LOGGING
            logging_obj.post_call(
                input=prompt,  # was `input=input`, which shadowed the builtin; no `input` param exists here
                api_key=api_key,
                additional_args={"complete_input_dict": data},
                original_response=response,
            )
            return convert_to_model_response_object(response_object=response.model_dump(), model_response_object=model_response, response_type="image_generation")  # type: ignore
        except AzureOpenAIError as e:
            exception_mapping_worked = True
            raise e
        except Exception as e:
            if hasattr(e, "status_code"):
                raise AzureOpenAIError(status_code=e.status_code, message=str(e))
            else:
                raise AzureOpenAIError(status_code=500, message=str(e))
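
    # The Custom(Async)HTTPTransport wired in above exists to handle Azure's
    # dall-e-2 style endpoints, which complete image generation through a
    # polled async operation rather than a single request/response.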

    async def ahealth_check(
        self,
        model: Optional[str],
        api_key: str,
        api_base: str,
        api_version: str,
        timeout: float,
        mode: str,
        messages: Optional[list] = None,
        input: Optional[list] = None,
        prompt: Optional[str] = None,
    ):
        client_session = litellm.aclient_session or httpx.AsyncClient(
            transport=AsyncCustomHTTPTransport(),  # handle dall-e-2 calls
        )
        if "gateway.ai.cloudflare.com" in api_base:
            ## build base url - assume api base includes resource name
            if not api_base.endswith("/"):
                api_base += "/"
            api_base += f"{model}"
            client = AsyncAzureOpenAI(
                base_url=api_base,
                api_version=api_version,
                api_key=api_key,
                timeout=timeout,
                http_client=client_session,
            )
            model = None
            # cloudflare ai gateway, needs model=None
        else:
            client = AsyncAzureOpenAI(
                api_version=api_version,
                azure_endpoint=api_base,
                api_key=api_key,
                timeout=timeout,
                http_client=client_session,
            )

            # only run this check if it's not cloudflare ai gateway
            if model is None and mode != "image_generation":
                raise Exception("model is not set")

        completion = None

        if mode == "completion":
            completion = await client.completions.with_raw_response.create(
                model=model,  # type: ignore
                prompt=prompt,  # type: ignore
            )
        elif mode == "chat":
            if messages is None:
                raise Exception("messages is not set")
            completion = await client.chat.completions.with_raw_response.create(
                model=model,  # type: ignore
                messages=messages,  # type: ignore
            )
        elif mode == "embedding":
            if input is None:
                raise Exception("input is not set")
            completion = await client.embeddings.with_raw_response.create(
                model=model,  # type: ignore
                input=input,  # type: ignore
            )
        elif mode == "image_generation":
            if prompt is None:
                raise Exception("prompt is not set")
            completion = await client.images.with_raw_response.generate(
                model=model,  # type: ignore
                prompt=prompt,  # type: ignore
            )
        else:
            raise Exception("mode not set")
        response = {}

        if completion is None or not hasattr(completion, "headers"):
            raise Exception("invalid completion response")

        if (
            completion.headers.get("x-ratelimit-remaining-requests", None) is not None
        ):  # not provided for dall-e requests
            response["x-ratelimit-remaining-requests"] = completion.headers[
                "x-ratelimit-remaining-requests"
            ]

        if completion.headers.get("x-ratelimit-remaining-tokens", None) is not None:
            response["x-ratelimit-remaining-tokens"] = completion.headers[
                "x-ratelimit-remaining-tokens"
            ]
        return response
         | 
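A hedged sketch of consuming the dict this health check returns. The fragment above does not show the function's name or signature, so the `ahealth_check` call and the owning object below are assumptions; only the two `x-ratelimit-*` keys are guaranteed by the code shown.

    # illustrative caller; `azure_llm` stands in for whatever object owns the
    # health-check coroutine above (not visible in this fragment)
    async def deployment_has_headroom(azure_llm) -> bool:
        headers = await azure_llm.ahealth_check(  # assumed method name
            mode="chat",
            model="gpt-35-turbo",  # placeholder deployment name
            messages=[{"role": "user", "content": "ping"}],
        )
        # a header absent from the response (e.g. for dall-e) defaults to "0" here by choice
        return int(headers.get("x-ratelimit-remaining-requests", "0")) > 0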
    	
        litellm/llms/base.py
    ADDED
    
@@ -0,0 +1,45 @@
## This is a template base class to be used for adding new LLM providers via API calls
import litellm
import httpx
from typing import Optional


class BaseLLM:
    _client_session: Optional[httpx.Client] = None
    _aclient_session: Optional[httpx.AsyncClient] = None

    def create_client_session(self):
        # prefer a globally configured litellm session, else create a fresh one
        if litellm.client_session:
            _client_session = litellm.client_session
        else:
            _client_session = httpx.Client()

        return _client_session

    def create_aclient_session(self):
        if litellm.aclient_session:
            _aclient_session = litellm.aclient_session
        else:
            _aclient_session = httpx.AsyncClient()

        return _aclient_session

    def __exit__(self, exc_type, exc_val, exc_tb):
        # the session attributes always exist on the class, so guard on the
        # value being set rather than hasattr
        if self._client_session is not None:
            self._client_session.close()

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        if self._aclient_session is not None:
            await self._aclient_session.aclose()

    def validate_environment(self):  # set up the environment required to run the model
        pass

    def completion(
        self, *args, **kwargs
    ):  # logic for parsing in - calling - parsing out model completion calls
        pass

    def embedding(
        self, *args, **kwargs
    ):  # logic for parsing in - calling - parsing out model embedding calls
        pass
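A minimal sketch of how a new provider might build on this template; the endpoint and payload below are hypothetical, and only the session helper comes from `BaseLLM` itself.

    class MyProviderLLM(BaseLLM):
        def validate_environment(self):
            # e.g. assert the provider's API key is configured
            pass

        def completion(self, *args, **kwargs):
            # reuse litellm.client_session when set, else a fresh httpx.Client
            session = self.create_client_session()
            resp = session.post(
                "https://api.example.com/v1/generate",  # hypothetical endpoint
                json={"prompt": kwargs.get("prompt", "")},
            )
            resp.raise_for_status()
            return resp.json()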
    	
        litellm/llms/baseten.py
    ADDED
    
@@ -0,0 +1,164 @@
import os
import json
from enum import Enum
import requests
import time
from typing import Callable
from litellm.utils import ModelResponse, Usage


class BasetenError(Exception):
    def __init__(self, status_code, message):
        self.status_code = status_code
        self.message = message
        super().__init__(
            self.message
        )  # Call the base class constructor with the parameters it needs


def validate_environment(api_key):
    headers = {
        "accept": "application/json",
        "content-type": "application/json",
    }
    if api_key:
        headers["Authorization"] = f"Api-Key {api_key}"
    return headers


def completion(
    model: str,
    messages: list,
    model_response: ModelResponse,
    print_verbose: Callable,
    encoding,
    api_key,
    logging_obj,
    optional_params=None,
    litellm_params=None,
    logger_fn=None,
):
    headers = validate_environment(api_key)
    completion_url_fragment_1 = "https://app.baseten.co/models/"
    completion_url_fragment_2 = "/predict"
    prompt = ""
    for message in messages:
        # every role is flattened into the prompt the same way
        prompt += f"{message['content']}"
    data = {
        "inputs": prompt,
        "prompt": prompt,
        "parameters": optional_params,
        "stream": True
        if "stream" in optional_params and optional_params["stream"] == True
        else False,
    }

    ## LOGGING
    logging_obj.pre_call(
        input=prompt,
        api_key=api_key,
        additional_args={"complete_input_dict": data},
    )
    ## COMPLETION CALL
    response = requests.post(
        completion_url_fragment_1 + model + completion_url_fragment_2,
        headers=headers,
        data=json.dumps(data),
        stream=True
        if "stream" in optional_params and optional_params["stream"] == True
        else False,
    )
    if "text/event-stream" in response.headers["Content-Type"] or (
        "stream" in optional_params and optional_params["stream"] == True
    ):
        return response.iter_lines()
    else:
        ## LOGGING
        logging_obj.post_call(
            input=prompt,
            api_key=api_key,
            original_response=response.text,
            additional_args={"complete_input_dict": data},
        )
        print_verbose(f"raw model_response: {response.text}")
        ## RESPONSE OBJECT
        completion_response = response.json()
        if "error" in completion_response:
            raise BasetenError(
                message=completion_response["error"],
                status_code=response.status_code,
            )
        else:
            if "model_output" in completion_response:
                if (
                    isinstance(completion_response["model_output"], dict)
                    and "data" in completion_response["model_output"]
                    and isinstance(completion_response["model_output"]["data"], list)
                ):
                    model_response["choices"][0]["message"][
                        "content"
                    ] = completion_response["model_output"]["data"][0]
                elif isinstance(completion_response["model_output"], str):
                    model_response["choices"][0]["message"][
                        "content"
                    ] = completion_response["model_output"]
            elif "completion" in completion_response and isinstance(
                completion_response["completion"], str
            ):
                model_response["choices"][0]["message"][
                    "content"
                ] = completion_response["completion"]
            elif isinstance(completion_response, list) and len(completion_response) > 0:
                # membership test must run against the first element, which is
                # the dict actually indexed below
                if "generated_text" not in completion_response[0]:
                    raise BasetenError(
                        message=f"Unable to parse response. Original response: {response.text}",
                        status_code=response.status_code,
                    )
                model_response["choices"][0]["message"][
                    "content"
                ] = completion_response[0]["generated_text"]
                ## GETTING LOGPROBS
                if (
                    "details" in completion_response[0]
                    and "tokens" in completion_response[0]["details"]
                ):
                    model_response.choices[0].finish_reason = completion_response[0][
                        "details"
                    ]["finish_reason"]
                    sum_logprob = 0
                    for token in completion_response[0]["details"]["tokens"]:
                        sum_logprob += token["logprob"]
                    model_response["choices"][0]["message"]._logprobs = sum_logprob
            else:
                raise BasetenError(
                    message=f"Unable to parse response. Original response: {response.text}",
                    status_code=response.status_code,
                )

        ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here.
        prompt_tokens = len(encoding.encode(prompt))
        completion_tokens = len(
            encoding.encode(model_response["choices"][0]["message"]["content"])
        )

        model_response["created"] = int(time.time())
        model_response["model"] = model
        usage = Usage(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=prompt_tokens + completion_tokens,
        )
        model_response.usage = usage
        return model_response


def embedding():
    # logic for parsing in - calling - parsing out model embedding calls
    pass
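In normal use this module is not called directly; requests reach it through litellm's top-level `completion()` entrypoint. A hedged sketch, assuming litellm's usual `baseten/` routing prefix and `BASETEN_API_KEY` env var; the model-version id is a placeholder:

    import os
    from litellm import completion

    os.environ["BASETEN_API_KEY"] = "..."  # forwarded as "Authorization: Api-Key <key>" above

    response = completion(
        model="baseten/<model-version-id>",  # placeholder Baseten model-version id
        messages=[{"role": "user", "content": "Hello, how are you?"}],
    )
    print(response["choices"][0]["message"]["content"])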
    	
        litellm/llms/bedrock.py
    ADDED
    
@@ -0,0 +1,799 @@
import json, copy, types
import os
from enum import Enum
import time
from typing import Callable, Optional, Any, Union
import litellm
from litellm.utils import ModelResponse, get_secret, Usage
from .prompt_templates.factory import prompt_factory, custom_prompt
import httpx


class BedrockError(Exception):
    def __init__(self, status_code, message):
        self.status_code = status_code
        self.message = message
        self.request = httpx.Request(
            method="POST", url="https://us-west-2.console.aws.amazon.com/bedrock"
        )
        self.response = httpx.Response(status_code=status_code, request=self.request)
        super().__init__(
            self.message
        )  # Call the base class constructor with the parameters it needs


class AmazonTitanConfig:
    """
    Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=titan-text-express-v1

    Supported Params for the Amazon Titan models:

    - `maxTokenCount` (integer) max tokens,
    - `stopSequences` (string[]) list of stop sequence strings
    - `temperature` (float) temperature for model,
    - `topP` (int) top p for model
    """

    maxTokenCount: Optional[int] = None
    stopSequences: Optional[list] = None
    temperature: Optional[float] = None
    topP: Optional[int] = None

    def __init__(
        self,
        maxTokenCount: Optional[int] = None,
        stopSequences: Optional[list] = None,
        temperature: Optional[float] = None,
        topP: Optional[int] = None,
    ) -> None:
        locals_ = locals()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }

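All of the `Amazon*Config` classes in this file share this pattern: instantiating one writes the non-None arguments onto the class itself, and `get_config()` reads them back so `completion()` can apply them as defaults. A sketch of the round trip; the `if k not in inference_params` guard is an assumption about the merge loop, whose body is truncated at the end of this diff:

    import litellm

    # set provider-wide defaults once (stored on the class itself)
    litellm.AmazonTitanConfig(maxTokenCount=512, temperature=0.2)

    # later, completion() merges them under any caller-supplied params
    config = litellm.AmazonTitanConfig.get_config()
    inference_params = {"temperature": 0.9}  # caller-supplied value wins
    for k, v in config.items():
        if k not in inference_params:  # assumed guard; the loop body is truncated below
            inference_params[k] = v

    print(inference_params)  # {'temperature': 0.9, 'maxTokenCount': 512}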
class AmazonAnthropicConfig:
    """
    Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=claude

    Supported Params for the Amazon / Anthropic models:

    - `max_tokens_to_sample` (integer) max tokens,
    - `temperature` (float) model temperature,
    - `top_k` (integer) top k,
    - `top_p` (integer) top p,
    - `stop_sequences` (string[]) list of stop sequences - e.g. ["\\n\\nHuman:"],
    - `anthropic_version` (string) version of anthropic for bedrock - e.g. "bedrock-2023-05-31"
    """

    max_tokens_to_sample: Optional[int] = litellm.max_tokens
    stop_sequences: Optional[list] = None
    temperature: Optional[float] = None
    top_k: Optional[int] = None
    top_p: Optional[int] = None
    anthropic_version: Optional[str] = None

    def __init__(
        self,
        max_tokens_to_sample: Optional[int] = None,
        stop_sequences: Optional[list] = None,
        temperature: Optional[float] = None,
        top_k: Optional[int] = None,
        top_p: Optional[int] = None,
        anthropic_version: Optional[str] = None,
    ) -> None:
        locals_ = locals()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }


class AmazonCohereConfig:
    """
    Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=command

    Supported Params for the Amazon / Cohere models:

    - `max_tokens` (integer) max tokens,
    - `temperature` (float) model temperature,
    - `return_likelihood` (string) n/a
    """

    max_tokens: Optional[int] = None
    temperature: Optional[float] = None
    return_likelihood: Optional[str] = None

    def __init__(
        self,
        max_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
        return_likelihood: Optional[str] = None,
    ) -> None:
        locals_ = locals()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }


class AmazonAI21Config:
    """
    Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=j2-ultra

    Supported Params for the Amazon / AI21 models:

    - `maxTokens` (int32): The maximum number of tokens to generate per result. Optional, default is 16. If no `stopSequences` are given, generation stops after producing `maxTokens`.

    - `temperature` (float): Modifies the distribution from which tokens are sampled. Optional, default is 0.7. A value of 0 essentially disables sampling and results in greedy decoding.

    - `topP` (float): Used for sampling tokens from the corresponding top percentile of probability mass. Optional, default is 1. For instance, a value of 0.9 considers only tokens comprising the top 90% probability mass.

    - `stopSequences` (array of strings): Stops decoding if any of the input strings is generated. Optional.

    - `frequencyPenalty` (object): Placeholder for frequency penalty object.

    - `presencePenalty` (object): Placeholder for presence penalty object.

    - `countPenalty` (object): Placeholder for count penalty object.
    """

    maxTokens: Optional[int] = None
    temperature: Optional[float] = None
    topP: Optional[float] = None
    stopSequences: Optional[list] = None
    frequencyPenalty: Optional[dict] = None
    presencePenalty: Optional[dict] = None
    countPenalty: Optional[dict] = None

    def __init__(
        self,
        maxTokens: Optional[int] = None,
        temperature: Optional[float] = None,
        topP: Optional[float] = None,
        stopSequences: Optional[list] = None,
        frequencyPenalty: Optional[dict] = None,
        presencePenalty: Optional[dict] = None,
        countPenalty: Optional[dict] = None,
    ) -> None:
        locals_ = locals()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }


class AnthropicConstants(Enum):
    HUMAN_PROMPT = "\n\nHuman: "
    AI_PROMPT = "\n\nAssistant: "


class AmazonLlamaConfig:
    """
    Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=meta.llama2-13b-chat-v1

    Supported Params for the Amazon / Meta Llama models:

    - `max_gen_len` (integer) max tokens,
    - `temperature` (float) temperature for model,
    - `top_p` (float) top p for model
    """

    max_gen_len: Optional[int] = None
    temperature: Optional[float] = None
    topP: Optional[float] = None

    def __init__(
        self,
        max_gen_len: Optional[int] = None,
        temperature: Optional[float] = None,
        topP: Optional[float] = None,
    ) -> None:
        locals_ = locals()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }

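`completion()` at the end of this file picks between these config classes by splitting the Bedrock model id on its first dot (`provider = model.split(".")[0]`). For example:

    # Bedrock model ids are "<provider>.<model>-<version>", so the prefix selects the config class
    for model_id in [
        "anthropic.claude-v2",
        "amazon.titan-text-express-v1",
        "cohere.command-text-v14",
        "ai21.j2-ultra-v1",
        "meta.llama2-13b-chat-v1",
    ]:
        print(model_id.split(".")[0])  # anthropic, amazon, cohere, ai21, meta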
def init_bedrock_client(
    region_name=None,
    aws_access_key_id: Optional[str] = None,
    aws_secret_access_key: Optional[str] = None,
    aws_region_name: Optional[str] = None,
    aws_bedrock_runtime_endpoint: Optional[str] = None,
):
    # check for custom AWS_REGION_NAME and use it if not passed to init_bedrock_client
    litellm_aws_region_name = get_secret("AWS_REGION_NAME", None)
    standard_aws_region_name = get_secret("AWS_REGION", None)

    ## CHECK IF 'os.environ/' is passed in
    # Define the list of parameters to check
    params_to_check = [
        aws_access_key_id,
        aws_secret_access_key,
        aws_region_name,
        aws_bedrock_runtime_endpoint,
    ]

    # Iterate over parameters and update if needed
    for i, param in enumerate(params_to_check):
        if param and param.startswith("os.environ/"):
            params_to_check[i] = get_secret(param)
    # Assign updated values back to parameters
    (
        aws_access_key_id,
        aws_secret_access_key,
        aws_region_name,
        aws_bedrock_runtime_endpoint,
    ) = params_to_check
    if region_name:
        pass
    elif aws_region_name:
        region_name = aws_region_name
    elif litellm_aws_region_name:
        region_name = litellm_aws_region_name
    elif standard_aws_region_name:
        region_name = standard_aws_region_name
    else:
        raise BedrockError(
            message="AWS region not set: set AWS_REGION_NAME or AWS_REGION env variable or in .env file",
            status_code=401,
        )

    # check for custom AWS_BEDROCK_RUNTIME_ENDPOINT and use it if not passed to init_bedrock_client
    env_aws_bedrock_runtime_endpoint = get_secret("AWS_BEDROCK_RUNTIME_ENDPOINT")
    if aws_bedrock_runtime_endpoint:
        endpoint_url = aws_bedrock_runtime_endpoint
    elif env_aws_bedrock_runtime_endpoint:
        endpoint_url = env_aws_bedrock_runtime_endpoint
    else:
        endpoint_url = f"https://bedrock-runtime.{region_name}.amazonaws.com"

    import boto3

    if aws_access_key_id is not None:
        # uses auth params passed to completion
        # aws_access_key_id is not None, assume user is trying to auth using litellm.completion

        client = boto3.client(
            service_name="bedrock-runtime",
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            region_name=region_name,
            endpoint_url=endpoint_url,
        )
    else:
        # aws_access_key_id is None, assume user is trying to auth using env variables
        # boto3 automatically reads env variables

        client = boto3.client(
            service_name="bedrock-runtime",
            region_name=region_name,
            endpoint_url=endpoint_url,
        )

    return client

             | 
| 365 | 
            +
            def convert_messages_to_prompt(model, messages, provider, custom_prompt_dict):
         | 
| 366 | 
            +
                # handle anthropic prompts using anthropic constants
         | 
| 367 | 
            +
                if provider == "anthropic":
         | 
| 368 | 
            +
                    if model in custom_prompt_dict:
         | 
| 369 | 
            +
                        # check if the model has a registered custom prompt
         | 
| 370 | 
            +
                        model_prompt_details = custom_prompt_dict[model]
         | 
| 371 | 
            +
                        prompt = custom_prompt(
         | 
| 372 | 
            +
                            role_dict=model_prompt_details["roles"],
         | 
| 373 | 
            +
                            initial_prompt_value=model_prompt_details["initial_prompt_value"],
         | 
| 374 | 
            +
                            final_prompt_value=model_prompt_details["final_prompt_value"],
         | 
| 375 | 
            +
                            messages=messages,
         | 
| 376 | 
            +
                        )
         | 
| 377 | 
            +
                    else:
         | 
| 378 | 
            +
                        prompt = prompt_factory(
         | 
| 379 | 
            +
                            model=model, messages=messages, custom_llm_provider="anthropic"
         | 
| 380 | 
            +
                        )
         | 
| 381 | 
            +
                else:
         | 
| 382 | 
            +
                    prompt = ""
         | 
| 383 | 
            +
                    for message in messages:
         | 
| 384 | 
            +
                        if "role" in message:
         | 
| 385 | 
            +
                            if message["role"] == "user":
         | 
| 386 | 
            +
                                prompt += f"{message['content']}"
         | 
| 387 | 
            +
                            else:
         | 
| 388 | 
            +
                                prompt += f"{message['content']}"
         | 
| 389 | 
            +
                        else:
         | 
| 390 | 
            +
                            prompt += f"{message['content']}"
         | 
| 391 | 
            +
                return prompt
         | 
| 392 | 
            +
             | 
| 393 | 
            +
             | 
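The `custom_prompt_dict` lookup above expects one entry per model with `roles`, `initial_prompt_value`, and `final_prompt_value` keys. A sketch of the expected shape; the nested `pre_message`/`post_message` structure is an assumption about `custom_prompt()` in prompt_templates.factory, which is not shown in this diff:

    custom_prompt_dict = {
        "anthropic.claude-v2": {
            "roles": {
                "user": {"pre_message": "\n\nHuman: ", "post_message": ""},
                "assistant": {"pre_message": "\n\nAssistant: ", "post_message": ""},
            },
            "initial_prompt_value": "",
            "final_prompt_value": "\n\nAssistant: ",
        }
    }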
"""
BEDROCK AUTH Keys/Vars
os.environ['AWS_ACCESS_KEY_ID'] = ""
os.environ['AWS_SECRET_ACCESS_KEY'] = ""
"""


# set os.environ['AWS_REGION_NAME'] = <your-region_name>


def completion(
    model: str,
    messages: list,
    custom_prompt_dict: dict,
    model_response: ModelResponse,
    print_verbose: Callable,
    encoding,
    logging_obj,
    optional_params=None,
    litellm_params=None,
    logger_fn=None,
):
    exception_mapping_worked = False
    try:
        # pop aws_secret_access_key, aws_access_key_id, aws_region_name from kwargs, since completion calls fail with them
        aws_secret_access_key = optional_params.pop("aws_secret_access_key", None)
        aws_access_key_id = optional_params.pop("aws_access_key_id", None)
        aws_region_name = optional_params.pop("aws_region_name", None)
        aws_bedrock_runtime_endpoint = optional_params.pop(
            "aws_bedrock_runtime_endpoint", None
        )

        # use passed in BedrockRuntime.Client if provided, otherwise create a new one
        client = optional_params.pop("aws_bedrock_client", None)

        # only init client, if user did not pass one
        if client is None:
            client = init_bedrock_client(
                aws_access_key_id=aws_access_key_id,
                aws_secret_access_key=aws_secret_access_key,
                aws_region_name=aws_region_name,
                aws_bedrock_runtime_endpoint=aws_bedrock_runtime_endpoint,
            )

        modelId = (
            optional_params.pop("model_id", None) or model
        )  # default to model if not passed
        provider = model.split(".")[0]
        prompt = convert_messages_to_prompt(
            model, messages, provider, custom_prompt_dict
        )
        inference_params = copy.deepcopy(optional_params)
        stream = inference_params.pop("stream", False)
        if provider == "anthropic":
            ## LOAD CONFIG
            config = litellm.AmazonAnthropicConfig.get_config()
            for k, v in config.items():
         | 
| 452 | 
            +
                            if (
         | 
| 453 | 
            +
                                k not in inference_params
         | 
| 454 | 
            +
                            ):  # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
         | 
| 455 | 
            +
                                inference_params[k] = v
         | 
| 456 | 
            +
                        data = json.dumps({"prompt": prompt, **inference_params})
         | 
| 457 | 
            +
                    elif provider == "ai21":
         | 
| 458 | 
            +
                        ## LOAD CONFIG
         | 
| 459 | 
            +
                        config = litellm.AmazonAI21Config.get_config()
         | 
| 460 | 
            +
                        for k, v in config.items():
         | 
| 461 | 
            +
                            if (
         | 
| 462 | 
            +
                                k not in inference_params
         | 
| 463 | 
            +
                            ):  # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
         | 
| 464 | 
            +
                                inference_params[k] = v
         | 
| 465 | 
            +
             | 
| 466 | 
            +
                        data = json.dumps({"prompt": prompt, **inference_params})
         | 
| 467 | 
            +
                    elif provider == "cohere":
         | 
| 468 | 
            +
                        ## LOAD CONFIG
         | 
| 469 | 
            +
                        config = litellm.AmazonCohereConfig.get_config()
         | 
| 470 | 
            +
                        for k, v in config.items():
         | 
| 471 | 
            +
                            if (
         | 
| 472 | 
            +
                                k not in inference_params
         | 
| 473 | 
            +
                            ):  # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
         | 
| 474 | 
            +
                                inference_params[k] = v
         | 
| 475 | 
            +
                        if optional_params.get("stream", False) == True:
         | 
| 476 | 
            +
                            inference_params[
         | 
| 477 | 
            +
                                "stream"
         | 
| 478 | 
            +
                            ] = True  # cohere requires stream = True in inference params
         | 
| 479 | 
            +
                        data = json.dumps({"prompt": prompt, **inference_params})
         | 
| 480 | 
            +
                    elif provider == "meta":
         | 
| 481 | 
            +
                        ## LOAD CONFIG
         | 
| 482 | 
            +
                        config = litellm.AmazonLlamaConfig.get_config()
         | 
| 483 | 
            +
                        for k, v in config.items():
         | 
| 484 | 
            +
                            if (
         | 
| 485 | 
            +
                                k not in inference_params
         | 
| 486 | 
            +
                            ):  # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
         | 
| 487 | 
            +
                                inference_params[k] = v
         | 
| 488 | 
            +
                        data = json.dumps({"prompt": prompt, **inference_params})
         | 
| 489 | 
            +
                    elif provider == "amazon":  # amazon titan
         | 
| 490 | 
            +
                        ## LOAD CONFIG
         | 
| 491 | 
            +
                        config = litellm.AmazonTitanConfig.get_config()
         | 
| 492 | 
            +
                        for k, v in config.items():
         | 
| 493 | 
            +
                            if (
         | 
| 494 | 
            +
                                k not in inference_params
         | 
| 495 | 
            +
                            ):  # completion(top_k=3) > amazon_config(top_k=3) <- allows for dynamic variables to be passed in
         | 
| 496 | 
            +
                                inference_params[k] = v
         | 
| 497 | 
            +
             | 
| 498 | 
            +
                        data = json.dumps(
         | 
| 499 | 
            +
                            {
         | 
| 500 | 
            +
                                "inputText": prompt,
         | 
| 501 | 
            +
                                "textGenerationConfig": inference_params,
         | 
| 502 | 
            +
                            }
         | 
| 503 | 
            +
                        )
         | 
| 504 | 
            +
                    else:
         | 
| 505 | 
            +
                        data = json.dumps({})
         | 
| 506 | 
            +
             | 
| 507 | 
            +
                    ## COMPLETION CALL
         | 
| 508 | 
            +
                    accept = "application/json"
         | 
| 509 | 
            +
                    contentType = "application/json"
         | 
| 510 | 
            +
                    if stream == True:
         | 
| 511 | 
            +
                        if provider == "ai21":
         | 
| 512 | 
            +
                            ## LOGGING
         | 
| 513 | 
            +
                            request_str = f"""
         | 
| 514 | 
            +
                            response = client.invoke_model(
         | 
| 515 | 
            +
                                body={data},
         | 
| 516 | 
            +
                                modelId={modelId},
         | 
| 517 | 
            +
                                accept=accept,
         | 
| 518 | 
            +
                                contentType=contentType
         | 
| 519 | 
            +
                            )
         | 
| 520 | 
            +
                            """
         | 
| 521 | 
            +
                            logging_obj.pre_call(
         | 
| 522 | 
            +
                                input=prompt,
         | 
| 523 | 
            +
                                api_key="",
         | 
| 524 | 
            +
                                additional_args={
         | 
| 525 | 
            +
                                    "complete_input_dict": data,
         | 
| 526 | 
            +
                                    "request_str": request_str,
         | 
| 527 | 
            +
                                },
         | 
| 528 | 
            +
                            )
         | 
| 529 | 
            +
             | 
| 530 | 
            +
                            response = client.invoke_model(
         | 
| 531 | 
            +
                                body=data, modelId=modelId, accept=accept, contentType=contentType
         | 
| 532 | 
            +
                            )
         | 
| 533 | 
            +
             | 
| 534 | 
            +
                            response = response.get("body").read()
         | 
| 535 | 
            +
                            return response
         | 
| 536 | 
            +
                        else:
         | 
| 537 | 
            +
                            ## LOGGING
         | 
| 538 | 
            +
                            request_str = f"""
         | 
| 539 | 
            +
                            response = client.invoke_model_with_response_stream(
         | 
| 540 | 
            +
                                body={data},
         | 
| 541 | 
            +
                                modelId={modelId},
         | 
| 542 | 
            +
                                accept=accept,
         | 
| 543 | 
            +
                                contentType=contentType
         | 
| 544 | 
            +
                            )
         | 
| 545 | 
            +
                            """
         | 
| 546 | 
            +
                            logging_obj.pre_call(
         | 
| 547 | 
            +
                                input=prompt,
         | 
| 548 | 
            +
                                api_key="",
         | 
| 549 | 
            +
                                additional_args={
         | 
| 550 | 
            +
                                    "complete_input_dict": data,
         | 
| 551 | 
            +
                                    "request_str": request_str,
         | 
| 552 | 
            +
                                },
         | 
| 553 | 
            +
                            )
         | 
| 554 | 
            +
             | 
| 555 | 
            +
                            response = client.invoke_model_with_response_stream(
         | 
| 556 | 
            +
                                body=data, modelId=modelId, accept=accept, contentType=contentType
         | 
| 557 | 
            +
                            )
         | 
| 558 | 
            +
                            response = response.get("body")
         | 
| 559 | 
            +
                            return response
         | 
| 560 | 
            +
                    try:
         | 
| 561 | 
            +
                        ## LOGGING
         | 
| 562 | 
            +
                        request_str = f"""
         | 
| 563 | 
            +
                        response = client.invoke_model(
         | 
| 564 | 
            +
                            body={data},
         | 
| 565 | 
            +
                            modelId={modelId},
         | 
| 566 | 
            +
                            accept=accept,
         | 
| 567 | 
            +
                            contentType=contentType
         | 
| 568 | 
            +
                        )
         | 
| 569 | 
            +
                        """
         | 
| 570 | 
            +
                        logging_obj.pre_call(
         | 
| 571 | 
            +
                            input=prompt,
         | 
| 572 | 
            +
                            api_key="",
         | 
| 573 | 
            +
                            additional_args={
         | 
| 574 | 
            +
                                "complete_input_dict": data,
         | 
| 575 | 
            +
                                "request_str": request_str,
         | 
| 576 | 
            +
                            },
         | 
| 577 | 
            +
                        )
         | 
| 578 | 
            +
                        response = client.invoke_model(
         | 
| 579 | 
            +
                            body=data, modelId=modelId, accept=accept, contentType=contentType
         | 
| 580 | 
            +
                        )
         | 
| 581 | 
            +
                    except client.exceptions.ValidationException as e:
         | 
| 582 | 
            +
                        if "The provided model identifier is invalid" in str(e):
         | 
| 583 | 
            +
                            raise BedrockError(status_code=404, message=str(e))
         | 
| 584 | 
            +
                        raise BedrockError(status_code=400, message=str(e))
         | 
| 585 | 
            +
                    except Exception as e:
         | 
| 586 | 
            +
                        raise BedrockError(status_code=500, message=str(e))
         | 
| 587 | 
            +
             | 
| 588 | 
            +
                    response_body = json.loads(response.get("body").read())
         | 
| 589 | 
            +
             | 
| 590 | 
            +
                    ## LOGGING
         | 
| 591 | 
            +
                    logging_obj.post_call(
         | 
| 592 | 
            +
                        input=prompt,
         | 
| 593 | 
            +
                        api_key="",
         | 
| 594 | 
            +
                        original_response=json.dumps(response_body),
         | 
| 595 | 
            +
                        additional_args={"complete_input_dict": data},
         | 
| 596 | 
            +
                    )
         | 
| 597 | 
            +
                    print_verbose(f"raw model_response: {response}")
         | 
| 598 | 
            +
                    ## RESPONSE OBJECT
         | 
| 599 | 
            +
                    outputText = "default"
         | 
| 600 | 
            +
                    if provider == "ai21":
         | 
| 601 | 
            +
                        outputText = response_body.get("completions")[0].get("data").get("text")
         | 
| 602 | 
            +
                    elif provider == "anthropic":
         | 
| 603 | 
            +
                        outputText = response_body["completion"]
         | 
| 604 | 
            +
                        model_response["finish_reason"] = response_body["stop_reason"]
         | 
| 605 | 
            +
                    elif provider == "cohere":
         | 
| 606 | 
            +
                        outputText = response_body["generations"][0]["text"]
         | 
| 607 | 
            +
                    elif provider == "meta":
         | 
| 608 | 
            +
                        outputText = response_body["generation"]
         | 
| 609 | 
            +
                    else:  # amazon titan
         | 
| 610 | 
            +
                        outputText = response_body.get("results")[0].get("outputText")
         | 
| 611 | 
            +
             | 
| 612 | 
            +
                    response_metadata = response.get("ResponseMetadata", {})
         | 
| 613 | 
            +
                    if response_metadata.get("HTTPStatusCode", 500) >= 400:
         | 
| 614 | 
            +
                        raise BedrockError(
         | 
| 615 | 
            +
                            message=outputText,
         | 
| 616 | 
            +
                            status_code=response_metadata.get("HTTPStatusCode", 500),
         | 
| 617 | 
            +
                        )
         | 
| 618 | 
            +
                    else:
         | 
| 619 | 
            +
                        try:
         | 
| 620 | 
            +
                            if len(outputText) > 0:
         | 
| 621 | 
            +
                                model_response["choices"][0]["message"]["content"] = outputText
         | 
| 622 | 
            +
                        except:
         | 
| 623 | 
            +
                            raise BedrockError(
         | 
| 624 | 
            +
                                message=json.dumps(outputText),
         | 
| 625 | 
            +
                                status_code=response_metadata.get("HTTPStatusCode", 500),
         | 
| 626 | 
            +
                            )
         | 
| 627 | 
            +
             | 
| 628 | 
            +
                    ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here.
         | 
| 629 | 
            +
                    prompt_tokens = len(encoding.encode(prompt))
         | 
| 630 | 
            +
                    completion_tokens = len(
         | 
| 631 | 
            +
                        encoding.encode(model_response["choices"][0]["message"].get("content", ""))
         | 
| 632 | 
            +
                    )
         | 
| 633 | 
            +
             | 
| 634 | 
            +
                    model_response["created"] = int(time.time())
         | 
| 635 | 
            +
                    model_response["model"] = model
         | 
| 636 | 
            +
                    usage = Usage(
         | 
| 637 | 
            +
                        prompt_tokens=prompt_tokens,
         | 
| 638 | 
            +
                        completion_tokens=completion_tokens,
         | 
| 639 | 
            +
                        total_tokens=prompt_tokens + completion_tokens,
         | 
| 640 | 
            +
                    )
         | 
| 641 | 
            +
                    model_response.usage = usage
         | 
| 642 | 
            +
                    return model_response
         | 
| 643 | 
            +
                except BedrockError as e:
         | 
| 644 | 
            +
                    exception_mapping_worked = True
         | 
| 645 | 
            +
                    raise e
         | 
| 646 | 
            +
                except Exception as e:
         | 
| 647 | 
            +
                    if exception_mapping_worked:
         | 
| 648 | 
            +
                        raise e
         | 
| 649 | 
            +
                    else:
         | 
| 650 | 
            +
                        import traceback
         | 
| 651 | 
            +
             | 
| 652 | 
            +
                        raise BedrockError(status_code=500, message=traceback.format_exc())
         | 
| 653 | 
            +
             | 
| 654 | 
            +
             | 
| 655 | 
            +
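For orientation, the provider branches above build differently shaped request bodies for the same prompt. A minimal sketch of the two main shapes (parameter values are illustrative, not taken from this diff):

    import json

    prompt = "\n\nHuman: hello\n\nAssistant:"
    # anthropic / ai21 / cohere / meta models take a top-level "prompt" key
    body_prompt_style = json.dumps({"prompt": prompt, "max_tokens_to_sample": 256})
    # amazon titan models nest generation params under "textGenerationConfig"
    body_titan_style = json.dumps(
        {"inputText": prompt, "textGenerationConfig": {"maxTokenCount": 256}}
    )
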
def _embedding_func_single(
    model: str,
    input: str,
    client: Any,
    optional_params=None,
    encoding=None,
    logging_obj=None,
):
    # logic for parsing in - calling - parsing out model embedding calls
    ## FORMAT EMBEDDING INPUT ##
    provider = model.split(".")[0]
    inference_params = copy.deepcopy(optional_params)
    inference_params.pop(
        "user", None
    )  # make sure user is not passed in for bedrock call
    modelId = (
        optional_params.pop("model_id", None) or model
    )  # default to model if not passed
    if provider == "amazon":
        input = input.replace(os.linesep, " ")
        data = {"inputText": input, **inference_params}
        # data = json.dumps(data)
    elif provider == "cohere":
        inference_params["input_type"] = inference_params.get(
            "input_type", "search_document"
        )  # aws bedrock example default - https://us-east-1.console.aws.amazon.com/bedrock/home?region=us-east-1#/providers?model=cohere.embed-english-v3
        data = {"texts": [input], **inference_params}  # type: ignore
    body = json.dumps(data).encode("utf-8")
    ## LOGGING
    request_str = f"""
    response = client.invoke_model(
        body={body},
        modelId={modelId},
        accept="*/*",
        contentType="application/json",
    )"""  # type: ignore
    logging_obj.pre_call(
        input=input,
        api_key="",  # boto3 is used for init.
        additional_args={
            "complete_input_dict": {"model": modelId, "texts": input},
            "request_str": request_str,
        },
    )
    try:
        response = client.invoke_model(
            body=body,
            modelId=modelId,
            accept="*/*",
            contentType="application/json",
        )
        response_body = json.loads(response.get("body").read())
        ## LOGGING
        logging_obj.post_call(
            input=input,
            api_key="",
            additional_args={"complete_input_dict": data},
            original_response=json.dumps(response_body),
        )
        if provider == "cohere":
            response = response_body.get("embeddings")
            # flatten list
            response = [item for sublist in response for item in sublist]
            return response
        elif provider == "amazon":
            return response_body.get("embedding")
    except Exception as e:
        raise BedrockError(
            message=f"Embedding Error with model {model}: {e}", status_code=500
        )


def embedding(
    model: str,
    input: Union[list, str],
    api_key: Optional[str] = None,
    logging_obj=None,
    model_response=None,
    optional_params=None,
    encoding=None,
):
    ### BOTO3 INIT ###
    # pop aws_secret_access_key, aws_access_key_id, aws_region_name from kwargs, since completion calls fail with them
    aws_secret_access_key = optional_params.pop("aws_secret_access_key", None)
    aws_access_key_id = optional_params.pop("aws_access_key_id", None)
    aws_region_name = optional_params.pop("aws_region_name", None)
    aws_bedrock_runtime_endpoint = optional_params.pop(
        "aws_bedrock_runtime_endpoint", None
    )

    # use passed in BedrockRuntime.Client if provided, otherwise create a new one
    client = init_bedrock_client(
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
        aws_region_name=aws_region_name,
        aws_bedrock_runtime_endpoint=aws_bedrock_runtime_endpoint,
    )
    if isinstance(input, str):
        embeddings = [
            _embedding_func_single(
                model,
                input,
                optional_params=optional_params,
                client=client,
                logging_obj=logging_obj,
            )
        ]
    else:
        ## Embedding Call
        embeddings = [
            _embedding_func_single(
                model,
                i,
                optional_params=optional_params,
                client=client,
                logging_obj=logging_obj,
            )
            for i in input
        ]  # [TODO]: make these parallel calls

    ## Populate OpenAI compliant dictionary
    embedding_response = []
    for idx, embedding in enumerate(embeddings):
        embedding_response.append(
            {
                "object": "embedding",
                "index": idx,
                "embedding": embedding,
            }
        )
    model_response["object"] = "list"
    model_response["data"] = embedding_response
    model_response["model"] = model
    input_tokens = 0

    input_str = "".join(input)

    input_tokens += len(encoding.encode(input_str))

    usage = Usage(
        prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens
    )
    model_response.usage = usage

    return model_response
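
As a usage sketch for the completion path above (this assumes litellm's top-level completion() routes "bedrock/..." model ids to this module and that AWS credentials are set in the environment; the model id is illustrative):

    import litellm

    # provider is parsed from the model id prefix, e.g. "anthropic" from "anthropic.claude-v2"
    response = litellm.completion(
        model="bedrock/anthropic.claude-v2",
        messages=[{"role": "user", "content": "hello"}],
    )
    print(response.choices[0].message.content)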
        litellm/llms/cloudflare.py
    ADDED
    
@@ -0,0 +1,176 @@
import os, types
import json
from enum import Enum
import requests
import time
from typing import Callable, Optional
import litellm
import httpx
from litellm.utils import ModelResponse, Usage
from .prompt_templates.factory import prompt_factory, custom_prompt


class CloudflareError(Exception):
    def __init__(self, status_code, message):
        self.status_code = status_code
        self.message = message
        self.request = httpx.Request(method="POST", url="https://api.cloudflare.com")
        self.response = httpx.Response(status_code=status_code, request=self.request)
        super().__init__(
            self.message
        )  # Call the base class constructor with the parameters it needs


class CloudflareConfig:
    max_tokens: Optional[int] = None
    stream: Optional[bool] = None

    def __init__(
        self,
        max_tokens: Optional[int] = None,
        stream: Optional[bool] = None,
    ) -> None:
        locals_ = locals()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }


def validate_environment(api_key):
    if api_key is None:
        raise ValueError(
            "Missing Cloudflare API Key - A call is being made to cloudflare but no key is set either in the environment variables or via params"
        )
    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "Authorization": "Bearer " + api_key,
    }
    return headers

| 71 | 
            +
                model: str,
         | 
| 72 | 
            +
                messages: list,
         | 
| 73 | 
            +
                api_base: str,
         | 
| 74 | 
            +
                model_response: ModelResponse,
         | 
| 75 | 
            +
                print_verbose: Callable,
         | 
| 76 | 
            +
                encoding,
         | 
| 77 | 
            +
                api_key,
         | 
| 78 | 
            +
                logging_obj,
         | 
| 79 | 
            +
                custom_prompt_dict={},
         | 
| 80 | 
            +
                optional_params=None,
         | 
| 81 | 
            +
                litellm_params=None,
         | 
| 82 | 
            +
                logger_fn=None,
         | 
| 83 | 
            +
            ):
         | 
| 84 | 
            +
                headers = validate_environment(api_key)
         | 
| 85 | 
            +
             | 
| 86 | 
            +
                ## Load Config
         | 
| 87 | 
            +
                config = litellm.CloudflareConfig.get_config()
         | 
| 88 | 
            +
                for k, v in config.items():
         | 
| 89 | 
            +
                    if k not in optional_params:
         | 
| 90 | 
            +
                        optional_params[k] = v
         | 
| 91 | 
            +
             | 
| 92 | 
            +
                print_verbose(f"CUSTOM PROMPT DICT: {custom_prompt_dict}; model: {model}")
         | 
| 93 | 
            +
                if model in custom_prompt_dict:
         | 
| 94 | 
            +
                    # check if the model has a registered custom prompt
         | 
| 95 | 
            +
                    model_prompt_details = custom_prompt_dict[model]
         | 
| 96 | 
            +
                    prompt = custom_prompt(
         | 
| 97 | 
            +
                        role_dict=model_prompt_details.get("roles", {}),
         | 
| 98 | 
            +
                        initial_prompt_value=model_prompt_details.get("initial_prompt_value", ""),
         | 
| 99 | 
            +
                        final_prompt_value=model_prompt_details.get("final_prompt_value", ""),
         | 
| 100 | 
            +
                        bos_token=model_prompt_details.get("bos_token", ""),
         | 
| 101 | 
            +
                        eos_token=model_prompt_details.get("eos_token", ""),
         | 
| 102 | 
            +
                        messages=messages,
         | 
| 103 | 
            +
                    )
         | 
| 104 | 
            +
             | 
| 105 | 
            +
                # cloudflare adds the model to the api base
         | 
| 106 | 
            +
                api_base = api_base + model
         | 
| 107 | 
            +
             | 
| 108 | 
            +
                data = {
         | 
| 109 | 
            +
                    "messages": messages,
         | 
| 110 | 
            +
                    **optional_params,
         | 
| 111 | 
            +
                }
         | 
| 112 | 
            +
             | 
| 113 | 
            +
                ## LOGGING
         | 
| 114 | 
            +
                logging_obj.pre_call(
         | 
| 115 | 
            +
                    input=messages,
         | 
| 116 | 
            +
                    api_key=api_key,
         | 
| 117 | 
            +
                    additional_args={
         | 
| 118 | 
            +
                        "headers": headers,
         | 
| 119 | 
            +
                        "api_base": api_base,
         | 
| 120 | 
            +
                        "complete_input_dict": data,
         | 
| 121 | 
            +
                    },
         | 
| 122 | 
            +
                )
         | 
| 123 | 
            +
             | 
| 124 | 
            +
                ## COMPLETION CALL
         | 
| 125 | 
            +
                if "stream" in optional_params and optional_params["stream"] == True:
         | 
| 126 | 
            +
                    response = requests.post(
         | 
| 127 | 
            +
                        api_base,
         | 
| 128 | 
            +
                        headers=headers,
         | 
| 129 | 
            +
                        data=json.dumps(data),
         | 
| 130 | 
            +
                        stream=optional_params["stream"],
         | 
| 131 | 
            +
                    )
         | 
| 132 | 
            +
                    return response.iter_lines()
         | 
| 133 | 
            +
                else:
         | 
| 134 | 
            +
                    response = requests.post(api_base, headers=headers, data=json.dumps(data))
         | 
| 135 | 
            +
                    ## LOGGING
         | 
| 136 | 
            +
                    logging_obj.post_call(
         | 
| 137 | 
            +
                        input=messages,
         | 
| 138 | 
            +
                        api_key=api_key,
         | 
| 139 | 
            +
                        original_response=response.text,
         | 
| 140 | 
            +
                        additional_args={"complete_input_dict": data},
         | 
| 141 | 
            +
                    )
         | 
| 142 | 
            +
                    print_verbose(f"raw model_response: {response.text}")
         | 
| 143 | 
            +
                    ## RESPONSE OBJECT
         | 
| 144 | 
            +
                    if response.status_code != 200:
         | 
| 145 | 
            +
                        raise CloudflareError(
         | 
| 146 | 
            +
                            status_code=response.status_code, message=response.text
         | 
| 147 | 
            +
                        )
         | 
| 148 | 
            +
                    completion_response = response.json()
         | 
| 149 | 
            +
             | 
| 150 | 
            +
                    model_response["choices"][0]["message"]["content"] = completion_response[
         | 
| 151 | 
            +
                        "result"
         | 
| 152 | 
            +
                    ]["response"]
         | 
| 153 | 
            +
             | 
| 154 | 
            +
                    ## CALCULATING USAGE
         | 
| 155 | 
            +
                    print_verbose(
         | 
| 156 | 
            +
                        f"CALCULATING CLOUDFLARE TOKEN USAGE. Model Response: {model_response}; model_response['choices'][0]['message'].get('content', ''): {model_response['choices'][0]['message'].get('content', None)}"
         | 
| 157 | 
            +
                    )
         | 
| 158 | 
            +
                    prompt_tokens = litellm.utils.get_token_count(messages=messages, model=model)
         | 
| 159 | 
            +
                    completion_tokens = len(
         | 
| 160 | 
            +
                        encoding.encode(model_response["choices"][0]["message"].get("content", ""))
         | 
| 161 | 
            +
                    )
         | 
| 162 | 
            +
             | 
| 163 | 
            +
                    model_response["created"] = int(time.time())
         | 
| 164 | 
            +
                    model_response["model"] = "cloudflare/" + model
         | 
| 165 | 
            +
                    usage = Usage(
         | 
| 166 | 
            +
                        prompt_tokens=prompt_tokens,
         | 
| 167 | 
            +
                        completion_tokens=completion_tokens,
         | 
| 168 | 
            +
                        total_tokens=prompt_tokens + completion_tokens,
         | 
| 169 | 
            +
                    )
         | 
| 170 | 
            +
                    model_response.usage = usage
         | 
| 171 | 
            +
                    return model_response
         | 
| 172 | 
            +
             | 
| 173 | 
            +
             | 
| 174 | 
            +
            def embedding():
         | 
| 175 | 
            +
                # logic for parsing in - calling - parsing out model embedding calls
         | 
| 176 | 
            +
                pass
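
A minimal call sketch for this route (the env var names and model id follow litellm's Cloudflare Workers AI convention; treat both as illustrative):

    import os
    import litellm

    os.environ["CLOUDFLARE_API_KEY"] = "..."
    os.environ["CLOUDFLARE_ACCOUNT_ID"] = "..."
    response = litellm.completion(
        model="cloudflare/@cf/meta/llama-2-7b-chat-int8",
        messages=[{"role": "user", "content": "hello"}],
    )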
        litellm/llms/cohere.py
    ADDED
    
@@ -0,0 +1,293 @@
| 1 | 
            +
            import os, types
         | 
| 2 | 
            +
            import json
         | 
| 3 | 
            +
            from enum import Enum
         | 
| 4 | 
            +
            import requests
         | 
| 5 | 
            +
            import time, traceback
         | 
| 6 | 
            +
            from typing import Callable, Optional
         | 
| 7 | 
            +
            from litellm.utils import ModelResponse, Choices, Message, Usage
         | 
| 8 | 
            +
            import litellm
         | 
| 9 | 
            +
            import httpx
         | 
| 10 | 
            +
             | 
| 11 | 
            +
             | 
| 12 | 
            +
            class CohereError(Exception):
         | 
| 13 | 
            +
                def __init__(self, status_code, message):
         | 
| 14 | 
            +
                    self.status_code = status_code
         | 
| 15 | 
            +
                    self.message = message
         | 
| 16 | 
            +
                    self.request = httpx.Request(
         | 
| 17 | 
            +
                        method="POST", url="https://api.cohere.ai/v1/generate"
         | 
| 18 | 
            +
                    )
         | 
| 19 | 
            +
                    self.response = httpx.Response(status_code=status_code, request=self.request)
         | 
| 20 | 
            +
                    super().__init__(
         | 
| 21 | 
            +
                        self.message
         | 
| 22 | 
            +
                    )  # Call the base class constructor with the parameters it needs
         | 
| 23 | 
            +
             | 
| 24 | 
            +
             | 
| 25 | 
            +
            class CohereConfig:
         | 
| 26 | 
            +
                """
         | 
| 27 | 
            +
                Reference: https://docs.cohere.com/reference/generate
         | 
| 28 | 
            +
             | 
| 29 | 
            +
                The class `CohereConfig` provides configuration for the Cohere's API interface. Below are the parameters:
         | 
| 30 | 
            +
             | 
| 31 | 
            +
                - `num_generations` (integer): Maximum number of generations returned. Default is 1, with a minimum value of 1 and a maximum value of 5.
         | 
| 32 | 
            +
             | 
| 33 | 
            +
                - `max_tokens` (integer): Maximum number of tokens the model will generate as part of the response. Default value is 20.
         | 
| 34 | 
            +
             | 
| 35 | 
            +
                - `truncate` (string): Specifies how the API handles inputs longer than maximum token length. Options include NONE, START, END. Default is END.
         | 
| 36 | 
            +
             | 
| 37 | 
            +
                - `temperature` (number): A non-negative float controlling the randomness in generation. Lower temperatures result in less random generations. Default is 0.75.
         | 
| 38 | 
            +
             | 
| 39 | 
            +
                - `preset` (string): Identifier of a custom preset, a combination of parameters such as prompt, temperature etc.
         | 
| 40 | 
            +
             | 
| 41 | 
            +
                - `end_sequences` (array of strings): The generated text gets cut at the beginning of the earliest occurrence of an end sequence, which will be excluded from the text.
         | 
| 42 | 
            +
             | 
| 43 | 
            +
                - `stop_sequences` (array of strings): The generated text gets cut at the end of the earliest occurrence of a stop sequence, which will be included in the text.
         | 
| 44 | 
            +
             | 
| 45 | 
            +
                - `k` (integer): Limits generation at each step to top `k` most likely tokens. Default is 0.
         | 
| 46 | 
            +
             | 
| 47 | 
            +
                - `p` (number): Limits generation at each step to most likely tokens with total probability mass of `p`. Default is 0.
         | 
| 48 | 
            +
             | 
| 49 | 
            +
                - `frequency_penalty` (number): Reduces repetitiveness of generated tokens. Higher values apply stronger penalties to previously occurred tokens.
         | 
| 50 | 
            +
             | 
| 51 | 
            +
                - `presence_penalty` (number): Reduces repetitiveness of generated tokens. Similar to frequency_penalty, but this penalty applies equally to all tokens that have already appeared.
         | 
| 52 | 
            +
             | 
| 53 | 
            +
                - `return_likelihoods` (string): Specifies how and if token likelihoods are returned with the response. Options include GENERATION, ALL and NONE.
         | 
| 54 | 
            +
             | 
| 55 | 
            +
                - `logit_bias` (object): Used to prevent the model from generating unwanted tokens or to incentivize it to include desired tokens. e.g. {"hello_world": 1233}
         | 
| 56 | 
            +
                """

    num_generations: Optional[int] = None
    max_tokens: Optional[int] = None
    truncate: Optional[str] = None
    temperature: Optional[float] = None
    preset: Optional[str] = None
    end_sequences: Optional[list] = None
    stop_sequences: Optional[list] = None
    k: Optional[int] = None
    p: Optional[float] = None
    frequency_penalty: Optional[float] = None
    presence_penalty: Optional[float] = None
    return_likelihoods: Optional[str] = None
    logit_bias: Optional[dict] = None

    def __init__(
        self,
        num_generations: Optional[int] = None,
        max_tokens: Optional[int] = None,
        truncate: Optional[str] = None,
        temperature: Optional[float] = None,
        preset: Optional[str] = None,
        end_sequences: Optional[list] = None,
        stop_sequences: Optional[list] = None,
        k: Optional[int] = None,
        p: Optional[float] = None,
        frequency_penalty: Optional[float] = None,
        presence_penalty: Optional[float] = None,
        return_likelihoods: Optional[str] = None,
        logit_bias: Optional[dict] = None,
    ) -> None:
        # store any explicitly passed values on the class so get_config() picks them up
        locals_ = locals()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        # return all non-None, non-dunder, non-callable class attributes
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }


def validate_environment(api_key):
    headers = {
        "accept": "application/json",
        "content-type": "application/json",
    }
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"
    return headers


def completion(
    model: str,
    messages: list,
    api_base: str,
    model_response: ModelResponse,
    print_verbose: Callable,
    encoding,
    api_key,
    logging_obj,
    optional_params=None,
    litellm_params=None,
    logger_fn=None,
):
    headers = validate_environment(api_key)
    completion_url = api_base
    prompt = " ".join(message["content"] for message in messages)

    ## Load Config
    config = litellm.CohereConfig.get_config()
    for k, v in config.items():
        if (
            k not in optional_params
        ):  # completion(top_k=3) > cohere_config(top_k=3) <- allows for dynamic variables to be passed in
            optional_params[k] = v

    data = {
        "model": model,
        "prompt": prompt,
        **optional_params,
    }

    ## LOGGING
    logging_obj.pre_call(
        input=prompt,
        api_key=api_key,
        additional_args={
            "complete_input_dict": data,
            "headers": headers,
            "api_base": completion_url,
        },
    )
    ## COMPLETION CALL
    response = requests.post(
        completion_url,
        headers=headers,
        data=json.dumps(data),
        stream=optional_params["stream"] if "stream" in optional_params else False,
    )
    ## error handling for cohere calls
    if response.status_code != 200:
        raise CohereError(message=response.text, status_code=response.status_code)

    if "stream" in optional_params and optional_params["stream"] == True:
        return response.iter_lines()
    else:
        ## LOGGING
        logging_obj.post_call(
            input=prompt,
            api_key=api_key,
            original_response=response.text,
            additional_args={"complete_input_dict": data},
        )
        print_verbose(f"raw model_response: {response.text}")
        ## RESPONSE OBJECT
        completion_response = response.json()
        if "error" in completion_response:
            raise CohereError(
                message=completion_response["error"],
                status_code=response.status_code,
            )
        else:
            try:
                choices_list = []
                for idx, item in enumerate(completion_response["generations"]):
                    if len(item["text"]) > 0:
                        message_obj = Message(content=item["text"])
                    else:
                        message_obj = Message(content=None)
                    choice_obj = Choices(
                        finish_reason=item["finish_reason"],
                        index=idx + 1,
                        message=message_obj,
                    )
                    choices_list.append(choice_obj)
                model_response["choices"] = choices_list
            except Exception as e:
                raise CohereError(
                    message=response.text, status_code=response.status_code
                )

        ## CALCULATING USAGE
        prompt_tokens = len(encoding.encode(prompt))
        completion_tokens = len(
            encoding.encode(model_response["choices"][0]["message"].get("content", ""))
        )

        model_response["created"] = int(time.time())
        model_response["model"] = model
        usage = Usage(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=prompt_tokens + completion_tokens,
        )
        model_response.usage = usage
        return model_response


def embedding(
    model: str,
    input: list,
    api_key: Optional[str] = None,
    logging_obj=None,
    model_response=None,
    encoding=None,
    optional_params=None,
):
    headers = validate_environment(api_key)
    embed_url = "https://api.cohere.ai/v1/embed"
    data = {"model": model, "texts": input, **optional_params}

    if "3" in model and "input_type" not in data:
        # cohere v3 embedding models require input_type; if none is provided, default to "search_document"
        data["input_type"] = "search_document"

    ## LOGGING
    logging_obj.pre_call(
        input=input,
        api_key=api_key,
        additional_args={"complete_input_dict": data},
    )
    ## COMPLETION CALL
    response = requests.post(embed_url, headers=headers, data=json.dumps(data))
    ## LOGGING
    logging_obj.post_call(
        input=input,
        api_key=api_key,
        additional_args={"complete_input_dict": data},
        original_response=response,
    )
    """
    example response shape:
    {
        'object': "list",
        'data': [...],
        'model': ...,
        'usage': ...
    }
    """
    if response.status_code != 200:
        raise CohereError(message=response.text, status_code=response.status_code)
    embeddings = response.json()["embeddings"]
    output_data = []
    for idx, embedding in enumerate(embeddings):
        output_data.append(
            {"object": "embedding", "index": idx, "embedding": embedding}
        )
    model_response["object"] = "list"
    model_response["data"] = output_data
    model_response["model"] = model
    input_tokens = 0
    for text in input:
        input_tokens += len(encoding.encode(text))

    model_response["usage"] = {
        "prompt_tokens": input_tokens,
        "total_tokens": input_tokens,
    }
    return model_response
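
End to end, this module is reached through litellm's public `completion()`/`embedding()` entry points, with `CohereConfig` supplying module-level defaults that `get_config()` merges under any per-call overrides. Below is a minimal sketch of that flow; the model names, the `COHERE_API_KEY` variable, and the routing of these names to this handler are illustrative assumptions, not something this diff pins down:

```python
import os
import litellm

os.environ["COHERE_API_KEY"] = "..."  # hypothetical key

# module-level default: applied to every Cohere call unless overridden per call
litellm.CohereConfig(max_tokens=256)

# completion(temperature=...) takes precedence over any CohereConfig default
chat = litellm.completion(
    model="command-nightly",  # assumed to route to this Cohere handler
    messages=[{"role": "user", "content": "Say hi in one word."}],
    temperature=0.3,
)
print(chat["choices"][0]["message"]["content"])

# embedding a v3 model: input_type defaults to "search_document" (see above)
emb = litellm.embedding(model="embed-english-v3.0", input=["hello world"])
print(len(emb["data"][0]["embedding"]))
```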
    	
litellm/llms/custom_httpx/azure_dall_e_2.py ADDED
@@ -0,0 +1,136 @@
import time, json, httpx, asyncio


class AsyncCustomHTTPTransport(httpx.AsyncHTTPTransport):
    """
    Async implementation of custom http transport
    """

    async def handle_async_request(self, request: httpx.Request) -> httpx.Response:
        if "images/generations" in request.url.path and request.url.params[
            "api-version"
        ] in [  # dall-e-3 starts from `2023-12-01-preview` so we should be able to avoid conflict
            "2023-06-01-preview",
            "2023-07-01-preview",
            "2023-08-01-preview",
            "2023-09-01-preview",
            "2023-10-01-preview",
        ]:
            request.url = request.url.copy_with(
                path="/openai/images/generations:submit"
            )
            response = await super().handle_async_request(request)
            operation_location_url = response.headers["operation-location"]
            request.url = httpx.URL(operation_location_url)
            request.method = "GET"
            response = await super().handle_async_request(request)
            await response.aread()

            timeout_secs: int = 120
            start_time = time.time()
            while response.json()["status"] not in ["succeeded", "failed"]:
                if time.time() - start_time > timeout_secs:
                    timeout = {
                        "error": {
                            "code": "Timeout",
                            "message": "Operation polling timed out.",
                        }
                    }
                    return httpx.Response(
                        status_code=400,
                        headers=response.headers,
                        content=json.dumps(timeout).encode("utf-8"),
                        request=request,
                    )

                # use asyncio.sleep so polling doesn't block the event loop;
                # fall back to a 10s interval when no retry-after header is returned
                await asyncio.sleep(int(response.headers.get("retry-after") or 10))
                response = await super().handle_async_request(request)
                await response.aread()

            if response.json()["status"] == "failed":
                error_data = response.json()
                return httpx.Response(
                    status_code=400,
                    headers=response.headers,
                    content=json.dumps(error_data).encode("utf-8"),
                    request=request,
                )

            result = response.json()["result"]
            return httpx.Response(
                status_code=200,
                headers=response.headers,
                content=json.dumps(result).encode("utf-8"),
                request=request,
            )
        return await super().handle_async_request(request)


class CustomHTTPTransport(httpx.HTTPTransport):
    """
    This class was written as a workaround to support dall-e-2 on openai > v1.x

    Refer to this issue for more: https://github.com/openai/openai-python/issues/692
    """

    def handle_request(
        self,
        request: httpx.Request,
    ) -> httpx.Response:
        if "images/generations" in request.url.path and request.url.params[
            "api-version"
        ] in [  # dall-e-3 starts from `2023-12-01-preview` so we should be able to avoid conflict
            "2023-06-01-preview",
            "2023-07-01-preview",
            "2023-08-01-preview",
            "2023-09-01-preview",
            "2023-10-01-preview",
        ]:
            request.url = request.url.copy_with(
                path="/openai/images/generations:submit"
            )
            response = super().handle_request(request)
            operation_location_url = response.headers["operation-location"]
            request.url = httpx.URL(operation_location_url)
            request.method = "GET"
            response = super().handle_request(request)
            response.read()

            timeout_secs: int = 120
            start_time = time.time()
            while response.json()["status"] not in ["succeeded", "failed"]:
                if time.time() - start_time > timeout_secs:
                    timeout = {
                        "error": {
                            "code": "Timeout",
                            "message": "Operation polling timed out.",
                        }
                    }
                    return httpx.Response(
                        status_code=400,
                        headers=response.headers,
                        content=json.dumps(timeout).encode("utf-8"),
                        request=request,
                    )

                # fall back to a 10s poll interval when no retry-after header is returned
                time.sleep(int(response.headers.get("retry-after") or 10))
                response = super().handle_request(request)
                response.read()

            if response.json()["status"] == "failed":
                error_data = response.json()
                return httpx.Response(
                    status_code=400,
                    headers=response.headers,
                    content=json.dumps(error_data).encode("utf-8"),
                    request=request,
                )

            result = response.json()["result"]
            return httpx.Response(
                status_code=200,
                headers=response.headers,
                content=json.dumps(result).encode("utf-8"),
                request=request,
            )
        return super().handle_request(request)
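
The transport above intercepts the legacy dall-e-2 `images/generations` call, reroutes it to the `:submit` endpoint, and polls the `operation-location` URL until the job finishes, so the caller sees one ordinary response. Below is a minimal sketch of wiring it into an openai v1 Azure client; the endpoint, key, and deployment name are placeholders, and passing a custom httpx client via `http_client` is assumed from openai-python's v1 client interface:

```python
import httpx
from openai import AzureOpenAI  # assumes openai >= 1.x

from litellm.llms.custom_httpx.azure_dall_e_2 import CustomHTTPTransport

client = AzureOpenAI(
    api_key="<azure-api-key>",  # placeholder
    api_version="2023-09-01-preview",  # one of the legacy versions the transport intercepts
    azure_endpoint="https://<your-resource>.openai.azure.com",  # placeholder
    # route every request through the polling workaround above
    http_client=httpx.Client(transport=CustomHTTPTransport()),
)

image = client.images.generate(model="dall-e-2", prompt="a watercolor fox")
print(image.data[0].url)
```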
    	
litellm/llms/custom_httpx/bedrock_async.py ADDED
File without changes
    	
litellm/llms/gemini.py ADDED
@@ -0,0 +1,222 @@
import os, types, traceback, copy
import json
from enum import Enum
import time
from typing import Callable, Optional
from litellm.utils import ModelResponse, get_secret, Choices, Message, Usage
import litellm
import sys, httpx
from .prompt_templates.factory import prompt_factory, custom_prompt


class GeminiError(Exception):
    def __init__(self, status_code, message):
        self.status_code = status_code
        self.message = message
        self.request = httpx.Request(
            method="POST",
            url="https://developers.generativeai.google/api/python/google/generativeai/chat",
        )
        self.response = httpx.Response(status_code=status_code, request=self.request)
        super().__init__(
            self.message
        )  # Call the base class constructor with the parameters it needs


class GeminiConfig:
    """
    Reference: https://ai.google.dev/api/python/google/generativeai/GenerationConfig

    The class `GeminiConfig` provides configuration for the Gemini API interface. Here are the parameters:

    - `candidate_count` (int): Number of generated responses to return.

    - `stop_sequences` (List[str]): The set of character sequences (up to 5) that will stop output generation. If specified, the API will stop at the first appearance of a stop sequence. The stop sequence will not be included as part of the response.

    - `max_output_tokens` (int): The maximum number of tokens to include in a candidate. If unset, this will default to the output_token_limit specified in the model's specification.

    - `temperature` (float): Controls the randomness of the output. Note: the default value varies by model; see the Model.temperature attribute of the Model returned by the genai.get_model function. Values can range over [0.0, 1.0], inclusive. A value closer to 1.0 will produce responses that are more varied and creative, while a value closer to 0.0 will typically result in more straightforward responses from the model.

    - `top_p` (float): Optional. The maximum cumulative probability of tokens to consider when sampling.

    - `top_k` (int): Optional. The maximum number of tokens to consider when sampling.
    """

    candidate_count: Optional[int] = None
    stop_sequences: Optional[list] = None
    max_output_tokens: Optional[int] = None
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    top_k: Optional[int] = None

    def __init__(
        self,
        candidate_count: Optional[int] = None,
        stop_sequences: Optional[list] = None,
        max_output_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        top_k: Optional[int] = None,
    ) -> None:
        locals_ = locals()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }


def completion(
    model: str,
    messages: list,
    model_response: ModelResponse,
    print_verbose: Callable,
    api_key,
    encoding,
    logging_obj,
    custom_prompt_dict: dict,
    acompletion: bool = False,
    optional_params=None,
    litellm_params=None,
    logger_fn=None,
):
    try:
        import google.generativeai as genai
    except ImportError:
        raise Exception(
            "Importing google.generativeai failed, please run 'pip install -q google-generativeai'"
        )
    genai.configure(api_key=api_key)

    if model in custom_prompt_dict:
        # check if the model has a registered custom prompt
        model_prompt_details = custom_prompt_dict[model]
        prompt = custom_prompt(
            role_dict=model_prompt_details["roles"],
            initial_prompt_value=model_prompt_details["initial_prompt_value"],
            final_prompt_value=model_prompt_details["final_prompt_value"],
            messages=messages,
        )
    else:
        prompt = prompt_factory(
            model=model, messages=messages, custom_llm_provider="gemini"
        )

    ## Load Config
    inference_params = copy.deepcopy(optional_params)
    inference_params.pop(
        "stream", None
    )  # streaming is not supported through this SDK call, so it is handled by fake streaming in main.py
    config = litellm.GeminiConfig.get_config()
    for k, v in config.items():
        if (
            k not in inference_params
        ):  # completion(top_k=3) > gemini_config(top_k=3) <- allows for dynamic variables to be passed in
            inference_params[k] = v

    ## LOGGING
    logging_obj.pre_call(
        input=prompt,
        api_key="",
        additional_args={"complete_input_dict": {"inference_params": inference_params}},
    )
    ## COMPLETION CALL
    try:
        _model = genai.GenerativeModel(f"models/{model}")
        response = _model.generate_content(
            contents=prompt,
            generation_config=genai.types.GenerationConfig(**inference_params),
        )
    except Exception as e:
        raise GeminiError(
            message=str(e),
            status_code=500,
        )

    ## LOGGING
    logging_obj.post_call(
        input=prompt,
        api_key="",
        original_response=response,
        additional_args={"complete_input_dict": {}},
    )
    print_verbose(f"raw model_response: {response}")
    ## RESPONSE OBJECT
    completion_response = response
    try:
        choices_list = []
        for idx, item in enumerate(completion_response.candidates):
            if len(item.content.parts) > 0:
                message_obj = Message(content=item.content.parts[0].text)
            else:
                message_obj = Message(content=None)
            choice_obj = Choices(index=idx + 1, message=message_obj)
            choices_list.append(choice_obj)
        model_response["choices"] = choices_list
    except Exception:
        traceback.print_exc()
        # the SDK response object has no status_code attribute, so report a generic server error
        raise GeminiError(message=traceback.format_exc(), status_code=500)

    try:
        completion_response = model_response["choices"][0]["message"].get("content")
        if completion_response is None:
            raise Exception
    except Exception:
        original_response = f"response: {response}"
        if hasattr(response, "candidates"):
            original_response = f"response: {response.candidates}"
            if "SAFETY" in original_response:
                original_response += "\nThe candidate content was flagged for safety reasons."
            elif "RECITATION" in original_response:
                original_response += "\nThe candidate content was flagged for recitation reasons."
        raise GeminiError(
            status_code=400,
            message=f"No response received. Original response - {original_response}",
        )

    ## CALCULATING USAGE
    prompt_str = ""
    for m in messages:
        if isinstance(m["content"], str):
            prompt_str += m["content"]
        elif isinstance(m["content"], list):
            for content in m["content"]:
                if content["type"] == "text":
                    prompt_str += content["text"]
    prompt_tokens = len(encoding.encode(prompt_str))
    completion_tokens = len(
        encoding.encode(model_response["choices"][0]["message"].get("content", ""))
    )

    model_response["created"] = int(time.time())
    model_response["model"] = "gemini/" + model
    usage = Usage(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
        total_tokens=prompt_tokens + completion_tokens,
    )
    model_response.usage = usage
    return model_response


def embedding():
    # logic for parsing in - calling - parsing out model embedding calls
    pass
    	
litellm/llms/huggingface_llms_metadata/hf_conversational_models.txt ADDED
@@ -0,0 +1,2523 @@
(2,523 Hugging Face conversational model IDs; file contents not rendered in this view)
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
+0xDEADBEA7/DialoGPT-small-rick
+1Basco/DialoGPT-small-jake
+2early4coffee/DialoGPT-medium-deadpool
+2early4coffee/DialoGPT-small-deadpool
+2gud/DialogGPT-small-Koopsbot
+ABBHISHEK/DialoGPT-small-harrypotter
+AIDynamics/DialoGPT-medium-MentorDealerGuy
+AJ/DialoGPT-small-ricksanchez
+AJ/rick-discord-bot
+AJ/rick-sanchez-bot
+AJ-Dude/DialoGPT-small-harrypotter
+AK270802/DialoGPT-small-harrypotter
+ATGdev/DialoGPT-small-harrypotter
+AVeryRealHuman/DialoGPT-small-TonyStark
+AbhinavSaiTheGreat/DialoGPT-small-harrypotter
+AccurateIsaiah/DialoGPT-small-jefftastic
+AccurateIsaiah/DialoGPT-small-mozark
+AccurateIsaiah/DialoGPT-small-mozarkv2
+AccurateIsaiah/DialoGPT-small-sinclair
+AdharshJolly/HarryPotterBot-Model
+AdrianGzz/DialoGPT-small-harrypotter
+Aero/Tsubomi-Haruno
+AetherIT/DialoGPT-small-Hal
+AiPorter/DialoGPT-small-Back_to_the_future
+Aibox/DialoGPT-small-rick
+Akjder/DialoGPT-small-harrypotter
+AllwynJ/HarryBoy
+AnthonyNelson/DialoGPT-small-ricksanchez
+Apisate/DialoGPT-small-jordan
+ArJakusz/DialoGPT-small-stark
+Aran/DialoGPT-medium-harrypotter
+Aran/DialoGPT-small-harrypotter
+Arcktosh/DialoGPT-small-rick
+AriakimTaiyo/DialoGPT-cultured-Kumiko
+AriakimTaiyo/DialoGPT-medium-Kumiko
+AriakimTaiyo/DialoGPT-revised-Kumiko
+AriakimTaiyo/DialoGPT-small-Kumiko
+AriakimTaiyo/DialoGPT-small-Rikka
+ArtemisZealot/DialoGTP-small-Qkarin
+Aruden/DialoGPT-medium-harrypotterall
+Aspect11/DialoGPT-Medium-LiSBot
+Asuramaru/DialoGPT-small-rintohsaka
+Atchuth/DialoGPT-small-MichaelBot
+Augustvember/WOKKAWOKKA
+Augustvember/WokkaBot3
+Augustvember/test
+Augustvember/wokka2
+Augustvember/wokka4
+Augustvember/wokka5
+Augustvember/wokkabottest2
+AvatarXD/DialoGPT-medium-Blitzo
+Awsaf/DialoGPT-medium-eren
+Awsaf/large-eren
+Axcel/DialoGPT-small-rick
+Ayjayo/DialoGPT-medium-AyjayoAI
+Ayran/DialoGPT-medium-harry-potter-1-through-3
+Ayran/DialoGPT-medium-harry-potter-1-through-4-plus-6-e18
+Ayran/DialoGPT-medium-harry-potter-1-through-4-plus-6
+Ayran/DialoGPT-small-gandalf
+Ayran/DialoGPT-small-harry-potter-1-through-3
+Azuris/DialoGPT-medium-envy
+Azuris/DialoGPT-medium-senorita
+Azuris/DialoGPT-small-envy
+BW/TEST
+Backedman/DialoGPT-small-Anika
+BalajiSathesh/DialoGPT-small-harrypotter
+Batsy24/DialoGPT-medium-Twilight_BellaBot
+Batsy24/DialoGPT-small-Twilight_EdBot
+Bee-Garbs/DialoGPT-real-cartman-small
+Biasface/DDDC
+Biasface/DDDC2
+BigTooth/DialoGPT-Megumin
+BigTooth/DialoGPT-small-tohru
+BigTooth/Megumin-v0.2
+BigeS/DialoGPT-small-Rick
+Bimal/my_bot_model
+BinksSachary/DialoGPT-small-shaxx
+BinksSachary/ShaxxBot
+BinksSachary/ShaxxBot2
+BlightZz/DialoGPT-medium-Kurisu
+BlightZz/MakiseKurisu
+BlueGamerBeast/DialoGPT-small-Morgana
+BotterHax/DialoGPT-small-harrypotter
+Broadus20/DialoGPT-small-joshua
+BrunoNogueira/DialoGPT-kungfupanda
+Brykee/DialoGPT-medium-Morty
+Bubb-les/DisloGPT-medium-HarryPotter
+Camzure/MaamiBot-test
+Canadiancaleb/DialoGPT-small-jesse
+Canadiancaleb/DialoGPT-small-walter
+CasualHomie/DialoGPT-small-harrypotter
+Chae/botman
+Chakita/Friends
+Chalponkey/DialoGPT-small-Barry
+ChaseBread/DialoGPT-small-harrypotter
+Chiuchiyin/DialoGPT-small-Donald
+ChrisVCB/DialoGPT-medium-cmjs
+ChrisVCB/DialoGPT-medium-ej
+Chuah/DialoGPT-small-harrypotter
+ChukSamuels/DialoGPT-small-Dr.FauciBot
+Ciruzzo/DialoGPT-small-harrypotter
+ClaudeCOULOMBE/RickBot
+Cloudy/DialoGPT-CJ-large
+ClydeWasTaken/DialoGPT-small-joshua
+CodeDanCode/CartmenBot
+CodeDanCode/SP-KyleBot
+CoderBoy432/DialoGPT-small-harrypotter
+CoderEFE/DialoGPT-marxbot
+Connor/DialoGPT-small-rick
+Connorvr/BrightBot-small
+CopymySkill/DialoGPT-medium-atakan
+Corvus/DialoGPT-medium-CaptainPrice-Extended
+Corvus/DialoGPT-medium-CaptainPrice
+Coyotl/DialoGPT-test-last-arthurmorgan
+Coyotl/DialoGPT-test2-arthurmorgan
+Coyotl/DialoGPT-test3-arthurmorgan
+CracklesCreeper/Piglin-Talks-Harry-Potter
+Cryptikdw/DialoGPT-small-rick
+Cthyllax/DialoGPT-medium-PaladinDanse
+CurtisBowser/DialoGPT-medium-sora-two
+CurtisBowser/DialoGPT-medium-sora
+CurtisBowser/DialoGPT-small-sora
+CyberMuffin/DialoGPT-small-ChandlerBot
+DARKVIP3R/DialoGPT-medium-Anakin
+Daivakai/DialoGPT-small-saitama
+Dawit/DialogGPT-small-ironman
+Daymarebait/Discord_BOT_RICK
+DecafNosebleed/DialoGPT-small-ScaraBot
+Denny29/DialoGPT-medium-asunayuuki
+Devid/DialoGPT-small-Miku
+Dilmk2/DialoGPT-small-harrypotter
+Dimedrolza/DialoGPT-small-cyberpunk
+DingleyMaillotUrgell/homer-bot
+Doiman/DialoGPT-medium-harrypotter
+DongHai/DialoGPT-small-rick
+Doquey/DialoGPT-small-Luisbot1
+Doquey/DialoGPT-small-Michaelbot
+Doxophobia/DialoGPT-medium-celeste
+Dragoniod1596/DialoGPT-small-Legacies
+Dreyzin/DialoGPT-medium-avatar
+DueLinx0402/DialoGPT-small-harrypotter
+Duugu/jakebot3000
+Dyzi/DialoGPT-small-landcheese
+EEE/DialoGPT-medium-brooke
+EEE/DialoGPT-small-aang
+EEE/DialoGPT-small-yoda
+ESPersonnel/DialoGPT-small-got
+Eagle3ye/DialoGPT-small-PeppaPig
+Elzen7/DialoGPT-medium-harrypotter
+Emi2160/DialoGPT-small-Neku
+EmileAjar/DialoGPT-small-harrypotter
+EmileAjar/DialoGPT-small-peppapig
+Erikaka/DialoGPT-small-loki
+EstoyDePaso/DialoGPT-small-harrypotter
+EuropeanTurtle/DialoGPT-small-mrcobb
+ExEngineer/DialoGPT-medium-jdt
+Exilon/DialoGPT-large-quirk
+EzioDD/house
+FFF000/dialogpt-FFF
+FangLee/DialoGPT-small-Kirito
+Filosofas/DialoGPT-medium-PALPATINE
+Flampt/DialoGPT-medium-Sheldon
+For/sheldonbot
+FosterPatch/GoT-test
+Fu10k/DialoGPT-medium-Rick
+GabbyDaBUNBUN/DialoGPT-medium-PinkiePie
+Galaxy/DialoGPT-small-hermoine
+GamerMan02/DialoGPT-medium-gamerbot
+Gappy/DialoGPT-small-Zhongli
+Geezy/DialoGPT-small-guy
+GenDelport/DialoGPT-small-harrypotter
+Gowtham25/DialoGPT-small-jackie
+Gregor-Davies/DialoGPT-small-rick
+Greysan/DialoGPT-medium-TOH
+Guard-SK/DialoGPT-medium-ricksanchez
+Guard-SK/DialoGPT-small-ricksanchez
+GunjanPantha/DialoGPT-small-gameofthrones
+Guy0/DialoGPT-small-Batmanbotty
+HAttORi/DialoGPT-Medium-zerotwo
+HackyHackyMan/DialoGPT-small-harrypotter
+Hadron/DialoGPT-medium-nino
+Hallzy/Peterbot
+Hamas/DialoGPT-large-jake
+Hamas/DialoGPT-large-jake2
+Hamas/DialoGPT-large-jake3
+Hamas/DialoGPT-large-jake4
+Hamhams/DialoGPT-small-rick
+HansAnonymous/DialoGPT-medium-rick
+HansAnonymous/DialoGPT-small-shrek
+HarryPuttar/HarryPotterDC
+Harshal6927/Jack_Sparrow_GPT
+Harshal6927/Tony_Stark_GPT
+Havokx/DialoGPT-small-Rick
+Heldhy/DialoGPT-small-tony
+Heldhy/testingAgain
+MagnusChase7/DialoGPT-medium-harrypotter
+Htenn/DialoGPT-small-spongebob
+Htenn/DialoGPT-small-spongebobv2
+HueJanus/DialoGPT-small-ricksanchez
+HypNyx/DialoGPT-small-DwightBot
+HypNyx/DialoGPT-small-Thanos
+HypedKid/PeterBot
+ILoveThatLady/DialoGPT-small-rickandmorty
+ITNODove/DialoGPT-medium-cyberbones
+Icemiser/chat-test
+Ilyabarigou/Genesis-harrybotter
+ImAPizza/DialoGPT-medium-albert
+ImAPizza/DialoGPT-medium-alberttwo
+Invincible/Chat_bot-Harrypotter-medium
+Invincible/Chat_bot-Harrypotter-small
+Invincible/DialoGPT-medium-harryPotter
+Istiaque190515/Sherlock
+Istiaque190515/harry_bot_discord
+Istiaque190515/harry_potter
+ItoYagura/DialoGPT-medium-tohru
+ItzJorinoPlays/DialoGPT-small-PickleRick
+J-Chiang/DialoGPT-small-thor
+JDS22/DialoGPT-medium-HarryPotterBot
+Jedi33/tonystarkAI
+Jeffrey/DialoGPT-small-Jeffrey
+JimmyHodl/DialoGPT-medium
+Jllama/dialoGPT-small-Joshua-test
+Jonesy/DialoGPT-medium_Barney
+Jonesy/FG_OLD
+Jonesy/DialoGPT-small_JT
+Julianqll/DialoGPT-small-finalmorty
+Julianqll/DialoGPT-small-ricksanchez
+KAIHATSU/DialoGPT-small-rick
+KENNETHFOO/DialoGPT-medium-harrypotter
+KOSTAS/DialoGPT-small-Cleverbot
+KP2500/KPBot
+Kai0857/DialoGPT-small-harrypotter
+Kail91/DialoGPT-small-PeraltaBot
+Kairu/DialoGPT-small-Rick
+Kairu/RICKBOT
+KakoSi/Smolmm3
+KakoSi/opaazzi
+Kaledmgo/DialoGPT-small-donajulia
+Kargan/DialoGPT-small-randombot
+KaydenSou/Joshua
+Keen/DialoGPT-small-potter
+KekLord/DialoGPT-small-rick3
+Keqing/Keqing-Siesta
+Keqipig/DialoGPT-small-spamton
+KhanAdeeb/model-tony-stark
+KingCodeSquid/Octavian
+KingCodeSquid/Octavian2
+Kirili4ik/ruDialoGpt3-medium-finetuned-telegram
+KnutZuidema/DialoGPT-small-morty
+Konggate/DialoGPT-small-harrypotter
+Koriyy/DialoGPT-medium-gf
+Koro/DialoGPT-medium-rickandmorty
+Koro/DialoGPT-small-rickandmorty
+KringleClaus/Dialog-santa
+KrispyIChris/DialoGPT-small-harrypotter
+Kryptone/Burobot
+Kryptone/RinAI
+Kryptone/monikAI-Unstable
+Kryptone/monikAI
+Kshaunish/DialoGPT-small-rick
+Kush/DialoGPT-small-harrypotter
+LARACHNIDE/DialogGPT-small-sw
+LactoseLegend/DialoGPT-small-Rick
+Laezor/DialoGPT-small-witcher1
+Laezor/DialoGPT-small-yakuza_0
+LaiJY/DialoGPTChatbot
+Laptop/DialoGPT-small-gandalf
+Lenza/DialoGPT-medium-Kobayashi
+Leonel/DialoGPT-small-chandler
+Leostronkest/DialoGPT-small-michael
+Leostronkest/DialoGPT
+Leviii03/Dialogpt-small-Jake99
+Lizardon/Peterbot
+Lovery/Aqua
+Lucdi90/DialoGPT-medium-XiaoBot
+LuckyWill/DialoGPT-small-JakeBot
+Lurka/DialoGPT-medium-isseibot
+Lurka/DialoGPT-medium-kon
+Luxiere/DialoGPT-medium-tyrion
+MAUtastic/DialoGPT-medium-RickandMortyBot
+MCUxDaredevil/DialoGPT-small-rick
+MS366/DialoGPT-small-vision
+MadhanKumar/DialoGPT-small-HarryPotter
+MadhanKumar/HarryPotter-Bot
+MagmaCubes1133/DialoGPT-large-rick
+Mandy/DialoGPT-small-Mikasa
+Manthan/DialoGPT-small-harrypotter
+Mara/DialoGPT-medium-harrypotter
+MathiasVS/DialoGPT-small-RickAndMorty
+MaxW0748/DialoGPT-small-Rick
+MayankGupta/DialoGPT-small-harrypotter
+MichaelTheLearner/DialoGPT-medium-harry
+Midhunkrishna/DialoGPT-small-bjk
+Mierln/SmartHarry
+MightyCoderX/DialoGPT-medium-EdwardElric
+ModzabazeR/small-okaberintaro
+Mohsin272/DialoGPT-medium-harrypotter
+Mona/DialoGPT-small-harrypotter
+MoonlitEtherna/DialoGPT-small-Nyivae
+MrDuckerino/DialoGPT-medium-Rick
+MrE/DialoGPT-medium-SARGE
+MrE/DialoGPT-medium-SARGER1
+MrE/DialoGPT-medium-SARGER3
+MrGentle/DeltaModel-genius1
+MrZ/DialoGPT-small-Rick
+Mythiie/DialoGPT-small-Modeus
+N8Daawg/chat_bot
+NASABOI/MachineLearningAI
+nabarun/DialoGPT-small-joshua
+NamPE/DialoGPT-medium-Aqua-konosuba
+NamPE/DialoGPT-medium-Takanashi-Rikka
+NamPE/DialoGPT-small-satouhina
+NanniKirby/DialoGPT-medium-bapi
+NanniKirby/bapismall
+Naturealbe/DialoGPT-small-harrypotter-2
+Naturealbe/DialoGPT-small-harrypotter
+Navigator/DialoGPT-medium-martymcfly
+Navya2608/DialoGPT-medium-chandler
+Navya2608/DialoGPT-medium-rachel
+Navya2608/DialoGPT-small-tonystarkscript
+Necrozma/harrypotterbot
+Nekoism/Zhongli-Beta
+NibrasShami/DialopGPT-small-HarryPotter
+NickCavarretta/DialoGPT-small-laffy
+Nihwy/DialoSqui
+NikhilKrishna/DialoGPT-medium-harrypotter
+Ninja5000/DialoGPT-medium-HarryPotter
+Ninja5000/DialoGPT-medium-TWEWYJoshua
+Niphredil/DialoGPT-small-lotr
+Nisarg2701/DialoGPT-medium-Rick
+NoLawz/DialoGPT-medium-hagrid
+NoLawz/DialoGPT-medium-harrypotter
+NoLawz/DialoGPT-medium-spongebob
+Nova/DialoGPT-medium-Lelouch
+NovaChrono/twervy
+Obesitycart/ChatBot
+Obscurity/DialoGPT-Medium-707
+Oji/DialoGPT-small-Rick
+Optimal/Harry
+P4RZ1V4L/DialoGPT-Medium-Tony
+PVAbhiram2003/DialoGPT-medium-RickandMorty
+Paradocx/Dialogpt-mid-hpai
+Pensador777critico/DialoGPT-small-RickandMorty
+PhilipTheGreat/DiabloGPT-small-Traveller
+PinoCorgi/DialoGPT-small-Shrek1
+Piumi/DialogGPT-small-harrypotter
+Plencers/DialoGPT-small-homer
+Poly-Pixel/shrek-medium-full
+Poly-Pixel/shrek-medium
+Poly-Pixel/shrek-test-small
+Pupihed/DialoGPT-small-shrek
+PurpleJacketGuy/My_Jarvis
+PurpleJacketGuy/My_Jarvis_2
+RAhul03/DialoGPT-small-harrypotter
+REAP3R/Chat-bot
+REZERO/DialoGPT-medium-saitama
+RTM/ChatBot
+RTM/Lucky
+RTurk/DialoGPT-small-TIMBOT
+Radicalkiddo/DialoGPT-small-Radical
+Rashid11/DialoGPT-small-rick
+Rathod/DialoGPT-small-harrypotter
+Redolid/DialoGPT-small-Rick
+Rei/DialoGPT-medium-kurisu
+RifsxD/DialoGPT-medium-raifu
+RishabhRawatt/DialoGPT-small-Rickmorty
+RishabhRawatt/DialoGPT-small-kela
+Ritchie/DialoGPT-small-Rickandmorty
+RizqFarIDN/DialoGPT-medium-harrypotter
+RizqFarIDN/DialoGPT-small-harrypotter
+RobinMari/DialoGPT-small-mikoto
+Royce23/DialoGPT-small-almas
+Rush11/DialoGPT-small-HarryPotter
+Ryanar/DialoGPT-medium-Zelda
+Ryukie/DialoGPT-small-Rick
+S34NtheGuy/DialoGPT-medium-Glass_Of_Water
+S34NtheGuy/DialoGPT-medium-Mona
+S34NtheGuy/DialoGPT-small-Harry282
+S34NtheGuy/DialoGPT-small-MJOLNIR_Soul
+S34NtheGuy/DialoGPT-small-cursedryno
+S34NtheGuy/DialoGPT-small-pikamew362
+S34NtheGuy/DialoGPT-small-wetterlettuce
+SJSui/RickBot
+SPGT/LiveSafe-DialoGPT
+SaffronIce/DialoGPT-medium-Jett
+Salma-2/DialoGPT-small-harrypotter
+Sammigooof/Peterbot
+SarahhhUwU/DialoGPT-small-ally
+Sarumomo/DialoGPT-small-test
+Saviour/ChandlerBot
+Saz/DialoGPT-small-paimon
+Saz/DialoGPT-small-saz
+Science-geek32/DialoGPT-small-doctor
+Science-geek32/DialoGPT-small-doctor2.0
+Scoops/SandalBot
+ScottaStrong/DialogGPT-medium-Scott
+ScottaStrong/DialogGPT-medium-joshua
+ScottaStrong/DialogGPT-small-Scott
+ScottaStrong/DialogGPT-small-joshua
+Sebastianthecrab/DialoGPT-small-melchior
+Sedge/DialoGPT-small-Sedge
+Shakaw/DialoGPT-small-spongebot
+ShayoGun/DialoGPT-small-shayo
+Sheel/DialoGPT-small-harrypotter
+Sheerwin02/DialoGPT-medium-mikasa
+Sheerwin02/DialoGPT-small-isla
+Sherman/DialoGPT-medium-joey
+Shike/DialoGPT_medium_harrypotter
+Shinx/DialoGPT-medium-myheroacademia
+NaturesDisaster/DialoGPT-large-Neku
+NaturesDisaster/DialoGPT-small-Neku
+ShiroNeko/DialoGPT-small-rick
+Shubham-Kumar-DTU/DialoGPT-small-goku
+SilentMyuth/sarcastic-model
+SilentMyuth/stableben
+SirBastianXVII/DialoGPT-small-TVD
+Sired/DialoGPT-small-trumpbot
+Siyris/DialoGPT-medium-SIY
+Siyris/SIY
+Skywhy/DialoGPT-medium-Churchyy
+Snaky/StupidEdwin
+Soapsy/DialoGPT-mid-cartman
+SonMooSans/DialoGPT-small-joshua
+SonMooSans/test
+Sora4762/DialoGPT-small-naruto
+Sora4762/DialoGPT-small-naruto1.1
+Soumyajit1008/DialoGPT-small-harryPotterssen
+SpacyGalaxy/DialoGPT-medium-Gandalf
+Spectrox/emmybot
+Spirax/DialoGPT-medium-sheldon
+Spoon/DialoGPT-small-engineer
+Stabley/DialoGPT-small-evelynn
+Stevo/DiagloGPT-medium-spamton
+Stoned-Code/DioloGPT-large-Rick-SC-420
+Sunnydx/BillCipherBot
+TTYU/DialoGPT-small-trump
+TVLG/DialoGPT-small-Iroh-Bot
+Taramiko/DialoGPT-small-hoshiyo_kojima
+Taramiko/Hoshiyo_Kojima
+Tejasvb/DialoGPT-small-rick
+Tejasvb/DialogGPT-small-rick
+ThatSkyFox/DialoGPT-medium-joshua
+ThatSkyFox/DialoGPT-small-joshua
+The-Programmer-With-Cool-Pens/TifaBotAIPackage
+TheCatsMoo/DialoGGPT-small-joshua
+TheDiamondKing/DialoGPT-small-harrypotter
+ThePeachOx/DialoGPT-small-harry
+TheReverendWes/DialoGPT-small-rick
+TheTUFGuy/HermioneChatBot
+Thejas/DialoGPT-small-Stewei
+Thejas/DialoGPT-small-elon
+ThoracicCosine/DialoGPT-small-harrypotter
+Tidum/DialoGPT-large-Michael
+Toadally/DialoGPT-small-david_mast
+Tofu05/DialoGPT-large-boon2
+Tofu05/DialoGPT-med-boon3
+TofuBoy/DialoGPT-medium-Yubin2
+TofuBoy/DialoGPT-medium-boon
+Tr1ex/DialoGPT-small-rick
+TrebleJeff/DialoGPT-small-Michael
+TrimPeachu/Deadpool
+Trixzy/rickai-v1
+Tropics/DialoGPT-small-peppa
+UKJ5/DialoGPT-small-harrypotter
+Username1/Mourinhio-medium
+Username1/Mourinho
+Username1/Wenger
+VLRevolution/DialogGPT-small-GGODMODEL
+VMET/DialoGPT-small-dumbassbot
+VaguelyCynical/DialoGPT-small-RickSanchez
+Vampiro/DialoGPT-small-dante_b
+Vampiro/DialoGPT-small-dante_c
+VariableZee/DialoGPT-small-ivylia03
+Verge/Peterbot
+VincentButterfield/DialoGPT-small-harrypotter
+VishalArun/DialoGPT-medium-harrypotter
+Vitafeu/DialoGPT-medium-ricksanchez
+VulcanBin/DialoGPT-small-cortana
+WarrenK-Design/DialoGPT-small-Rick
+Wessel/DiabloGPT-medium-harrypotter
+White/white-bot
+Whitez/DialoGPT-small-twety
+Wise/DialogGPT-small-JC
+WoutN2001/james3
+WurmWillem/DialoGPT-medium-RickandMorty3
+Xeouz/Ultron-Small
+XuguangAi/DialoGPT-small-Harry
+XuguangAi/DialoGPT-small-Leslie
+XuguangAi/DialoGPT-small-Rick
+Yankee/test1234
+Zane/Ricky
+Zane/Ricky3
+Zeer0/DialoGPT-small-ZerO
+Zen1/Derekbot
+Zen1/test1
+Zeph/DialoGPT-small-rick
+Zephaus/Chromrepo
+Zixtrauce/BDBot
+Zixtrauce/BDBot4Epoch
+Zixtrauce/BaekBot
+Zixtrauce/BrandonBot
+Zixtrauce/BrandonBot2
+Zixtrauce/JohnBot
+Zixtrauce/SelfAwareness
+Zuha/DialoGPT-small-gandalf
+a01709042/DialoGPT-medium
+aadilhassan/Chandlerbot
+aashutosh2102/DialoGPT-smalll-harrypotter
+abhiramtirumala/DialoGPT-sarcastic
+abhisht/DialoGPT-medium-Emilybot
+abjbpi/DS_small
+abjbpi/Dwight_Schrute
+aced/DialoGPT-medium-3PO
+adviksinghania/DialoGPT-medium-rick
+af1tang/personaGPT
+aggb/DialogGPT-small-AGGB-B
+aimiekhe/yummv1
+aimiekhe/yummv2
+aishanisingh/DiagloGPT-small-michaelscott
+aishanisingh/DialoGPT-small-harrypotter
+akaushik1/DialoGPT-small-kaiser
+akhooli/personachat-arabic
+alankar/DialoGPT-small-rick
+alipsezzar/DialoGPT-medium-harrypotter
+alistair7/bbt-diagpt2-model
+aluserhuggingface/DialoGPT-small-harrypotter
+alvinkobe/DialoGPT-medium-steve_biko
+alvinkobe/DialoGPT-small-KST
+andikarachman/DialoGPT-small-sheldon
+anduush/DialoGPT-small-Rick
+ange/DialoGPT-medium-Monke
+ankimt01/DialoGPT-small-anch
+ann101020/le2sbot-hp
+anshengli2/DialogGPT-small-Bot
+anweasha/DialoGPT-small-Chandler
+anweasha/DialoGPT-small-Jake
+aplnestrella/Aladdin-Bot
+arampacha/DialoGPT-medium-simpsons
+archmagos/HourAI
+ardatasc/miniMe-version1
+arifbhrn/DialogGPT-small-Rickk
+arnav7633/DialoGPT-medium-tony_stark
+aryanbhosale/DialoGPT-medium-harrypotter
+asad/DialoGPT-small-harryporter_bot
+ashwinchandran13/DialoGPT-small-harrypotter
+astrobreazy/DialoGPT-small-harrypotter
+atkh6673/DialoGPT-small-harrypotter
+atkh6673/DialoGPT-small-trump
+atomsspawn/DialoGPT-small-dumbledore
+augustojaba/DialoGPT-small-harrypotter
+avinashshrangee/DialoGPT-small-Ricky
+awvik360/DialoGPT-medium-plemons
+awvik360/DialoGPT-medium-plemons2
+awvik360/DialoGPT-small-plemons
+aydin/DialoGPT-medium-michael
+ayush19/rick-sanchez
+b0shakk/DialoGPT-small-Ragnar
+balta/DialoGPT-small-TestBot
+banden/DialoGPT-medium-RickBot
+banden/DialoGPT-small-LokiBot
+beatajackowska/DialoGPT-RickBot
+benajtil/DialoGPT-small-Daddyben
+benajtil/DialoGPT-small-RickAndMortyScripts
+benjaminbeilharz/dialoGPT-small-empatheticdialogues-generation
+benmrtnz27/DialoGPT-small-misato
+bensuydam/CartmanBot
+bestminerevah/DialoGPT-small-thetenthdoctor
+bhaden94/LokiDiscordBot-medium
+bhavya689/DialoGPT-large-chandler
+bleachybrain/DialoGPT-med-ss
+bmdonnell/DialoGPT-medium-harrypotter
+bonebambi/DialoGPT-small-ThakirClone
+bookemdan/DialoGPT-small-harrypotter
+boran/berkbot
+boydster/DialoGPT-small-gollum
+brimeggi/testbot2
+brokentx/newbrokiev2
+bspans/DialoGPT-small-yoda
+byeongal/Ko-DialoGPT
+bypequeno/DialoGPT-small-michaelscott
+caps1994/DialoGPT-small-chrisbot-caps1994
+caps1994/DialoGPT-small-chrisbot
+caps1994/DialoGPT-small-harrypotter-caps1994
+cartyparty/DialoGPT-small-harrypotter
+cartyparty/DialoGPT-small-iteration1
+cartyparty/DialoGPT-small-nerdherd
+cedpsam/chatbot_fr
+centon21/DialoGPT-small-harrypotter
+chaitrabhat/DialoGPT-small-rick
+chamindu/DialoGPT-medium-hermione
+chamodkarunasena/DialoGPT-medium-sokka
+chan030609/DialoGPT-medium-JAB
+chan030609/DialoGPT-small-JAB
+chellver24/DialoGPT-medium-chizuru_ichinose
+chip/DialoGPT-small-chizuru
+thu-coai/blenderbot-400M-esconv
+clairesb/kindness_bot
+clairesb/kindness_bot_repo
+clancystudios/DialoGPT-medium-Morty
+clayfox/DialoGPT-medium-Hiccup
+clayfox/DialoGPT-small-Hiccup
+cocoaclef/DialoGPT-small-kohaku
+codealtgeek/DiabloGPT-medium-rickmorty
+colochoplay/DialoGTP-small-harrypotter
+conniezyj/DialoGPT-small-snape
+cookirei/DialoGPT-medium-Joreyar
+cosmic/DialoGPT-Rick
+cosmicray001/prod-harry
+cosmicray001/small-harry
+crystalgate/DialoGPT-small-rick
+cumtowndiscord/DialoGPT-small-joshua
+cutiebunny639/DialoGPT-small-harry
+d4rk/harry
+danildany/DialoGPT-small-MichaelScott
+danny481/DialoGPT-small-datnguyenchatbot
+danny481/DialoGPT-small-harrypotter
+danny481/Final_ChatBot
+darkzek/chickenbot-jon-snow
+darthboii/DialoGPT-small-PickleRick
+darthboii/DialoGPT-small-Rick
+dats/DialoGPT-small-harrypotter
+dattam/DialoGPT-medium-TonyStarkBot
+dead69/GPT-small-yoda
+deepparag/Aeona
+deepparag/DumBot-Beta
+deepparag/DumBot
+delvan/DialoGPT-medium-DwightV1
+df4rfrrf/DialoGPT-medium-Aerith
+dhanushlnaik/amySan
+disdamoe/DialoGPT-small-moe
+disdamoe/TheGreatManipulator
+disdamoe/TheManipulator
+divi/Peterbot
+dk16gaming/DialoGPT-small-HarryPotter
+dkminer81/Tromm
+dreamline2/DialoGPT-small-joshua-demo
+dukeme/DialoGPT-small-RDBotv1
+eclare/DialoGPT-small-SCHAEFER
+educhav/Austin-DialoGPT-small
+educhav/Elijah-DialoGPT-small
+educhav/J-DialoGPT-small
+educhav/Sam-DialoGPT-small
+eklrivera/DialoGPT-small-harrypotter
+eldritch-axolotl/Rick
+ericklasco/DialoGPT-small-erickHarryPotter
+ericzhou/DialoGPT-Medium-Rick
+ericzhou/DialoGPT-Medium-Rick_v2
+ericzhou/DialoGPT-medium-elon
+ericzhou/tsundere_v1
+estehpanas/pascalbot
+ethzhou/jooby
+ethzhou/joobyChat
+ethzhou/newJooby
+f00d4tehg0dz/Peppa
+f00d4tehg0dz/Yoda
+facebook/blenderbot-1B-distill
+facebook/blenderbot-3B
+facebook/blenderbot-400M-distill
+facebook/blenderbot-90M
+facebook/blenderbot_small-90M
+faketermz/DialoGPT
         | 
| 662 | 
            +
            fatemaMeem98/DialoGPT-medium-HermioneGrangerBot
         | 
| 663 | 
            +
            felinecity/DioloGPT-small-KaeyaBot
         | 
| 664 | 
            +
            felinecity/DioloGPT-small-KaeyaBot2
         | 
| 665 | 
            +
            felinecity/DioloGPT-small-LisaBot
         | 
| 666 | 
            +
            felinecity/ScaraBot
         | 
| 667 | 
            +
            fibruh/DialoGPT-small-harrypotter
         | 
| 668 | 
            +
            flakje/DialoGPT-small-Marty
         | 
| 669 | 
            +
            flooptherocket/DialogGPT-small-rick
         | 
| 670 | 
            +
            ftnvir/DialoGPT-medium-bullyMaguire
         | 
| 671 | 
            +
            gabtan99/dialogpt-tagalog-medium-10
         | 
| 672 | 
            +
            gabtan99/dialogpt-tagalog-medium-20
         | 
| 673 | 
            +
            gabtan99/dialogpt-tagalog-medium-30
         | 
| 674 | 
            +
            gabtan99/dialogpt-tagalog-medium
         | 
| 675 | 
            +
            gfdream/dialogpt-small-familyguy
         | 
| 676 | 
            +
            gfdream/dialogpt-small-harrypotter
         | 
| 677 | 
            +
            ghhostboy/DialoGPT-medium-connorDBH3-1
         | 
| 678 | 
            +
            ghhostboy/DialoGPT-medium-connorDBH3-21
         | 
| 679 | 
            +
            gizmo-dev/DialoGPT-small-jake
         | 
| 680 | 
            +
            gorkemgoknar/gpt2chatbotenglish
         | 
| 681 | 
            +
            grayson124/chatbotwaifu
         | 
| 682 | 
            +
            grounddominator/DialoGPT-lar-Rick
         | 
| 683 | 
            +
            gusintheshell/DialoGPT-small-rickbot
         | 
| 684 | 
            +
            gwima/ryan-sackmott
         | 
| 685 | 
            +
            hama/Doctor_Bot
         | 
| 686 | 
            +
            hama/Harry_Bot
         | 
| 687 | 
            +
            hama/barney_bot
         | 
| 688 | 
            +
            hama/me0.01
         | 
| 689 | 
            +
            hama/rick_bot
         | 
| 690 | 
            +
            heabeoun/DiabloGPT-small-nuon-conv
         | 
| 691 | 
            +
            henryoce/DialoGPT-small-rick-and-morty
         | 
| 692 | 
            +
            hervetusse/DialogGPT-small-harrypotter
         | 
| 693 | 
            +
            hireddivas/DialoGPT-small-ray
         | 
| 694 | 
            +
            hireddivas/DialoGPT-small-scully
         | 
| 695 | 
            +
            hireddivas/dialoGPT-small-mulder
         | 
| 696 | 
            +
            hireddivas/dialoGPT-small-phil
         | 
| 697 | 
            +
            hireddivas/dialoGPT-small-sonic
         | 
| 698 | 
            +
            honguyenminh/old-zhongli
         | 
| 699 | 
            +
            houssaineamzil/DialoGPT-small-joey
         | 
| 700 | 
            +
            hrv/DialoGPT-small-rick-morty
         | 
| 701 | 
            +
            hyunwoongko/blenderbot-9B
         | 
| 702 | 
            +
            hyunwoongko/reddit-3B
         | 
| 703 | 
            +
            hyunwoongko/reddit-9B
         | 
| 704 | 
            +
            iamalpharius/GPT-Small-BenderBot
         | 
| 705 | 
            +
            ianc89/hagrid
         | 
| 706 | 
            +
            ignkai/DialoGPT-medium-spider-man-updated
         | 
| 707 | 
            +
            ilikeapple12/DialoGPT-small-Phos
         | 
| 708 | 
            +
            imran2part/DialogGPT-small-Doctor
         | 
| 709 | 
            +
            imrit1999/DialoGPT-small-MCU
         | 
| 710 | 
            +
            myynirew/DialoGPT-medium-ettengiv
         | 
| 711 | 
            +
            myynirew/DialoGPT-medium-leirbag
         | 
| 712 | 
            +
            myynirew/DialoGPT-small-awazimuruk
         | 
| 713 | 
            +
            ionite/DialoGPT-large-Sh0rtiAI-v2
         | 
| 714 | 
            +
            ionite/DialoGPT-medium-IoniteAI
         | 
| 715 | 
            +
            ionite/DialoGPT-medium-McKayAI-v2
         | 
| 716 | 
            +
            ionite/DialoGPT-medium-McKayAI
         | 
| 717 | 
            +
            ionite/DialoGPT-medium-Sh0rtiAI
         | 
| 718 | 
            +
            ionite/DialoGPT-medium-mohnjilesAI
         | 
| 719 | 
            +
            ionite/DialoGPT-medium-orangeAI
         | 
| 720 | 
            +
            ironman123/DialoGPT-small-harrypotter
         | 
| 721 | 
            +
            ishraaqparvez/DialoGPT-small-harrypotter
         | 
| 722 | 
            +
            jackky46/DialoGPT-medium-got
         | 
| 723 | 
            +
            jahz/DialoGPT-medium-FF8
         | 
| 724 | 
            +
            jalensmh/DialoGPT-medium-jalenbot
         | 
| 725 | 
            +
            jalensmh/DialoGPT-small-exophoria
         | 
| 726 | 
            +
            jamestop00/DialoGPT-spike-medium
         | 
| 727 | 
            +
            jasper/DialoGPT-large-homersimpson
         | 
| 728 | 
            +
            jchen/DialoGPT-evan
         | 
| 729 | 
            +
            jeanlks/DialogGPT-small-gayvid
         | 
| 730 | 
            +
            jeanlks/DialogGPT-small-pato
         | 
| 731 | 
            +
            jfhr1999/CharacterTest
         | 
| 732 | 
            +
            jogp10/DialoGPT-medium-arya
         | 
| 733 | 
            +
            jollmimmim/DialoGPT-small-monkeydluffy
         | 
| 734 | 
            +
            jordanhagan/DialoGPT-medium-NegaNetizen
         | 
| 735 | 
            +
            josephmagnayon/DialoGPT-medium-Alfred
         | 
| 736 | 
            +
            josepjulia/RepoHumanChatBot
         | 
| 737 | 
            +
            josh8/DialoGPT-medium-josh
         | 
| 738 | 
            +
            josh8/DialoGPT-small-josh
         | 
| 739 | 
            +
            jpsxlr8/DialoGPT-small-harrypotter
         | 
| 740 | 
            +
            jth1903/DialoGPT-small-rick
         | 
| 741 | 
            +
            julianolf/DialoGPT-small-harrypotter
         | 
| 742 | 
            +
            kaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaot1k/DialoGPT-small-Wanda
         | 
| 743 | 
            +
            kagennotsuki/DialoGPT-medium-radion
         | 
| 744 | 
            +
            kche0138/DialoGPT-medium-DIO
         | 
| 745 | 
            +
            kingabzpro/DialoGPT-small-Rick-Bot
         | 
| 746 | 
            +
            kipiiler/Rickbot
         | 
| 747 | 
            +
            knightbat/harry-potter
         | 
| 748 | 
            +
            kripanshudixit/DialoGPT-small-phoenix
         | 
| 749 | 
            +
            kris/DialoGPT-small-spock
         | 
| 750 | 
            +
            kris/DialoGPT-small-spock3
         | 
| 751 | 
            +
            kris/DialoGPT-small-spock4
         | 
| 752 | 
            +
            kris/DialoGPT-small-spock5
         | 
| 753 | 
            +
            kshitiz/testing-bot-repo
         | 
| 754 | 
            +
            kunalbhargava/DialoGPT-small-housebot
         | 
| 755 | 
            +
            kvothe28/DiabloGPT-small-Rick
         | 
| 756 | 
            +
            l41n/c3rbs
         | 
| 757 | 
            +
            lain2/Peterbot
         | 
| 758 | 
            +
            lanejm/DialoGPT-small-hagrid
         | 
| 759 | 
            +
            lapacc33/DialoGPT-medium-rick
         | 
| 760 | 
            +
            life4free96/DialogGPT-med-TeiaMoranta
         | 
| 761 | 
            +
            life4free96/DialogGPT-med-TeiaMoranta3
         | 
| 762 | 
            +
            light/small-rickk
         | 
| 763 | 
            +
            limivan/DialoGPT-small-c3po
         | 
| 764 | 
            +
            cosmicroxks/DialoGPT-small-scott
         | 
| 765 | 
            +
            logube/DialogGPT_small_harrypotter
         | 
| 766 | 
            +
            lonewanderer27/DialoGPT-small-Joshua
         | 
| 767 | 
            +
            lonewanderer27/KeitaroBot
         | 
| 768 | 
            +
            lonewanderer27/YoshinoriBot
         | 
| 769 | 
            +
            lonewanderer27/YuriBot
         | 
| 770 | 
            +
            lovellyweather/DialoGPT-medium-johnny
         | 
| 771 | 
            +
            luca-martial/DialoGPT-Elon
         | 
| 772 | 
            +
            lucas-bo/DialogGPT-small-yoda
         | 
| 773 | 
            +
            ludowoods/KujouSara
         | 
| 774 | 
            +
            lulueve3/DialoGPT-medium-Kokkoro
         | 
| 775 | 
            +
            lulueve3/DialoGPT-medium-Kokkoro2
         | 
| 776 | 
            +
            madbuda/DialoGPT-got-skippy
         | 
| 777 | 
            +
            madbuda/DialoGPT-medium-skippy
         | 
| 778 | 
            +
            majonez57/JoeBot
         | 
| 779 | 
            +
            manav/dialogpt-large-kanye-reddit
         | 
| 780 | 
            +
            manav/dialogpt-medium-berkeley-reddit
         | 
| 781 | 
            +
            maniacGhost24/MichaelScott-bot-push-small
         | 
| 782 | 
            +
            manraf/DialoGPT-smmall-harrypotter
         | 
| 783 | 
            +
            matprado/DialoGPT-small-rick-sanchez
         | 
| 784 | 
            +
            maxxx2021/DialGPT-small-harrypotter
         | 
| 785 | 
            +
            mdc1616/DialoGPT-large-sherlock
         | 
| 786 | 
            +
            melon422/DialoGPT-medium-MelonBot
         | 
| 787 | 
            +
            melon422/DialoGPT-medium-MelonBot2
         | 
| 788 | 
            +
            mewmew/DialoGPT-small-rick
         | 
| 789 | 
            +
            michelleshx/DialoGPT-small-michelle-discord-bot
         | 
| 790 | 
            +
            microsoft/DialoGPT-large
         | 
| 791 | 
            +
            microsoft/DialoGPT-medium
         | 
| 792 | 
            +
            microsoft/DialoGPT-small
         | 
| 793 | 
            +
            mikabeebee/Peterbot
         | 
| 794 | 
            +
            milayue/neosh-bot1
         | 
| 795 | 
            +
            minsiam/DialoGPT-medium-harrypotterbot
         | 
| 796 | 
            +
            minsiam/DialoGPT-small-harrypotterbot
         | 
| 797 | 
            +
            miogfd1234/ll
         | 
| 798 | 
            +
            mittalnishit/DialoGPT-medium-rickman2
         | 
| 799 | 
            +
            mittalnishit/DialoGPT-small-rickman
         | 
| 800 | 
            +
            mjstamper/DialoGPT-small-samwise
         | 
| 801 | 
            +
            mk3smo/dialogpt-med-ahiru
         | 
| 802 | 
            +
            mk3smo/dialogpt-med-duck2
         | 
| 803 | 
            +
            mk3smo/dialogpt-med-duck3
         | 
| 804 | 
            +
            mk3smo/dialogpt-med-duck5
         | 
| 805 | 
            +
            mk3smo/dialogpt-med-duckfinal
         | 
| 806 | 
            +
            mk3smo/dialogpt-med-stt3
         | 
| 807 | 
            +
            mklucifer/DialoGPT-medium-DEADPOOL
         | 
| 808 | 
            +
            mklucifer/DialoGPT-small-DEADPOOL
         | 
| 809 | 
            +
            mluengas/DialogGPT-small-michaelscott
         | 
| 810 | 
            +
            model-mili/DailoGPT-Yukub-v3
         | 
| 811 | 
            +
            model-mili/DialoGPT-small-Sapph-v1
         | 
| 812 | 
            +
            model-mili/DialoGPT-small-Yukub-v2
         | 
| 813 | 
            +
            model-mili/DialoGPT-small-Yukub
         | 
| 814 | 
            +
            mohammedks713/DialoGPT-small-harrypotter
         | 
| 815 | 
            +
            mohammedks713/DialoGPT-small-jonsnow
         | 
| 816 | 
            +
            mra1ster/DialoGPT_scully_small
         | 
| 817 | 
            +
            muhardianab/DialoGPT-small-theoffice
         | 
| 818 | 
            +
            munezah/DialoGPT-small-aot
         | 
| 819 | 
            +
            munezah/DialoGPT-small-sherlock
         | 
| 820 | 
            +
            mutamuta/DialoGPT-small-rick
         | 
| 821 | 
            +
            mutamuta/DialoGPT-spongebob-small
         | 
| 822 | 
            +
            namanrana16/DialoGPT-small-TrumpBot
         | 
| 823 | 
            +
            nanometeres/DialoGPT-medium-halbot
         | 
| 824 | 
            +
            nanometeres/DialoGPT-small-halbot
         | 
| 825 | 
            +
            ncoop57/DiGPTame-medium
         | 
| 826 | 
            +
            niharikadeokar/DialoGPT-small-Jakebot
         | 
| 827 | 
            +
            nikhilpatil2532000/DialoGPT-small-harrypotter
         | 
| 828 | 
            +
            nimrazaheer/DialoGPT-small-harrypotter
         | 
| 829 | 
            +
            nitishk/IronStarkBot
         | 
| 830 | 
            +
            nlokam/DialoGPT-digibot3.0-new
         | 
| 831 | 
            +
            nlokam/Digibot
         | 
| 832 | 
            +
            nlokam/ada_V.3
         | 
| 833 | 
            +
            nlokam/ada_V.6
         | 
| 834 | 
            +
            nlokam/ada_V.7
         | 
| 835 | 
            +
            nlokam/books_to_bots_v.00
         | 
| 836 | 
            +
            noobed/DialoGPT-small-astley
         | 
| 837 | 
            +
            norie4/DialoGPT-small-kyutebot
         | 
| 838 | 
            +
            norie4/DialoGPT-small-memoji
         | 
| 839 | 
            +
            not7even/DialoGPT-small-7evenpool
         | 
| 840 | 
            +
            npc-engine/exported-bart-light-gail-chatbot
         | 
| 841 | 
            +
            ntjrrvarma/DialoGPT-small-RickBot
         | 
| 842 | 
            +
            nwl/DialoGPT-small-enhypen
         | 
| 843 | 
            +
            nytestalkerq/DialoGPT-medium-joshua
         | 
| 844 | 
            +
            oakkas/Dialge-small-harrypotter-oguz
         | 
| 845 | 
            +
            odinmay/joebot
         | 
| 846 | 
            +
            odinmay/zackbotmodel
         | 
| 847 | 
            +
            ogpat123/DialoGPT-small-Michael
         | 
| 848 | 
            +
            ogpat23/Jules-Chatbot
         | 
| 849 | 
            +
            omkar1309/RickBot
         | 
| 850 | 
            +
            omnimokha/DialoGPT-medium-jakeamal
         | 
| 851 | 
            +
            omnimokha/DialoGPT-small-jakeamal
         | 
| 852 | 
            +
            omnimokha/jakebot2
         | 
| 853 | 
            +
            oododo/DialoGPT-small-elon
         | 
| 854 | 
            +
            otto-camp/DialoGPT-small-RickBot
         | 
| 855 | 
            +
            overgrowth/jokeboy
         | 
| 856 | 
            +
            owencubes/DialoGPT-small-Josuke
         | 
| 857 | 
            +
            paladinx00/rh-bender
         | 
| 858 | 
            +
            parigaswetha/DialoGPT-small-jakeperalta
         | 
| 859 | 
            +
            parthsinha/DialoGPT-small-rickandmorty
         | 
| 860 | 
            +
            pashin/DialoGPT-small-ironman-2
         | 
| 861 | 
            +
            pashin/DialoGPT-small-ironman-3
         | 
| 862 | 
            +
            pashin/DialoGPT-small-ironman1
         | 
| 863 | 
            +
            pastlecry/DialoGPT-small-harrypotter
         | 
| 864 | 
            +
            peamjo/DialoGPT-small-morty
         | 
| 865 | 
            +
            person123/DialoGPT-small-petergriffin
         | 
| 866 | 
            +
            pewriebontal/DialoGPT-medium-Pewpewbon
         | 
| 867 | 
            +
            phantom-deluxe/dialoGPT-RickBot
         | 
| 868 | 
            +
            phantom-deluxe/dialoGPT-harry
         | 
| 869 | 
            +
            phozon/harry-potter-medium
         | 
| 870 | 
            +
            piyushdubey/DialoGPT-Mi
         | 
| 871 | 
            +
            pompeiifreckles/DialoGPT-medium-Rick
         | 
| 872 | 
            +
            ppn/DialoGPT-small-harrypotter
         | 
| 873 | 
            +
            pranavtharoor/test
         | 
| 874 | 
            +
            professional/DialoGPT-small-joshua
         | 
| 875 | 
            +
            ps2102/DialoGPT-small-harrypotter
         | 
| 876 | 
            +
            psblade/DialoGPT-medium-PotterBot
         | 
| 877 | 
            +
            puugz/DialoGPT-small-spiderman
         | 
| 878 | 
            +
            qwerty/DialoGPT-small-rick
         | 
| 879 | 
            +
            r3cdhummingbird/DialoGPT-medium-joshua
         | 
| 880 | 
            +
            r3dhummingbird/DialoGPT-medium-joshua
         | 
| 881 | 
            +
            r3dhummingbird/DialoGPT-medium-neku
         | 
| 882 | 
            +
            r3dhummingbird/DialoGPT-small-harrypotter
         | 
| 883 | 
            +
            r3dhummingbird/DialoGPT-small-neku
         | 
| 884 | 
            +
            rachelcorey/DialoGPT-medium-kramer
         | 
| 885 | 
            +
            rachelcorey/DialoGPT-medium-niles
         | 
| 886 | 
            +
            rafakat/Botsuana-rick
         | 
| 887 | 
            +
            rahul26/DialoGPT-small-rickandmorty
         | 
| 888 | 
            +
            rahulMishra05/discord-chat-bot
         | 
| 889 | 
            +
            raj2002jain/DialoGPT-small-Light
         | 
| 890 | 
            +
            ravephelps/DialoGPT-small-MichaelSbott
         | 
| 891 | 
            +
            redbloodyknife/DialoGPT-medium-shayo
         | 
| 892 | 
            +
            rhollings/DialoGPT_small_steverogers
         | 
| 893 | 
            +
            richiellei/Childe
         | 
| 894 | 
            +
            richiellei/Childe3
         | 
| 895 | 
            +
            richiellei/DialoGPT-small-rick
         | 
| 896 | 
            +
            richielleisart/Childe
         | 
| 897 | 
            +
            ridwanpratama/DialoGPT-small-misaki
         | 
| 898 | 
            +
            rinz/DialoGPT-small-Harry-Potterrr
         | 
| 899 | 
            +
            rlagusrlagus123/XTC20000
         | 
| 900 | 
            +
            rlagusrlagus123/XTC4096
         | 
| 901 | 
            +
            rmicheal48/DialoGPT-small-steven_universe
         | 
| 902 | 
            +
            rodrigodz/DialoGPT-medium-dxd
         | 
| 903 | 
            +
            romuNoob/Mine
         | 
| 904 | 
            +
            romuNoob/test
         | 
| 905 | 
            +
            rovai/AI
         | 
| 906 | 
            +
            rovai/CARRIE
         | 
| 907 | 
            +
            rovai/Chat_pytorch1
         | 
| 908 | 
            +
            rovai/chatbotmedium1
         | 
| 909 | 
            +
            rovai/chatbotmedium2
         | 
| 910 | 
            +
            rovai/chatbotmedium3
         | 
| 911 | 
            +
            rovai/chatbotmedium4
         | 
| 912 | 
            +
            rovai/chatbotone
         | 
| 913 | 
            +
            rpeng35/DialoGPT-small-erenyeager
         | 
| 914 | 
            +
            rrtong/DialoGPT-medium-shang-chi
         | 
| 915 | 
            +
            rsd511/DialoGPT-small-house
         | 
| 916 | 
            +
            rsedlr/RickBot
         | 
| 917 | 
            +
            rsedlr/RickBotExample
         | 
| 918 | 
            +
            ruriko/bacqua
         | 
| 919 | 
            +
            ruriko/konoaqua
         | 
| 920 | 
            +
            ruriko/konodio
         | 
| 921 | 
            +
            sachdevkartik/DialoGPT-small-rick
         | 
| 922 | 
            +
            saintseer121323/DialoGPT-small-kotonoha
         | 
| 923 | 
            +
            sakai026/Chizuru
         | 
| 924 | 
            +
            sakai026/Mizuhara
         | 
| 925 | 
            +
            sam213/DialoGPT-small-harrypotter
         | 
| 926 | 
            +
            sambotx4/scamantha
         | 
| 927 | 
            +
            samuelssonm/DialoGPT-small-rick
         | 
| 928 | 
            +
            sanjanareddy226/JakeBot
         | 
| 929 | 
            +
            sankalpjha1/mr.bot_haary
         | 
| 930 | 
            +
            satkinson/DialoGPT-medium-marvin
         | 
| 931 | 
            +
            satkinson/DialoGPT-small-marvin
         | 
| 932 | 
            +
            satvikag/chatbot
         | 
| 933 | 
            +
            satvikag/chatbot2
         | 
| 934 | 
            +
            sergunow/movie-chat
         | 
| 935 | 
            +
            setiadia/DialogGPT-small-HPBot
         | 
| 936 | 
            +
            shelb-doc/DialoGPT-medium-ash
         | 
| 937 | 
            +
            shihab/HarryPotter
         | 
| 938 | 
            +
            shonuff/DialoGPT-medium-konosuba
         | 
| 939 | 
            +
            shreeshaaithal/DialoGPT-small-Michael-Scott
         | 
| 940 | 
            +
            shreeshaaithal/Discord-AI-bot
         | 
| 941 | 
            +
            shreeshaaithal/whatsapp-medium-bot-2
         | 
| 942 | 
            +
            sidkhuntia/harrypotter
         | 
| 943 | 
            +
            sifclairhelix/DialoGPT-small-harrypot
         | 
| 944 | 
            +
            simrana5/RickBotExample
         | 
| 945 | 
            +
            skynex/DialoGPT-small-batman
         | 
| 946 | 
            +
            skynex/DialoGPT-small-finalbatman
         | 
| 947 | 
            +
            sleekmike/DialoGPT-small-joshua
         | 
| 948 | 
            +
            smilesandtea/DialoGPT-medium-Rick
         | 
| 949 | 
            +
            smmzhu/DialoGPT-small-SZ
         | 
| 950 | 
            +
            solfer/DialoGPT-small-ryuji
         | 
| 951 | 
            +
            spockinese/DialoGPT-small-sherlock
         | 
| 952 | 
            +
            sreyanghosh/DialoGPT-medium-joker
         | 
| 953 | 
            +
            srirachasenpai/DialoGPT-medium-harrypotter
         | 
| 954 | 
            +
            srv/DialoGPT-medium-Breaking_Bad
         | 
| 955 | 
            +
            ssam/DialoGPT-small-RickmfSanchez
         | 
| 956 | 
            +
            ssspider/DialoGPT-medium-harrypotter
         | 
| 957 | 
            +
            stfuowned/nek
         | 
| 958 | 
            +
            stfuowned/rick
         | 
| 959 | 
            +
            sthom/DialoGPT-small-tin
         | 
| 960 | 
            +
            sudip/bot1
         | 
| 961 | 
            +
            sudoabrar/DialoGPT-small-dwight
         | 
| 962 | 
            +
            suhasjain/DailoGPT-small-harrypotter
         | 
| 963 | 
            +
            swapnil165/DialoGPT-small-Rick
         | 
| 964 | 
            +
            terter/rick-bot-test-v2
         | 
| 965 | 
            +
            thatoneguy267/DialoGPT-small-Oscar
         | 
| 966 | 
            +
            thatoneguy267/bruhpleasehelpme
         | 
| 967 | 
            +
            theChanChanMan/DialoGPT-small-chandler
         | 
| 968 | 
            +
            thefryingpan/gpt-neo-125M-splishy
         | 
| 969 | 
            +
            theiconik/hermione-granger
         | 
| 970 | 
            +
            thesamuelpena/Dialog-medium-Sonic
         | 
| 971 | 
            +
            thesamuelpena/Dialog-medium-masterchief
         | 
| 972 | 
            +
            thetlwin/DialoGPT-small-ironman
         | 
| 973 | 
            +
            thinhda/chatbot
         | 
| 974 | 
            +
            thu-coai/CDial-GPT2_LCCC-base
         | 
| 975 | 
            +
            thu-coai/CDial-GPT_LCCC-base
         | 
| 976 | 
            +
            thu-coai/CDial-GPT_LCCC-large
         | 
| 977 | 
            +
            ticet11/DialoGPT-small-BOBBY
         | 
| 978 | 
            +
            timslams666/DialoGPT-small-rick
         | 
| 979 | 
            +
            tinega/DialoGPT-small-harrypotter
         | 
| 980 | 
            +
            tngo/DialoGPT-small-HankHill
         | 
| 981 | 
            +
            toiletwater/DialoGPT-medium-ironman
         | 
| 982 | 
            +
            tom1804/HP
         | 
| 983 | 
            +
            tom1804/HP_last
         | 
| 984 | 
            +
            tom1804/hp_new
         | 
| 985 | 
            +
            tomascerejo12/DialoGPT-small-Rick
         | 
| 986 | 
            +
            tosin/dialogpt_mwoz
         | 
| 987 | 
            +
            tosin/dialogpt_sv
         | 
| 988 | 
            +
            toyfreak/DialoGPT-small-addy
         | 
| 989 | 
            +
            toyfreak/DialoGPT-small-shy
         | 
| 990 | 
            +
            tpri/DialoGPT-small-pa
         | 
| 991 | 
            +
            tprincessazula/Dialog-GPT-small-AANG
         | 
| 992 | 
            +
            tprincessazula/Dialog-GPT-small-KATARA-AVATAR
         | 
| 993 | 
            +
            tprincessazula/Dialog-GPT-small-SOKKA-AVATAR
         | 
| 994 | 
            +
            tprincessazula/Dialog-GPT-small-harrypotter
         | 
| 995 | 
            +
            transfaeries/DialoGPT-Discord
         | 
| 996 | 
            +
            transfaeries/DialoGPT-medium-Discord-1.0
         | 
| 997 | 
            +
            transfaeries/DialoGPT-small-Discord-1.0
         | 
| 998 | 
            +
            transfaeries/Twilight-Sparkle-GPT
         | 
| 999 | 
            +
            trig/DialoGPT-small-harrypotter
         | 
| 1000 | 
            +
            trig/multiverse-second
         | 
| 1001 | 
            +
            trig/multiverse
         | 
| 1002 | 
            +
            trig/sokka-chatbot-test
         | 
| 1003 | 
            +
            trig/tlok-test
         | 
| 1004 | 
            +
            troythewar/DialogGPT-small-harrypotter
         | 
| 1005 | 
            +
            truthisneverlinear/EleventhDoctor
         | 
| 1006 | 
            +
            ttntran/DialoGPT-small-human
         | 
| 1007 | 
            +
            tuantt/GroundNet
         | 
| 1008 | 
            +
            ughvom/Ginger
         | 
| 1009 | 
            +
            ughvom/britnayBOTMAIN
         | 
| 1010 | 
            +
            umr55766/DialogGPT-small-peppa-pig
         | 
| 1011 | 
            +
            usamazaheer/DialoGPT-small-harrypotter
         | 
| 1012 | 
            +
            uutkras/Pandabot
         | 
| 1013 | 
            +
            uyharold86/DialoGPT-small-RickAndMorty
         | 
| 1014 | 
            +
            valarikv/DialoGPT-small-bateman
         | 
| 1015 | 
            +
            vibranium19/DialoGPT-medium-jake
         | 
| 1016 | 
            +
            victordata/DialoGPT-small-Rick
         | 
| 1017 | 
            +
            victorswedspot/DialoGPT-small-gandalf
         | 
| 1018 | 
            +
            vijayv500/DialoGPT-small-Big-Bang-Theory-Series-Transcripts
         | 
| 1019 | 
            +
            vijote/DialoGPT-small-Morty
         | 
| 1020 | 
            +
            vivek-g-2009/DialoGPT-medium-harrypotter
         | 
| 1021 | 
            +
            vlco-o/NLboto_o-aki-dialogpt
         | 
| 1022 | 
            +
            vlco-o/NLboto_o-small-dialogpt
         | 
| 1023 | 
            +
            wadeed/DialogGPT-small-chandlerbingg
         | 
| 1024 | 
            +
            wanderer/DialoGPT-small-Phoebe
         | 
| 1025 | 
            +
            wjching/DialoGPT-small-ricksanchez
         | 
| 1026 | 
            +
            won/DialoGPT-small-harrypotter
         | 
| 1027 | 
            +
            worms3401/DialoGPT-small-Eleonora
         | 
| 1028 | 
            +
            worsterman/DialoGPT-small-mulder
         | 
| 1029 | 
            +
            wtrClover/DialoGPT-small-Flutterbot
         | 
| 1030 | 
            +
            wtrClover/DialoGPT-small-TwilightBot
         | 
| 1031 | 
            +
            xdmason/pretrainedCas
         | 
| 1032 | 
            +
            xiaoheiqaq/DialoGPT-mediumJojo
         | 
| 1033 | 
            +
            xiaoheiqaq/DialoGPT-smallharrypotter
         | 
| 1034 | 
            +
            yahya1994/DialoGPT-small-AOT-Eren
         | 
| 1035 | 
            +
            yahya1994/DialoGPT-small-DN-L
         | 
| 1036 | 
            +
            yahya1994/DialoGPT-small-DN-Light
         | 
| 1037 | 
            +
            yahya1994/DialoGPT-small-DN-Ryuk
         | 
| 1038 | 
            +
            yahya1994/DialoGPT-small-Gintama-Gintoki
         | 
| 1039 | 
            +
            yahya1994/DialoGPT-small-Parasyte-Migi
         | 
| 1040 | 
            +
            yahya1994/DialoGPT-small-ReZero-Rem
         | 
| 1041 | 
            +
            yahya1994/DialoGPT-small-ReZero-Subaru
         | 
| 1042 | 
            +
            yahya1994/DialoGPT-small-Ryuk
         | 
| 1043 | 
            +
            yusufmorsi/georgebot
         | 
| 1044 | 
            +
            zaydzuhri/lelouch-medium
         | 
| 1045 | 
            +
            zemi/jakebot
         | 
| 1046 | 
            +
            zen-satvik/BotGPT-medium-HP
         | 
| 1047 | 
            +
            zentos/DialoGPT-small-spongebob
         | 
| 1048 | 
            +
            zinary/DialoGPT-small-rick-new
         | 
| 1049 | 
            +
            zuto37/DialoGPT-small-sadao
         | 
| 1050 | 
            +
            Maxwere/DiabloGPT-medium-maxbot
         | 
| 1051 | 
            +
            Grungle/DialoGPT-medium-butters
         | 
| 1052 | 
            +
            sadkat/technoai
         | 
| 1053 | 
            +
            Grungle/DialoGPT-medium-butters2
         | 
| 1054 | 
            +
            kookyklavicle/sean-diaz-bot
         | 
| 1055 | 
            +
            kookyklavicle/sean-diaz
         | 
| 1056 | 
            +
            Aquasp34/DialoGPT-small-aqua1
         | 
| 1057 | 
            +
            zenham/khemx
         | 
| 1058 | 
            +
            aryanbhosale/smartharrypotterbot
         | 
| 1059 | 
            +
            Britain/DialoGPT-small-ZifBotTwoFixed
         | 
| 1060 | 
            +
            Britain/DialoGPT-small-DanyBotThree
         | 
| 1061 | 
            +
            infinitylyj/DialogGPT-small-rick
         | 
| 1062 | 
            +
            infinitylyj/DialogGPT-small-general
         | 
| 1063 | 
            +
            infinitylyj/DialogGPT-medium-general
         | 
| 1064 | 
            +
            jackyv/DialoGPT-small-pinocchio
         | 
| 1065 | 
            +
            Freak55/DialoGPT-small-Phoenix-Wright
         | 
| 1066 | 
            +
            Britain/DialoGPT-small-DanyBotThreeFixed
         | 
| 1067 | 
            +
            Britain/DialoGPT-small-DanyBotTwo
         | 
| 1068 | 
            +
            P4RZ1V4L/DialoGPT-medium-tonystark
         | 
| 1069 | 
            +
            Britain/DialoGPT-small-DanyBotTwoNew
         | 
| 1070 | 
            +
            zenham/mskeen_m_e4_16h
         | 
| 1071 | 
            +
            zenham/khemx_m_e4_16h
         | 
| 1072 | 
            +
            zenham/wail_m_e4_16h_2k
         | 
| 1073 | 
            +
            RTM/vilang
         | 
| 1074 | 
            +
            BeanBoi50404/DialoGPT-small-PeppaPigButBetter
         | 
| 1075 | 
            +
            nabin19677/small-cartman
         | 
| 1076 | 
            +
            Prime2911/DialoGPT-small-handsomejack
         | 
| 1077 | 
            +
            Starry/KARENTRIES
         | 
| 1078 | 
            +
            dietconk/DialogGPT-small-Orange
         | 
| 1079 | 
            +
            mafeu/DialoGPT-medium-willem
         | 
| 1080 | 
            +
            Prime2911/DialoGPT-medium-handsomejack
         | 
| 1081 | 
            +
            Meowren/DialoGPT-small-Rick-Bot
         | 
| 1082 | 
            +
            DB13067/Peterbot
         | 
| 1083 | 
            +
            Savitar/DialoGPT-medium-RickandMorty
         | 
| 1084 | 
            +
            MolePatrol/Olbot
         | 
| 1085 | 
            +
            erinchocolate/DialoGPT-small-harrypotter
         | 
| 1086 | 
            +
            Valouzze/FairuvenIA
         | 
| 1087 | 
            +
            MehSatho/Tai-medium-Hermione
         | 
| 1088 | 
            +
            Valouzze/MegaIA
         | 
| 1089 | 
            +
            Makinitas/DialoGPT-small-RickAndMortyScripts
         | 
| 1090 | 
            +
            darthrussel/DialoGPT-small-rickandmorty
         | 
| 1091 | 
            +
            vanilladucky/Friends_chatting_bot
         | 
| 1092 | 
            +
            vanilladucky/Friends_chatting_bot_redefined
         | 
| 1093 | 
            +
            chocoduck/Joey_bot
         | 
| 1094 | 
            +
            duanxingjuan/DialoGPT-medium-DEMON_SLAYER
         | 
| 1095 | 
            +
            pinkducky/Monica_Bot
         | 
| 1096 | 
            +
            Starry/HELLORUKAS
         | 
| 1097 | 
            +
            pinkducky/Rachel_Bot
         | 
| 1098 | 
            +
            trig/multiverse-third
         | 
| 1099 | 
            +
            pinkducky/Ross_Bot
         | 
| 1100 | 
            +
            duanxingjuan/DialoGPT-large-DEMON_SLAYER_v1
         | 
| 1101 | 
            +
            duanxingjuan/DialoGPT-large-DEMON
         | 
| 1102 | 
            +
            duanxingjuan/DialoGPT-large-DEMON1
         | 
| 1103 | 
            +
            issue89/DialoGPT-small-house
         | 
| 1104 | 
            +
            LeonLi279/DialoGPT-small-harrypotter
         | 
| 1105 | 
            +
            MolePatrol/DialoGPT-Medium-ConnerBot
         | 
| 1106 | 
            +
            MolePatrol/DialoGPT-Medium-MoleBot
         | 
| 1107 | 
            +
            TheDaydreamer/ricky
         | 
| 1108 | 
            +
            BeamBee/DialoGPT-small-Lavenza
         | 
| 1109 | 
            +
            Garsic/DialoGPT-medium-pecorine
         | 
| 1110 | 
            +
            CallForEcho/DialoGPT-small-harrypotter
         | 
| 1111 | 
            +
            BeamBee/DialoGPT-small-LavenzaNumTwo
         | 
| 1112 | 
            +
            Meowren/MichaelScottBott
         | 
| 1113 | 
            +
            shalpin87/dialoGPT-homer-simpson
         | 
| 1114 | 
            +
            darthrussel/DialoGPT-small-homerbot-halfdata
         | 
| 1115 | 
            +
            TheGoldenToaster/DialoGPT-medium-Woody
         | 
| 1116 | 
            +
            bemich/DialoGPT-small-GeorgeCostanza
         | 
| 1117 | 
            +
            AAAA-4/DialoGPT-small-player_03
         | 
| 1118 | 
            +
            Teyronebigdick/DialoGPT-small-harrypotter
         | 
| 1119 | 
            +
            Sammith/DialoGPT-small-miachael
         | 
| 1120 | 
            +
            Nxtxn01/DialoGPT-small-harrypotter
         | 
| 1121 | 
            +
            Teyronebigdick/DialoGPT-small-terrydavis
         | 
| 1122 | 
            +
            mczolly/DialoGPT-small-the-doctor
         | 
| 1123 | 
            +
            crazypegasus/GPT-JonSnow
         | 
| 1124 | 
            +
            MrYiRen/DialoGPT-small-harrypotter
         | 
| 1125 | 
            +
            TropicalJuice/Dialog-PeterGriffin
         | 
| 1126 | 
            +
            TheGoldenToaster/DialoGPT-medium-Bot
         | 
| 1127 | 
            +
            MrYiRen/DialoGPT-small-harrypotter2
         | 
| 1128 | 
            +
            gulgulglut/DialoGPT-small-Rick
         | 
| 1129 | 
            +
            trev/DialoGPT-small-MLP
         | 
| 1130 | 
            +
            RAJESHNEMANI/Chatbot_AI
         | 
| 1131 | 
            +
            lilapapazian/DialoGPT-small-harrypotter
         | 
| 1132 | 
            +
            Alethea/GPT2-chitchat
         | 
| 1133 | 
            +
            florentiino/DialoGPT-small-harrypotter
         | 
| 1134 | 
            +
            NUTELEX/Eva
         | 
| 1135 | 
            +
            jessicammow/DialoGPT-small-ronswanson
         | 
| 1136 | 
            +
            MrYiRen/DialoGPT-small-ZC
         | 
| 1137 | 
            +
            jessicammow/DialoGPT-medium-leslieknope
         | 
| 1138 | 
            +
            AmbricJohnson5888/death
         | 
| 1139 | 
            +
            AmbricJohnson5888/claura
         | 
| 1140 | 
            +
            DarrellTimothy/DialoGPT-small-harrypotter
         | 
| 1141 | 
            +
            RarePizzaDog/Apes_Bot
         | 
| 1142 | 
            +
            iyedr8/DialoGPT-small-rick
         | 
| 1143 | 
            +
            MEDT/ChatBot
         | 
| 1144 | 
            +
            NonzeroCornet34/DialoGPT-small-hansolo
         | 
| 1145 | 
            +
            NonzeroCornet34/DialoGPT-small-philbot
         | 
| 1146 | 
            +
            atomsspawn/DialoGPT-medium-dumbledore
         | 
| 1147 | 
            +
            florentiino/DialoGPT-small-rick
         | 
| 1148 | 
            +
            ShibaDeveloper/DialoGPT-small-harrypotter
         | 
| 1149 | 
            +
            sahilnare78/DialogGPT-medium-harrypotter
         | 
| 1150 | 
            +
            Garsic/DialoGPT-medium-jill
         | 
| 1151 | 
            +
            mdm/DialoGPT-small-Kanye
         | 
| 1152 | 
            +
            ScyKindness/Hatsune_Miku
         | 
| 1153 | 
            +
            aaaacash/DialoGPT-large-michaelscott
         | 
| 1154 | 
            +
            AntoDono/DialoGPT-Harry
         | 
| 1155 | 
            +
            BFMeriem/model
         | 
| 1156 | 
            +
            BFMeriem/chatbot-model
         | 
| 1157 | 
            +
            StringCheese/Dialog-small-bigbang
         | 
| 1158 | 
            +
            jakewillms17/capcake-model
         | 
| 1159 | 
            +
            Shivierra/DialoGPT-small-technoblade
         | 
| 1160 | 
            +
            Scaprod/DialoGPT-small-arbiter
         | 
| 1161 | 
            +
            Tlacaelel/DialoGPT-small-jarvis
         | 
| 1162 | 
            +
            spuun/kekbot-beta-1
         | 
| 1163 | 
            +
            Coma/Beter
         | 
| 1164 | 
            +
            Wavepaw/DialoGPT-medium-WardenIngo
         | 
| 1165 | 
            +
            Akarsh3053/potter-chat-bot
         | 
| 1166 | 
            +
            MachineBabs/RickBot
         | 
| 1167 | 
            +
            MachineBabs/DocBrown
         | 
| 1168 | 
            +
            spuun/kekbot-beta-1-medium
         | 
| 1169 | 
            +
            MEDT/Chatbot_Medium
         | 
| 1170 | 
            +
            tosin/dialogpt_mwoz_idioms
         | 
| 1171 | 
            +
            tosin/dialogpt_afriwoz_wolof
         | 
| 1172 | 
            +
            aakhilv/tonystark
         | 
| 1173 | 
            +
            spuun/kekbot-beta-2-medium
         | 
| 1174 | 
            +
            xiaoGato/DialoGPT-small-villanelle
         | 
| 1175 | 
            +
            Jonesy/DialoGPT-small_FG
         | 
| 1176 | 
            +
            deathknight67/DialoGPT-medium-joshua
         | 
| 1177 | 
            +
            kyriinx/DialoGPT-small-glyph
         | 
| 1178 | 
            +
            Jonesy/DialoGPT-medium_FG
         | 
| 1179 | 
            +
            spuun/kekbot-beta-3-medium
         | 
| 1180 | 
            +
            Lisia/DialoGPT-small-connor
         | 
| 1181 | 
            +
            awvik360/DialoGPT-medium-plemons-04262022
         | 
| 1182 | 
            +
            Jonesy/LisaOnIce
         | 
| 1183 | 
            +
            kvnaraya/DialoGPT-small-michael
         | 
| 1184 | 
            +
            Hyperspace/DialoGPT-small-Hyperdrive
         | 
| 1185 | 
            +
            Azuris/DialoGPT-medium-ekidona
         | 
| 1186 | 
            +
            aditeyabaral/sonobois
         | 
| 1187 | 
            +
            Jonesy/HomersNightOut
         | 
| 1188 | 
            +
            Andrei0086/Chat-small-bot
         | 
| 1189 | 
            +
            awvik360/UncleRuckus
         | 
| 1190 | 
            +
            captainswiftfox/rickandmorty
         | 
| 1191 | 
            +
            radicalrascal/DialoGPT-medium-jimmy
         | 
| 1192 | 
            +
            dmoz47/DialoGPT-small-peterparker
         | 
| 1193 | 
            +
            niprestige/GPT-small-DusabeBot
         | 
| 1194 | 
            +
            Shakerlicious/DialoGPT-small-descentbot
         | 
| 1195 | 
            +
            atomsspawn/DialoGPT-small-shelbot
         | 
| 1196 | 
            +
            atomsspawn/DialoGPT-small-sheldon
         | 
| 1197 | 
            +
            Willow/DialoGPT-medium-willow
         | 
| 1198 | 
            +
            IsekaiMeta/dapprf
         | 
| 1199 | 
            +
            farjvr/DialoGPT-small-Mortyfar
         | 
| 1200 | 
            +
            InSaiyan/DialoGPT-small-harrypotter
         | 
| 1201 | 
            +
            IsekaiMeta/dapprf3
         | 
| 1202 | 
            +
            emolyscheisse/DialoGPT-small-mandybot
         | 
| 1203 | 
            +
            IsekaiMeta/dapprf4
         | 
| 1204 | 
            +
            qgdmonilla/DialoGPT-small-harrypotter
         | 
| 1205 | 
            +
            NHStudios/DialoGPT-small-jake
         | 
| 1206 | 
            +
            Shakerlicious/DialoGPT-small-raquelbot
         | 
| 1207 | 
            +
            annasham/DialoGPT-small-myneighborTotoro
         | 
| 1208 | 
            +
            CaptAdorable/RickBot
         | 
| 1209 | 
            +
            Willow/DialoGPT-large-willow
         | 
| 1210 | 
            +
            Kabutopusu/DialoGPT-medium-NITWMae
         | 
| 1211 | 
            +
            HarmlessTarget/DialoGPT-medium-Bender
         | 
| 1212 | 
            +
            soni69/DialoGPT-medium-holmes
         | 
| 1213 | 
            +
            captainswiftfox/DialoGPT-small-rick
         | 
| 1214 | 
            +
            kathywu/DialoGPT-small-kathy
         | 
| 1215 | 
            +
            mybot/DialoGPT-medium-harrypotter
         | 
| 1216 | 
            +
            Dedemg1988/DialoGPT-small-michaelscott
         | 
| 1217 | 
            +
            pedrobaiainin/DialoGPT-small-harrypotter
         | 
| 1218 | 
            +
            kathywu/DialoGPT-medium-kathy
         | 
| 1219 | 
            +
            SNCannon/DialoGPT-medium-merc
         | 
| 1220 | 
            +
            THE-DDLM/DialoGPT-sebastian
         | 
| 1221 | 
            +
            fatirali/DialoGPT-medium-harrypotter
         | 
| 1222 | 
            +
            TejasARathod/DialoGPT-medium-BatmanBot
         | 
| 1223 | 
            +
            Varick/dialo-jarvis
         | 
| 1224 | 
            +
            Robinsd/HarryBot
         | 
| 1225 | 
            +
            dipstheman/DialoGPT-small-humanconversation
         | 
| 1226 | 
            +
            dipstheman/DialoGPT-small-humanconversationpart
         | 
| 1227 | 
            +
            LinkTheSinger/DialoGPT-small-Kanna
         | 
| 1228 | 
            +
            LinkTheSinger/DialoGPT-small-Kannav4
         | 
| 1229 | 
            +
            Robinsd/HarryBot4
         | 
| 1230 | 
            +
            SomeRandomGuy/tony
         | 
| 1231 | 
            +
            Meowren/HumanBot
         | 
| 1232 | 
            +
            marcoperez/DialoGPT-small-rickandmorty
         | 
| 1233 | 
            +
            LarsBell/DialoGPT-small-billyloomis
         | 
| 1234 | 
            +
            okwach/mawaidhaChatbot
         | 
| 1235 | 
            +
            LooksLikeIveLost/DialoGPT-medium-me
         | 
| 1236 | 
            +
            okwach/mawaidhaChatbot2
         | 
| 1237 | 
            +
            thebyy/DialoGPT-small-mortyisarick
         | 
| 1238 | 
            +
            rongina/DialoGPT-small-cartman
         | 
| 1239 | 
            +
            fransoa/arrombado-dms
         | 
| 1240 | 
            +
            ionite/DialoGPT-medium-MarkAI
         | 
| 1241 | 
            +
            ddrmaster1000/DialoGPT-medium-rick
         | 
| 1242 | 
            +
            PeritusDux/DialoGPT-small-rick
         | 
| 1243 | 
            +
            HomerChatbot/HomerSimpson
         | 
| 1244 | 
            +
            t8oo/DialoGPT-small-zeni
         | 
| 1245 | 
            +
            t8oo/DialoGPT-small-zenigata
         | 
| 1246 | 
            +
            sexomq/DialoGPT-medium-TeoBot
         | 
| 1247 | 
            +
            Char135/DialoGPT-medium-sebastian
         | 
| 1248 | 
            +
            HomerChatbot/DialoGPT-small-HomerSimpson
         | 
| 1249 | 
            +
            trev/Twilight-Sparkle
         | 
| 1250 | 
            +
            gigikenneth/family-guy-bot
         | 
| 1251 | 
            +
            ulises801/DialoGPT-medium-rick
         | 
| 1252 | 
            +
            fujuta/DialoGPT-medium-HarryPotter
         | 
| 1253 | 
            +
            fujuta/DialoGPT-medium-RonWeasley
         | 
| 1254 | 
            +
            fujuta/DialoGPT-medium-HermioneGrander
         | 
| 1255 | 
            +
            deepparag/Aeona-Beta
         | 
| 1256 | 
            +
            HomerChatbot/DialoGPT-small-homersimpsonbot
         | 
| 1257 | 
            +
            redcy/FrasierBotv1
         | 
| 1258 | 
            +
            ElMuchoDingDong/DialoGPT-medium-AudreyHepburn
         | 
| 1259 | 
            +
            natdon/DialoGPT_Michael_Scott
         | 
| 1260 | 
            +
            ElMuchoDingDong/DialoGPT-medium-AudreyHepburn_v3
         | 
| 1261 | 
            +
            deathmite/DiabloGPT-small-potaru
         | 
| 1262 | 
            +
            ElMuchoDingDong/DialoGPT-medium-AudreyHepburn_v4
         | 
| 1263 | 
            +
            DaBaap/Chat-Bot-Batman
         | 
| 1264 | 
            +
            Iwa/bot
         | 
| 1265 | 
            +
            badlawyer/DialoGPT-medium-sherlock-bot
         | 
| 1266 | 
            +
            thanhchauns2/DialoGPT-medium-Luna
         | 
| 1267 | 
            +
            jayklaws0606/DialoGPT-small-jayBot
         | 
| 1268 | 
            +
            RUCAIBox/mvp
         | 
| 1269 | 
            +
            Flem/DialoGPT-medium-alastor
         | 
| 1270 | 
            +
            keans/DialoGPT-small-highjacker
         | 
| 1271 | 
            +
            jayklaws0606/dgpt-small-jaybot
         | 
| 1272 | 
            +
            CodeMaestro/DialoGPT-small-TChalla
         | 
| 1273 | 
            +
            ElMuchoDingDong/AudreyBotBlenderBot
         | 
| 1274 | 
            +
            stfuowned/rickfinal
         | 
| 1275 | 
            +
            DuskSigma/DialogGPTHomerSimpson
         | 
| 1276 | 
            +
            hireddivas/dialoGPT-small-sonic2
         | 
| 1277 | 
            +
            N0NAne/DialoGPT-small-harrypotter
         | 
| 1278 | 
            +
            tinkoff-ai/response-quality-classifier-tiny
         | 
| 1279 | 
            +
            tinkoff-ai/response-quality-classifier-base
         | 
| 1280 | 
            +
            tinkoff-ai/response-quality-classifier-large
         | 
| 1281 | 
            +
            tinkoff-ai/response-toxicity-classifier-base
         | 
| 1282 | 
            +
            RUCAIBox/mvp-open-dialog
         | 
| 1283 | 
            +
            RUCAIBox/mtl-open-dialog
         | 
| 1284 | 
            +
            RUCAIBox/mvp-multi-task
         | 
| 1285 | 
            +
            Cirilaron/DialoGPT-medium-raiden
         | 
| 1286 | 
            +
            BlackSamorez/rudialogpt3_medium_based_on_gpt2_2ch
         | 
| 1287 | 
            +
            lucataco/DialogGPT-med-Rick
         | 
| 1288 | 
            +
            lucataco/DialoGPT-medium-rafa
         | 
| 1289 | 
            +
            gloomyworm/DialoGPT-small-ortho
         | 
| 1290 | 
            +
            kozlovtsev/DialoGPT-medium-harrypotter
         | 
| 1291 | 
            +
            Cirilaron/DialoGPT-medium-jetstreamsam
         | 
| 1292 | 
            +
            lucataco/DialoGPT-medium-omar
         | 
| 1293 | 
            +
            lucataco/DialoGPT-medium-milo
         | 
| 1294 | 
            +
            daedalus2003/HouseBot
         | 
| 1295 | 
            +
            SallyXue/DialoGPT-small-harrypotter
         | 
| 1296 | 
            +
            Averium/DialoGPT-medium-TailsBot
         | 
| 1297 | 
            +
            nlokam99/ada_sample
         | 
| 1298 | 
            +
            nlokam99/ada_sample_2
         | 
| 1299 | 
            +
            nlokam99/ada_sample_3
         | 
| 1300 | 
            +
            nlokam/adanimals_V1
         | 
| 1301 | 
            +
+spuun/kekbot-beta-4-medium
+quirkys/DialoGPT-small-harrypotter
+markofhope/DialoGPT-medium-HarringtonBot
+AntoDono/DialoGPT-Bopy-Alpha-1.01
+Hermite/DialoGPT-large-hermite
+robinhad/gpt2-uk-conversational
+Browbon/DialoGPT-small-LucaChangretta
+gloomyworm/DialoGPT-medium-ortho
+Browbon/DialoGPT-medium-LucaChangretta
+Fluffypillow/DialoGPT-small-Rem
+Hermite/DialoGPT-large-hermite2
+Bman/DialoGPT-medium-peppapig
+ZipperXYZ/DialoGPT-medium-TheWorldMachine
+AlyxTheKitten/DialoGPT-medium-AgedBlaine-2
+Averium/DialoGPT-medium-TailsBot1.1
+Elijah629/DialoGPT-mrsanai
+ZipperXYZ/DialoGPT-medium-TheWorldMachine2
+damianruel/DialoGPT-medium-MySon
+ZipperXYZ/DialoGPT-medium-TheWorldMachineExpressive
+Elijah629/DialoGPT-shrek
+AlyxTheKitten/DialoGPT-medium-Jimmis-2
+dennis-fast/DialoGPT-ElonMusk
+Sealgair/DialoGPT-medium-Eyden
+crystallyzing/DialoGPT-small-nishikiyama
+crystallyzing/DialoGPT-small-kiryu
+NikkiTiredAf/DialoGPT-small-billy2
+Evokus/DialoGPT-small-harrypotter
+mcimmy/DialoGPT-small-bob
+Laggrif/DialoGPT-medium-Luke
+Laggrif/DialoGPT-medium-3PO
+ZipperXYZ/DialoGPT-medium-TheWorldMachineExpressive2
+prprakash/DialoGPT-small-TonyStark
+sexomq/TeoBot-Romanian-medium
+Bman/DialoGPT-medium-dora
+Hermite/DialoGPT-large-hermite3
+Averium/FabioBot
+arem/DialoGPT-medium-rickandmorty
+soProf1998/DialoGPT-small-chattyrick
+soProf1998/DialoGPT-medium-chattyrick
+Dorin/DialoGPT-small-Rick
+OptimalHoiboy/DialoGPT-small-kasumai
+Hartmann/DialoGPT-small-koishikomeiji
+Konbai/DialoGPT-small-akagi
+Konbai/DialoGPT-small-akagi2
+JazzyLucas/DialoGPT-small-TonyStark
+mystery/DialoGPT-small-pinkiepie
+sexomq/TeoBot-Romanian-medium2
+erikycd/chatbot_hadita
+infinix/Sheldon-bot
+JamesonSpiff/chatBot_test_model
+Akito1961/DialoGPT-small-C3PO
+Naturealbe/DialoGPT-small-Technoblade
+zR0clu/DialoGPT-medium-Mr.Roboto
+reso/DialoGPT-medium-v3ga
+trimox/tryingnewstuff
+Nakul24/YC_Bot
+casperthegazer/DiabloGPT-medium-lukedot
+JamesStratford/PLord-bot-DialoGPT-medium
+CaptPyrite/DialoGPT-small-cat
+SafeTorpedo/DialoGPT-small-MichaelBot
+brianveebee/DialoGPT-medium-bender
+myynirew/DialoGPT-medium-shouko01
+myynirew/2-0OKUOHS
+smmzhu/DialoGPT-medium-sam
+myynirew/shouko0-3
+myynirew/dumbbot
+Lamia/DialoGPT-small-Sundrop
+ashtrindade/chatbot-stacey
+tinkoff-ai/ruDialoGPT-small
+tinkoff-ai/ruDialoGPT-medium
+24adamaliv/DialoGPT-medium-Will
+cybertelx/DialoGPT-small-drunkic0n
+Rick-C137/DialoGPT-small-rick
+debyve/dumbbot
+Amir-UL/JimBot
+BoxCrab/DialoGPT-small-Strider
+AbdalK25/DialoGPT-small-TheWiseBot
+casperthegazer/DialoGT-gandalf-urdot
+pineappleSoup/DialoGPT-medium-707
+Nakul24/AD_ChatBot
+TeaTM/DialoGPT-small-bushcat
+ionite/DialoGPT-medium-NakaAI
+Creepton/DDLCYuri-DialoGPT-small
+TeaTM/DialoGPT-large-bushcat
+yazinga/DialoGPT-medium-scout
+throwaway112358112358/DialoGPT-medium-script
+Jingna/test_hpv_discord
+anonchickenlegs/sartoshi-bot
+xander-cross/DialoGPT-small-EvilMortyTheBot
+Bman/DialoGPT-medium-shrek
+Yank2901/DialoGPT-small-Rick
+akshatpandeyme/DialoGPT-small-manpreet
+Jenwvwmabskvwh/DialoGPT-small-josh444
+akshatpandeyme/DialoGPT-small-parthiv
+akshatpandeyme/DialoGPT-small-ParthivBot
+seeksery/DialoGPT-calig
+akshatpandeyme/DialoGPT-small-AnyaBot
+Jordine/shitter
+model-attribution-challenge/DialoGPT-large
+seeksery/DialoGPT-calig2
+obl1t/DialoGPT-medium-Jotaro
+trickstters/DialoGPT-small-evanbot
+trickstters/evanbot-gpt
+AriakimTaiyo/gpt2-chat
+Yank2901/DialoGPT-small-Harry
+lizz27/DialoGPT-small-baymax
+obl1t/DialoGPT-medium-Jolyne
+seeksery/DialoGPT-calig3
+Jenwvwmabskvwh/DialoGPT-small-josh445
+trickstters/evbot2
+Jenwvwmabskvwh/DialoGPT-small-josh450
+lizz27/DialoGPT-medium-BaymaxBot
+soop/DialoGPT-medium-BaymaxBot
+abelblue3/DialoGPT-medium-baymax
+priyankac/DialoGPT-medium-BaymaxBot
+Ironpanther1/Testing
+tosin/dialogpt_afriwoz_pidgin
+Anon25/DialoGPT-Medium-BaymaxBot
+GoldenRedstone/DialoGPT-medium-Phoenix-Wright
+Primobot/DialoGPT-small-harrypotter
+Lyem/LyemBotv1
+JamesSantosxx/DialoGPT-small-harrypotter
+Lyem/LyemBotv2
+Ironpanther1/ArtoriaBot
+Swervin7s/DialoGPT-medium-anakin
+DogH2O/DialoGPT-small-naruto
+NoPeanuts/DialoGPT-small-po
+Gravitygaming/homerai
+Lyem/LyemBotv3
+celine45688/LuTing
+antwortemir/shouko04
+SebastianS/MetalSebastian
+notaproblem00/DialoGPT-small-bakugou
+myodoctor/DIALOGPT-medium-HarryPotterBot
+aniketface/DialoGPT-medium-elon
+noiseBase/DialoGPT-small-HarryPotter
+karan21/DialoGPT-medium-rickandmorty
+karan21/DialoGPT-medium-guin
+Sophiejs/DialoGPT-small-BlaineBot
+skouras/DialoGPT-small-swda
+skouras/DialoGPT-small-maptask
+TheodoreAinsley/LindaGold
+AlbedoAI/DialoGPT-large-Albedo
+AlbedoAI/DialoGPT-large-Albedo2
+willmay/DialoGPT-medium-will
+AlbedoAI/DialoGPT-medium-Albedo
+chulainn/DialoGPT-medium-Zuko
+ctoner2653/DialoGPT-medium-RickBoty
+Number4/DialoGPT-medium-harrypotter
+yummyhat/DialoGPT-small-spike
+EllyPony/flutterbot
+Suryansh-23/DialoGPT-small-MichaelScottOffice
+Cirilaron/DialoGPT-medium-vergil
+Izuuk/izuuk
+shungyan/Diablo-small-harrypotter
+bhavyasharma/DialoGPT-small-harrypotter
+nintwentydo/rickbot
+tylersfoot/DialoGPT-medium-rick
+EJoftheVern/DialoGPT-medium-shaggy
+xtraXpert/DialoGPT-small-RickAndMorty2
+ANIKEThash/DialoGPT-medium-character
+Noonw/DialoGPT-small-hijackersexurmom
+fat32man/elon_answers
+MinhP/DialoGPT-small-themis
+Noonw/DialoGPT-small-osamaflyplane
+Noonw/DialoGPT-small-ladenflyplane
+Noonw/DialoGPT-small-ladenonjet
+MinhP/DialoGPT-small-franco
+Karan59/DialoGPT-small-evaModel
+marblyso/DialoGPT-medium-marblesbagel
+Jojo17/DialoGPT-small-RickAndMorty
+deseipel/medium-LucyClarke_
+DiscordBackup/model0000
+SirSpiffy/IvanModel
+woodmtaylor/DialoGPT-small-Heej
+woodmtaylor/DialoGPT-medium-Heej
+OctaviusI/marisaV0
+ChloeMJM/DialoGPT-small-rick
+JDesignEra/DialoGPT-small-Anya
+MrE/DialoGPT-medium-SARGER4
+aarya-c111/DialoGPT-small-Rogers
+bozlucas/DialoGPT-medium-HermioneBot
+LasseVKP/DialoGPT-Mogens
+metaloopa/DialoGPT-medium-Rintaro
+ingen51/DialoGPT-medium-GPT4
+Divyesh/DialoGPT-medium-harrypotter
+Natsuki-Chan/DialoGPT-medium-luz
+akira2001/DialoGPT-medium-harrypotter
+osueng02/DialoGPT-small-STAN_BOT
+osueng02/DialoGPT-medium-STAN_BOT
+wormed/DialoGPT-small-denai
+RehanP123/DialoGPT-medium-kermit.old
+Nakul24/SM_Bot
+chulainn/DialoGPT-medium-Ragnar
+aniketface/DialoGPT-product
+shohanursobuj/DialoGPT
+marblyso/DialoGPT-medium-hero
+marblyso/DialoGPT-medium-kel
+marblyso/DialoGPT-medium-aubrey
+akil191/small-test-harryakakakaka
+sanpellegrino/CoryBot
+Arqhero/DialoGPT-small-adventuretime
+chulainn/DialoGPT-medium-Tyrion
+VTG/MentalHealthChatbotv1
+luminolblue/HomunculusGPT-testbot
+Paulina354/DialoGPT-small-rickandmorty
+khuranagarvit019/MentalHealthChatbot
+VirtualizedTrash/Chatbot
+pedrocaribe/DialoGPT-medium-LL
+queenaccila/DialoGPT-small-kashiwagi
+GarfExit/DialogGPT-medium-707
+marblyso/DialoGPT-medium-shepherd
+Spectre29/DialoGPT-small-Kaisa
+Spectre29/Kaisa-converse-model
+ZedTheUndead/Rick_fragment
+marblyso/DialoGPT-medium-mari
+Delicious/DialoGPT-small-harrypotter
+BBHKR/DialoGPT-small-jacksparrow
+Guwon/DialoGPT-small-Quincy
+epeicher/DialoGPT-small-homer-2
+timmychanga/DialoGPT-small-ashley
+mywateriswet/ShuanBot
+epeicher/DialoGPT-small-flanders
+Super-McTea/DialoGPT-small-McTea
+Eronzin/meuBotzindoEron
+Techdra/DialoGPT-large-theboy
+Eronzin/DialoGPT-small-Frodo
+gtgillott/gib
+AwesomeDWNJ/EmiBot
+CJ3/DialoGPT-medium-amber3
+GamerMan02/DialoGPT-medium-gamerbot2
+GamerMan02/DialoGPT-medium-gamerbot1
+Insomnic/DialoGPT-small-harrypotter
+Super-McTea/DialoGPT-small-McTeaV2
+FelipeJoazeiro/chatbot-morty
+microsoft/GODEL-v1_1-base-seq2seq
+microsoft/GODEL-v1_1-large-seq2seq
+Rencist/DialoGPT-small-rick
+scorpiofrens/DialoGPT-medium-ergon
+somemusicnerdwoops/DialoGPT-small-shadow
+powchang/DialoGPT2-medium-CAiFE
+ratneshrt/DialoGPT-small-Artico
+somemusicnerdwoops/DialoGPT-distilgpt2-sonicfandub
+Tsec-Research/DialoGPT-chandler-penny
+neonon/DialoGPT-medium-cloy
+ddae208s/DialoGPT-small-dimitri
+mossfarmer/VRANAK
+Matax/Aristrathor3000
+brownanchovy/Harry
+Overlrd/DialoGPT-small-cartman
+epeicher/DialoGPT-large-homer
+comradesocrates/DialoGPT-medium-stranger
+Rakublu/DialoGPT-small-yasuo
+neonon/DialoGPT-medium-htccc
+Alt41r/gpt-simpson
+Nimit-Jjw/DialoGPT-chandler-penny
+Quoc123/DialoGPT-small-AQUA
+marblyso/DialoGPT-medium-pearl
+estus2/rick-superu-rick2
+marblyso/DialoGPT-medium-marina
+rovenmusic/DialoGPT-small-melodybot
+deseipel/small-LucyClarke_
+rovenmusic/DialoGPT-small-melodybotv2
+rovenmusic/DialoGPT-small-melodybotv3
+epeicher/DialoGPT-medium-homer
+andrewkroening/GalaxyFarAway-DialoGPT-HanSolo
+nams/nams-bot
+Nicktherat/DialoGPT-medium-endella
+alfirsaafauzulh/DialoGPT-small-KamuiBastion
+rovenmusic/DialoGPT-small-melodyv10
+somesh212/Harry_Potter-BOT
+somesh212/Harry_Potter_botDialoGPT_Som2
+jmagine/DialoGPT-small-metahead
+somesh212/Harry_Potter_botDialoGPT_Som3
+rovenmusic/DialoGPT-small-melodyvfinal
+jmagine/DialoGPT-small-jmagine
+jmagine/DialoGPT-small-funded
+jmagine/DialoGPT-small-jimj
+andrewkroening/GalaxyFarAway-DialoGPT-LukeSkywalker
+andrewkroening/GalaxyFarAway-DialoGPT-Threepio
+andrewkroening/GalaxyFarAway-DialoGPT-Vader
+andrewkroening/GalaxyFarAway-DialoGPT-LeiaOrgana
+andrewkroening/GalaxyFarAway-DialoGPT-Yoda
+Wizardd/DialoGPT-small-sheldon
+BenKJH/DialoGPT-small-lucybotasg
+Ananjas/AwooAI
+Ananjas/AwooV2
+kookyklavicle/gpt-sean-diaz
+kookyklavicle/SeanDiazBot2
+Ananjas/AwooV3
+Overlrd/DialoGPT-medium-cartman
+Ananjas/AwooV6
+mathecas/HarryPotterBotAI
+Karina256/DialoGPT-small-dory
+Tony8657/DialoGPT-small-TonyStarkBot
+SebastianS/my_mim
+TFS668/DialoGPT-small-Rick
+redhoff/DialoGPT-Medium-RedBot
+FeriVOQ/DialoGPT-small-joshua
+Triobloid/DialoGPT-small-lianaharrypotter
+quinnzie/DialoGPT-small-sinister
+FarziBuilder/DialoGPT-medium-harrypotter
+sohampatil/DialoGPT-small-mentalchatbot
+gtkarber/DialoGPT-medium-columbo
+PaddlePaddle/plato-mini
+Junkan/DialoGPT-medium-Bilbo
+ThatSkyFox/DialoGPT-medium-whatsapp
+Ar4ikov/DialogAgentGPT2
+reallygoodtechdeals/Bingocat-ai-Dialo-GPT-medium
+thmauler/crashed
+OptionaI/DialoGPT-small-beepboopy
+davebathhews/DialoGPT-OTIS
+GGOM/SipBotGGOM
+davebathhews/DialoGPT-OTISBOT
+GGOM/WillBotGGOM
+GGOM/ElyasBotGGOM
+reallygoodtechdeals/steve-ai-Dialo-GPT-medium
+Crushtoe/DialoGPT-small-vangluss
+apotempest/DialoGPT-medium-geralt
+DiogoSabec/DialoGPT-small-joshua
+WaleedArif/DialoGPT-small-Micheal
+Crushtoe/DialoGPT-medium-vangluss
+Crushtoe/GODEL-v1_1-base-seq2seq-vangluss
+DiogoSabec/BOT
+Le033/DialoGPT-small-rickmorty
+Filosofas/DialoGPT-medium-PALPATINE2
+JadansTower/jobot
+NTMNathan/DialoGPT-small-harrypotter
+Ashypaws/DialoGPT-medium-Ashybot
+wmdosborne/DialoGPT-medium-kyritebot
+worms3402/DialoGPT-small-automata2
+Pi3141/DialoGPT-small-elon
+Grendar/Dialo-GPT-medium-shiro
+Pi3141/DialoGPT-medium-elon
+Pi3141/DialoGPT-medium-elon-2
+JoshuaPawlik/DialoGPT-medium-joshua
+Pi3141/DialoGPT-medium-elon-3
+josephthen3320/DialoGPT-small-walter
+robbiegwald/Rick
+Gurtej/Drbot
+Hereward/DialoGPT_medium_ObiWan_Kenobi
+Giu888/DialoGPT-small-sao
+Grendar/blenderbot-400M-distill-Shiro
+keeg8/Book-0-1500
+keeg8/Book-1500-1700
+keeg8/Book-1850-1900
+keeg8/Book-1700-1850
+karlreimond/DialoGPT-small-harrypotter
+lenartlola/SpongeBob
+lenartlola/rick-bot
+Deedlit/DialoGPT-small-southpark
+babylasagne/DialoGPT-small-narryuto
+babylasagne/DialoGPT-small-harry
+babylasagne/DialoGPT-small-spider
+babylasagne/DialoGPT-small-batman
+BradHeffernan/rickNmortyModel
+UmUDev/DialoGPT-medium-AlexVN
+ukikunz/gas-kenji-medium
+ukikunz/gas-kenji
+Isokeel/DialoGPT-medium-KMbot
+KakoSi/AcciGPT-smol
+Spoofed/DiabloGPT-small-peter
+sophiadt/DialoGPT-medium-707
+UmUDev/DialoGPT-medium-Alex
+PygmalionAI/pygmalion-350m
+sophiadt/DialoGPT-medium-reigen
+rexfi/DialoGPT-small-peter
+rexfi/NafezBot-DialoGPT
+caps1994/chris-bot
+rexfi/RickyBot
+allenai/cosmo-xl
+woodmtaylor/DialoGPT-large-Dumpling
+rexfi/MikeScottBot
+apfallinus/RickBot
+apfallinus/HarryBot
+apfallinus/MedBot
+apfallinus/AeonaBot
+apfallinus/BatmanBot
+apfallinus/AiBot
+LostXOR/TotallyNotARobot
+gachaddict/DialoGPT-medium-ike
+OctaviusI/staging
+PygmalionAI/pygmalion-1.3b
+Terrymir/DialoGPT-medium-Soraka
+SantiPingui58/DialoGPT-small-hika
+ss1612/montana-chat
+MrEmpty/DialoGPT-small-rickandmorty
+shikiskhakis/DialoGPT-small-blackdoom
+alexandreteles/GPTChizuru
+Chae/scottbot_med
+AhmedMostafa/DialoGPT-small-Rick
+metkoon/30dollarceo
+Dinocroth/DialoGPT-medium-Trevor-PhilipsV2
+metkoon/MatBot
+SmallQ/DialoGPT-small-Anya
+bigbossa/aiko6
+GK123/DialoGPT-medium-hanbot
+TheHappyDrone/DialoGPT-medium-salesman
+Pcik/DialoGPT-medium-Jaiden
+TheHappyDrone/DialoGPT-medium-Nexus-Nova
+Pcik/DialoGPT-medium-Dante
+AlmightyDeathCheater/DialoGPT-medium-harrypotter
+Pcik/DialoGPT-medium-Kirby
+Starry/COUNTNARC
+TheHappyDrone/DialoGPT-medium-Nexus-Nova-turing-v2
+wetwoteraq/DialoGPT-medium-aqua
+wetwoteraq/DialoGPT-small-peter
+wetwoteraq/DialoGPT-medium-peter
+lilexo2/DialoGPT-medium-Monica
+momo10/DialoGPT-small-harryPotter
+Antale123/ConorBot
+shikiskhakis/DialoGPT-small-xemnas
+Ecook/DialoGPT-medium-Ecook
+PygmalionAI/pygmalion-2.7b
+FowlerF/DiscordChatBot
+JoeRoganfan-69420/DialoGPT-medium-HarryPotterbot
+dusty310/DialoGPT-medium-Misaki
+Gurtej/Drbot2
+Gurtej/Drbot3
+Gurtej/Drbot4
+Gurtej/Drbot5
+Gurtej/Drbot6
+Gurtej/Drbot7
+Gurtej/Drbot8
+Gurtej/Drbot9
+PygmalionAI/pygmalion-6b
+Gurtej/Drbot11
+navygup/Mood-Tracker
+Maraslumunnus/DialoGPT-small-ivern
+DAS9051/BatemanChatBot
+SmallQLALA/DialoGPT-small-Anya
+RinkaDev/GPT-Peppa-Pig
+thu-coai/blenderbot-1B-augesc
+siyaT/DialoGPT-harrypotter-small
+keircare/DialoGPT-small-RickSanchez
+shiiiroe/DialoGPT-medium-kirito
+jdakillah/Rick
+kielljoy/DialoGPT-small-stupidspecialkay
+Ashypaws/DialoGPT-medium-Kitaibot
+jdakillah/RICK-V2
+jdakillah/Bender
+jdakillah/Generalbot
+kielljoy/DialoGPT-medium-ryanbot
+emre/spanish-dialoGPT
+vuminhtue/DialoGPT-large-HarryPotter3
+ralphsorz/DialoGPT-small-samwise
+SumYin/DialoGPT-small-Homer
+JamesRoy/DGPT-DC
+Blizzchor/DialoGPT-medium-HarryBotter
+gjhghjk/rick
+gjhghjk/rick2
+SumYin/ZeroTwo-Medium-DialoGPT
+Blizzchor/DialoGPT-medium-gamora
+Mydia2/DialoGPT-small-Flonnealive
+AL-CT/DialoGPT-small-slayer
+DhruvShek/Webraft-Ai
+arno2077/DiabloGPT-small-harrypotter
+keyonecs/fourept-debique-gpt
+Blizzchor/DialoGPT-medium-QuillLord
+callmeclover/Stinger-CONVRS_MODL
+aminFelah/DialogueGPT-very-small-harryPotter
+Keijuro/aeris-dialogpt
+Abdelrahman853/DialoGPT-small-echo
+Bearfoot/DialoGPT-medium-shrek
+arthme2/jay
+arthme2/DialoGPT-medium-Jay
+42meow/DialoGPT-medium-42meow
+Peeepy/Evie
+HorniFolks/Unicorn
+waifu-workshop/pygmalion-6b
+agenttylostudios/DialoGPT-small-Bocchi
+GregariousJamie/DialoGPT-small-jamie
+Fuwaguwa/DialoGPT-Medium-AzurLaneMusashi-v8
+s3nh/DialoGPT-large-Rick
+s3nh/DialoGPT-large-Morty
+s3nh/DialoGPT-small-morty
+Givinghawk/GPT-Morty
+DhruvShek/swearbot
+grart/DialoGPT-small-gillion
+interpixle/Sir_Caladan
+s3nh/DialoGPT-tony-montana
+s3nh/DialoGPT-small-harry-potter-goblet-of-fire
+s3nh/DialoGPT-small-hermione-granger-goblet-of-fire
+s3nh/DialoGPT-small-woody-toy-story
+s3nh/DialoGPT-small-buzz-toy-story
+puj0/DialoGPT-small-joshua
+julianvd49/DialoGPT-medium-EllieBot
+Sreyas/DialoGPT-small-elit
+DiscordRequestsAPI/DialoGPT-medium-NurDeeps
+MarinHinawa/DialoGPT-medium-Ene
+polandball/polanball
+whoami24142/DialoGPT-small-padilha
+DiscordRequestsAPI/NurDeeps-Bot
+Vaibhav-rm/GPT2-Shri-v1
+chrisrowles/DialoGPT-small-chrisrowles
+espeon98/DialoGPT-kenny-bot
+espeon98/DialoGPT-kenny-bot-2
+polandball/GPT-Polen
+chrisrowles/DialoGPT-medium-chrisrowles
+DiscordRequestsAPI/NurDeeps-Bot-2
+steerevo88/DialoGPT-small-baiken
+akiFQC/japanese-dialogpt-small-aozora
+Ngao/DialoGPT-small-ngao
+Mineroero/DialoGPT-medium-M4SOPMOD
+simple2312/DialoGPT-nayeon
+nemowet88/DialoGPT-small-ricktest
+Abraxas3d/house
+vampiregirl/DialoGPT-medium-lennoxram
+aisingapore/coherence-momentum
+simple2312/DialoGPT-Ellie
+simple2312/DialoGPT-Twice
+testaws/DialoGPT-small-joshua
+nemowet88/output-pythia-test
+Gurtej/Drbot12
+Gurtej/Drbot13
+Gurtej/Drbot14
+Gurtej/Drbot16
+EZSNoVa/DialogGPT-medium-NoVa
+mattallio/Archivist-medium-dialoGPT
+rlatt/DialoGPT-small-RickSanchez
+Lyforth/DialoGPT-Medium-Maribelle
+kittenwhiperer/Deadpool
+KumquatJoe/DialoGPT-medium-MaleToucherBot
+lmkhoa/GODEL_base_model
+JamesStratford/Pidrow-bot-DialoGPT-Large-Feb2023
+LrxLcs/DialogGPT2-SMAL
+Delcos/internal_chat_model_e2
+euvu/DialoGPT-small-harrypotter
+LrxLcs/GPT2-V2
+LrxLcs/GPT2-Test
+euvu/euvu-rickbot
+Weeeeeeeeeeeee00/DialoGPT-small-harrypotter
+slyslasher24/DialoGPT-Medium-Pondweed
+slyslasher24/DialoGPT-Small-Pondweed
+bradydawg/AI-Bot2
+aisingapore/rumour-detection-twitter
+RatInChat/Pilup7575
+rlatt/DialoGPT-large-RickSanchez
+Kira225784/Klarabot-test
+bigbossa/DialoGPT-small-aikogirl
+sckova/DialoGPT-small-joshua
+sckova/DialoGPT-medium-joshua
+sckova/DialoGPT-medium
+Beltenebros/DialoGPT-small-PerionOfGaul
+Byteno/DialoGPT-medium-glamrockfreddy
+audreycl/audreycl-testagain
+aisingapore/Lif3WayAp
+audreycl/DialoGPT-RoyalPurpleFish
+audreycl/DialoGPT-RPF
+Axelajs26/DialoGPT-small-alicetendou
+Noohance/DialoGPT-medium-noohbot
+Draptor/DialoGPT-small-coolco
+David042/DialoGPT-LucasBot
+Hobospider132/DialoGPT-Mahiru-Proto
+Draptor/DialoGPT-medium-moto
+aisingapore/SPANBert
+JYBX/DialoGPT-small-Penny
+JYBX/DialoGPT-small-Pennybot
+aisingapore/RoBERTa-base
+JYBX/DialoGPT-small-Amybot
+LuckyBor11/Figure
+FlyingGrayson0304/Gandalf-stupid-version
+BlinksFly/Harry_Potter-Ai
+PhilipN/DialoGPT-small-KeqingBot
+YTTD/DialoGPT-medium-sou
+PhilipN/DialoGPT-large-KeqingBot
+YTTD/DialoGPT-medium-souv2
+keonju/chat_bot
+MysteriousAmazon/DialoGPT-medium-alastor
+mICHPl/MINI_AI
+rlatt/DialoGPT-large-King-James-Bible-test
+v3nom1704/DialoGPT-small-potterbot
+Techcs002/DialoGPT-medium-AboTalkTest
+MysteriousAmazon/DialoGPT-medium-freddy
+ICAMPB204/DialoGPT-small-HarryPotter
+kelvinhang/DialoGPT-medium-badguy
+tatsumis6/MonikaAI
+kennethhendricks/DialoGPT-medium-PowPowGaming-Gen1
+rlatt/DialoGPT-large-King-James-Bible-test-accurate
+kennethhendricks/DialoGPT-medium-PowPowGaming
+kelvinhang/DialoGPT-medium-badguy2
+zami0011/qqpbksdj
+vladiyudi/Morty-data
+RazaK18/DialoGPT-small-harrypotter
+comradesocrates/DialoGPT-large-io
+kelvinhang/DialoGPT-medium-okakoro
+Monchic/chatwithkani
+zami0011/rickdick
+CallMeJeremy/DialoGPT-medium-THREEPIO
+Leomas/DialoGPT-medium-Leomas
+RehanP123/DialoGPT-large-kermit
+shahules786/Safetybot-T5-base
+huolongguo10/CDial-GPT2-LCCC-Base-copy
+yashR4J/TyrionBOT
+TakoIsATaco/DialoGPT-small-ShinAI
+MrLamBam/DialoGPT-medium-LUKEBot
+Zeda/DialoGPT-Medium-ZedaBot
+princedream/DialoGPT-small-harrypotter
+shahules786/Safetybot-mt5-base
+xiaomengdotcom/Chatgpt-harryP
+ProtonPLUS/Colab
+YTTD/DialoGPT-medium-saf
+jasondubon/HubermanGPT-small-v1
+YTTD/DialoGPT-medium-safv2
+YTTD/DialoGPT-medium-safv3
+kennethhendricks/DialoGPT-medium-jared-hendricks-gen1
+Cohee/pygmalion-6b-pyggyback-v6_40_v8p4_60
+DiogenesGois/DialoGPT-medium-Rick
+LordDanielDE/DialoGPT-medium-Hina
+ITG/DialoGPT-medium-spanish-chitchat
+kemsa51/DialoGPT-medium-cartman
+Mogwhy/DialoGPT-medium-Arrobot
+nRuaif/Pyg6B-V8P2
+Seer-luma/DialoGPT-small-SeerBot
+Dinoloverwii/DialoGPT-Sachibot
+flayeddie/Mike
+wooldover/krautbot
+kielljoy/DialoGPT-small-k
+WAHCLAN/DialoGPT-Medium-DAN
+ss1612/loki-chat
+IceBruhOne/mytestcharacter
+wooldover/pygbot
+IceBruhOne/DialoGPT-medium-subjectai
+YukioKoito/DialoGPT-small-ozua
+gaytrimoh/DialoGPT-small-harrypotter
+YukioKoito/DialoGPT-small-doog
+IceBruhOne/DialoGPT-medium-subjectai2
+custads23/DialoGPT-medium-aubrey
+HaHaMagpie/DialoGPT-small-phineas
+Carslo45/DialoGPT-medium-ddlc-monika
+zl111/ChatDoctor
+MarinHinawa/DialoGPT-medium-haruka
+custads23/DialoGPT-medium-basil
+IceBruhOne/DialoGPT-medium-complexai
+MarinHinawa/DialoGPT-medium-Shintaro
+jlsalty9999/DialoGPT-medium-Riddle
+custads23/DialoGPT-medium-mincy
+Wtfsquad/DialoGPT-small-pulpfictionVincent
+ss1612/erika-chatv4
+WAHCLAN/DialoGPT-Large-DAN
+Speedemon/jake-peralta-ai
+Speedemon/cobalt
+DeliveryBoy/DiabloGPT-medium-Kurisu
+AbbyRhea/DialoGPT-small-adrienbot
+monish162/kirthin-waifuu
+janna42/DialoGPT-small-phoenix
+AbbyRhea/DialoGPT-medium-AA
+FrozenSmoothie/DialoGPT-medium-star
+Fizi12341/astro_bot1234
+stiGGy/DialoGPT-medium-raymond
+patthebaker45/DialoGPT-small-Carlbot
+r4k4n1/DialoGPT-small-joshua
+Sukul/DialoGPT-small-Harsabot
+Sukul/DialoGPT-small-Harsabot1
+hihihotdog/DialoGPT-bot
+LarsJonasson/pythia-1.4b-deduped-sft-swedish
+mayaeary/pygmalion-6b-4bit-128g
+mayaeary/pygmalion-6b_dev-4bit-128g
+Inhaexpress/DialoGPT-medium-paimon
+sanyasna517/DialoGPT-medium-Zhongli
+StephenBrink/DialoGPT-small-will
+StanleyRoberts/Nix
+boudchicha/soluzione
+mayaeary/PPO_Pygway-V8p4_Dev-6b-4bit-128g
+ToborWinner/DialoGPT-medium-jolly
+mayaeary/PPO_Pygway-6b-Mix-4bit-128g
+ayushutkarsh/t3
+Inhaexpress/DialoGPT-medium-paimon2
+eepyblanky/DialoGPT-medium-malina
+eachadea/legacy-ggml-vicuna-13b-4bit
+eachadea/ggml-gpt4-x-alpaca-13b-native-4bit
+totallynotbrent/brotGPT
+Inhaexpress/DialoGPT-medium-harry_potter_ps
+robintan66/DialoGPT-small-harrypotter
+MajorCrayon7047/MadboneAssistantGPT-2
+VennuT/DialoGPT-medium-Alphinaud
+triple777/annicebot
+totallynotbrent/aaronGPTalpha
+Plaaasma/gerald-model
+yashugupta786/bart_large_xsum_samsum_conv_summarizer
+eachadea/legacy-ggml-vicuna-7b-4bit
+ColtonAi/Llmtrain
+ColtonAi/Chem4
+IchtacaKemeRaz/favabean
+Stromello/DialoGPT-medium-ZeroTwo
+totallynotbrent/brotGPTplus
+storminstakk/Stormin-Stakk
+ToddGoldfarb/Cadet-Tiny
+aghelan3/eggIncubationRepo
+hackathon-somos-nlp-2023/SalpiBloomZ_15949_input_1024-1b7
+JosephusCheung/Guanaco
+raymondho/DialoGPT-small-harry
+Capitalist/DialoGPT-small-rick
+gfgddfg/DialoGPT-small-qiu_chat
+eachadea/ggml-toolpaca-13b-4bit
+CNR223/DialoGPT-small-MasterO
+Abigaming75/Bot_wa
+pranitrai07/DialoGPT-medium-harrypotter
+IlyaGusev/saiga_7b_lora
+Ancestral/Dolly_Shygmalion-6b-4bit-128g
+Ancestral/PPO_Shygmalion-6b-4bit-128g
+wyskiski/winonabot
+hcpwr/DialoGPT-medium-samantha
+Roguwan/DialoGPT-medium-rogu
+totallynotbrent/aaronGPTplus
+Ancestral/Dolly_Malion-6b-4bit-128g
+vantozdad/DialoGPT-medium-Dumbledore
+Abyss-fyf/DialoGPT-small-discord
+CrystalzAura/DialoGPT-small-elysia
+eachadea/ggml-gpt4all-7b-4bit
+inu-ai/alpaca-guanaco-japanese-gpt-1b
+Husnul/pepper-bot-morty
+TheBloke/vicuna-13B-1.1-GPTQ
+CRD716/ggml-vicuna-1.1-quantized
+4bit/pygmalion-6b-4bit-128g
+Reaver1092/DialoGPT-small-bones
+Ibnelaiq/Makise-Amadeus-Kurisu-small
+inu-ai/dolly-japanese-gpt-1b
+clawrex/DialoGPT-medium-walt
+IlyaGusev/saiga_13b_lora
+Zeda/DialoGPT-Large-ZedaBot
+Ibnelaiq/Makise-Amadeus-Kurisu
+Jaxon/DialoGPT-medium-kirito
+glitchie/bb
+Aqua002/DialoGPT-small-deadpool
+Aqua002/discord-chatbot
+lemoniada/Przembot
+Avitas8485/Dialogpt-small-v1
+Jprafol/DialoGPT-large-ARCHIBot
+Jprafol/DialoGPT-large-ARCHIBotV2
+spitfire4794/ben-ultra
+IlyaGusev/saiga_30b_lora
+NbAiLab/nb-gpt-j-6B-norpaca
+winglian/vicuna-self-reflect-13b
+0x044/test-1
+0x044/dgpt
+ss1612/erika-chatv6
+TestingCoder463632/DialoGPT-small-palpatine
+Blizzchor/DialoGPT-medium-BarryB
+sasha0552/pygmalion-6b-f16-ggml
+kavindu999/BetterEnglishGPT-v1
+kavindu999/BetterEnglishGPT-v2
+EnterNameBros/DialoGPT-small-FoxySan
+OrientalDude/DialoGPT-medium-GOKU
+Avitas8485/Dialogpt-medium-v1
+finex/pfe-mohamed-Harry
+Avitas8485/Dialogpt-medium-finetuned
+psyamk/DialoGPT-small-harrypotter
+Jamesonn/DialoGPT-small-jumin
+CNXT/CNXT
+Ilangraterol/Dataset_model
+IlyaGusev/saiga_30b_ggml
+Locutusque/gpt2-conversational-or-qa
+TrippingFollowing39/AMOGUS
+moomoomer/DialoGPT-medium-garfield
+PygmalionAI/pygmalion-7b
+Viperxyz/DialoGPT-small-Cartman
+Neko-Institute-of-Science/pygmalion-7b
+TehVenom/Pygmalion-7b-Merged-Safetensors
+BiaDd/DialoGPT-medium-Punko
+NewBreaker/chatglm-6b-int4
+TehVenom/Pygmalion-7b-4bit-GPTQ-Safetensors
+TehVenom/Pygmalion-7b-4bit-Q4_1-GGML
+userzyzz/piggySharded
+steinhaug/models-bck
+blueberrycheesecake/DialoGPT-small-misssophie
+Imablank/P1GM4L10N-7B-MERGED_WEIGHTS
+MrToast/idk
+SouroJ/DialoGPT-medium-Mordecai
+sasha0552/pygmalion-7b-bf16
+swajan/DialoGPT-small-Trail-1
+RobiKenobi/DialoGPT-medium-pete
+sasha0552/pygmalion-7b-f16-ggml
+sasha0552/pygmalion-7b-f16
+winglian/llama-adapter-13b
+MatLumber/Bisho
+iconical/MortyChatbotAI
+swajan/Trail-1
+swajan/Trail-2
+Misfit2/DialoGPT-large-Sonic
+ToddGoldfarb/Cadet-Medium
+ajpieroni/DiabloGPT-medium-medea
+AliiaR/DialoGPT-medium-empathetic-dialogues
+Chun121/ChocolaChat
+lemoniada/kicerobot
+Kazeyami-o7/DialoGPT-medium-beterbiffin
+Elucia/Diluc_Bot
+Elucia/Diluc_Bot_1.1
+Elucia/Diluc_Bot_1.2
+neurofumo/DialoGPT-small-joshua
+Elucia/Diluc_Bot_1.3
+GraphicStylz/Stylz
+naybiblu/ChizuruBot
+calvindoingstuff/DialoGPT-medium-luffy
+xZephy/DialoGPT-small-HelperBot
+crazywombat/DialoGPT-small-abandonware
+anshengli2/DialoGPT-small-counter-hate
+sephwalker3/piggy-7b
+apricxty/DialoGPT-small-chatbot
+leadmaister/langchain-prompt-master
+Covriar/DialoGPT-med-kiryu
+yesuns/DialoGPT-small-yesun
+davidviriato/DialoGPT-small-joshua
+VMware/open-llama-0.3T-7B-open-instruct-v1.1
+prabhguron/DialoGPT-small-harrypotter
+xHexyy/small-test
+malteos/bloom-6b4-clp-german-oasst-v0.1
+Pcik/DialoGPT-medium-Ruby
+sasha0552/pygmalion-7b-q4_0-ggml
+sasha0552/pygmalion-7b-q4_1-ggml
+sasha0552/pygmalion-7b-q5_0-ggml
+sasha0552/pygmalion-7b-q5_1-ggml
+sasha0552/pygmalion-7b-q8_0-ggml
+rjorg543/DialoGPT-small-ben
+eachadea/ggml-gpt4-x-vicuna-13b
+Tlethal/DialoGPT-small-harrypotter
+xHexyy/test2
+xHexyy/test3
+ldilov/stablelm-tuned-alpha-7b-4bit-128g-descact-sym-true-sequential
+AnimusOG/pygmalion-7b-4bit-128g-cuda-2048Token
+jun-ai/BeethovenBot
         | 
| 2122 | 
            +
            channashi/DialoGPT-small-rocket
         | 
| 2123 | 
            +
            biscuitbutb/biscuitbot-dialogpt-model
         | 
| 2124 | 
            +
            ytrbqrkflbvbhy/DialoGPT-small-me-rus
         | 
| 2125 | 
            +
            Pruz0/VescGPT
         | 
| 2126 | 
            +
            IlyaGusev/saiga_7b_ggml
         | 
| 2127 | 
            +
            IlyaGusev/saiga_13b_ggml
         | 
| 2128 | 
            +
            TechTay/DialoGPT-small-Luciano
         | 
| 2129 | 
            +
            BlackBull/yeet
         | 
| 2130 | 
            +
            WAHCLAN/DialoGPT-Medium-SAM
         | 
| 2131 | 
            +
            MistyIce/dialog-gpt-Heshan
         | 
| 2132 | 
            +
            Pruz0/LennGPT
         | 
| 2133 | 
            +
            Wanfq/MAKER-mwoz-full-kb-t5-base
         | 
| 2134 | 
            +
            Wanfq/MAKER-mwoz-full-kb-t5-large
         | 
| 2135 | 
            +
            Wanfq/MAKER-smd-condensed-kb-t5-base
         | 
| 2136 | 
            +
            Wanfq/MAKER-smd-condensed-kb-t5-large
         | 
| 2137 | 
            +
            Wanfq/MAKER-camrest-condensed-kb-t5-base
         | 
| 2138 | 
            +
            Wanfq/MAKER-camrest-condensed-kb-t5-large
         | 
| 2139 | 
            +
            Wanfq/MAKER-camrest-full-kb-t5-base
         | 
| 2140 | 
            +
            Wanfq/MAKER-camrest-full-kb-t5-large
         | 
| 2141 | 
            +
            Wanfq/MAKER-mwoz-condensed-kb-t5-base
         | 
| 2142 | 
            +
            Wanfq/MAKER-mwoz-condensed-kb-t5-large
         | 
| 2143 | 
            +
            raphaman/test
         | 
| 2144 | 
            +
            Pruz0/HaLLGPT
         | 
| 2145 | 
            +
            Binaryy/blender-bot-distill-finetuned
         | 
| 2146 | 
            +
            alex297/DialoGPT-small-sparky
         | 
| 2147 | 
            +
            Pruz0/GeoGPT
         | 
| 2148 | 
            +
            Pruz0/PruzGPT
         | 
| 2149 | 
            +
            dorkai/pygmalion-2.7b
         | 
| 2150 | 
            +
            ikocx-to24/DialoGPT-medium-plankton
         | 
| 2151 | 
            +
            th3d4nk/llamaModel1
         | 
| 2152 | 
            +
            PygmalionAI/pygmalion-13b
         | 
| 2153 | 
            +
            TehVenom/Pygmalion-13b-Merged
         | 
| 2154 | 
            +
            ivaan01/TFG-Mauri
         | 
| 2155 | 
            +
            alex297/DialoGPT-medium-fox
         | 
| 2156 | 
            +
            Crataco/Pygmalion-1.3B-GGML
         | 
| 2157 | 
            +
            SaintMcMuffins/DialoGPT-small-brain2.0
         | 
| 2158 | 
            +
            dujade18/DialoGPT-medium-dwightoffice
         | 
| 2159 | 
            +
            TehVenom/Pygmalion-13b-8bit-GPTQ
         | 
| 2160 | 
            +
            helloerikaaa/chandlerGPT
         | 
| 2161 | 
            +
            SaintMcMuffins/Brain2.1
         | 
| 2162 | 
            +
            kb2c37g/DialoGPT-small-Rick
         | 
| 2163 | 
            +
            alex297/DialoGPT-small-fox
         | 
| 2164 | 
            +
            TeraSpace/dialofrednocontext
         | 
| 2165 | 
            +
            EnterNameBros/DialoGPT-small-Senko
         | 
| 2166 | 
            +
            EnterNameBros/DialoGPT-small-Senko-san
         | 
| 2167 | 
            +
            4bit/pyg-7b
         | 
| 2168 | 
            +
            EnterNameBros/DialoGPT-small-Senko-san-ver
         | 
| 2169 | 
            +
            Lumiras/rachbot
         | 
| 2170 | 
            +
            kevintest1234/DialoGPT-small-harrypotter
         | 
| 2171 | 
            +
            EnterNameBros/DialoGPT-small-Senko-san-ver-2
         | 
| 2172 | 
            +
            EnterNameBros/DialoGPT-large-Senko-san-ver-2
         | 
| 2173 | 
            +
            Delmarfish/Delmar
         | 
| 2174 | 
            +
            diankymar/kitty
         | 
| 2175 | 
            +
            TatonkaHF/ruDialoGpt3-medium-finetuned-russian-joke
         | 
| 2176 | 
            +
            EggsInAJar/DialoGPT-small-MerrickBot
         | 
| 2177 | 
            +
            DBoi/Mayreel2
         | 
| 2178 | 
            +
            hosst/FridgeLLM
         | 
| 2179 | 
            +
            loitran/DialoGPT-medium-peppapig
         | 
| 2180 | 
            +
            Syamil/DialoGPT-small-pixal
         | 
| 2181 | 
            +
            Avitas8485/Dialogpt-medium-v2
         | 
| 2182 | 
            +
            Inhaexpress/DialoGPT-medium-harrypotter
         | 
| 2183 | 
            +
            loitran/DialoGPT-medium-HarryPotter
         | 
| 2184 | 
            +
            Syamil/DialoGPT-medium-pixal
         | 
| 2185 | 
            +
            roykim/ko_chat
         | 
| 2186 | 
            +
            Syamil/DialoGPT-medium-pixals
         | 
| 2187 | 
            +
            minhcrafters/DialoGPT-small-Fukuya
         | 
| 2188 | 
            +
            Warren00/DialoGPT-Med-peppa05a
         | 
| 2189 | 
            +
            Syamil/DialoGPT-medium-pixalbot
         | 
| 2190 | 
            +
            LelouchH/DiabloGPT-small-RaidenBot
         | 
| 2191 | 
            +
            Inhaexpress/DialoGPT-medium-shrek124
         | 
| 2192 | 
            +
            Inhaexpress/DialoGPT-medium-terra1
         | 
| 2193 | 
            +
            nascar123/Discordtester000
         | 
| 2194 | 
            +
            EnterNameBros/Offical-Senko-medium-update
         | 
| 2195 | 
            +
            EnterNameBros/Offical-Senko-medium-update-2
         | 
| 2196 | 
            +
            EnterNameBros/Offical-Senko-medium-update-3
         | 
| 2197 | 
            +
            EnterNameBros/Senko-medium
         | 
| 2198 | 
            +
            jiezhou1996/test
         | 
| 2199 | 
            +
            ElMater06/SpaceCore
         | 
| 2200 | 
            +
            EnterNameBros/Offical-Senko-medium
         | 
| 2201 | 
            +
            EnterNameBros/Senko-san
         | 
| 2202 | 
            +
            DBoi/Mayreel
         | 
| 2203 | 
            +
            VMware/open-llama-0.7T-7B-open-instruct-v1.1
         | 
| 2204 | 
            +
            Warren00/DialoGPT-Small-Peppa06_053123
         | 
| 2205 | 
            +
            mpalacio/DialoGPT_ootwl
         | 
| 2206 | 
            +
            protag07/DialoGPT-small-harrypotter
         | 
| 2207 | 
            +
            h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b-v2
         | 
| 2208 | 
            +
            cosimoiaia/Loquace-70m
         | 
| 2209 | 
            +
            cosimoiaia/Loquace-410m
         | 
| 2210 | 
            +
            MareNoceda/DialoGPT-medium-Luz
         | 
| 2211 | 
            +
            GarrisonBot/DialoGPT-medium-herbertgarrison
         | 
| 2212 | 
            +
            cosimoiaia/Loquace-12B
         | 
| 2213 | 
            +
            cosimoiaia/Loquace-7B
         | 
| 2214 | 
            +
            Deojoandco/ahGPT-small-v1
         | 
| 2215 | 
            +
            PeachHeles/bmo
         | 
| 2216 | 
            +
            Rays236/DialoGPT-small-harrypotter
         | 
| 2217 | 
            +
            Deojoandco/ahGPT-small-v2
         | 
| 2218 | 
            +
            Syamil/DialoGPT-medium-newpixal
         | 
| 2219 | 
            +
            Coderhuynin/DialoGPT-large-TonyStark
         | 
| 2220 | 
            +
            SotirisLegkas/final_socratic_dialoGPT
         | 
| 2221 | 
            +
            ademfatnassi/bonjourGPT-small
         | 
| 2222 | 
            +
            ikocx-to24/DialoGPT-small-planktongpt2
         | 
| 2223 | 
            +
            EricYou/RickBot
         | 
| 2224 | 
            +
            Ayaakaa/DialoGPT-small-Yoisaki-Kanade
         | 
| 2225 | 
            +
            DoesNoPro/DialoGPT-small-RaidenG
         | 
| 2226 | 
            +
            rajeshbot/DialoGPT-medium-Harry-to-Hari
         | 
| 2227 | 
            +
            DoesNoPro/DialoGPT-small-RaidenG2
         | 
| 2228 | 
            +
            SamsonP/pygmalion-6b-sft
         | 
| 2229 | 
            +
            Deojoandco/ahDialoGPT-small-v4
         | 
| 2230 | 
            +
            Syamil/GPTNeo-PIXAL-Model
         | 
| 2231 | 
            +
            Syamil/GPTNeo-PIXAL-new
         | 
| 2232 | 
            +
            Lattori/DiabloGPT-small-ConanBot
         | 
| 2233 | 
            +
            Badzee/DialoGPT-medium-jackbot
         | 
| 2234 | 
            +
            meowsynth/DialoGPT-small-sophie
         | 
| 2235 | 
            +
            EnterNameBros/Senko-san-medium-baby
         | 
| 2236 | 
            +
            Deojoandco/ah-GPT2-v4
         | 
| 2237 | 
            +
            cosimoiaia/Loquace-20B
         | 
| 2238 | 
            +
            EnterNameBros/Senko-san-medium-fox
         | 
| 2239 | 
            +
            MarkyMarx/DialoGPT-medium-jimmybot2
         | 
| 2240 | 
            +
            DhruvShek/DialoGPT
         | 
| 2241 | 
            +
            Doge22/DialoGPT-medium-max
         | 
| 2242 | 
            +
            lyogavin/Anima33B
         | 
| 2243 | 
            +
            steerevo88/testThotBot
         | 
| 2244 | 
            +
            steerevo88/workingthotBot
         | 
| 2245 | 
            +
            YTTD/DialoGPT-medium-keiji
         | 
| 2246 | 
            +
            MisguidedKerbal/DialoGPT-medium-kerbal
         | 
| 2247 | 
            +
            Blueify/DialoGPT-small-model-lotr
         | 
| 2248 | 
            +
            steerevo88/newthotBot
         | 
| 2249 | 
            +
            paripi/Malishka
         | 
| 2250 | 
            +
            finex/pfe-mohamed2023-RON
         | 
| 2251 | 
            +
            DhruvShek/CMDGPT
         | 
| 2252 | 
            +
            finex/pfe-mohamed2023-Hermione
         | 
| 2253 | 
            +
            SkylerBlu9/DialoGPT-medium-CitrAI
         | 
| 2254 | 
            +
            SkylerBlu9/DialoGPT-medium-autismobot
         | 
| 2255 | 
            +
            MisguidedKerbal/DialoGPT-kerbalV2
         | 
| 2256 | 
            +
            EnterNameBros/Senko-san-medium-a
         | 
| 2257 | 
            +
            dderr/testmodel
         | 
| 2258 | 
            +
            priyanshdahiya/DialoGPT-small-rick
         | 
| 2259 | 
            +
            Goodnoway/DialoGPT-nerbalV2
         | 
| 2260 | 
            +
            WompWomp1/DialoGPT-medium-Kirin
         | 
| 2261 | 
            +
            lyogavin/Anima33B-merged
         | 
| 2262 | 
            +
            peytonai/DialoGPT-small-wali-joshua
         | 
| 2263 | 
            +
            MisguidedKerbal/DialoGPT-kerbalV3
         | 
| 2264 | 
            +
            WompWomp1/DialoGPT-medium-Kaori
         | 
| 2265 | 
            +
            OmarDiab/DialoGPT-small-Amogus
         | 
| 2266 | 
            +
            servetier/DialoGPT-large-miguel
         | 
| 2267 | 
            +
            OmarDiab/DialoGPT-small-Amogus-2
         | 
| 2268 | 
            +
            steveglover/falcon-7b-instruct-telco-chat
         | 
| 2269 | 
            +
            Lazycuber/Janemalion-6B
         | 
| 2270 | 
            +
            Goodnoway/DialoGPT-nerbalV4
         | 
| 2271 | 
            +
            gvij/gpt-j-6B-alpaca-gpt4
         | 
| 2272 | 
            +
            papahawk/keya-560m
         | 
| 2273 | 
            +
            JavRedstone/DialoGPT-small-tesseractist
         | 
| 2274 | 
            +
            imuncomfortable/DiabloGPT-small-CocoAtarashi
         | 
| 2275 | 
            +
            Amod/falcon7b-fine-tuned-therapy-merged
         | 
| 2276 | 
            +
            Oshirigami1980/DialoGPT-medium-Steven
         | 
| 2277 | 
            +
            Drevanil/DialoGPT-small-try
         | 
| 2278 | 
            +
            Yaewe/1
         | 
| 2279 | 
            +
            DataHammer/mozi_emotional_7b
         | 
| 2280 | 
            +
            udxyz/HarryPotterBot
         | 
| 2281 | 
            +
            Kasyapa/DialoGPT-medium-hagridbot
         | 
| 2282 | 
            +
            lyogavin/Anima33B-DPO-Belle-1k
         | 
| 2283 | 
            +
            JeanL-0/TestingModel-01
         | 
| 2284 | 
            +
            TejasC2/DialoGPT-TejasBot
         | 
| 2285 | 
            +
            lyogavin/Anima33B-DPO-Belle-1k-merged
         | 
| 2286 | 
            +
            InterruptAI/Interrupt-350M
         | 
| 2287 | 
            +
            Lucideds/Lucideds
         | 
| 2288 | 
            +
            EnterNameBros/Senko-san-medium-sc
         | 
| 2289 | 
            +
            EnterNameBros/Senko-san-medium-scl
         | 
| 2290 | 
            +
            DaddySen/tighnari
         | 
| 2291 | 
            +
            ettevyemerald/DialoGPT-medium-beomgyu
         | 
| 2292 | 
            +
            minhcrafters/DialoGPT-small-mindwandering
         | 
| 2293 | 
            +
            JNDankwah/DialoGPT-small-ThorCB
         | 
| 2294 | 
            +
            minhcrafters/DialoGPT-medium-Zephirel
         | 
| 2295 | 
            +
            papahawk/falcon-40b
         | 
| 2296 | 
            +
            sonntt/DialoGPT-small-mindwandering
         | 
| 2297 | 
            +
            pundapog/DialoGPT-medium-ethanbot
         | 
| 2298 | 
            +
            TheBloke/Pygmalion-7B-SuperHOT-8K-GGML
         | 
| 2299 | 
            +
            TheBloke/Pygmalion-7B-SuperHOT-8K-fp16
         | 
| 2300 | 
            +
            pobierz69/model-6b-read-desc
         | 
| 2301 | 
            +
            sidca/Cam
         | 
| 2302 | 
            +
            EnterNameBros/Senko-san-medium-abc
         | 
| 2303 | 
            +
            abhi-8/DialoGPT-medium-Michael
         | 
| 2304 | 
            +
            abhi-8/DialoGPT-medium-Rick
         | 
| 2305 | 
            +
            abhi-8/DialoGPT-medium-Joshua-twevy
         | 
| 2306 | 
            +
            spitfire4794/dialogpt-small-rick
         | 
| 2307 | 
            +
            abhi-8/Joshua-bot
         | 
| 2308 | 
            +
            Justus-Jonas/Imaginary-Embeddings-Classic
         | 
| 2309 | 
            +
            Justus-Jonas/Imaginary-Embeddings-SpeakerTokens
         | 
| 2310 | 
            +
            Justus-Jonas/Imaginary-Embeddings-SpeakerTokens-STP
         | 
| 2311 | 
            +
            spitfire4794/dialogpt-small-morty
         | 
| 2312 | 
            +
            Kauru/DialoGPT-medium-Ranni
         | 
| 2313 | 
            +
            crazydamns/DialoGPT-Johnny2
         | 
| 2314 | 
            +
            jpandeinge/DialoGPT-medium-Oshiwambo-Bot
         | 
| 2315 | 
            +
            custads23/pygmalion-1.3b
         | 
| 2316 | 
            +
            HatCha01/DialoGPT-small-Batman
         | 
| 2317 | 
            +
            crazydamns/DialoGPT-Johnny3
         | 
| 2318 | 
            +
            assembleteams/curiouspi
         | 
| 2319 | 
            +
            Kauru/DialoGPT-medium-Ranniv2
         | 
| 2320 | 
            +
            SatwikShrivastava/narutoAI-chatbot
         | 
| 2321 | 
            +
            digitalmax1/max
         | 
| 2322 | 
            +
            adr2432/small-Joshua-Bot
         | 
| 2323 | 
            +
            ObsessedCitrus/DialoGPT-small-PeterBot_ChatBot
         | 
| 2324 | 
            +
            suarkadipa/HubermanGPT-small-v1
         | 
| 2325 | 
            +
            suarkadipa/HarryPotterGPT-small-v1
         | 
| 2326 | 
            +
            wevie1978/DialoGPT-medium-Kebb
         | 
| 2327 | 
            +
            kopeqwerty/DialoGPT-medium-idotbot
         | 
| 2328 | 
            +
            zelalt/Chatbot_T5-Prmtrs
         | 
| 2329 | 
            +
            jarvissss/DialoGPT-medium-idotbot
         | 
| 2330 | 
            +
            Magmadue/DiabloGPT-small-ei
         | 
| 2331 | 
            +
            nicbull/DialoGPT-small-cryptonic
         | 
| 2332 | 
            +
            nicbull/DialoGPT-small-cryptonic2
         | 
| 2333 | 
            +
            chloe0x0/DialoGPT-small-Muty
         | 
| 2334 | 
            +
            chloe0x0/mutyGPT
         | 
| 2335 | 
            +
            alexwang05/DialoGPT-small-soph
         | 
| 2336 | 
            +
            BHAndersonJr/DialoGPT-small-fry
         | 
| 2337 | 
            +
            timothykim04/DialoGPT-medium-timothykim
         | 
| 2338 | 
            +
            timothykim04/DialoGPT-medium-harrypotter
         | 
| 2339 | 
            +
            Luca999/Limitlessai99
         | 
| 2340 | 
            +
            Madgimmy/DiabloGPT-small-Madgimmy
         | 
| 2341 | 
            +
            chloe0x0/mutyGPT-v2
         | 
| 2342 | 
            +
            nuggster/DialoGPT-small-ianbot
         | 
| 2343 | 
            +
            we1kkk/llama2-hf-qlora-oasst1
         | 
| 2344 | 
            +
            IlyaGusev/saiga2_7b_lora
         | 
| 2345 | 
            +
            IlyaGusev/gigasaiga_lora
         | 
| 2346 | 
            +
            jliu03/JustinBot
         | 
| 2347 | 
            +
            heliosbrahma/falcon-7b-finetuned-mental-health-conversational
         | 
| 2348 | 
            +
            drunknmonk/GPT-Chandler
         | 
| 2349 | 
            +
            jun-ai/llama2-qlora-finetunined-french
         | 
| 2350 | 
            +
            WompWomp1/DialoGPT-large-Kirin
         | 
| 2351 | 
            +
            WompWomp1/DialoGPT-large-Kirin-2
         | 
| 2352 | 
            +
            WompWomp1/DialoGPT-large-Rin
         | 
| 2353 | 
            +
            or4cl3ai/Aiden_t5
         | 
| 2354 | 
            +
            jstawski/Llama-2-13b-hf-finetuned-SNG
         | 
| 2355 | 
            +
            Gelmo/Halouf
         | 
| 2356 | 
            +
            IlyaGusev/saiga2_13b_lora
         | 
| 2357 | 
            +
            sophji/DialoGPT-small-GodlyLJ
         | 
| 2358 | 
            +
            ATrapenard/Discord-Impersonation-Bot
         | 
| 2359 | 
            +
            hiamitabha/llama2forbittlerobot
         | 
| 2360 | 
            +
            IlyaGusev/saiga2_7b_gguf
         | 
| 2361 | 
            +
            IlyaGusev/saiga2_13b_gguf
         | 
| 2362 | 
            +
            TejasC2/DialoGPT-TejasBot2
         | 
| 2363 | 
            +
            CNR223/DialoGPT-medium-MalcolmReynold
         | 
| 2364 | 
            +
            minh-hahaha/DialoGPT-small-harrypotter
         | 
| 2365 | 
            +
            phucnq1591999/SolanaChatBot
         | 
| 2366 | 
            +
            marclove/llama-2-7b-chat-functions
         | 
| 2367 | 
            +
            Sheerapi/test
         | 
| 2368 | 
            +
            YukioKoito/DialoGPT-small-chibi
         | 
| 2369 | 
            +
            YukioKoito/DialoGPT-small-twilight
         | 
| 2370 | 
            +
            amzrana/lora
         | 
| 2371 | 
            +
            ierhon/basic-chatbot
         | 
| 2372 | 
            +
            Pula23/Hggjg
         | 
| 2373 | 
            +
            Focs/DialoGPT-medium-tony-stark
         | 
| 2374 | 
            +
            Kenobiwan/DialoGPT-small-AizakkuBot2
         | 
| 2375 | 
            +
            drado/DialoGPT-small-joshua
         | 
| 2376 | 
            +
            rah-1/Rahulio
         | 
| 2377 | 
            +
            tanishqvashisht/DialoGPT-small-Joshua
         | 
| 2378 | 
            +
            Kenobiwan/DialoGPT-small-AizakkuBot3
         | 
| 2379 | 
            +
            Ridloo/DialogGPT-small-harrypotter
         | 
| 2380 | 
            +
            dyuhong80/DialoGPT-large-ModerateEffortBombGPT
         | 
| 2381 | 
            +
            ai-forever/paper_persi_chat
         | 
| 2382 | 
            +
            paralleldynamix/paralleldynamix-model101
         | 
| 2383 | 
            +
            kelSidenna/SoftwareRequirements-T5-Base
         | 
| 2384 | 
            +
            renahime/DialoGPT-medium-umineko
         | 
| 2385 | 
            +
            Shaun1204/RedGPT-Gormlee
         | 
| 2386 | 
            +
            diwas7777/HarryBot
         | 
| 2387 | 
            +
            heliosbrahma/falcon-7b-sharded-bf16-finetuned-mental-health-conversational
         | 
| 2388 | 
            +
            kelSidenna/SoftwareReq-DialoGPT-medium
         | 
| 2389 | 
            +
            shanover/medbot-conv
         | 
| 2390 | 
            +
            J-Wiggler/DialoGPT-medium-Stanley
         | 
| 2391 | 
            +
            gearski/DialoGPT-small-itskleb
         | 
| 2392 | 
            +
            wozniakclub/llama-2-7b-medtext-llama2
         | 
| 2393 | 
            +
            gearski/DialoGPT-medium-itskleb
         | 
| 2394 | 
            +
            rebornrulz/Rulz-AI
         | 
| 2395 | 
            +
            Quantsr/DialogGPT-small-Aeris
         | 
| 2396 | 
            +
            ostorc/rick-sanchez-chatbot
         | 
| 2397 | 
            +
            nicbull/DialoGPT-medium-nic
         | 
| 2398 | 
            +
            nicbull/DialoGPT-medium-nic2
         | 
| 2399 | 
            +
            gorkemgoknar/llama2-7f-moviechatbot-ggml-q4
         | 
| 2400 | 
            +
            aka-nikko/ainz-ooal-gown
         | 
| 2401 | 
            +
            llSourcell/medllama2_7b
         | 
| 2402 | 
            +
            xtuner/Llama-2-7b-qlora-moss-003-sft
         | 
| 2403 | 
            +
            xtuner/Llama-2-7b-qlora-arxiv-gentitle
         | 
| 2404 | 
            +
            xtuner/internlm-7b-qlora-arxiv-gentitle
         | 
| 2405 | 
            +
            xtuner/internlm-7b-qlora-alpaca-enzh
         | 
| 2406 | 
            +
            xtuner/Baichuan-7B-qlora-arxiv-gentitle
         | 
| 2407 | 
            +
            xtuner/Baichuan-7B-qlora-alpaca-enzh
         | 
| 2408 | 
            +
            nicbull/DialoGPT-medium-leric
         | 
| 2409 | 
            +
            Ian-14/llm13
         | 
| 2410 | 
            +
            theastro/starkbot
         | 
| 2411 | 
            +
            yupimrandy/DialoGPT-medium-butcher
         | 
| 2412 | 
            +
            hclaim/clamgptattempt4
         | 
| 2413 | 
            +
            yupimrandy/DialoGPT-medium-hughie
         | 
| 2414 | 
            +
            nekohacker591/google1
         | 
| 2415 | 
            +
            zhmx31/Mychatbot
         | 
| 2416 | 
            +
            sk8ingcat/DialoGPT-small-TonyStark
         | 
| 2417 | 
            +
            SanchoJR/meX
         | 
| 2418 | 
            +
            xtuner/Qwen-7B-qlora-moss-003-sft
         | 
| 2419 | 
            +
            xtuner/Qwen-7B-qlora-arxiv-gentitle
         | 
| 2420 | 
            +
            xtuner/Qwen-7B-qlora-alpaca-enzh
         | 
| 2421 | 
            +
            xtuner/Qwen-7B-qlora-oasst1
         | 
| 2422 | 
            +
            xtuner/Baichuan-7B-qlora-oasst1
         | 
| 2423 | 
            +
            xtuner/internlm-7b-qlora-oasst1
         | 
| 2424 | 
            +
            4bit/medllama2_7b
         | 
| 2425 | 
            +
            JGKD/JangoGPTv1.0
         | 
| 2426 | 
            +
            kwankwan1000/DialoGPT-small-peppa
         | 
| 2427 | 
            +
            JGKD/JangoGPTv1.5
         | 
| 2428 | 
            +
            SoniR/config
         | 
| 2429 | 
            +
            mjyh/falcon-7b-qlora-sclue-20230601-04-merged
         | 
| 2430 | 
            +
            sadzip/SiberianPersona-ruGPT-3.5-qlora
         | 
| 2431 | 
            +
            Wolffire88/DialoGPT-medium-Android16
         | 
| 2432 | 
            +
            nolly3317/DialoGPT-small-alice
         | 
| 2433 | 
            +
            feelinrealcute/pym-6b
         | 
| 2434 | 
            +
            nixsy/AvasLove
         | 
| 2435 | 
            +
            feelinrealcute/pym-13b7
         | 
| 2436 | 
            +
            AleksiDu/HarryPotterBot
         | 
| 2437 | 
            +
            Belcebuzzz/DialoGPT-small-TomoGF
         | 
| 2438 | 
            +
            xtuner/internlm-7b-qlora-lawyer
         | 
| 2439 | 
            +
            xtuner/internlm-7b-qlora-colorist
         | 
| 2440 | 
            +
            xtuner/internlm-7b-qlora-coder
         | 
| 2441 | 
            +
            xtuner/internlm-7b-qlora-open-platypus
         | 
| 2442 | 
            +
            xtuner/internlm-7b-qlora-sql
         | 
| 2443 | 
            +
            inception-mbzuai/jais-13b-chat
         | 
| 2444 | 
            +
            Fredithefish/Guanaco-3B-Uncensored
         | 
| 2445 | 
            +
            garrachonr/LlamaDos
         | 
| 2446 | 
            +
            literallywood/DialoGPT-small-ekansh
         | 
| 2447 | 
            +
            IALABS/Arturosfastfood
         | 
| 2448 | 
            +
            javieitor/DialoGPT-medium-Rick
         | 
| 2449 | 
            +
            Kuduxaaa/ava-small
         | 
| 2450 | 
            +
            Al-Hathboor-Bikal-ai-2023/SRTIP-GPT-F7B-base
         | 
| 2451 | 
            +
            L-R/LLmRa-355M
         | 
| 2452 | 
            +
            Fredithefish/Guanaco-3B-Uncensored-v2
         | 
| 2453 | 
            +
            xtuner/Llama-2-7b-qlora-colorist
         | 
| 2454 | 
            +
            KE-AI/basicchatbot-kel
         | 
| 2455 | 
            +
            josepholiver/TEST_MODEL_1
         | 
| 2456 | 
            +
            PlaceReporter99/Utility_Bot_Chat
         | 
| 2457 | 
            +
            J-Wiggler2/Caesar
         | 
| 2458 | 
            +
            J-Wiggler2/Caesar2
         | 
| 2459 | 
            +
            matvalan/vittae-cot
         | 
| 2460 | 
            +
            Dawnstarhunter/DialoGPT-medium-Eveline
         | 
| 2461 | 
            +
            sahilxyd/DialoGPT-small-joshua
         | 
| 2462 | 
            +
            EnterNameBros/Senko-san-medium-abcd
         | 
| 2463 | 
            +
            6adityaverma/DialoGPT-large-Walter
         | 
| 2464 | 
            +
            6adityaverma/DialoGPT-large-Rick
         | 
| 2465 | 
            +
            IlyaGusev/saiga2_70b_lora
         | 
| 2466 | 
            +
            AyushK0808/StarWarsBot
         | 
| 2467 | 
            +
            EnterNameBros/Senko-ai-medium
         | 
| 2468 | 
            +
            Fredithefish/Guanaco-7B-Uncensored
         | 
| 2469 | 
            +
            IlyaGusev/saiga2_70b_gguf
         | 
| 2470 | 
            +
            glassofwine/DialoGPT-medium-johanwine
         | 
| 2471 | 
            +
            zattio770/120-Days-of-LORA-v2-13B
         | 
| 2472 | 
            +
            cannice/blenderbot-400M-distill-empathetic
         | 
| 2473 | 
            +
            Likelihood94/Jackoftrades
         | 
| 2474 | 
            +
            Hapski/DialoGPT-small-nene
         | 
| 2475 | 
            +
            Fredithefish/Guanaco-13B-Uncensored
         | 
| 2476 | 
            +
            kitbear444/DialoGPT-medium-kit
         | 
| 2477 | 
            +
            SonnyAu/DialoGPT-dumbledore
         | 
| 2478 | 
            +
            TheBloke/Guanaco-7B-Uncensored-GGUF
         | 
| 2479 | 
            +
            TheBloke/Guanaco-13B-Uncensored-GGUF
         | 
| 2480 | 
            +
            TheBloke/Guanaco-7B-Uncensored-GPTQ
         | 
| 2481 | 
            +
            TheBloke/Guanaco-13B-Uncensored-GPTQ
         | 
| 2482 | 
            +
            TheBloke/Guanaco-3B-Uncensored-v2-GPTQ
         | 
| 2483 | 
            +
            TheBloke/Guanaco-3B-Uncensored-v2-GGML
         | 
| 2484 | 
            +
            Codexister/DialoGPT-medium-KafkaBotV1
         | 
| 2485 | 
            +
            mfodwo/STUGPT-small-v1
         | 
| 2486 | 
            +
            asas-ai/jais-13b-chat-8bit
         | 
| 2487 | 
            +
            SoupChickn/Valeen-DialoGPT
         | 
| 2488 | 
            +
            Codexister/DialoGPT-medium-KafkaBotV2
         | 
| 2489 | 
            +
            KoalaAI/OPT-1.3b-Chat
         | 
| 2490 | 
            +
            Nafaille/nafaille6b
         | 
| 2491 | 
            +
            DiTy/dialogpt
         | 
| 2492 | 
            +
            Severus27/BeingWell_llama2_7b
         | 
| 2493 | 
            +
            rayho/DialoGPT-small-polysoft
         | 
| 2494 | 
            +
            TuningAI/Llama2_13B_startup_Assistant
         | 
| 2495 | 
            +
            dipxsy/testmodel
         | 
| 2496 | 
            +
            dipxsy/Jarvis-small
         | 
| 2497 | 
            +
            Lazycuber/L2-7b-Chat-Guanaco-Uncensored
         | 
| 2498 | 
            +
            dipxsy/jarvis-blend
         | 
| 2499 | 
            +
            TheBloke/Guanaco-13B-Uncensored-AWQ
         | 
| 2500 | 
            +
            TheBloke/Guanaco-7B-Uncensored-AWQ
         | 
| 2501 | 
            +
            wstock04/shiddeatorBotV1
         | 
| 2502 | 
            +
            Boqianshen/llama-2-7b-miniguanaco
         | 
| 2503 | 
            +
            sebastiantrbl/distilgpt2-finetuned-wikitext2
         | 
| 2504 | 
            +
            herzlixh/DialoGPTs_HarryFromHogwarts
         | 
| 2505 | 
            +
            poiccard/jais-13b-chat-adn
         | 
| 2506 | 
            +
            sebastiantrbl/test-DialoGPT-finetune
         | 
| 2507 | 
            +
            uffergist/DialoGPT-small-cummy
         | 
| 2508 | 
            +
            wstock04/shiddeatorBotV3.0
         | 
| 2509 | 
            +
            wstock04/shiddeatorBotDUMB
         | 
| 2510 | 
            +
            Applekinz/John
         | 
| 2511 | 
            +
            Or4cl3/1nsfw
         | 
| 2512 | 
            +
            sebastiantrbl/DialoGPT-finetuned-daily-dialog
         | 
| 2513 | 
            +
            LTC-AI-Labs/L2-7b-Base-WVG-Uncensored
         | 
| 2514 | 
            +
            hussain2030/jais13bchat2
         | 
| 2515 | 
            +
            subabi/DialoGPT-medium-subabicord
         | 
| 2516 | 
            +
            marblyso/DialoGPT-medium-collin
         | 
| 2517 | 
            +
            Crataco/Pygmalion-6B-GGML
         | 
| 2518 | 
            +
            dipxsy/jl
         | 
| 2519 | 
            +
            testerhubhai/krnedo
         | 
| 2520 | 
            +
            IAteSpaghettiForLunch/DialoGPT-medium-GLADoS
         | 
| 2521 | 
            +
            IAteSpaghettiForLunch/GLADoSBOT
         | 
| 2522 | 
            +
            Nikolai5592/DialoGPT-Medium-RickBot
         | 
| 2523 | 
            +
            KuroganeNiello/medium-NebBot
         | 
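
The conversational-models list above is one of two metadata files that drive litellm's Hugging Face task routing (see huggingface_restapi.py below). As a minimal illustrative sketch, assuming a valid HUGGINGFACE_API_KEY and taking the repo id from the list above, any of these models can be called through litellm's "huggingface/<repo-id>" naming convention:

    import os
    import litellm

    os.environ["HUGGINGFACE_API_KEY"] = "hf_..."  # placeholder, not a real token

    response = litellm.completion(
        model="huggingface/PygmalionAI/pygmalion-7b",  # any repo id from the list above
        messages=[{"role": "user", "content": "Hello, how are you?"}],
    )
    print(response.choices[0].message.content)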
    	
litellm/llms/huggingface_llms_metadata/hf_text_generation_models.txt ADDED
The diff for this file is too large to render. See raw diff.
    	
litellm/llms/huggingface_restapi.py ADDED
@@ -0,0 +1,750 @@
## Uses the huggingface text generation inference API
import os, copy, types
import json
from enum import Enum
import httpx, requests
from .base import BaseLLM
import time
import litellm
from typing import Any, Callable, Dict, List, Optional
from litellm.utils import ModelResponse, Choices, Message, CustomStreamWrapper, Usage
from .prompt_templates.factory import prompt_factory, custom_prompt


class HuggingfaceError(Exception):
    def __init__(
        self,
        status_code,
        message,
        request: Optional[httpx.Request] = None,
        response: Optional[httpx.Response] = None,
    ):
        self.status_code = status_code
        self.message = message
        if request is not None:
            self.request = request
        else:
            self.request = httpx.Request(
                method="POST", url="https://api-inference.huggingface.co/models"
            )
        if response is not None:
            self.response = response
        else:
            self.response = httpx.Response(
                status_code=status_code, request=self.request
            )
        super().__init__(
            self.message
        )  # Call the base class constructor with the parameters it needs
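
# When the caller does not pass an httpx request/response, HuggingfaceError
# synthesizes placeholder ones above, so downstream handlers can always rely on
# .request / .response being present, mirroring httpx's own exception interface.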


class HuggingfaceConfig:
    """
    Reference: https://huggingface.github.io/text-generation-inference/#/Text%20Generation%20Inference/compat_generate
    """

    best_of: Optional[int] = None
    decoder_input_details: Optional[bool] = None
    details: Optional[bool] = True  # enables returning logprobs + best of
    max_new_tokens: Optional[int] = None
    repetition_penalty: Optional[float] = None
    return_full_text: Optional[bool] = False  # by default don't return the input as part of the output
    seed: Optional[int] = None
    temperature: Optional[float] = None
    top_k: Optional[int] = None
    top_n_tokens: Optional[int] = None
    top_p: Optional[float] = None
    truncate: Optional[int] = None
    typical_p: Optional[float] = None
    watermark: Optional[bool] = None

    def __init__(
        self,
        best_of: Optional[int] = None,
        decoder_input_details: Optional[bool] = None,
        details: Optional[bool] = None,
        max_new_tokens: Optional[int] = None,
        repetition_penalty: Optional[float] = None,
        return_full_text: Optional[bool] = None,
        seed: Optional[int] = None,
        temperature: Optional[float] = None,
        top_k: Optional[int] = None,
        top_n_tokens: Optional[int] = None,
        top_p: Optional[float] = None,
        truncate: Optional[int] = None,
        typical_p: Optional[float] = None,
        watermark: Optional[bool] = None,
    ) -> None:
        locals_ = locals()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }
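
# Illustrative usage of HuggingfaceConfig (an inferred example, not part of the
# upstream file): setting a field turns it into a class-level default, and
# get_config() returns only the explicitly-set, serializable fields:
#
#   HuggingfaceConfig(max_new_tokens=200)
#   HuggingfaceConfig.get_config()
#   # -> {"details": True, "max_new_tokens": 200, "return_full_text": False}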


def output_parser(generated_text: str):
    """
    Parse the output text to remove any special characters. In our current approach we just check for ChatML tokens.

    Initial issue that prompted this - https://github.com/BerriAI/litellm/issues/763
    """
    chat_template_tokens = ["<|assistant|>", "<|system|>", "<|user|>", "<s>", "</s>"]
    for token in chat_template_tokens:
        if generated_text.strip().startswith(token):
            generated_text = generated_text.replace(token, "", 1)
        if generated_text.endswith(token):
            generated_text = generated_text[::-1].replace(token[::-1], "", 1)[::-1]
    return generated_text
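
# Expected behavior of output_parser, inferred from the stripping logic above:
#
#   output_parser("<|assistant|>Hi there</s>")  # -> "Hi there"
#   output_parser("plain output")               # -> "plain output"
#
# The [::-1] trick removes a trailing token by reversing the string, dropping the
# first (i.e. last) occurrence, and reversing back.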


tgi_models_cache = None
conv_models_cache = None


def read_tgi_conv_models():
    try:
        global tgi_models_cache, conv_models_cache
        # Check if the cache is already populated
        # so we don't keep on reading the txt files if there are 1k requests
        if (tgi_models_cache is not None) and (conv_models_cache is not None):
            return tgi_models_cache, conv_models_cache
        # If not, read the files and populate the caches
        tgi_models = set()
        script_directory = os.path.dirname(os.path.abspath(__file__))
        # Construct the file path relative to the script's directory
        file_path = os.path.join(
            script_directory,
            "huggingface_llms_metadata",
            "hf_text_generation_models.txt",
        )

        with open(file_path, "r") as file:
            for line in file:
                tgi_models.add(line.strip())

        # Cache the set for future use
        tgi_models_cache = tgi_models

        # Do the same for the conversational models list
        file_path = os.path.join(
            script_directory,
            "huggingface_llms_metadata",
            "hf_conversational_models.txt",
        )
        conv_models = set()
        with open(file_path, "r") as file:
            for line in file:
                conv_models.add(line.strip())
        # Cache the set for future use
        conv_models_cache = conv_models
        return tgi_models, conv_models
    except Exception:
        # If anything goes wrong (e.g. the metadata files are missing), fall back to empty sets
        return set(), set()
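
# The module-level caches above mean the two metadata files are read from disk at
# most once per process; every subsequent call is a cheap set lookup.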


def get_hf_task_for_model(model):
    # look the model up in the text-generation and conversational metadata lists
    tgi_models, conversational_models = read_tgi_conv_models()
    if model in tgi_models:
        return "text-generation-inference"
    elif model in conversational_models:
        return "conversational"
    elif "roneneldan/TinyStories" in model:
        return None
    else:
        return "text-generation-inference"  # default to tgi
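
# Example routing, assuming the model appears only in the conversational list
# shipped with litellm (hf_conversational_models.txt above):
#
#   get_hf_task_for_model("PygmalionAI/pygmalion-7b")   # -> "conversational"
#   get_hf_task_for_model("unknown-org/unknown-model")  # -> "text-generation-inference" (default)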


class Huggingface(BaseLLM):
    _client_session: Optional[httpx.Client] = None
    _aclient_session: Optional[httpx.AsyncClient] = None

    def __init__(self) -> None:
        super().__init__()

    def validate_environment(self, api_key, headers):
        default_headers = {
            "content-type": "application/json",
        }
        if api_key and headers is None:
            # Huggingface Inference Endpoints accept bearer tokens by default
            default_headers["Authorization"] = f"Bearer {api_key}"
            headers = default_headers
        elif not headers:
            headers = default_headers
        return headers
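
    # Typical validate_environment result when an api_key is supplied and no custom
    # headers are given (illustrative value):
    #   {"content-type": "application/json", "Authorization": "Bearer hf_..."}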
    def convert_to_model_response_object(
        self,
        completion_response,
        model_response,
        task,
        optional_params,
        encoding,
        input_text,
        model,
    ):
        if task == "conversational":
            if len(completion_response["generated_text"]) > 0:  # type: ignore
                model_response["choices"][0]["message"]["content"] = completion_response["generated_text"]  # type: ignore
        elif task == "text-generation-inference":
            if (
                not isinstance(completion_response, list)
                or not isinstance(completion_response[0], dict)
                or "generated_text" not in completion_response[0]
            ):
                raise HuggingfaceError(
                    status_code=422,
                    message=f"response is not in expected format - {completion_response}",
                )

            if len(completion_response[0]["generated_text"]) > 0:
                model_response["choices"][0]["message"]["content"] = output_parser(
                    completion_response[0]["generated_text"]
                )
            ## GETTING LOGPROBS + FINISH REASON
            if (
                "details" in completion_response[0]
                and "tokens" in completion_response[0]["details"]
            ):
                model_response.choices[0].finish_reason = completion_response[0]["details"]["finish_reason"]
                sum_logprob = 0
                for token in completion_response[0]["details"]["tokens"]:
                    if token["logprob"] is not None:
                        sum_logprob += token["logprob"]
                model_response["choices"][0]["message"]._logprob = sum_logprob
            if "best_of" in optional_params and optional_params["best_of"] > 1:
                if (
            +
                                "details" in completion_response[0]
         | 
| 249 | 
            +
                                and "best_of_sequences" in completion_response[0]["details"]
         | 
| 250 | 
            +
                            ):
         | 
| 251 | 
            +
                                choices_list = []
         | 
| 252 | 
            +
                                for idx, item in enumerate(
         | 
| 253 | 
            +
                                    completion_response[0]["details"]["best_of_sequences"]
         | 
| 254 | 
            +
                                ):
         | 
| 255 | 
            +
                                    sum_logprob = 0
         | 
| 256 | 
            +
                                    for token in item["tokens"]:
         | 
| 257 | 
            +
                                        if token["logprob"] != None:
         | 
| 258 | 
            +
                                            sum_logprob += token["logprob"]
         | 
| 259 | 
            +
                                    if len(item["generated_text"]) > 0:
         | 
| 260 | 
            +
                                        message_obj = Message(
         | 
| 261 | 
            +
                                            content=output_parser(item["generated_text"]),
         | 
| 262 | 
            +
                                            logprobs=sum_logprob,
         | 
| 263 | 
            +
                                        )
         | 
| 264 | 
            +
                                    else:
         | 
| 265 | 
            +
                                        message_obj = Message(content=None)
         | 
| 266 | 
            +
                                    choice_obj = Choices(
         | 
| 267 | 
            +
                                        finish_reason=item["finish_reason"],
         | 
| 268 | 
            +
                                        index=idx + 1,
         | 
| 269 | 
            +
                                        message=message_obj,
         | 
| 270 | 
            +
                                    )
         | 
| 271 | 
            +
                                    choices_list.append(choice_obj)
         | 
| 272 | 
            +
                                model_response["choices"].extend(choices_list)
         | 
| 273 | 
            +
                    else:
         | 
| 274 | 
            +
                        if len(completion_response[0]["generated_text"]) > 0:
         | 
| 275 | 
            +
                            model_response["choices"][0]["message"]["content"] = output_parser(
         | 
| 276 | 
            +
                                completion_response[0]["generated_text"]
         | 
| 277 | 
            +
                            )
         | 
| 278 | 
            +
                    ## CALCULATING USAGE
         | 
| 279 | 
            +
                    prompt_tokens = 0
         | 
| 280 | 
            +
                    try:
         | 
| 281 | 
            +
                        prompt_tokens = len(
         | 
| 282 | 
            +
                            encoding.encode(input_text)
         | 
| 283 | 
            +
                        )  ##[TODO] use the llama2 tokenizer here
         | 
| 284 | 
            +
                    except:
         | 
| 285 | 
            +
                        # this should remain non blocking we should not block a response returning if calculating usage fails
         | 
| 286 | 
            +
                        pass
         | 
| 287 | 
            +
                    output_text = model_response["choices"][0]["message"].get("content", "")
         | 
| 288 | 
            +
                    if output_text is not None and len(output_text) > 0:
         | 
| 289 | 
            +
                        completion_tokens = 0
         | 
| 290 | 
            +
                        try:
         | 
| 291 | 
            +
                            completion_tokens = len(
         | 
| 292 | 
            +
                                encoding.encode(
         | 
| 293 | 
            +
                                    model_response["choices"][0]["message"].get("content", "")
         | 
| 294 | 
            +
                                )
         | 
| 295 | 
            +
                            )  ##[TODO] use the llama2 tokenizer here
         | 
| 296 | 
            +
                        except:
         | 
| 297 | 
            +
                            # this should remain non blocking we should not block a response returning if calculating usage fails
         | 
| 298 | 
            +
                            pass
         | 
| 299 | 
            +
                    else:
         | 
| 300 | 
            +
                        completion_tokens = 0
         | 
| 301 | 
            +
             | 
| 302 | 
            +
                    model_response["created"] = int(time.time())
         | 
| 303 | 
            +
                    model_response["model"] = model
         | 
| 304 | 
            +
                    usage = Usage(
         | 
| 305 | 
            +
                        prompt_tokens=prompt_tokens,
         | 
| 306 | 
            +
                        completion_tokens=completion_tokens,
         | 
| 307 | 
            +
                        total_tokens=prompt_tokens + completion_tokens,
         | 
| 308 | 
            +
                    )
         | 
| 309 | 
            +
                    model_response.usage = usage
         | 
| 310 | 
            +
                    model_response._hidden_params["original_response"] = completion_response
         | 
| 311 | 
            +
                    return model_response
         | 
| 312 | 
            +
             | 
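The `details` block returned by TGI drives both the finish reason and the summed sequence logprob above. A standalone sketch of that aggregation over a hypothetical details payload (TGI can report a null logprob for some tokens, which the loop skips):

# Hypothetical TGI-style details payload.
details = {
    "finish_reason": "length",
    "tokens": [
        {"text": "Hello", "logprob": None},
        {"text": " world", "logprob": -0.31},
        {"text": "!", "logprob": -1.02},
    ],
}
sum_logprob = sum(t["logprob"] for t in details["tokens"] if t["logprob"] is not None)
print(details["finish_reason"], round(sum_logprob, 2))  # length -1.33
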
    def completion(
        self,
        model: str,
        messages: list,
        api_base: Optional[str],
        headers: Optional[dict],
        model_response: ModelResponse,
        print_verbose: Callable,
        timeout: float,
        encoding,
        api_key,
        logging_obj,
        custom_prompt_dict={},
        acompletion: bool = False,
        optional_params=None,
        litellm_params=None,
        logger_fn=None,
    ):
        super().completion()
        exception_mapping_worked = False
        try:
            headers = self.validate_environment(api_key, headers)
            task = get_hf_task_for_model(model)
            print_verbose(f"{model}, {task}")
            completion_url = ""
            input_text = ""
            if "https" in model:
                completion_url = model
            elif api_base:
                completion_url = api_base
            elif "HF_API_BASE" in os.environ:
                completion_url = os.getenv("HF_API_BASE", "")
            elif "HUGGINGFACE_API_BASE" in os.environ:
                completion_url = os.getenv("HUGGINGFACE_API_BASE", "")
            else:
                completion_url = f"https://api-inference.huggingface.co/models/{model}"
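            # Resolution order, illustrated with hypothetical values: an "https"
            # model string is used verbatim, then the api_base argument, then the
            # HF_API_BASE / HUGGINGFACE_API_BASE env vars; with none of those set,
            # model="bigcode/starcoder" would post to
            # https://api-inference.huggingface.co/models/bigcode/starcoder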
            ## Load Config
            config = litellm.HuggingfaceConfig.get_config()
            for k, v in config.items():
                if (
                    k not in optional_params
                ):  # completion(top_k=3) > huggingfaceConfig(top_k=3) <- allows for dynamic variables to be passed in
                    optional_params[k] = v

            ### MAP INPUT PARAMS
            if task == "conversational":
                inference_params = copy.deepcopy(optional_params)
                inference_params.pop("details", None)  # default avoids a KeyError when unset
                inference_params.pop("return_full_text", None)
                past_user_inputs = []
                generated_responses = []
                text = ""
                for message in messages:
                    if message["role"] == "user":
                        if text != "":
                            past_user_inputs.append(text)
                        text = message["content"]
                    elif message["role"] == "assistant" or message["role"] == "system":
                        generated_responses.append(message["content"])
                data = {
                    "inputs": {
                        "text": text,
                        "past_user_inputs": past_user_inputs,
                        "generated_responses": generated_responses,
                    },
                    "parameters": inference_params,
                }
                input_text = "".join(message["content"] for message in messages)
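                # Example (hypothetical) payload for
                # [user "hi", assistant "hello!", user "how are you?"]:
                #   {"inputs": {"text": "how are you?",
                #               "past_user_inputs": ["hi"],
                #               "generated_responses": ["hello!"]},
                #    "parameters": {...}}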
            elif task == "text-generation-inference":
                # always send "details" and "return_full_text" as params
                if model in custom_prompt_dict:
                    # check if the model has a registered custom prompt
                    model_prompt_details = custom_prompt_dict[model]
                    prompt = custom_prompt(
                        role_dict=model_prompt_details.get("roles", None),
                        initial_prompt_value=model_prompt_details.get(
                            "initial_prompt_value", ""
                        ),
                        final_prompt_value=model_prompt_details.get(
                            "final_prompt_value", ""
                        ),
                        messages=messages,
                    )
                else:
                    prompt = prompt_factory(model=model, messages=messages)
                data = {
                    "inputs": prompt,
                    "parameters": optional_params,
                    "stream": True
                    if "stream" in optional_params and optional_params["stream"] == True
                    else False,
                }
                input_text = prompt
            else:
                # Neither TGI nor conversational: this branch strips the TGI-only
                # 'details' and 'return_full_text' params, which the plain
                # Inference API does not accept
                if model in custom_prompt_dict:
                    # check if the model has a registered custom prompt
                    model_prompt_details = custom_prompt_dict[model]
                    prompt = custom_prompt(
                        role_dict=model_prompt_details.get("roles", {}),
                        initial_prompt_value=model_prompt_details.get(
                            "initial_prompt_value", ""
                        ),
                        final_prompt_value=model_prompt_details.get(
                            "final_prompt_value", ""
                        ),
                        bos_token=model_prompt_details.get("bos_token", ""),
                        eos_token=model_prompt_details.get("eos_token", ""),
                        messages=messages,
                    )
                else:
                    prompt = prompt_factory(model=model, messages=messages)
                inference_params = copy.deepcopy(optional_params)
                inference_params.pop("details", None)  # default avoids a KeyError when unset
                inference_params.pop("return_full_text", None)
                data = {
                    "inputs": prompt,
                    "parameters": inference_params,
                    "stream": True
                    if "stream" in optional_params and optional_params["stream"] == True
                    else False,
                }
                input_text = prompt
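            # Example (hypothetical) TGI payload shape:
            #   {"inputs": "<prompt string from prompt_factory>",
            #    "parameters": {"details": ..., "return_full_text": ..., ...},
            #    "stream": False}
            # The plain-inference branch sends the same shape minus the two
            # TGI-only parameters.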
            ## LOGGING
            logging_obj.pre_call(
                input=input_text,
                api_key=api_key,
                additional_args={
                    "complete_input_dict": data,
                    "task": task,
                    "headers": headers,
                    "api_base": completion_url,
                    "acompletion": acompletion,
                },
            )
            ## COMPLETION CALL
            if acompletion is True:
                ### ASYNC STREAMING
                if optional_params.get("stream", False):
                    return self.async_streaming(logging_obj=logging_obj, api_base=completion_url, data=data, headers=headers, model_response=model_response, model=model, timeout=timeout)  # type: ignore
                else:
                    ### ASYNC COMPLETION
                    return self.acompletion(api_base=completion_url, data=data, headers=headers, model_response=model_response, task=task, encoding=encoding, input_text=input_text, model=model, optional_params=optional_params, timeout=timeout)  # type: ignore
            ### SYNC STREAMING
            if "stream" in optional_params and optional_params["stream"] == True:
                response = requests.post(
                    completion_url,
                    headers=headers,
                    data=json.dumps(data),
                    stream=optional_params["stream"],
                )
                return response.iter_lines()
            ### SYNC COMPLETION
            else:
                response = requests.post(
                    completion_url, headers=headers, data=json.dumps(data)
                )
                ## Some servers might return streaming responses even though stream was not set to true. (e.g. Baseten)
                is_streamed = False
                if response.headers.get("Content-Type", "") == "text/event-stream":
                    is_streamed = True

                # iterate over the complete streamed response, and return the final answer
                if is_streamed:
                    streamed_response = CustomStreamWrapper(
                        completion_stream=response.iter_lines(),
                        model=model,
                        custom_llm_provider="huggingface",
                        logging_obj=logging_obj,
                    )
                    content = ""
                    for chunk in streamed_response:
                        content += chunk["choices"][0]["delta"]["content"]
                    completion_response: List[Dict[str, Any]] = [
                        {"generated_text": content}
                    ]
                    ## LOGGING
                    logging_obj.post_call(
                        input=input_text,
                        api_key=api_key,
                        original_response=completion_response,
                        additional_args={"complete_input_dict": data, "task": task},
                    )
                else:
                    ## LOGGING
                    logging_obj.post_call(
                        input=input_text,
                        api_key=api_key,
                        original_response=response.text,
                        additional_args={"complete_input_dict": data, "task": task},
                    )
                    ## RESPONSE OBJECT
                    try:
                        completion_response = response.json()
                        if isinstance(completion_response, dict):
                            completion_response = [completion_response]
                    except Exception:
                        import traceback

                        raise HuggingfaceError(
                            message=f"Original Response received: {response.text}; Stacktrace: {traceback.format_exc()}",
                            status_code=response.status_code,
                        )
                print_verbose(f"response: {completion_response}")
                if (
                    isinstance(completion_response, dict)
                    and "error" in completion_response
                ):
                    print_verbose(f"completion error: {completion_response['error']}")
                    print_verbose(f"response.status_code: {response.status_code}")
                    raise HuggingfaceError(
                        message=completion_response["error"],
                        status_code=response.status_code,
                    )
                return self.convert_to_model_response_object(
                    completion_response=completion_response,
                    model_response=model_response,
                    task=task,
                    optional_params=optional_params,
                    encoding=encoding,
                    input_text=input_text,
                    model=model,
                )
        except HuggingfaceError as e:
            exception_mapping_worked = True
            raise e
        except Exception as e:
            if exception_mapping_worked:
                raise e
            else:
                import traceback

                raise HuggingfaceError(status_code=500, message=traceback.format_exc())
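The fallback above drains an unexpected event-stream into a single answer before handing it to the response converter. A self-contained sketch of that drain, over hypothetical delta chunks:

# Hypothetical delta chunks, drained the same way as the fallback above.
chunks = [
    {"choices": [{"delta": {"content": "Hel"}}]},
    {"choices": [{"delta": {"content": "lo!"}}]},
]
content = "".join(c["choices"][0]["delta"]["content"] for c in chunks)
completion_response = [{"generated_text": content}]
assert completion_response == [{"generated_text": "Hello!"}]
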
    async def acompletion(
        self,
        api_base: str,
        data: dict,
        headers: dict,
        model_response: ModelResponse,
        task: str,
        encoding: Any,
        input_text: str,
        model: str,
        optional_params: dict,
        timeout: float,
    ):
        response = None
        try:
            async with httpx.AsyncClient(timeout=timeout) as client:
                response = await client.post(
                    url=api_base, json=data, headers=headers
                )
                # check the status code before parsing the body, so a non-JSON
                # error response is reported as an HTTP error rather than a
                # JSON decode failure
                if response.status_code != 200:
                    raise HuggingfaceError(
                        status_code=response.status_code,
                        message=response.text,
                        request=response.request,
                        response=response,
                    )
                response_json = response.json()

                ## RESPONSE OBJECT
                return self.convert_to_model_response_object(
                    completion_response=response_json,
                    model_response=model_response,
                    task=task,
                    encoding=encoding,
                    input_text=input_text,
                    model=model,
                    optional_params=optional_params,
                )
        except Exception as e:
            if isinstance(e, httpx.TimeoutException):
                raise HuggingfaceError(status_code=500, message="Request Timeout Error")
            elif response is not None and hasattr(response, "text"):
                raise HuggingfaceError(
                    status_code=500,
                    message=f"{str(e)}\n\nOriginal Response: {response.text}",
                )
            else:
                raise HuggingfaceError(status_code=500, message=f"{str(e)}")
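A minimal standalone sketch of the httpx pattern acompletion relies on: one short-lived AsyncClient per call, posting JSON and reading the body back (the endpoint here is a placeholder, not a real route):

import asyncio
import httpx

async def post_once():
    async with httpx.AsyncClient(timeout=30.0) as client:
        resp = await client.post(
            "https://example.com/models/some-model",  # placeholder endpoint
            json={"inputs": "hi"},
            headers={"content-type": "application/json"},
        )
        resp.raise_for_status()
        return resp.json()

# asyncio.run(post_once())  # uncomment to actually send the request
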
    async def async_streaming(
        self,
        logging_obj,
        api_base: str,
        data: dict,
        headers: dict,
        model_response: ModelResponse,
        model: str,
        timeout: float,
    ):
        async with httpx.AsyncClient(timeout=timeout) as client:
            response = client.stream(
                "POST", url=f"{api_base}", json=data, headers=headers
            )
            async with response as r:
                if r.status_code != 200:
                    raise HuggingfaceError(
                        status_code=r.status_code,
                        message="An error occurred while streaming",
                    )
                streamwrapper = CustomStreamWrapper(
                    completion_stream=r.aiter_lines(),
                    model=model,
                    custom_llm_provider="huggingface",
                    logging_obj=logging_obj,
                )
                async for transformed_chunk in streamwrapper:
                    yield transformed_chunk
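And the streaming counterpart, sketched standalone: httpx's client.stream(...) returns a context manager whose aiter_lines() produces the lines CustomStreamWrapper consumes (again a placeholder endpoint):

import asyncio
import httpx

async def stream_lines():
    async with httpx.AsyncClient(timeout=30.0) as client:
        async with client.stream(
            "POST",
            "https://example.com/models/some-model",  # placeholder endpoint
            json={"inputs": "hi"},
        ) as r:
            async for line in r.aiter_lines():
                print(line)

# asyncio.run(stream_lines())  # uncomment to actually send the request
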
    def embedding(
        self,
        model: str,
        input: list,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        logging_obj=None,
        model_response=None,
        encoding=None,
    ):
        super().embedding()
        headers = self.validate_environment(api_key, headers=None)
        # print_verbose(f"{model}, {task}")
        embed_url = ""
        if "https" in model:
            embed_url = model
        elif api_base:
            embed_url = api_base
        elif "HF_API_BASE" in os.environ:
            embed_url = os.getenv("HF_API_BASE", "")
        elif "HUGGINGFACE_API_BASE" in os.environ:
            embed_url = os.getenv("HUGGINGFACE_API_BASE", "")
        else:
            embed_url = f"https://api-inference.huggingface.co/models/{model}"

        if "sentence-transformers" in model:
            if len(input) < 2:
                raise HuggingfaceError(
                    status_code=400,
                    message="sentence transformers requires 2+ sentences",
                )
            data = {
                "inputs": {
                    "source_sentence": input[0],
                    # compare every remaining input against the source sentence
                    "sentences": input[1:],
                }
            }
        else:
            data = {"inputs": input}  # type: ignore
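        # Example (hypothetical) payloads produced above:
        #   sentence-transformers model, input=["a", "b", "c"] ->
        #       {"inputs": {"source_sentence": "a", "sentences": ["b", "c"]}}
        #   any other model, input=["a", "b"] -> {"inputs": ["a", "b"]}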
        ## LOGGING
        logging_obj.pre_call(
            input=input,
            api_key=api_key,
            additional_args={
                "complete_input_dict": data,
                "headers": headers,
                "api_base": embed_url,
            },
        )
        ## COMPLETION CALL
        response = requests.post(embed_url, headers=headers, data=json.dumps(data))

        ## LOGGING
        logging_obj.post_call(
            input=input,
            api_key=api_key,
            additional_args={"complete_input_dict": data},
            original_response=response,
        )

        embeddings = response.json()

        if "error" in embeddings:
            raise HuggingfaceError(status_code=500, message=embeddings["error"])

        output_data = []
        if "similarities" in embeddings:
            # enumerate, so idx is the position in the similarities list
            for idx, embedding in enumerate(embeddings["similarities"]):
                output_data.append(
                    {
                        "object": "embedding",
                        "index": idx,
                        "embedding": embedding,  # flatten list returned from hf
                    }
                )
        else:
            for idx, embedding in enumerate(embeddings):
                if isinstance(embedding, float):
                    output_data.append(
                        {
                            "object": "embedding",
                            "index": idx,
                            "embedding": embedding,  # flatten list returned from hf
                        }
                    )
                elif isinstance(embedding, list) and isinstance(embedding[0], float):
                    output_data.append(
                        {
                            "object": "embedding",
                            "index": idx,
                            "embedding": embedding,  # flatten list returned from hf
                        }
                    )
                else:
                    output_data.append(
                        {
                            "object": "embedding",
                            "index": idx,
                            "embedding": embedding[0][0],  # flatten list returned from hf
                        }
                    )
        model_response["object"] = "list"
        model_response["data"] = output_data
        model_response["model"] = model
        input_tokens = 0
        for text in input:
            input_tokens += len(encoding.encode(text))

        model_response["usage"] = {
            "prompt_tokens": input_tokens,
            "total_tokens": input_tokens,
        }
        return model_response
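For orientation, a hedged sketch of the shaping above applied to a plain list-of-vectors payload (the vectors are invented):

# Invented two-vector payload; mirrors the list-of-floats branch above.
embeddings = [[0.1, 0.2], [0.3, 0.4]]
output_data = [
    {"object": "embedding", "index": idx, "embedding": emb}
    for idx, emb in enumerate(embeddings)
]
assert output_data[1] == {"object": "embedding", "index": 1, "embedding": [0.3, 0.4]}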
    	
litellm/llms/maritalk.py
ADDED

@@ -0,0 +1,189 @@
import os, types
import json
from enum import Enum
import requests
import time, traceback
from typing import Callable, Optional, List
from litellm.utils import ModelResponse, Choices, Message, Usage
import litellm


class MaritalkError(Exception):
    def __init__(self, status_code, message):
        self.status_code = status_code
        self.message = message
        super().__init__(
            self.message
        )  # Call the base class constructor with the parameters it needs
class MaritTalkConfig:
    """
    The class `MaritTalkConfig` provides configuration for the MaritTalk API interface. Here are the parameters:

    - `max_tokens` (integer): Maximum number of tokens the model will generate as part of the response. Default is 1.

    - `model` (string): The model used for conversation. Default is 'maritalk'.

    - `do_sample` (boolean): If set to True, the API will generate a response using sampling. Default is True.

    - `temperature` (number): A non-negative float controlling the randomness in generation. Lower temperatures result in less random generations. Default is 0.7.

    - `top_p` (number): Selection threshold for token inclusion based on cumulative probability. Default is 0.95.

    - `repetition_penalty` (number): Penalty for repetition in the generated conversation. Default is 1.

    - `stopping_tokens` (list of string): List of tokens at which the conversation is stopped/truncated.
    """

    max_tokens: Optional[int] = None
    model: Optional[str] = None
    do_sample: Optional[bool] = None
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    repetition_penalty: Optional[float] = None
    stopping_tokens: Optional[List[str]] = None

    def __init__(
        self,
        max_tokens: Optional[int] = None,
        model: Optional[str] = None,
        do_sample: Optional[bool] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        repetition_penalty: Optional[float] = None,
        stopping_tokens: Optional[List[str]] = None,
    ) -> None:
        locals_ = locals()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }
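A quick behavioral sketch of this config pattern: constructing MaritTalkConfig mutates class-level defaults, and get_config() then surfaces only the non-None, non-callable attributes (the value below is illustrative):

# Illustrative: set a class-level default, then read it back.
MaritTalkConfig(temperature=0.5)
assert MaritTalkConfig.get_config().get("temperature") == 0.5
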
def validate_environment(api_key):
    headers = {
        "accept": "application/json",
        "content-type": "application/json",
    }
    if api_key:
        headers["Authorization"] = f"Key {api_key}"
    return headers
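Note the authorization scheme: MariTalk expects "Key <token>" rather than the "Bearer <token>" used by the Huggingface client above. Illustrative result with a fake key:

# Illustrative: MariTalk authorization uses the "Key" scheme.
assert validate_environment("abc123") == {
    "accept": "application/json",
    "content-type": "application/json",
    "Authorization": "Key abc123",
}
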
def completion(
    model: str,
    messages: list,
    api_base: str,
    model_response: ModelResponse,
    print_verbose: Callable,
    encoding,
    api_key,
    logging_obj,
    optional_params=None,
    litellm_params=None,
    logger_fn=None,
):
    headers = validate_environment(api_key)
    completion_url = api_base

    ## Load Config
    config = litellm.MaritTalkConfig.get_config()
    for k, v in config.items():
        if (
            k not in optional_params
        ):  # completion(top_k=3) > maritalk_config(top_k=3) <- allows for dynamic variables to be passed in
            optional_params[k] = v

    data = {
        "messages": messages,
        **optional_params,
    }

    ## LOGGING
    logging_obj.pre_call(
        input=messages,
        api_key=api_key,
        additional_args={"complete_input_dict": data},
    )
    ## COMPLETION CALL
    response = requests.post(
        completion_url,
        headers=headers,
        data=json.dumps(data),
        stream=optional_params["stream"] if "stream" in optional_params else False,
    )
    if "stream" in optional_params and optional_params["stream"] == True:
        return response.iter_lines()
    else:
        ## LOGGING
        logging_obj.post_call(
            input=messages,
            api_key=api_key,
            original_response=response.text,
            additional_args={"complete_input_dict": data},
        )
        print_verbose(f"raw model_response: {response.text}")
        ## RESPONSE OBJECT
        completion_response = response.json()
        if "error" in completion_response:
            raise MaritalkError(
         | 
| 149 | 
            +
                            message=completion_response["error"],
         | 
| 150 | 
            +
                            status_code=response.status_code,
         | 
| 151 | 
            +
                        )
         | 
| 152 | 
            +
                    else:
         | 
| 153 | 
            +
                        try:
         | 
| 154 | 
            +
                            if len(completion_response["answer"]) > 0:
         | 
| 155 | 
            +
                                model_response["choices"][0]["message"][
         | 
| 156 | 
            +
                                    "content"
         | 
| 157 | 
            +
                                ] = completion_response["answer"]
         | 
| 158 | 
            +
                        except Exception as e:
         | 
| 159 | 
            +
                            raise MaritalkError(
         | 
| 160 | 
            +
                                message=response.text, status_code=response.status_code
         | 
| 161 | 
            +
                            )
         | 
| 162 | 
            +
             | 
| 163 | 
            +
                    ## CALCULATING USAGE
         | 
| 164 | 
            +
                    prompt = "".join(m["content"] for m in messages)
         | 
| 165 | 
            +
                    prompt_tokens = len(encoding.encode(prompt))
         | 
| 166 | 
            +
                    completion_tokens = len(
         | 
| 167 | 
            +
                        encoding.encode(model_response["choices"][0]["message"].get("content", ""))
         | 
| 168 | 
            +
                    )
         | 
| 169 | 
            +
             | 
| 170 | 
            +
                    model_response["created"] = int(time.time())
         | 
| 171 | 
            +
                    model_response["model"] = model
         | 
| 172 | 
            +
                    usage = Usage(
         | 
| 173 | 
            +
                        prompt_tokens=prompt_tokens,
         | 
| 174 | 
            +
                        completion_tokens=completion_tokens,
         | 
| 175 | 
            +
                        total_tokens=prompt_tokens + completion_tokens,
         | 
| 176 | 
            +
                    )
         | 
| 177 | 
            +
                    model_response.usage = usage
         | 
| 178 | 
            +
                    return model_response
         | 
| 179 | 
            +
             | 
| 180 | 
            +
             | 
| 181 | 
            +
            def embedding(
         | 
| 182 | 
            +
                model: str,
         | 
| 183 | 
            +
                input: list,
         | 
| 184 | 
            +
                api_key: Optional[str] = None,
         | 
| 185 | 
            +
                logging_obj=None,
         | 
| 186 | 
            +
                model_response=None,
         | 
| 187 | 
            +
                encoding=None,
         | 
| 188 | 
            +
            ):
         | 
| 189 | 
            +
                pass
         | 
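
A note on the `get_config` pattern that closes out maritalk.py above (it recurs in every provider module in this upload): `__init__` writes each non-None argument onto the class itself, so instantiating the config once sets a package-wide default, while the "Load Config" loop in `completion` only copies defaults into keys the caller left unset, so per-call kwargs always win. A minimal sketch of that precedence, assuming `litellm` is importable; the merge loop is mirrored here rather than invoked through `completion`:

    import litellm

    # Setting a field via __init__ stores it on the class, so every later
    # get_config() call sees it as a package-wide default.
    litellm.MaritTalkConfig(temperature=0.7)

    config = litellm.MaritTalkConfig.get_config()
    optional_params = {"temperature": 0.1}  # what a caller passed explicitly

    # Mirror of the "## Load Config" loop in completion(): class-level
    # defaults only fill keys the caller left unset.
    for k, v in config.items():
        if k not in optional_params:
            optional_params[k] = v

    assert optional_params["temperature"] == 0.1  # per-call value wins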
    	
        litellm/llms/nlp_cloud.py
    ADDED
    
@@ -0,0 +1,243 @@
+import os, types
+import json
+from enum import Enum
+import requests
+import time
+from typing import Callable, Optional
+import litellm
+from litellm.utils import ModelResponse, Usage
+
+
+class NLPCloudError(Exception):
+    def __init__(self, status_code, message):
+        self.status_code = status_code
+        self.message = message
+        super().__init__(
+            self.message
+        )  # Call the base class constructor with the parameters it needs
+
+
+class NLPCloudConfig:
+    """
+    Reference: https://docs.nlpcloud.com/#generation
+
+    - `max_length` (int): Optional. The maximum number of tokens that the generated text should contain.
+
+    - `length_no_input` (boolean): Optional. Whether `min_length` and `max_length` should not include the length of the input text.
+
+    - `end_sequence` (string): Optional. A specific token that should be the end of the generated sequence.
+
+    - `remove_end_sequence` (boolean): Optional. Whether to remove the `end_sequence` string from the result.
+
+    - `remove_input` (boolean): Optional. Whether to remove the input text from the result.
+
+    - `bad_words` (list of strings): Optional. List of tokens that are not allowed to be generated.
+
+    - `temperature` (float): Optional. Temperature sampling. It modulates the next token probabilities.
+
+    - `top_p` (float): Optional. Top P sampling. Below 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation.
+
+    - `top_k` (int): Optional. Top K sampling. The number of highest probability vocabulary tokens to keep for top k filtering.
+
+    - `repetition_penalty` (float): Optional. Prevents the same word from being repeated too many times.
+
+    - `num_beams` (int): Optional. Number of beams for beam search.
+
+    - `num_return_sequences` (int): Optional. The number of independently computed returned sequences.
+    """
+
+    max_length: Optional[int] = None
+    length_no_input: Optional[bool] = None
+    end_sequence: Optional[str] = None
+    remove_end_sequence: Optional[bool] = None
+    remove_input: Optional[bool] = None
+    bad_words: Optional[list] = None
+    temperature: Optional[float] = None
+    top_p: Optional[float] = None
+    top_k: Optional[int] = None
+    repetition_penalty: Optional[float] = None
+    num_beams: Optional[int] = None
+    num_return_sequences: Optional[int] = None
+
+    def __init__(
+        self,
+        max_length: Optional[int] = None,
+        length_no_input: Optional[bool] = None,
+        end_sequence: Optional[str] = None,
+        remove_end_sequence: Optional[bool] = None,
+        remove_input: Optional[bool] = None,
+        bad_words: Optional[list] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        top_k: Optional[int] = None,
+        repetition_penalty: Optional[float] = None,
+        num_beams: Optional[int] = None,
+        num_return_sequences: Optional[int] = None,
+    ) -> None:
+        locals_ = locals()
+        for key, value in locals_.items():
+            if key != "self" and value is not None:
+                setattr(self.__class__, key, value)
+
+    @classmethod
+    def get_config(cls):
+        return {
+            k: v
+            for k, v in cls.__dict__.items()
+            if not k.startswith("__")
+            and not isinstance(
+                v,
+                (
+                    types.FunctionType,
+                    types.BuiltinFunctionType,
+                    classmethod,
+                    staticmethod,
+                ),
+            )
+            and v is not None
+        }
+
+
+def validate_environment(api_key):
+    headers = {
+        "accept": "application/json",
+        "content-type": "application/json",
+    }
+    if api_key:
+        headers["Authorization"] = f"Token {api_key}"
+    return headers
+
+
+def completion(
+    model: str,
+    messages: list,
+    api_base: str,
+    model_response: ModelResponse,
+    print_verbose: Callable,
+    encoding,
+    api_key,
+    logging_obj,
+    optional_params=None,
+    litellm_params=None,
+    logger_fn=None,
+    default_max_tokens_to_sample=None,
+):
+    headers = validate_environment(api_key)
+
+    ## Load Config
+    config = litellm.NLPCloudConfig.get_config()
+    for k, v in config.items():
+        if (
+            k not in optional_params
+        ):  # completion(top_k=3) > nlp_cloud_config(top_k=3) <- allows for dynamic variables to be passed in
+            optional_params[k] = v
+
+    completion_url_fragment_1 = api_base
+    completion_url_fragment_2 = "/generation"
+    model = model
+    text = " ".join(message["content"] for message in messages)
+
+    data = {
+        "text": text,
+        **optional_params,
+    }
+
+    completion_url = completion_url_fragment_1 + model + completion_url_fragment_2
+
+    ## LOGGING
+    logging_obj.pre_call(
+        input=text,
+        api_key=api_key,
+        additional_args={
+            "complete_input_dict": data,
+            "headers": headers,
+            "api_base": completion_url,
+        },
+    )
+    ## COMPLETION CALL
+    response = requests.post(
+        completion_url,
+        headers=headers,
+        data=json.dumps(data),
+        stream=optional_params["stream"] if "stream" in optional_params else False,
+    )
+    if "stream" in optional_params and optional_params["stream"] == True:
+        return clean_and_iterate_chunks(response)
+    else:
+        ## LOGGING
+        logging_obj.post_call(
+            input=text,
+            api_key=api_key,
+            original_response=response.text,
+            additional_args={"complete_input_dict": data},
+        )
+        print_verbose(f"raw model_response: {response.text}")
+        ## RESPONSE OBJECT
+        try:
+            completion_response = response.json()
+        except:
+            raise NLPCloudError(message=response.text, status_code=response.status_code)
+        if "error" in completion_response:
+            raise NLPCloudError(
+                message=completion_response["error"],
+                status_code=response.status_code,
+            )
+        else:
+            try:
+                if len(completion_response["generated_text"]) > 0:
+                    model_response["choices"][0]["message"][
+                        "content"
+                    ] = completion_response["generated_text"]
+            except:
+                raise NLPCloudError(
+                    message=json.dumps(completion_response),
+                    status_code=response.status_code,
+                )
+
+        ## CALCULATING USAGE - NLP Cloud returns token counts directly in the response
+        prompt_tokens = completion_response["nb_input_tokens"]
+        completion_tokens = completion_response["nb_generated_tokens"]
+
+        model_response["created"] = int(time.time())
+        model_response["model"] = model
+        usage = Usage(
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+            total_tokens=prompt_tokens + completion_tokens,
+        )
+        model_response.usage = usage
+        return model_response
+
+
+# def clean_and_iterate_chunks(response):
+#     def process_chunk(chunk):
+#         print(f"received chunk: {chunk}")
+#         cleaned_chunk = chunk.decode("utf-8")
+#         # Perform further processing based on your needs
+#         return cleaned_chunk
+
+
+#     for line in response.iter_lines():
+#         if line:
+#             yield process_chunk(line)
+def clean_and_iterate_chunks(response):
+    buffer = b""
+
+    for chunk in response.iter_content(chunk_size=1024):
+        if not chunk:
+            break
+
+        buffer += chunk
+        while b"\x00" in buffer:
+            buffer = buffer.replace(b"\x00", b"")
+            yield buffer.decode("utf-8")
+            buffer = b""
+
+    # No more data expected, yield any remaining data in the buffer
+    if buffer:
+        yield buffer.decode("utf-8")
+
+
+def embedding():
+    # logic for parsing in - calling - parsing out model embedding calls
+    pass
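
The rewritten `clean_and_iterate_chunks` at the end of nlp_cloud.py treats NUL bytes (`\x00`) in the stream as flush markers: once a NUL shows up in the buffer, all NULs are stripped, the decoded buffer is yielded, and the buffer is reset; whatever remains when the stream ends is yielded last. A small self-contained trace of that behavior, using a hypothetical stub in place of `requests.Response` (only `iter_content` is needed):

    from litellm.llms.nlp_cloud import clean_and_iterate_chunks

    class FakeResponse:
        """Hypothetical stub exposing just the iter_content() the cleaner uses."""

        def __init__(self, chunks):
            self._chunks = chunks

        def iter_content(self, chunk_size=1024):
            yield from self._chunks

    resp = FakeResponse([b"Hel\x00", b"lo wo\x00rld"])
    # The first chunk flushes "Hel"; the second flushes "lo world" (NULs are
    # stripped, and text after the marker rides along in the same flush).
    print(list(clean_and_iterate_chunks(resp)))  # ['Hel', 'lo world']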
    	
        litellm/llms/ollama.py
    ADDED
    
@@ -0,0 +1,400 @@
| 1 | 
            +
            import requests, types, time
         | 
| 2 | 
            +
            import json, uuid
         | 
| 3 | 
            +
            import traceback
         | 
| 4 | 
            +
            from typing import Optional
         | 
| 5 | 
            +
            import litellm
         | 
| 6 | 
            +
            import httpx, aiohttp, asyncio
         | 
| 7 | 
            +
            from .prompt_templates.factory import prompt_factory, custom_prompt
         | 
| 8 | 
            +
             | 
| 9 | 
            +
             | 
| 10 | 
            +
            class OllamaError(Exception):
         | 
| 11 | 
            +
                def __init__(self, status_code, message):
         | 
| 12 | 
            +
                    self.status_code = status_code
         | 
| 13 | 
            +
                    self.message = message
         | 
| 14 | 
            +
                    self.request = httpx.Request(method="POST", url="http://localhost:11434")
         | 
| 15 | 
            +
                    self.response = httpx.Response(status_code=status_code, request=self.request)
         | 
| 16 | 
            +
                    super().__init__(
         | 
| 17 | 
            +
                        self.message
         | 
| 18 | 
            +
                    )  # Call the base class constructor with the parameters it needs
         | 
| 19 | 
            +
             | 
| 20 | 
            +
             | 
| 21 | 
            +
            class OllamaConfig:
         | 
| 22 | 
            +
                """
         | 
| 23 | 
            +
                Reference: https://github.com/jmorganca/ollama/blob/main/docs/api.md#parameters
         | 
| 24 | 
            +
             | 
| 25 | 
            +
                The class `OllamaConfig` provides the configuration for the Ollama's API interface. Below are the parameters:
         | 
| 26 | 
            +
             | 
| 27 | 
            +
                - `mirostat` (int): Enable Mirostat sampling for controlling perplexity. Default is 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0. Example usage: mirostat 0
         | 
| 28 | 
            +
             | 
| 29 | 
            +
                - `mirostat_eta` (float): Influences how quickly the algorithm responds to feedback from the generated text. A lower learning rate will result in slower adjustments, while a higher learning rate will make the algorithm more responsive. Default: 0.1. Example usage: mirostat_eta 0.1
         | 
| 30 | 
            +
             | 
| 31 | 
            +
                - `mirostat_tau` (float): Controls the balance between coherence and diversity of the output. A lower value will result in more focused and coherent text. Default: 5.0. Example usage: mirostat_tau 5.0
         | 
| 32 | 
            +
             | 
| 33 | 
            +
                - `num_ctx` (int): Sets the size of the context window used to generate the next token. Default: 2048. Example usage: num_ctx 4096
         | 
| 34 | 
            +
             | 
| 35 | 
            +
                - `num_gqa` (int): The number of GQA groups in the transformer layer. Required for some models, for example it is 8 for llama2:70b. Example usage: num_gqa 1
         | 
| 36 | 
            +
             | 
| 37 | 
            +
                - `num_gpu` (int): The number of layers to send to the GPU(s). On macOS it defaults to 1 to enable metal support, 0 to disable. Example usage: num_gpu 0
         | 
| 38 | 
            +
             | 
| 39 | 
            +
                - `num_thread` (int): Sets the number of threads to use during computation. By default, Ollama will detect this for optimal performance. It is recommended to set this value to the number of physical CPU cores your system has (as opposed to the logical number of cores). Example usage: num_thread 8
         | 
| 40 | 
            +
             | 
| 41 | 
            +
                - `repeat_last_n` (int): Sets how far back for the model to look back to prevent repetition. Default: 64, 0 = disabled, -1 = num_ctx. Example usage: repeat_last_n 64
         | 
| 42 | 
            +
             | 
| 43 | 
            +
                - `repeat_penalty` (float): Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. Default: 1.1. Example usage: repeat_penalty 1.1
         | 
| 44 | 
            +
             | 
| 45 | 
            +
                - `temperature` (float): The temperature of the model. Increasing the temperature will make the model answer more creatively. Default: 0.8. Example usage: temperature 0.7
         | 
| 46 | 
            +
             | 
| 47 | 
            +
                - `stop` (string[]): Sets the stop sequences to use. Example usage: stop "AI assistant:"
         | 
| 48 | 
            +
             | 
| 49 | 
            +
                - `tfs_z` (float): Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. Default: 1. Example usage: tfs_z 1
         | 
| 50 | 
            +
             | 
| 51 | 
            +
                - `num_predict` (int): Maximum number of tokens to predict when generating text. Default: 128, -1 = infinite generation, -2 = fill context. Example usage: num_predict 42
         | 
| 52 | 
            +
             | 
| 53 | 
            +
                - `top_k` (int): Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. Default: 40. Example usage: top_k 40
         | 
| 54 | 
            +
             | 
| 55 | 
            +
                - `top_p` (float): Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. Default: 0.9. Example usage: top_p 0.9
         | 
| 56 | 
            +
             | 
| 57 | 
            +
                - `system` (string): system prompt for model (overrides what is defined in the Modelfile)
         | 
| 58 | 
            +
             | 
| 59 | 
            +
                - `template` (string): the full prompt or prompt template (overrides what is defined in the Modelfile)
         | 
| 60 | 
            +
                """
         | 
| 61 | 
            +
             | 
| 62 | 
            +
                mirostat: Optional[int] = None
         | 
| 63 | 
            +
                mirostat_eta: Optional[float] = None
         | 
| 64 | 
            +
                mirostat_tau: Optional[float] = None
         | 
| 65 | 
            +
                num_ctx: Optional[int] = None
         | 
| 66 | 
            +
                num_gqa: Optional[int] = None
         | 
| 67 | 
            +
                num_thread: Optional[int] = None
         | 
| 68 | 
            +
                repeat_last_n: Optional[int] = None
         | 
| 69 | 
            +
                repeat_penalty: Optional[float] = None
         | 
| 70 | 
            +
                temperature: Optional[float] = None
         | 
| 71 | 
            +
                stop: Optional[
         | 
| 72 | 
            +
                    list
         | 
| 73 | 
            +
                ] = None  # stop is a list based on this - https://github.com/jmorganca/ollama/pull/442
         | 
| 74 | 
            +
                tfs_z: Optional[float] = None
         | 
| 75 | 
            +
                num_predict: Optional[int] = None
         | 
| 76 | 
            +
                top_k: Optional[int] = None
         | 
| 77 | 
            +
                top_p: Optional[float] = None
         | 
| 78 | 
            +
                system: Optional[str] = None
         | 
| 79 | 
            +
                template: Optional[str] = None
         | 
| 80 | 
            +
             | 
| 81 | 
            +
                def __init__(
         | 
| 82 | 
            +
                    self,
         | 
| 83 | 
            +
                    mirostat: Optional[int] = None,
         | 
| 84 | 
            +
                    mirostat_eta: Optional[float] = None,
         | 
| 85 | 
            +
                    mirostat_tau: Optional[float] = None,
         | 
| 86 | 
            +
                    num_ctx: Optional[int] = None,
         | 
| 87 | 
            +
                    num_gqa: Optional[int] = None,
         | 
| 88 | 
            +
                    num_thread: Optional[int] = None,
         | 
| 89 | 
            +
                    repeat_last_n: Optional[int] = None,
         | 
| 90 | 
            +
                    repeat_penalty: Optional[float] = None,
         | 
| 91 | 
            +
                    temperature: Optional[float] = None,
         | 
| 92 | 
            +
                    stop: Optional[list] = None,
         | 
| 93 | 
            +
                    tfs_z: Optional[float] = None,
         | 
| 94 | 
            +
                    num_predict: Optional[int] = None,
         | 
| 95 | 
            +
                    top_k: Optional[int] = None,
         | 
| 96 | 
            +
                    top_p: Optional[float] = None,
         | 
| 97 | 
            +
                    system: Optional[str] = None,
         | 
| 98 | 
            +
                    template: Optional[str] = None,
         | 
| 99 | 
            +
                ) -> None:
         | 
| 100 | 
            +
                    locals_ = locals()
         | 
| 101 | 
            +
                    for key, value in locals_.items():
         | 
| 102 | 
            +
                        if key != "self" and value is not None:
         | 
| 103 | 
            +
                            setattr(self.__class__, key, value)
         | 
| 104 | 
            +
             | 
| 105 | 
            +
                @classmethod
         | 
| 106 | 
            +
                def get_config(cls):
         | 
| 107 | 
            +
                    return {
         | 
| 108 | 
            +
                        k: v
         | 
| 109 | 
            +
                        for k, v in cls.__dict__.items()
         | 
| 110 | 
            +
                        if not k.startswith("__")
         | 
| 111 | 
            +
                        and not isinstance(
         | 
| 112 | 
            +
                            v,
         | 
| 113 | 
            +
                            (
         | 
| 114 | 
            +
                                types.FunctionType,
         | 
| 115 | 
            +
                                types.BuiltinFunctionType,
         | 
| 116 | 
            +
                                classmethod,
         | 
| 117 | 
            +
                                staticmethod,
         | 
| 118 | 
            +
                            ),
         | 
| 119 | 
            +
                        )
         | 
| 120 | 
            +
                        and v is not None
         | 
| 121 | 
            +
                    }
         | 
| 122 | 
            +
             | 
| 123 | 
            +
             | 
| 124 | 
            +
            # ollama implementation
         | 
| 125 | 
            +
            def get_ollama_response(
         | 
| 126 | 
            +
                api_base="http://localhost:11434",
         | 
| 127 | 
            +
                model="llama2",
         | 
| 128 | 
            +
                prompt="Why is the sky blue?",
         | 
| 129 | 
            +
                optional_params=None,
         | 
| 130 | 
            +
                logging_obj=None,
         | 
| 131 | 
            +
                acompletion: bool = False,
         | 
| 132 | 
            +
                model_response=None,
         | 
| 133 | 
            +
                encoding=None,
         | 
| 134 | 
            +
            ):
         | 
| 135 | 
            +
                if api_base.endswith("/api/generate"):
         | 
| 136 | 
            +
                    url = api_base
         | 
| 137 | 
            +
                else:
         | 
| 138 | 
            +
                    url = f"{api_base}/api/generate"
         | 
| 139 | 
            +
             | 
| 140 | 
            +
                ## Load Config
         | 
| 141 | 
            +
                config = litellm.OllamaConfig.get_config()
         | 
| 142 | 
            +
                for k, v in config.items():
         | 
| 143 | 
            +
                    if (
         | 
| 144 | 
            +
                        k not in optional_params
         | 
| 145 | 
            +
                    ):  # completion(top_k=3) > cohere_config(top_k=3) <- allows for dynamic variables to be passed in
         | 
| 146 | 
            +
                        optional_params[k] = v
         | 
| 147 | 
            +
             | 
| 148 | 
            +
                optional_params["stream"] = optional_params.get("stream", False)
         | 
| 149 | 
            +
                data = {"model": model, "prompt": prompt, **optional_params}
         | 
| 150 | 
            +
                ## LOGGING
         | 
| 151 | 
            +
                logging_obj.pre_call(
         | 
| 152 | 
            +
                    input=None,
         | 
| 153 | 
            +
                    api_key=None,
         | 
| 154 | 
            +
                    additional_args={
         | 
| 155 | 
            +
                        "api_base": url,
         | 
| 156 | 
            +
                        "complete_input_dict": data,
         | 
| 157 | 
            +
                        "headers": {},
         | 
| 158 | 
            +
                        "acompletion": acompletion,
         | 
| 159 | 
            +
                    },
         | 
| 160 | 
            +
                )
         | 
| 161 | 
            +
                if acompletion is True:
         | 
| 162 | 
            +
                    if optional_params.get("stream", False) == True:
         | 
| 163 | 
            +
                        response = ollama_async_streaming(
         | 
| 164 | 
            +
                            url=url,
         | 
| 165 | 
            +
                            data=data,
         | 
| 166 | 
            +
                            model_response=model_response,
         | 
| 167 | 
            +
                            encoding=encoding,
         | 
| 168 | 
            +
                            logging_obj=logging_obj,
         | 
| 169 | 
            +
                        )
         | 
| 170 | 
            +
                    else:
         | 
| 171 | 
            +
                        response = ollama_acompletion(
         | 
| 172 | 
            +
                            url=url,
         | 
| 173 | 
            +
                            data=data,
         | 
| 174 | 
            +
                            model_response=model_response,
         | 
| 175 | 
            +
                            encoding=encoding,
         | 
| 176 | 
            +
                            logging_obj=logging_obj,
         | 
| 177 | 
            +
                        )
         | 
| 178 | 
            +
                    return response
         | 
| 179 | 
            +
                elif optional_params.get("stream", False) == True:
         | 
| 180 | 
            +
                    return ollama_completion_stream(url=url, data=data, logging_obj=logging_obj)
         | 
| 181 | 
            +
             | 
| 182 | 
            +
                response = requests.post(url=f"{url}", json=data, timeout=litellm.request_timeout)
         | 
| 183 | 
            +
                if response.status_code != 200:
         | 
| 184 | 
            +
                    raise OllamaError(status_code=response.status_code, message=response.text)
         | 
| 185 | 
            +
             | 
| 186 | 
            +
                ## LOGGING
         | 
| 187 | 
            +
                logging_obj.post_call(
         | 
| 188 | 
            +
                    input=prompt,
         | 
| 189 | 
            +
                    api_key="",
         | 
| 190 | 
            +
                    original_response=response.text,
         | 
| 191 | 
            +
                    additional_args={
         | 
| 192 | 
            +
                        "headers": None,
         | 
| 193 | 
            +
                        "api_base": api_base,
         | 
| 194 | 
            +
                    },
         | 
| 195 | 
            +
                )
         | 
| 196 | 
            +
             | 
| 197 | 
            +
                response_json = response.json()
         | 
| 198 | 
            +
             | 
| 199 | 
            +
                ## RESPONSE OBJECT
         | 
| 200 | 
            +
                model_response["choices"][0]["finish_reason"] = "stop"
         | 
| 201 | 
            +
                if optional_params.get("format", "") == "json":
         | 
| 202 | 
            +
                    message = litellm.Message(
         | 
| 203 | 
            +
                        content=None,
         | 
| 204 | 
            +
                        tool_calls=[
         | 
| 205 | 
            +
                            {
         | 
| 206 | 
            +
                                "id": f"call_{str(uuid.uuid4())}",
         | 
| 207 | 
            +
                                "function": {"arguments": response_json["response"], "name": ""},
         | 
| 208 | 
            +
                                "type": "function",
         | 
| 209 | 
            +
                            }
         | 
| 210 | 
            +
                        ],
         | 
| 211 | 
            +
                    )
         | 
| 212 | 
            +
                    model_response["choices"][0]["message"] = message
         | 
| 213 | 
            +
                else:
         | 
| 214 | 
            +
                    model_response["choices"][0]["message"]["content"] = response_json["response"]
         | 
| 215 | 
            +
                model_response["created"] = int(time.time())
         | 
| 216 | 
            +
                model_response["model"] = "ollama/" + model
         | 
| 217 | 
            +
                prompt_tokens = response_json.get("prompt_eval_count", len(encoding.encode(prompt)))  # type: ignore
         | 
| 218 | 
            +
                completion_tokens = response_json["eval_count"]
         | 
| 219 | 
            +
                model_response["usage"] = litellm.Usage(
         | 
| 220 | 
            +
                    prompt_tokens=prompt_tokens,
         | 
| 221 | 
            +
                    completion_tokens=completion_tokens,
         | 
| 222 | 
            +
                    total_tokens=prompt_tokens + completion_tokens,
         | 
| 223 | 
            +
                )
         | 
| 224 | 
            +
                return model_response
         | 
| 225 | 
            +
             | 
| 226 | 
            +
             | 
| 227 | 
            +
            def ollama_completion_stream(url, data, logging_obj):
         | 
| 228 | 
            +
                with httpx.stream(
         | 
| 229 | 
            +
                    url=url, json=data, method="POST", timeout=litellm.request_timeout
         | 
| 230 | 
            +
                ) as response:
         | 
| 231 | 
            +
                    try:
         | 
| 232 | 
            +
                        if response.status_code != 200:
         | 
| 233 | 
            +
                            raise OllamaError(
         | 
| 234 | 
            +
                                status_code=response.status_code, message=response.text
         | 
| 235 | 
            +
                            )
         | 
| 236 | 
            +
             | 
| 237 | 
            +
                        streamwrapper = litellm.CustomStreamWrapper(
         | 
| 238 | 
            +
                            completion_stream=response.iter_lines(),
         | 
| 239 | 
            +
                            model=data["model"],
         | 
| 240 | 
            +
                            custom_llm_provider="ollama",
         | 
| 241 | 
            +
                            logging_obj=logging_obj,
         | 
| 242 | 
            +
                        )
         | 
| 243 | 
            +
                        for transformed_chunk in streamwrapper:
         | 
| 244 | 
            +
                            yield transformed_chunk
         | 
| 245 | 
            +
                    except Exception as e:
         | 
| 246 | 
            +
                        raise e
         | 
| 247 | 
            +
             | 
| 248 | 
            +
             | 
| 249 | 
            +
            async def ollama_async_streaming(url, data, model_response, encoding, logging_obj):
         | 
| 250 | 
            +
                try:
         | 
| 251 | 
            +
                    client = httpx.AsyncClient()
         | 
| 252 | 
            +
                    async with client.stream(
         | 
| 253 | 
            +
                        url=f"{url}", json=data, method="POST", timeout=litellm.request_timeout
         | 
| 254 | 
            +
                    ) as response:
         | 
| 255 | 
            +
                        if response.status_code != 200:
         | 
| 256 | 
            +
                            raise OllamaError(
         | 
| 257 | 
            +
                                status_code=response.status_code, message=response.text
         | 
| 258 | 
            +
                            )
         | 
| 259 | 
            +
             | 
| 260 | 
            +
                        streamwrapper = litellm.CustomStreamWrapper(
         | 
| 261 | 
            +
                            completion_stream=response.aiter_lines(),
         | 
| 262 | 
            +
                            model=data["model"],
         | 
| 263 | 
            +
                            custom_llm_provider="ollama",
         | 
| 264 | 
            +
                            logging_obj=logging_obj,
         | 
| 265 | 
            +
                        )
         | 
| 266 | 
            +
                        async for transformed_chunk in streamwrapper:
         | 
| 267 | 
            +
                            yield transformed_chunk
         | 
| 268 | 
            +
                except Exception as e:
         | 
| 269 | 
            +
                    traceback.print_exc()
         | 
| 270 | 
            +
             | 
| 271 | 
            +
             | 
| 272 | 
            +
async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
    data["stream"] = False
    try:
        timeout = aiohttp.ClientTimeout(total=litellm.request_timeout)  # honor litellm's global request timeout
        async with aiohttp.ClientSession(timeout=timeout) as session:
            resp = await session.post(url, json=data)

            if resp.status != 200:
                text = await resp.text()
                raise OllamaError(status_code=resp.status, message=text)

            ## LOGGING
            logging_obj.post_call(
                input=data["prompt"],
                api_key="",
                original_response=await resp.text(),  # aiohttp's .text is a coroutine, so it must be awaited
                additional_args={
                    "headers": None,
                    "api_base": url,
                },
            )

            response_json = await resp.json()
            ## RESPONSE OBJECT
            model_response["choices"][0]["finish_reason"] = "stop"
            if data.get("format", "") == "json":
                # JSON mode: expose the raw JSON string as a synthetic tool call
                message = litellm.Message(
                    content=None,
                    tool_calls=[
                        {
                            "id": f"call_{str(uuid.uuid4())}",
                            "function": {
                                "arguments": response_json["response"],
                                "name": "",
                            },
                            "type": "function",
                        }
                    ],
                )
                model_response["choices"][0]["message"] = message
            else:
                model_response["choices"][0]["message"]["content"] = response_json[
                    "response"
                ]
            model_response["created"] = int(time.time())
            model_response["model"] = "ollama/" + data["model"]
            prompt_tokens = response_json.get("prompt_eval_count", len(encoding.encode(data["prompt"])))  # type: ignore
            completion_tokens = response_json["eval_count"]
            model_response["usage"] = litellm.Usage(
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
                total_tokens=prompt_tokens + completion_tokens,
            )
            return model_response
    except Exception as e:
        traceback.print_exc()
        raise e

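A minimal usage sketch for the coroutine above, assuming a local Ollama server on the default port. `_NoopLogger` is a hypothetical stand-in for the logging object litellm normally threads through, and `litellm.encoding` is the tiktoken encoding the library exposes for the prompt-token fallback:

import asyncio
import litellm
from litellm.llms.ollama import ollama_acompletion


class _NoopLogger:
    # hypothetical stub for litellm's logging object
    def pre_call(self, *args, **kwargs): ...
    def post_call(self, *args, **kwargs): ...


result = asyncio.run(
    ollama_acompletion(
        url="http://localhost:11434/api/generate",
        data={"model": "llama2", "prompt": "Why is the sky blue?"},
        model_response=litellm.ModelResponse(),
        encoding=litellm.encoding,
        logging_obj=_NoopLogger(),
    )
)
print(result["choices"][0]["message"]["content"])
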
async def ollama_aembeddings(
    api_base="http://localhost:11434",
    model="llama2",
    prompt="Why is the sky blue?",
    optional_params=None,
    logging_obj=None,
    model_response=None,
    encoding=None,
):
    if api_base.endswith("/api/embeddings"):
        url = api_base
    else:
        url = f"{api_base}/api/embeddings"

    ## Load Config
    config = litellm.OllamaConfig.get_config()
    for k, v in config.items():
        if (
            k not in optional_params
        ):  # completion(top_k=3) > cohere_config(top_k=3) <- allows for dynamic variables to be passed in
            optional_params[k] = v

    data = {
        "model": model,
        "prompt": prompt,
    }
    ## LOGGING
    logging_obj.pre_call(
        input=None,
        api_key=None,
        additional_args={"api_base": url, "complete_input_dict": data, "headers": {}},
    )
    timeout = aiohttp.ClientTimeout(total=litellm.request_timeout)  # honor litellm's global request timeout
    async with aiohttp.ClientSession(timeout=timeout) as session:
        response = await session.post(url, json=data)

        if response.status != 200:
            text = await response.text()
            raise OllamaError(status_code=response.status, message=text)

        ## LOGGING
        logging_obj.post_call(
            input=prompt,
            api_key="",
            original_response=await response.text(),  # aiohttp's .text is a coroutine, so it must be awaited
            additional_args={
                "headers": None,
                "api_base": api_base,
            },
        )

        response_json = await response.json()
        embedding = response_json["embedding"]  # /api/embeddings returns a single vector for the prompt
        ## RESPONSE OBJECT
        output_data = [{"object": "embedding", "index": 0, "embedding": embedding}]
        model_response["object"] = "list"
        model_response["data"] = output_data
        model_response["model"] = model

        input_tokens = len(encoding.encode(prompt))

        model_response["usage"] = {
            "prompt_tokens": input_tokens,
            "total_tokens": input_tokens,
        }
        return model_response
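A matching sketch for the embeddings path, reusing the hypothetical `_NoopLogger` stub from above; `litellm.EmbeddingResponse` supplies the container the function fills in:

emb = asyncio.run(
    ollama_aembeddings(
        api_base="http://localhost:11434",
        model="llama2",
        prompt="Why is the sky blue?",
        optional_params={},
        logging_obj=_NoopLogger(),
        model_response=litellm.EmbeddingResponse(),
        encoding=litellm.encoding,
    )
)
print(len(emb["data"][0]["embedding"]))  # dimensionality of the returned vector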
    	
litellm/llms/ollama_chat.py
ADDED

@@ -0,0 +1,333 @@
import requests, types, time
import json, uuid
import traceback
from typing import Optional
import litellm
import httpx, aiohttp, asyncio
from .prompt_templates.factory import prompt_factory, custom_prompt


class OllamaError(Exception):
    def __init__(self, status_code, message):
        self.status_code = status_code
        self.message = message
        self.request = httpx.Request(method="POST", url="http://localhost:11434")
        self.response = httpx.Response(status_code=status_code, request=self.request)
        super().__init__(
            self.message
        )  # Call the base class constructor with the parameters it needs


class OllamaConfig:
    """
    Reference: https://github.com/jmorganca/ollama/blob/main/docs/api.md#parameters

    The class `OllamaConfig` provides the configuration for Ollama's API interface. Below are the parameters:

    - `mirostat` (int): Enable Mirostat sampling for controlling perplexity. Default is 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0. Example usage: mirostat 0

    - `mirostat_eta` (float): Influences how quickly the algorithm responds to feedback from the generated text. A lower learning rate will result in slower adjustments, while a higher learning rate will make the algorithm more responsive. Default: 0.1. Example usage: mirostat_eta 0.1

    - `mirostat_tau` (float): Controls the balance between coherence and diversity of the output. A lower value will result in more focused and coherent text. Default: 5.0. Example usage: mirostat_tau 5.0

    - `num_ctx` (int): Sets the size of the context window used to generate the next token. Default: 2048. Example usage: num_ctx 4096

    - `num_gqa` (int): The number of GQA groups in the transformer layer. Required for some models, for example it is 8 for llama2:70b. Example usage: num_gqa 1

    - `num_gpu` (int): The number of layers to send to the GPU(s). On macOS it defaults to 1 to enable metal support, 0 to disable. Example usage: num_gpu 0

    - `num_thread` (int): Sets the number of threads to use during computation. By default, Ollama will detect this for optimal performance. It is recommended to set this value to the number of physical CPU cores your system has (as opposed to the logical number of cores). Example usage: num_thread 8

    - `repeat_last_n` (int): Sets how far back the model looks to prevent repetition. Default: 64, 0 = disabled, -1 = num_ctx. Example usage: repeat_last_n 64

    - `repeat_penalty` (float): Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. Default: 1.1. Example usage: repeat_penalty 1.1

    - `temperature` (float): The temperature of the model. Increasing the temperature will make the model answer more creatively. Default: 0.8. Example usage: temperature 0.7

    - `stop` (string[]): Sets the stop sequences to use. Example usage: stop "AI assistant:"

    - `tfs_z` (float): Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. Default: 1. Example usage: tfs_z 1

    - `num_predict` (int): Maximum number of tokens to predict when generating text. Default: 128, -1 = infinite generation, -2 = fill context. Example usage: num_predict 42

    - `top_k` (int): Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. Default: 40. Example usage: top_k 40

    - `top_p` (float): Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. Default: 0.9. Example usage: top_p 0.9

    - `system` (string): system prompt for model (overrides what is defined in the Modelfile)

    - `template` (string): the full prompt or prompt template (overrides what is defined in the Modelfile)
    """

    mirostat: Optional[int] = None
    mirostat_eta: Optional[float] = None
    mirostat_tau: Optional[float] = None
    num_ctx: Optional[int] = None
    num_gqa: Optional[int] = None
    num_thread: Optional[int] = None
    repeat_last_n: Optional[int] = None
    repeat_penalty: Optional[float] = None
    temperature: Optional[float] = None
    stop: Optional[
        list
    ] = None  # stop is a list based on this - https://github.com/jmorganca/ollama/pull/442
    tfs_z: Optional[float] = None
    num_predict: Optional[int] = None
    top_k: Optional[int] = None
    top_p: Optional[float] = None
    system: Optional[str] = None
    template: Optional[str] = None

    def __init__(
        self,
        mirostat: Optional[int] = None,
        mirostat_eta: Optional[float] = None,
        mirostat_tau: Optional[float] = None,
        num_ctx: Optional[int] = None,
        num_gqa: Optional[int] = None,
        num_thread: Optional[int] = None,
        repeat_last_n: Optional[int] = None,
        repeat_penalty: Optional[float] = None,
        temperature: Optional[float] = None,
        stop: Optional[list] = None,
        tfs_z: Optional[float] = None,
        num_predict: Optional[int] = None,
        top_k: Optional[int] = None,
        top_p: Optional[float] = None,
        system: Optional[str] = None,
        template: Optional[str] = None,
    ) -> None:
        locals_ = locals()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)  # stored on the class, so values act as shared defaults

    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }

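A short sketch of how `get_config()` interacts with per-call parameters: values set on the config act as shared defaults, and anything the caller passes explicitly wins. This mirrors the merge loop in `get_ollama_response` below, and assumes the `litellm.OllamaConfig` re-export that this file itself relies on:

import litellm

litellm.OllamaConfig(temperature=0.2, num_ctx=4096)  # store shared defaults on the class

optional_params = {"temperature": 0.9}  # the caller's explicit choice
for k, v in litellm.OllamaConfig.get_config().items():
    if k not in optional_params:
        optional_params[k] = v

print(optional_params)  # {'temperature': 0.9, 'num_ctx': 4096}
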
# ollama implementation
def get_ollama_response(
    api_base="http://localhost:11434",
    model="llama2",
    messages=None,
    optional_params=None,
    logging_obj=None,
    acompletion: bool = False,
    model_response=None,
    encoding=None,
):
    if api_base.endswith("/api/chat"):
        url = api_base
    else:
        url = f"{api_base}/api/chat"

    ## Load Config
    config = litellm.OllamaConfig.get_config()
    for k, v in config.items():
        if (
            k not in optional_params
        ):  # completion(top_k=3) > cohere_config(top_k=3) <- allows for dynamic variables to be passed in
            optional_params[k] = v

    optional_params["stream"] = optional_params.get("stream", False)
    data = {"model": model, "messages": messages, **optional_params}
    ## LOGGING
    logging_obj.pre_call(
        input=None,
        api_key=None,
        additional_args={
            "api_base": url,
            "complete_input_dict": data,
            "headers": {},
            "acompletion": acompletion,
        },
    )
    # Route to the async/streaming variants; only the sync, non-streaming path falls through
    if acompletion is True:
        if optional_params.get("stream", False) == True:
            response = ollama_async_streaming(
                url=url,
                data=data,
                model_response=model_response,
                encoding=encoding,
                logging_obj=logging_obj,
            )
        else:
            response = ollama_acompletion(
                url=url,
                data=data,
                model_response=model_response,
                encoding=encoding,
                logging_obj=logging_obj,
            )
        return response
    elif optional_params.get("stream", False) == True:
        return ollama_completion_stream(url=url, data=data, logging_obj=logging_obj)

    response = requests.post(
        url=f"{url}",
        json=data,
    )
    if response.status_code != 200:
        raise OllamaError(status_code=response.status_code, message=response.text)

    ## LOGGING
    logging_obj.post_call(
        input=messages,
        api_key="",
        original_response=response.text,
        additional_args={
            "headers": None,
            "api_base": api_base,
        },
    )

    response_json = response.json()

    ## RESPONSE OBJECT
    model_response["choices"][0]["finish_reason"] = "stop"
    if data.get("format", "") == "json":
        # JSON mode: expose the raw JSON string as a synthetic tool call
        message = litellm.Message(
            content=None,
            tool_calls=[
                {
                    "id": f"call_{str(uuid.uuid4())}",
                    "function": {
                        "arguments": response_json["message"]["content"],
                        "name": "",
                    },
                    "type": "function",
                }
            ],
        )
        model_response["choices"][0]["message"] = message
    else:
        model_response["choices"][0]["message"] = response_json["message"]
    model_response["created"] = int(time.time())
    model_response["model"] = "ollama/" + model
    prompt_tokens = response_json.get("prompt_eval_count", 0)  # may be omitted when the prompt hit Ollama's cache
    completion_tokens = response_json["eval_count"]
    model_response["usage"] = litellm.Usage(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
        total_tokens=prompt_tokens + completion_tokens,
    )
    return model_response

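In practice this function is reached through `litellm.completion` rather than called directly; a sketch of the equivalent top-level call, assuming a local Ollama server with the model already pulled:

import litellm

response = litellm.completion(
    model="ollama_chat/llama2",  # the ollama_chat/ prefix routes to this module
    messages=[{"role": "user", "content": "Why is the sky blue?"}],
    api_base="http://localhost:11434",
)
print(response["choices"][0]["message"]["content"])
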
def ollama_completion_stream(url, data, logging_obj):
    with httpx.stream(
        url=url, json=data, method="POST", timeout=litellm.request_timeout
    ) as response:
        try:
            if response.status_code != 200:
                raise OllamaError(
                    status_code=response.status_code,
                    message=response.read().decode(),  # read the streamed body so the error text is usable
                )

            streamwrapper = litellm.CustomStreamWrapper(
                completion_stream=response.iter_lines(),
                model=data["model"],
                custom_llm_provider="ollama_chat",
                logging_obj=logging_obj,
            )
            for transformed_chunk in streamwrapper:
                yield transformed_chunk
        except Exception as e:
            raise e

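Because `ollama_completion_stream` is a generator, callers consume it like any other iterator. A minimal sketch, reusing the hypothetical `_NoopLogger` stub from earlier:

for chunk in ollama_completion_stream(
    url="http://localhost:11434/api/chat",
    data={
        "model": "llama2",
        "messages": [{"role": "user", "content": "hi"}],
        "stream": True,
    },
    logging_obj=_NoopLogger(),
):
    print(chunk["choices"][0]["delta"]["content"] or "", end="", flush=True)
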
async def ollama_async_streaming(url, data, model_response, encoding, logging_obj):
    try:
        client = httpx.AsyncClient()
        async with client.stream(
            url=f"{url}", json=data, method="POST", timeout=litellm.request_timeout
        ) as response:
            if response.status_code != 200:
                raise OllamaError(
                    status_code=response.status_code,
                    message=(await response.aread()).decode(),  # the streamed body must be read before it can be reported
                )

            streamwrapper = litellm.CustomStreamWrapper(
                completion_stream=response.aiter_lines(),
                model=data["model"],
                custom_llm_provider="ollama_chat",
                logging_obj=logging_obj,
            )
            async for transformed_chunk in streamwrapper:
                yield transformed_chunk
    except Exception as e:
        traceback.print_exc()
        raise e  # re-raise instead of silently ending the stream

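The async variant is consumed with `async for`; a sketch under the same assumptions (`encoding` is accepted but unused on this path):

import asyncio


async def drain():
    async for chunk in ollama_async_streaming(
        url="http://localhost:11434/api/chat",
        data={
            "model": "llama2",
            "messages": [{"role": "user", "content": "hi"}],
            "stream": True,
        },
        model_response=litellm.ModelResponse(),
        encoding=None,
        logging_obj=_NoopLogger(),  # hypothetical stub from earlier
    ):
        print(chunk["choices"][0]["delta"]["content"] or "", end="", flush=True)


asyncio.run(drain())
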
async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
    data["stream"] = False
    try:
        timeout = aiohttp.ClientTimeout(total=litellm.request_timeout)  # honor litellm's global request timeout
        async with aiohttp.ClientSession(timeout=timeout) as session:
            resp = await session.post(url, json=data)

            if resp.status != 200:
                text = await resp.text()
                raise OllamaError(status_code=resp.status, message=text)

            response_json = await resp.json()

            ## LOGGING
            logging_obj.post_call(
                input=data,
                api_key="",
                original_response=response_json,
                additional_args={
                    "headers": None,
                    "api_base": url,
                },
            )

            ## RESPONSE OBJECT
            model_response["choices"][0]["finish_reason"] = "stop"
            if data.get("format", "") == "json":
                # JSON mode: expose the raw JSON string as a synthetic tool call
                message = litellm.Message(
                    content=None,
                    tool_calls=[
                        {
                            "id": f"call_{str(uuid.uuid4())}",
                            "function": {
                                "arguments": response_json["message"]["content"],
                                "name": "",
                            },
                            "type": "function",
                        }
                    ],
                )
                model_response["choices"][0]["message"] = message
            else:
                model_response["choices"][0]["message"] = response_json["message"]
            model_response["created"] = int(time.time())
            model_response["model"] = "ollama/" + data["model"]
            prompt_tokens = response_json.get("prompt_eval_count", 0)  # may be omitted when the prompt hit Ollama's cache
            completion_tokens = response_json["eval_count"]
            model_response["usage"] = litellm.Usage(
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
                total_tokens=prompt_tokens + completion_tokens,
            )
            return model_response
    except Exception as e:
        traceback.print_exc()
        raise e
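One behavior worth noting: when the request sets `format: "json"`, the reply is surfaced as a synthetic tool call rather than as plain message content. A sketch of what a caller sees in that mode, with the same hypothetical stub as above:

resp = asyncio.run(
    ollama_acompletion(
        url="http://localhost:11434/api/chat",
        data={
            "model": "llama2",
            "messages": [{"role": "user", "content": "Describe the sky as JSON."}],
            "format": "json",
        },
        model_response=litellm.ModelResponse(),
        encoding=None,  # unused here: token counts come from the Ollama response
        logging_obj=_NoopLogger(),
    )
)
print(resp["choices"][0]["message"]["tool_calls"][0]["function"]["arguments"])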