| import streamlit as st | |
| import tiktoken | |
| from .content import TOKEN_ESTIMATOR_TEXT | |
def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Return the number of tokens in a text string.

    Args:
        string: Text to tokenize.
        encoding_name: Name of the tiktoken encoding to look up, e.g.
            ``"cl100k_base"``.

    Returns:
        The length of the token sequence the encoding produces for ``string``.

    Errors raised by ``tiktoken.get_encoding`` (for instance when the encoding
    name cannot be resolved) are not caught here and propagate to the caller.
    """
    encoding = tiktoken.get_encoding(encoding_name)
    # tiktoken refuses, by default, to encode text that contains the literal
    # form of a special token (such as "<|endoftext|>") and raises ValueError.
    # This function only counts tokens of arbitrary text, so special-token
    # look-alikes are tokenized as ordinary text instead of failing.
    return len(encoding.encode(string, disallowed_special=()))
def token_estimator() -> None:
    """Render the token estimator section of the Streamlit page.

    Writes a heading and an introduction, a collapsed expander holding
    ``TOKEN_ESTIMATOR_TEXT``, a text area pre-filled with a sample sentence,
    and a bordered metric showing the token count of the text area's content
    under the ``cl100k_base`` encoding.
    """
    st.markdown("### 🪙 Tokens estimator")

    # Adjacent literals are concatenated by the parser, so the rendered
    # sentence is a single string.
    introduction = (
        "As our methodology deeply relies on the number of tokens processed "
        "by the model *(and as no-one is token-fluent)*, we provide you with "
        "a tool to estimate the number of tokens in a given text."
    )
    st.markdown(introduction)

    explainer = st.expander("ℹ️ What is a token anyway ?", expanded=False)
    explainer.markdown(TOKEN_ESTIMATOR_TEXT)

    text_to_measure = st.text_area(
        "Type or paste some text to estimate the amount of tokens.",
        "EcoLogits is a great project!",
    )

    # Three columns weighted 2:1:2; only the narrow middle one is used, which
    # centres the metric on the page.
    layout = st.columns([2, 1, 2])
    centre_column = layout[1]
    with centre_column:
        token_count = num_tokens_from_string(text_to_measure, "cl100k_base")
        st.metric(
            label="tokens estimated amount",
            value=token_count,
            border=True,
        )