import os
from datetime import date

import gradio as gr
import openai
# Model configuration dictionary
MODEL_CONFIGS = {
    "Falcon-H1-34B-Instruct": {
        "model_id": "tiiuae/Falcon-H1-34B-Instruct",
        "api_key_env": "XXL_API_KEY",
        "base_url_env": "XXL_URL",
        "description": "XXL (34B)"
    },
    "Falcon-H1-7B-Instruct": {
        "model_id": "tiiuae/Falcon-H1-7B-Instruct",
        "api_key_env": "L_API_KEY",
        "base_url_env": "L_URL",
        "description": "L (7B)"
    },
    "Falcon-H1-3B-Instruct": {
        "model_id": "tiiuae/Falcon-H1-3B-Instruct",
        "api_key_env": "M_API_KEY",
        "base_url_env": "M_URL",
        "description": "M (3B)"
    },
    "Falcon-H1-1.5B-Deep-Instruct": {
        "model_id": "tiiuae/Falcon-H1-1.5B-Deep-Instruct",
        "api_key_env": "S_API_KEY",
        "base_url_env": "S_URL",
        "description": "S (1.5B Deep)"
    },
    "Falcon-H1-1.5B-Instruct": {
        "model_id": "tiiuae/Falcon-H1-1.5B-Instruct",
        "api_key_env": "XS_API_KEY",
        "base_url_env": "XS_URL",
        "description": "XS (1.5B)"
    },
    "Falcon-H1-0.5B-Instruct": {
        "model_id": "tiiuae/Falcon-H1-0.5B-Instruct",
        "api_key_env": "XXS_API_KEY",
        "base_url_env": "XXS_URL",
        "description": "XXS (0.5B)"
    },
}
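
# Each entry above expects two environment variables: an API key and the
# endpoint URL of an OpenAI-compatible server. A hypothetical setup for
# the 34B model (values are placeholders, not real credentials):
#
#   export XXL_API_KEY="sk-..."
#   export XXL_URL="https://your-inference-endpoint.example.com/v1"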
today = date.today()
# Fall back to an empty prompt so the app still works when the env var is unset
SYSTEM_PROMPT = os.getenv("SYSTEM_PROMPT", "")
# CSS styling for the interface
CSS = """
/* Main style improvements */
.container {
    max-width: 900px !important;
    margin-left: auto !important;
    margin-right: auto !important;
}

/* Title styling */
h1 {
    background: linear-gradient(90deg, #4776E6 0%, #8E54E9 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    font-weight: 700 !important;
    text-align: center;
    margin-bottom: 0.5rem !important;
}

.subtitle {
    text-align: center;
    color: #666;
    margin-bottom: 1rem;
}

/* Button styling */
.duplicate-button {
    margin: 1rem auto !important;
    display: block !important;
    color: #fff !important;
    background: linear-gradient(90deg, #4776E6 0%, #8E54E9 100%) !important;
    border-radius: 100vh !important;
    padding: 0.5rem 1.5rem !important;
    font-weight: 600 !important;
    border: none !important;
    box-shadow: 0 4px 6px rgba(50, 50, 93, 0.11), 0 1px 3px rgba(0, 0, 0, 0.08) !important;
}

/* Parameter accordion styling */
.accordion {
    border-radius: 8px !important;
    overflow: hidden !important;
    box-shadow: 0 1px 3px rgba(0,0,0,0.1) !important;
    margin-bottom: 1rem !important;
}

h3 {
    text-align: center;
}
"""
TITLE = "<h1>Falcon-H1 Playground</h1>"
SUB_TITLE = """
<p class='subtitle'>Falcon-H1 is a new state-of-the-art hybrid model by TII in Abu Dhabi. It is open source and available on Hugging Face. This demo is powered by <a href="https://openinnovation.ai">OpenInnovationAI</a>. Try out our <a href="https://chat.falconllm.tii.ae/auth">chat interface</a>.</p>
<p class='subtitle' style='font-size: 0.9rem; color: #888;'>Today: {today_date}</p>
"""
def stream_chat(
    message: str,
    history: list,
    model_name: str,
    temperature: float = 0.1,
    max_new_tokens: int = 1024,
    top_p: float = 1.0,
    top_k: int = 20,
    presence_penalty: float = 1.2,
):
    """Chat function that streams responses from the selected model"""
    cfg = MODEL_CONFIGS[model_name]
    api_key = os.getenv(cfg["api_key_env"])
    base_url = os.getenv(cfg["base_url_env"]) if cfg.get("base_url_env") else None

    if not api_key:
        yield f"❌ Env-var `{cfg['api_key_env']}` not set."
        return
    if cfg.get("base_url_env") and not base_url:
        yield f"❌ Env-var `{cfg['base_url_env']}` not set."
        return

    client = openai.OpenAI(api_key=api_key, base_url=base_url)

    # Rebuild the conversation: optional system prompt, then alternating turns
    msgs = [{"role": "system", "content": SYSTEM_PROMPT}] if SYSTEM_PROMPT else []
    for u, a in history:
        msgs += [{"role": "user", "content": u},
                 {"role": "assistant", "content": a}]
    msgs.append({"role": "user", "content": message})

    try:
        stream = client.chat.completions.create(
            model=cfg["model_id"],
            messages=msgs,
            temperature=temperature,
            top_p=top_p,
            max_tokens=max_new_tokens,
            presence_penalty=presence_penalty,
            # top_k is not part of the OpenAI API itself; OpenAI-compatible
            # servers such as vLLM accept it via extra_body.
            extra_body={"top_k": top_k},
            stream=True,
        )
        # Accumulate streamed deltas and yield the running text
        partial = ""
        for chunk in stream:
            if (delta := chunk.choices[0].delta).content:
                partial += delta.content
                yield partial
    except Exception as e:
        yield f"❌ Error: {str(e)}"
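
# A minimal smoke test (hypothetical), assuming the 0.5B endpoint's env vars
# are set; it prints the growing response as it streams:
#
#   for partial in stream_chat("Hello!", [], "Falcon-H1-0.5B-Instruct"):
#       print(partial)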
# Create the Gradio interface
with gr.Blocks(css=CSS, theme="soft") as demo:
    # Header section
    gr.HTML(TITLE)
    gr.HTML(SUB_TITLE.format(today_date=today.strftime('%B %d, %Y')))
    gr.DuplicateButton(value="Duplicate Space", elem_classes="duplicate-button")

    # Model selection dropdown
    model_options = list(MODEL_CONFIGS.keys())
    with gr.Row():
        model_dropdown = gr.Dropdown(
            choices=model_options,
            value=model_options[0],
            label="Select Falcon-H1 Model",
            info="Choose which model checkpoint to use"
        )

    # Chatbot with LaTeX rendering
    chatbot = gr.Chatbot(height=600, latex_delimiters=[
        {"left": "$$", "right": "$$", "display": True},    # Display-mode math
        {"left": "$", "right": "$", "display": False},     # Inline math
        {"left": "\\(", "right": "\\)", "display": False}, # Common inline delimiters
        {"left": "\\[", "right": "\\]", "display": True}   # Common display delimiters
    ])
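
    # With these delimiters, "$E = mc^2$" renders inline and
    # "$$\int_0^1 x\,dx$$" renders as display math in the chat window.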
    # Message input area with a cleaner layout
    with gr.Row():
        # Gradio expects integer scale values; 17:3 preserves the original
        # 0.85 / 0.15 split
        with gr.Column(scale=17):
            msg = gr.Textbox(
                show_label=False,
                placeholder="Enter text and press enter",
                container=False
            )
        with gr.Column(scale=3, min_width=0):
            submit_btn = gr.Button("Submit", variant="primary")
    # Generation parameters
    with gr.Accordion("⚙️ Parameters", open=False, elem_classes="accordion"):
        max_new_tokens = gr.Slider(minimum=64, maximum=4096 * 8, value=1024, step=64,
                                   label="Max new tokens", info="Maximum length of the generated response")
        top_p = gr.Slider(minimum=0, maximum=1, value=1.0, step=0.05,
                          label="top_p", info="1.0 means no filtering")
        top_k = gr.Slider(minimum=1, maximum=20, step=1, value=20, label="top_k")
        presence_penalty = gr.Slider(minimum=0, maximum=2, value=1.2, step=0.1,
                                     label="Presence penalty", info="Penalizes repetition")
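
    # Note: temperature is not exposed as a slider; bot() below pins it to 0.1.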
    # Examples section
    gr.Examples(
        examples=[
            ["Hello there, can you suggest a few places to visit in UAE?"],
            ["What is UAE known for?"],
        ],
        inputs=msg,
    )
    # Chat handler functions
    def user(user_message, history):
        # Clear the textbox and append the new turn with an empty reply slot
        return "", history + [[user_message, None]]

    def bot(history, model_choice, max_tokens, top_p_val, top_k_val, penalty):
        temp = 0.1  # Fixed temperature, matching stream_chat's default
        user_message = history[-1][0]
        history[-1][1] = ""
        for partial_response in stream_chat(
            user_message,
            history[:-1],
            model_choice,
            temp,
            max_tokens,
            top_p_val,
            top_k_val,
            penalty
        ):
            history[-1][1] = partial_response
            yield history
    # Set up event handlers
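    # Submitting via Enter or the button uses the same two-step chain:
    # user() updates the UI immediately (queue=False), then bot() streams
    # the model's reply into the last history entry.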
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot, model_dropdown, max_new_tokens, top_p, top_k, presence_penalty], chatbot
    )
    submit_btn.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot, model_dropdown, max_new_tokens, top_p, top_k, presence_penalty], chatbot
    )
if __name__ == "__main__":
    demo.launch()
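
# When hosting outside Spaces (e.g. in a container), you may need
# demo.launch(server_name="0.0.0.0") so the app is reachable externally.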