Spaces:

Aakash098
/

gemma4-4b

Build error

gemma4-4b / app.py

Update app.py

843b3d1 verified 18 days ago

1.29 kB

	import gradio as gr
	from llama_cpp import Llama

	# 1. Model Details (GGUF direct link handle karega)
	# Hum 2B version use kar rahe hain taaki Free Space par speed fast rahe
	repo_id = "unsloth/gemma-4-E4B-it-GGUF"
	filename = "gemma-4-E2B-it-Q4_K_M.gguf"

	print("Model download ho raha hai... isme 1-2 minute lag sakte hain.")

	# 2. Model Load karna
	# Hugging Face ki free space par RAM kam hoti hai, isliye parameters optimized hain
	llm = Llama.from_pretrained(
	repo_id=repo_id,
	filename=filename,
	n_ctx=2048, # Context window
	verbose=False
	)

	def chat_function(message, history):
	# Gemma ka special prompt format
	formatted_prompt = f"<bos><start_of_turn>user\n{message}<end_of_turn>\n<start_of_turn>model\n"

	response = llm(
	formatted_prompt,
	max_tokens=512,
	stop=["<end_of_turn>"],
	stream=True
	)

	output_text = ""
	for chunk in response:
	token = chunk["choices"][0]["text"]
	output_text += token
	yield output_text

	# 3. Gradio Interface
	demo = gr.ChatInterface(
	fn=chat_function,
	title="Gemma 4 Cloud Chat",
	description="Running on Hugging Face Spaces (CPU Only)",
	theme="soft"
	)

	if __name__ == "__main__":
	demo.launch(server_name="0.0.0.0", server_port=7860)