import os

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Earlier model candidates, kept for reference:
# model_name = "RickyDeSkywalker/TheoremLlama"
# model_name = "unsloth/Llama-3.2-1B-Instruct"
device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "internlm/internlm2-math-plus-7b"
HF_TOKEN = os.environ.get("HF_TOKEN")
#login(HF_TOKEN)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# Set `torch_dtype=torch.float16` to load model in float16, otherwise it will be loaded as float32 and might cause OOM Error.
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, torch_dtype=torch.float16).eval().to(device)
model = model.eval()
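# Hedged alternative (assumes the `accelerate` package is installed): let
# transformers place/shard the fp16 weights automatically instead of a single
# .to(device) call, which can help on multi-GPU or memory-tight machines:
# model = AutoModelForCausalLM.from_pretrained(
#     model_name, trust_remote_code=True, torch_dtype=torch.float16,
#     device_map="auto",
# ).eval()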
# Generate Lean 4 code from a natural-language prompt.
@torch.inference_mode()
def generate_lean4_code(prompt):
    # InternLM2's remote code exposes a .chat() helper that applies the chat
    # template and decodes the reply; history=[] starts a fresh conversation.
    response, _history = model.chat(tokenizer, prompt, history=[], meta_instruction="")
    return response
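# A minimal fallback sketch, assuming the checkpoint were swapped for one
# without a .chat() helper (e.g. the commented-out Llama candidates above).
# Unused by the app; max_new_tokens=512 is an illustrative value, not tuned.
@torch.inference_mode()
def generate_lean4_code_fallback(prompt):
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    output_ids = model.generate(**inputs, max_new_tokens=512, do_sample=False)
    # Strip the prompt tokens so only the completion is returned.
    new_tokens = output_ids[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)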
# Gradio interface
title = "Lean 4 Code Generation with InternLM2-Math-Plus"
description = "Enter a natural-language prompt to generate Lean 4 code."
interface = gr.Interface(
    fn=generate_lean4_code,
    inputs=gr.Textbox(
        label="Prompt",
        placeholder="Prove that the sum of two even numbers is even.",
        lines=4,
    ),
    # gr.Code has no Lean syntax mode, so `language` is left unset.
    outputs=gr.Code(label="Generated Lean 4 Code"),
    title=title,
    description=description,
)
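# Optional (an assumption, not in the original app): enable request queuing so
# long generations do not time out under concurrent users.
# interface.queue(max_size=16)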
# Launch the Gradio app
interface.launch(ssr_mode=False)