Spaces:

Not-Grim-Refer
/

Falcoder-7b-Extended-Code-Generator

Runtime error

App Files Files Community

Not-Grim-Refer commited on Jul 5, 2023

Commit

e936a3f

1 Parent(s): c12c1d4

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -159

app.py CHANGED Viewed

@@ -1,162 +1,60 @@
-# Import necessary libraries
-from transformers import AutoTokenizer, AutoModelForCausalLM
-import gradio as gr
-import torch
-import logging
-# Set up logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-# Set device to GPU if available, otherwise CPU
-device = "cuda" if torch.cuda.is_available() else "cpu"
-# Load tokenizer and model
-tokenizer = AutoTokenizer.from_pretrained("mrm8488/falcoder-7b")
-model = AutoModelForCausalLM.from_pretrained("mrm8488/falcoder-7b")
-def generate_text(prompt, max_length, do_sample, temperature, top_k, top_p):
-    """
-    Generates text completion given a prompt and specified parameters.
-    :param prompt: Input prompt for text generation.
-    :type prompt: str
-    :param max_length: Maximum length of generated text.
-    :type max_length: int
-    :param do_sample: Whether to use sampling for text generation.
-    :type do_sample: bool
-    :param temperature: Sampling temperature for text generation.
-    :type temperature: float
-    :param top_k: Value for top-k sampling.
-    :type top_k: int
-    :param top_p: Value for top-p sampling.
-    :type top_p: float
-    :return: Generated text completion.
-    :rtype: str
-    """
-    # Format prompt
-    formatted_prompt = "\n" + prompt
-    if not ',' in prompt:
-        formatted_prompt += ','
-    # Tokenize prompt and move to device
-    prompt = tokenizer(formatted_prompt, return_tensors='pt')
-    prompt = {key: value.to(device) for key, value in prompt.items()}
-    # Generate text completion using model and specified parameters
-    out = model.generate(**prompt, max_length=max_length, do_sample=do_sample, temperature=temperature,
-                          no_repeat_ngram_size=3, top_k=top_k, top_p=top_p)
-    output = tokenizer.decode(out[0])
-    clean_output = output.replace('\n', '\n')
-    # Log generated text completion
-    logger.info("Text generated: %s", clean_output)
-    return clean_output
-# Define Gradio interface
-custom_css = """
-.gradio-container {
-    background-color: #0D1525;
-    color:white
-}
-#orange-button {
-    background: #F26207 !important;
-    color: white;
-}
-.cm-gutters{
-    border: none !important;
-}
-"""
-def post_processing(prompt, completion):
-    """
-    Formats generated text completion for display.
-    :param prompt: Input prompt for text generation.
-    :type prompt: str
-    :param completion: Generated text completion.
-    :type completion: str
-    :return: Formatted text completion.
-    :rtype: str
-    """
-    return prompt + completion
-def code_generation(prompt, max_new_tokens, temperature=0.2, seed=42, top_p=0.9, top_k=None, use_cache=True, repetition_penalty=1.0):
-    """
-    Generates code completion given a prompt and specified parameters.
-    :param prompt: Input prompt for code generation.
-    :type prompt: str
-    :param max_new_tokens: Maximum number of tokens to generate.
-    :type max_new_tokens: int
-    :param temperature: Sampling temperature for code generation.
-    :type temperature: float
-    :param seed: Random seed for code generation.
-    :type seed: int
-    :param top_p: Value for top-p sampling.
-    :type top_p: float
-    :param top_k: Value for top-k sampling.
-    :type top_k: int
-    :param use_cache: Whether to use cache for code generation.
-    :type use_cache: bool
-    :param repetition_penalty: Value for repetition penalty.
-    :type repetition_penalty: float
-    :return: Generated code completion.
-    :rtype: str
-    """
-    # Truncate prompt if too long
-    MAX_INPUT_TOKENS = 2048
-    if len(prompt) > MAX_INPUT_TOKENS:
-        prompt = prompt[-MAX_INPUT_TOKENS:]
-    # Tokenize prompt and move to device
-    x = tokenizer.encode(prompt, return_tensors="pt", max_length=MAX_INPUT_TOKENS, truncation=True).to(device)
-    logger.info("Prompt shape: %s", x.shape)
-    # Generate code completion using model and specified parameters
-    set_seed(seed)
-    y = model.generate(x,
-                       max_new_tokens=max_new_tokens,
-                       temperature=temperature,
-                       pad_token_id=tokenizer.pad_token_id,
-                       eos_token_id=tokenizer.eos_token_id,
-                       top_p=top_p,
-                       top_k=top_k,
-                       use_cache=use_cache,
-                       repetition_penalty=repetition_penalty
-                    )
-    completion = tokenizer.decode(y[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)
-    completion = completion[len(prompt):]
-    return post_processing(prompt, completion)
-description = """
-### Falcoder
-Falcoder is a GPT-2 model fine-tuned on Python code. It can be used for generating code completions given a prompt.
-### Text Generation
-Use the text generation section to generate text completions given a prompt. You can adjust the maximum length of the generated text, whether to use sampling, the sampling temperature, and the top-k and top-p values for sampling.
-### Code Generation
-Use the code generation section to generate code completions given a prompt. You can adjust the maximum number of tokens to generate, the sampling temperature, the random seed, the top-p and top-k values for sampling, whether to use cache, and the repetition penalty.
 """
-demo = gr.Interface(
-    [generate_text, code_generation],
-    ["textbox", "textbox"],
-    ["textbox", "textbox"],
-    title="Falcoder",
-    description=description,
-    theme="compact",
-    layout="vertical",
-    css=custom_css
-)
-# Launch Gradio interface
-demo.launch()

+import streamlit as st
+st.title("Falcon QA Bot")
+# import chainlit as cl
+import os
+huggingfacehub_api_token = st.secrets["hf_token"]
+from langchain import HuggingFaceHub, PromptTemplate, LLMChain
+repo_id = "tiiuae/falcon-7b-instruct"
+llm = HuggingFaceHub(huggingfacehub_api_token=huggingfacehub_api_token,
+                     repo_id=repo_id,
+                     model_kwargs={"temperature":0.2, "max_new_tokens":2000})
+template = """
+You are an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
+{question}
 """
+# input = st.text_input("What do you want to ask about", placeholder="Input your question here")
+# # @cl.langchain_factory
+# def factory():
+#     prompt = PromptTemplate(template=template, input_variables=['question'])
+#     llm_chain = LLMChain(prompt=prompt, llm=llm, verbose=True)
+#     return llm_chain
+prompt = PromptTemplate(template=template, input_variables=["question"])
+llm_chain = LLMChain(prompt=prompt,verbose=True,llm=llm)
+# result = llm_chain.predict(question=input)
+# print(result)
+def chat(query):
+    # prompt = PromptTemplate(template=template, input_variables=["question"])
+    # llm_chain = LLMChain(prompt=prompt,verbose=True,llm=llm)
+    result = llm_chain.predict(question=query)
+    return result
+def main():
+    input = st.text_input("What do you want to ask about", placeholder="Input your question here")
+    if input:
+        output = chat(input)
+        st.write(output,unsafe_allow_html=True)
+if __name__ == '__main__':
+    main()