StevenChen16 committed on
Commit
8ed8368
·
verified ·
1 Parent(s): 5f8b9a9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -61
app.py CHANGED
@@ -1,27 +1,38 @@
1
  import gradio as gr
2
  import os
3
- from threading import Thread
 
4
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 
 
 
 
 
5
 
6
  DESCRIPTION = '''
7
  <div>
8
- <h1 style="text-align: center;">AI Lawyer</h1>
 
 
 
9
  </div>
10
  '''
11
 
12
  LICENSE = """
13
  <p/>
14
  ---
15
- Built with model "StevenChen16/Llama3-8B-Lawyer", based on "meta-llama/Meta-Llama-3-8B"
16
  """
17
 
18
  PLACEHOLDER = """
19
  <div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
20
- <h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">AI Lawyer</h1>
21
- <p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">Ask me anything about US and Canada law...</p>
 
22
  </div>
23
  """
24
 
 
25
  css = """
26
  h1 {
27
  text-align: center;
@@ -36,50 +47,29 @@ h1 {
36
  """
37
 
38
  # Load the tokenizer and model
39
- tokenizer = AutoTokenizer.from_pretrained("StevenChen16/llama3-8b-Lawyer")
40
- model = AutoModelForCausalLM.from_pretrained("StevenChen16/llama3-8b-Lawyer", device_map="auto") # to("cuda:0")
41
-
42
- background_prompt = """
43
- As an AI legal assistant, you are a highly trained expert in U.S. and Canadian law. Your purpose is to provide accurate, comprehensive, and professional legal information to assist users with a wide range of legal questions. When answering questions, you should actively ask questions to obtain more information, analyze from different perspectives, and explain your reasoning process to the user. Please adhere to the following guidelines:
44
-
45
- 1. Clarify the question:
46
- - Ask questions to clarify the user's specific situation and needs to provide the most relevant and targeted advice.
47
- - However, if the user has already provided sufficient background information, avoid excessively pressing for details. Focus on understanding the core of the issue, rather than unnecessary minutiae.
48
-
49
- 2. Gather information:
50
- - Identify the key information needed to answer the question and proactively ask the user for these details.
51
- - When gathering information, be sure to identify which details are directly relevant to the legal analysis of the case. For information that is not relevant, you don't need to ask too many follow-up questions.
52
- - If the user indicates that they have provided all relevant information, accept this and do not continue to demand more details.
53
-
54
- 3. Multi-perspective analysis:
55
- - Evaluate legal issues from different viewpoints, considering various possible interpretations and applications.
56
- - Present arguments supporting and opposing specific perspectives to comprehensively clarify complex issues.
57
- - In your analysis, strive to balance comprehensiveness and conciseness. Provide thorough analysis, but also ensure that the user can easily understand and absorb the information.
58
-
59
- 4. Explain reasoning:
60
- - Explain the main legal principles, regulations, and case law you consider when analyzing the issue.
61
- - Clarify how you apply legal knowledge to the user's specific situation and the logic behind your conclusions.
62
- - When explaining your reasoning, use clear and concise language, avoiding excessive length or repetition.
63
-
64
- 5. Interactive dialogue:
65
- - Encourage the user to participate in the discussion, ask follow-up questions, and share their thoughts and concerns.
66
- - Dynamically adjust your analysis and recommendations based on new information obtained in the conversation.
67
- - In your interactions, be attentive to the user's needs and concerns. If they express satisfaction or indicate that they don't require more information, respect their wishes.
68
-
69
- 6. Professional advice:
70
- - Provide clear, actionable legal advice, but also emphasize the necessity of consulting a professional lawyer before making a final decision.
71
- - If clients wish to speak with a lawyer, you can introduce our team (WealthWizards), which consists of lawyers with different specializations and orientations.
72
- - When providing advice, use language that is easy to understand and communicate with a tone of empathy and care. Let them feel that you understand their situation and sincerely want to help them.
73
-
74
- Please remember that your role is to provide general legal information and analysis, but also to actively guide and interact with the user during the conversation in a personalized and professional manner. If you feel that necessary information is missing to provide targeted analysis and advice, take the initiative to ask until you believe you have sufficient details. However, also be mindful to avoid over-inquiring or disregarding the user's needs and concerns. Now, please guide me step by step to describe the legal issues I am facing, according to the above requirements.
75
- """
76
-
77
  terminators = [
78
  tokenizer.eos_token_id,
79
- tokenizer.convert_tokens_to_ids("")
80
  ]
81
 
82
- def chat_llama3_8b(message: str, history: list, temperature: float, max_new_tokens: int) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  conversation = []
84
  for user, assistant in history:
85
  conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
@@ -97,6 +87,7 @@ def chat_llama3_8b(message: str, history: list, temperature: float, max_new_toke
97
  temperature=temperature,
98
  eos_token_id=terminators,
99
  )
 
100
  if temperature == 0:
101
  generate_kwargs['do_sample'] = False
102
 
@@ -106,30 +97,47 @@ def chat_llama3_8b(message: str, history: list, temperature: float, max_new_toke
106
  outputs = []
107
  for text in streamer:
108
  outputs.append(text)
 
109
  yield "".join(outputs)
110
-
111
- def query_model(user_input, history):
112
- combined_query = background_prompt + user_input
113
- return chat_llama3_8b(combined_query, history, temperature=0.9, max_new_tokens=512)
114
 
115
  # Gradio block
116
- chatbot = gr.Chatbot(height=450, placeholder=PLACEHOLDER, label='Gradio ChatInterface')
117
 
118
- with gr.Blocks(css=css) as demo:
 
119
  gr.Markdown(DESCRIPTION)
 
120
  gr.ChatInterface(
121
- fn=query_model,
122
  chatbot=chatbot,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  examples=[
124
- ['What are the key differences between a sole proprietorship and a partnership?'],
125
- ['What legal steps should I take if I want to start a business in the US?'],
126
- ['Can you explain the concept of "duty of care" in negligence law?'],
127
- ['What are the legal requirements for obtaining a patent in Canada?'],
128
- ['How can I protect my intellectual property when sharing my idea with potential investors?']
129
- ],
130
  cache_examples=False,
131
- )
 
132
  gr.Markdown(LICENSE)
133
-
134
  if __name__ == "__main__":
135
- demo.launch(share=True)
 
1
  import gradio as gr
2
  import os
3
+ import spaces
4
+ from transformers import GemmaTokenizer, AutoModelForCausalLM
5
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
6
+ from threading import Thread
7
+
8
+ # Set an environment variable
9
+ HF_TOKEN = os.environ.get("HF_TOKEN", None)
10
+
11
 
12
  DESCRIPTION = '''
13
  <div>
14
+ <h1 style="text-align: center;">Meta Llama3 8B</h1>
15
+ <p>This Space demonstrates the instruction-tuned model <a href="https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct"><b>Meta Llama3 8b Chat</b></a>. Meta Llama3 is the new open LLM and comes in two sizes: 8b and 70b. Feel free to play with it, or duplicate to run privately!</p>
16
+ <p>🔎 For more details about the Llama3 release and how to use the model with <code>transformers</code>, take a look <a href="https://huggingface.co/blog/llama3">at our blog post</a>.</p>
17
+ <p>🦕 Looking for an even more powerful model? Check out the <a href="https://huggingface.co/chat/"><b>Hugging Chat</b></a> integration for Meta Llama 3 70b</p>
18
  </div>
19
  '''
20
 
21
  LICENSE = """
22
  <p/>
23
  ---
24
+ Built with Meta Llama 3
25
  """
26
 
27
  PLACEHOLDER = """
28
  <div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
29
+ <img src="https://ysharma-dummy-chat-app.hf.space/file=/tmp/gradio/8e75e61cc9bab22b7ce3dec85ab0e6db1da5d107/Meta_lockup_positive%20primary_RGB.jpg" style="width: 80%; max-width: 550px; height: auto; opacity: 0.55; ">
30
+ <h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">Meta llama3</h1>
31
+ <p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">Ask me anything...</p>
32
  </div>
33
  """
34
 
35
+
36
  css = """
37
  h1 {
38
  text-align: center;
 
47
  """
48
 
49
  # Load the tokenizer and model
50
+ tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
51
+ model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", device_map="auto") # to("cuda:0")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  terminators = [
53
  tokenizer.eos_token_id,
54
+ tokenizer.convert_tokens_to_ids("<|eot_id|>")
55
  ]
56
 
57
+ @spaces.GPU(duration=120)
58
+ def chat_llama3_8b(message: str,
59
+ history: list,
60
+ temperature: float,
61
+ max_new_tokens: int
62
+ ) -> str:
63
+ """
64
+ Generate a streaming response using the llama3-8b model.
65
+ Args:
66
+ message (str): The input message.
67
+ history (list): The conversation history used by ChatInterface.
68
+ temperature (float): The temperature for generating the response.
69
+ max_new_tokens (int): The maximum number of new tokens to generate.
70
+ Returns:
71
+ str: The generated response.
72
+ """
73
  conversation = []
74
  for user, assistant in history:
75
  conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
 
87
  temperature=temperature,
88
  eos_token_id=terminators,
89
  )
90
+ # This will enforce greedy generation (do_sample=False) when the temperature is passed 0, avoiding the crash.
91
  if temperature == 0:
92
  generate_kwargs['do_sample'] = False
93
 
 
97
  outputs = []
98
  for text in streamer:
99
  outputs.append(text)
100
+ #print(outputs)
101
  yield "".join(outputs)
102
+
 
 
 
103
 
104
  # Gradio block
105
+ chatbot=gr.Chatbot(height=450, placeholder=PLACEHOLDER, label='Gradio ChatInterface')
106
 
107
+ with gr.Blocks(fill_height=True, css=css) as demo:
108
+
109
  gr.Markdown(DESCRIPTION)
110
+ gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
111
  gr.ChatInterface(
112
+ fn=chat_llama3_8b,
113
  chatbot=chatbot,
114
+ fill_height=True,
115
+ additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
116
+ additional_inputs=[
117
+ gr.Slider(minimum=0,
118
+ maximum=1,
119
+ step=0.1,
120
+ value=0.95,
121
+ label="Temperature",
122
+ render=False),
123
+ gr.Slider(minimum=128,
124
+ maximum=4096,
125
+ step=1,
126
+ value=512,
127
+ label="Max new tokens",
128
+ render=False ),
129
+ ],
130
  examples=[
131
+ ['How to setup a human base on Mars? Give short answer.'],
132
+ ['Explain theory of relativity to me like I’m 8 years old.'],
133
+ ['What is 9,000 * 9,000?'],
134
+ ['Write a pun-filled happy birthday message to my friend Alex.'],
135
+ ['Justify why a penguin might make a good king of the jungle.']
136
+ ],
137
  cache_examples=False,
138
+ )
139
+
140
  gr.Markdown(LICENSE)
141
+
142
  if __name__ == "__main__":
143
+ demo.launch()