halimbahae committed
Commit 3683f9c · verified · 1 Parent(s): 94447f8

Update app.py

Files changed (1)
  1. app.py +149 -28
app.py CHANGED
@@ -1,11 +1,111 @@
 import gradio as gr
+import pandas as pd
+import re
 from huggingface_hub import InferenceClient
+import spacy
+from collections import Counter
+import plotly.express as px
+import plotly.graph_objects as go
+from datetime import datetime
 
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
+# Load SpaCy model for NLP
+nlp = spacy.load("en_core_web_sm")
+
+# Initialize Hugging Face client
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
+def parse_message(message):
+    """Extract information from a chat message using regex and NLP."""
+    info = {}
+
+    # Extract timestamp and phone number
+    timestamp_match = re.search(r'\[(.*?)\]', message)
+    phone_match = re.search(r'\] (.*?):', message)
+
+    if timestamp_match and phone_match:
+        info['timestamp'] = timestamp_match.group(1)
+        info['phone'] = phone_match.group(1)
+
+        # Extract rest of the message
+        content = message.split(':', 1)[1].strip()
+
+        # Extract name
+        name_match = re.match(r'^([^•\n-]+)', content)
+        if name_match:
+            info['name'] = name_match.group(1).strip()
+
+        # Extract affiliation
+        affiliation_match = re.search(r'[Aa]ffiliation:?\s*([^•\n]+)', content)
+        if affiliation_match:
+            info['affiliation'] = affiliation_match.group(1).strip()
+
+        # Extract research field/interests
+        field_match = re.search(r'([Ff]ield of [Ii]nterest|[Dd]omaine de recherche|[Rr]esearch area|[Aa]reas of interest):?\s*([^•\n]+)', content)
+        if field_match:
+            info['research_field'] = field_match.group(2).strip()
+
+        # Extract thesis topic
+        thesis_match = re.search(r'[Tt]hesis:?\s*([^•\n]+)', content)
+        if thesis_match:
+            info['thesis_topic'] = thesis_match.group(1).strip()
+
+        # Extract LinkedIn URL
+        linkedin_match = re.search(r'https?://(?:www\.)?linkedin\.com\S+', content)
+        if linkedin_match:
+            info['linkedin'] = linkedin_match.group(0)
+
+    return info
+
+def create_researcher_df(chat_history):
+    """Convert chat messages to structured DataFrame."""
+    researchers = []
+    messages = chat_history.split('\n')
+
+    for message in messages:
+        if message.strip():
+            info = parse_message(message)
+            if info:
+                researchers.append(info)
+
+    df = pd.DataFrame(researchers)
+    return df
+
+def analyze_research_fields(df):
+    """Analyze and categorize research fields."""
+    if 'research_field' not in df.columns:
+        return pd.Series()
+
+    fields = df['research_field'].dropna()
+    # Split fields and flatten
+    all_fields = [field.strip().lower() for fields_list in fields for field in fields_list.split(',')]
+    return pd.Series(Counter(all_fields))
+
+def create_visualizations(df):
+    """Create visualizations from the researcher data."""
+    figures = []
+
+    # 1. Affiliation Distribution
+    if 'affiliation' in df.columns:
+        affiliation_counts = df['affiliation'].value_counts()
+        fig_affiliation = px.pie(
+            values=affiliation_counts.values,
+            names=affiliation_counts.index,
+            title='Distribution of Researchers by Affiliation'
+        )
+        figures.append(fig_affiliation)
+
+    # 2. Research Fields Analysis
+    field_counts = analyze_research_fields(df)
+    if not field_counts.empty:
+        fig_fields = px.bar(
+            x=field_counts.index,
+            y=field_counts.values,
+            title='Popular Research Fields',
+            labels={'x': 'Field', 'y': 'Count'}
+        )
+        figures.append(fig_fields)
+
+    return figures
 
 def respond(
     message,
@@ -14,51 +114,72 @@ def respond(
     max_tokens,
     temperature,
     top_p,
+    chat_history_text=""
 ):
+    """Enhanced response function with data analysis capabilities."""
+    # Process chat history if provided
+    if chat_history_text:
+        df = create_researcher_df(chat_history_text)
+
+        # Generate analysis summary
+        summary = f"Analysis of {len(df)} researchers:\n"
+        if 'affiliation' in df.columns:
+            summary += f"- Institutions represented: {df['affiliation'].nunique()}\n"
+
+        field_counts = analyze_research_fields(df)
+        if not field_counts.empty:
+            top_fields = field_counts.nlargest(3)
+            summary += "- Top research fields:\n"
+            for field, count in top_fields.items():
+                summary += f" • {field}: {count} researchers\n"
+
+        # Create visualizations
+        figures = create_visualizations(df)
+
+        # Add analysis to message
+        message += f"\n\nCommunity Analysis:\n{summary}"
+
+    # Generate response using the LLM
    messages = [{"role": "system", "content": system_message}]
-
    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})
-
+
    messages.append({"role": "user", "content": message})
-
+
    response = ""
-
-    for message in client.chat_completion(
+    for token in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
-        token = message.choices[0].delta.content
-
-        response += token
+        token_content = token.choices[0].delta.content
+        response += token_content
        yield response
 
-
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+# Create enhanced Gradio interface
+demo = gr.Interface(
+    fn=respond,
+    inputs=[
+        gr.Textbox(label="Message"),
+        gr.State([]), # history
+        gr.Textbox(value="You are a friendly Research Community Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
+        gr.Textbox(label="Chat History", lines=10)
+    ],
+    outputs=[
+        gr.Textbox(label="Response"),
+        gr.Plot(label="Community Analysis")
    ],
+    title="Research Community Analyzer",
+    description="An enhanced chatbot that analyzes research community data and provides visualizations."
 )
 
-
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
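
For reference, a minimal sketch of how the new parsing and analysis helpers could be exercised outside the Space. The sample chat lines are hypothetical (this commit does not document the exact chat-export format), but they follow the `[timestamp] phone: Name • Affiliation: … • Field of interest: …` pattern that `parse_message` looks for; the sketch also assumes the Space's dependencies (pandas, plotly, spacy with `en_core_web_sm`) are installed locally so that `app.py` can be imported.

```python
# Hypothetical usage sketch — the sample names, numbers and URLs are made up.
from app import create_researcher_df, analyze_research_fields, create_visualizations

sample_history = "\n".join([
    "[12/05/2024] +212600000001: Amina El Idrissi • Affiliation: UM6P • "
    "Field of interest: NLP, machine learning • LinkedIn: https://linkedin.com/in/amina-example",
    "[12/05/2024] +212600000002: Youssef Benali • Affiliation: ENSIAS • "
    "Areas of interest: computer vision • Thesis: medical image segmentation",
])

df = create_researcher_df(sample_history)      # one row per parsed message
print(df[["name", "affiliation", "research_field"]])

field_counts = analyze_research_fields(df)     # counts of comma-split, lowercased fields
print(field_counts.nlargest(3))

for fig in create_visualizations(df):          # affiliation pie chart + research-field bar chart
    fig.show()
```

Fields that are absent from a given message simply show up as NaN in the resulting DataFrame, which the value_counts/dropna calls in the analysis functions tolerate.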
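Likewise, a rough sketch of driving the updated `respond` generator directly, without the Gradio UI. It assumes the two `respond` parameters outside the diff context are still the template's `history` and `system_message` (the function body references both), and it needs access to the hosted `HuggingFaceH4/zephyr-7b-beta` Inference API (for example via an `HF_TOKEN` environment variable) to actually stream tokens.

```python
from app import respond

final = ""
for partial in respond(
    message="Give a one-sentence overview of this community.",
    history=[],                                   # no prior chat turns
    system_message="You are a friendly Research Community Chatbot.",
    max_tokens=128,
    temperature=0.7,
    top_p=0.95,
    chat_history_text=sample_history,             # hypothetical sample from the sketch above
):
    final = partial                               # respond yields the growing response text

print(final)
```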