Spaces:

halimbahae
/

CohortBot

Sleeping

App Files Files Community

halimbahae committed on Dec 23, 2024

Commit

3683f9c

verified ·

1 Parent(s): 94447f8

Update app.py

Browse files

Files changed (1) hide show

app.py +149 -28

app.py CHANGED Viewed

@@ -1,11 +1,111 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 def respond(
     message,
@@ -14,51 +114,72 @@ def respond(
     max_tokens,
     temperature,
     top_p,
 ):
     messages = [{"role": "system", "content": system_message}]
     for val in history:
         if val[0]:
             messages.append({"role": "user", "content": val[0]})
         if val[1]:
             messages.append({"role": "assistant", "content": val[1]})
     messages.append({"role": "user", "content": message})
     response = ""
-    for message in client.chat_completion(
         messages,
         max_tokens=max_tokens,
         stream=True,
         temperature=temperature,
         top_p=top_p,
     ):
-        token = message.choices[0].delta.content
-        response += token
         yield response
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
     ],
 )
 if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
+import pandas as pd
+import re
 from huggingface_hub import InferenceClient
+import spacy
+from collections import Counter
+import plotly.express as px
+import plotly.graph_objects as go
+from datetime import datetime
+# Load SpaCy model for NLP
+nlp = spacy.load("en_core_web_sm")
+# Initialize Hugging Face client
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
def parse_message(message):
    """Extract researcher details from a single exported chat message.

    The message must contain a ``[timestamp] sender:`` header; everything
    after the sender's colon is treated as the free-text content that the
    labelled fields are searched in.

    Args:
        message: Raw text of one chat message (may span several lines).

    Returns:
        A dict that always holds 'timestamp' and 'phone' when the header is
        present, plus whichever of 'name', 'affiliation', 'research_field',
        'thesis_topic' and 'linkedin' could be found. An empty dict is
        returned when the header is missing.
    """
    info = {}

    timestamp_match = re.search(r'\[(.*?)\]', message)
    phone_match = re.search(r'\] (.*?):', message)
    if not (timestamp_match and phone_match):
        return info

    info['timestamp'] = timestamp_match.group(1)
    info['phone'] = phone_match.group(1)

    # The content starts right after the sender's colon. Splitting the raw
    # message on its first ':' would cut inside timestamps such as
    # "10:15:30" and leak the rest of the header into the content.
    content = message[phone_match.end():].strip()

    # The name is the leading run of text up to the first bullet, newline
    # or hyphen.
    name_match = re.match(r'^([^•\n-]+)', content)
    if name_match:
        info['name'] = name_match.group(1).strip()

    # Labelled fields: the value runs until the next bullet or end of line.
    labelled_fields = (
        ('affiliation', r'[Aa]ffiliation:?\s*([^•\n]+)'),
        (
            'research_field',
            r'(?:[Ff]ield of [Ii]nterest|[Dd]omaine de recherche'
            r'|[Rr]esearch area|[Aa]reas of interest):?\s*([^•\n]+)',
        ),
        ('thesis_topic', r'[Tt]hesis:?\s*([^•\n]+)'),
    )
    for key, pattern in labelled_fields:
        found = re.search(pattern, content)
        if found:
            info[key] = found.group(1).strip()

    linkedin_match = re.search(r'https?://(?:www\.)?linkedin\.com\S+', content)
    if linkedin_match:
        info['linkedin'] = linkedin_match.group(0)

    return info
def create_researcher_df(chat_history):
    """Convert an exported chat log into a DataFrame of researchers.

    A line containing a ``[timestamp] sender:`` header starts a new message.
    Non-blank lines without such a header are treated as continuations of
    the previous message, so fields written on later lines of a multi-line
    introduction are still seen by ``parse_message``. Lines that appear
    before the first header are ignored.

    Args:
        chat_history: Full chat export as one string, one or more lines per
            message.

    Returns:
        A pandas DataFrame with one row per message that ``parse_message``
        could extract information from (empty when there is none).
    """
    grouped_messages = []
    for line in chat_history.splitlines():
        if not line.strip():
            continue
        # Same header test parse_message applies before extracting anything.
        starts_message = (
            re.search(r'\[(.*?)\]', line) and re.search(r'\] (.*?):', line)
        )
        if starts_message:
            grouped_messages.append(line)
        elif grouped_messages:
            grouped_messages[-1] += '\n' + line

    researchers = [
        info for info in map(parse_message, grouped_messages) if info
    ]
    return pd.DataFrame(researchers)
def analyze_research_fields(df):
    """Count how often each research field is mentioned.

    Every non-null 'research_field' entry is split on commas; each piece is
    stripped and lower-cased before counting. Empty pieces (from trailing or
    doubled commas) are skipped so they do not show up as a field named ''.

    Args:
        df: DataFrame that may contain a 'research_field' column of
            comma-separated strings.

    Returns:
        An int64 pandas Series mapping field name to occurrence count, in
        first-seen order. The Series is empty when the column is missing or
        holds no usable values.
    """
    if 'research_field' not in df.columns:
        # Explicit dtype keeps the empty result consistent with the
        # populated one.
        return pd.Series(dtype='int64')

    counts = Counter(
        token
        for entry in df['research_field'].dropna()
        for token in (piece.strip().lower() for piece in entry.split(','))
        if token
    )
    return pd.Series(counts, dtype='int64')
def create_visualizations(df):
    """Build Plotly figures summarising the researcher table.

    Args:
        df: DataFrame of researchers, optionally holding 'affiliation' and
            'research_field' columns.

    Returns:
        A list of figures: a pie chart of affiliation counts when the
        'affiliation' column exists, followed by a bar chart of research
        field counts when any fields were found.
    """
    charts = []

    # Pie chart: share of researchers per affiliation.
    if 'affiliation' in df.columns:
        per_affiliation = df['affiliation'].value_counts()
        charts.append(
            px.pie(
                values=per_affiliation.values,
                names=per_affiliation.index,
                title='Distribution of Researchers by Affiliation',
            )
        )

    # Bar chart: how often each research field is mentioned.
    per_field = analyze_research_fields(df)
    if per_field.empty:
        return charts

    charts.append(
        px.bar(
            x=per_field.index,
            y=per_field.values,
            title='Popular Research Fields',
            labels={'x': 'Field', 'y': 'Count'},
        )
    )
    return charts
 def respond(
     message,
     max_tokens,
     temperature,
     top_p,
+    chat_history_text=""
 ):
+    """Enhanced response function with data analysis capabilities."""
+    # Process chat history if provided
+    if chat_history_text:
+        df = create_researcher_df(chat_history_text)
+        # Generate analysis summary
+        summary = f"Analysis of {len(df)} researchers:\n"
+        if 'affiliation' in df.columns:
+            summary += f"- Institutions represented: {df['affiliation'].nunique()}\n"
+        field_counts = analyze_research_fields(df)
+        if not field_counts.empty:
+            top_fields = field_counts.nlargest(3)
+            summary += "- Top research fields:\n"
+            for field, count in top_fields.items():
+                summary += f"  • {field}: {count} researchers\n"
+        # Create visualizations
+        figures = create_visualizations(df)
+        # Add analysis to message
+        message += f"\n\nCommunity Analysis:\n{summary}"
+    # Generate response using the LLM
     messages = [{"role": "system", "content": system_message}]
     for val in history:
         if val[0]:
             messages.append({"role": "user", "content": val[0]})
         if val[1]:
             messages.append({"role": "assistant", "content": val[1]})
     messages.append({"role": "user", "content": message})
     response = ""
+    for token in client.chat_completion(
         messages,
         max_tokens=max_tokens,
         stream=True,
         temperature=temperature,
         top_p=top_p,
     ):
+        token_content = token.choices[0].delta.content
+        response += token_content
         yield response
# Gradio UI wiring. The input components are listed in the order their
# values are handed to `respond`.
# NOTE(review): two output components are declared below, but `respond` as
# written yields a single string per step - confirm the outputs match.
_interface_inputs = [
    gr.Textbox(label="Message"),
    gr.State([]),  # history
    gr.Textbox(
        value="You are a friendly Research Community Chatbot.",
        label="System message",
    ),
    gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
    gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
    gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
    gr.Textbox(label="Chat History", lines=10),
]

_interface_outputs = [
    gr.Textbox(label="Response"),
    gr.Plot(label="Community Analysis"),
]

demo = gr.Interface(
    fn=respond,
    inputs=_interface_inputs,
    outputs=_interface_outputs,
    title="Research Community Analyzer",
    description="An enhanced chatbot that analyzes research community data and provides visualizations.",
)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()