wakeupmh committed on
Commit
e348a54
·
1 Parent(s): 58be7e5

refactor: improve response

Browse files
Files changed (2) hide show
  1. app.py +48 -34
  2. requirements.txt +1 -1
app.py CHANGED
@@ -1,10 +1,9 @@
1
  import streamlit as st
2
- from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
3
- import os
4
- from datasets import load_from_disk, Dataset
5
  import torch
6
  import logging
7
- import pandas as pd
 
8
  import arxiv
9
  import requests
10
  import xml.etree.ElementTree as ET
@@ -17,14 +16,14 @@ logging.basicConfig(level=logging.INFO)
17
  DATA_DIR = "/data" if os.path.exists("/data") else "."
18
  DATASET_DIR = os.path.join(DATA_DIR, "rag_dataset")
19
  DATASET_PATH = os.path.join(DATASET_DIR, "dataset")
20
- MODEL_PATH = "t5-small" # Changed to T5-small for better CPU compatibility
21
 
22
  @st.cache_resource
23
  def load_local_model():
24
  """Load the local Hugging Face model"""
25
  try:
26
  tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
27
- model = AutoModelForSeq2SeqLM.from_pretrained(
28
  MODEL_PATH,
29
  device_map={"": "cpu"}, # Force CPU
30
  torch_dtype=torch.float32
@@ -206,37 +205,46 @@ def generate_answer(question, context, max_length=512):
206
 
207
  # Clean and format the context
208
  clean_context = clean_text(context)
 
209
 
210
- # Format the context as a structured query
211
- prompt = f"""You are an expert in autism research. Based on the following research papers, provide a clear and comprehensive answer about autism.
212
 
213
- Question: {clean_text(question)}
214
 
215
  Research Papers:
216
  {clean_context}
217
 
218
- Instructions: Please provide a well-structured response that:
219
- 1. Starts with a clear, general explanation of the topic
220
- 2. Includes specific findings from the research papers when relevant
221
- 3. Explains practical implications for people with autism and their families
222
- 4. Notes any limitations or areas needing more research
223
 
224
- Keep your answer focused, clear, and helpful for someone wanting to understand autism better."""
225
 
226
  try:
227
- # Generate response
228
- inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
 
 
 
 
229
 
230
  with torch.inference_mode():
231
  outputs = model.generate(
232
  **inputs,
233
  max_length=max_length,
234
- min_length=150,
235
- num_beams=4,
236
  length_penalty=1.5,
237
  temperature=0.7,
238
  repetition_penalty=1.2,
239
- early_stopping=True
 
 
 
 
240
  )
241
 
242
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
@@ -247,28 +255,34 @@ Keep your answer focused, clear, and helpful for someone wanting to understand a
247
  return f"""Here's what we know about autism in relation to your question:
248
 
249
  1. General Understanding:
250
- - Autism Spectrum Disorder (ASD) is a complex developmental condition
251
- - It affects how a person communicates, learns, and interacts with others
252
- - Each person with autism has unique strengths and challenges
 
253
 
254
- 2. Key Aspects:
255
- - Communication and social interaction patterns
256
  - Repetitive behaviors and specific interests
257
  - Sensory sensitivities
258
- - Early intervention is important
 
 
 
 
 
 
259
 
260
- 3. Research Focus:
261
- - Scientists are studying various aspects including:
262
- * Brain development and function
263
- * Genetic factors
264
- * Environmental influences
265
- * Effective interventions and supports
266
 
267
  For more specific information, try asking about:
268
- - Specific symptoms or characteristics
269
  - Diagnostic processes
270
  - Treatment approaches
271
- - Recent research findings"""
272
 
273
  # Format the response for better readability
274
  formatted_response = response.replace(". ", ".\n").replace("• ", "\n• ")
 
1
  import streamlit as st
2
+ import pandas as pd
 
 
3
  import torch
4
  import logging
5
+ import os
6
+ from transformers import AutoTokenizer, T5ForConditionalGeneration
7
  import arxiv
8
  import requests
9
  import xml.etree.ElementTree as ET
 
16
  DATA_DIR = "/data" if os.path.exists("/data") else "."
17
  DATASET_DIR = os.path.join(DATA_DIR, "rag_dataset")
18
  DATASET_PATH = os.path.join(DATASET_DIR, "dataset")
19
+ MODEL_PATH = "google/flan-t5-small" # Using flan-t5-small for better performance
20
 
21
  @st.cache_resource
22
  def load_local_model():
23
  """Load the local Hugging Face model"""
24
  try:
25
  tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
26
+ model = T5ForConditionalGeneration.from_pretrained(
27
  MODEL_PATH,
28
  device_map={"": "cpu"}, # Force CPU
29
  torch_dtype=torch.float32
 
205
 
206
  # Clean and format the context
207
  clean_context = clean_text(context)
208
+ clean_question = clean_text(question)
209
 
210
+ # Format the input for T5 (it expects a specific format)
211
+ input_text = f"""Answer the following question about autism using the provided research papers.
212
 
213
+ Question: {clean_question}
214
 
215
  Research Papers:
216
  {clean_context}
217
 
218
+ Instructions: Provide a detailed answer that:
219
+ 1. Explains the main concepts clearly
220
+ 2. Uses specific evidence from the research
221
+ 3. Discusses practical implications
222
+ 4. Notes any limitations
223
 
224
+ Answer:"""
225
 
226
  try:
227
+ # T5 expects a specific format for the input
228
+ inputs = tokenizer(input_text,
229
+ return_tensors="pt",
230
+ max_length=1024,
231
+ truncation=True,
232
+ padding=True)
233
 
234
  with torch.inference_mode():
235
  outputs = model.generate(
236
  **inputs,
237
  max_length=max_length,
238
+ min_length=100,
239
+ num_beams=5,
240
  length_penalty=1.5,
241
  temperature=0.7,
242
  repetition_penalty=1.2,
243
+ early_stopping=True,
244
+ no_repeat_ngram_size=3,
245
+ do_sample=True,
246
+ top_k=50,
247
+ top_p=0.95
248
  )
249
 
250
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
255
  return f"""Here's what we know about autism in relation to your question:
256
 
257
  1. General Understanding:
258
+ - Autism Spectrum Disorder (ASD) is a complex neurodevelopmental condition
259
+ - It affects how a person perceives, communicates, and interacts with the world
260
+ - Each individual with autism has unique strengths and challenges
261
+ - Early identification and support are crucial
262
 
263
+ 2. Key Characteristics:
264
+ - Social communication and interaction patterns
265
  - Repetitive behaviors and specific interests
266
  - Sensory sensitivities
267
+ - Variable cognitive and language abilities
268
+
269
+ 3. Important Considerations:
270
+ - Autism is a spectrum, meaning it affects each person differently
271
+ - Support needs vary from person to person
272
+ - Many individuals with autism have unique talents and abilities
273
+ - Research continues to improve our understanding
274
 
275
+ 4. Current Research Areas:
276
+ - Brain development and neurology
277
+ - Genetic and environmental factors
278
+ - Early intervention methods
279
+ - Support strategies and therapies
 
280
 
281
  For more specific information, try asking about:
282
+ - Specific autism characteristics
283
  - Diagnostic processes
284
  - Treatment approaches
285
+ - Latest research findings"""
286
 
287
  # Format the response for better readability
288
  formatted_response = response.replace(". ", ".\n").replace("• ", "\n• ")
requirements.txt CHANGED
@@ -1,5 +1,5 @@
1
  streamlit>=1.32.0
2
- transformers>=4.37.0
3
  datasets>=2.17.0
4
  --extra-index-url https://download.pytorch.org/whl/cpu
5
  torch>=2.2.0
 
1
  streamlit>=1.32.0
2
+ transformers==4.36.2
3
  datasets>=2.17.0
4
  --extra-index-url https://download.pytorch.org/whl/cpu
5
  torch>=2.2.0