# Hugging Face Space: liang-huggingface/PubmedSearch
# Space status when this file was captured: Runtime error
# File size: 7,753 Bytes

import gradio as gr
import pandas as pd
from Bio import Entrez
import requests

import os 

# Hugging Face API token, read from the HF_API environment variable
# (None when the variable is not set).
HF_API = os.getenv('HF_API')

from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Local model loading is switched off by the constant `if False` guard, so the
# names `tokenizer` and `model` are never bound at module level here.
# generate_summary() and generate_response() read those globals and will raise
# NameError if called while this block stays disabled.
if False:
    # Load the model and tokenizer
    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-7B-Chat", trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-7B-Chat", device_map="auto",trust_remote_code=True).eval()

def generate_summary(prompt):
    """Summarize *prompt* with the locally loaded causal language model.

    Reads the module-level ``tokenizer`` and ``model`` globals, which must
    have been loaded before this function is called.

    Args:
        prompt: The text to summarize.

    Returns:
        The newly generated text as a string, without the echoed prompt.
    """
    # Add instructions to the prompt to signal that you want a summary
    instructions = "Summarize the following text:"
    prompt_with_instructions = f"{instructions}\n{prompt}"

    # Tokenize and place the ids on the model's device; a model loaded with
    # device_map="auto" may not live on the CPU.
    inputs = tokenizer.encode(prompt_with_instructions, return_tensors="pt")
    inputs = inputs.to(model.device)

    # max_new_tokens budgets only the continuation. max_length would also
    # count the prompt, leaving no room to generate for long inputs.
    with torch.no_grad():
        outputs = model.generate(
            inputs,
            max_new_tokens=512,
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id,
        )

    # A causal LM returns prompt + continuation; keep only the continuation.
    generated_ids = outputs[0][inputs.shape[-1]:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True)

def generate_response(prompt):
    """Generate a continuation of *prompt* with the locally loaded model.

    Reads the module-level ``tokenizer`` and ``model`` globals, which must
    have been loaded before this function is called.

    Args:
        prompt: The text to feed to the model.

    Returns:
        The newly generated text as a string, without the echoed prompt.
    """
    # Tokenize and place the ids on the model's device; a model loaded with
    # device_map="auto" may not live on the CPU.
    inputs = tokenizer.encode(prompt, return_tensors="pt")
    inputs = inputs.to(model.device)

    # max_new_tokens budgets only the continuation. max_length would also
    # count the prompt, leaving no room to generate for long inputs.
    with torch.no_grad():
        outputs = model.generate(
            inputs,
            max_new_tokens=512,
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id,
        )

    # A causal LM returns prompt + continuation; keep only the continuation.
    generated_ids = outputs[0][inputs.shape[-1]:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True)
    
# Function to search PubMed for articles
def _author_name(author):
    """Return a display name for one parsed PubMed author entry."""
    last_name = author.get('LastName')
    if last_name is None:
        # Group authors carry a CollectiveName instead of LastName/Initials.
        return str(author.get('CollectiveName', ''))
    return last_name + ' ' + author.get('Initials', '')


def _article_to_row(article):
    """Flatten one parsed PubmedArticle record into a PMID/Authors/Title/Abstract dict."""
    citation = article['MedlineCitation']
    details = citation['Article']
    names = [_author_name(author) for author in details.get('AuthorList', [])]
    # Structured abstracts are split into several sections; keep all of them.
    sections = details.get('Abstract', {}).get('AbstractText', [])
    return {
        'PMID': str(citation['PMID']),
        'Authors': ' '.join(name for name in names if name),
        'Title': str(details['ArticleTitle']),
        'Abstract': ' '.join(str(section) for section in sections) if sections else None,
    }


def search_pubmed(query, retmax):
    """Search PubMed and return the matching articles as a DataFrame.

    Args:
        query: Search term passed to ``Entrez.esearch``.
        retmax: Maximum number of articles to fetch. Coerced to ``int`` in
            case the caller (e.g. a UI slider) delivers a float.

    Returns:
        A ``pandas.DataFrame`` with the columns ``PMID``, ``Authors``,
        ``Title`` and ``Abstract``, one row per article. The frame is empty
        (but keeps these columns) when the query is blank or matches nothing.
    """
    columns = ['PMID', 'Authors', 'Title', 'Abstract']
    if not str(query or '').strip():
        return pd.DataFrame(columns=columns)

    Entrez.email = '[email protected]'
    handle = Entrez.esearch(db="pubmed", term=query, retmax=int(retmax))
    try:
        record = Entrez.read(handle)
    finally:
        handle.close()

    idlist = record['IdList']
    if not idlist:
        # Do not call efetch without ids; there is nothing to fetch.
        return pd.DataFrame(columns=columns)

    handle = Entrez.efetch(db="pubmed", id=idlist, retmode="xml")
    try:
        articles = Entrez.read(handle)['PubmedArticle']
    finally:
        handle.close()

    rows = [_article_to_row(article) for article in articles]
    return pd.DataFrame(rows, columns=columns)

# Function to summarize articles using Hugging Face's API
def summarize_with_huggingface(model, selected_articles, USE_LOCAL=False):
    """Summarize the given articles with a Hugging Face model.

    Args:
        model: Model id used to build the hosted inference API URL.
        selected_articles: Articles to summarize, either a pandas DataFrame
            or an iterable of dicts, with the keys/columns ``PMID``,
            ``Authors``, ``Title`` and ``Abstract``.
        USE_LOCAL: When true, call ``generate_response`` instead of the
            hosted inference API.

    Returns:
        The generated text, or a short notice when there are no articles.

    Raises:
        requests.HTTPError: The API answered with an unsuccessful status.
        KeyError: The API response has no generated or summary text.
    """
    if hasattr(selected_articles, "to_dict"):
        records = selected_articles.to_dict(orient='records')
    else:
        records = list(selected_articles or [])
    if not records:
        return "No articles to summarize. Run a search first."

    # Prepare the text to summarize: concatenate all articles
    text_to_summarize = " ".join(
        f"PMID: {article['PMID']}. Authors: {article['Authors']}. Title: {article['Title']}. Abstract: {article['Abstract']}."
        for article in records
    )

    if USE_LOCAL:
        # The local generator already returns plain text, not an HTTP response.
        return generate_response(text_to_summarize)

    api_url = f"https://api-inference.huggingface.co/models/{model}"
    # Send the Authorization header only when a token is actually configured.
    headers = {"Authorization": f"Bearer {HF_API}"} if HF_API else {}
    payload = {
        "inputs": text_to_summarize,
        "parameters": {"max_length": 300},  # Adjust as needed
    }

    # A timeout keeps the UI from hanging forever on an unresponsive endpoint.
    response = requests.post(api_url, headers=headers, json=payload, timeout=120)
    response.raise_for_status()

    result = response.json()
    first = result[0] if isinstance(result, list) else result
    # Text-generation models answer with 'generated_text', summarization
    # models with 'summary_text'.
    for key in ('generated_text', 'summary_text'):
        if key in first:
            return first[key]
    raise KeyError('generated_text')


    

import gradio as gr
from Bio import Entrez

# Always tell NCBI who you are
# NOTE(review): the value below looks like a redacted placeholder rather than
# a real address; confirm and replace it with a valid contact email.
Entrez.email = "[email protected]"


def process_query(keywords, top_k):
    """Search PubMed and return the hits as rows for a table display.

    Args:
        keywords: Search term forwarded to ``search_pubmed``.
        top_k: Maximum number of articles to retrieve.

    Returns:
        A list of ``[PMID, Authors, Title, Abstract]`` lists, one per article.
    """
    articles = search_pubmed(keywords, top_k)
    # search_pubmed returns a DataFrame; iterating it directly would yield
    # column names, so select the columns by name and convert the rows.
    # reindex tolerates a frame that lacks these columns (e.g. an empty one).
    ordered = articles.reindex(columns=['PMID', 'Authors', 'Title', 'Abstract'])
    return ordered.values.tolist()


def summarize_articles(indices, articles_for_display, model="h2oai/h2ogpt-4096-llama2-7b-chat"):
    """Summarize the articles at the given row positions.

    Args:
        indices: Comma-separated, zero-based row positions, e.g. ``"0, 2"``.
            Tokens that are not plain decimal numbers are ignored.
        articles_for_display: pandas DataFrame holding the search results.
        model: Model id forwarded to ``summarize_with_huggingface``.

    Returns:
        The generated summary, or a short notice when no usable row position
        was supplied.
    """
    tokens = [token.strip() for token in (indices or "").split(',')]
    # isdecimal() only accepts characters that int() can parse.
    requested = [int(token) for token in tokens if token.isdecimal()]

    # Drop positions that fall outside the table instead of raising IndexError.
    row_count = len(articles_for_display)
    valid = [position for position in requested if position < row_count]
    if not valid:
        return "No valid article indices were provided."

    # summarize_with_huggingface needs the model id first and a DataFrame of
    # the chosen rows second.
    selected_articles = articles_for_display.iloc[valid]
    return summarize_with_huggingface(model, selected_articles)

# Prefer a secret supplied through the APP_PASSWORD environment variable.
# NOTE(review): the "pass" fallback keeps the previous behavior but is a
# credential committed to source; set APP_PASSWORD and remove the fallback.
PASSWORD = os.getenv("APP_PASSWORD", "pass")


def check_password(password):
    """Check a submitted password against the configured one.

    Args:
        password: The password entered by the user.

    Returns:
        A ``(is_success, message)`` tuple.
    """
    import hmac

    # compare_digest avoids leaking how many leading characters matched.
    # Comparing UTF-8 bytes lets it accept non-ASCII input.
    if isinstance(password, str) and hmac.compare_digest(
        password.encode("utf-8"), PASSWORD.encode("utf-8")
    ):
        return True, "Welcome!"
    return False, "Incorrect username or password."
        
# Gradio interface
with gr.Blocks() as demo:

    gr.Markdown("### PubMed Article Summarizer")

    with gr.Row():
        password_input = gr.Textbox(label="Password", type="password")

        login_button = gr.Button("Login")
        login_result = gr.Textbox(label="Login Result", interactive=False)

    with gr.Row():
        # The model box, query box and summarize button start hidden and are
        # revealed by a successful login.
        model_input = gr.Textbox(
            label="Enter the model to use",
            value="h2oai/h2ogpt-4096-llama2-7b-chat",
            visible=False,
        )
        query_input = gr.Textbox(label="Query Keywords", visible=False)
        retmax_input = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Number of articles")
        search_button = gr.Button("Search")
        output_table = gr.Dataframe(headers=["PMID", "Authors", "Title", "Abstract"])
        summarize_button = gr.Button("Summarize", visible=False)
        summary_output = gr.Textbox()

    def process_login(password):
        """Validate the password and return the login message plus visibility updates."""
        is_success, message = check_password(password)
        # Visibility can only change through values returned to `outputs`;
        # assigning `.visible` on a component inside a handler has no effect.
        return (
            message,
            gr.update(visible=is_success),
            gr.update(visible=is_success),
            gr.update(visible=is_success),
        )

    login_button.click(
        process_login,
        inputs=[password_input],
        outputs=[login_result, model_input, query_input, summarize_button],
    )

    def update_output_table(query, retmax):
        """Run the PubMed search and return the DataFrame shown in the results table."""
        return search_pubmed(query, retmax)

    search_button.click(update_output_table, inputs=[query_input, retmax_input], outputs=output_table)
    summarize_button.click(fn=summarize_with_huggingface, inputs=[model_input, output_table], outputs=summary_output)

demo.launch(debug=True)

# Alternative UI that summarizes only the rows chosen by index. It is disabled
# by the constant `if False` guard and is never built or launched.
if False:
    with gr.Blocks() as demo:
        gr.Markdown("### PubMed Article Summarizer")
        with gr.Row():
            query_input = gr.Textbox(label="Query Keywords")
            top_k_input = gr.Slider(minimum=1, maximum=20, value=5,  step=1, label="Top K Results")
        search_button = gr.Button("Search")
        # Header order must match the rows built by process_query
        # (PMID, Authors, Title, Abstract); otherwise the columns are mislabeled.
        output_table = gr.Dataframe(headers=["PMID", "Authors", "Title", "Abstract"])
        indices_input = gr.Textbox(label="Enter indices of articles to summarize (comma-separated)")
        summarize_button = gr.Button("Summarize Selected Articles")
        summary_output = gr.Textbox(label="Summary")

        search_button.click(
            fn=process_query,
            inputs=[query_input, top_k_input],
            outputs=output_table
        )

        summarize_button.click(
            fn=summarize_articles,
            inputs=[indices_input, output_table],
            outputs=summary_output
        )

    demo.launch(debug=True)