"""Gradio app that converts uploaded reports to text and asks a Llama model
to rewrite each one into a fixed, standardized report structure."""

import os
import re
import subprocess

import accelerate
import gradio as gr
import spaces
from docling.document_converter import DocumentConverter
from huggingface_hub import hf_hub_download, login
from llama_cpp import Llama
from tqdm import tqdm

# Only authenticate when a token is configured; calling login() with
# token=None falls back to an interactive prompt, which cannot be answered
# in a headless deployment.
_hf_token = os.getenv('HF_TOKEN')
if _hf_token:
    login(token=_hf_token)

repo_id = "QuantFactory/Meta-Llama-3-70B-Instruct-GGUF"
model_id = "Meta-Llama-3-70B-Instruct.Q2_K.gguf"
local_dir = "models"

# Fetch the GGUF weights at start-up and remember where they were stored so
# the loader below does not have to rebuild the path by hand.
model_path = hf_hub_download(
    repo_id=repo_id,
    filename=model_id,
    local_dir=local_dir,
)

# Instructions used when the caller does not supply a prompt of its own.
DEFAULT_PROMPT = """
Please reformat the provided medical report into the following standardized structure:
1. Hospital Information:
   - Name of Hospital: [Name of hospital]
   - Department: [Relevant department or 'N/A']
2. Patient Information:
   - Name: [Full Name]
   - Gender: [Gender]
   - Date of Birth: [Date of Birth]
   - Address: [Full Address or 'N/A']
   - ID Numbers:
     - [Relevant identifiers such as NHS Number, Case Number, etc.]
3. Procedure Details:
   - Date of Procedure: [Date]
   - Referring Doctor: [Name or 'N/A']
   - Performed By:
     - Consultant: [Name or 'N/A']
     - Additional Clinicians: [Name(s) or 'N/A']
     - Nurses: [Name(s) or 'N/A']
   - Details:
     - Indications: [Symptoms, reasons for procedure]
     - Instrument: [Instrument details or 'N/A']
     - Co-morbidities: [Relevant conditions or 'N/A']
     - ASA Status: [ASA classification or 'N/A']
     - Procedure: [Details of patient preparation and exact description of procedures performed as in the original report or 'N/A']
     - Findings: [Exact findings from the report, including any locations, measurements, or observations]
     - Specimens Taken: [Details on specimens, if any, or 'N/A']
     - Comments: [Additional notes, advice, or remarks from the report]
4. Diagnosis and Outcomes:
   - Diagnosis: [Exact diagnosis or 'N/A']
   - Therapeutic Actions: [Treatments performed or 'N/A']
   - Complications: [Details on complications or 'No complications']
   - Follow-Up: [Exact follow-up recommendations from the report]
Instructions for Output:
1. Use the exact wording and details from the original report wherever possible. Do not summarize or interpret information.
2. If any information is missing in the original report, use 'N/A' for the corresponding field.
3. Ensure the output matches the given structure exactly, without omitting any fields.
4. Retain all medical terms, values, and phrases as stated in the report.
"""

# Separator written after every harmonized document in the combined output.
DOCUMENT_SEPARATOR = '\n\n-----------------------------------------------------------------\n\n'


def harmonize_doc(llm, pdftext, prompt, maxtokens, temperature, top_probability, model_name):
    """Ask the model to rewrite one report into the standardized structure.

    Args:
        llm: Loaded ``Llama`` instance used for the chat completion.
        pdftext: Text of the report to reformat; sent as the user message.
        prompt: Instructions for the model. When empty or whitespace-only,
            ``DEFAULT_PROMPT`` is used instead.
        maxtokens: Upper bound on generated tokens (``max_tokens``).
        temperature: Sampling temperature.
        top_probability: Nucleus-sampling threshold, forwarded as ``top_p``.
        model_name: Accepted for interface compatibility; not used here.

    Returns:
        The text of the first completion choice, trimmed as described below.
    """
    instructions = prompt if prompt and prompt.strip() else DEFAULT_PROMPT

    completion = llm.create_chat_completion(
        messages=[
            # Instructions belong in the system turn, not a fake assistant turn.
            {"role": "system", "content": instructions},
            {"role": "user", "content": pdftext},
        ],
        max_tokens=maxtokens,
        temperature=temperature,
        top_p=top_probability,
    )
    output = completion['choices'][0]['message']['content']

    # If the reply contains the first three words of the source text, drop
    # everything before that point (e.g. a preamble emitted by the model).
    leading_words = ' '.join(pdftext.split()[:3])
    find_index = output.find(leading_words)
    if find_index != -1:
        output = output[find_index:].strip()
    return output


@spaces.GPU(duration=120)
def pdf_to_text(files, input_text='', prompt='', model_name='default', temperature=0, maxtokens=2048, top_probability=0.95):
    """Convert each uploaded file to markdown and harmonize it with the model.

    Args:
        files: Iterable of uploaded files as handed over by the Gradio file
            input; each item is passed straight to
            ``DocumentConverter.convert``. ``None`` or empty yields ``''``.
        input_text: Accepted for interface compatibility; the text is always
            taken from the converted files.
        prompt: Optional instructions forwarded to ``harmonize_doc``.
        model_name: Forwarded to ``harmonize_doc`` (currently unused there).
        temperature: Sampling temperature.
        maxtokens: Upper bound on generated tokens per document.
        top_probability: Nucleus-sampling threshold.

    Returns:
        All harmonized reports concatenated, each followed by
        ``DOCUMENT_SEPARATOR``.
    """
    # Nothing uploaded: return early instead of loading the model and then
    # failing while iterating over None.
    if not files:
        return ''

    # The model is loaded inside the GPU-decorated function, as in the
    # original code.
    llm = Llama(
        model_path=model_path,
        flash_attn=True,
        n_gpu_layers=81,
        n_batch=1024,
        n_ctx=8192,
    )
    converter = DocumentConverter()  # one converter is reused for every file

    sections = []
    for file in files:
        result = converter.convert(file)
        pdftext = result.document.export_to_markdown()
        harmonized = harmonize_doc(
            llm, pdftext, prompt, maxtokens, temperature, top_probability, model_name
        )
        sections.append(harmonized + DOCUMENT_SEPARATOR)
    return ''.join(sections)


# NOTE: apart from input_files, the components below are created but are not
# passed to the Interface; they are kept so existing module-level names remain.
temp_slider = gr.Slider(minimum=0, maximum=2, value=0.9, label="Temperature Value")
model_name = gr.Dropdown(["default", "fine-tuned"], label="LLama Model")
max_tokens = gr.Number(value=600, label="Max Tokens")
input_text = gr.Text(label='Input Text')
input_prompt = gr.Text(label='Prompt')
input_files = gr.File(file_count="multiple")
output_path_component = gr.File(label="Select Output Path")

iface = gr.Interface(
    fn=pdf_to_text,
    inputs=input_files,
    outputs=['text'],
    title='COBIx Endoscopy Report Harmonization',
    description="This application helps standardize medical reports into a consistent format",
    theme=gr.themes.Soft(),
)

iface.launch()