Spaces:

histofyai
/

HarmonizeReports

Runtime error

App Files Files Community

srijaydeshpande committed on Dec 2, 2024

Commit

152a01f

verified ·

1 Parent(s): 9e346b5

Create app.py

Browse files

Files changed (1) hide show

app.py +129 -0

app.py ADDED Viewed

	@@ -0,0 +1,129 @@

+import re
+import gradio as gr
+import os
+import accelerate
+import spaces
+from tqdm import tqdm
+import subprocess
+from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
+from huggingface_hub import login
+from docling.document_converter import DocumentConverter
+# Authenticate against the Hugging Face Hub only when a token is configured.
+# Per the huggingface_hub documentation, login() without a token falls back
+# to an interactive prompt, which cannot be answered in a headless process.
+hf_token = os.getenv('HF_TOKEN')
+if hf_token:
+    login(token=hf_token)
+
+# Quantised GGUF weights to fetch, and the directory they are stored in.
+repo_id = "QuantFactory/Meta-Llama-3-70B-Instruct-GGUF"
+model_id = "Meta-Llama-3-70B-Instruct.Q2_K.gguf"
+local_dir = "models"
+
+# Download the model file at import time so it is on disk before the first
+# request is handled.
+hf_hub_download(
+    repo_id=repo_id,
+    filename=model_id,
+    local_dir=local_dir,
+)
+def harmonize_doc(llm, pdftext, prompt, maxtokens, temperature, top_probability, model_name):
+    """Ask the LLM to rewrite one medical report into the standard structure.
+
+    Args:
+        llm: Object exposing ``create_chat_completion`` (a ``llama_cpp.Llama``
+            instance in this file).
+        pdftext: Text of the report; sent as the user message.
+        prompt: Instructions for the model. When empty, blank or ``None`` the
+            built-in harmonization template below is used instead.
+        maxtokens: Forwarded as ``max_tokens``.
+        temperature: Forwarded as ``temperature``.
+        top_probability: Forwarded as ``top_p`` when it is not ``None``.
+        model_name: Accepted for interface compatibility; not used here.
+
+    Returns:
+        The message content of the first completion choice. If the first
+        three whitespace-separated words of ``pdftext`` occur in that
+        content, everything before the occurrence is dropped and the result
+        is stripped.
+    """
+    default_prompt = """
+            Please reformat the provided medical report into the following standardized structure:
+            1. Hospital Information:
+               - Name of Hospital: [Name of hospital]
+               - Department: [Relevant department or 'N/A']
+            2. Patient Information:
+               - Name: [Full Name]
+               - Gender: [Gender]
+               - Date of Birth: [Date of Birth]
+               - Address: [Full Address or 'N/A']
+               - ID Numbers:
+                 - [Relevant identifiers such as NHS Number, Case Number, etc.]
+            3. Procedure Details:
+               - Date of Procedure: [Date]
+               - Referring Doctor: [Name or 'N/A']
+               - Performed By:
+                 - Consultant: [Name or 'N/A']
+                 - Additional Clinicians: [Name(s) or 'N/A']
+                 - Nurses: [Name(s) or 'N/A']
+               - Details:
+                 - Indications: [Symptoms, reasons for procedure]
+                 - Instrument: [Instrument details or 'N/A']
+                 - Co-morbidities: [Relevant conditions or 'N/A']
+                 - ASA Status: [ASA classification or 'N/A']
+                 - Procedure: [Details of patient preparation and exact description of procedures performed as in the original report or 'N/A']
+                 - Findings: [Exact findings from the report, including any locations, measurements, or observations]
+                 - Specimens Taken: [Details on specimens, if any, or 'N/A']
+                 - Comments: [Additional notes, advice, or remarks from the report]
+            4. Diagnosis and Outcomes:
+               - Diagnosis: [Exact diagnosis or 'N/A']
+               - Therapeutic Actions: [Treatments performed or 'N/A']
+               - Complications: [Details on complications or 'No complications']
+               - Follow-Up: [Exact follow-up recommendations from the report]
+            Instructions for Output:
+            1. Use the exact wording and details from the original report wherever possible. Do not summarize or interpret information.
+            2. If any information is missing in the original report, use 'N/A' for the corresponding field.
+            3. Ensure the output matches the given structure exactly, without omitting any fields.
+            4. Retain all medical terms, values, and phrases as stated in the report.
+            """
+    # Honour a caller-supplied prompt; fall back to the template only when
+    # none was given (previously the argument was always overwritten).
+    if not (prompt and prompt.strip()):
+        prompt = default_prompt
+
+    sampling = {"max_tokens": maxtokens, "temperature": temperature}
+    # Forward nucleus sampling only when a value was supplied, so the
+    # backend default still applies for None.
+    if top_probability is not None:
+        sampling["top_p"] = top_probability
+
+    response = llm.create_chat_completion(
+        messages=[
+            # Instructions belong in the system turn; sending them as an
+            # assistant turn presents them as something the model said.
+            {"role": "system", "content": prompt},
+            {"role": "user", "content": pdftext},
+        ],
+        **sampling,
+    )
+    content = response['choices'][0]['message']['content']
+
+    # Drop anything the model emitted before an echo of the report's opening
+    # three words.
+    # NOTE(review): if those words legitimately appear inside the structured
+    # answer, this also cuts off the beginning of the answer - confirm intent.
+    anchor = ' '.join(pdftext.split()[:3])
+    start = content.find(anchor)
+    if start != -1:
+        content = content[start:].strip()
+    return content
+@spaces.GPU(duration=120)
+def pdf_to_text(files, input_text='', prompt='', model_name='default', temperature=0, maxtokens=2048, top_probability=0.95):
+    """Convert each uploaded document to markdown and harmonize it with the LLM.
+
+    Args:
+        files: Iterable of uploaded documents, each passed to
+            ``DocumentConverter.convert``. ``None`` or an empty collection
+            yields an empty result.
+        input_text: Accepted for interface compatibility; not used, because
+            the text always comes from the converted documents.
+        prompt: Forwarded to ``harmonize_doc``.
+        model_name: Forwarded to ``harmonize_doc``.
+        temperature: Forwarded to ``harmonize_doc``.
+        maxtokens: Forwarded to ``harmonize_doc``.
+        top_probability: Forwarded to ``harmonize_doc``.
+
+    Returns:
+        The harmonized reports concatenated in upload order, each followed
+        by a dashed separator line.
+    """
+    # Submitting without an upload gives no files; bail out before paying
+    # for the model load instead of failing in the loop.
+    if not files:
+        return ''
+
+    llm = Llama(
+        # Same directory the weights were downloaded into at import time.
+        model_path=os.path.join(local_dir, model_id),
+        flash_attn=True,
+        n_gpu_layers=81,
+        n_batch=1024,
+        n_ctx=8192,
+    )
+    # One converter serves every file; it does not need rebuilding per file.
+    converter = DocumentConverter()
+    separator = '\n\n-----------------------------------------------------------------\n\n'
+
+    sections = []
+    for file in files:
+        pdftext = converter.convert(file).document.export_to_markdown()
+        sections.append(
+            harmonize_doc(llm, pdftext, prompt, maxtokens, temperature, top_probability, model_name)
+        )
+    # Every report, including the last, is followed by the separator.
+    return ''.join(section + separator for section in sections)
+# Standalone components. None of these is passed to the Interface below,
+# which only receives `input_files`.
+temp_slider = gr.Slider(
+    minimum=0,
+    maximum=2,
+    value=0.9,
+    label="Temperature Value",
+)
+model_name = gr.Dropdown(
+    ["default", "fine-tuned"],
+    label="LLama Model",
+)
+max_tokens = gr.Number(value=600, label="Max Tokens")
+input_text = gr.Text(label='Input Text')
+input_prompt = gr.Text(label='Prompt')
+output_path_component = gr.File(label="Select Output Path")
+
+# Multi-file upload: the single input of the interface.
+input_files = gr.File(file_count="multiple")
+
+# Single-function UI: uploaded files in, harmonized text out. All other
+# pdf_to_text parameters keep their defaults because only the files are wired.
+iface = gr.Interface(
+    fn=pdf_to_text,
+    inputs=input_files,
+    outputs=['text'],
+    title='COBIx Endoscopy Report Harmonization',
+    description="This application helps standardize medical reports into a consistent format",
+    theme=gr.themes.Soft(),
+)
+
+# Start the web app when the module is executed.
+iface.launch()