srijaydeshpande committed on
Commit 152a01f · verified · Parent: 9e346b5

Create app.py

Files changed (1)
  1. app.py +129 -0
app.py ADDED
@@ -0,0 +1,129 @@
+ import re
+ import gradio as gr
+ import os
+ import accelerate
+ import spaces
+ from tqdm import tqdm
+ import subprocess
+ from huggingface_hub import hf_hub_download
+ from llama_cpp import Llama
+ from huggingface_hub import login
+ from docling.document_converter import DocumentConverter
+
+ login(token=os.getenv('HF_TOKEN'))
+
+ repo_id = "QuantFactory/Meta-Llama-3-70B-Instruct-GGUF"
+ model_id = "Meta-Llama-3-70B-Instruct.Q2_K.gguf"
+
+ local_dir = "models"
+
+ hf_hub_download(
+     repo_id=repo_id,
+     filename=model_id,
+     local_dir=local_dir
+ )
+
+ def harmonize_doc(llm, pdftext, prompt, maxtokens, temperature, top_probability, model_name):
+
+     prompt = """
+     Please reformat the provided medical report into the following standardized structure:
+
+     1. Hospital Information:
+        - Name of Hospital: [Name of hospital]
+        - Department: [Relevant department or 'N/A']
+
+     2. Patient Information:
+        - Name: [Full Name]
+        - Gender: [Gender]
+        - Date of Birth: [Date of Birth]
+        - Address: [Full Address or 'N/A']
+        - ID Numbers:
+          - [Relevant identifiers such as NHS Number, Case Number, etc.]
+
+     3. Procedure Details:
+        - Date of Procedure: [Date]
+        - Referring Doctor: [Name or 'N/A']
+        - Performed By:
+          - Consultant: [Name or 'N/A']
+          - Additional Clinicians: [Name(s) or 'N/A']
+          - Nurses: [Name(s) or 'N/A']
+        - Details:
+          - Indications: [Symptoms, reasons for procedure]
+          - Instrument: [Instrument details or 'N/A']
+          - Co-morbidities: [Relevant conditions or 'N/A']
+          - ASA Status: [ASA classification or 'N/A']
+          - Procedure: [Details of patient preparation and exact description of procedures performed as in the original report or 'N/A']
+          - Findings: [Exact findings from the report, including any locations, measurements, or observations]
+          - Specimens Taken: [Details on specimens, if any, or 'N/A']
+          - Comments: [Additional notes, advice, or remarks from the report]
+
+     4. Diagnosis and Outcomes:
+        - Diagnosis: [Exact diagnosis or 'N/A']
+        - Therapeutic Actions: [Treatments performed or 'N/A']
+        - Complications: [Details on complications or 'No complications']
+        - Follow-Up: [Exact follow-up recommendations from the report]
+
+     Instructions for Output:
+     1. Use the exact wording and details from the original report wherever possible. Do not summarize or interpret information.
+     2. If any information is missing in the original report, use 'N/A' for the corresponding field.
+     3. Ensure the output matches the given structure exactly, without omitting any fields.
+     4. Retain all medical terms, values, and phrases as stated in the report.
+     """
+
+     # Ask the model to restructure the report: the formatting instructions go in the
+     # system message and the extracted report text in the user message.
+     output = llm.create_chat_completion(
+         messages=[
+             {"role": "system", "content": prompt},
+             {"role": "user", "content": pdftext}
+         ],
+         max_tokens=maxtokens,
+         temperature=temperature,
+         top_p=top_probability
+     )
+
+     output = output['choices'][0]['message']['content']
+     # If the reformatted report echoes the opening words of the input, trim any
+     # preamble the model added before them.
+     find_index = output.find(' '.join(pdftext.split()[:3]))
+     if find_index != -1:
+         output = output[find_index:].strip()
+     return output
+
+
+ @spaces.GPU(duration=120)
+ def pdf_to_text(files, input_text='', prompt='', model_name='default', temperature=0, maxtokens=2048, top_probability=0.95):
+     # Load the quantized Llama 3 70B model downloaded above.
+     llm = Llama(
+         model_path="models/" + model_id,
+         flash_attn=True,
+         n_gpu_layers=81,
+         n_batch=1024,
+         n_ctx=8192,
+     )
+     harmonized_text = ''
+     for file in files:
+         # Extract each PDF as markdown with docling, then harmonize it with the LLM.
+         converter = DocumentConverter()
+         result = converter.convert(file)
+         pdftext = result.document.export_to_markdown()
+         input_text = pdftext
+         harmonized_text += harmonize_doc(llm, input_text, prompt, maxtokens, temperature, top_probability, model_name)
+         harmonized_text += '\n\n-----------------------------------------------------------------\n\n'
+     return harmonized_text
+
+
+ # Gradio UI components. Only the file input is wired into the interface below;
+ # the remaining controls fall back to pdf_to_text's default argument values.
+ temp_slider = gr.Slider(minimum=0, maximum=2, value=0.9, label="Temperature Value")
+ model_name = gr.Dropdown(["default", "fine-tuned"], label="Llama Model")
+ max_tokens = gr.Number(value=600, label="Max Tokens")
+ input_text = gr.Text(label='Input Text')
+ input_prompt = gr.Text(label='Prompt')
+ input_files = gr.File(file_count="multiple")
+ output_path_component = gr.File(label="Select Output Path")
+
+ iface = gr.Interface(
+     fn=pdf_to_text,
+     inputs=input_files,
+     outputs=['text'],
+     title='COBIx Endoscopy Report Harmonization',
+     description="This application helps standardize medical reports into a consistent format.",
+     theme=gr.themes.Soft(),
+ )
+ iface.launch()