Update app.py
Browse files
app.py
CHANGED
@@ -44,10 +44,8 @@ from typing import Optional
|
|
44 |
from urllib.parse import quote
|
45 |
from xml.etree import ElementTree as ET
|
46 |
|
47 |
-
# OpenAI client initialization
|
48 |
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'), organization=os.getenv('OPENAI_ORG_ID'))
|
49 |
|
50 |
-
# Logging setup
|
51 |
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
52 |
logger = logging.getLogger(__name__)
|
53 |
log_records = []
|
@@ -56,7 +54,6 @@ class LogCaptureHandler(logging.Handler):
|
|
56 |
log_records.append(record)
|
57 |
logger.addHandler(LogCaptureHandler())
|
58 |
|
59 |
-
# Streamlit configuration
|
60 |
st.set_page_config(
|
61 |
page_title="AI Multimodal Titan 🚀",
|
62 |
page_icon="🤖",
|
@@ -69,7 +66,6 @@ st.set_page_config(
|
|
69 |
}
|
70 |
)
|
71 |
|
72 |
-
# Session state initialization
|
73 |
for key in ['history', 'messages', 'processing', 'asset_checkboxes', 'downloaded_pdfs', 'unique_counter', 'search_queries']:
|
74 |
st.session_state.setdefault(key, [] if key in ['history', 'messages', 'search_queries'] else {} if key in ['asset_checkboxes', 'downloaded_pdfs', 'processing'] else 0 if key == 'unique_counter' else None)
|
75 |
st.session_state.setdefault('builder', None)
|
@@ -82,7 +78,37 @@ st.session_state.setdefault('cam0_file', None)
|
|
82 |
st.session_state.setdefault('cam1_file', None)
|
83 |
st.session_state.setdefault('openai_model', "gpt-4o-2024-05-13")
|
84 |
|
85 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
@dataclass
|
87 |
class ModelConfig:
|
88 |
name: str
|
@@ -112,8 +138,8 @@ class ModelBuilder:
|
|
112 |
self.jokes = [
|
113 |
"Why did the AI go to therapy? Too many layers to unpack! 😂",
|
114 |
"Training complete! Time for a binary coffee break. ☕",
|
115 |
-
"I told my neural network a joke; it couldn
|
116 |
-
"I asked the AI for a pun, and it said, 'I
|
117 |
"Debugging my code is like a stand-up routine—always a series of exceptions! 😆"
|
118 |
]
|
119 |
def load_model(self, model_path: str, config: Optional[ModelConfig] = None):
|
@@ -124,8 +150,9 @@ class ModelBuilder:
|
|
124 |
self.tokenizer.pad_token = self.tokenizer.eos_token
|
125 |
if config:
|
126 |
self.config = config
|
127 |
-
|
128 |
-
|
|
|
129 |
return self
|
130 |
def save_model(self, path: str):
|
131 |
with st.spinner("Saving model... 💾"):
|
@@ -153,7 +180,6 @@ class DiffusionBuilder:
|
|
153 |
def generate(self, prompt: str):
|
154 |
return self.pipeline(prompt, num_inference_steps=20).images[0]
|
155 |
|
156 |
-
# Utility functions
|
157 |
def generate_filename(sequence, ext="png", prompt=None):
|
158 |
central = pytz.timezone('US/Central')
|
159 |
safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
|
@@ -197,11 +223,10 @@ def download_pdf(url, output_path):
|
|
197 |
logger.error(f"Failed to download {url}: {e}")
|
198 |
return False
|
199 |
|
200 |
-
# Processing functions
|
201 |
async def process_pdf_snapshot(pdf_path, mode="single"):
|
202 |
start_time = time.time()
|
203 |
status = st.empty()
|
204 |
-
status.text(f"Processing PDF Snapshot ({mode})... (0s)")
|
205 |
try:
|
206 |
doc = fitz.open(pdf_path)
|
207 |
output_files = []
|
@@ -249,13 +274,15 @@ async def process_ocr(image, output_file):
|
|
249 |
status = st.empty()
|
250 |
status.text("Processing GOT-OCR2_0... (0s)")
|
251 |
tokenizer = AutoTokenizer.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True)
|
252 |
-
model = AutoModel.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True, torch_dtype=torch.float32)
|
|
|
|
|
253 |
temp_file = generate_filename("temp", "png")
|
254 |
image.save(temp_file)
|
255 |
result = model.chat(tokenizer, temp_file, ocr_type='ocr')
|
256 |
os.remove(temp_file)
|
257 |
elapsed = int(time.time() - start_time)
|
258 |
-
status.text(f"GOT-OCR2_0 completed in {elapsed}s!")
|
259 |
async with aiofiles.open(output_file, "w") as f:
|
260 |
await f.write(result)
|
261 |
return result
|
@@ -555,8 +582,7 @@ def FileSidebar():
|
|
555 |
|
556 |
FileSidebar()
|
557 |
|
558 |
-
|
559 |
-
tabs = st.tabs(["Camera 📷", "Download 📥", "OCR 🔍", "Build 🌱", "Image Gen 🎨", "PDF 📄", "Image 🖼️", "Audio 🎵", "Video 🎥", "Code 🧑💻", "Gallery 📚", "Search 🔎"])
|
560 |
(tab_camera, tab_download, tab_ocr, tab_build, tab_imggen, tab_pdf, tab_image, tab_audio, tab_video, tab_code, tab_gallery, tab_search) = tabs
|
561 |
|
562 |
with tab_camera:
|
@@ -597,14 +623,13 @@ with tab_ocr:
|
|
597 |
st.header("Test OCR 🔍")
|
598 |
all_files = get_gallery_files()
|
599 |
if all_files:
|
600 |
-
# Filter for only PNG and PDF files
|
601 |
ocr_files = [f for f in all_files if f.endswith(('.png', '.pdf'))]
|
602 |
if st.button("OCR All Assets 🚀"):
|
603 |
full_text = "# OCR Results\n\n"
|
604 |
for file in ocr_files:
|
605 |
if file.endswith('.png'):
|
606 |
image = Image.open(file)
|
607 |
-
else:
|
608 |
try:
|
609 |
doc = fitz.open(file)
|
610 |
pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
|
@@ -626,7 +651,7 @@ with tab_ocr:
|
|
626 |
if selected_file:
|
627 |
if selected_file.endswith('.png'):
|
628 |
image = Image.open(selected_file)
|
629 |
-
else:
|
630 |
try:
|
631 |
doc = fitz.open(selected_file)
|
632 |
pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
|
@@ -776,9 +801,9 @@ with tab_search:
|
|
776 |
result = search_arxiv(query)
|
777 |
st.markdown(result)
|
778 |
|
779 |
-
# Sidebar
|
780 |
st.sidebar.subheader("Gallery Settings")
|
781 |
st.session_state['gallery_size'] = st.sidebar.slider("Gallery Size", 1, 10, st.session_state['gallery_size'], key="gallery_size_slider")
|
|
|
782 |
st.sidebar.subheader("Action Logs 📜")
|
783 |
for record in log_records:
|
784 |
st.sidebar.write(f"{record.asctime} - {record.levelname} - {record.message}")
|
@@ -812,7 +837,6 @@ def update_gallery():
|
|
812 |
|
813 |
update_gallery()
|
814 |
|
815 |
-
# Chatbot
|
816 |
if prompt := st.chat_input("GPT-4o Multimodal ChatBot - What can I help you with?"):
|
817 |
st.session_state.messages.append({"role": "user", "content": prompt})
|
818 |
with st.chat_message("user"):
|
@@ -824,4 +848,8 @@ if prompt := st.chat_input("GPT-4o Multimodal ChatBot - What can I help you with
|
|
824 |
if chunk.choices[0].delta.content:
|
825 |
response += chunk.choices[0].delta.content
|
826 |
st.write(response)
|
827 |
-
st.session_state.messages.append({"role": "assistant", "content": response})
|
|
|
|
|
|
|
|
|
|
44 |
from urllib.parse import quote
|
45 |
from xml.etree import ElementTree as ET
|
46 |
|
|
|
47 |
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'), organization=os.getenv('OPENAI_ORG_ID'))
|
48 |
|
|
|
49 |
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
50 |
logger = logging.getLogger(__name__)
|
51 |
log_records = []
|
|
|
54 |
log_records.append(record)
|
55 |
logger.addHandler(LogCaptureHandler())
|
56 |
|
|
|
57 |
st.set_page_config(
|
58 |
page_title="AI Multimodal Titan 🚀",
|
59 |
page_icon="🤖",
|
|
|
66 |
}
|
67 |
)
|
68 |
|
|
|
69 |
for key in ['history', 'messages', 'processing', 'asset_checkboxes', 'downloaded_pdfs', 'unique_counter', 'search_queries']:
|
70 |
st.session_state.setdefault(key, [] if key in ['history', 'messages', 'search_queries'] else {} if key in ['asset_checkboxes', 'downloaded_pdfs', 'processing'] else 0 if key == 'unique_counter' else None)
|
71 |
st.session_state.setdefault('builder', None)
|
|
|
78 |
st.session_state.setdefault('cam1_file', None)
|
79 |
st.session_state.setdefault('openai_model', "gpt-4o-2024-05-13")
|
80 |
|
81 |
+
def get_gpu_info():
    """Collect name, memory, and utilization figures for CUDA device 0.

    Returns:
        dict: Display label -> value.

        * When ``torch.cuda.is_available()`` is false, the result is exactly
          ``{"Status": "No GPU detected"}``.
        * Otherwise the memory entries are byte counts divided by
          ``1024 ** 3`` and formatted as two-decimal strings, and
          ``"Utilization (%)"`` holds whatever ``torch.cuda.utilization(0)``
          returns, or the string ``"N/A"`` if that query fails.
    """
    if not torch.cuda.is_available():
        return {"Status": "No GPU detected"}

    bytes_per_unit = 1024 ** 3
    gpu_name = torch.cuda.get_device_name(0)
    total_memory = torch.cuda.get_device_properties(0).total_memory / bytes_per_unit
    reserved_memory = torch.cuda.memory_reserved(0) / bytes_per_unit
    allocated_memory = torch.cuda.memory_allocated(0) / bytes_per_unit
    # "Free" is derived as total minus this process's allocated tensors; it
    # does not subtract reserved memory.
    free_memory = total_memory - allocated_memory

    # torch.cuda.utilization() depends on an optional NVML/SMI binding and
    # raises when that binding (or the driver query) is unavailable. The
    # figure is purely informational, so degrade to "N/A" instead of letting
    # the failure abort the caller's rendering.
    try:
        utilization = torch.cuda.utilization(0)
    except Exception:
        utilization = "N/A"

    return {
        "GPU Name": gpu_name,
        "Total Memory (GB)": f"{total_memory:.2f}",
        "Reserved Memory (GB)": f"{reserved_memory:.2f}",
        "Allocated Memory (GB)": f"{allocated_memory:.2f}",
        "Free Memory (GB)": f"{free_memory:.2f}",
        "Utilization (%)": utilization,
    }
|
99 |
+
|
100 |
+
def display_gpu_info():
    """Render the GPU status section in the Streamlit sidebar.

    Writes a "GPU Status" subheader, then either a CPU-fallback warning
    (when ``get_gpu_info()`` reports no GPU) or one line per reported
    field followed by a progress bar and caption showing allocated memory
    as a percentage of total memory.
    """
    stats = get_gpu_info()
    st.sidebar.subheader("GPU Status 📊")

    # Guard clause: nothing more to show when no CUDA device was found.
    if stats.get("Status") == "No GPU detected":
        st.sidebar.warning("No GPU detected. Running on CPU.")
        return

    for label, reading in stats.items():
        st.sidebar.write(f"{label}: {reading}")

    # The memory fields arrive as formatted strings, so parse them back to
    # floats before computing the ratio.
    allocated_gb = float(stats["Allocated Memory (GB)"])
    total_gb = float(stats["Total Memory (GB)"])
    memory_usage_percent = (allocated_gb / total_gb) * 100

    # The progress bar receives a fraction, capped at 1.0.
    st.sidebar.progress(min(memory_usage_percent / 100, 1.0))
    st.sidebar.caption(f"Memory Usage: {memory_usage_percent:.1f}%")
|
111 |
+
|
112 |
@dataclass
|
113 |
class ModelConfig:
|
114 |
name: str
|
|
|
138 |
self.jokes = [
|
139 |
"Why did the AI go to therapy? Too many layers to unpack! 😂",
|
140 |
"Training complete! Time for a binary coffee break. ☕",
|
141 |
+
"I told my neural network a joke; it couldn’t stop dropping bits! 🤖",
|
142 |
+
"I asked the AI for a pun, and it said, 'I’m punning on parallel processing!' 😄",
|
143 |
"Debugging my code is like a stand-up routine—always a series of exceptions! 😆"
|
144 |
]
|
145 |
def load_model(self, model_path: str, config: Optional[ModelConfig] = None):
|
|
|
150 |
self.tokenizer.pad_token = self.tokenizer.eos_token
|
151 |
if config:
|
152 |
self.config = config
|
153 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
154 |
+
self.model.to(device)
|
155 |
+
st.success(f"Model loaded on {device}! 🎉 {random.choice(self.jokes)}")
|
156 |
return self
|
157 |
def save_model(self, path: str):
|
158 |
with st.spinner("Saving model... 💾"):
|
|
|
180 |
def generate(self, prompt: str):
|
181 |
return self.pipeline(prompt, num_inference_steps=20).images[0]
|
182 |
|
|
|
183 |
def generate_filename(sequence, ext="png", prompt=None):
|
184 |
central = pytz.timezone('US/Central')
|
185 |
safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
|
|
|
223 |
logger.error(f"Failed to download {url}: {e}")
|
224 |
return False
|
225 |
|
|
|
226 |
async def process_pdf_snapshot(pdf_path, mode="single"):
|
227 |
start_time = time.time()
|
228 |
status = st.empty()
|
229 |
+
status.text(f"Processing PDF SnapshotK Snapshot ({mode})... (0s)")
|
230 |
try:
|
231 |
doc = fitz.open(pdf_path)
|
232 |
output_files = []
|
|
|
274 |
status = st.empty()
|
275 |
status.text("Processing GOT-OCR2_0... (0s)")
|
276 |
tokenizer = AutoTokenizer.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True)
|
277 |
+
model = AutoModel.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True, torch_dtype=torch.float32)
|
278 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
279 |
+
model.to(device).eval()
|
280 |
temp_file = generate_filename("temp", "png")
|
281 |
image.save(temp_file)
|
282 |
result = model.chat(tokenizer, temp_file, ocr_type='ocr')
|
283 |
os.remove(temp_file)
|
284 |
elapsed = int(time.time() - start_time)
|
285 |
+
status.text(f"GOT-OCR2_0 completed in {elapsed}s on {device}!")
|
286 |
async with aiofiles.open(output_file, "w") as f:
|
287 |
await f.write(result)
|
288 |
return result
|
|
|
582 |
|
583 |
FileSidebar()
|
584 |
|
585 |
+
tabs = st.tabs(["Camera 📷", "Download RFP 📥", "OCR 🔍", "Build 🌱", "Image Gen 🎨", "PDF 📄", "Image 🖼️", "Audio 🎵", "Video 🎥", "Code 🧑💻", "Gallery 📚", "Search 🔎"])
|
|
|
586 |
(tab_camera, tab_download, tab_ocr, tab_build, tab_imggen, tab_pdf, tab_image, tab_audio, tab_video, tab_code, tab_gallery, tab_search) = tabs
|
587 |
|
588 |
with tab_camera:
|
|
|
623 |
st.header("Test OCR 🔍")
|
624 |
all_files = get_gallery_files()
|
625 |
if all_files:
|
|
|
626 |
ocr_files = [f for f in all_files if f.endswith(('.png', '.pdf'))]
|
627 |
if st.button("OCR All Assets 🚀"):
|
628 |
full_text = "# OCR Results\n\n"
|
629 |
for file in ocr_files:
|
630 |
if file.endswith('.png'):
|
631 |
image = Image.open(file)
|
632 |
+
else:
|
633 |
try:
|
634 |
doc = fitz.open(file)
|
635 |
pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
|
|
|
651 |
if selected_file:
|
652 |
if selected_file.endswith('.png'):
|
653 |
image = Image.open(selected_file)
|
654 |
+
else:
|
655 |
try:
|
656 |
doc = fitz.open(selected_file)
|
657 |
pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
|
|
|
801 |
result = search_arxiv(query)
|
802 |
st.markdown(result)
|
803 |
|
|
|
804 |
st.sidebar.subheader("Gallery Settings")
|
805 |
st.session_state['gallery_size'] = st.sidebar.slider("Gallery Size", 1, 10, st.session_state['gallery_size'], key="gallery_size_slider")
|
806 |
+
display_gpu_info()
|
807 |
st.sidebar.subheader("Action Logs 📜")
|
808 |
for record in log_records:
|
809 |
st.sidebar.write(f"{record.asctime} - {record.levelname} - {record.message}")
|
|
|
837 |
|
838 |
update_gallery()
|
839 |
|
|
|
840 |
if prompt := st.chat_input("GPT-4o Multimodal ChatBot - What can I help you with?"):
|
841 |
st.session_state.messages.append({"role": "user", "content": prompt})
|
842 |
with st.chat_message("user"):
|
|
|
848 |
if chunk.choices[0].delta.content:
|
849 |
response += chunk.choices[0].delta.content
|
850 |
st.write(response)
|
851 |
+
st.session_state.messages.append({"role": "assistant", "content": response})
|
852 |
+
|
853 |
+
def create_audio_file(filename, audio_input, flag):
    """Write ``audio_input`` to ``filename`` in binary mode.

    Args:
        filename: Path of the file to create or overwrite.
        audio_input: Bytes-like payload written to the file as-is.
        flag: Accepted but not used by this function.

    Returns:
        None.
    """
    with open(filename, mode="wb") as audio_file:
        audio_file.write(audio_input)
|