awacke1 committed on
Commit
ac52042
·
verified ·
1 Parent(s): 794b68e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -22
app.py CHANGED
@@ -44,10 +44,8 @@ from typing import Optional
44
  from urllib.parse import quote
45
  from xml.etree import ElementTree as ET
46
 
47
- # OpenAI client initialization
48
  client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'), organization=os.getenv('OPENAI_ORG_ID'))
49
 
50
- # Logging setup
51
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
52
  logger = logging.getLogger(__name__)
53
  log_records = []
@@ -56,7 +54,6 @@ class LogCaptureHandler(logging.Handler):
56
  log_records.append(record)
57
  logger.addHandler(LogCaptureHandler())
58
 
59
- # Streamlit configuration
60
  st.set_page_config(
61
  page_title="AI Multimodal Titan 🚀",
62
  page_icon="🤖",
@@ -69,7 +66,6 @@ st.set_page_config(
69
  }
70
  )
71
 
72
- # Session state initialization
73
  for key in ['history', 'messages', 'processing', 'asset_checkboxes', 'downloaded_pdfs', 'unique_counter', 'search_queries']:
74
  st.session_state.setdefault(key, [] if key in ['history', 'messages', 'search_queries'] else {} if key in ['asset_checkboxes', 'downloaded_pdfs', 'processing'] else 0 if key == 'unique_counter' else None)
75
  st.session_state.setdefault('builder', None)
@@ -82,7 +78,37 @@ st.session_state.setdefault('cam0_file', None)
82
  st.session_state.setdefault('cam1_file', None)
83
  st.session_state.setdefault('openai_model', "gpt-4o-2024-05-13")
84
 
85
- # Model configurations
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  @dataclass
87
  class ModelConfig:
88
  name: str
@@ -112,8 +138,8 @@ class ModelBuilder:
112
  self.jokes = [
113
  "Why did the AI go to therapy? Too many layers to unpack! 😂",
114
  "Training complete! Time for a binary coffee break. ☕",
115
- "I told my neural network a joke; it couldn't stop dropping bits! 🤖",
116
- "I asked the AI for a pun, and it said, 'I'm punning on parallel processing!' 😄",
117
  "Debugging my code is like a stand-up routine—always a series of exceptions! 😆"
118
  ]
119
  def load_model(self, model_path: str, config: Optional[ModelConfig] = None):
@@ -124,8 +150,9 @@ class ModelBuilder:
124
  self.tokenizer.pad_token = self.tokenizer.eos_token
125
  if config:
126
  self.config = config
127
- self.model.to("cuda" if torch.cuda.is_available() else "cpu")
128
- st.success(f"Model loaded! 🎉 {random.choice(self.jokes)}")
 
129
  return self
130
  def save_model(self, path: str):
131
  with st.spinner("Saving model... 💾"):
@@ -153,7 +180,6 @@ class DiffusionBuilder:
153
  def generate(self, prompt: str):
154
  return self.pipeline(prompt, num_inference_steps=20).images[0]
155
 
156
- # Utility functions
157
  def generate_filename(sequence, ext="png", prompt=None):
158
  central = pytz.timezone('US/Central')
159
  safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
@@ -197,11 +223,10 @@ def download_pdf(url, output_path):
197
  logger.error(f"Failed to download {url}: {e}")
198
  return False
199
 
200
- # Processing functions
201
  async def process_pdf_snapshot(pdf_path, mode="single"):
202
  start_time = time.time()
203
  status = st.empty()
204
- status.text(f"Processing PDF Snapshot ({mode})... (0s)")
205
  try:
206
  doc = fitz.open(pdf_path)
207
  output_files = []
@@ -249,13 +274,15 @@ async def process_ocr(image, output_file):
249
  status = st.empty()
250
  status.text("Processing GOT-OCR2_0... (0s)")
251
  tokenizer = AutoTokenizer.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True)
252
- model = AutoModel.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True, torch_dtype=torch.float32).to("cpu").eval()
 
 
253
  temp_file = generate_filename("temp", "png")
254
  image.save(temp_file)
255
  result = model.chat(tokenizer, temp_file, ocr_type='ocr')
256
  os.remove(temp_file)
257
  elapsed = int(time.time() - start_time)
258
- status.text(f"GOT-OCR2_0 completed in {elapsed}s!")
259
  async with aiofiles.open(output_file, "w") as f:
260
  await f.write(result)
261
  return result
@@ -555,8 +582,7 @@ def FileSidebar():
555
 
556
  FileSidebar()
557
 
558
- # Tabs
559
- tabs = st.tabs(["Camera 📷", "Download 📥", "OCR 🔍", "Build 🌱", "Image Gen 🎨", "PDF 📄", "Image 🖼️", "Audio 🎵", "Video 🎥", "Code 🧑‍💻", "Gallery 📚", "Search 🔎"])
560
  (tab_camera, tab_download, tab_ocr, tab_build, tab_imggen, tab_pdf, tab_image, tab_audio, tab_video, tab_code, tab_gallery, tab_search) = tabs
561
 
562
  with tab_camera:
@@ -597,14 +623,13 @@ with tab_ocr:
597
  st.header("Test OCR 🔍")
598
  all_files = get_gallery_files()
599
  if all_files:
600
- # Filter for only PNG and PDF files
601
  ocr_files = [f for f in all_files if f.endswith(('.png', '.pdf'))]
602
  if st.button("OCR All Assets 🚀"):
603
  full_text = "# OCR Results\n\n"
604
  for file in ocr_files:
605
  if file.endswith('.png'):
606
  image = Image.open(file)
607
- else: # PDF
608
  try:
609
  doc = fitz.open(file)
610
  pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
@@ -626,7 +651,7 @@ with tab_ocr:
626
  if selected_file:
627
  if selected_file.endswith('.png'):
628
  image = Image.open(selected_file)
629
- else: # PDF
630
  try:
631
  doc = fitz.open(selected_file)
632
  pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
@@ -776,9 +801,9 @@ with tab_search:
776
  result = search_arxiv(query)
777
  st.markdown(result)
778
 
779
- # Sidebar
780
  st.sidebar.subheader("Gallery Settings")
781
  st.session_state['gallery_size'] = st.sidebar.slider("Gallery Size", 1, 10, st.session_state['gallery_size'], key="gallery_size_slider")
 
782
  st.sidebar.subheader("Action Logs 📜")
783
  for record in log_records:
784
  st.sidebar.write(f"{record.asctime} - {record.levelname} - {record.message}")
@@ -812,7 +837,6 @@ def update_gallery():
812
 
813
  update_gallery()
814
 
815
- # Chatbot
816
  if prompt := st.chat_input("GPT-4o Multimodal ChatBot - What can I help you with?"):
817
  st.session_state.messages.append({"role": "user", "content": prompt})
818
  with st.chat_message("user"):
@@ -824,4 +848,8 @@ if prompt := st.chat_input("GPT-4o Multimodal ChatBot - What can I help you with
824
  if chunk.choices[0].delta.content:
825
  response += chunk.choices[0].delta.content
826
  st.write(response)
827
- st.session_state.messages.append({"role": "assistant", "content": response})
 
 
 
 
 
44
  from urllib.parse import quote
45
  from xml.etree import ElementTree as ET
46
 
 
47
  client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'), organization=os.getenv('OPENAI_ORG_ID'))
48
 
 
49
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
50
  logger = logging.getLogger(__name__)
51
  log_records = []
 
54
  log_records.append(record)
55
  logger.addHandler(LogCaptureHandler())
56
 
 
57
  st.set_page_config(
58
  page_title="AI Multimodal Titan 🚀",
59
  page_icon="🤖",
 
66
  }
67
  )
68
 
 
69
  for key in ['history', 'messages', 'processing', 'asset_checkboxes', 'downloaded_pdfs', 'unique_counter', 'search_queries']:
70
  st.session_state.setdefault(key, [] if key in ['history', 'messages', 'search_queries'] else {} if key in ['asset_checkboxes', 'downloaded_pdfs', 'processing'] else 0 if key == 'unique_counter' else None)
71
  st.session_state.setdefault('builder', None)
 
78
  st.session_state.setdefault('cam1_file', None)
79
  st.session_state.setdefault('openai_model', "gpt-4o-2024-05-13")
80
 
81
def get_gpu_info():
    """Collect a snapshot of CUDA device 0 for display in the UI.

    Returns:
        dict: ``{"Status": "No GPU detected"}`` when CUDA is unavailable.
        Otherwise a dict with, in this order, the keys "GPU Name",
        "Total Memory (GB)", "Reserved Memory (GB)",
        "Allocated Memory (GB)", "Free Memory (GB)" (memory figures are
        strings formatted to two decimals) and "Utilization (%)", which is
        the value returned by ``torch.cuda.utilization`` or the string
        "N/A" when utilization cannot be queried.
    """
    if not torch.cuda.is_available():
        return {"Status": "No GPU detected"}

    bytes_per_gb = 1024 ** 3
    gpu_name = torch.cuda.get_device_name(0)
    total_memory = torch.cuda.get_device_properties(0).total_memory / bytes_per_gb
    reserved_memory = torch.cuda.memory_reserved(0) / bytes_per_gb
    allocated_memory = torch.cuda.memory_allocated(0) / bytes_per_gb
    # "Free" here is total minus what this process has allocated through
    # PyTorch; it does not account for memory held by other processes.
    free_memory = total_memory - allocated_memory

    # torch.cuda.utilization() relies on the optional NVML bindings and raises
    # when they are missing or the driver cannot be loaded. This panel is
    # purely diagnostic, so degrade to "N/A" instead of failing the page.
    try:
        utilization = torch.cuda.utilization(0)
    except (ImportError, RuntimeError):
        utilization = "N/A"

    return {
        "GPU Name": gpu_name,
        "Total Memory (GB)": f"{total_memory:.2f}",
        "Reserved Memory (GB)": f"{reserved_memory:.2f}",
        "Allocated Memory (GB)": f"{allocated_memory:.2f}",
        "Free Memory (GB)": f"{free_memory:.2f}",
        "Utilization (%)": utilization,
    }
99
+
100
def display_gpu_info():
    """Render the GPU status section in the Streamlit sidebar.

    Shows a warning when ``get_gpu_info`` reports that no GPU was detected.
    Otherwise writes every reported field, followed by a progress bar and a
    caption giving allocated memory as a percentage of total memory.
    """
    info = get_gpu_info()
    st.sidebar.subheader("GPU Status 📊")

    if info.get("Status") == "No GPU detected":
        st.sidebar.warning("No GPU detected. Running on CPU.")
        return

    for label, reading in info.items():
        st.sidebar.write(f"{label}: {reading}")

    # The memory figures arrive as formatted strings, so parse them back.
    allocated_gb = float(info["Allocated Memory (GB)"])
    total_gb = float(info["Total Memory (GB)"])
    memory_usage_percent = (allocated_gb / total_gb) * 100

    # The progress widget takes a fraction, capped here at 1.0.
    st.sidebar.progress(min(memory_usage_percent / 100, 1.0))
    st.sidebar.caption(f"Memory Usage: {memory_usage_percent:.1f}%")
111
+
112
  @dataclass
113
  class ModelConfig:
114
  name: str
 
138
  self.jokes = [
139
  "Why did the AI go to therapy? Too many layers to unpack! 😂",
140
  "Training complete! Time for a binary coffee break. ☕",
141
+ "I told my neural network a joke; it couldnt stop dropping bits! 🤖",
142
+ "I asked the AI for a pun, and it said, 'Im punning on parallel processing!' 😄",
143
  "Debugging my code is like a stand-up routine—always a series of exceptions! 😆"
144
  ]
145
  def load_model(self, model_path: str, config: Optional[ModelConfig] = None):
 
150
  self.tokenizer.pad_token = self.tokenizer.eos_token
151
  if config:
152
  self.config = config
153
+ device = "cuda" if torch.cuda.is_available() else "cpu"
154
+ self.model.to(device)
155
+ st.success(f"Model loaded on {device}! 🎉 {random.choice(self.jokes)}")
156
  return self
157
  def save_model(self, path: str):
158
  with st.spinner("Saving model... 💾"):
 
180
  def generate(self, prompt: str):
181
  return self.pipeline(prompt, num_inference_steps=20).images[0]
182
 
 
183
  def generate_filename(sequence, ext="png", prompt=None):
184
  central = pytz.timezone('US/Central')
185
  safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
 
223
  logger.error(f"Failed to download {url}: {e}")
224
  return False
225
 
 
226
  async def process_pdf_snapshot(pdf_path, mode="single"):
227
  start_time = time.time()
228
  status = st.empty()
229
+ status.text(f"Processing PDF SnapshotK Snapshot ({mode})... (0s)")
230
  try:
231
  doc = fitz.open(pdf_path)
232
  output_files = []
 
274
  status = st.empty()
275
  status.text("Processing GOT-OCR2_0... (0s)")
276
  tokenizer = AutoTokenizer.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True)
277
+ model = AutoModel.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True, torch_dtype=torch.float32)
278
+ device = "cuda" if torch.cuda.is_available() else "cpu"
279
+ model.to(device).eval()
280
  temp_file = generate_filename("temp", "png")
281
  image.save(temp_file)
282
  result = model.chat(tokenizer, temp_file, ocr_type='ocr')
283
  os.remove(temp_file)
284
  elapsed = int(time.time() - start_time)
285
+ status.text(f"GOT-OCR2_0 completed in {elapsed}s on {device}!")
286
  async with aiofiles.open(output_file, "w") as f:
287
  await f.write(result)
288
  return result
 
582
 
583
  FileSidebar()
584
 
585
+ tabs = st.tabs(["Camera 📷", "Download RFP 📥", "OCR 🔍", "Build 🌱", "Image Gen 🎨", "PDF 📄", "Image 🖼️", "Audio 🎵", "Video 🎥", "Code 🧑‍💻", "Gallery 📚", "Search 🔎"])
 
586
  (tab_camera, tab_download, tab_ocr, tab_build, tab_imggen, tab_pdf, tab_image, tab_audio, tab_video, tab_code, tab_gallery, tab_search) = tabs
587
 
588
  with tab_camera:
 
623
  st.header("Test OCR 🔍")
624
  all_files = get_gallery_files()
625
  if all_files:
 
626
  ocr_files = [f for f in all_files if f.endswith(('.png', '.pdf'))]
627
  if st.button("OCR All Assets 🚀"):
628
  full_text = "# OCR Results\n\n"
629
  for file in ocr_files:
630
  if file.endswith('.png'):
631
  image = Image.open(file)
632
+ else:
633
  try:
634
  doc = fitz.open(file)
635
  pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
 
651
  if selected_file:
652
  if selected_file.endswith('.png'):
653
  image = Image.open(selected_file)
654
+ else:
655
  try:
656
  doc = fitz.open(selected_file)
657
  pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
 
801
  result = search_arxiv(query)
802
  st.markdown(result)
803
 
 
804
  st.sidebar.subheader("Gallery Settings")
805
  st.session_state['gallery_size'] = st.sidebar.slider("Gallery Size", 1, 10, st.session_state['gallery_size'], key="gallery_size_slider")
806
+ display_gpu_info()
807
  st.sidebar.subheader("Action Logs 📜")
808
  for record in log_records:
809
  st.sidebar.write(f"{record.asctime} - {record.levelname} - {record.message}")
 
837
 
838
  update_gallery()
839
 
 
840
  if prompt := st.chat_input("GPT-4o Multimodal ChatBot - What can I help you with?"):
841
  st.session_state.messages.append({"role": "user", "content": prompt})
842
  with st.chat_message("user"):
 
848
  if chunk.choices[0].delta.content:
849
  response += chunk.choices[0].delta.content
850
  st.write(response)
851
+ st.session_state.messages.append({"role": "assistant", "content": response})
852
+
853
def create_audio_file(filename, audio_input, flag):
    """Write ``audio_input`` to ``filename`` in binary mode.

    Args:
        filename: Path of the file to create or overwrite.
        audio_input: Bytes-like payload written verbatim.
        flag: Accepted but not used by this function.
    """
    with open(filename, mode="wb") as audio_out:
        audio_out.write(audio_input)