Spaces: Running
Update app.py
app.py CHANGED
@@ -1,10 +1,6 @@
 import spaces
 import os
 import threading
-from collections import deque
-
-import plotly.graph_objs as go
-import pynvml
 
 import gradio as gr
 from huggingface_hub import snapshot_download
@@ -30,100 +26,6 @@ models = [
     },
 ]
 
-# Queues for storing historical data (saving the last 100 GPU utilization and memory usage values)
-gpu_util_history = deque(maxlen=100)
-mem_usage_history = deque(maxlen=100)
-
-
-def initialize_nvml():
-    """
-    Initialize NVML (NVIDIA Management Library).
-    """
-    pynvml.nvmlInit()
-
-
-def get_gpu_info():
-    """
-    Get GPU utilization and memory usage information.
-
-    Returns:
-        dict: A dictionary containing GPU utilization and memory usage information.
-    """
-    handle = pynvml.nvmlDeviceGetHandleByIndex(0)  # Assuming a single GPU setup
-    utilization = pynvml.nvmlDeviceGetUtilizationRates(handle)
-    memory = pynvml.nvmlDeviceGetMemoryInfo(handle)
-
-    gpu_info = {
-        'gpu_util': utilization.gpu,
-        'mem_used': memory.used / 1024**2,  # Convert bytes to MiB
-        'mem_total': memory.total / 1024**2,  # Convert bytes to MiB
-        'mem_percent': (memory.used / memory.total) * 100
-    }
-    return gpu_info
-
-
-def _update_charts(chart_height: int = 200) -> go.Figure:
-    """
-    Update the GPU utilization and memory usage charts.
-
-    Args:
-        chart_height (int, optional): used to set the height of the chart. Defaults to 200.
-
-    Returns:
-        plotly.graph_objs.Figure: The updated figure containing the GPU and memory usage charts.
-    """
-    # obtain GPU information
-    gpu_info = get_gpu_info()
-
-    # records the latest GPU utilization and memory usage values
-    gpu_util = round(gpu_info.get('gpu_util', 0), 1)
-    mem_used = round(gpu_info.get('mem_used', 0) / 1024, 2)  # Convert MiB to GiB
-    gpu_util_history.append(gpu_util)
-    mem_usage_history.append(mem_used)
-
-    # create GPU utilization line chart
-    gpu_trace = go.Scatter(
-        y=list(gpu_util_history),
-        mode='lines+markers',
-        text=list(gpu_util_history),
-        line=dict(shape='spline', color='blue'),  # Make the line smooth and set color
-        yaxis='y1'  # Link to y-axis 1
-    )
-
-    # create memory usage line chart
-    mem_trace = go.Scatter(
-        y=list(mem_usage_history),
-        mode='lines+markers',
-        text=list(mem_usage_history),
-        line=dict(shape='spline', color='red'),  # Make the line smooth and set color
-        yaxis='y2'  # Link to y-axis 2
-    )
-
-    # set the layout of the chart
-    layout = go.Layout(
-        xaxis=dict(title=None, showticklabels=False, ticks=''),
-        yaxis=dict(
-            title='GPU Utilization (%)',
-            range=[-5, 110],
-            titlefont=dict(color='blue'),
-            tickfont=dict(color='blue'),
-        ),
-        yaxis2=dict(title='Memory Usage (GiB)',
-                    range=[0, max(24,
-                                  max(mem_usage_history) + 1)],
-                    titlefont=dict(color='red'),
-                    tickfont=dict(color='red'),
-                    overlaying='y',
-                    side='right'),
-        height=chart_height,  # set the height of the chart
-        margin=dict(l=10, r=10, t=0, b=0),  # set the margin of the chart
-        showlegend=False  # disable the legend
-    )
-
-    fig = go.Figure(data=[gpu_trace, mem_trace], layout=layout)
-    return fig
-
-
 def initialize_history():
     """
     Initializes the GPU utilization and memory usage history.
@@ -134,13 +36,6 @@ def initialize_history():
     mem_usage_history.append(round(gpu_info.get('mem_percent', 0), 1))
 
 
-def enable_gpu_info():
-    pynvml.nvmlInit()
-
-
-def disable_gpu_info():
-    pynvml.nvmlShutdown()
-
 model_choices = [f"{model['name']} ({model['bits']})" for model in models]
 display_to_model = {f"{model['name']} ({model['bits']})": model['name'] for model in models}
 
@@ -159,7 +54,8 @@ def download_models_in_background():
 download_thread = threading.Thread(target=download_models_in_background)
 download_thread.start()
 
-
+loaded_model = None
+loaded_model_name = None
 
 @spaces.GPU
 def respond(
@@ -173,12 +69,16 @@ def respond(
 ):
     model_name = display_to_model[selected_model_display_label]
 
+    global loaded_model
+    global loaded_model_name
+
     # Check if the model is already loaded
-    if model_name not …
+    if model_name is not loaded_model_name:
         # Load and store the model in the cache
-        …
+        loaded_model = get_chat_loop_generator(model_name)
+        loaded_model_name = model_name
 
-    chat_completion = …
+    chat_completion = loaded_model
 
     messages = [{"role": "system", "content": system_message}]
 
@@ -240,4 +140,3 @@ with gr.Blocks(fill_height=True) as demo:
 if __name__ == "__main__":
     share = os.getenv("SHARE_LINK", None) in ["1", "true", "True"]
     demo.launch(share=share)
-    # disable_gpu_info()
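
The commit removes the pynvml/plotly GPU-monitoring code and instead caches the most recently loaded model in two module-level globals, so repeated calls to respond reuse the loaded model rather than reloading it. Below is a minimal sketch of that single-slot cache pattern, assuming a loader with the get_chat_loop_generator name used in the diff; its real implementation is not shown on this page, so a stub stands in for it, and get_cached_model is a hypothetical wrapper added here only for illustration.

# Single-slot model cache: remember the last loaded model and its name.
loaded_model = None
loaded_model_name = None

def get_chat_loop_generator(model_name):
    # Hypothetical stand-in for the repo's actual loader.
    def chat_loop(messages):
        yield f"[{model_name}] processed {len(messages)} messages"
    return chat_loop

def get_cached_model(model_name):
    """Return the generator for model_name, reloading only when the name changes."""
    global loaded_model, loaded_model_name
    # Note: `!=` compares string values; the diff's `is not` compares object
    # identity, which can trigger a reload even when the strings are equal.
    if model_name != loaded_model_name:
        loaded_model = get_chat_loop_generator(model_name)
        loaded_model_name = model_name
    return loaded_model

A single slot suffices for this app because each request uses one selected model; choosing a different model simply replaces the cached one, freeing the previous model for garbage collection before the next load.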