Update app.py
app.py CHANGED
The commit replaces the CUDA auto-detection block with a hard-coded CPU setup. Several deleted lines were truncated by the diff viewer and appear below exactly as captured.

```diff
@@ -7,80 +7,15 @@ from datetime import datetime
 import gradio as gr
 from huggingface_hub import hf_hub_download
 
-#
-
-device = torch.device("
-print(f"Using device: {device}")
+# Force CPU mode as requested
+use_cuda = False
+device = torch.device("cpu")
+print(f"Using device: {device} (forced CPU mode)")
 
-#
-
-
-
-        os.environ["RWKV_V7_ON"] = '1'
-        os.environ["RWKV_JIT_ON"] = '1'
-        os.environ["RWKV_CUDA_ON"] = '0'
-        return False
-
-    print("CUDA is available, setting up environment")
-
-    # Try to detect CUDA location automatically
-    possible_cuda_paths = [
-        "/usr/local/cuda",
-        "/opt/cuda",
-        "/usr/lib/cuda",
-        "/usr/cuda",
-        "/usr/local/nvidia/cuda",
-        "/usr/lib/nvidia-cuda-toolkit",
-        "/usr/lib/x86_64-linux-gnu/cuda"
-    ]
-
-    cuda_found = False
-    for path in possible_cuda_paths:
-        if os.path.exists(path):
-            os.environ["CUDA_HOME"] = path
-            print(f"Found CUDA at: {path}")
-            cuda_found = True
-            break
-
-    if not cuda_found:
-        # If we can't find the CUDA path but CUDA is available,
-        # try looking for common libraries
-        try:
-            import ctypes
-            cuda_runtime = ctypes.cdll.LoadLibrary("libcudart.so")
-            print("Found CUDA runtime library, proceeding without explicit CUDA_HOME")
-            cuda_found = True
-        except:
-            print("Could not locate CUDA runtime library")
-
-    # Set RWKV environment variables
-    if cuda_found:
-        os.environ["RWKV_V7_ON"] = '1'
-        os.environ["RWKV_JIT_ON"] = '1'
-        os.environ["RWKV_CUDA_ON"] = '1'
-    else:
-        print("CUDA is available but environment couldn't be set up correctly, falling back to CPU")
-        os.environ["RWKV_V7_ON"] = '1'
-        os.environ["RWKV_JIT_ON"] = '1'
-        os.environ["RWKV_CUDA_ON"] = '0'
-        return False
-
-    return cuda_found
-
-# Initialize NVML for GPU monitoring if available
-has_nvml = False
-if cuda_available:
-    try:
-        from pynvml import nvmlInit, nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo
-        nvmlInit()
-        gpu_h = nvmlDeviceGetHandleByIndex(0)
-        has_nvml = True
-        print("NVML initialized for GPU monitoring")
-    except:
-        print("NVML not available, GPU monitoring disabled")
-
-# Set up CUDA environment
-use_cuda = setup_cuda_environment()
+# Set RWKV environment variables for CPU
+os.environ["RWKV_V7_ON"] = '1'
+os.environ["RWKV_JIT_ON"] = '1'
+os.environ["RWKV_CUDA_ON"] = '0'
 
 # Model parameters
 ctx_limit = 4000
```
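The new setup leans on the fact that the `rwkv` pip package reads these flags when `rwkv.model` is first imported, so they must be exported before the import. A minimal sketch of the resulting CPU-only bootstrap (assuming the standard `rwkv` package; the import lines are the usual ones and are not shown in this hunk):

```python
# Minimal sketch, assuming the standard `rwkv` pip package:
# the flags are read once when rwkv.model is imported, so set them first.
import os

os.environ["RWKV_V7_ON"] = '1'    # enable the RWKV-7 architecture code path
os.environ["RWKV_JIT_ON"] = '1'   # TorchScript JIT for faster CPU inference
os.environ["RWKV_CUDA_ON"] = '0'  # skip compiling the custom CUDA kernel

from rwkv.model import RWKV       # noqa: E402 -- must follow the env setup
from rwkv.utils import PIPELINE   # noqa: E402
```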
```diff
@@ -114,11 +49,11 @@ try:
     model_path_v6 = hf_hub_download(repo_id="BlinkDL/rwkv7-g1", filename=f"{title_v6}.pth")
     print(f"Model downloaded to {model_path_v6}")
 
-    #
-    strategy = '
+    # Use CPU strategy
+    strategy = 'cpu fp32'
     print(f"Using strategy: {strategy}")
 
-    # Initialize model with
+    # Initialize model with CPU strategy
     model_v6 = RWKV(model=model_path_v6.replace('.pth',''), strategy=strategy)
     pipeline_v6 = PIPELINE(model_v6, "rwkv_vocab_v20230424")
     args = model_v6.args
```
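`'cpu fp32'` trades speed for portability: no CUDA toolkit and no fp16 kernels are required. For comparison, a few other strategy strings the same `RWKV(...)` constructor accepts, per the `rwkv` package's documented `device dtype` format (illustrative; availability depends on the installed version and hardware):

```python
# Illustrative RWKV strategy strings (device + precision per layer group):
strategies = [
    'cpu fp32',                   # what this commit uses: CPU, full precision
    'cuda fp16',                  # whole model on GPU in fp16
    'cuda fp16i8',                # fp16 with int8-quantized weights (less VRAM)
    'cuda fp16 *10 -> cpu fp32',  # first 10 layers on GPU, the rest on CPU
]
```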
```diff
@@ -132,7 +67,7 @@ except Exception as e:
 
 # Text generation parameters
 penalty_decay = 0.996
-
+
 def generate_prompt(instruction, input=""):
     instruction = instruction.strip().replace('\r\n','\n').replace('\n\n','\n')
     input = input.strip().replace('\r\n','\n').replace('\n\n','\n')
```
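The body of `generate_prompt` beyond these two normalization lines is outside the hunk. As a point of reference, RWKV instruct demos conventionally build an `Instruction:`/`Input:`/`Response:` prompt; the sketch below shows that common shape and is an assumption, not this file's exact template:

```python
# Sketch of the common RWKV instruct template (assumed, not shown in the diff):
def generate_prompt_sketch(instruction, input=""):
    instruction = instruction.strip().replace('\r\n', '\n').replace('\n\n', '\n')
    input = input.strip().replace('\r\n', '\n').replace('\n\n', '\n')
    if input:
        return f"Instruction: {instruction}\n\nInput: {input}\n\nResponse:"
    return f"User: {instruction}\n\nAssistant:"
```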
```diff
@@ -199,21 +134,10 @@ def evaluate(
             yield out_str.strip()
             out_last = i + 1
 
-        # Log GPU info if available
-        if use_cuda and has_nvml:
-            try:
-                gpu_info = nvmlDeviceGetMemoryInfo(gpu_h)
-                timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-                print(f'{timestamp} - vram total: {gpu_info.total/1024**2:.2f}MB, used: {gpu_info.used/1024**2:.2f}MB, free: {gpu_info.free/1024**2:.2f}MB')
-            except:
-                print("Error getting GPU info")
-
         # Clean up to free memory
         del out
         del state
         gc.collect()
-        if use_cuda:
-            torch.cuda.empty_cache()
 
         yield out_str.strip()
     except Exception as e:
```
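With the NVML logging and the `torch.cuda.empty_cache()` call gone, cleanup reduces to dropping the logits and RNN state and letting the garbage collector reclaim them. For orientation, a condensed sketch of the streaming loop that these context lines close out (the shape is assumed from typical RWKV pipelines; names match the diff, sampling values are placeholders):

```python
# Hypothetical condensed form of the generation loop (the real one is
# outside this hunk); pipeline_v6, model_v6 and ctx_limit come from the app.
import gc

def stream_tokens(ctx, token_count=200):
    all_tokens, out_str, out_last, state = [], '', 0, None
    for i in range(token_count):
        tokens = pipeline_v6.encode(ctx)[-ctx_limit:] if i == 0 else [token]
        out, state = model_v6.forward(tokens, state)  # RNN state carried forward
        token = pipeline_v6.sample_logits(out, temperature=1.0, top_p=0.3)
        all_tokens.append(token)
        tmp = pipeline_v6.decode(all_tokens[out_last:])
        if '\ufffd' not in tmp:  # emit only complete UTF-8 sequences
            out_str += tmp
            yield out_str.strip()
            out_last = i + 1
    # cleanup mirrors the diff: free logits and state, then collect
    del out, state
    gc.collect()
```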
```diff
@@ -242,7 +166,7 @@ examples = [
 # Create Gradio UI
 with gr.Blocks(title=title_v6) as demo:
     model_status = "✅ Model loaded successfully" if model_loaded else "❌ Model failed to load"
-    device_status =
+    device_status = "Using CPU mode"
 
     gr.HTML(f"<div style=\"text-align: center;\">\n<h1>{title_v6}</h1>\n<p>{model_status} - {device_status}</p>\n</div>")
 
```
```diff
@@ -268,5 +192,6 @@ with gr.Blocks(title=title_v6) as demo:
 
 # Launch the app
 print("Starting Gradio app...")
-
+# Fix the queue method call by removing the incorrect parameter
+demo.queue(max_size=10)
 demo.launch(share=False)
```
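On the final fix: Gradio 4.x removed the `concurrency_count` parameter from `queue()`, so a call passing it raises a `TypeError`. The deleted line is truncated in this view, but that is the likely culprit. If a concurrency cap is still wanted, the 4.x spelling is `default_concurrency_limit` (a sketch, assuming Gradio 4.x):

```python
# Gradio 4.x sketch: cap pending requests and per-event concurrency.
# concurrency_count is gone; default_concurrency_limit replaces it.
demo.queue(max_size=10, default_concurrency_limit=1)
demo.launch(share=False)
```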