Spaces:

CohereLabs
/

command-a-vision

Running on CPU Upgrade

App Files Files Community

alexrs-cohere committed on Jun 24

Commit

018b8c8

0 Parent(s):

Command A Vision

Browse files

Files changed (13) hide show

.gitattributes +41 -0
.pre-commit-config.yaml +33 -0
.python-version +1 -0
README.md +12 -0
app.py +176 -0
assets/aya-vision-win-rates.png +3 -0
assets/aya-vision.png +3 -0
assets/command-a-longbech-v2.png +3 -0
assets/invoice-1.jpg +3 -0
pyproject.toml +60 -0
requirements.txt +314 -0
style.css +4 -0
uv.lock +0 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,41 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+*.jpeg filter=lfs diff=lfs merge=lfs -text
+*.whl filter=lfs diff=lfs merge=lfs -text
+*.wav filter=lfs diff=lfs merge=lfs -text
+*.jpg filter=lfs diff=lfs merge=lfs -text
+*.mp4 filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text

.pre-commit-config.yaml ADDED Viewed

	@@ -0,0 +1,33 @@

+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v5.0.0
+    hooks:
+      - id: check-executables-have-shebangs
+      - id: check-json
+      - id: check-merge-conflict
+      - id: check-shebang-scripts-are-executable
+      - id: check-toml
+      - id: check-yaml
+      - id: end-of-file-fixer
+      - id: mixed-line-ending
+        args: ["--fix=lf"]
+      - id: requirements-txt-fixer
+      - id: trailing-whitespace
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.12.0
+    hooks:
+      - id: ruff-check
+        args: ["--fix"]
+      - id: ruff-format
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v1.16.1
+    hooks:
+      - id: mypy
+        args: ["--ignore-missing-imports"]
+        additional_dependencies:
+          [
+            "types-python-slugify",
+            "types-pytz",
+            "types-PyYAML",
+            "types-requests",
+          ]

.python-version ADDED Viewed

	@@ -0,0 +1 @@


+3.10

README.md ADDED Viewed

	@@ -0,0 +1,12 @@

+---
+title: Command A Vision
+emoji: ⚡
+colorFrom: red
+colorTo: purple
+sdk: gradio
+sdk_version: 5.34.2
+app_file: app.py
+pinned: false
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,176 @@

+import os
+import base64
+from collections.abc import Iterator
+import gradio as gr
+from cohere import ClientV2
+# Identifier of the Cohere model requested on every chat call.
+model_id = "command-a-vision-07-2025"
+# Initialize Cohere client
+# The key comes from the environment; importing this module fails fast with a
+# ValueError when it is missing or empty.
+api_key = os.getenv("COHERE_API_KEY")
+if not api_key:
+    raise ValueError("COHERE_API_KEY environment variable is required")
+client = ClientV2(api_key=api_key, client_name="hf-command-a-vision-07-2025")
+# File extensions treated as images: used when counting attachments and as the
+# accepted file types of the upload textbox.
+IMAGE_FILE_TYPES = (".jpg", ".jpeg", ".png", ".webp")
+def count_files_in_new_message(paths: list[str]) -> int:
+    image_count = 0
+    for path in paths:
+        if path.endswith(IMAGE_FILE_TYPES):
+            image_count += 1
+    return image_count
+def validate_media_constraints(message: dict) -> bool:
+    image_count = count_files_in_new_message(message["files"])
+    if image_count > 10:
+        gr.Warning("Maximum 10 images are supported.")
+        return False
+    return True
+def encode_image_to_base64(image_path: str) -> str:
+    """Encode an image file to base64 data URL format."""
+    with open(image_path, "rb") as image_file:
+        encoded_string = base64.b64encode(image_file.read()).decode('utf-8')
+        # Determine file extension for MIME type
+        if image_path.lower().endswith('.png'):
+            mime_type = "image/png"
+        elif image_path.lower().endswith('.jpg') or image_path.lower().endswith('.jpeg'):
+            mime_type = "image/jpeg"
+        elif image_path.lower().endswith('.webp'):
+            mime_type = "image/webp"
+        else:
+            mime_type = "image/jpeg"  # default
+        return f"data:{mime_type};base64,{encoded_string}"
+def generate(message: dict, history: list[dict], max_new_tokens: int = 512) -> Iterator[str]:
+    if not validate_media_constraints(message):
+        yield ""
+        return
+    # Build messages for Cohere API
+    messages = []
+    # Add conversation history
+    for item in history:
+        if item["role"] == "assistant":
+            messages.append({"role": "assistant", "content": item["content"]})
+        else:
+            content = item["content"]
+            if isinstance(content, str):
+                messages.append({"role": "user", "content": [{"type": "text", "text": content}]})
+            else:
+                filepath = content[0]
+                # For file-only messages, don't include empty text
+                messages.append({
+                    "role": "user",
+                    "content": [
+                        {"type": "image_url", "image_url": {"url": encode_image_to_base64(filepath)}}
+                    ]
+                })
+    # Add current message
+    current_content = []
+    if message["text"]:
+        current_content.append({"type": "text", "text": message["text"]})
+    for file_path in message["files"]:
+        current_content.append({
+            "type": "image_url",
+            "image_url": {"url": encode_image_to_base64(file_path)}
+        })
+    # Only add the message if there's content
+    if current_content:
+        messages.append({"role": "user", "content": current_content})
+    try:
+        # Call Cohere API using the correct event type and delta access
+        response = client.chat_stream(
+            model=model_id,
+            messages=messages,
+            temperature=0.3,
+            max_tokens=max_new_tokens,
+        )
+        output = ""
+        for event in response:
+            if getattr(event, "type", None) == "content-delta":
+                # event.delta.message.content.text is the streamed text
+                text = getattr(event.delta.message.content, "text", "")
+                output += text
+                yield output
+    except Exception as e:
+        gr.Warning(f"Error calling Cohere API: {str(e)}")
+        yield ""
+# Example prompts offered in the chat UI. Each entry is a one-element list
+# holding a message dict with the same "text"/"files" keys that generate()
+# reads; image examples point at files under assets/.
+examples = [
+    [
+        {
+            "text": "Write a COBOL function to reverse a string",
+            "files": [],
+        }
+    ],
+    [
+        {
+            "text": "Como sair de um helicóptero que caiu na água?",
+            "files": [],
+        }
+    ],
+    [
+        {
+            "text": "What is the total amount of the invoice with and without tax?",
+            "files": ["assets/invoice-1.jpg"],
+        }
+    ],
+    [
+        {
+            "text": "¿Contra qué modelo gana más Aya Vision 8B?",
+            "files": ["assets/aya-vision-win-rates.png"],
+        }
+    ],
+    [
+        {
+            "text": "Erläutern Sie die Ergebnisse in der Tabelle",
+            "files": ["assets/command-a-longbech-v2.png"],
+        }
+    ],
+    [
+        {
+            "text": "Explain la théorie de la relativité en français",
+            "files": [],
+        }
+    ],
+]
+# Chat UI wired to generate(). The multimodal textbox accepts multiple files
+# restricted to IMAGE_FILE_TYPES.
+demo = gr.ChatInterface(
+    fn=generate,
+    type="messages",
+    textbox=gr.MultimodalTextbox(
+        file_types=list(IMAGE_FILE_TYPES),
+        file_count="multiple",
+        autofocus=True,
+    ),
+    multimodal=True,
+    # Extra control shown with the chat; presumably its value reaches generate()
+    # as max_new_tokens (the parameter after message and history) — confirm.
+    additional_inputs=[
+        gr.Slider(label="Max New Tokens", minimum=100, maximum=2000, step=10, value=700),
+    ],
+    stop_btn=False,
+    title="Command A Vision",
+    examples=examples,
+    run_examples_on_click=False,
+    cache_examples=False,
+    css_paths="style.css",
+    # NOTE(review): presumably (cleanup frequency, max file age) in seconds —
+    # confirm against the Gradio documentation.
+    delete_cache=(1800, 1800),
+)
+# Start the app only when this file is run as a script.
+if __name__ == "__main__":
+    demo.launch()

assets/aya-vision-win-rates.png ADDED Viewed

Git LFS Details

SHA256: b028457a3e28e5025934ca83a60f31c854fc9ae3511d7159b1f5078b4cd3068a
Pointer size: 131 Bytes
Size of remote file: 221 kB

assets/aya-vision.png ADDED Viewed

Git LFS Details

SHA256: 7b33cc0f5d1450c9b699c538d8edf3493c339f6324a26f6ce3895fa8ec6c00d8
Pointer size: 131 Bytes
Size of remote file: 296 kB

assets/command-a-longbech-v2.png ADDED Viewed

Git LFS Details

SHA256: 8edb84d67ea0f62c29be5b9adff079550486a6dad330d717314b5dfede457f11
Pointer size: 130 Bytes
Size of remote file: 76.1 kB

assets/invoice-1.jpg ADDED Viewed

Git LFS Details

SHA256: 6c173ec50fde1d0aefd552a348268cb49fdc3d87b414d6a5e5dd5e5d58d304e5
Pointer size: 131 Bytes
Size of remote file: 418 kB

pyproject.toml ADDED Viewed

	@@ -0,0 +1,60 @@

+[project]
+name = "command-a-vision-07-2025"
+version = "0.1.0"
+description = ""
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+    "accelerate>=1.8.1",
+    "av>=14.4.0",
+    "cohere>=5.16.1",
+    "gradio>=5.34.2",
+    "hf-transfer>=0.1.9",
+    "librosa>=0.11.0",
+    "spaces>=0.37.1",
+    "timm>=1.0.16",
+    "torch==2.5.1",
+    "torchvision>=0.20.1",
+    "transformers>=4.53.0",
+]
+[tool.ruff]
+line-length = 119
+[tool.ruff.lint]
+select = ["ALL"]
+ignore = [
+    "COM812", # missing-trailing-comma
+    "D203",   # one-blank-line-before-class
+    "D213",   # multi-line-summary-second-line
+    "E501",   # line-too-long
+    "SIM117", # multiple-with-statements
+    #
+    "D100",    # undocumented-public-module
+    "D101",    # undocumented-public-class
+    "D102",    # undocumented-public-method
+    "D103",    # undocumented-public-function
+    "D104",    # undocumented-public-package
+    "D105",    # undocumented-magic-method
+    "D107",    # undocumented-public-init
+    "EM101",   # raw-string-in-exception
+    "FBT001",  # boolean-type-hint-positional-argument
+    "FBT002",  # boolean-default-value-positional-argument
+    "PD901",   # pandas-df-variable-name
+    "PGH003",  # blanket-type-ignore
+    "PLR0913", # too-many-arguments
+    "PLR0915", # too-many-statements
+    "TRY003",  # raise-vanilla-args
+]
+unfixable = [
+    "F401", # unused-import
+]
+[tool.ruff.lint.pydocstyle]
+convention = "google"
+[tool.ruff.lint.per-file-ignores]
+"*.ipynb" = ["T201", "T203"]
+[tool.ruff.format]
+docstring-code-format = true

requirements.txt ADDED Viewed

	@@ -0,0 +1,314 @@

+# This file was autogenerated by uv via the following command:
+#    uv pip compile pyproject.toml -o requirements.txt
+accelerate==1.8.1
+    # via command-a-vision-07-2025 (pyproject.toml)
+aiofiles==24.1.0
+    # via gradio
+annotated-types==0.7.0
+    # via pydantic
+anyio==4.9.0
+    # via
+    #   gradio
+    #   httpx
+    #   starlette
+audioread==3.0.1
+    # via librosa
+av==14.4.0
+    # via command-a-vision-07-2025 (pyproject.toml)
+certifi==2025.6.15
+    # via
+    #   httpcore
+    #   httpx
+    #   requests
+cffi==1.17.1
+    # via soundfile
+charset-normalizer==3.4.2
+    # via requests
+click==8.2.1
+    # via
+    #   typer
+    #   uvicorn
+decorator==5.2.1
+    # via librosa
+exceptiongroup==1.3.0
+    # via anyio
+fastapi==0.115.13
+    # via gradio
+ffmpy==0.6.0
+    # via gradio
+filelock==3.18.0
+    # via
+    #   huggingface-hub
+    #   torch
+    #   transformers
+    #   triton
+fsspec==2025.5.1
+    # via
+    #   gradio-client
+    #   huggingface-hub
+    #   torch
+gradio==5.34.2
+    # via
+    #   command-a-vision-07-2025 (pyproject.toml)
+    #   spaces
+gradio-client==1.10.3
+    # via gradio
+groovy==0.1.2
+    # via gradio
+h11==0.16.0
+    # via
+    #   httpcore
+    #   uvicorn
+hf-transfer==0.1.9
+    # via command-a-vision-07-2025 (pyproject.toml)
+hf-xet==1.1.5
+    # via huggingface-hub
+httpcore==1.0.9
+    # via httpx
+httpx==0.28.1
+    # via
+    #   gradio
+    #   gradio-client
+    #   safehttpx
+    #   spaces
+huggingface-hub==0.33.0
+    # via
+    #   accelerate
+    #   gradio
+    #   gradio-client
+    #   timm
+    #   tokenizers
+    #   transformers
+idna==3.10
+    # via
+    #   anyio
+    #   httpx
+    #   requests
+jinja2==3.1.6
+    # via
+    #   gradio
+    #   torch
+joblib==1.5.1
+    # via
+    #   librosa
+    #   scikit-learn
+lazy-loader==0.4
+    # via librosa
+librosa==0.11.0
+    # via command-a-vision-07-2025 (pyproject.toml)
+llvmlite==0.44.0
+    # via numba
+markdown-it-py==3.0.0
+    # via rich
+markupsafe==3.0.2
+    # via
+    #   gradio
+    #   jinja2
+mdurl==0.1.2
+    # via markdown-it-py
+mpmath==1.3.0
+    # via sympy
+msgpack==1.1.1
+    # via librosa
+networkx==3.4.2
+    # via torch
+numba==0.61.2
+    # via librosa
+numpy==2.2.6
+    # via
+    #   accelerate
+    #   gradio
+    #   librosa
+    #   numba
+    #   pandas
+    #   scikit-learn
+    #   scipy
+    #   soundfile
+    #   soxr
+    #   torchvision
+    #   transformers
+nvidia-cublas-cu12==12.4.5.8
+    # via
+    #   nvidia-cudnn-cu12
+    #   nvidia-cusolver-cu12
+    #   torch
+nvidia-cuda-cupti-cu12==12.4.127
+    # via torch
+nvidia-cuda-nvrtc-cu12==12.4.127
+    # via torch
+nvidia-cuda-runtime-cu12==12.4.127
+    # via torch
+nvidia-cudnn-cu12==9.1.0.70
+    # via torch
+nvidia-cufft-cu12==11.2.1.3
+    # via torch
+nvidia-curand-cu12==10.3.5.147
+    # via torch
+nvidia-cusolver-cu12==11.6.1.9
+    # via torch
+nvidia-cusparse-cu12==12.3.1.170
+    # via
+    #   nvidia-cusolver-cu12
+    #   torch
+nvidia-nccl-cu12==2.21.5
+    # via torch
+nvidia-nvjitlink-cu12==12.4.127
+    # via
+    #   nvidia-cusolver-cu12
+    #   nvidia-cusparse-cu12
+    #   torch
+nvidia-nvtx-cu12==12.4.127
+    # via torch
+orjson==3.10.18
+    # via gradio
+packaging==25.0
+    # via
+    #   accelerate
+    #   gradio
+    #   gradio-client
+    #   huggingface-hub
+    #   lazy-loader
+    #   pooch
+    #   spaces
+    #   transformers
+pandas==2.3.0
+    # via gradio
+pillow==11.2.1
+    # via
+    #   gradio
+    #   torchvision
+platformdirs==4.3.8
+    # via pooch
+pooch==1.8.2
+    # via librosa
+psutil==5.9.8
+    # via
+    #   accelerate
+    #   spaces
+pycparser==2.22
+    # via cffi
+pydantic==2.11.7
+    # via
+    #   fastapi
+    #   gradio
+    #   spaces
+pydantic-core==2.33.2
+    # via pydantic
+pydub==0.25.1
+    # via gradio
+pygments==2.19.2
+    # via rich
+python-dateutil==2.9.0.post0
+    # via pandas
+python-multipart==0.0.20
+    # via gradio
+pytz==2025.2
+    # via pandas
+pyyaml==6.0.2
+    # via
+    #   accelerate
+    #   gradio
+    #   huggingface-hub
+    #   timm
+    #   transformers
+regex==2024.11.6
+    # via transformers
+requests==2.32.4
+    # via
+    #   huggingface-hub
+    #   pooch
+    #   spaces
+    #   transformers
+rich==14.0.0
+    # via typer
+ruff==0.12.0
+    # via gradio
+safehttpx==0.1.6
+    # via gradio
+safetensors==0.5.3
+    # via
+    #   accelerate
+    #   timm
+    #   transformers
+scikit-learn==1.7.0
+    # via librosa
+scipy==1.15.3
+    # via
+    #   librosa
+    #   scikit-learn
+semantic-version==2.10.0
+    # via gradio
+shellingham==1.5.4
+    # via typer
+six==1.17.0
+    # via python-dateutil
+sniffio==1.3.1
+    # via anyio
+soundfile==0.13.1
+    # via librosa
+soxr==0.5.0.post1
+    # via librosa
+spaces==0.37.1
+    # via command-a-vision-07-2025 (pyproject.toml)
+starlette==0.46.2
+    # via
+    #   fastapi
+    #   gradio
+sympy==1.13.1
+    # via torch
+threadpoolctl==3.6.0
+    # via scikit-learn
+timm==1.0.16
+    # via command-a-vision-07-2025 (pyproject.toml)
+tokenizers==0.21.2
+    # via transformers
+tomlkit==0.13.3
+    # via gradio
+torch==2.5.1
+    # via
+    #   command-a-vision-07-2025 (pyproject.toml)
+    #   accelerate
+    #   timm
+    #   torchvision
+torchvision==0.20.1
+    # via
+    #   command-a-vision-07-2025 (pyproject.toml)
+    #   timm
+tqdm==4.67.1
+    # via
+    #   huggingface-hub
+    #   transformers
+transformers==4.53.0
+    # via command-a-vision-07-2025 (pyproject.toml)
+triton==3.1.0
+    # via torch
+typer==0.16.0
+    # via gradio
+typing-extensions==4.14.0
+    # via
+    #   anyio
+    #   exceptiongroup
+    #   fastapi
+    #   gradio
+    #   gradio-client
+    #   huggingface-hub
+    #   librosa
+    #   pydantic
+    #   pydantic-core
+    #   rich
+    #   spaces
+    #   torch
+    #   typer
+    #   typing-inspection
+    #   uvicorn
+typing-inspection==0.4.1
+    # via pydantic
+tzdata==2025.2
+    # via pandas
+urllib3==2.5.0
+    # via requests
+uvicorn==0.34.3
+    # via gradio
+websockets==15.0.1
+    # via gradio-client
+cohere==5.16.1

style.css ADDED Viewed

	@@ -0,0 +1,4 @@

+h1 {
+  text-align: center;
+  display: block;
+}

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff