import streamlit as st

st.markdown("""
<style>
.main-title {
    font-size: 36px;
    color: #4A90E2;
    font-weight: bold;
    text-align: center;
}
.sub-title {
    font-size: 24px;
    color: #4A90E2;
    margin-top: 20px;
}
.section {
    background-color: #f9f9f9;
    padding: 15px;
    border-radius: 10px;
    margin-top: 20px;
}
.section h2 {
    font-size: 22px;
    color: #4A90E2;
}
.section p, .section ul {
    color: #666666;
}
.link {
    color: #4A90E2;
    text-decoration: none;
}
.benchmark-table {
    width: 100%;
    border-collapse: collapse;
    margin-top: 20px;
}
.benchmark-table th, .benchmark-table td {
    border: 1px solid #ddd;
    padding: 8px;
    text-align: left;
}
.benchmark-table th {
    background-color: #4A90E2;
    color: white;
}
.benchmark-table td {
    background-color: #f2f2f2;
}
</style>
""", unsafe_allow_html=True)
st.markdown('<div class="main-title">Image Zero Shot Classification with CLIP</div>', unsafe_allow_html=True)

st.markdown("""
<div class="section">
<p><strong>CLIP (Contrastive Language-Image Pre-Training)</strong> is a neural network trained on image-text pairs. It can classify images against labels supplied at inference time rather than labels hard-coded in advance, making it highly flexible; this mirrors the zero-shot capabilities of the GPT-2 and GPT-3 language models.</p>
<p>This model was imported from Hugging Face Transformers: <a class="link" href="https://huggingface.co/openai/clip-vit-base-patch32" target="_blank">CLIP Model on Hugging Face</a></p>
</div>
""", unsafe_allow_html=True)
st.markdown('<div class="sub-title">How to Use the Model</div>', unsafe_allow_html=True)

st.code('''
import sparknlp
from sparknlp.base import *
from sparknlp.annotator import *
from pyspark.ml import Pipeline

# Start a Spark session with Spark NLP
spark = sparknlp.start()

# Load image data
imageDF = spark.read \\
    .format("image") \\
    .option("dropInvalid", value=True) \\
    .load("src/test/resources/image/")

# Assemble raw images into the format the annotators expect
imageAssembler = ImageAssembler() \\
    .setInputCol("image") \\
    .setOutputCol("image_assembler")

# Define candidate labels for zero-shot classification
candidateLabels = [
    "a photo of a bird",
    "a photo of a cat",
    "a photo of a dog",
    "a photo of a hen",
    "a photo of a hippo",
    "a photo of a room",
    "a photo of a tractor",
    "a photo of an ostrich",
    "a photo of an ox"]

# Define the CLIP zero-shot classifier
imageClassifier = CLIPForZeroShotClassification \\
    .pretrained() \\
    .setInputCols(["image_assembler"]) \\
    .setOutputCol("label") \\
    .setCandidateLabels(candidateLabels)

# Create the pipeline
pipeline = Pipeline().setStages([imageAssembler, imageClassifier])

# Apply the pipeline to the image data
pipelineDF = pipeline.fit(imageDF).transform(imageDF)

# Show results
pipelineDF \\
    .selectExpr("reverse(split(image.origin, '/'))[0] as image_name", "label.result") \\
    .show(truncate=False)
''', language='python')
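st.markdown("""
<div class="section">
<p>To pin the exact checkpoint listed in the Model Information section below instead of the default, the pretrained name and language can be passed explicitly. A minimal sketch, reusing the <code>candidateLabels</code> list from the example above and assuming the model name and language shown below:</p>
</div>
""", unsafe_allow_html=True)
st.code('''
imageClassifier = CLIPForZeroShotClassification \\
    .pretrained("zero_shot_classifier_clip_vit_base_patch32", "en") \\
    .setInputCols(["image_assembler"]) \\
    .setOutputCol("label") \\
    .setCandidateLabels(candidateLabels)
''', language='python')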
st.markdown('<div class="sub-title">Results</div>', unsafe_allow_html=True)

st.markdown("""
<div class="section">
<table class="benchmark-table">
<tr>
<th>Image Name</th>
<th>Result</th>
</tr>
<tr>
<td>palace.JPEG</td>
<td>[a photo of a room]</td>
</tr>
<tr>
<td>egyptian_cat.jpeg</td>
<td>[a photo of a cat]</td>
</tr>
<tr>
<td>hippopotamus.JPEG</td>
<td>[a photo of a hippo]</td>
</tr>
<tr>
<td>hen.JPEG</td>
<td>[a photo of a hen]</td>
</tr>
<tr>
<td>ostrich.JPEG</td>
<td>[a photo of an ostrich]</td>
</tr>
<tr>
<td>junco.JPEG</td>
<td>[a photo of a bird]</td>
</tr>
<tr>
<td>bluetick.jpg</td>
<td>[a photo of a dog]</td>
</tr>
<tr>
<td>chihuahua.jpg</td>
<td>[a photo of a dog]</td>
</tr>
<tr>
<td>tractor.JPEG</td>
<td>[a photo of a tractor]</td>
</tr>
<tr>
<td>ox.JPEG</td>
<td>[a photo of an ox]</td>
</tr>
</table>
</div>
""", unsafe_allow_html=True)
st.markdown('<div class="sub-title">Model Information</div>', unsafe_allow_html=True)

st.markdown("""
<div class="section">
<table class="benchmark-table">
<tr>
<th>Attribute</th>
<th>Description</th>
</tr>
<tr>
<td><strong>Model Name</strong></td>
<td>zero_shot_classifier_clip_vit_base_patch32</td>
</tr>
<tr>
<td><strong>Compatibility</strong></td>
<td>Spark NLP 5.2.0+</td>
</tr>
<tr>
<td><strong>License</strong></td>
<td>Open Source</td>
</tr>
<tr>
<td><strong>Edition</strong></td>
<td>Official</td>
</tr>
<tr>
<td><strong>Input Labels</strong></td>
<td>[image_assembler]</td>
</tr>
<tr>
<td><strong>Output Labels</strong></td>
<td>[classification]</td>
</tr>
<tr>
<td><strong>Language</strong></td>
<td>en</td>
</tr>
<tr>
<td><strong>Size</strong></td>
<td>392.8 MB</td>
</tr>
</table>
</div>
""", unsafe_allow_html=True)
st.markdown('<div class="sub-title">Data Source</div>', unsafe_allow_html=True)

st.markdown("""
<div class="section">
<p>The CLIP model is available on <a class="link" href="https://huggingface.co/openai/clip-vit-base-patch32" target="_blank">Hugging Face</a>. This model was trained on image-text pairs and can be used for zero-shot image classification.</p>
</div>
""", unsafe_allow_html=True)
st.markdown('<div class="sub-title">References</div>', unsafe_allow_html=True)

st.markdown("""
<div class="section">
<ul>
<li><a class="link" href="https://sparknlp.org/2023/12/02/zero_shot_classifier_clip_vit_base_patch32_en.html" target="_blank" rel="noopener">CLIP Model on Spark NLP</a></li>
<li><a class="link" href="https://huggingface.co/openai/clip-vit-base-patch32" target="_blank" rel="noopener">CLIP Model on Hugging Face</a></li>
<li><a class="link" href="https://github.com/openai/CLIP" target="_blank" rel="noopener">CLIP GitHub Repository</a></li>
<li><a class="link" href="https://arxiv.org/abs/2103.00020" target="_blank" rel="noopener">CLIP Paper</a></li>
</ul>
</div>
""", unsafe_allow_html=True)
st.markdown('<div class="sub-title">Community & Support</div>', unsafe_allow_html=True)

st.markdown("""
<div class="section">
<ul>
<li><a class="link" href="https://sparknlp.org/" target="_blank">Official Website</a>: Documentation and examples</li>
<li><a class="link" href="https://join.slack.com/t/spark-nlp/shared_invite/zt-198dipu77-L3UWNe_AJ8xqDk0ivmih5Q" target="_blank">Slack</a>: Live discussion with the community and team</li>
<li><a class="link" href="https://github.com/JohnSnowLabs/spark-nlp" target="_blank">GitHub</a>: Bug reports, feature requests, and contributions</li>
<li><a class="link" href="https://medium.com/spark-nlp" target="_blank">Medium</a>: Spark NLP articles</li>
<li><a class="link" href="https://www.youtube.com/channel/UCmFOjlpYEhxf_wJUDuz6xxQ/videos" target="_blank">YouTube</a>: Video tutorials</li>
</ul>
</div>
""", unsafe_allow_html=True)