Files changed (2)
  1. README.md +13 -13
  2. app.py +164 -164
README.md CHANGED
@@ -1,13 +1,13 @@
- ---
- title: SigLIP Tagger
- emoji: 🧷
- colorFrom: green
- colorTo: blue
- sdk: gradio
- sdk_version: 4.16.0
- app_file: app.py
- pinned: true
- license: apache-2.0
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ ---
+ title: SigLIP Tagger
+ emoji: 🧷
+ colorFrom: green
+ colorTo: blue
+ sdk: gradio
+ sdk_version: 4.43.0
+ app_file: app.py
+ pinned: true
+ license: apache-2.0
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -1,164 +1,164 @@
- import os
- from PIL import Image
-
- import numpy as np
- import torch
-
- from transformers import (
-     AutoImageProcessor,
- )
-
- import gradio as gr
-
- from modeling_siglip import SiglipForImageClassification
-
-
- HF_TOKEN = os.environ["HF_READ_TOKEN"]
-
- EXAMPLES = [["./images/sample.jpg"], ["./images/sample2.webp"]]
-
- model_maps: dict[str, dict] = {
-     "test2": {
-         "repo": "p1atdev/siglip-tagger-test-2",
-     },
-     "test3": {
-         "repo": "p1atdev/siglip-tagger-test-3",
-     },
-     # "test4": {
-     #     "repo": "p1atdev/siglip-tagger-test-4",
-     # },
- }
-
- for key in model_maps.keys():
-     model_maps[key]["model"] = SiglipForImageClassification.from_pretrained(
-         model_maps[key]["repo"], torch_dtype=torch.bfloat16, token=HF_TOKEN
-     )
-     model_maps[key]["processor"] = AutoImageProcessor.from_pretrained(
-         model_maps[key]["repo"], token=HF_TOKEN
-     )
-
- README_MD = (
-     f"""\
- ## SigLIP Tagger Test 3
- An experimental model for tagging danbooru tags of images using SigLIP.
-
- Model(s):
- """
-     + "\n".join(
-         f"- [{value['repo']}](https://huggingface.co/{value['repo']})"
-         for value in model_maps.values()
-     )
-     + "\n"
-     + """
- Example images by NovelAI and niji・journey.
- """
- )
-
-
- def compose_text(results: dict[str, float], threshold: float = 0.3):
-     return ", ".join(
-         [
-             key
-             for key, value in sorted(results.items(), key=lambda x: x[1], reverse=True)
-             if value > threshold
-         ]
-     )
-
-
- @torch.no_grad()
- def predict_tags(image: Image.Image, model_name: str, threshold: float):
-     if image is None:
-         return None, None
-
-     inputs = model_maps[model_name]["processor"](image, return_tensors="pt")
-
-     logits = (
-         model_maps[model_name]["model"](
-             **inputs.to(
-                 model_maps[model_name]["model"].device,
-                 model_maps[model_name]["model"].dtype,
-             )
-         )
-         .logits.detach()
-         .cpu()
-         .float()
-     )
-
-     logits = np.clip(logits, 0.0, 1.0)
-
-     results = {}
-
-     for prediction in logits:
-         for i, prob in enumerate(prediction):
-             if prob.item() > 0:
-                 results[model_maps[model_name]["model"].config.id2label[i]] = (
-                     prob.item()
-                 )
-
-     return compose_text(results, threshold), results
-
-
- css = """\
- .sticky {
-     position: sticky;
-     top: 16px;
- }
-
- .gradio-container {
-     overflow: clip;
- }
- """
-
-
- def demo():
-     with gr.Blocks(css=css) as ui:
-         gr.Markdown(README_MD)
-
-         with gr.Row():
-             with gr.Column():
-                 with gr.Row(elem_classes="sticky"):
-                     with gr.Column():
-                         input_img = gr.Image(
-                             label="Input image", type="pil", height=480
-                         )
-
-                         with gr.Group():
-                             model_name_radio = gr.Radio(
-                                 label="Model",
-                                 choices=list(model_maps.keys()),
-                                 value="test3",
-                             )
-                             tag_threshold_slider = gr.Slider(
-                                 label="Tags threshold",
-                                 minimum=0.0,
-                                 maximum=1.0,
-                                 value=0.3,
-                                 step=0.01,
-                             )
-
-                         start_btn = gr.Button(value="Start", variant="primary")
-
-                         gr.Examples(
-                             examples=EXAMPLES,
-                             inputs=[input_img],
-                             cache_examples=False,
-                         )
-
-             with gr.Column():
-                 output_tags = gr.Text(label="Output text", interactive=False)
-                 output_label = gr.Label(label="Output tags")
-
-         start_btn.click(
-             fn=predict_tags,
-             inputs=[input_img, model_name_radio, tag_threshold_slider],
-             outputs=[output_tags, output_label],
-         )
-
-     ui.launch(
-         debug=True,
-         # share=True
-     )
-
-
- if __name__ == "__main__":
-     demo()
+ import os
+ from PIL import Image
+
+ import numpy as np
+ import torch
+
+ from transformers import (
+     AutoImageProcessor,
+ )
+
+ import gradio as gr
+
+ from modeling_siglip import SiglipForImageClassification
+
+
+ HF_TOKEN = os.environ.get("HF_READ_TOKEN")
+
+ EXAMPLES = [["./images/sample.jpg"], ["./images/sample2.webp"]]
+
+ model_maps: dict[str, dict] = {
+     "test2": {
+         "repo": "p1atdev/siglip-tagger-test-2",
+     },
+     "test3": {
+         "repo": "p1atdev/siglip-tagger-test-3",
+     },
+     # "test4": {
+     #     "repo": "p1atdev/siglip-tagger-test-4",
+     # },
+ }
+
+ for key in model_maps.keys():
+     model_maps[key]["model"] = SiglipForImageClassification.from_pretrained(
+         model_maps[key]["repo"], torch_dtype=torch.bfloat16, token=HF_TOKEN
+     )
+     model_maps[key]["processor"] = AutoImageProcessor.from_pretrained(
+         model_maps[key]["repo"], token=HF_TOKEN
+     )
+
+ README_MD = (
+     f"""\
+ ## SigLIP Tagger Test 3
+ An experimental model for tagging danbooru tags of images using SigLIP.
+
+ Model(s):
+ """
+     + "\n".join(
+         f"- [{value['repo']}](https://huggingface.co/{value['repo']})"
+         for value in model_maps.values()
+     )
+     + "\n"
+     + """
+ Example images by NovelAI and niji・journey.
+ """
+ )
+
+
+ def compose_text(results: dict[str, float], threshold: float = 0.3):
+     return ", ".join(
+         [
+             key
+             for key, value in sorted(results.items(), key=lambda x: x[1], reverse=True)
+             if value > threshold
+         ]
+     )
+
+
+ @torch.no_grad()
+ def predict_tags(image: Image.Image, model_name: str, threshold: float):
+     if image is None:
+         return None, None
+
+     inputs = model_maps[model_name]["processor"](image, return_tensors="pt")
+
+     logits = (
+         model_maps[model_name]["model"](
+             **inputs.to(
+                 model_maps[model_name]["model"].device,
+                 model_maps[model_name]["model"].dtype,
+             )
+         )
+         .logits.detach()
+         .cpu()
+         .float()
+     )
+
+     logits = np.clip(logits, 0.0, 1.0)
+
+     results = {}
+
+     for prediction in logits:
+         for i, prob in enumerate(prediction):
+             if prob.item() > 0:
+                 results[model_maps[model_name]["model"].config.id2label[i]] = (
+                     prob.item()
+                 )
+
+     return compose_text(results, threshold), results
+
+
+ css = """\
+ .sticky {
+     position: sticky;
+     top: 16px;
+ }
+
+ .gradio-container {
+     overflow: clip;
+ }
+ """
+
+
+ def demo():
+     with gr.Blocks(css=css) as ui:
+         gr.Markdown(README_MD)
+
+         with gr.Row():
+             with gr.Column():
+                 with gr.Row(elem_classes="sticky"):
+                     with gr.Column():
+                         input_img = gr.Image(
+                             label="Input image", type="pil", height=480
+                         )
+
+                         with gr.Group():
+                             model_name_radio = gr.Radio(
+                                 label="Model",
+                                 choices=list(model_maps.keys()),
+                                 value="test3",
+                             )
+                             tag_threshold_slider = gr.Slider(
+                                 label="Tags threshold",
+                                 minimum=0.0,
+                                 maximum=1.0,
+                                 value=0.3,
+                                 step=0.01,
+                             )
+
+                         start_btn = gr.Button(value="Start", variant="primary")
+
+                         gr.Examples(
+                             examples=EXAMPLES,
+                             inputs=[input_img],
+                             cache_examples=False,
+                         )
+
+             with gr.Column():
+                 output_tags = gr.Text(label="Output text", interactive=False)
+                 output_label = gr.Label(label="Output tags")
+
+         start_btn.click(
+             fn=predict_tags,
+             inputs=[input_img, model_name_radio, tag_threshold_slider],
+             outputs=[output_tags, output_label],
+         )
+
+     ui.launch(
+         debug=True,
+         # share=True
+     )
+
+
+ if __name__ == "__main__":
+     demo()
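
Aside from the `sdk_version` bump in README.md, the only line of app.py that changes is the token lookup: `os.environ["HF_READ_TOKEN"]` raises a `KeyError` and crashes the Space at startup when the secret is missing, while `os.environ.get("HF_READ_TOKEN")` returns `None` and lets the app start. A minimal standalone sketch of that difference follows; it reuses the `AutoImageProcessor` call and repo id already present in the file, and the behavior when the repo actually requires authentication is an assumption, not something this diff guarantees.

```python
import os

from transformers import AutoImageProcessor

# Sketch only, not part of the Space itself.
# os.environ["HF_READ_TOKEN"] would raise KeyError here if the variable is
# unset; .get() returns None instead, and from_pretrained(token=None) falls
# back to any locally saved credentials or anonymous access.
token = os.environ.get("HF_READ_TOKEN")

if token is None:
    print("HF_READ_TOKEN not set; only repos reachable without it will load.")

# Repo id taken from model_maps above. If the repo is private or gated and no
# token is available, this call still fails, just later and with a clearer error.
processor = AutoImageProcessor.from_pretrained(
    "p1atdev/siglip-tagger-test-3", token=token
)
```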