from text_to_animation.model import ControlAnimationModel
import gradio as gr
import os
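# Hugging Face Spaces set SPACE_AUTHOR_NAME in the environment, so its
# presence tells us whether the demo is running on a Space.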
huggingspace_name = os.environ.get("SPACE_AUTHOR_NAME")
on_huggingspace = huggingspace_name is not None
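# Preset (motion, prompt) pairs surfaced through gr.Examples below.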
examples = [
    ["Motion 1", "An astronaut dancing in the outer space"],
    ["Motion 2", "An astronaut dancing in the outer space"],
    ["Motion 3", "An astronaut dancing in the outer space"],
    ["Motion 4", "An astronaut dancing in the outer space"],
    ["Motion 5", "An astronaut dancing in the outer space"],
]
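# Build the Gradio Blocks UI: a gallery of preset pose sequences, a prompt box
# with advanced options, and the generated-video output, all wired to `model`.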
def create_demo(model: ControlAnimationModel):
    with gr.Blocks() as demo:
        with gr.Row():
            gr.Markdown("## Text and Pose Conditional Video Generation")

        with gr.Row():
            gr.Markdown(
                "Select **one motion** and enter a **prompt**, or use one of the examples below."
            )
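        # Gallery of preset pose GIFs; the hidden textbox below records which
        # motion is currently selected.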
        with gr.Column():
            gallery_pose_sequence = gr.Gallery(
                label="Pose Sequence",
                value=[
                    ("__assets__/dance1.gif", "Motion 1"),
                    ("__assets__/dance2.gif", "Motion 2"),
                    ("__assets__/dance3.gif", "Motion 3"),
                    ("__assets__/dance4.gif", "Motion 4"),
                    ("__assets__/dance5.gif", "Motion 5"),
                ],
            ).style(grid=[2], height="auto")
            input_video_path = gr.Textbox(
                label="Pose Sequence", visible=False, value="Motion 1"
            )
            gr.Markdown("## Selection")
            pose_sequence_selector = gr.Markdown("Pose Sequence: **Motion 1**")
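        # Prompt entry, run trigger, and advanced generation options.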
        with gr.Column():
            prompt = gr.Textbox(label="Prompt")
            run_button = gr.Button(value="Run")
            with gr.Accordion("Advanced options", open=False):
                watermark = gr.Radio(
                    ["Picsart AI Research", "Text2Video-Zero", "None"],
                    label="Watermark",
                    value="Picsart AI Research",
                )
                chunk_size = gr.Slider(
                    label="Chunk size",
                    minimum=2,
                    maximum=16,
                    value=8,
                    step=1,
                    visible=not on_huggingspace,
                    info="Number of frames processed at once. Reduce for lower memory usage.",
                )
                merging_ratio = gr.Slider(
                    label="Merging ratio",
                    minimum=0.0,
                    maximum=0.9,
                    step=0.1,
                    value=0.0,
                    visible=not on_huggingspace,
                    info="Fraction of tokens that are merged; higher values mean more compression (less memory, faster inference).",
                )
        with gr.Column():
            result = gr.Image(label="Generated Video")
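        # Keep the hidden textbox and the selection label in sync with the gallery.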
        input_video_path.change(on_video_path_update, None, pose_sequence_selector)
        gallery_pose_sequence.select(pose_gallery_callback, None, input_video_path)

        inputs = [
            input_video_path,
            prompt,
            chunk_size,
            watermark,
            merging_ratio,
        ]
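        # Clicking an example only fills in the inputs (run_on_click=False);
        # example outputs are pre-computed only when running on a Space.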
        gr.Examples(
            examples=examples,
            inputs=inputs,
            outputs=result,
            fn=model.process_controlnet_pose,
            cache_examples=on_huggingspace,
            run_on_click=False,
        )

        run_button.click(
            fn=model.process_controlnet_pose,
            inputs=inputs,
            outputs=result,
        )

    return demo
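# The `.change` handler receives the textbox's new value through Gradio's
# EventData; note this relies on the private `_data` attribute.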
def on_video_path_update(evt: gr.EventData):
    return f"Selection: **{evt._data}**"
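# Map the selected gallery index back to its motion name ("Motion 1" to "Motion 5").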
def pose_gallery_callback(evt: gr.SelectData):
    return f"Motion {evt.index + 1}"
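# Minimal launch sketch (hypothetical: assumes ControlAnimationModel can be
# constructed without arguments, which may not match its real signature):
#
#     if __name__ == "__main__":
#         model = ControlAnimationModel()
#         demo = create_demo(model)
#         demo.queue().launch()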
|