|
import gradio as gr |
|
import torch |
|
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler |
|
from diffusers.utils import export_to_video |
|
|
|
# Hub identifier of the text-to-video checkpoint used by this app.
MODEL_ID = "cerspense/zeroscope_v2_576w"

# Load the pipeline once at import time, with weights in half precision.
pipe = DiffusionPipeline.from_pretrained(MODEL_ID, torch_dtype=torch.float16)

# Replace the pipeline's scheduler with a DPM-Solver multistep scheduler
# built from the configuration of the scheduler it shipped with.
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)

# Enable diffusers' model CPU offloading on the pipeline.
pipe.enable_model_cpu_offload()
|
|
|
def infer(
    prompt: str,
    *,
    num_inference_steps: int = 40,
    height: int = 320,
    width: int = 576,
    num_frames: int = 24,
) -> str:
    """Generate a video for *prompt* and return the path of the exported file.

    The sampling settings are keyword-only and default to the values this
    app has always used, so calling ``infer(prompt)`` (as the Gradio click
    handler does) behaves exactly as before.

    Args:
        prompt: Text description passed to the module-level ``pipe``.
        num_inference_steps: Value forwarded as ``num_inference_steps``.
        height: Value forwarded as ``height``.
        width: Value forwarded as ``width``.
        num_frames: Value forwarded as ``num_frames``.

    Returns:
        Whatever ``export_to_video`` returns for the generated frames
        (presumably the path of the written video file; verify against the
        installed diffusers version).
    """
    result = pipe(
        prompt,
        num_inference_steps=num_inference_steps,
        height=height,
        width=width,
        num_frames=num_frames,
    )

    # NOTE(review): the layout of `.frames` differs between diffusers
    # releases (some return a leading batch axis) - confirm that passing it
    # straight to `export_to_video` matches the pinned version.
    video_path = export_to_video(result.frames)

    # Log the output location to the server console.
    print(video_path)
    return video_path
|
|
|
# Page-level styles: cap and center the main column, and emphasise links.
css = """
#col-container {max-width: 510px; margin-left: auto; margin-right: auto;}
a {text-decoration-line: underline; font-weight: 600;}
"""

with gr.Blocks(css=css) as demo:
    # Single column that the CSS above targets through its element id.
    with gr.Column(elem_id="col-container"):
        # Header: title, model description and a "Duplicate this Space" badge.
        gr.Markdown(
            """
            <h1 style="text-align: center;">Zeroscope Text-to-Video</h1>

            A watermark-free Modelscope-based video model optimized for producing high-quality 16:9 compositions and a smooth video output. <br />
            This zeroscope_v2_576w model was trained using 9,923 clips and 29,769 tagged frames at 24 frames, 576x320 resolution.<br />

            [![Duplicate this Space](https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-sm.svg)](https://huggingface.co/spaces/fffiloni/zeroscope?duplicate=true)

            """
        )

        # Input widget, trigger button and output player, in display order.
        prompt_in = gr.Textbox(
            label="Prompt",
            placeholder="Darth Vader is surfing on waves",
        )
        submit_btn = gr.Button("Submit")
        video_result = gr.Video(label="Video Output")

        # Clicking the button runs `infer` on the prompt text and shows the
        # value it returns in the video component.
        submit_btn.click(
            fn=infer,
            inputs=[prompt_in],
            outputs=[video_result],
        )

# Enable request queuing with a maximum queue size of 12, then start the app.
demo.queue(max_size=12)
demo.launch()
|
|