rynmurdock committed
Commit e573858
1 Parent(s): f94c06d

lfs and sync with blue-tigers github

Files changed (5)
  1. .gitattributes +20 -0
  2. app.py +78 -108
  3. lightning_app.py +0 -452
  4. requirements.txt +1 -3
  5. twitter_prompts.csv +0 -72
.gitattributes CHANGED
@@ -1 +1,21 @@
 nsfweffnetv2-b02-3epochs.h5 filter=lfs diff=lfs merge=lfs -text
+fifth.gemb_.pt filter=lfs diff=lfs merge=lfs -text
+ninth.im_.pt filter=lfs diff=lfs merge=lfs -text
+tenth.gemb_.pt filter=lfs diff=lfs merge=lfs -text
+third.gemb_.pt filter=lfs diff=lfs merge=lfs -text
+eigth.gemb_.pt filter=lfs diff=lfs merge=lfs -text
+first.gemb_.pt filter=lfs diff=lfs merge=lfs -text
+fourth.gemb_.pt filter=lfs diff=lfs merge=lfs -text
+ninth.gemb_.pt filter=lfs diff=lfs merge=lfs -text
+sixth.gemb_.pt filter=lfs diff=lfs merge=lfs -text
+tenth.im_.pt filter=lfs diff=lfs merge=lfs -text
+eigth.im_.pt filter=lfs diff=lfs merge=lfs -text
+seventh.gemb_.pt filter=lfs diff=lfs merge=lfs -text
+sixth.im_.pt filter=lfs diff=lfs merge=lfs -text
+third.im_.pt filter=lfs diff=lfs merge=lfs -text
+fifth.im_.pt filter=lfs diff=lfs merge=lfs -text
+first.im_.pt filter=lfs diff=lfs merge=lfs -text
+fourth.im_.pt filter=lfs diff=lfs merge=lfs -text
+second.gemb_.pt filter=lfs diff=lfs merge=lfs -text
+second.im_.pt filter=lfs diff=lfs merge=lfs -text
+seventh.im_.pt filter=lfs diff=lfs merge=lfs -text
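These LFS-tracked `*.im_.pt` / `*.gemb_.pt` tensors are the precomputed calibration-video embeddings that `app.py` now loads at startup (see the changes below). A quick, hypothetical sanity check that a clone actually fetched the LFS objects rather than pointer stubs (file name chosen as an example):

```python
import torch

# If the repository was cloned without `git lfs pull`, this path holds a small
# text pointer file instead of a serialized tensor, and torch.load will raise.
emb = torch.load('first.im_.pt', map_location='cpu')
print(type(emb), getattr(emb, 'shape', None))
```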
app.py CHANGED
@@ -10,12 +10,9 @@ STEPS = 6
 output_hidden_state = False
 device = "cuda"
 dtype = torch.bfloat16
+N_IMG_EMBS = 3
 
-import matplotlib.pyplot as plt
-import matplotlib
 import logging
-
-
 import os
 import imageio
 import gradio as gr
@@ -24,8 +21,6 @@ from sklearn.svm import SVC
 from sklearn import preprocessing
 import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
-import sched
-import threading
 
 import random
 import time
@@ -104,7 +99,7 @@ pipe = AnimateDiffPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", mot
 unet=unet, text_encoder=text_encoder)
 pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config, beta_schedule="linear")
 pipe.load_lora_weights("wangfuyun/AnimateLCM", weight_name="AnimateLCM_sd15_t2v_lora.safetensors", adapter_name="lcm-lora",)
-pipe.set_adapters(["lcm-lora"], [.9])
+pipe.set_adapters(["lcm-lora"], [.95])
 pipe.fuse_lora()
 
 
@@ -121,6 +116,7 @@ pipe.unet.fuse_qkv_projections()
 #pipe.enable_free_init(method="gaussian", use_fast_sampling=True)
 
 pipe.to(device=DEVICE)
+
 #pipe.unet = torch.compile(pipe.unet)
 #pipe.vae = torch.compile(pipe.vae)
 
@@ -130,9 +126,10 @@ pipe.to(device=DEVICE)
 from transformers import AutoProcessor, PaliGemmaForConditionalGeneration, BitsAndBytesConfig
 
 quantization_config = BitsAndBytesConfig(load_in_4bit=True)
-pali = PaliGemmaForConditionalGeneration.from_pretrained('google/paligemma-3b-pt-224', torch_dtype=dtype, device_map='cuda').eval()
+pali = PaliGemmaForConditionalGeneration.from_pretrained('google/paligemma-3b-pt-224', torch_dtype=dtype, quantization_config=quantization_config).eval()
 processor = AutoProcessor.from_pretrained('google/paligemma-3b-pt-224')
 
+#pali = torch.compile(pali)
 
 @spaces.GPU()
 def to_wanted_embs(image_outputs, input_ids, attention_mask, cache_position=None):
@@ -148,19 +145,34 @@ def to_wanted_embs(image_outputs, input_ids, attention_mask, cache_position=None
     return inputs_embeds
 
 
+# TODO cache descriptions?
 @spaces.GPU()
-def generate_pali(user_emb):
-    with torch.no_grad():
-        prompt = 'caption en'
-        model_inputs = processor(text=prompt, images=torch.zeros(1, 3, 224, 224), return_tensors="pt")
-        # we need to get im_embs taken in here.
-        input_len = model_inputs["input_ids"].shape[-1]
-        input_embeds = to_wanted_embs(user_emb.squeeze()[None, None, :].repeat(1, 256, 1),
-                                      model_inputs["input_ids"].to(device),
-                                      model_inputs["attention_mask"].to(device))
-
-        generation = pali.generate(max_new_tokens=100, do_sample=True, top_p=.94, temperature=1.2, inputs_embeds=input_embeds)
-        decoded = processor.decode(generation[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)
+def generate_pali(n_embs):
+    prompt = 'caption en'
+    model_inputs = processor(text=prompt, images=torch.zeros(1, 3, 224, 224), return_tensors="pt")
+    # we need to get im_embs taken in here.
+
+    descs = ''
+    for n, emb in enumerate(n_embs):
+        if n < len(n_embs)-1:
+            input_len = model_inputs["input_ids"].shape[-1]
+            input_embeds = to_wanted_embs(emb,
+                                          model_inputs["input_ids"].to(device),
+                                          model_inputs["attention_mask"].to(device))
+            generation = pali.generate(max_new_tokens=20, do_sample=True, top_p=.94, temperature=1.2, inputs_embeds=input_embeds)
+            decoded = processor.decode(generation[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)
+            descs += f'Description: {decoded}\n'
+        else:
+            prompt = f'en {descs} Describe a new image that is similar.'
+            print(prompt)
+            model_inputs = processor(text=prompt, images=torch.zeros(1, 3, 224, 224), return_tensors="pt")
+            input_len = model_inputs["input_ids"].shape[-1]
+            input_embeds = to_wanted_embs(emb,
+                                          model_inputs["input_ids"].to(device),
+                                          model_inputs["attention_mask"].to(device))
+            generation = pali.generate(max_new_tokens=20, do_sample=True, top_p=.94, temperature=1.2, inputs_embeds=input_embeds)
+            decoded = processor.decode(generation[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)
 
     return decoded
 
 
@@ -182,7 +194,7 @@ def generate_gpu(in_im_embs, prompt='the scene'):
     im = torchvision.transforms.ToTensor()(output.frames[0][len(output.frames[0])//2]).unsqueeze(0)
     im = torch.nn.functional.interpolate(im, (224, 224))
     im = (im - .5) * 2
-    gemb = pali.vision_tower(im.to(device).to(dtype)).last_hidden_state.detach().to('cpu').to(torch.float32).mean(1)
+    gemb = pali.vision_tower(im.to(device).to(dtype)).last_hidden_state.detach().to('cpu').to(torch.float32)
    return output, im_emb, gemb
 
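One way to read the change above: dropping `.mean(1)` keeps the PaliGemma vision tower's full per-patch token sequence instead of a single pooled vector, so the new captioning path can consume it directly. A minimal sketch of the assumed shapes (random stand-in tensors; 256 and 1152 are taken from the existing code):

```python
import torch

pooled = torch.randn(1, 1152)           # old behaviour: last_hidden_state.mean(1)
sequence = torch.randn(1, 256, 1152)    # new behaviour: full last_hidden_state

# The old generate_pali re-expanded the pooled vector to 256 tokens before
# splicing it into the prompt embeddings; the unpooled sequence already has that shape.
expanded = pooled.squeeze()[None, None, :].repeat(1, 256, 1)
print(expanded.shape, sequence.shape)   # both torch.Size([1, 256, 1152])
```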
 
@@ -210,10 +222,10 @@
 def get_user_emb(embs, ys):
     # handle case where every instance of calibration videos is 'Neither' or 'Like' or 'Dislike'
 
-    if len(list(ys)) <= 7:
-        aways = [.01*torch.randn_like(embs[0]) for i in range(3)]
+    if len(list(ys)) <= 10:
+        aways = [torch.zeros_like(embs[0]) for i in range(10)]
         embs += aways
-        awal = [0 for i in range(3)]
+        awal = [0 for i in range(5)] + [1 for i in range(5)]
         ys += awal
 
     indices = list(range(len(embs)))
@@ -241,9 +253,10 @@ def get_user_emb(embs, ys):
     feature_embs = feature_embs / feature_embs.norm()
 
     #lin_class = Ridge(fit_intercept=False).fit(feature_embs, chosen_y)
-    lin_class = SVC(max_iter=20, kernel='linear', C=.1, class_weight='balanced').fit(feature_embs.squeeze(), chosen_y)
+    #class_weight='balanced'
+    lin_class = SVC(max_iter=500, kernel='linear', C=.1, ).fit(feature_embs.squeeze(), chosen_y)
     coef_ = torch.tensor(lin_class.coef_, dtype=torch.float32).detach().to('cpu')
-    coef_ = coef_ / coef_.abs().max() * 3
+    coef_ = coef_ / coef_.abs().max()
 
     w = 1# if len(embs) % 2 == 0 else 0
     im_emb = w * coef_.to(dtype=dtype)
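For context on the classifier tweak above: the "user embedding" is essentially the weight vector of a linear SVM fit on liked vs. disliked embeddings, rescaled so its largest coefficient is 1. A minimal, self-contained sketch of that idea with made-up data (the real code also standardizes and norm-scales `feature_embs` first):

```python
import torch
from sklearn.svm import SVC

# Hypothetical rated IP-Adapter image embeddings and like/dislike labels.
embs = [torch.randn(1024) for _ in range(12)]
ys = [1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1]

X = torch.stack(embs).numpy()
clf = SVC(max_iter=500, kernel='linear', C=.1).fit(X, ys)  # linear kernel exposes coef_

# The separating hyperplane's normal points from the "dislike" class toward "like",
# so it can be reused as a preference direction for the next generation.
coef = torch.tensor(clf.coef_, dtype=torch.float32)
user_emb = coef / coef.abs().max()
print(user_emb.shape)  # torch.Size([1, 1024])
```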
@@ -273,7 +286,7 @@ def background_next_image():
    # only let it get N (maybe 3) ahead of the user
    #not_rated_rows = prevs_df[[i[1]['user:rating'] == {' ': ' '} for i in prevs_df.iterrows()]]
    rated_rows = prevs_df[[i[1]['user:rating'] != {' ': ' '} for i in prevs_df.iterrows()]]
-    while len(rated_rows) < 4:
+    while len(rated_rows) < 5:
        # not_rated_rows = prevs_df[[i[1]['user:rating'] == {' ': ' '} for i in prevs_df.iterrows()]]
        rated_rows = prevs_df[[i[1]['user:rating'] != {' ': ' '} for i in prevs_df.iterrows()]]
        time.sleep(.01)
@@ -290,25 +303,21 @@ def background_next_image():
        rated_from_user = rated_rows[[i[1]['from_user_id'] == uid for i in rated_rows.iterrows()]]
 
        # we pop previous ratings if there are > n
-        if len(rated_from_user) >= 15:
+        if len(rated_from_user) >= 25:
            oldest = rated_from_user.iloc[0]['paths']
            prevs_df = prevs_df[prevs_df['paths'] != oldest]
        # we don't compute more after n are in the queue for them
-        if len(unrated_from_user) >= 10:
-            continue
-
-        if len(rated_rows) < 5:
+        if len(unrated_from_user) >= 20:
            continue
 
        embs, ys, gembs = pluck_embs_ys(uid)
-
-        user_emb = get_user_emb(embs, ys)
-
-        if len(gembs) > 4:
-            user_gem = get_user_emb(gembs, ys) / 4 # TODO scale this correctly; matplotlib, etc.
-            text = generate_pali(user_gem)
+        user_emb = get_user_emb(embs, ys) * 3
+        pos_gembs = [g for g, y in zip(gembs, ys) if y == 1]
+        if len(pos_gembs) > 4:
+            hist_gem = random.sample(pos_gembs, N_IMG_EMBS) # rng n embeddings
+            text = generate_pali(hist_gem)
        else:
-            text = generate_pali(torch.zeros(1, 1152))
+            text = 'the scene'
        img, embs, new_gem = generate(user_emb, text)
 
        if img:
@@ -351,60 +360,16 @@ def next_image(calibrate_prompts, user_id):
    if len(calibrate_prompts) > 0:
        cal_video = calibrate_prompts.pop(0)
        image = prevs_df[prevs_df['paths'] == cal_video]['paths'].to_list()[0]
-
        return image, calibrate_prompts, ''
    else:
        embs, ys, gembs = pluck_embs_ys(user_id)
-        user_emb = get_user_emb(embs, ys)
+        user_emb = get_user_emb(embs, ys) * 3
        image, text = pluck_img(user_id, user_emb)
        return image, calibrate_prompts, text
 
 
 
-
-
-
-done_init = False
-
 def start(_, calibrate_prompts, user_id, request: gr.Request):
-    global done_init
-    global prevs_df
-
-    if not done_init:
-        # prep our calibration videos
-        for im in [
-            './first.mp4',
-            # './second.mp4',
-            # './third.mp4',
-            # './fourth.mp4',
-            # './fifth.mp4',
-            # './sixth.mp4',
-            # './seventh.mp4',
-            # './eigth.mp4',
-            # './ninth.mp4',
-            # './tenth.mp4',
-        ]:
-            tmp_df = pd.DataFrame(columns=['paths', 'embeddings', 'ips', 'user:rating', 'text', 'gemb'])
-            tmp_df['paths'] = [im]
-            image = list(imageio.imiter(im))
-            image = image[len(image)//2]
-
-            im = torchvision.transforms.ToTensor()(image).unsqueeze(0)
-            im = torch.nn.functional.interpolate(im, (224, 224))
-            im = (im - .5) * 2
-
-            im_emb, gemb = encode_space(image, im)
-            im_emb = im_emb.to('cpu')
-            gemb = gemb.to('cpu')
-
-            tmp_df['embeddings'] = [im_emb]
-            tmp_df['gemb'] = [gemb]
-            tmp_df['user:rating'] = [{' ': ' '}]
-            prevs_df = pd.concat((prevs_df, tmp_df))
-        done_init = True
-
-
-
    user_id = int(str(time.time())[-7:].replace('.', ''))
    image, calibrate_prompts, text = next_image(calibrate_prompts, user_id)
    return [
@@ -436,6 +401,7 @@ def choose(img, choice, calibrate_prompts, user_id, request: gr.Request):
        print('NSFW -- choice is disliked')
        choice = 0
 
+    print(prevs_df['paths'].to_list(), img)
    row_mask = [p.split('/')[-1] in img for p in prevs_df['paths'].to_list()]
    # if it's still in the dataframe, add the choice
    if len(prevs_df.loc[row_mask, 'user:rating']) > 0:
@@ -506,11 +472,11 @@ Explore the latent space without text prompts based on your preferences. Learn m
    # calibration videos -- this is a misnomer now :D
    calibrate_prompts = gr.State([
        './first.mp4',
-        # './second.mp4',
-        # './third.mp4',
-        # './fourth.mp4',
-        # './fifth.mp4',
-        # './sixth.mp4',
+        './second.mp4',
+        './third.mp4',
+        './fourth.mp4',
+        './fifth.mp4',
+        './sixth.mp4',
    ])
    def l():
        return None
@@ -569,26 +535,30 @@ scheduler = BackgroundScheduler()
 scheduler.add_job(func=background_next_image, trigger="interval", seconds=.5)
 scheduler.start()
 
-#thread = threading.Thread(target=background_next_image,)
-#thread.start()
 
-# TODO shouldn't call this before gradio launch, yeah?
-@spaces.GPU(duration=50)
-def encode_space(x, im):
-    with torch.no_grad():
-        print('encode')
-        im_emb, _ = pipe.encode_image(
-            x, DEVICE, 1, output_hidden_state
-        )
-
-        print('encoded')
-
-        print('pali_enc')
-        gemb = pali.vision_tower(im.to(dtype).to('cuda')).last_hidden_state
-
-        print('pali_enced')
-        return im_emb.to('cpu'), gemb.to('cpu')
-
-demo.launch(share=True,)
+# prep our calibration videos
+for im in [
+    './first.mp4',
+    './second.mp4',
+    './third.mp4',
+    './fourth.mp4',
+    './fifth.mp4',
+    './sixth.mp4',
+    './seventh.mp4',
+    './eigth.mp4',
+    './ninth.mp4',
+    './tenth.mp4',
+]:
+    tmp_df = pd.DataFrame(columns=['paths', 'embeddings', 'ips', 'user:rating', 'text', 'gemb'])
+    tmp_df['paths'] = [im]
+    image = list(imageio.imiter(im))
+    image = image[len(image)//2]
+    tmp_df['embeddings'] = [torch.load(im.replace('mp4', 'im_.pt'))]
+    tmp_df['gemb'] = [torch.load(im.replace('mp4', 'gemb_.pt'))]
+    tmp_df['user:rating'] = [{' ': ' '}]
+    prevs_df = pd.concat((prevs_df, tmp_df))
+
+
+demo.launch(share=True, server_port=8443)
 
 
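The startup loop above now reads per-video tensors (`*.im_.pt`, `*.gemb_.pt`, the files added to LFS in this commit) instead of encoding each calibration clip on boot. A hypothetical one-off script for producing such files, loosely following the removed `encode_space` path (`precompute_video` is illustrative, not part of the repo):

```python
import imageio
import torch
import torchvision

def precompute_video(path, pipe, pali, device='cuda', dtype=torch.bfloat16):
    # Middle frame of the calibration clip.
    frames = list(imageio.imiter(path))
    frame = frames[len(frames) // 2]

    im = torchvision.transforms.ToTensor()(frame).unsqueeze(0)
    im = torch.nn.functional.interpolate(im, (224, 224))
    im = (im - .5) * 2

    # IP-Adapter image embedding and PaliGemma vision-tower features,
    # mirroring the removed encode_space() helper.
    im_emb, _ = pipe.encode_image(frame, device, 1, False)
    gemb = pali.vision_tower(im.to(device).to(dtype)).last_hidden_state

    torch.save(im_emb.to('cpu'), path.replace('mp4', 'im_.pt'))
    torch.save(gemb.to('cpu'), path.replace('mp4', 'gemb_.pt'))

# e.g. precompute_video('./first.mp4', pipe, pali)
```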
 
 
lightning_app.py DELETED
@@ -1,452 +0,0 @@
-
-import torch
-
-# lol
-sidel = 512
-DEVICE = 'cuda'
-STEPS = 4
-output_hidden_state = False
-device = "cuda"
-dtype = torch.float16
-
-import matplotlib.pyplot as plt
-import matplotlib
-matplotlib.use('TkAgg')
-
-from sklearn.linear_model import LinearRegression
-from sfast.compilers.diffusion_pipeline_compiler import (compile, compile_unet,
-                                                         CompilationConfig)
-config = CompilationConfig.Default()
-
-try:
-    import triton
-    config.enable_triton = True
-except ImportError:
-    print('Triton not installed, skip')
-config.enable_cuda_graph = True
-
-config.enable_jit = True
-config.enable_jit_freeze = True
-
-config.enable_cnn_optimization = True
-config.preserve_parameters = False
-config.prefer_lowp_gemm = True
-
-import imageio
-import gradio as gr
-import numpy as np
-from sklearn.svm import SVC
-from sklearn.inspection import permutation_importance
-from sklearn import preprocessing
-import pandas as pd
-
-import random
-import time
-from PIL import Image
-from safety_checker_improved import maybe_nsfw
-
-
-torch.set_grad_enabled(False)
-torch.backends.cuda.matmul.allow_tf32 = True
-torch.backends.cudnn.allow_tf32 = True
-
-# TODO put back?
-# import spaces
-
-prompt_list = [p for p in list(set(
-    pd.read_csv('./twitter_prompts.csv').iloc[:, 1].tolist())) if type(p) == str]
-
-start_time = time.time()
-
-####################### Setup Model
-from diffusers import AnimateDiffPipeline, MotionAdapter, EulerDiscreteScheduler, LCMScheduler, ConsistencyDecoderVAE, AutoencoderTiny
-from hyper_tile import split_attention, flush
-from huggingface_hub import hf_hub_download
-from safetensors.torch import load_file
-from PIL import Image
-from transformers import CLIPVisionModelWithProjection
-import uuid
-import av
-
-def write_video(file_name, images, fps=10):
-    print('Saving')
-    container = av.open(file_name, mode="w")
-
-    stream = container.add_stream("h264", rate=fps)
-    stream.width = sidel
-    stream.height = sidel
-    stream.pix_fmt = "yuv420p"
-
-    for img in images:
-        img = np.array(img)
-        img = np.round(img).astype(np.uint8)
-        frame = av.VideoFrame.from_ndarray(img, format="rgb24")
-        for packet in stream.encode(frame):
-            container.mux(packet)
-    # Flush stream
-    for packet in stream.encode():
-        container.mux(packet)
-    # Close the file
-    container.close()
-    print('Saved')
-
-bases = {
-    #"basem": "emilianJR/epiCRealism"
-    #SG161222/Realistic_Vision_V6.0_B1_noVAE
-    #runwayml/stable-diffusion-v1-5
-    #frankjoshua/realisticVisionV51_v51VAE
-    #Lykon/dreamshaper-7
-}
-
-image_encoder = CLIPVisionModelWithProjection.from_pretrained("h94/IP-Adapter", subfolder="models/image_encoder", torch_dtype=dtype).to(DEVICE)
-vae = AutoencoderTiny.from_pretrained("madebyollin/taesd", torch_dtype=dtype)
-
-# vae = ConsistencyDecoderVAE.from_pretrained("openai/consistency-decoder", torch_dtype=dtype)
-# vae = compile_unet(vae, config=config)
-
-#adapter = MotionAdapter.from_pretrained("wangfuyun/AnimateLCM")
-#pipe = AnimateDiffPipeline.from_pretrained("emilianJR/epiCRealism", motion_adapter=adapter, image_encoder=image_encoder, torch_dtype=dtype)
-#pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config, beta_schedule="linear")
-#pipe.load_lora_weights("wangfuyun/AnimateLCM", weight_name="AnimateLCM_sd15_t2v_lora.safetensors", adapter_name="lcm-lora",)
-#pipe.set_adapters(["lcm-lora"], [1])
-#pipe.fuse_lora()
-
-pipe = AnimateDiffPipeline.from_pretrained('emilianJR/epiCRealism', torch_dtype=dtype, image_encoder=image_encoder, vae=vae)
-pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing", beta_schedule="linear")
-repo = "ByteDance/AnimateDiff-Lightning"
-ckpt = f"animatediff_lightning_4step_diffusers.safetensors"
-pipe.unet.load_state_dict(load_file(hf_hub_download(repo, ckpt), device='cpu'), strict=False)
-
-
-pipe.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter_sd15.bin", map_location='cpu')
-pipe.set_ip_adapter_scale(.8)
-# pipe.unet.fuse_qkv_projections()
-#pipe.enable_free_init(method="gaussian", use_fast_sampling=True)
-
-pipe = compile(pipe, config=config)
-pipe.to(device=DEVICE)
-
-
-# THIS WOULD NEED PATCHING TODO
-with split_attention(pipe.vae, tile_size=128, swap_size=2, disable=False, aspect_ratio=1):
-    # ! Change the tile_size and disable to see their effects
-    with split_attention(pipe.unet, tile_size=128, swap_size=2, disable=False, aspect_ratio=1):
-        im_embs = torch.zeros(1, 1, 1, 1024, device=DEVICE, dtype=dtype)
-        output = pipe(prompt='a person', guidance_scale=0, added_cond_kwargs={}, ip_adapter_image_embeds=[im_embs], num_inference_steps=STEPS)
-        leave_im_emb, _ = pipe.encode_image(
-            output.frames[0][len(output.frames[0])//2], DEVICE, 1, output_hidden_state
-        )
-        assert len(output.frames[0]) == 16
-        leave_im_emb.to('cpu')
-
-
-# TODO put back
-# @spaces.GPU()
-def generate(prompt, in_im_embs=None, base='basem'):
-
-    if in_im_embs == None:
-        in_im_embs = torch.zeros(1, 1, 1, 1024, device=DEVICE, dtype=dtype)
-        #in_im_embs = in_im_embs / torch.norm(in_im_embs)
-    else:
-        in_im_embs = in_im_embs.to('cuda').unsqueeze(0).unsqueeze(0)
-        #im_embs = torch.cat((torch.zeros(1, 1024, device=DEVICE, dtype=dtype), in_im_embs), 0)
-
-    with split_attention(pipe.unet, tile_size=128, swap_size=2, disable=False, aspect_ratio=1):
-        # ! Change the tile_size and disable to see their effects
-        with split_attention(pipe.vae, tile_size=128, disable=False, aspect_ratio=1):
-            output = pipe(prompt=prompt, guidance_scale=0, added_cond_kwargs={}, ip_adapter_image_embeds=[in_im_embs], num_inference_steps=STEPS)
-
-    im_emb, _ = pipe.encode_image(
-        output.frames[0][len(output.frames[0])//2], DEVICE, 1, output_hidden_state
-    )
-
-    nsfw = maybe_nsfw(output.frames[0][len(output.frames[0])//2])
-
-    name = str(uuid.uuid4()).replace("-", "")
-    path = f"/tmp/{name}.mp4"
-
-    if nsfw:
-        gr.Warning("NSFW content detected.")
-        # TODO could return an automatic dislike of auto dislike on the backend for neither as well; just would need refactoring.
-        return None, im_emb
-
-    plt.close('all')
-    plt.hist(np.array(im_emb.to('cpu')).flatten(), bins=5)
-    plt.savefig('real_im_emb_plot.jpg')
-
-    write_video(path, output.frames[0])
-    return path, im_emb.to('cpu')
-
-
-#######################
-
-# TODO add to state instead of shared across all
-glob_idx = 0
-
-def next_image(embs, ys, calibrate_prompts):
-    global glob_idx
-    glob_idx = glob_idx + 1
-
-    with torch.no_grad():
-        if len(calibrate_prompts) > 0:
-            print('######### Calibrating with sample prompts #########')
-            prompt = calibrate_prompts.pop(0)
-            print(prompt)
-            image, img_embs = generate(prompt)
-            embs += img_embs
-            print(len(embs))
-            return image, embs, ys, calibrate_prompts
-        else:
-            print('######### Roaming #########')
-
-            # sample a .8 of rated embeddings for some stochasticity, or at least two embeddings.
-            # could take a sample < len(embs)
-            #n_to_choose = max(int((len(embs))), 2)
-            #indices = random.sample(range(len(embs)), n_to_choose)
-
-            # sample only as many negatives as there are positives
-            #pos_indices = [i for i in indices if ys[i] == 1]
-            #neg_indices = [i for i in indices if ys[i] == 0]
-            #lower = min(len(pos_indices), len(neg_indices))
-            #neg_indices = random.sample(neg_indices, lower)
-            #pos_indices = random.sample(pos_indices, lower)
-            #indices = neg_indices + pos_indices
-
-            pos_indices = [i for i in range(len(embs)) if ys[i] == 1]
-            neg_indices = [i for i in range(len(embs)) if ys[i] == 0]
-
-            # the embs & ys stay tied by index but we shuffle to drop randomly
-            random.shuffle(pos_indices)
-            random.shuffle(neg_indices)
-
-            #if len(pos_indices) - len(neg_indices) > 48 and len(pos_indices) > 80:
-            #    pos_indices = pos_indices[32:]
-            if len(neg_indices) - len(pos_indices) > 48/16 and len(pos_indices) > 120/16:
-                pos_indices = pos_indices[1:]
-            if len(neg_indices) - len(pos_indices) > 48/16 and len(neg_indices) > 200/16:
-                neg_indices = neg_indices[2:]
-
-
-            print(len(pos_indices), len(neg_indices))
-            indices = pos_indices + neg_indices
-
-            embs = [embs[i] for i in indices]
-            ys = [ys[i] for i in indices]
-            indices = list(range(len(embs)))
-
-
-            # handle case where every instance of calibration prompts is 'Neither' or 'Like' or 'Dislike'
-            if len(list(set(ys))) <= 1:
-                embs.append(.01*torch.randn(1024))
-                embs.append(.01*torch.randn(1024))
-                ys.append(0)
-                ys.append(1)
-
-
-            # also add the latest 0 and the latest 1
-            has_0 = False
-            has_1 = False
-            for i in reversed(range(len(ys))):
-                if ys[i] == 0 and has_0 == False:
-                    indices.append(i)
-                    has_0 = True
-                elif ys[i] == 1 and has_1 == False:
-                    indices.append(i)
-                    has_1 = True
-                if has_0 and has_1:
-                    break
-
-            # we may have just encountered a rare multi-threading diffusers issue (https://github.com/huggingface/diffusers/issues/5749);
-            # this ends up adding a rating but losing an embedding, it seems.
-            # let's take off a rating if so to continue without indexing errors.
-            if len(ys) > len(embs):
-                print('ys are longer than embs; popping latest rating')
-                ys.pop(-1)
-
-            feature_embs = np.array(torch.stack([embs[i].to('cpu') for i in indices] + [leave_im_emb[0].to('cpu')]).to('cpu'))
-            scaler = preprocessing.StandardScaler().fit(feature_embs)
-            feature_embs = scaler.transform(feature_embs)
-            chosen_y = np.array([ys[i] for i in indices] + [0])
-
-            print('Gathering coefficients')
-            #lin_class = LinearRegression(fit_intercept=False).fit(feature_embs, chosen_y)
-            lin_class = SVC(max_iter=50000, kernel='linear', class_weight='balanced', C=1).fit(feature_embs, chosen_y)
-            coef_ = torch.tensor(lin_class.coef_, dtype=torch.double)
-            coef_ = coef_ / coef_.abs().max() * 3
-            print(coef_.shape, 'COEF')
-
-            plt.close('all')
-            plt.hist(np.array(coef_).flatten(), bins=5)
-            plt.savefig('plot.jpg')
-            print(coef_)
-            print('Gathered')
-
-            rng_prompt = random.choice(prompt_list)
-            w = 1# if len(embs) % 2 == 0 else 0
-            im_emb = w * coef_.to(dtype=dtype)
-
-            prompt= 'the scene' if glob_idx % 2 == 0 else rng_prompt
-            print(prompt)
-            image, im_emb = generate(prompt, im_emb)
-            embs += im_emb
-
-            if len(embs) > 700/16:
-                embs = embs[1:]
-                ys = ys[1:]
-
-            return image, embs, ys, calibrate_prompts
-
-
-
-
-
-
-
-
-
-
-def start(_, embs, ys, calibrate_prompts):
-    image, embs, ys, calibrate_prompts = next_image(embs, ys, calibrate_prompts)
-    return [
-        gr.Button(value='Like (L)', interactive=True),
-        gr.Button(value='Neither (Space)', interactive=True),
-        gr.Button(value='Dislike (A)', interactive=True),
-        gr.Button(value='Start', interactive=False),
-        image,
-        embs,
-        ys,
-        calibrate_prompts
-    ]
-
-
-def choose(img, choice, embs, ys, calibrate_prompts):
-    if choice == 'Like (L)':
-        choice = 1
-    elif choice == 'Neither (Space)':
-        embs = embs[:-1]
-        img, embs, ys, calibrate_prompts = next_image(embs, ys, calibrate_prompts)
-        return img, embs, ys, calibrate_prompts
-    else:
-        choice = 0
-
-    # if we detected NSFW, leave that area of latent space regardless of how they rated chosen.
-    # TODO skip allowing rating
-    if img == None:
-        print('NSFW -- choice is disliked')
-        choice = 0
-
-    ys += [choice]*1
-    img, embs, ys, calibrate_prompts = next_image(embs, ys, calibrate_prompts)
-    return img, embs, ys, calibrate_prompts
-
-css = '''.gradio-container{max-width: 700px !important}
-#description{text-align: center}
-#description h1, #description h3{display: block}
-#description p{margin-top: 0}
-.fade-in-out {animation: fadeInOut 3s forwards}
-@keyframes fadeInOut {
-    0% {
-        background: var(--bg-color);
-    }
-    100% {
-        background: var(--button-secondary-background-fill);
-    }
-}
-'''
-js_head = '''
-<script>
-document.addEventListener('keydown', function(event) {
-    if (event.key === 'a' || event.key === 'A') {
-        // Trigger click on 'dislike' if 'A' is pressed
-        document.getElementById('dislike').click();
-    } else if (event.key === ' ' || event.keyCode === 32) {
-        // Trigger click on 'neither' if Spacebar is pressed
-        document.getElementById('neither').click();
-    } else if (event.key === 'l' || event.key === 'L') {
-        // Trigger click on 'like' if 'L' is pressed
-        document.getElementById('like').click();
-    }
-});
-function fadeInOut(button, color) {
-    button.style.setProperty('--bg-color', color);
-    button.classList.remove('fade-in-out');
-    void button.offsetWidth; // This line forces a repaint by accessing a DOM property
-
-    button.classList.add('fade-in-out');
-    button.addEventListener('animationend', () => {
-        button.classList.remove('fade-in-out'); // Reset the animation state
-    }, {once: true});
-}
-document.body.addEventListener('click', function(event) {
-    const target = event.target;
-    if (target.id === 'dislike') {
-        fadeInOut(target, '#ff1717');
-    } else if (target.id === 'like') {
-        fadeInOut(target, '#006500');
-    } else if (target.id === 'neither') {
-        fadeInOut(target, '#cccccc');
-    }
-});
-
-</script>
-'''
-
-with gr.Blocks(css=css, head=js_head) as demo:
-    gr.Markdown('''### Blue Tigers: Generative Recommenders for Exporation of Video
-Explore the latent space without text prompts based on your preferences. Learn more on [the write-up](https://rynmurdock.github.io/posts/2024/3/generative_recomenders/).
-''', elem_id="description")
-    embs = gr.State([])
-    ys = gr.State([])
-    calibrate_prompts = gr.State([
-        'the moon is melting into my glass of tea',
-        'a sea slug -- pair of claws scuttling -- jelly fish glowing',
-        'an adorable creature. It may be a goblin or a pig or a slug.',
-        'an animation about a gorgeous nebula',
-        'an octopus writhes',
-    ])
-    def l():
-        return None
-
-    with gr.Row(elem_id='output-image'):
-        img = gr.Video(
-            label='Lightning',
-            autoplay=True,
-            interactive=False,
-            height=sidel,
-            width=sidel,
-            include_audio=False,
-            elem_id="video_output"
-        )
-        img.play(l, js='''document.querySelector('[data-testid="Lightning-player"]').loop = true''')
-    with gr.Row(equal_height=True):
-        b3 = gr.Button(value='Dislike (A)', interactive=False, elem_id="dislike")
-        b2 = gr.Button(value='Neither (Space)', interactive=False, elem_id="neither")
-        b1 = gr.Button(value='Like (L)', interactive=False, elem_id="like")
-        b1.click(
-            choose,
-            [img, b1, embs, ys, calibrate_prompts],
-            [img, embs, ys, calibrate_prompts]
-        )
-        b2.click(
-            choose,
-            [img, b2, embs, ys, calibrate_prompts],
-            [img, embs, ys, calibrate_prompts]
-        )
-        b3.click(
-            choose,
-            [img, b3, embs, ys, calibrate_prompts],
-            [img, embs, ys, calibrate_prompts]
-        )
-    with gr.Row():
-        b4 = gr.Button(value='Start')
-        b4.click(start,
-                 [b4, embs, ys, calibrate_prompts],
-                 [b1, b2, b3, b4, img, embs, ys, calibrate_prompts])
-    with gr.Row():
-        html = gr.HTML('''<div style='text-align:center; font-size:20px'>You will calibrate for several prompts and then roam. </ div><br><br><br>
-<div style='text-align:center; font-size:14px'>Note that while the AnimateDiff-Lightning model with NSFW filtering is unlikely to produce NSFW images, this may still occur, and users should avoid NSFW content when rating.
-</ div>
-<br><br>
-<div style='text-align:center; font-size:14px'>Thanks to @multimodalart for their contributions to the demo, esp. the interface and @maxbittker for feedback.
-</ div>''')
-
-demo.launch(share=True)
requirements.txt CHANGED
@@ -15,6 +15,4 @@ tensorflow==2.14.0
 imageio
 apscheduler
 pandas
-av
-torchvision
-bitsandbytes
+av
 
twitter_prompts.csv DELETED
@@ -1,72 +0,0 @@
-,0
-0,a sunset
-1,a still life in blue
-2,last day on earth
-3,the conch shell
-4,the winds of change
-5,a surrealist eye
-6,a surrealist polaroid photo of an apple
-7,metaphysics
-8,the sun is setting into my glass of tea
-9,the moon at 3am
-10,a memento mori
-11,quaking aspen tree
-12,violets and daffodils
-13,espresso
-14,sisyphus
-15,high windows of stained glass
-16,a green dog
-17,an adorable companion; it is a pig
-18,bird of paradise
-19,a complex intricate machine
-20,a white clock
-21,a film featuring the landscape Salt Lake City Utah
-22,a creature
-23,a house set aflame.
-24,a gorgeous landscape by Cy Twombly
-25,smoke rises from the caterpillar's hookah
-26,corvid in red
-27,Monet's pond
-28,Genesis
-29,Death is a black camel that kneels down so we can ride
-30,a cherry tree made of fractals
-29,the end of the sidewalk
-30,a polaroid photo of a bustling city of lights and sky scrapers
-31,The Fig Tree metaphor
-32,God killed Van Gogh.
-33,a cosmic entity alien with four eyes.
-34,a horse with 128 eyes.
-35,a being with an infinite set of eyes (it is omniscient)
-36,A sticky-note magnum opus featuring birds
-37,Moka Pot
-38,the moon is a sickle cell
-39,The Penultimate Supper
-40,Art
-41,surrealism
-42,a god made of wires & dust
-43,a dandelion blown into the universe
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-