# Stable diffusion: image-to-image generation script
# (labml_nn / diffusion / stable_diffusion / scripts)
import argparse
from pathlib import Path

import torch

from labml import lab, monit
from labml_nn.diffusion.stable_diffusion.sampler.ddim import DDIMSampler
from labml_nn.diffusion.stable_diffusion.util import load_model, load_img, save_images, set_seed
class Img2Img:
    """
    ### Image-to-image generator

    Wraps a stable diffusion model and a DDIM sampler to transform an
    existing image towards a text prompt: the image is encoded to latent
    space, partially noised, and then denoised under prompt conditioning.
    """

    def __init__(self, *, checkpoint_path: Path,
                 ddim_steps: int = 50,
                 ddim_eta: float = 0.0):
        """
        :param checkpoint_path: is the path of the checkpoint
        :param ddim_steps: is the number of sampling steps
        :param ddim_eta: is the DDIM sampling $\\eta$ constant
        """
        self.ddim_steps = ddim_steps
        # Load the stable diffusion model from the checkpoint
        self.model = load_model(checkpoint_path)
        # Get device
        self.device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
        # Move the model to device
        self.model.to(self.device)
        # Initialize DDIM sampler
        self.sampler = DDIMSampler(self.model,
                                   n_steps=ddim_steps,
                                   ddim_eta=ddim_eta)

    @torch.no_grad()
    def __call__(self, *,
                 dest_path: str,
                 orig_img: str,
                 strength: float,
                 batch_size: int = 3,
                 prompt: str,
                 uncond_scale: float = 5.0,
                 ):
        """
        :param dest_path: is the path to store the generated images
        :param orig_img: is the image to transform
        :param strength: specifies how much of the original image should not be preserved
        :param batch_size: is the number of images to generate in a batch
        :param prompt: is the prompt to generate images with
        :param uncond_scale: is the unconditional guidance scale $s$. This is used for
            $\\epsilon_\\theta(x_t, c) = s\\epsilon_\\text{cond}(x_t, c) + (s - 1)\\epsilon_\\text{cond}(x_t, c_u)$
        """
        # Make a batch of prompts
        prompts = batch_size * [prompt]
        # Load image
        orig_image = load_img(orig_img).to(self.device)
        # Encode the image in the latent space and make `batch_size` copies of it
        orig = self.model.autoencoder_encode(orig_image).repeat(batch_size, 1, 1, 1)

        # Get the number of steps to diffuse the original.
        # Validate explicitly rather than with `assert`, which is stripped under `python -O`.
        if not 0. <= strength <= 1.:
            raise ValueError('can only work with strength in [0.0, 1.0]')
        t_index = int(strength * self.ddim_steps)

        # AMP auto casting
        with torch.cuda.amp.autocast():
            # If unconditional scaling is not 1, get the embeddings for empty prompts (no conditioning).
            if uncond_scale != 1.0:
                un_cond = self.model.get_text_conditioning(batch_size * [""])
            else:
                un_cond = None
            # Get the prompt embeddings
            cond = self.model.get_text_conditioning(prompts)
            # Add noise to the original image
            x = self.sampler.q_sample(orig, t_index)
            # Reconstruct from the noisy image
            x = self.sampler.paint(x, cond, t_index,
                                   uncond_scale=uncond_scale,
                                   uncond_cond=un_cond)
            # Decode the image from the autoencoder
            images = self.model.autoencoder_decode(x)

        # Save images
        save_images(images, dest_path, 'img_')
def main():
    """
    ### CLI entry point

    Parses command-line arguments, seeds the RNGs, builds an `Img2Img`
    generator from the `sd-v1-4.ckpt` checkpoint, and runs one batch of
    image-to-image generation into the `outputs` directory.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--prompt",
        type=str,
        nargs="?",
        default="a painting of a cute monkey playing guitar",
        help="the prompt to render"
    )

    parser.add_argument(
        "--orig-img",
        type=str,
        nargs="?",
        help="path to the input image"
    )

    # `--batch-size` is the conventional dashed spelling; the original
    # `--batch_size` is kept (and listed first, so `dest` stays `batch_size`)
    # for backward compatibility with existing invocations.
    parser.add_argument("--batch_size", "--batch-size", type=int, default=4, help="batch size")
    parser.add_argument("--steps", type=int, default=50, help="number of ddim sampling steps")

    parser.add_argument("--scale", type=float, default=5.0,
                        help="unconditional guidance scale: "
                             "eps = eps(x, empty) + scale * (eps(x, cond) - eps(x, empty))")

    parser.add_argument("--strength", type=float, default=0.75,
                        help="strength for noise: "
                             " 1.0 corresponds to full destruction of information in init image")

    opt = parser.parse_args()
    # Fixed seed for reproducible sampling
    set_seed(42)

    img2img = Img2Img(checkpoint_path=lab.get_data_path() / 'stable-diffusion' / 'sd-v1-4.ckpt',
                      ddim_steps=opt.steps)

    with monit.section('Generate'):
        img2img(
            dest_path='outputs',
            orig_img=opt.orig_img,
            strength=opt.strength,
            batch_size=opt.batch_size,
            prompt=opt.prompt,
            uncond_scale=opt.scale)


if __name__ == "__main__":
    main()