python直接调用InstantID进行一致性图片生成

项目地址 https://github.com/InstantID/InstantID

克隆到本地,根据要求 pip 安装依赖

模型文件上篇文章讲了如何下载 https://www.cnblogs.com/qcy-blog/p/18202276

我用的 windows,所以改了一下示例 infer.py 源码,主要是修改了模型的绝对路径。

import cv2
import torch
import numpy as np
from PIL import Image
from diffusers.utils import load_image
from diffusers.models import ControlNetModel
from insightface.app import FaceAnalysis
from pipeline_stable_diffusion_xl_instantid import StableDiffusionXLInstantIDPipeline, draw_kps

def resize_img(input_image, max_side=1280, min_side=1024, size=None,
               pad_to_max_side=False, mode=Image.BILINEAR, base_pixel_number=64):
    """Resize a PIL image for SDXL-style pipelines.

    Scales the image so its short side reaches at least ``min_side`` and its
    long side does not exceed ``max_side``, then snaps both dimensions down to
    multiples of ``base_pixel_number`` (SDXL expects dimensions divisible by 64).

    Args:
        input_image: source ``PIL.Image.Image``.
        max_side: upper bound for the longer edge after resizing.
        min_side: lower bound targeted for the shorter edge.
        size: optional explicit ``(width, height)``; when given, the ratio
            computation is skipped and the image is resized to exactly this.
        pad_to_max_side: when True, paste the result centered on a white
            ``max_side x max_side`` canvas.
        mode: PIL resampling filter.
        base_pixel_number: dimension granularity (64 for SDXL).

    Returns:
        The resized (and optionally padded) ``PIL.Image.Image``.
    """
    w, h = input_image.size
    if size is not None:
        w_resize_new, h_resize_new = size
    else:
        # Upscale so the shorter side reaches min_side...
        ratio = min_side / min(h, w)
        w, h = round(ratio * w), round(ratio * h)
        # ...then downscale so the longer side fits within max_side.
        ratio = max_side / max(h, w)
        input_image = input_image.resize([round(ratio * w), round(ratio * h)], mode)
        # Snap both dimensions down to multiples of base_pixel_number.
        w_resize_new = (round(ratio * w) // base_pixel_number) * base_pixel_number
        h_resize_new = (round(ratio * h) // base_pixel_number) * base_pixel_number
    input_image = input_image.resize([w_resize_new, h_resize_new], mode)

    if pad_to_max_side:
        # Center the image on a white max_side x max_side canvas.
        res = np.ones([max_side, max_side, 3], dtype=np.uint8) * 255
        offset_x = (max_side - w_resize_new) // 2
        offset_y = (max_side - h_resize_new) // 2
        res[offset_y:offset_y + h_resize_new, offset_x:offset_x + w_resize_new] = np.array(input_image)
        input_image = Image.fromarray(res)
    return input_image

if __name__ == "__main__":
    # Load the InsightFace face encoder ('antelopev2' detection + recognition pack).
    # Raw strings keep the Windows backslash paths literal.
    app = FaceAnalysis(name='antelopev2', root=r'F:\code\ComfyUI\models\insightface',
                       providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
    app.prepare(ctx_id=0, det_size=(640, 640))

    # Paths to the InstantID models.
    face_adapter = r'F:\code\ComfyUI\models\checkpoints\ip-adapter.bin'
    controlnet_path = r'F:\code\ComfyUI\custom_nodes\ComfyUI-InstantID\checkpoints\controlnet'

    # Identity ControlNet loaded in fp16 to fit GPU memory.
    controlnet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=torch.float16)

    # 模型根据自己需求下载,必须是 SDXL (base model must be an SDXL checkpoint).
    # base_model_path = 'wangqixun/YamerMIX_v8'
    base_model_path = r'F:\cache\hub\models--stabilityai--stable-diffusion-xl-base-1.0\snapshots\462165984030d82259a11f4367a4eed129e94a7b'

    pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
        base_model_path,
        controlnet=controlnet,
        torch_dtype=torch.float16,
    )
    pipe.cuda()
    pipe.load_ip_adapter_instantid(face_adapter)

    # Inference settings.
    prompt = "analog film photo of a man. faded film, desaturated, 35mm photo, grainy, vignette, vintage, Kodachrome, Lomography, stained, highly detailed, found footage, masterpiece, best quality"
    n_prompt = "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, painting, drawing, illustration, glitch, deformed, mutated, cross-eyed, ugly, disfigured (lowres, low quality, worst quality:1.2), (text:1.2), watermark, painting, drawing, illustration, glitch,deformed, mutated, cross-eyed, ugly, disfigured"

    face_image = load_image(r"F:\code\ComfyUI\InstantID-main\examples\yann-lecun_resize.jpg")
    face_image = resize_img(face_image)

    # Detect faces (insightface expects BGR) and keep the one with the
    # largest bounding-box area. NOTE: the original key was missing
    # parentheses around (y2 - y1), so it did not compute the area.
    face_info = app.get(cv2.cvtColor(np.array(face_image), cv2.COLOR_RGB2BGR))
    face_info = sorted(
        face_info,
        key=lambda x: (x['bbox'][2] - x['bbox'][0]) * (x['bbox'][3] - x['bbox'][1]),
    )[-1]  # only use the maximum face
    face_emb = face_info['embedding']
    face_kps = draw_kps(face_image, face_info['kps'])

    image = pipe(
        prompt=prompt,
        negative_prompt=n_prompt,
        image_embeds=face_emb,
        image=face_kps,
        controlnet_conditioning_scale=0.8,
        ip_adapter_scale=0.8,
        num_inference_steps=30,
        guidance_scale=5,
    ).images[0]

    image.save('result.jpg')