Datawhale X 魔搭 AI夏令营(三)
一. 初识 ComfyUI
1. ComfyUI 是一种基于节点工作的图形用户界面 (GUI),主要用于操作图像生成技术。它的特别之处在于采用了模块化的设计:把图像生成的过程分解成许多小的步骤,每个步骤都是一个节点。这些节点可以连接起来形成一个工作流,这样用户就可以根据需要定制自己的图像生成过程。
二. 开始实践
1. 下载 ComfyUI 的安装执行文件,以及 task1 (见 Datawhale X 魔搭 AI 夏令营(一)) 中微调完成的 Lora 文件:
git lfs install
git clone https://www.modelscope.cn/datasets/maochase/kolors_test_comfyui.git
mv kolors_test_comfyui/* ./
rm -rf kolors_test_comfyui/
mkdir -p /mnt/workspace/models/lightning_logs/version_0/checkpoints/
mv epoch=0-step=500.ckpt /mnt/workspace/models/lightning_logs/version_0/checkpoints/
2. 一键执行安装程序
3. 当最后一个节点执行完成并输出访问链接时,将该链接复制到浏览器中访问
https://internal-api-drive-stream.feishu.cn/space/api/box/stream/download/preview/GRrbbu8DXo3XrhxYzHwcvbvRnpf/?preview_type=16
三. 浅尝 ComfyUI 工作流
1. 不带 Lora 的工作流样例 (先下载工作流脚本 kolors_example.json)
点击查看代码
{
"last_node_id": 15,
"last_link_id": 18,
"nodes": [
{
"id": 11,
"type": "VAELoader",
"pos": [
1323,
240
],
"size": {
"0": 315,
"1": 58
},
"flags": {},
"order": 0,
"mode": 0,
"outputs": [
{
"name": "VAE",
"type": "VAE",
"links": [
12
],
"shape": 3
}
],
"properties": {
"Node name for S&R": "VAELoader"
},
"widgets_values": [
"sdxl.vae.safetensors"
]
},
{
"id": 10,
"type": "VAEDecode",
"pos": [
1368,
369
],
"size": {
"0": 210,
"1": 46
},
"flags": {},
"order": 6,
"mode": 0,
"inputs": [
{
"name": "samples",
"type": "LATENT",
"link": 18
},
{
"name": "vae",
"type": "VAE",
"link": 12,
"slot_index": 1
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
13
],
"shape": 3,
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "VAEDecode"
}
},
{
"id": 14,
"type": "KolorsSampler",
"pos": [
1011,
371
],
"size": {
"0": 315,
"1": 222
},
"flags": {},
"order": 5,
"mode": 0,
"inputs": [
{
"name": "kolors_model",
"type": "KOLORSMODEL",
"link": 16
},
{
"name": "kolors_embeds",
"type": "KOLORS_EMBEDS",
"link": 17
}
],
"outputs": [
{
"name": "latent",
"type": "LATENT",
"links": [
18
],
"shape": 3,
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "KolorsSampler"
},
"widgets_values": [
1024,
1024,
1000102404233412,
"fixed",
25,
5,
"EulerDiscreteScheduler"
]
},
{
"id": 6,
"type": "DownloadAndLoadKolorsModel",
"pos": [
201,
368
],
"size": {
"0": 315,
"1": 82
},
"flags": {},
"order": 1,
"mode": 0,
"outputs": [
{
"name": "kolors_model",
"type": "KOLORSMODEL",
"links": [
16
],
"shape": 3,
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "DownloadAndLoadKolorsModel"
},
"widgets_values": [
"Kwai-Kolors/Kolors",
"fp16"
]
},
{
"id": 3,
"type": "PreviewImage",
"pos": [
1366,
468
],
"size": [
535.4001724243165,
562.2001106262207
],
"flags": {},
"order": 7,
"mode": 0,
"inputs": [
{
"name": "images",
"type": "IMAGE",
"link": 13
}
],
"properties": {
"Node name for S&R": "PreviewImage"
}
},
{
"id": 12,
"type": "KolorsTextEncode",
"pos": [
519,
529
],
"size": [
457.2893696934723,
225.28656056301645
],
"flags": {},
"order": 4,
"mode": 0,
"inputs": [
{
"name": "chatglm3_model",
"type": "CHATGLM3MODEL",
"link": 14,
"slot_index": 0
}
],
"outputs": [
{
"name": "kolors_embeds",
"type": "KOLORS_EMBEDS",
"links": [
17
],
"shape": 3,
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "KolorsTextEncode"
},
"widgets_values": [
"cinematic photograph of an astronaut riding a horse in space |\nillustration of a cat wearing a top hat and a scarf |\nphotograph of a goldfish in a bowl |\nanime screencap of a red haired girl",
"",
1
]
},
{
"id": 15,
"type": "Note",
"pos": [
200,
636
],
"size": [
273.5273818969726,
149.55464588512064
],
"flags": {},
"order": 2,
"mode": 0,
"properties": {
"text": ""
},
"widgets_values": [
"Text encoding takes the most VRAM, quantization can reduce that a lot.\n\nApproximate values I have observed:\nfp16 - 12 GB\nquant8 - 8-9 GB\nquant4 - 4-5 GB\n\nquant4 reduces the quality quite a bit, 8 seems fine"
],
"color": "#432",
"bgcolor": "#653"
},
{
"id": 13,
"type": "DownloadAndLoadChatGLM3",
"pos": [
206,
522
],
"size": [
274.5334274291992,
58
],
"flags": {},
"order": 3,
"mode": 0,
"outputs": [
{
"name": "chatglm3_model",
"type": "CHATGLM3MODEL",
"links": [
14
],
"shape": 3
}
],
"properties": {
"Node name for S&R": "DownloadAndLoadChatGLM3"
},
"widgets_values": [
"fp16"
]
}
],
"links": [
[
12,
11,
0,
10,
1,
"VAE"
],
[
13,
10,
0,
3,
0,
"IMAGE"
],
[
14,
13,
0,
12,
0,
"CHATGLM3MODEL"
],
[
16,
6,
0,
14,
0,
"KOLORSMODEL"
],
[
17,
12,
0,
14,
1,
"KOLORS_EMBEDS"
],
[
18,
14,
0,
10,
0,
"LATENT"
]
],
"groups": [],
"config": {},
"extra": {
"ds": {
"scale": 1.1,
"offset": {
"0": -114.73954010009766,
"1": -139.79705810546875
}
}
},
"version": 0.4
}
2. 完成第一次生图
a. 结果如下:
3. 带 Lora 的工作流样例 (工作流脚本 kolors_with_lora_example.json)
点击查看代码
{
"last_node_id": 16,
"last_link_id": 20,
"nodes": [
{
"id": 11,
"type": "VAELoader",
"pos": [
1323,
240
],
"size": {
"0": 315,
"1": 58
},
"flags": {},
"order": 0,
"mode": 0,
"outputs": [
{
"name": "VAE",
"type": "VAE",
"links": [
12
],
"shape": 3
}
],
"properties": {
"Node name for S&R": "VAELoader"
},
"widgets_values": [
"sdxl.vae.safetensors"
]
},
{
"id": 10,
"type": "VAEDecode",
"pos": [
1368,
369
],
"size": {
"0": 210,
"1": 46
},
"flags": {},
"order": 7,
"mode": 0,
"inputs": [
{
"name": "samples",
"type": "LATENT",
"link": 18
},
{
"name": "vae",
"type": "VAE",
"link": 12,
"slot_index": 1
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
13
],
"shape": 3,
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "VAEDecode"
}
},
{
"id": 15,
"type": "Note",
"pos": [
200,
636
],
"size": {
"0": 273.5273742675781,
"1": 149.5546417236328
},
"flags": {},
"order": 1,
"mode": 0,
"properties": {
"text": ""
},
"widgets_values": [
"Text encoding takes the most VRAM, quantization can reduce that a lot.\n\nApproximate values I have observed:\nfp16 - 12 GB\nquant8 - 8-9 GB\nquant4 - 4-5 GB\n\nquant4 reduces the quality quite a bit, 8 seems fine"
],
"color": "#432",
"bgcolor": "#653"
},
{
"id": 13,
"type": "DownloadAndLoadChatGLM3",
"pos": [
206,
522
],
"size": {
"0": 274.5334167480469,
"1": 58
},
"flags": {},
"order": 2,
"mode": 0,
"outputs": [
{
"name": "chatglm3_model",
"type": "CHATGLM3MODEL",
"links": [
14
],
"shape": 3
}
],
"properties": {
"Node name for S&R": "DownloadAndLoadChatGLM3"
},
"widgets_values": [
"fp16"
]
},
{
"id": 6,
"type": "DownloadAndLoadKolorsModel",
"pos": [
201,
368
],
"size": {
"0": 315,
"1": 82
},
"flags": {},
"order": 3,
"mode": 0,
"outputs": [
{
"name": "kolors_model",
"type": "KOLORSMODEL",
"links": [
19
],
"shape": 3,
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "DownloadAndLoadKolorsModel"
},
"widgets_values": [
"Kwai-Kolors/Kolors",
"fp16"
]
},
{
"id": 12,
"type": "KolorsTextEncode",
"pos": [
519,
529
],
"size": {
"0": 457.28936767578125,
"1": 225.28656005859375
},
"flags": {},
"order": 4,
"mode": 0,
"inputs": [
{
"name": "chatglm3_model",
"type": "CHATGLM3MODEL",
"link": 14,
"slot_index": 0
}
],
"outputs": [
{
"name": "kolors_embeds",
"type": "KOLORS_EMBEDS",
"links": [
17
],
"shape": 3,
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "KolorsTextEncode"
},
"widgets_values": [
"二次元,长发,少女,白色背景",
"",
1
]
},
{
"id": 3,
"type": "PreviewImage",
"pos": [
1366,
469
],
"size": {
"0": 535.400146484375,
"1": 562.2001342773438
},
"flags": {},
"order": 8,
"mode": 0,
"inputs": [
{
"name": "images",
"type": "IMAGE",
"link": 13
}
],
"properties": {
"Node name for S&R": "PreviewImage"
}
},
{
"id": 16,
"type": "LoadKolorsLoRA",
"pos": [
606,
368
],
"size": {
"0": 317.4000244140625,
"1": 82
},
"flags": {},
"order": 5,
"mode": 0,
"inputs": [
{
"name": "kolors_model",
"type": "KOLORSMODEL",
"link": 19
}
],
"outputs": [
{
"name": "kolors_model",
"type": "KOLORSMODEL",
"links": [
20
],
"shape": 3,
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "LoadKolorsLoRA"
},
"widgets_values": [
"/mnt/workspace/models/lightning_logs/version_0/checkpoints/epoch=0-step=500.ckpt",
2
]
},
{
"id": 14,
"type": "KolorsSampler",
"pos": [
1011,
371
],
"size": {
"0": 315,
"1": 266
},
"flags": {},
"order": 6,
"mode": 0,
"inputs": [
{
"name": "kolors_model",
"type": "KOLORSMODEL",
"link": 20
},
{
"name": "kolors_embeds",
"type": "KOLORS_EMBEDS",
"link": 17
},
{
"name": "latent",
"type": "LATENT",
"link": null
}
],
"outputs": [
{
"name": "latent",
"type": "LATENT",
"links": [
18
],
"shape": 3,
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "KolorsSampler"
},
"widgets_values": [
1024,
1024,
0,
"fixed",
25,
5,
"EulerDiscreteScheduler",
1
]
}
],
"links": [
[
12,
11,
0,
10,
1,
"VAE"
],
[
13,
10,
0,
3,
0,
"IMAGE"
],
[
14,
13,
0,
12,
0,
"CHATGLM3MODEL"
],
[
17,
12,
0,
14,
1,
"KOLORS_EMBEDS"
],
[
18,
14,
0,
10,
0,
"LATENT"
],
[
19,
6,
0,
16,
0,
"KOLORSMODEL"
],
[
20,
16,
0,
14,
0,
"KOLORSMODEL"
]
],
"groups": [],
"config": {},
"extra": {
"ds": {
"scale": 1.2100000000000002,
"offset": {
"0": -183.91309381910426,
"1": -202.11110769225016
}
}
},
"version": 0.4
}
4. 生图步骤同上
a. 效果:
五. 准备一个高质量的数据集
当我们进行图片生成相关的工作时,选择合适的数据集是非常重要的。如何找到适合自己的数据集呢?这里给大家整理了一些重要的参考维度,希望可以帮助大家快速找到适合的数据集:
1. 明确你的需求和目标
a. 关注应用场景:确定你的模型将被应用到什么样的场景中(例如,艺术风格转换、产品图像生成、医疗影像合成等)。
b. 关注数据类型:你需要什么样的图片?比如是真实世界的照片还是合成图像?是黑白的还是彩色的?是高分辨率还是低分辨率?
c. 关注数据量:考虑你的任务应该需要多少图片来支持训练和验证。
2. 数据集来源整理
以下渠道来源均需要考虑合规性问题,请大家在使用数据集过程中谨慎选择