Hi! First I just want to say thank you for making this open source and doing such an amazing job with it.
Text2video works fine for me, but video2video does not: none of the video2video examples in the GitHub README work.
(musev) root@10703020883e:/workspace/MuseV# python scripts/inference/video2video.py --sd_model_name fantasticmix_v10 --unet_model_name musev_referencenet --referencenet_model_name musev_referencenet --ip_adapter_model_name musev_referencenet -test_data_path ./configs/tasks/example.yaml --vision_clip_extractor_class_name ImageClipVisionFeatureExtractor --vision_clip_model_path ./checkpoints/IP-Adapter/models/image_encoder --output_dir ./output --n_batch 1 --controlnet_name dwpose_body_hand --which2video "video_middle" --target_datas dance1 --fps 12 --time_size 12
/opt/conda/envs/musev/lib/python3.10/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.
torch.utils._pytree._register_pytree_node(
2024-04-05 20:19:43.562604: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-04-05 20:19:43.584943: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-04-05 20:19:44.056965: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
/opt/conda/envs/musev/lib/python3.10/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.
torch.utils._pytree._register_pytree_node(
/workspace/MuseV/diffusers/src/diffusers/utils/outputs.py:63: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.
torch.utils._pytree._register_pytree_node(
args
{'add_static_video_prompt': False,
'context_batch_size': 1,
'context_frames': 12,
'context_overlap': 4,
'context_schedule': 'uniform_v2',
'context_stride': 1,
'controlnet_conditioning_scale': 1.0,
'controlnet_name': 'dwpose_body_hand',
'cross_attention_dim': 768,
'enable_zero_snr': False,
'end_to_end': True,
'face_image_path': None,
'facein_model_cfg_path': '/workspace/MuseV/scripts/inference/../.././configs/model/facein.py',
'facein_model_name': None,
'facein_scale': 1.0,
'fix_condition_images': False,
'fixed_ip_adapter_image': True,
'fixed_refer_face_image': True,
'fixed_refer_image': True,
'fps': 12,
'guidance_scale': 7.5,
'height': None,
'img_length_ratio': 1.0,
'img_weight': 0.001,
'interpolation_factor': 1,
'ip_adapter_face_model_cfg_path': '/workspace/MuseV/scripts/inference/../.././configs/model/ip_adapter.py',
'ip_adapter_face_model_name': None,
'ip_adapter_face_scale': 1.0,
'ip_adapter_model_cfg_path': '/workspace/MuseV/scripts/inference/../.././configs/model/ip_adapter.py',
'ip_adapter_model_name': 'musev_referencenet',
'ip_adapter_scale': 1.0,
'ipadapter_image_path': None,
'lcm_model_cfg_path': '/workspace/MuseV/scripts/inference/../.././configs/model/lcm_model.py',
'lcm_model_name': None,
'log_level': 'INFO',
'motion_speed': 8.0,
'n_batch': 1,
'n_cols': 3,
'n_repeat': 1,
'n_vision_condition': 1,
'need_hist_match': False,
'need_img_based_video_noise': True,
'need_return_condition': False,
'need_return_videos': False,
'need_video2video': False,
'negative_prompt': 'V2',
'negprompt_cfg_path': '/workspace/MuseV/scripts/inference/../../configs/model/negative_prompt.py',
'noise_type': 'video_fusion',
'num_inference_steps': 30,
'output_dir': './output',
'overwrite': False,
'pose_guider_model_path': None,
'prompt_only_use_image_prompt': False,
'record_mid_video_latents': False,
'record_mid_video_noises': False,
'redraw_condition_image': False,
'redraw_condition_image_with_facein': True,
'redraw_condition_image_with_ip_adapter_face': True,
'redraw_condition_image_with_ipdapter': True,
'redraw_condition_image_with_referencenet': True,
'referencenet_image_path': None,
'referencenet_model_cfg_path': '/workspace/MuseV/scripts/inference/../.././configs/model/referencenet.py',
'referencenet_model_name': 'musev_referencenet',
'sample_rate': 1,
'save_filetype': 'mp4',
'save_images': False,
'sd_model_cfg_path': '/workspace/MuseV/scripts/inference/../../configs/model/T2I_all_model.py',
'sd_model_name': 'fantasticmix_v10',
'seed': None,
'strength': 0.8,
'target_datas': 'dance1',
'test_data_path': './configs/tasks/example.yaml',
'time_size': 12,
'unet_model_cfg_path': '/workspace/MuseV/scripts/inference/../.././configs/model/motion_model.py',
'unet_model_name': 'musev_referencenet',
'use_condition_image': True,
'vae_model_path': './checkpoints/vae/sd-vae-ft-mse',
'video_guidance_scale': 3.5,
'video_guidance_scale_end': None,
'video_guidance_scale_method': 'linear',
'video_has_condition': True,
'video_is_middle': False,
'video_negative_prompt': 'V2',
'video_num_inference_steps': 10,
'video_overlap': 1,
'video_strength': 1.0,
'vision_clip_extractor_class_name': 'ImageClipVisionFeatureExtractor',
'vision_clip_model_path': './checkpoints/IP-Adapter/models/image_encoder',
'w_ind_noise': 0.5,
'which2video': 'video_middle',
'width': None,
'write_info': False}
running model, T2I SD
{'fantasticmix_v10': {'sd': '/workspace/MuseV/configs/model/../../checkpoints/t2i/sd1.5/fantasticmix_v10'}}
lcm: None None
unet_model_params_dict_src dict_keys(['musev', 'musev_referencenet', 'musev_referencenet_pose'])
unet: musev_referencenet /workspace/MuseV/configs/model/../../checkpoints/motion/musev_referencenet
referencenet_model_params_dict_src dict_keys(['musev_referencenet'])
referencenet: musev_referencenet /workspace/MuseV/configs/model/../../checkpoints/motion/musev_referencenet
ip_adapter_model_params_dict_src dict_keys(['IPAdapter', 'IPAdapterPlus', 'IPAdapterPlus-face', 'IPAdapterFaceID', 'musev_referencenet', 'musev_referencenet_pose'])
ip_adapter: musev_referencenet {'ip_image_encoder': '/workspace/MuseV/configs/model/../../checkpoints/IP-Adapter/image_encoder', 'ip_ckpt': '/workspace/MuseV/configs/model/../../checkpoints/motion/musev_referencenet/ip_adapter_image_proj.bin', 'ip_scale': 1.0, 'clip_extra_context_tokens': 4, 'clip_embeddings_dim': 1024, 'desp': ''}
facein: None None
ip_adapter_face: None None
video_negprompt V2 badhandv4, ng_deepnegative_v1_75t, (((multiple heads))), (((bad body))), (((two people))), ((extra arms)), ((deformed body)), (((sexy))), paintings,(((two heads))), ((big head)),sketches, (worst quality:2), (low quality:2), (normal quality:2), lowres, ((monochrome)), ((grayscale)), skin spots, acnes, skin blemishes, age spot, glans, (((nsfw))), nipples, extra fingers, (extra legs), (long neck), mutated hands, (fused fingers), (too many fingers)
negprompt V2 badhandv4, ng_deepnegative_v1_75t, (((multiple heads))), (((bad body))), (((two people))), ((extra arms)), ((deformed body)), (((sexy))), paintings,(((two heads))), ((big head)),sketches, (worst quality:2), (low quality:2), (normal quality:2), lowres, ((monochrome)), ((grayscale)), skin spots, acnes, skin blemishes, age spot, glans, (((nsfw))), nipples, extra fingers, (extra legs), (long neck), mutated hands, (fused fingers), (too many fingers)
n_test_datas 1
2024-04-05 20:19:51,813- musev:997- INFO- vision_clip_extractor, name=ImageClipVisionFeatureExtractor, path=./checkpoints/IP-Adapter/models/image_encoder
test_model_vae_model_path ./checkpoints/vae/sd-vae-ft-mse
Traceback (most recent call last):
File "/workspace/MuseV/scripts/inference/video2video.py", line 1102, in <module>
sd_predictor = DiffusersPipelinePredictor(
File "/workspace/MuseV/musev/pipelines/pipeline_controlnet_predictor.py", line 165, in __init__
controlnet, controlnet_processor, processor_params = load_controlnet_model(
File "/workspace/MuseV/MMCM/mmcm/vision/feature_extractor/controlnet.py", line 856, in load_controlnet_model
controlnet_processor = ControlnetProcessor(
File "/workspace/MuseV/MMCM/mmcm/vision/feature_extractor/controlnet.py", line 71, in __init__
self.processor = processor_cls()
File "/workspace/MuseV/controlnet_aux/src/controlnet_aux/dwpose/__init__.py", line 141, in __init__
self.pose_estimation = Wholebody(
File "/workspace/MuseV/controlnet_aux/src/controlnet_aux/dwpose/wholebody.py", line 53, in __init__
self.detector = init_detector(det_config, det_ckpt, device=device)
NameError: name 'init_detector' is not defined
The error says there is no name called "init_detector". Looking at the code, that name comes from an import in:
/workspace/MuseV/controlnet_aux/src/controlnet_aux/dwpose/wholebody.py
Interestingly, I don't get the warning that "mmcv" is not installed, as you'd expect if that import had failed, yet the script still treats init_detector as undefined. So I moved the line "from mmdet.apis import inference_detector, init_detector" out of the try statement to see what would happen.
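For context, the imports in wholebody.py are wrapped in a try block, which I think is why the real failure gets hidden. A simplified sketch of the pattern (illustrative only, not the exact code from the repo):

try:
    from mmdet.apis import inference_detector, init_detector
except Exception:
    # Swallows the real error (e.g. a broken mmcv build, not just a missing
    # package), so no warning about the actual cause is printed and the
    # names are silently left undefined.
    pass

# Later, the first real use of the name fails with NameError instead of the
# underlying ImportError (cf. wholebody.py line 53 in the traceback above):
# self.detector = init_detector(det_config, det_ckpt, device=device)

With the import un-guarded, the underlying ImportError shows up immediately on the next run: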
(musev) root@10703020883e:/workspace/MuseV# python scripts/inference/video2video.py --sd_model_name fantasticmix_v10 --unet_model_name musev_referencenet --referencenet_model_name musev_referencenet --ip_adapter_model_name musev_referencenet -test_data_path ./configs/tasks/example.yaml --vision_clip_extractor_class_name ImageClipVisionFeatureExtractor --vision_clip_model_path ./checkpoints/IP-Adapter/models/image_encoder --output_dir ./output --n_batch 1 --controlnet_name dwpose_body_hand --which2video "video_middle" --target_datas dance1 --fps 12 --time_size 12
(Same deprecation warnings, args dump, and model-loading output as the first run.)
Traceback (most recent call last):
File "/workspace/MuseV/scripts/inference/video2video.py", line 1102, in <module>
sd_predictor = DiffusersPipelinePredictor(
File "/workspace/MuseV/musev/pipelines/pipeline_controlnet_predictor.py", line 165, in __init__
controlnet, controlnet_processor, processor_params = load_controlnet_model(
File "/workspace/MuseV/MMCM/mmcm/vision/feature_extractor/controlnet.py", line 856, in load_controlnet_model
controlnet_processor = ControlnetProcessor(
File "/workspace/MuseV/MMCM/mmcm/vision/feature_extractor/controlnet.py", line 71, in __init__
self.processor = processor_cls()
File "/workspace/MuseV/controlnet_aux/src/controlnet_aux/dwpose/__init__.py", line 139, in __init__
from .wholebody import Wholebody
File "/workspace/MuseV/controlnet_aux/src/controlnet_aux/dwpose/wholebody.py", line 24, in <module>
from mmdet.apis import inference_detector, init_detector
File "/opt/conda/envs/musev/lib/python3.10/site-packages/mmdet/apis/__init__.py", line 2, in <module>
from .det_inferencer import DetInferencer
File "/opt/conda/envs/musev/lib/python3.10/site-packages/mmdet/apis/det_inferencer.py", line 22, in <module>
from mmdet.evaluation import INSTANCE_OFFSET
File "/opt/conda/envs/musev/lib/python3.10/site-packages/mmdet/evaluation/__init__.py", line 3, in <module>
from .metrics import * # noqa: F401,F403
File "/opt/conda/envs/musev/lib/python3.10/site-packages/mmdet/evaluation/metrics/__init__.py", line 5, in <module>
from .coco_metric import CocoMetric
File "/opt/conda/envs/musev/lib/python3.10/site-packages/mmdet/evaluation/metrics/coco_metric.py", line 16, in <module>
from mmdet.datasets.api_wrappers import COCO, COCOeval, COCOevalMP
File "/opt/conda/envs/musev/lib/python3.10/site-packages/mmdet/datasets/__init__.py", line 26, in <module>
from .utils import get_loading_pipeline
File "/opt/conda/envs/musev/lib/python3.10/site-packages/mmdet/datasets/utils.py", line 5, in <module>
from mmdet.datasets.transforms import LoadAnnotations, LoadPanopticAnnotations
File "/opt/conda/envs/musev/lib/python3.10/site-packages/mmdet/datasets/transforms/__init__.py", line 6, in <module>
from .formatting import (ImageToTensor, PackDetInputs, PackReIDInputs,
File "/opt/conda/envs/musev/lib/python3.10/site-packages/mmdet/datasets/transforms/formatting.py", line 11, in <module>
from mmdet.structures.bbox import BaseBoxes
File "/opt/conda/envs/musev/lib/python3.10/site-packages/mmdet/structures/bbox/__init__.py", line 2, in <module>
from .base_boxes import BaseBoxes
File "/opt/conda/envs/musev/lib/python3.10/site-packages/mmdet/structures/bbox/base_boxes.py", line 9, in <module>
from mmdet.structures.mask.structures import BitmapMasks, PolygonMasks
File "/opt/conda/envs/musev/lib/python3.10/site-packages/mmdet/structures/mask/__init__.py", line 3, in <module>
from .structures import (BaseInstanceMasks, BitmapMasks, PolygonMasks,
File "/opt/conda/envs/musev/lib/python3.10/site-packages/mmdet/structures/mask/structures.py", line 12, in <module>
from mmcv.ops.roi_align import roi_align
File "/opt/conda/envs/musev/lib/python3.10/site-packages/mmcv/ops/__init__.py", line 3, in <module>
from .active_rotated_filter import active_rotated_filter
File "/opt/conda/envs/musev/lib/python3.10/site-packages/mmcv/ops/active_rotated_filter.py", line 10, in <module>
ext_module = ext_loader.load_ext(
File "/opt/conda/envs/musev/lib/python3.10/site-packages/mmcv/utils/ext_loader.py", line 13, in load_ext
ext = importlib.import_module('mmcv.' + name)
File "/opt/conda/envs/musev/lib/python3.10/importlib/__init__.py", line 126, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
ImportError: /opt/conda/envs/musev/lib/python3.10/site-packages/mmcv/_ext.cpython-310-x86_64-linux-gnu.so: undefined symbol: _ZN2at4_ops10zeros_like4callERKNS_6TensorEN3c108optionalINS5_10ScalarTypeEEENS6_INS5_6LayoutEEENS6_INS5_6DeviceEEENS6_IbEENS6_INS5_12MemoryFormatEEE
(musev) root@10703020883e:/workspace/MuseV# python scripts/inference/video2video.py --sd_model_name fantasticmix_v10 --unet_model_name musev_referencenet --referencenet_model_name musev_referencenet --ip_adapter_model_name musev_referencenet -test_data_path ./configs/tasks/example.yaml --vision_clip_extractor_class_name ImageClipVisionFeatureExtractor --vision_clip_model_path ./checkpoints/IP-Adapter/models/image_encoder --output_dir ./output --n_batch 1 --controlnet_name dwpose_body_hand --which2video "video_middle" --target_datas dance1 --fps 12 --time_size 12
(Same warnings, args, and model-loading output as before.)
Traceback (most recent call last):
File "/workspace/MuseV/scripts/inference/video2video.py", line 1102, in <module>
sd_predictor = DiffusersPipelinePredictor(
File "/workspace/MuseV/musev/pipelines/pipeline_controlnet_predictor.py", line 165, in __init__
controlnet, controlnet_processor, processor_params = load_controlnet_model(
File "/workspace/MuseV/MMCM/mmcm/vision/feature_extractor/controlnet.py", line 856, in load_controlnet_model
controlnet_processor = ControlnetProcessor(
File "/workspace/MuseV/MMCM/mmcm/vision/feature_extractor/controlnet.py", line 71, in __init__
self.processor = processor_cls()
File "/workspace/MuseV/controlnet_aux/src/controlnet_aux/dwpose/__init__.py", line 139, in __init__
from .wholebody import Wholebody
File "/workspace/MuseV/controlnet_aux/src/controlnet_aux/dwpose/wholebody.py", line 13, in <module>
from mmpose.apis import inference_topdown
File "/opt/conda/envs/musev/lib/python3.10/site-packages/mmpose/apis/__init__.py", line 2, in <module>
from .inference import (collect_multi_frames, inference_bottomup,
File "/opt/conda/envs/musev/lib/python3.10/site-packages/mmpose/apis/inference.py", line 17, in <module>
from mmpose.models.builder import build_pose_estimator
File "/opt/conda/envs/musev/lib/python3.10/site-packages/mmpose/models/__init__.py", line 8, in <module>
from .heads import * # noqa
File "/opt/conda/envs/musev/lib/python3.10/site-packages/mmpose/models/heads/__init__.py", line 11, in <module>
from .transformer_heads import EDPoseHead
File "/opt/conda/envs/musev/lib/python3.10/site-packages/mmpose/models/heads/transformer_heads/__init__.py", line 2, in <module>
from .edpose_head import EDPoseHead
File "/opt/conda/envs/musev/lib/python3.10/site-packages/mmpose/models/heads/transformer_heads/edpose_head.py", line 14, in <module>
from mmcv.ops import MultiScaleDeformableAttention
File "/opt/conda/envs/musev/lib/python3.10/site-packages/mmcv/ops/__init__.py", line 3, in <module>
from .active_rotated_filter import active_rotated_filter
File "/opt/conda/envs/musev/lib/python3.10/site-packages/mmcv/ops/active_rotated_filter.py", line 10, in <module>
ext_module = ext_loader.load_ext(
File "/opt/conda/envs/musev/lib/python3.10/site-packages/mmcv/utils/ext_loader.py", line 13, in load_ext
ext = importlib.import_module('mmcv.' + name)
File "/opt/conda/envs/musev/lib/python3.10/importlib/__init__.py", line 126, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
ImportError: /opt/conda/envs/musev/lib/python3.10/site-packages/mmcv/_ext.cpython-310-x86_64-linux-gnu.so: undefined symbol: _ZN2at4_ops10zeros_like4callERKNS_6TensorEN3c108optionalINS5_10ScalarTypeEEENS6_INS5_6LayoutEEENS6_INS5_6DeviceEEENS6_IbEENS6_INS5_12MemoryFormatEEE
(musev) root@10703020883e:/workspace/MuseV# python scripts/inference/video2video.py --sd_model_name fantasticmix_v10 --unet_model_name musev_referencenet --referencenet_model_name musev_referencenet --ip_adapter_model_name musev_referencenet -test_data_path ./configs/tasks/example.yaml --vision_clip_extractor_class_name ImageClipVisionFeatureExtractor --vision_clip_model_path ./checkpoints/IP-Adapter/models/image_encoder --output_dir ./output --n_batch 1 --controlnet_name dwpose_body_hand --which2video "video_middle" --target_datas dance1 --fps 12 --time_size 12
(Same warnings, args, and model-loading output as before. The traceback is the same as the mmdet run above: it again fails at the "from mmdet.apis import inference_detector, init_detector" line in wholebody.py and ends in the same undefined-symbol ImportError from mmcv/_ext.)
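For what it's worth, that undefined symbol demangles to the torch operator at::_ops::zeros_like::call(...), so my guess (only a guess) is that the prebuilt mmcv._ext was built against a different PyTorch than the one installed in this environment. A quick way to print the versions involved (nothing MuseV-specific, just torch and mmcv):

import torch
import mmcv

# My assumption: a prebuilt mmcv wheel has to match the installed torch/CUDA
# versions; a mismatch shows up as an undefined-symbol error when mmcv._ext loads.
print("torch:", torch.__version__, "cuda:", torch.version.cuda)
print("mmcv:", mmcv.__version__)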