libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp1_pp1_zerofalse_stage0_mbs16_gbs128_acc8_1n1g |
7801 MiB / 32.06 samples/s |
7795 MiB / 31.2 samples/s |
7795 MiB / 31.2 samples/s |
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp1_pp1_zerofalse_stage0_mbs32_gbs32_acc1_1n1g |
7937 MiB(显存不合理) / 32.94 samples/s |
7039 MiB / 32.96 samples/s |
7039 MiB / 32.94 samples/s |
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp8_pp1_zerofalse_stage0_mbs32_gbs256_acc8_1n8g |
3880 MiB / 15.41 samples/s |
3874 MiB / 15.38 samples/s |
3874 MiB / 15.38 samples/s |
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp2_mp1_pp4_zerotrue_stage2_mbs32_gbs512_acc8_1n8g |
7722 MiB / 167.49 samples/s |
7684 MiB / 168.03 samples/s |
7684 MiB / 168.77 samples/s |
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp2_mp2_pp2_zerotrue_stage2_mbs32_gbs512_acc8_1n8g |
6340 MiB / 57.29 samples/s |
6334 MiB / 57.19 samples/s |
6304 MiB / 57.29 samples/s |
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp1_pp1_zerotrue_stage2_mbs32_gbs128_acc1_1n4g |
5212 MiB / 96.02 samples/s |
5206 MiB / 95.9 samples/s |
5206 MiB / 95.8 samples/s lr=NA |
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs32_gbs1024_acc8_1n8g |
6154 MiB / 71.38 samples/s |
6148 MiB / 71.31 samples/s |
6148 MiB / 71.45 samples/s |
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs32_gbs128_acc1_1n8g |
5442 MiB / 63.83 samples/s |
5436 MiB / 63.65 samples/s |
5436 MiB / 63.74 samples/s |
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp8_mp1_pp1_zerotrue_stage2_mbs32_gbs2048_acc8_1n8g |
6060 MiB / 223.19 samples/s |
6054 MiB / 223.52 samples/s lr=NA |
6054 MiB / 223.14 samples/s |
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp8_mp1_pp1_zerotrue_stage2_mbs32_gbs256_acc1_1n8g |
4776 MiB / 187.71 samples/s |
4770 MiB / 187.7 samples/s |
4770 MiB / 187.37 samples/s |
libai_bert_large_pretrain_graph_nl48_nah16_hs1024_fp16_actrue_dp1_mp1_pp4_zerofalse_stage0_mbs16_gbs128_acc8_1n4g |
7540 MiB / 47.8 samples/s |
7534 MiB / 47.83 samples/s |
7534 MiB / 47.84 samples/s |
libai_bert_large_pretrain_graph_nl48_nah16_hs1024_fp16_actrue_dp1_mp1_pp8_zerofalse_stage0_mbs24_gbs384_acc16_1n8g |
7920 MiB / 72.88 samples/s |
7914 MiB / 74.13 samples/s |
7914 MiB / 73.36 samples/s |
libai_bert_large_pretrain_graph_nl48_nah16_hs1024_fp16_actrue_dp1_mp4_pp1_zerofalse_stage0_mbs32_gbs256_acc8_1n4g |
7150 MiB / 8.34 samples/s |
7144 MiB / 8.38 samples/s |
7144 MiB / 8.34 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp1_pp1_zerofalse_stage0_mbs8_gbs64_acc8_1n1g |
9081 MiB / 13.14 samples/s lr=NA |
9075 MiB / 13.12 samples/s lr=NA |
9075 MiB / 13.28 samples/s lr=NA |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp1_pp1_zerofalse_stage0_mbs8_gbs8_acc1_1n1g |
8235 MiB / 12.95 samples/s |
8091 MiB / 12.79 samples/s |
8091 MiB / 12.91 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp1_pp4_zerofalse_stage0_mbs12_gbs96_acc8_1n4g |
8460 MiB / 39.62 samples/s loss=inf |
8454 MiB / 39.64 samples/s |
8454 MiB / 39.65 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp8_pp1_zerofalse_stage0_mbs8_gbs64_acc8_1n8g |
3548 MiB / 7.53 samples/s |
3542 MiB / 7.55 samples/s |
3542 MiB / 7.53 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp2_mp1_pp4_zerotrue_stage2_mbs8_gbs128_acc8_1n8g |
7252 MiB / 64.78 samples/s |
7230 MiB / 64.73 samples/s |
7230 MiB / 64.77 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp2_mp2_pp2_zerotrue_stage2_mbs8_gbs128_acc8_1n8g |
5518 MiB / 24.44 samples/s |
5512 MiB / 24.48 samples/s |
5512 MiB / 24.44 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp1_pp1_zerotrue_stage2_mbs8_gbs256_acc8_1n4g |
6594 MiB / 41.94 samples/s |
6588 MiB / 41.95 samples/s |
6588 MiB / 41.93 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp1_pp1_zerotrue_stage2_mbs8_gbs32_acc1_1n4g |
5162 MiB / 32.87 samples/s |
5156 MiB / 32.84 samples/s |
5156 MiB / 32.87 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs8_gbs256_acc8_1n8g |
6176 MiB / 32.69 samples/s |
6152 MiB / 32.49 samples/s |
6152 MiB / 32.43 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs8_gbs32_acc1_1n8g |
5458 MiB / 27.01 samples/s |
5412 MiB / 27.01 samples/s |
5412 MiB / 27.0 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp8_mp1_pp1_zerotrue_stage2_mbs8_gbs512_acc8_1n8g |
6070 MiB / 82.3 samples/s |
6064 MiB / 82.18 samples/s |
6064 MiB / 82.28 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp8_mp1_pp1_zerotrue_stage2_mbs8_gbs64_acc1_1n8g |
4726 MiB / 63.62 samples/s |
4720 MiB / 63.61 samples/s |
4720 MiB / 63.61 samples/s |
libai_gpt2_pretrain_graph_nl48_nah16_hs1024_fp16_actrue_dp1_mp1_pp8_zerofalse_stage0_mbs6_gbs96_acc16_1n8g |
6822 MiB / 34.45 samples/s |
6816 MiB / 34.77 samples/s |
6816 MiB / 33.95 samples/s |
libai_gpt2_pretrain_graph_nl48_nah16_hs1024_fp16_actrue_dp1_mp4_pp1_zerofalse_stage0_mbs8_gbs64_acc8_1n4g |
6594 MiB / 4.07 samples/s |
6588 MiB / 4.05 samples/s |
6588 MiB / 4.03 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp1_pp1_zerotrue_stage2_mbs128_gbs1024_acc8_1n1g |
7230 MiB / 108.92 samples/s |
|
|
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp1_pp1_zerotrue_stage2_mbs256_gbs256_acc1_1n1g |
6692 MiB / 113.19 samples/s |
|
|
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp1_pp4_zerotrue_stage2_mbs128_gbs1024_acc8_1n4g |
0 MiB / 0 samples/s |
|
|
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp1_pp4_zerotrue_stage2_mbs256_gbs256_acc1_1n4g |
6570 MiB / 264.65 samples/s |
|
|
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp2_pp2_zerotrue_stage2_mbs128_gbs1024_acc8_1n4g |
10414 MiB / 107.39 samples/s |
|
|
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp2_pp2_zerotrue_stage2_mbs256_gbs256_acc1_1n4g |
5914 MiB / 100.94 samples/s |
|
|
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp4_pp1_zerotrue_stage2_mbs128_gbs1024_acc8_1n4g |
6748 MiB / 198.7 samples/s |
|
|
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp4_pp1_zerotrue_stage2_mbs256_gbs256_acc1_1n4g |
5484 MiB / 218.7 samples/s |
|
|
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp1_pp2_zerotrue_stage2_mbs128_gbs256_acc1_1n4g |
4354 MiB / 495.47 samples/s |
|
|
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp1_pp2_zerotrue_stage2_mbs64_gbs1024_acc8_1n4g |
6530 MiB / 214.09 samples/s |
|
|
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp2_pp1_zerotrue_stage2_mbs128_gbs256_acc1_1n4g |
4098 MiB / 163.96 samples/s |
|
|
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp2_pp1_zerotrue_stage2_mbs64_gbs1024_acc8_1n4g |
4644 MiB / 199.26 samples/s |
|
|
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp2_pp2_zerotrue_stage2_mbs128_gbs2048_acc8_1n8g |
10452 MiB / 182.07 samples/s |
|
|
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp2_pp2_zerotrue_stage2_mbs256_gbs512_acc1_1n8g |
5940 MiB / 233.05 samples/s |
|
|
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp1_pp1_zerotrue_stage2_mbs32_gbs1024_acc8_1n4g |
3518 MiB / 215.72 samples/s |
|
|
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp1_pp1_zerotrue_stage2_mbs64_gbs256_acc1_1n4g |
3296 MiB / 217.42 samples/s |
|
|
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp1_pp2_zerotrue_stage2_mbs64_gbs2048_acc8_1n8g |
6234 MiB / 219.87 samples/s |
|
|
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs128_gbs512_acc1_1n8g |
4074 MiB / 350.99 samples/s |
|
|
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs64_gbs2048_acc8_1n8g |
4618 MiB / 213.33 samples/s |
|
|