openbmb / cpm-bee Goto Github PK

View Code? Open in Web Editor NEW

2.7K 2.7K 211.0 6.77 MB

百亿参数的中英文双语基座大模型

Python 90.25% Shell 0.64% Jupyter Notebook 9.11%

cpm-bee's People

Contributors

Stargazers

Watchers

Forkers

kunlun-zhu quduoduo itsharex rayjue dumpmemory 1264321138 nlpz127 binyu-xidian-university leisangcs jerryallison gsgampo eric8810 ferryrec wandergluf norseux bartslab cocowhale ethanlovequeen niepengfeiisgood apollohuang1 zhuoyue tonywang-sh fomennianhua ithink3iam decoder666 zhanglv0209 xzlstorm tisoyboy jangocheng pfxjacky hzzhang-nlp unsetopt rindrop panqiaotian robotpin yuanjie-ai jadentan neverstoplearn zdd10010806 sd5884703 poorlet petercao rustaceanchina zheng5yu9 jieyoujun yanniszhou 20-46 qfdox cador soon14 joshuayan startguy 2132660698 hhy5277 dustinchu alicebong himongo wpq3142 quithink moziofmoon trphoenix kekewind yellllllow shitload lance911 infrastring hatjs880328s xfg0913 xuyongfu cocoliu iceai999 jiezhanggt excogi aokimisako daoyuly pelatong idealistzhu ymg2007 iamleon121 zjdyzww skysqlite eltociear ralao87 dave-apmic arthurxl chopperyuting minttec oevevr anna8kun reenature sweetsmint asdlei99 dark-mocha tszgc tyrannoe nmalogy xyzqu tears743 alvinu mayottee

cpm-bee's Issues

单机多卡加载模型时卡住

torch 1.13
cuda 11.7

推理代码能正常运行

训练开4卡4090，加载模型时卡住，cpu占用100%，显卡占用100%

torchrun --nnodes=1 --nproc_per_node=4 --rdzv_id=1 --rdzv_backend=c10d --rdzv_endpoint=localhost:12345 finetune_cpm_bee.py --use-delta --model-config config/cpm-bee-10b.json --dataset datasets/eprstmt/binary/dev --eval_dataset datasets/eprstmt/binary/eval_dev --epoch 100 --batch-size 4 --train-iters 100 --save-name cpm_bee_finetune --max-length 2048 --save results/ --lr 0.0001 --inspect-iters 100 --warmup-iters 1 --eval-interval 1000 --early-stop-patience 5 --lr-decay-style noam --weight-decay 0.01 --clip-grad 1.0 --loss-scale 32768 --start-step 0 --load path/pytorch_model_10b.bin

====================== Initialization ======================
rank : 0
local_rank : 0
world_size : 4
local_size : 4
master : star-SYS-420GP-TNR:37257
device : 0
cpus : [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1
3, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 2
4, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 3
5, 36, 37]

微调是否要装nccl?

Traceback (most recent call last):
File "finetune_cpm_bee.py", line 4, in
import bmtrain as bmt
File "/home/mdisk2/tanjunwen/anaconda3/envs/cpmbee2/lib/python3.8/site-packages/bmtrain/init.py", line 2, in
from .init import init_distributed
File "/home/mdisk2/tanjunwen/anaconda3/envs/cpmbee2/lib/python3.8/site-packages/bmtrain/init.py", line 8, in
from . import nccl
File "/home/mdisk2/tanjunwen/anaconda3/envs/cpmbee2/lib/python3.8/site-packages/bmtrain/nccl/init.py", line 4, in
from . import _C as C
ImportError: /home/mdisk2/tanjunwen/anaconda3/envs/cpmbee2/lib/python3.8/site-packages/bmtrain/nccl/_C.cpython-38-x86_64-linux-gnu.so: undefined symbol: ncclBroadcast
Traceback (most recent call last):
File "finetune_cpm_bee.py", line 4, in
import bmtrain as bmt
File "/home/mdisk2/tanjunwen/anaconda3/envs/cpmbee2/lib/python3.8/site-packages/bmtrain/init.py", line 2, in
from .init import init_distributed
File "/home/mdisk2/tanjunwen/anaconda3/envs/cpmbee2/lib/python3.8/site-packages/bmtrain/init.py", line 8, in
from . import nccl
File "/home/mdisk2/tanjunwen/anaconda3/envs/cpmbee2/lib/python3.8/site-packages/bmtrain/nccl/init.py", line 4, in
from . import _C as C
ImportError: /home/mdisk2/tanjunwen/anaconda3/envs/cpmbee2/lib/python3.8/site-packages/bmtrain/nccl/_C.cpython-38-x86_64-linux-gnu.so: undefined symbol: ncclBroadcast
Traceback (most recent call last):
File "finetune_cpm_bee.py", line 4, in
import bmtrain as bmt
File "/home/mdisk2/tanjunwen/anaconda3/envs/cpmbee2/lib/python3.8/site-packages/bmtrain/init.py", line 2, in
from .init import init_distributed
File "/home/mdisk2/tanjunwen/anaconda3/envs/cpmbee2/lib/python3.8/site-packages/bmtrain/init.py", line 8, in
from . import nccl
File "/home/mdisk2/tanjunwen/anaconda3/envs/cpmbee2/lib/python3.8/site-packages/bmtrain/nccl/init.py", line 4, in
from . import _C as C
ImportError: /home/mdisk2/tanjunwen/anaconda3/envs/cpmbee2/lib/python3.8/site-packages/bmtrain/nccl/_C.cpython-38-x86_64-linux-gnu.so: undefined symbol: ncclBroadcast
Traceback (most recent call last):
File "finetune_cpm_bee.py", line 4, in
import bmtrain as bmt
File "/home/mdisk2/tanjunwen/anaconda3/envs/cpmbee2/lib/python3.8/site-packages/bmtrain/init.py", line 2, in
from .init import init_distributed
File "/home/mdisk2/tanjunwen/anaconda3/envs/cpmbee2/lib/python3.8/site-packages/bmtrain/init.py", line 8, in
from . import nccl
File "/home/mdisk2/tanjunwen/anaconda3/envs/cpmbee2/lib/python3.8/site-packages/bmtrain/nccl/init.py", line 4, in
from . import _C as C
ImportError: /home/mdisk2/tanjunwen/anaconda3/envs/cpmbee2/lib/python3.8/site-packages/bmtrain/nccl/_C.cpython-38-x86_64-linux-gnu.so: undefined symbol: ncclBroadcast
ERROR:torch.distributed.elastic.multiprocessing.api:failed (exitcode: 1) local_rank: 0 (pid: 81486) of binary: /home/mdisk2/tanjunwen/anaconda3/envs/cpmbee2/bin/python
Traceback (most recent call last):
File "/home/mdisk2/tanjunwen/anaconda3/envs/cpmbee2/bin/torchrun", line 11, in
sys.exit(main())
File "/home/mdisk2/tanjunwen/anaconda3/envs/cpmbee2/lib/python3.8/site-packages/torch/distributed/elastic/multiprocessing/errors/init.py", line 346, in wrapper
return f(*args, **kwargs)
File "/home/mdisk2/tanjunwen/anaconda3/envs/cpmbee2/lib/python3.8/site-packages/torch/distributed/run.py", line 762, in main
run(args)
File "/home/mdisk2/tanjunwen/anaconda3/envs/cpmbee2/lib/python3.8/site-packages/torch/distributed/run.py", line 753, in run
elastic_launch(
File "/home/mdisk2/tanjunwen/anaconda3/envs/cpmbee2/lib/python3.8/site-packages/torch/distributed/launcher/api.py", line 132, in call
return launch_agent(self._config, self._entrypoint, list(args))
File "/home/mdisk2/tanjunwen/anaconda3/envs/cpmbee2/lib/python3.8/site-packages/torch/distributed/launcher/api.py", line 246, in launch_agent
raise ChildFailedError(
torch.distributed.elastic.multiprocessing.errors.ChildFailedError:

model test

Dear Editor,
All of you did a good job in the LLM's exploration.
I try to use CPM-Bee-2B , But I find that requesting in Chinese will return English results,
while requesting in English will return Chinese results.
It is so bad fo me :(
there is my data
data_list =
[{"input": "我生病了,写一份请假单", "prompt": "内容符合规范", "": ""},
{"input": "I’m sick, write a leave of absence form.", "prompt": "make enough word", "": ""}]
But i get a bad odd answer.

I know the model too small.
Did I enter the language in the wrong format?
Hope we can have a talk about this, Thank u.
Yours,

CPM-Bee数据格式相关问题

请问如果想要在CPM-Bee模型上同时微调几个不同的子任务，数据格式需要设置成什么样的？谢谢，盼复

和清华的 ChatGLM 6b 相比，效果会更好吗？

请问开源的模型句子长度支持是多少？

Is there any social media groups that community users can join in？

discord/wechat/qq/clack?

个人感觉模型的能力有所保留或者是问的格式不对？

load model /models/cpm-bee-10b/pytorch_model.bin please wait ......
sucess loaded model /models/cpm-bee-10b/pytorch_model.bin
Enter json line to evaluate: { "prompt": "Java冒泡排序算法"}
Error: ''
press Enter to continue...
{ "prompt": "Java冒泡排序算法","": ""}
Enter json line to evaluate: { "prompt": "Java冒泡排序算法","": ""}
{'prompt': 'Java冒泡排序算法', '': 'Java冒泡排序算法'}
press Enter to continue...

Enter json line to evaluate: {"input": "我生病了,写一份请假单", "prompt": "内容符合规范", "": ""}
Error: ''
press Enter to continue...

Enter json line to evaluate: {"input": "我生病了,写一份请假单", "prompt": "内容符合规范", "": ""}
{'input': '我生病了,写一份请假单', 'prompt': '内容符合规范', '': ',老板不批假怎么办?'}
Enter json line to evaluate: {"input": "购物车服务有添加商品，删除商品，购物车结算等功能", "prompt": "写代码", "": ""}
{'input': '购物车服务有添加商品，删除商品，购物车结算等功能', 'prompt': '写代码', '': '。\n购物车服务的核心是购物车结算，购物车结算有两种方式：\n1、先将商品添加到购物车里面，然后再进行结算；\n2、直接在购物车里面就可以进行结算。'}

Enter json line to evaluate: {"document": "今天天气很好，我和妈妈一起去公园，", "prompt": "往后写约100字", "": ""}
{'document': '今天天气很好，我和妈妈一起去公园，', 'prompt': '往后写约100字', '': '公园里有很多人，我和妈妈一起去玩滑梯了。\n今天天气很好，我和妈妈一起去公园，公园里有很多人，我和妈妈一起去玩滑梯了。\n今天天气很好，我和妈妈一起去公园，公园里有很多人，我和妈妈一起去玩滑梯了。\n今天天气很好，我和妈妈一起去公园，公园里有很多人。'}

比如代码生成，逻辑推理，文本生成，都明显比之前清华6b的差。

下面是改进的代码，可以从控制台输入，方便测试，希望纳入代码库中帮助别人，谢谢。

from cpm_live.generation.bee import CPMBeeBeamSearch
from cpm_live.models import CPMBeeTorch, CPMBeeConfig
from cpm_live.tokenizers import CPMBeeTokenizer
from opendelta import LoraModel
import torch
import json

if name == "main":

data_list = [

{"document": "今天天气是真的<mask_0>", "": {"<mask_0>": ""}},
{"input": "今天天气很好，我和妈妈一起去公园，", "prompt": "往后写约100字", "": ""},
{"input": "北京是**的首都", "prompt": "中翻英", "": ""},
{"input": "NGC 6231是一个位于天蝎座的疏散星团，天球座标为赤经16时54分，赤纬-41度48分，", "question": "NGC 6231的经纬度是多少？", "": ""},
{"input":"之前多次聚餐都选择这里，现在由于炭火改成了电烤羊，口感真的不如从前，","question":"评分是多少？(1-5)","":""},
{"input": "父母都希望自己的孩子诚实、勇敢、有礼貌。父母首先得从自己做起，", "options": {"<option_0>": "少提要求", "<option_1>": "降低标准", "<option_2>": "自己先做好", "<option_3>": "让孩子拿主意"}, "question": "教育孩子时，父母应该：", "": ""}
]
print('sample input data ')
for data in data_list :
print(json.dumps(data,ensure_ascii=False))
config = CPMBeeConfig.from_json_file("config/cpm-bee-10b.json")
ckpt_path = "/models/cpm-bee-10b/pytorch_model.bin"
tokenizer = CPMBeeTokenizer()
model = CPMBeeTorch(config=config)

# insert LoRA if your model has been finetuned in delta-tuning.
# delta_model = LoraModel(backbone_model=model, modified_modules=["project_q", "project_v"], backend="hf")
# lora_ckpt_path = "path/to/lora.pt"
#model.load_state_dict(torch.load(lora_ckpt_path), strict=False)
print('load model '+ckpt_path +" please wait ......")
model.load_state_dict(torch.load(ckpt_path), strict=False)
model.cuda()
print('sucess loaded model '+ckpt_path)
while True:
   # Read input from console
   line = input("Enter json line to evaluate: ")
   json_array =[ json.loads(line)]
   #print(json_array)
  # Evaluate input as a Python expression
   try:
      # use beam search
      beam_search = CPMBeeBeamSearch(
      model=model,
      tokenizer=tokenizer,
      )
      inference_results = beam_search.generate(json_array , max_length=100, repetition_penalty=1.1)
      for res in inference_results:
         print(res)
   except Exception as e:
     result = f"Error: {e}"
     print(result)

   # Wait for continued input
   input("press Enter to continue...\n")

评测代码是否开源？

您好，请问未来是否会开源评测代码？

.pt 格式的模型需要基于CPM-Bee在自己的数据集训练得到是吗，单张4090显卡是否能运行呢？

使用preprocess生成数据后，运行微调脚本报错：module 'bmtrain.optim' has no attribute 'OptimManager'

克隆了$ git clone -b main --single-branch https://github.com/OpenBMB/CPM-Bee.git
装了库：pip install -r requirements.txt
转换了数据
但是微调没跑通，报错如下：

╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /data/rain-llm/code/CPM-Bee/src/finetune_cpm_bee.py:193 in <module>                              │
│                                                                                                  │
│   190 │   model: CPMBee,                                                                         │
│   191 │   optimizer: bmt.optim.AdamOffloadOptimizer,                                             │
│   192 │   lr_scheduler: bmt.lr_scheduler.WarmupLRScheduler,                                      │
│ ❱ 193 │   optim_manager: bmt.optim.OptimManager,                                                 │
│   194 ):                                                                                         │
│   195 │                                                                                          │
│   196 │   average_time = bmt.utils.AverageRecorder()                                             │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
AttributeError: module 'bmtrain.optim' has no attribute 'OptimManager'
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /data/rain-llm/code/CPM-Bee/src/finetune_cpm_bee.py:193 in <module>                              │
│                                                                                                  │
│   190 │   model: CPMBee,                                                                         │
│   191 │   optimizer: bmt.optim.AdamOffloadOptimizer,                                             │
│   192 │   lr_scheduler: bmt.lr_scheduler.WarmupLRScheduler,                                      │
│ ❱ 193 │   optim_manager: bmt.optim.OptimManager,                                                 │
│   194 ):                                                                                         │
│   195 │                                                                                          │
│   196 │   average_time = bmt.utils.AverageRecorder()                                             │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
AttributeError: module 'bmtrain.optim' has no attribute 'OptimManager'
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /data/rain-llm/code/CPM-Bee/src/finetune_cpm_bee.py:193 in <module>                              │
│                                                                                                  │
│   190 │   model: CPMBee,                                                                         │
│   191 │   optimizer: bmt.optim.AdamOffloadOptimizer,                                             │
│   192 │   lr_scheduler: bmt.lr_scheduler.WarmupLRScheduler,                                      │
│ ❱ 193 │   optim_manager: bmt.optim.OptimManager,                                                 │
│   194 ):                                                                                         │
│   195 │                                                                                          │
│   196 │   average_time = bmt.utils.AverageRecorder()                                             │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
AttributeError: module 'bmtrain.optim' has no attribute 'OptimManager'
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /data/rain-llm/code/CPM-Bee/src/finetune_cpm_bee.py:193 in <module>                              │
│                                                                                                  │
│   190 │   model: CPMBee,                                                                         │
│   191 │   optimizer: bmt.optim.AdamOffloadOptimizer,                                             │
│   192 │   lr_scheduler: bmt.lr_scheduler.WarmupLRScheduler,                                      │
│ ❱ 193 │   optim_manager: bmt.optim.OptimManager,                                                 │
│   194 ):                                                                                         │
│   195 │                                                                                          │
│   196 │   average_time = bmt.utils.AverageRecorder()                                             │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
AttributeError: module 'bmtrain.optim' has no attribute 'OptimManager'
ERROR:torch.distributed.elastic.multiprocessing.api:failed (exitcode: 1) local_rank: 0 (pid: 94429) of binary: /opt/conda/bin/python3
Traceback (most recent call last):
  File "/opt/conda/bin/torchrun", line 33, in <module>
    sys.exit(load_entry_point('torch==2.0.0', 'console_scripts', 'torchrun')())
  File "/opt/conda/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 346, in wrapper
    return f(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/torch/distributed/run.py", line 794, in main
    run(args)
  File "/opt/conda/lib/python3.10/site-packages/torch/distributed/run.py", line 785, in run
    elastic_launch(
  File "/opt/conda/lib/python3.10/site-packages/torch/distributed/launcher/api.py", line 134, in __call__
    return launch_agent(self._config, self._entrypoint, list(args))
  File "/opt/conda/lib/python3.10/site-packages/torch/distributed/launcher/api.py", line 250, in launch_agent
    raise ChildFailedError(
torch.distributed.elastic.multiprocessing.errors.ChildFailedError: 
============================================================
finetune_cpm_bee.py FAILED
------------------------------------------------------------
Failures:
[1]:
  time      : 2023-05-28_22:57:29
  host      : ubuntu
  rank      : 1 (local_rank: 1)
  exitcode  : 1 (pid: 94430)
  error_file: <N/A>
  traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html
[2]:
  time      : 2023-05-28_22:57:29
  host      : ubuntu
  rank      : 2 (local_rank: 2)
  exitcode  : 1 (pid: 94431)
  error_file: <N/A>
  traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html
[3]:
  time      : 2023-05-28_22:57:29
  host      : ubuntu
  rank      : 3 (local_rank: 3)
  exitcode  : 1 (pid: 94432)
  error_file: <N/A>
  traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html
------------------------------------------------------------
Root Cause (first observed failure):
[0]:
  time      : 2023-05-28_22:57:29
  host      : ubuntu
  rank      : 0 (local_rank: 0)
  exitcode  : 1 (pid: 94429)
  error_file: <N/A>
  traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html

直接运行推理没有报错

python text_generation.py

{'document': '**最好的大学是<mask_0>', '': {'<mask_0>': '清华北大，世界最好的大学是麻省理工。'}}

Question: Any plan to release a technical report?

Dear authors,

Thanks for your contribution to the community!!! :-) The OpenBMB series is awesome!

I wonder if a technical report will be released to disclose some training and evaluation/benchmark details.

Thanks and have a good day!

Best,
Zhihong

BMTrain 现在是否适配CUDA 12

请问 BMTrain 现在是否能够适配CUDA 12

推理时指定 Prompt

运行text_generation.py报错

Traceback (most recent call last):
File "cpmbee_translator.py", line 2, in
from cpm_live.generation.bee import CPMBeeBeamSearch
File "/home/css/CPM-Bee/src/cpm_live/generation/init.py", line 1, in
from .ant import CPMAntBeamSearch, CPMAntRandomSampling, CPMAntGeneration
File "/home/css/CPM-Bee/src/cpm_live/generation/ant.py", line 4, in
from ..utils import pad
File "/home/css/CPM-Bee/src/cpm_live/utils/init.py", line 1, in
from .config import Config
File "/home/css/CPM-Bee/src/cpm_live/utils/config.py", line 20, in
from .log import logger
File "/home/css/CPM-Bee/src/cpm_live/utils/log.py", line 27, in
logger = _get_logger()
File "/home/css/CPM-Bee/src/cpm_live/utils/log.py", line 15, in _get_logger
node_name = os.getenv("NODE_NAME", str(bmt.rank()))
File "/opt/conda/lib/python3.8/site-packages/bmtrain-0.0.15-py3.8-linux-x86_64.egg/bmtrain/global_var.py", line 24, in rank
return config['rank']
KeyError: 'rank'

运行demo直接报该错误，请问有什么方法解决吗，尝试在 text_generation.py中加入以下代码也不行
import os
os.environ['MASTER_ADDR'] = 'localhost'
os.environ['MASTER_PORT'] = '5678'

colab上，安装bmtrain失败，能否帮忙看下什么原因？

!pip install -r requirements.txt
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torch<2.0.0,>=1.10
Using cached torch-1.13.1-cp310-cp310-manylinux1_x86_64.whl (887.5 MB)
Collecting bmtrain>=0.2.1
Using cached bmtrain-0.2.2.tar.gz (58 kB)
error: subprocess-exited-with-error

× python setup.py egg_info did not run successfully.
│ exit code: 1
╰─> See above for output.

note: This error originates from a subprocess, and is likely not a problem with pip.
Preparing metadata (setup.py) ... error
error: metadata-generation-failed

× Encountered error while generating package metadata.
╰─> See above for output.

note: This is an issue with the package mentioned above, not pip.
hint: See above for details.

数据格式

有关数据格式的更多信息，您可以查看CPM-Bee数据格式

CPM-Bee数据格式在哪？

使用单卡进行微调时，dataset会报End of dataset的错

如题，使用预处理数据脚本预处理数据后，在进行微调时，dataset报错，不太确定是预处理脚本的问题，还是dataset迭代的问题

376 if self._max_repeat_times is not None:
377 if self._repeat_times >= self._max_repeat_times:
--> 378 raise EOFError("End of dataset")
379 print('_prepare_new_epoch, 2')
380 nw_unused_block: List[int] = []

support huggingface

感谢开源，请问训练数据有开源链接吗？

finetune时如何加载预训练模型？

finetune的时候如果load参数为预训练模型pytorch_model.bin文件，损失就为nan；如果不加载预训练模型pytorch_model.bin文件那就是从头开始训了？

请问input、prompt、<ans>最大字符串长度是多少？

[BUG]text 中含有"<"时 tokenizer 报错，

运行下面的代码会报错，经过测试是因为含有"<"
"""
from cpm_live.models import CPMBeeTorch, CPMBeeConfig
from cpm_live.tokenizers import CPMBeeTokenizer
config = CPMBeeConfig.from_json_file("config/cpm-bee-10b.json")
tokenizer = CPMBeeTokenizer()
print(tokenizer._special_tokens)
text = "if 成绩 < 60"
tokens = tokenizer.tokenize(text)
"""

File "text_generation.py", line 28, in
tokens = tokenizer.tokenize(text)
File "/root/CPM-Bee/src/cpm_live/tokenizers/bee.py", line 143, in tokenize
raise ValueError("Unexpected end of text {}".format(text))
ValueError: Unexpected end of text if 成绩 < 60

支持windows部署吗

windowns部署的时候，编译bmtrain不过，缺少nccl

是否有技术交流群呢？

请问是否有技术交流群呢？方便大家一起沟通一下。

对于问答和文本生成，在模型实际推理的时候有什么区别？

在api中，问答使用的是question，生成使用的是prompt，区别在哪里呢?

environment issue

when running text_generation, bmtrain error appeared:
ImportError: /home/CPM-Bee/BMTrain/bmtrain/optim/_cuda.cpython-39-x86_64-linux-gnu.so: undefined symbol: _ZN2at4_ops9new_zeros4callERKNS_6TensorEN3c108ArrayRefINS5_6SymIntEEENS5_8optionalINS5_10ScalarTypeEEENS9_INS5_6LayoutEEENS9_INS5_6DeviceEEENS9_IbEE
can you please provide conda env or Dockerfile?

模型下载地方

例子给出，ckpt_path = "cpm-bee-3b-ckpt.pt" 这个模型文件从哪里下载

是否支持流式输出？

在生成文本时，怎么能做到流式输出呢？

请教预训练的数据集格式是什么？

我尝试用了{"input": "问题：xxx答案：", "": "yyy"} 做预训练，但是loss基本上为nan。但是对于微调是正确的，请教正确的预训练格式是什么

是否支持langchain的调用

是否支持langchain的调用，能否提供相应的文档

Where is /your/model/checkpoint in the code:model.load_state_dict(torch.load("/your/model/checkpoint"))?

I have download the model cpm-bee-10b from huggingface ,but I do not understand the path of /your/model/checkpointin.
Can you someone please guide me to solve this problem?

CUDAt版本是12.1而不是11.8,如何解决？

`
RuntimeError:
The detected CUDA version (12.1) mismatches the version that was used to compile
PyTorch (11.8). Please make sure to use the same CUDA versions.

我的电脑上的cuda版本nvcc -V
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Mon_Apr__3_17:16:06_PDT_2023
Cuda compilation tools, release 12.1, V12.1.105
Build cuda_12.1.r12.1/compiler.32688072_0
`
这台电脑上还有其它项目，难道，要我为了这一个项目降版本么，
能否修懒得说那里让这个项目支持12可能性

推理速度有点慢，有什么好方法吗？

是否考虑支持一下Apple Silicon M1/M2 芯片

没有N卡，是否考虑支持一下Apple Silicon M1/M2 芯片呢？或者摩尔线程的显卡？

请问后续是否能支持转为HF格式？谢谢。

通用对话的数据格式

我看到README里面给出了6种特定任务的数据格式，想问一下如果要做通用的对话（假定就是单轮的），数据格式应该是怎样的呢？如果用文本生成的那种格式，prompt应该用什么比较好？我尝试了一些好像效果不太好

相对路径各种import报错，能不能修改为绝对路径？

请问如果要使用该模型做指令微调，数据格式应该怎么整，能否出一个sft的示例代码？

全参数微调时报错:TypeError: CheckpointBlock._named_members() got an unexpected keyword argument 'remove_duplicate'

8卡机器，使用4，5，6，7这4块显卡；
默认安装的opendelta == 0.3.0
torch==2.0.0
transformers==4.28.1

全参数微调时报错如下：
TypeError: CheckpointBlock._named_members() got an unexpected keyword argument 'remove_duplicate'
ERROR:torch.distributed.elastic.multiprocessing.api:failed (exitcode: 1) local_rank: 0 (pid: 8973) of binary: /opt/conda/bin/python

import error, undefined symbol: ncclBroadcast

I'm trying the demo code, here is the information: with CUDA 12.1

the command !python -c "import torch;print(torch.cuda.nccl.version())", can return (2, 14, 3)

below is the original import error stack:

ImportError Traceback (most recent call last)
Cell In[10], line 1
----> 1 from cpm_live.generation.bee import CPMBeeBeamSearch
2 from cpm_live.models import CPMBeeTorch, CPMBeeConfig
3 from cpm_live.tokenizers import CPMBeeTokenizer

File /workspace/cpm_live/generation/init.py:1
----> 1 from .ant import CPMAntBeamSearch, CPMAntRandomSampling, CPMAntGeneration

File /workspace/cpm_live/generation/ant.py:4
2 import torch.nn.functional as F
3 from .generation_utils import BeamHypotheses, apply_repetition_penalty, top_k_top_p_filtering
----> 4 from ..utils import pad
7 class CPMAntGeneration:
8 def init(self, model, tokenizer, prompt_length=32):

File /workspace/cpm_live/utils/init.py:1
----> 1 from .config import Config
2 from .data_utils import pad
3 from .object import allgather_objects

File /workspace/cpm_live/utils/config.py:20
18 import copy
19 from typing import Any, Dict, Union
---> 20 from .log import logger
23 def load_dataset_config(dataset_path: str):
24 cfg = json.load(open(dataset_path, "r", encoding="utf-8"))

File /workspace/cpm_live/utils/log.py:7
5 import json
6 import logging
----> 7 import bmtrain as bmt
10 # Set up the common logger
11 def _get_logger():

File /usr/local/lib/python3.10/dist-packages/bmtrain/init.py:2
1 from .global_var import config, world_size, rank
----> 2 from .init import init_distributed
4 from .parameter import DistributedParameter, ParameterInitializer
5 from .layer import DistributedModule

File /usr/local/lib/python3.10/dist-packages/bmtrain/init.py:8
6 from .utils import print_dict
7 from .global_var import config
----> 8 from . import nccl
9 from .synchronize import synchronize
10 def init_distributed(
11 init_method : str = "env://",
12 seed : int = 0,
(...)
15 num_micro_batches: int = None,
16 ):

File /usr/local/lib/python3.10/dist-packages/bmtrain/nccl/init.py:4
2 from typing_extensions import Literal
3 import torch
----> 4 from . import _C as C
5 from .enums import *
7 class NCCLCommunicator:

ImportError: /usr/local/lib/python3.10/dist-packages/bmtrain/nccl/_C.cpython-310-x86_64-linux-gnu.so: undefined symbol: ncclBroadcast

怎样加载增量微调[LoRA]后的模型

请问怎样加载增量微调后的模型，README中只有

delta_model = LoraModel(backbone_model=model, modified_modules=["project_q", "project_v"], backend="hf")

这句代码，没有给出加载模型权重的代码，求指导