Comments (6)
Yes, you can refer to #4715 for a conversion script. However, Phi-3 128k also contains a gate_up_proj layer, so you can modify the script to decompose the gate_up_proj layer as well, like the following:
import torch
import os
import json
import fire
import shutil
from safetensors.torch import load_file, save_file


def replicate_lora_a_qkv(name: str, weight: "torch.Tensor") -> dict[str, "torch.Tensor"]:
    # lora_A is shared by the fused qkv_proj, so each split projection gets a full copy
    prefix, suffix = name.split('qkv_proj')
    res = {}
    for t in ['q_proj', 'k_proj', 'v_proj']:
        new_name = f"{prefix}{t}{suffix}"
        res[new_name] = weight.clone()
    return res


def replicate_lora_a_gate_up(name: str, weight: "torch.Tensor") -> dict[str, "torch.Tensor"]:
    # same idea for the fused gate_up_proj: replicate lora_A for gate_proj and up_proj
    prefix, suffix = name.split('gate_up_proj')
    res = {}
    for t in ['gate_proj', 'up_proj']:
        new_name = f"{prefix}{t}{suffix}"
        res[new_name] = weight.clone()
    return res


def split_lora_b_qkv(name: str, weight: "torch.Tensor") -> dict[str, "torch.Tensor"]:
    # lora_B stacks the q/k/v output rows, so split it into three equal row blocks
    size = weight.shape[0] // 3
    prefix, suffix = name.split('qkv_proj')
    res = {
        f"{prefix}{t}{suffix}": w
        for t, w in zip(['q_proj', 'k_proj', 'v_proj'], weight.split(size))
    }
    return res


def split_lora_b_gate_up(name: str, weight: "torch.Tensor") -> dict[str, "torch.Tensor"]:
    # lora_B stacks the gate/up output rows, so split it into two equal row blocks
    size = weight.shape[0] // 2
    prefix, suffix = name.split('gate_up_proj')
    res = {
        f"{prefix}{t}{suffix}": w
        for t, w in zip(['gate_proj', 'up_proj'], weight.split(size))
    }
    return res


def convert_qkv_gate_up_lora_to_splits_vllm(adapter_folder_path: str,
                                            output_folder_path: str) -> None:
    """Convert a fused qkv_proj/gate_up_proj LoRA adapter into split projections."""
    adapter_bin_name = 'adapter_model.safetensors'
    adapter_config_name = 'adapter_config.json'
    lora = load_file(f"{adapter_folder_path}/{adapter_bin_name}")
    with open(f"{adapter_folder_path}/{adapter_config_name}", 'r') as f:
        lora_config = json.load(f)
    assert 'qkv_proj' in lora_config['target_modules']
    assert 'gate_up_proj' in lora_config['target_modules']

    # converting weights: replicate lora_A, split lora_B, pass everything else through
    res = {}
    for k, v in lora.items():
        if 'qkv_proj' in k and 'lora_A' in k:
            res.update(replicate_lora_a_qkv(k, v))
        elif 'qkv_proj' in k and 'lora_B' in k:
            res.update(split_lora_b_qkv(k, v))
        elif 'gate_up_proj' in k and 'lora_A' in k:
            res.update(replicate_lora_a_gate_up(k, v))
        elif 'gate_up_proj' in k and 'lora_B' in k:
            res.update(split_lora_b_gate_up(k, v))
        else:
            res[k] = v

    # converting config: swap the fused module names for the split ones
    lora_config['target_modules'] = (
        ['q_proj', 'k_proj', 'v_proj', 'gate_proj', 'up_proj']
        + [t for t in lora_config['target_modules']
           if t != 'qkv_proj' and t != 'gate_up_proj']
    )

    # saving: write the new weights and config, copy everything else verbatim
    os.makedirs(output_folder_path, exist_ok=True)
    save_file(res, f"{output_folder_path}/{adapter_bin_name}", metadata={"format": "pt"})
    with open(f"{output_folder_path}/{adapter_config_name}", 'w') as f:
        json.dump(lora_config, f, indent=4)
    for file in os.listdir(adapter_folder_path):
        if file != adapter_bin_name and file != adapter_config_name:
            shutil.copy(f"{adapter_folder_path}/{file}", f"{output_folder_path}/{file}")


if __name__ == "__main__":
    fire.Fire(convert_qkv_gate_up_lora_to_splits_vllm)
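Since the script exposes the converter through fire, you can run it from the command line or call it directly; a minimal usage sketch (the filename and paths here are placeholders):

# from the shell (assuming the script is saved as convert_lora.py):
#   python convert_lora.py /path/to/phi3_lora_adapter /path/to/converted_adapter
# or programmatically:
convert_qkv_gate_up_lora_to_splits_vllm(
    "/path/to/phi3_lora_adapter",   # folder with adapter_model.safetensors + adapter_config.json
    "/path/to/converted_adapter",   # created if it does not exist
)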
vLLM can load it without error, but I am not sure whether there will be any performance issue.
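The conversion itself is lossless: because lora_A is replicated and lora_B is split along its rows, the concatenated per-projection updates equal the fused update, i.e. B @ A = concat(B_q @ A, B_k @ A, B_v @ A). A minimal numerical check of that identity (dimensions made up):

import torch

r, d_in, d_head = 8, 64, 32           # hypothetical small dimensions
A = torch.randn(r, d_in)              # fused lora_A, shared by q/k/v
B = torch.randn(3 * d_head, r)        # fused lora_B for qkv_proj

# split B into per-projection row blocks, exactly as split_lora_b_qkv does
Bq, Bk, Bv = B.split(d_head)

# the concatenated per-projection updates equal the fused update
assert torch.allclose(B @ A, torch.cat([Bq @ A, Bk @ A, Bv @ A]))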
I converted the adapter to the new projection layers using the conversion code above and loaded it with the standard vLLM LoRA workflow (downloading via snapshot_download), following their documentation (https://docs.vllm.ai/en/latest/models/lora.html#using-lora-adapters).
It did work without errors, though performance dropped massively compared to the same model (with merged LoRA) running without vLLM. I used the same tokenizer for both models.
I have not found out yet why this is the case; it might just be a bug in my own code somewhere.
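For context, a minimal sketch of that loading path from the linked docs (the model name and adapter path are placeholders, not my exact setup):

from vllm import LLM, SamplingParams
from vllm.lora.request import LoRARequest

# enable_lora must be set when constructing the engine
llm = LLM(model="microsoft/Phi-3-mini-128k-instruct", enable_lora=True)

# point the LoRARequest at the *converted* adapter folder
outputs = llm.generate(
    ["Hello, my name is"],
    SamplingParams(temperature=0.0, max_tokens=32),
    lora_request=LoRARequest("phi3_adapter", 1, "/path/to/converted_adapter"),
)
print(outputs[0].outputs[0].text)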
Update: there was a bug in my code; the script above works just fine and the LoRA weights are merged correctly.
Thanks a lot. I will use this, but will vLLM be adding support for this in the future?
After decomposing the layers of Phi-3, will there be any problem merging the LoRA layers back into Phi-3? Do we have to merge the decomposed layers back before applying the LoRA to the original Phi-3?
> Thanks a lot. I will use this, but will vLLM be adding support for this in the future?
I guess they will; otherwise we will always need to convert the LoRA first, which is not very convenient.
> After decomposing the layers of Phi-3, will there be any problem merging the LoRA layers back into Phi-3?
I think you can directly merge the original (not decomposed) LoRA adapter into Phi-3 if you are not trying to load the LoRA via vLLM.
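For example, a minimal sketch of that direct merge with PEFT (the model name and adapter path are placeholders):

from transformers import AutoModelForCausalLM
from peft import PeftModel

# load the base model, then attach the ORIGINAL (fused) adapter
base = AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-128k-instruct")
model = PeftModel.from_pretrained(base, "/path/to/original_adapter")

# fold the LoRA deltas into the base weights and drop the adapter wrappers
merged = model.merge_and_unload()
merged.save_pretrained("/path/to/merged_model")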