first commit

This commit is contained in:
Zhang-Each 2023-10-11 11:51:08 +08:00
parent 9003e58b3f
commit 6362475d20
24 changed files with 14218 additions and 1 deletion

BIN
.DS_Store vendored Normal file

Binary file not shown.

21
LICENSE Normal file

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2023 ZJUKG
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

README.md

@@ -1 +1,51 @@
# KoPA
# Making Large Language Models Perform Better in Knowledge Graph Completion
![](https://img.shields.io/badge/version-1.0.1-blue)
[![license](https://img.shields.io/github/license/mashape/apistatus.svg?maxAge=2592000)](https://github.com/zjukg/KoPA/blob/main/LICENSE)
[![AAAI](https://img.shields.io/badge/NLPCC'23-brightgreen)](http://tcci.ccf.org.cn/conference/2023/)
[![Pytorch](https://img.shields.io/badge/PyTorch-%23EE4C2C.svg?e&logo=PyTorch&logoColor=white)](https://pytorch.org/)
- [Making Large Language Models Perform Better in Knowledge Graph Completion](https://arxiv.org/abs/2310.06671)
> Large language model (LLM) based knowledge graph completion (KGC) aims to predict the missing triples in the KGs with LLMs and enrich the KGs to become better web infrastructure, which can benefit a lot of web-based automatic services. However, research about LLM-based KGC is limited and lacks effective utilization of LLM's inference capabilities, which ignores the important structural information in KGs and prevents LLMs from acquiring accurate factual knowledge. In this paper, we discuss how to incorporate the helpful KG structural information into the LLMs, aiming to achieve structural-aware reasoning in the LLMs. We first transfer the existing LLM paradigms to structural-aware settings and further propose a knowledge prefix adapter (KoPA) to fulfill this stated goal. KoPA employs structural embedding pre-training to capture the structural information of entities and relations in the KG. Then KoPA informs the LLMs of the knowledge prefix adapter which projects the structural embeddings into the textual space and obtains virtual knowledge tokens as a prefix of the input prompt. We conduct comprehensive experiments on these structural-aware LLM-based KGC methods and provide an in-depth analysis comparing how the introduction of structural information would be better for LLM's knowledge reasoning ability.
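In a nutshell, KoPA turns the pre-trained structural embeddings of a triple into a few virtual tokens that are prepended to the prompt. Below is a minimal sketch of that idea, simplified from `kopa.py` in this commit; the class and variable names are illustrative, and 4096 is the LLaMA-7B hidden size used as the default here.
```python
import torch
import torch.nn as nn

class KnowledgePrefix(nn.Module):
    """Frozen KG embeddings -> linear adapter -> virtual prefix tokens (simplified from kopa.py)."""
    def __init__(self, ent_embs, rel_embs, dim_llm=4096, num_prefix=1):
        super().__init__()
        self.ent = nn.Embedding.from_pretrained(ent_embs, freeze=True)
        self.rel = nn.Embedding.from_pretrained(rel_embs, freeze=True)
        self.adapter = nn.Linear(ent_embs.shape[1], num_prefix * dim_llm)
        self.num_prefix, self.dim_llm = num_prefix, dim_llm

    def forward(self, triple_ids):  # triple_ids: (batch, 3) = (head, relation, tail) ids
        h, r, t = triple_ids[:, 0], triple_ids[:, 1], triple_ids[:, 2]
        kge = torch.stack((self.ent(h), self.rel(r), self.ent(t)), dim=1)  # (batch, 3, kge_dim)
        # Project into the LLM embedding space: (batch, 3 * num_prefix, dim_llm)
        return self.adapter(kge).reshape(-1, 3 * self.num_prefix, self.dim_llm)

# The resulting prefix is concatenated in front of the token embeddings before the LLM forward
# pass, e.g. input_embeds = torch.cat((prefix, token_embeds), dim=1), with matching
# attention-mask and label padding for the prefix positions.
```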
## 🌈 Model Architecture
![Model_architecture](figure/model.png)
## 🔬 Dependencies
Our code is developed based on [alpaca-lora](https://github.com/tloen/alpaca-lora). Please build the Python environment following the instructions in Alpaca-LoRA.
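For a quick start, a typical setup might look like the following (the conda environment name and Python version are assumptions, not requirements of this repo):
```shell
conda create -n kopa python=3.9 -y
conda activate kopa
pip install -r requirements.txt
```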
## 📕 Training & Test
- run KoPA tuning
```shell
export WANDB_DISABLED=true
wandb offline
CUDA_VISIBLE_DEVICES=0 nohup python finetune_kopa.py \
--base_model 'YOUR LLM PATH' \
--data_path 'data/UMLS-train.json' \
--output_dir 'YOUR SAVE PATH' \
--num_epochs 3 \
--lora_r 32 \
--learning_rate 3e-4 \
--batch_size 12 \
--micro_batch_size 12 \
--num_prefix 1 \
--kge_model 'data/UMLS-rotate.pth' \
--lora_target_modules='[q_proj,k_proj,v_proj,o_proj]' > log.txt &
```
You need to fill in the LLM path and the save path before running.
- run inference (fill in the script paths first; see the note below)
```shell
CUDA_VISIBLE_DEVICES=0 python inference_kopa.py
```
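Note that `inference_kopa.py` reads its paths from constants inside the script rather than from command-line flags, so fill these in first (the placeholder values below are the ones shipped in the script):
```python
base_path = 'YOUR LLM PATH'             # base LLaMA checkpoint used for KoPA tuning
lora_weights = "YOUR SAVE PATH"         # the output_dir produced by finetune_kopa.py
test_data_path = "data/UMLS-test.json"  # test triples with their embedding ids
```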
## 🤝 Cite:
Please consider citing this paper if you use the code from our work.
Thanks a lot :)
```bibtex
TBD
```

BIN
data/UMLS-rotate.pth Normal file

Binary file not shown.

1
data/UMLS-test.json Normal file

File diff suppressed because one or more lines are too long

1
data/UMLS-train.json Normal file

File diff suppressed because one or more lines are too long

13042
data/UMLS-valid.json Normal file

File diff suppressed because it is too large

BIN
figure/model.png Normal file

Binary file not shown.


273
finetune.py Normal file

@@ -0,0 +1,273 @@
import os
import sys
from typing import List
import time
import fire
import torch
import transformers
from datasets import load_dataset
"""
Unused imports:
import torch.nn as nn
import bitsandbytes as bnb
"""
from peft import (
LoraConfig,
get_peft_model,
get_peft_model_state_dict,
prepare_model_for_int8_training,
set_peft_model_state_dict,
)
from transformers import LlamaForCausalLM, LlamaTokenizer
from utils.prompter import Prompter
def train(
# model/data params
base_model: str = "", # the only required argument
    data_path: str = "YOUR DATA PATH",
output_dir: str = "./lora-alpaca",
# training hyperparams
batch_size: int = 16,
micro_batch_size: int = 16,
num_epochs: int = 2,
learning_rate: float = 3e-4,
cutoff_len: int = 512,
val_set_size: int = 0,
# lora hyperparams
lora_r: int = 16,
lora_alpha: int = 16,
lora_dropout: float = 0.05,
lora_target_modules: List[str] = [
"q_proj",
"v_proj",
],
# llm hyperparams
train_on_inputs: bool = True, # if False, masks out inputs in loss
add_eos_token: bool = False,
group_by_length: bool = False, # faster, but produces an odd training loss curve
# wandb params
wandb_project: str = "",
wandb_run_name: str = "",
wandb_watch: str = "", # options: false | gradients | all
wandb_log_model: str = "", # options: false | true
resume_from_checkpoint: str = None, # either training checkpoint or final adapter
prompt_template_name: str = "alpaca", # The prompt template to use, will default to alpaca.
):
if int(os.environ.get("LOCAL_RANK", 0)) == 0:
print(
f"Training Alpaca-LoRA model with params:\n"
f"base_model: {base_model}\n"
f"data_path: {data_path}\n"
f"output_dir: {output_dir}\n"
f"batch_size: {batch_size}\n"
f"micro_batch_size: {micro_batch_size}\n"
f"num_epochs: {num_epochs}\n"
f"learning_rate: {learning_rate}\n"
f"cutoff_len: {cutoff_len}\n"
f"val_set_size: {val_set_size}\n"
f"lora_r: {lora_r}\n"
f"lora_alpha: {lora_alpha}\n"
f"lora_dropout: {lora_dropout}\n"
f"lora_target_modules: {lora_target_modules}\n"
f"train_on_inputs: {train_on_inputs}\n"
f"add_eos_token: {add_eos_token}\n"
f"group_by_length: {group_by_length}\n"
f"wandb_project: {wandb_project}\n"
f"wandb_run_name: {wandb_run_name}\n"
f"wandb_watch: {wandb_watch}\n"
f"wandb_log_model: {wandb_log_model}\n"
f"resume_from_checkpoint: {resume_from_checkpoint or False}\n"
f"prompt template: {prompt_template_name}\n"
)
assert (
base_model
), "Please specify a --base_model, e.g. --base_model='huggyllama/llama-7b'"
gradient_accumulation_steps = batch_size // micro_batch_size
prompter = Prompter(prompt_template_name)
device_map = "auto"
world_size = int(os.environ.get("WORLD_SIZE", 1))
ddp = world_size != 1
if ddp:
device_map = {"": int(os.environ.get("LOCAL_RANK") or 0)}
gradient_accumulation_steps = gradient_accumulation_steps // world_size
model = LlamaForCausalLM.from_pretrained(
base_model,
# load_in_8bit=True,
torch_dtype=torch.float16,
device_map=device_map,
)
tokenizer = LlamaTokenizer.from_pretrained(base_model)
tokenizer.pad_token_id = (
0 # unk. we want this to be different from the eos token
)
tokenizer.padding_side = "left" # Allow batched inference
def tokenize(prompt, add_eos_token=True):
# there's probably a way to do this with the tokenizer settings
# but again, gotta move fast
result = tokenizer(
prompt,
truncation=True,
max_length=cutoff_len,
padding=False,
return_tensors=None,
)
if (
result["input_ids"][-1] != tokenizer.eos_token_id
and len(result["input_ids"]) < cutoff_len
and add_eos_token
):
result["input_ids"].append(tokenizer.eos_token_id)
result["attention_mask"].append(1)
result["labels"] = result["input_ids"].copy()
return result
def generate_and_tokenize_prompt(data_point):
full_prompt = prompter.generate_prompt(
data_point["instruction"],
data_point["input"],
data_point["output"],
)
tokenized_full_prompt = tokenize(full_prompt)
if not train_on_inputs:
user_prompt = prompter.generate_prompt(
data_point["instruction"], data_point["input"]
)
tokenized_user_prompt = tokenize(
user_prompt, add_eos_token=add_eos_token
)
user_prompt_len = len(tokenized_user_prompt["input_ids"])
if add_eos_token:
user_prompt_len -= 1
tokenized_full_prompt["labels"] = [
-100
] * user_prompt_len + tokenized_full_prompt["labels"][
user_prompt_len:
] # could be sped up, probably
return tokenized_full_prompt
model = prepare_model_for_int8_training(model)
config = LoraConfig(
r=lora_r,
lora_alpha=lora_alpha,
target_modules=lora_target_modules,
lora_dropout=lora_dropout,
bias="none",
task_type="CAUSAL_LM",
)
model = get_peft_model(model, config)
if data_path.endswith(".json") or data_path.endswith(".jsonl"):
data = load_dataset("json", data_files=data_path)
else:
data = load_dataset(data_path)
if resume_from_checkpoint:
# Check the available weights and load them
checkpoint_name = os.path.join(
resume_from_checkpoint, "pytorch_model.bin"
) # Full checkpoint
if not os.path.exists(checkpoint_name):
checkpoint_name = os.path.join(
resume_from_checkpoint, "adapter_model.bin"
) # only LoRA model - LoRA config above has to fit
resume_from_checkpoint = (
False # So the trainer won't try loading its state
)
# The two files above have a different name depending on how they were saved, but are actually the same.
if os.path.exists(checkpoint_name):
print(f"Restarting from {checkpoint_name}")
adapters_weights = torch.load(checkpoint_name)
set_peft_model_state_dict(model, adapters_weights)
else:
print(f"Checkpoint {checkpoint_name} not found")
model.print_trainable_parameters() # Be more transparent about the % of trainable params.
if val_set_size > 0:
train_val = data["train"].train_test_split(
test_size=val_set_size, shuffle=True, seed=42
)
train_data = (
train_val["train"].shuffle().map(generate_and_tokenize_prompt)
)
val_data = (
train_val["test"].shuffle().map(generate_and_tokenize_prompt)
)
else:
train_data = data["train"].shuffle().map(generate_and_tokenize_prompt)
val_data = None
if not ddp and torch.cuda.device_count() > 1:
# keeps Trainer from trying its own DataParallelism when more than 1 gpu is available
model.is_parallelizable = True
model.model_parallel = True
trainer = transformers.Trainer(
model=model,
train_dataset=train_data,
eval_dataset=val_data,
args=transformers.TrainingArguments(
per_device_train_batch_size=micro_batch_size,
gradient_accumulation_steps=gradient_accumulation_steps,
warmup_steps=100,
num_train_epochs=num_epochs,
learning_rate=learning_rate,
fp16=True,
logging_steps=10,
optim="adamw_torch",
evaluation_strategy="steps" if val_set_size > 0 else "no",
save_strategy="steps",
eval_steps=None,
save_steps=8000,
output_dir=output_dir,
save_total_limit=2,
load_best_model_at_end=True if val_set_size > 0 else False,
ddp_find_unused_parameters=False if ddp else None,
group_by_length=group_by_length,
report_to=None,
run_name=None,
),
data_collator=transformers.DataCollatorForSeq2Seq(
tokenizer, pad_to_multiple_of=8, return_tensors="pt", padding=True
),
)
model.config.use_cache = False
old_state_dict = model.state_dict
model.state_dict = (
lambda self, *_, **__: get_peft_model_state_dict(
self, old_state_dict()
)
).__get__(model, type(model))
if torch.__version__ >= "2" and sys.platform != "win32":
model = torch.compile(model)
trainer.train(resume_from_checkpoint=resume_from_checkpoint)
model.save_pretrained(output_dir)
print(
"\n If there's a warning about missing keys above, please disregard :)"
)
if __name__ == "__main__":
fire.Fire(train)

279
finetune_kopa.py Normal file

@@ -0,0 +1,279 @@
import os
import sys
from typing import List
import fire
import torch
import transformers
from datasets import load_dataset
from kopa import KoPA, KoPAWithAdapter
"""
Unused imports:
import torch.nn as nn
import bitsandbytes as bnb
"""
from peft import (
LoraConfig,
get_peft_model,
get_peft_model_state_dict,
prepare_model_for_int8_training,
set_peft_model_state_dict,
)
from transformers import LlamaForCausalLM, LlamaTokenizer
from utils.prompter import Prompter
def train(
# model/data params
base_model: str = "", # the only required argument
    data_path: str = "YOUR DATA PATH",
output_dir: str = "./lora-alpaca",
# training hyperparams
batch_size: int = 16,
micro_batch_size: int = 16,
num_epochs: int = 2,
learning_rate: float = 3e-4,
cutoff_len: int = 512,
val_set_size: int = 0,
# lora hyperparams
lora_r: int = 16,
lora_alpha: int = 16,
lora_dropout: float = 0.05,
lora_target_modules: List[str] = [
"q_proj",
"v_proj",
],
num_prefix: int = 1,
# llm hyperparams
train_on_inputs: bool = True, # if False, masks out inputs in loss
add_eos_token: bool = False,
group_by_length: bool = False, # faster, but produces an odd training loss curve
# wandb params
wandb_project: str = "",
wandb_run_name: str = "",
wandb_watch: str = "", # options: false | gradients | all
wandb_log_model: str = "", # options: false | true
resume_from_checkpoint: str = None, # either training checkpoint or final adapter
prompt_template_name: str = "alpaca", # The prompt template to use, will default to alpaca.
kge_model: str = "data/CoDeX-S.pth"
):
if int(os.environ.get("LOCAL_RANK", 0)) == 0:
print(
f"Training Alpaca-LoRA model with params:\n"
f"base_model: {base_model}\n"
f"data_path: {data_path}\n"
f"output_dir: {output_dir}\n"
f"batch_size: {batch_size}\n"
f"micro_batch_size: {micro_batch_size}\n"
f"num_epochs: {num_epochs}\n"
f"learning_rate: {learning_rate}\n"
f"cutoff_len: {cutoff_len}\n"
f"val_set_size: {val_set_size}\n"
f"lora_r: {lora_r}\n"
f"num_prefix: {num_prefix}\n"
f"lora_alpha: {lora_alpha}\n"
f"lora_dropout: {lora_dropout}\n"
f"lora_target_modules: {lora_target_modules}\n"
f"train_on_inputs: {train_on_inputs}\n"
f"add_eos_token: {add_eos_token}\n"
f"group_by_length: {group_by_length}\n"
f"wandb_project: {wandb_project}\n"
f"wandb_run_name: {wandb_run_name}\n"
f"wandb_watch: {wandb_watch}\n"
f"wandb_log_model: {wandb_log_model}\n"
f"resume_from_checkpoint: {resume_from_checkpoint or False}\n"
f"prompt template: {prompt_template_name}\n"
f"kge model: {kge_model}\n"
)
assert (
base_model
), "Please specify a --base_model, e.g. --base_model='huggyllama/llama-7b'"
gradient_accumulation_steps = batch_size // micro_batch_size
prompter = Prompter(prompt_template_name)
device_map = "auto"
world_size = int(os.environ.get("WORLD_SIZE", 1))
ddp = world_size != 1
if ddp:
device_map = {"": int(os.environ.get("LOCAL_RANK") or 0)}
gradient_accumulation_steps = gradient_accumulation_steps // world_size
model = LlamaForCausalLM.from_pretrained(
base_model,
# load_in_8bit=True,
torch_dtype=torch.float16,
device_map=device_map,
)
tokenizer = LlamaTokenizer.from_pretrained(base_model)
tokenizer.pad_token_id = (
0 # unk. we want this to be different from the eos token
)
tokenizer.padding_side = "left" # Allow batched inference
def tokenize(prompt, add_eos_token=True):
# there's probably a way to do this with the tokenizer settings
# but again, gotta move fast
result = tokenizer(
prompt,
truncation=True,
max_length=cutoff_len,
padding=False,
return_tensors=None,
)
if (
result["input_ids"][-1] != tokenizer.eos_token_id
and len(result["input_ids"]) < cutoff_len
and add_eos_token
):
result["input_ids"].append(tokenizer.eos_token_id)
result["attention_mask"].append(1)
result["labels"] = result["input_ids"].copy()
return result
def generate_and_tokenize_prompt(data_point):
full_prompt = prompter.generate_prompt(
data_point["instruction"],
data_point["input"],
data_point["output"],
)
tokenized_full_prompt = tokenize(full_prompt)
if not train_on_inputs:
user_prompt = prompter.generate_prompt(
data_point["instruction"], data_point["input"]
)
tokenized_user_prompt = tokenize(
user_prompt, add_eos_token=add_eos_token
)
user_prompt_len = len(tokenized_user_prompt["input_ids"])
if add_eos_token:
user_prompt_len -= 1
tokenized_full_prompt["labels"] = [
-100
] * user_prompt_len + tokenized_full_prompt["labels"][
user_prompt_len:
] # could be sped up, probably
return tokenized_full_prompt
# model = prepare_model_for_int8_training(model)
config = LoraConfig(
r=lora_r,
lora_alpha=lora_alpha,
target_modules=lora_target_modules,
lora_dropout=lora_dropout,
bias="none",
task_type="CAUSAL_LM",
)
model = get_peft_model(model, config)
slama_model = KoPAWithAdapter(model, num_prefix, kge_model=kge_model)
    # Use the default HuggingFace datasets cache instead of a hard-coded personal path.
    if data_path.endswith(".json") or data_path.endswith(".jsonl"):
        data = load_dataset("json", data_files=data_path)
    else:
        data = load_dataset(data_path)
if resume_from_checkpoint:
# Check the available weights and load them
checkpoint_name = os.path.join(
resume_from_checkpoint, "pytorch_model.bin"
) # Full checkpoint
if not os.path.exists(checkpoint_name):
checkpoint_name = os.path.join(
resume_from_checkpoint, "adapter_model.bin"
) # only LoRA model - LoRA config above has to fit
resume_from_checkpoint = (
False # So the trainer won't try loading its state
)
# The two files above have a different name depending on how they were saved, but are actually the same.
if os.path.exists(checkpoint_name):
print(f"Restarting from {checkpoint_name}")
adapters_weights = torch.load(checkpoint_name)
set_peft_model_state_dict(model, adapters_weights)
else:
print(f"Checkpoint {checkpoint_name} not found")
# model.print_trainable_parameters() # Be more transparent about the % of trainable params.
if val_set_size > 0:
train_val = data["train"].train_test_split(
test_size=val_set_size, shuffle=True, seed=42
)
train_data = (
train_val["train"].shuffle().map(generate_and_tokenize_prompt)
)
val_data = (
train_val["test"].shuffle().map(generate_and_tokenize_prompt)
)
else:
train_data = data["train"].shuffle().map(generate_and_tokenize_prompt)
val_data = None
if not ddp and torch.cuda.device_count() > 1:
# keeps Trainer from trying its own DataParallelism when more than 1 gpu is available
model.is_parallelizable = True
model.model_parallel = True
trainer = transformers.Trainer(
model=slama_model,
train_dataset=train_data,
eval_dataset=val_data,
args=transformers.TrainingArguments(
per_device_train_batch_size=micro_batch_size,
gradient_accumulation_steps=gradient_accumulation_steps,
warmup_steps=100,
num_train_epochs=num_epochs,
learning_rate=learning_rate,
fp16=True,
logging_steps=10,
optim="adamw_hf",
evaluation_strategy="steps" if val_set_size > 0 else "no",
save_strategy="steps",
eval_steps=None,
save_steps=5000,
output_dir=output_dir,
save_total_limit=2,
load_best_model_at_end=True if val_set_size > 0 else False,
ddp_find_unused_parameters=False if ddp else None,
group_by_length=group_by_length,
report_to=None,
run_name=None,
),
data_collator=transformers.DataCollatorForSeq2Seq(
tokenizer, pad_to_multiple_of=8, return_tensors="pt", padding=True
),
)
model.config.use_cache = False
old_state_dict = model.state_dict
model.state_dict = (
lambda self, *_, **__: get_peft_model_state_dict(
self, old_state_dict()
)
).__get__(model, type(model))
if torch.__version__ >= "2" and sys.platform != "win32":
model = torch.compile(model)
trainer.train(resume_from_checkpoint=resume_from_checkpoint)
model.save_pretrained(output_dir)
torch.save(slama_model.embeddings, os.path.join(output_dir, "embeddings.pth"))
print(
"\n If there's a warning about missing keys above, please disregard :)"
)
if __name__ == "__main__":
fire.Fire(train)

95
inference_kopa.py Normal file

@@ -0,0 +1,95 @@
import os
import json
import torch
import transformers
from peft import PeftModel
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score
from transformers import GenerationConfig, LlamaForCausalLM, LlamaTokenizer
base_path = 'YOUR LLM PATH'
prompt_template = """
Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
Given a triple from a knowledge graph. Each triple consists of a head entity, a relation, and a tail entity. Please determine the correctness of the triple and response True or False.
### Input:
{}
### Response:
"""
def load_test_dataset(path):
test_dataset = json.load(open(path, "r"))
return test_dataset
if __name__ == "__main__":
cuda = "cuda:0"
lora_weights = "YOUR SAVE PATH"
test_data_path = "data/UMLS-test.json"
embedding_path = "{}/embeddings.pth".format(lora_weights)
test_dataset = load_test_dataset(test_data_path)
kg_embeddings = torch.load(embedding_path).to(cuda)
tokenizer = LlamaTokenizer.from_pretrained(base_path)
model = LlamaForCausalLM.from_pretrained(
base_path,
torch_dtype=torch.float16
).to(cuda)
model = PeftModel.from_pretrained(
model,
lora_weights,
torch_dtype=torch.float16,
).to(cuda)
# unwind broken decapoda-research config
model.config.pad_token_id = tokenizer.pad_token_id = 0 # unk
model.config.bos_token_id = 1
model.config.eos_token_id = 2
model = model.eval()
result = []
for data in test_dataset:
ent = data["input"]
ans = data["output"]
ids = data["embedding_ids"]
ids = torch.LongTensor(ids).reshape(1, -1).to(cuda)
prefix = kg_embeddings(ids)
prompt = prompt_template.format(ent)
inputs = tokenizer(prompt, return_tensors="pt")
input_ids = inputs.input_ids.to(cuda)
token_embeds = model.model.model.embed_tokens(input_ids)
input_embeds = torch.cat((prefix, token_embeds), dim=1)
generate_ids = model.generate(
inputs_embeds=input_embeds,
max_new_tokens=16
)
context = tokenizer.batch_decode(input_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
response = tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
response = response.replace(context, "").strip()
print(response + '\n')
result.append(
{
"answer": ans,
"predict": response
}
)
answer = []
predict = []
for data in result:
if "True" in data["answer"]:
answer.append(1)
else:
answer.append(0)
if "True" in data["predict"]:
predict.append(1)
else:
predict.append(0)
acc = accuracy_score(y_true=answer, y_pred=predict)
p = precision_score(y_true=answer, y_pred=predict)
r = recall_score(y_true=answer, y_pred=predict)
f1 = f1_score(y_true=answer, y_pred=predict)
print(acc, p, r, f1)

181
kopa.py Normal file

@@ -0,0 +1,181 @@
import torch
import torch.nn as nn
from typing import Optional, List, Union, Tuple
from transformers import LlamaForCausalLM
from process_kge import load_pretrain_kge
class KoPA(nn.Module):
def __init__(
self,
model: LlamaForCausalLM
) -> None:
super(KoPA, self).__init__()
self.llama_model = model
# self.embeddings = nn.Embedding(100, 4096)
self.embeddings = PrefixKGEmbedding(
num_ent=2034,
num_rel=42,
dim_llm=4096,
num_prefix=1
)
def forward(
self,
input_ids: torch.LongTensor = None,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.LongTensor] = None,
past_key_values: Optional[List[torch.FloatTensor]] = None,
inputs_embeds: Optional[torch.FloatTensor] = None,
labels: Optional[torch.LongTensor] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
embedding_ids: torch.LongTensor = None
):
kg_embeds = self.embeddings(embedding_ids)
batch_size, seq_len, _ = kg_embeds.shape
token_embeds = self.llama_model.model.model.embed_tokens(input_ids)
input_embeds = torch.cat((kg_embeds, token_embeds), dim=1)
prefix_mask = torch.ones((batch_size, seq_len))
prefix_labels = torch.full((batch_size, seq_len), fill_value=-100, dtype=torch.long)
new_attention_mask = torch.cat((prefix_mask.cuda(), attention_mask), dim=-1)
new_labels = torch.cat((prefix_labels.cuda(), labels), dim=-1)
return self.llama_model(
input_ids=None,
attention_mask=new_attention_mask,
position_ids=position_ids,
past_key_values=past_key_values,
inputs_embeds=input_embeds,
labels=new_labels,
use_cache=use_cache,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
return_dict=return_dict,
)
class KoPAWithAdapter(nn.Module):
def __init__(
self,
model: LlamaForCausalLM,
num_prefix: int,
kge_model: str = "data/UMLS-rotate.pth",
pretrain_emb_path = None
) -> None:
super(KoPAWithAdapter, self).__init__()
self.llama_model = model
ent_embs, rel_embs = load_pretrain_kge(kge_model)
if pretrain_emb_path is None:
print("Adapter Trained From Scratch".format(pretrain_emb_path))
self.embeddings = PretrainKGEmbedding(
pretrain_ent_embs=ent_embs,
pretrain_rel_embs=rel_embs,
dim_llm=4096,
num_prefix=num_prefix
)
else:
print("Adapter Load From {}".format(pretrain_emb_path))
self.embeddings = torch.load(pretrain_emb_path)
def forward(
self,
input_ids: torch.LongTensor = None,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.LongTensor] = None,
past_key_values: Optional[List[torch.FloatTensor]] = None,
inputs_embeds: Optional[torch.FloatTensor] = None,
labels: Optional[torch.LongTensor] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
embedding_ids: torch.LongTensor = None
):
kg_embeds = self.embeddings(embedding_ids)
# print(kg_embeds.shape)
batch_size, seq_len, _ = kg_embeds.shape
token_embeds = self.llama_model.model.model.embed_tokens(input_ids)
input_embeds = torch.cat((kg_embeds, token_embeds), dim=1)
prefix_mask = torch.ones((batch_size, seq_len))
prefix_labels = torch.full((batch_size, seq_len), fill_value=-100, dtype=torch.long)
new_attention_mask = torch.cat((prefix_mask.cuda(), attention_mask), dim=-1)
new_labels = torch.cat((prefix_labels.cuda(), labels), dim=-1)
return self.llama_model(
input_ids=None,
attention_mask=new_attention_mask,
position_ids=position_ids,
past_key_values=past_key_values,
inputs_embeds=input_embeds,
labels=new_labels,
use_cache=use_cache,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
return_dict=return_dict,
)
class PrefixKGEmbedding(nn.Module):
def __init__(
self,
num_ent,
num_rel,
dim_llm,
num_prefix
):
super(PrefixKGEmbedding, self).__init__()
self.emb_dim = num_prefix * dim_llm
self.ent_embeddings = nn.Embedding(num_ent, self.emb_dim)
self.rel_embeddings = nn.Embedding(num_rel, self.emb_dim)
def forward(self, triple_ids):
head, relation, tail = triple_ids[:, 0], triple_ids[:, 1], triple_ids[:, 2]
h = self.ent_embeddings(head)
r = self.rel_embeddings(relation)
t = self.ent_embeddings(tail)
prefix = torch.stack((h, r, t), dim=1)
return prefix
class PretrainKGEmbedding(nn.Module):
def __init__(
self,
pretrain_ent_embs,
pretrain_rel_embs,
dim_llm,
num_prefix
):
super(PretrainKGEmbedding, self).__init__()
self.num_prefix = num_prefix
self.llm_dim = dim_llm
self.emb_dim = num_prefix * dim_llm
self.ent_embeddings = nn.Embedding.from_pretrained(pretrain_ent_embs)
self.rel_embeddings = nn.Embedding.from_pretrained(pretrain_rel_embs)
self.pretrain_dim = self.ent_embeddings.weight.shape[1]
# Froze the pretrain embeddings
self.ent_embeddings.requires_grad_(False)
self.rel_embeddings.requires_grad_(False)
self.adapter = nn.Linear(self.pretrain_dim, self.emb_dim)
def forward(self, triple_ids):
# main training stage
if triple_ids.shape[1] == 3:
head, relation, tail = triple_ids[:, 0], triple_ids[:, 1], triple_ids[:, 2]
h = self.ent_embeddings(head)
r = self.rel_embeddings(relation)
t = self.ent_embeddings(tail)
pretrain_embs = torch.stack((h, r, t), dim=1)
prefix = self.adapter(pretrain_embs).reshape(-1, 3*self.num_prefix, self.llm_dim)
return prefix
        # entity-aware pre-training
else:
ent = triple_ids.reshape(-1,)
emb = self.ent_embeddings(ent)
prefix = self.adapter(emb).reshape(-1, self.num_prefix, self.llm_dim)
# print(prefix.shape)
return prefix

39
process_kge.py Normal file

@@ -0,0 +1,39 @@
import torch
def load_pretrain_kge(path):
if "complex" in path:
return load_complex_model(path)
kge_model = torch.load(path)
ent_embs = torch.tensor(kge_model["ent_embeddings.weight"]).cpu()
rel_embs = torch.tensor(kge_model["rel_embeddings.weight"]).cpu()
ent_embs.requires_grad = False
rel_embs.requires_grad = False
ent_dim = ent_embs.shape[1]
rel_dim = rel_embs.shape[1]
print(ent_dim, rel_dim)
if ent_dim != rel_dim:
rel_embs = torch.cat((rel_embs, rel_embs), dim=-1)
# print(ent_embs.shape, rel_embs.shape)
# print(ent_embs.requires_grad, rel_embs.requires_grad)
return ent_embs, rel_embs
def load_complex_model(path):
kge_model = torch.load(path)
ent_embs1 = torch.tensor(kge_model["ent_re_embeddings.weight"]).cpu()
ent_embs2 = torch.tensor(kge_model["ent_im_embeddings.weight"]).cpu()
rel_embs1 = torch.tensor(kge_model["rel_re_embeddings.weight"]).cpu()
rel_embs2 = torch.tensor(kge_model["rel_im_embeddings.weight"]).cpu()
ent_embs = torch.cat((ent_embs1, ent_embs2), dim=-1)
rel_embs = torch.cat((rel_embs1, rel_embs2), dim=-1)
ent_embs.requires_grad = False
rel_embs.requires_grad = False
ent_dim = ent_embs.shape[1]
rel_dim = rel_embs.shape[1]
print(ent_dim, rel_dim)
return ent_embs, rel_embs
if __name__ == "__main__":
load_pretrain_kge("data/CoDeX-S-complex.pth")

12
requirements.txt Normal file

@@ -0,0 +1,12 @@
accelerate
appdirs
loralib
bitsandbytes
black
black[jupyter]
datasets
fire
git+https://github.com/huggingface/peft.git
transformers>=4.28.0
sentencepiece
gradio

13
scripts/run_umls_base.sh Normal file

@@ -0,0 +1,13 @@
export WANDB_DISABLED=true
wandb offline
CUDA_VISIBLE_DEVICES=1 nohup python finetune.py \
--base_model 'YOUR LLM PATH' \
--data_path 'preprocess/UMLS-train.json' \
--output_dir 'YOUR SAVE PATH' \
--num_epochs 3 \
--batch_size 12 \
--micro_batch_size 12 \
--lora_r 32 \
--learning_rate 3e-4 \
--lora_target_modules='[q_proj,k_proj,v_proj,o_proj]' > log.txt &

14
scripts/run_umls_kopa.sh Normal file

@@ -0,0 +1,14 @@
export WANDB_DISABLED=true
wandb offline
CUDA_VISIBLE_DEVICES=0 nohup python finetune_kopa.py \
--base_model 'YOUR LLM PATH' \
--data_path 'data/UMLS-train.json' \
--output_dir 'YOUR SAVE PATH' \
--num_epochs 3 \
--lora_r 32 \
--learning_rate 3e-4 \
--batch_size 12 \
--micro_batch_size 12 \
--num_prefix 1 \
--kge_model 'data/UMLS-rotate.pth' \
--lora_target_modules='[q_proj,k_proj,v_proj,o_proj]' > log.txt &

46
templates/README.md Normal file

@@ -0,0 +1,46 @@
# Prompt templates
This directory contains template styles for the prompts used to finetune LoRA models.
## Format
A template is described via a JSON file with the following keys:
- `prompt_input`: The template to use when input is not None. Uses `{instruction}` and `{input}` placeholders.
- `prompt_no_input`: The template to use when input is None. Uses the `{instruction}` placeholder.
- `description`: A short description of the template, with possible use cases.
- `response_split`: The text to use as separator when cutting real response from the model output.
No `{response}` placeholder is used, since the response is always the last element of the template and is simply concatenated to the rest (see the usage sketch below).
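For reference, these keys are consumed by `utils/prompter.py` (also added in this commit); a minimal usage sketch with illustrative strings:
```python
from utils.prompter import Prompter

prompter = Prompter("alpaca")  # loads templates/alpaca.json
prompt = prompter.generate_prompt(
    instruction="Determine the correctness of the triple and respond True or False.",
    input="(aspirin, treats, headache)",  # illustrative input, not from the repo's datasets
)
# response_split ("### Response:") is what get_response() uses to cut the answer
# out of the full decoded model output:
# answer = prompter.get_response(full_model_output)
```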
## Example template
The default template, used unless otherwise specified, is `alpaca.json`
```json
{
"description": "Template used by Alpaca-LoRA.",
"prompt_input": "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n",
"prompt_no_input": "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Response:\n",
"response_split": "### Response:"
}
```
## Current templates
### alpaca
Default template used for generic LoRA fine tunes so far.
### alpaca_legacy
Legacy template used by the original alpaca repo, with no `\n` after the response field. Kept for reference and experiments.
### alpaca_short
A trimmed-down alpaca template which seems to perform just as well and spare some tokens. Models created with the default template seem to be queryable by the short template as well. More experiments are welcome.
### vigogne
The default alpaca template, translated to French. This template was used to train the "Vigogne" LoRA and is to be used to query it, or for extra fine-tuning.

6
templates/alpaca.json Normal file

@@ -0,0 +1,6 @@
{
"description": "Template used by Alpaca-LoRA.",
"prompt_input": "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n",
"prompt_no_input": "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Response:\n",
"response_split": "### Response:"
}

6
templates/alpaca_legacy.json Normal file

@@ -0,0 +1,6 @@
{
"description": "Legacy template, used by Original Alpaca repository.",
"prompt_input": "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:",
"prompt_no_input": "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Response:",
"response_split": "### Response:"
}

6
templates/alpaca_short.json Normal file

@@ -0,0 +1,6 @@
{
"description": "A shorter template to experiment with.",
"prompt_input": "### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n",
"prompt_no_input": "### Instruction:\n{instruction}\n\n### Response:\n",
"response_split": "### Response:"
}

6
templates/vigogne.json Normal file

@@ -0,0 +1,6 @@
{
"description": "French template, used by Vigogne for finetuning.",
"prompt_input": "Ci-dessous se trouve une instruction qui décrit une tâche, associée à une entrée qui fournit un contexte supplémentaire. Écrivez une réponse qui complète correctement la demande.\n\n### Instruction:\n{instruction}\n\n### Entrée:\n{input}\n\n### Réponse:\n",
"prompt_no_input": "Ci-dessous se trouve une instruction qui décrit une tâche. Écrivez une réponse qui complète correctement la demande.\n\n### Instruction:\n{instruction}\n\n### Réponse:\n",
"response_split": "### Réponse:"
}

0
utils/__init__.py Normal file

75
utils/callbacks.py Normal file

@@ -0,0 +1,75 @@
"""
Helpers to support streaming generate output.
Borrowed from https://github.com/oobabooga/text-generation-webui/blob/ad37f396fc8bcbab90e11ecf17c56c97bfbd4a9c/modules/callbacks.py
"""
import gc
import traceback
from queue import Queue
from threading import Thread
import torch
import transformers
class Stream(transformers.StoppingCriteria):
def __init__(self, callback_func=None):
self.callback_func = callback_func
def __call__(self, input_ids, scores) -> bool:
if self.callback_func is not None:
self.callback_func(input_ids[0])
return False
class Iteratorize:
"""
Transforms a function that takes a callback
into a lazy iterator (generator).
"""
def __init__(self, func, kwargs={}, callback=None):
self.mfunc = func
self.c_callback = callback
self.q = Queue()
self.sentinel = object()
self.kwargs = kwargs
self.stop_now = False
def _callback(val):
if self.stop_now:
raise ValueError
self.q.put(val)
def gentask():
try:
ret = self.mfunc(callback=_callback, **self.kwargs)
except ValueError:
pass
except:
traceback.print_exc()
pass
self.q.put(self.sentinel)
if self.c_callback:
self.c_callback(ret)
self.thread = Thread(target=gentask)
self.thread.start()
def __iter__(self):
return self
def __next__(self):
obj = self.q.get(True, None)
if obj is self.sentinel:
raise StopIteration
else:
return obj
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
self.stop_now = True

51
utils/prompter.py Normal file

@@ -0,0 +1,51 @@
"""
A dedicated helper to manage templates and prompt building.
"""
import json
import os.path as osp
from typing import Union
class Prompter(object):
__slots__ = ("template", "_verbose")
def __init__(self, template_name: str = "", verbose: bool = False):
self._verbose = verbose
if not template_name:
# Enforce the default here, so the constructor can be called with '' and will not break.
template_name = "alpaca"
file_name = osp.join("templates", f"{template_name}.json")
if not osp.exists(file_name):
raise ValueError(f"Can't read {file_name}")
with open(file_name) as fp:
self.template = json.load(fp)
if self._verbose:
print(
f"Using prompt template {template_name}: {self.template['description']}"
)
def generate_prompt(
self,
instruction: str,
input: Union[None, str] = None,
label: Union[None, str] = None,
) -> str:
# returns the full prompt from instruction and optional input
# if a label (=response, =output) is provided, it's also appended.
if input:
res = self.template["prompt_input"].format(
instruction=instruction, input=input
)
else:
res = self.template["prompt_no_input"].format(
instruction=instruction
)
if label:
res = f"{res}{label}"
if self._verbose:
print(res)
return res
def get_response(self, output: str) -> str:
return output.split(self.template["response_split"])[1].strip()