Skip to content

Commit

Permalink
Merge branch 'main' of github.com:EngineeringSoftware/codeditor into main
Browse files Browse the repository at this point in the history
  • Loading branch information
JiyangZhang committed Dec 4, 2023
2 parents 6632253 + 61b1047 commit 2092a68
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 12 deletions.
7 changes: 4 additions & 3 deletions python/deltr/coditT5/CodeT5.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,14 +121,16 @@ def tokenizer_collate_fn(
)

def tokenize_collate_fn_predict(self, batch_data: List[Tuple[str, str, int]]):

source_batch = [self.tokenize_sequence(t[0]) for t in batch_data]
target_batch = [self.tokenize_sequence(t[1]) for t in batch_data]
index_batch = [t[2] for t in batch_data]
max_length = MAX_LENGTH
batch_size = len(source_batch)

batched_input_ids, batched_labels_ids, = (
(
batched_input_ids,
batched_labels_ids,
) = (
[],
[],
)
Expand Down Expand Up @@ -205,7 +207,6 @@ def predict_dataloader(self):


class CodeT5Module(pl.LightningModule):

# Instantiate the model
def __init__(
self,
Expand Down
1 change: 0 additions & 1 deletion python/deltr/coditT5/save_pretrained.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ def locate_ckpt(ckpt_dir: Path) -> Optional[Path]:


def add_tokens_to_tokenizer():

from transformers import RobertaTokenizer
from deltr.collector.diff_utils import EDIT_TOKENS

Expand Down
8 changes: 0 additions & 8 deletions python/deltr/coditT5/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,11 +282,9 @@ def __init__(
)

def __len__(self):

return len(self.source_code)

def __getitem__(self, index: int):

return {
"code": self.source_code[index],
"context": self.context[index],
Expand Down Expand Up @@ -325,7 +323,6 @@ def __init__(
self.__split_data_to_chunks__(source_code, context, tokenized_labels)

def __len__(self):

return len(self.tokenized_code_input)

def __split_data_to_chunks__(self, source_code, context, tokenized_labels):
Expand Down Expand Up @@ -372,11 +369,9 @@ def __split_data_to_chunks__(self, source_code, context, tokenized_labels):
self.data_index.append(index)
self.labels.append(tokenized_label)


return

def __getitem__(self, index: int):

return {
"code": self.tokenized_code_input[index],
"context": self.tokenized_context_input[index],
Expand All @@ -388,7 +383,6 @@ def __getitem__(self, index: int):
def tokenize_and_align_labels(
source_code: List[str], labels: List[int], tokenizer: Any
) -> List[List[int]]:

tokenized_labels = []

for code, label in zip(source_code, labels):
Expand Down Expand Up @@ -445,7 +439,6 @@ def __len__(self) -> int:
return self.n_data

def __getitem__(self, index: int) -> Tuple:

if index < 0:
index = self.n_data + index

Expand Down Expand Up @@ -495,7 +488,6 @@ def __len__(self) -> int:
return self.n_data

def __getitem__(self, index: int) -> Tuple:

if index < 0:
index = self.n_data + index

Expand Down

0 comments on commit 2092a68

Please sign in to comment.