Commit

clean doc
ArvinZhuang committed Jul 4, 2024
1 parent 3ed609a commit f6fb0dc
Showing 5 changed files with 33 additions and 27 deletions.
28 changes: 15 additions & 13 deletions README.md
@@ -354,25 +354,27 @@ ndcg_cut_10 all 0.7675
---

## References
[1] Devendra Sachan, Mike Lewis, Mandar Joshi, Armen Aghajanyan, Wen-tau Yih, Joelle Pineau, and Luke Zettlemoyer. 2022. Improving Passage Retrieval with Zero-Shot Question Generation
## 🫡 References
[1] Devendra Sachan, Mike Lewis, Mandar Joshi, Armen Aghajanyan, Wen-tau Yih, Joelle Pineau, and Luke Zettlemoyer, *Improving Passage Retrieval with Zero-Shot Question Generation*, EMNLP 2022

[2] Weiwei Sun, Lingyong Yan, Xinyu Ma, Pengjie Ren, Dawei Yin, and Zhaochun Ren. 2023. Is ChatGPT Good at Search?
[2] Weiwei Sun, Lingyong Yan, Xinyu Ma, Pengjie Ren, Dawei Yin, and Zhaochun Ren, Is ChatGPT Good at Search? *Investigating Large Language Models as Re-Ranking Agents*, EMNLP 2023

[3] Shengyao Zhuang, Honglei Zhuang, Bevan Koopman, and Guido Zuccon. 2023. A Setwise Approach for Effective and Highly Efficient Zero-shot Ranking with Large Language Models
[3] Shengyao Zhuang, Honglei Zhuang, Bevan Koopman, and Guido Zuccon, *A Setwise Approach for Effective and Highly Efficient Zero-shot Ranking with Large Language Models*, SIGIR 2024

[4] Zhen Qin, Rolf Jagerman, Kai Hui, Honglei Zhuang, Junru Wu, Jiaming Shen, Tianqi Liu, Jialu Liu, Donald Metzler, Xuanhui Wang, and Michael Bendersky. 2023. Large language models are effective text rankers with pairwise ranking prompting
[4] Zhen Qin, Rolf Jagerman, Kai Hui, Honglei Zhuang, Junru Wu, Jiaming Shen, Tianqi Liu, Jialu Liu, Donald Metzler, Xuanhui Wang, and Michael Bendersky, *Large Language Models are Effective Text Rankers with Pairwise Ranking Prompting*, Findings: NAACL 2024



---
If you used our code for your research, please consider citing our paper:

```text
@article{zhuang2023setwise,
title={A Setwise Approach for Effective and Highly Efficient Zero-shot Ranking with Large Language Models},
author={Zhuang, Shengyao and Zhuang, Honglei and Koopman, Bevan and Zuccon, Guido},
journal={arXiv preprint arXiv:2310.09497},
year={2023}
## 🙏 Citation

```bibtex
@inproceedings{zhuang2024setwise,
author={Zhuang, Shengyao and Zhuang, Honglei and Koopman, Bevan and Zuccon, Guido},
title={A Setwise Approach for Effective and Highly Efficient Zero-shot Ranking with Large Language Models},
booktitle = {Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval},
year = {2024},
series = {SIGIR '24}
}
```
2 changes: 1 addition & 1 deletion llmrankers/listwise.py
@@ -244,7 +244,7 @@ def __init__(self, model_name_or_path, tokenizer_name_or_path, device, window_si
else torch.float32,
cache_dir=cache_dir).eval()
else:
raise NotImplementedError
raise NotImplementedError(f"Model type {self.config.model_type} is not supported yet for listwise :(")

def compare(self, query: str, docs: List):
self.total_compare += 1
10 changes: 5 additions & 5 deletions llmrankers/pairwise.py
@@ -75,7 +75,8 @@ def __init__(self, model_name_or_path,
else torch.float32,
cache_dir=cache_dir).eval()
else:
raise NotImplementedError
raise NotImplementedError(f"Model type {self.config.model_type} is not supported yet for pairwise :(")

self.total_compare = 0
self.total_completion_tokens = 0
self.total_prompt_tokens = 0
@@ -85,6 +86,7 @@ def compare(self, query: str, docs: List):
doc1, doc2 = docs[0], docs[1]
input_texts = [self.prompt.format(query=query, doc1=doc1, doc2=doc2),
self.prompt.format(query=query, doc1=doc2, doc2=doc1)]
output = None
if self.config.model_type == 't5':
input_ids = self.tokenizer(input_texts,
padding='longest',
@@ -124,9 +126,7 @@ def compare(self, query: str, docs: List):
skip_special_tokens=True).strip().upper()
output1 = self.tokenizer.decode(output_ids[1][input_ids.shape[1]:],
skip_special_tokens=True).strip().upper()
return [f'Passage {output0}', f'Passage {output1}']
else:
raise NotImplementedError
output = [f'Passage {output0}', f'Passage {output1}']

return output

@@ -236,7 +236,7 @@ def __gt__(self, other):
self.heapSort(arr, self.k)
ranking = [SearchResult(docid=doc.docid, score=-i, text=None) for i, doc in enumerate(reversed(arr))]

#
## this is a bit slower but standard bubblesort implementation, kept here FYI
# elif self.method == "bubblesort":
# k = min(k, len(ranking))
# for i in range(k):
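The `compare` hunks in this file prompt the model twice, once for each passage order, and now collect both decoded answers into a single `output` that is returned at the end. Below is a minimal sketch of that order-symmetric comparison; the prompt template and the `judge` callable are stand-ins for the ranker's own template and its T5 / causal-LM generation step, not code from this repository:

```python
from typing import Callable, List

# Illustrative template only; the repository's rankers define their own prompts.
PROMPT = ('Given a query "{query}", which of the following two passages is more '
          'relevant to the query?\n'
          'Passage A: "{doc1}"\n'
          'Passage B: "{doc2}"\n'
          'Answer only with Passage A or Passage B.')


def compare_pair(query: str, doc1: str, doc2: str,
                 judge: Callable[[str], str]) -> List[str]:
    """Judge the pair twice, swapping the passage order, to expose position bias.

    `judge` maps a prompt to the model's answer ('A' or 'B'); it stands in for
    the LLM generation step used by the real pairwise ranker.
    """
    prompts = [PROMPT.format(query=query, doc1=doc1, doc2=doc2),
               PROMPT.format(query=query, doc1=doc2, doc2=doc1)]
    answers = [judge(p).strip().upper() for p in prompts]
    return [f'Passage {a}' for a in answers]


if __name__ == '__main__':
    # Toy judge that always prefers whichever passage is shown first.
    always_first = lambda _prompt: 'A'
    print(compare_pair('what is a transformer?',
                       'a passage about transformer neural networks',
                       'a passage about electrical transformers',
                       always_first))
    # ['Passage A', 'Passage A']: the two orderings disagree about which
    # document actually wins, which is the position bias the double query surfaces.
```

Returning both answers lets the caller decide how to resolve disagreement (e.g. treat an inconsistent pair as a tie), which is why the refactored `compare` funnels everything through one `output` value instead of returning inside each branch.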
16 changes: 10 additions & 6 deletions llmrankers/pointwise.py
@@ -1,6 +1,6 @@
from typing import List
from .rankers import LlmRanker, SearchResult
from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers import T5Tokenizer, T5ForConditionalGeneration, AutoConfig
from torch.utils.data import DataLoader
from transformers import DataCollatorWithPadding
from .pairwise import Text2TextGenerationDataset
@@ -15,11 +15,15 @@ def __init__(self, model_name_or_path, tokenizer_name_or_path, device, method="q
if tokenizer_name_or_path is not None else
model_name_or_path,
cache_dir=cache_dir)
self.llm = T5ForConditionalGeneration.from_pretrained(model_name_or_path,
device_map='auto',
torch_dtype=torch.float16 if device == 'cuda'
else torch.float32,
cache_dir=cache_dir)
self.config = AutoConfig.from_pretrained(model_name_or_path, cache_dir=cache_dir)
if self.config.model_type == 't5':
self.llm = T5ForConditionalGeneration.from_pretrained(model_name_or_path,
device_map='auto',
torch_dtype=torch.float16 if device == 'cuda'
else torch.float32,
cache_dir=cache_dir)
else:
raise NotImplementedError(f"Model type {self.config.model_type} is not supported yet for pointwise :(")

self.device = device
self.method = method
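The pointwise hunk above now reads the model config before loading any weights, so unsupported architectures fail early with a descriptive message instead of a bare `NotImplementedError`. Here is a minimal standalone sketch of that config-first guard, assuming `transformers`, `torch`, and `accelerate` are installed; `load_pointwise_llm` is an illustrative helper name, not a function from this repository:

```python
from typing import Optional

import torch
from transformers import AutoConfig, T5ForConditionalGeneration, T5Tokenizer


def load_pointwise_llm(model_name_or_path: str, device: str = 'cuda',
                       cache_dir: Optional[str] = None):
    """Load a T5 checkpoint for pointwise ranking, rejecting other model types."""
    # Inspect the config first: it is cheap and avoids loading full weights
    # for an architecture the pointwise ranker cannot drive.
    config = AutoConfig.from_pretrained(model_name_or_path, cache_dir=cache_dir)
    if config.model_type != 't5':
        raise NotImplementedError(
            f"Model type {config.model_type} is not supported yet for pointwise :(")

    tokenizer = T5Tokenizer.from_pretrained(model_name_or_path, cache_dir=cache_dir)
    llm = T5ForConditionalGeneration.from_pretrained(
        model_name_or_path,
        device_map='auto',
        torch_dtype=torch.float16 if device == 'cuda' else torch.float32,
        cache_dir=cache_dir)
    return tokenizer, llm


# Example: succeeds for a T5-family checkpoint such as 'google/flan-t5-large',
# raises NotImplementedError for, e.g., a LLaMA checkpoint.
# tokenizer, llm = load_pointwise_llm('google/flan-t5-large')
```

The same `model_type` check with a per-ranker error message is what the listwise, pairwise, and setwise hunks in this commit add as their `else` branches.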
4 changes: 2 additions & 2 deletions llmrankers/setwise.py
@@ -61,6 +61,8 @@ def __init__(self,
torch_dtype=torch.float16 if device == 'cuda'
else torch.float32,
cache_dir=cache_dir).eval()
else:
raise NotImplementedError(f"Model type {self.config.model_type} is not supported yet for setwise:(")

self.scoring = scoring
self.method = method
@@ -167,8 +169,6 @@ def compare(self, query: str, docs: List):

output = self.tokenizer.decode(output_ids[input_ids.shape[1]:],
skip_special_tokens=True).strip().upper()
else:
raise NotImplementedError

elif self.scoring == 'likelihood':
if self.config.model_type == 't5':
