RippleGPT-Nano / paper /references.bib
Tavernari's picture
Upload folder using huggingface_hub
148b631 verified
raw
history blame contribute delete
964 Bytes
% Conference paper; proceedings volume 30 (2017).
% Title and booktitle are stored in Title Case so any style can recase them;
% the middle initial carries its period so full-name styles render it correctly.
@inproceedings{vaswani2017,
  author    = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N. and Kaiser, {\L}ukasz and Polosukhin, Illia},
  title     = {Attention Is All You Need},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {30},
  publisher = {Curran Associates, Inc.},
  year      = {2017},
}
% Conference paper (ICLR 2022).
% The middle initial carries its period so styles that print full given
% names do not render a bare letter.
@inproceedings{press2022,
  author    = {Press, Ofir and Smith, Noah A. and Lewis, Mike},
  title     = {Train Short, Test Long: Attention with Linear Biases Enables Input Length Extrapolation},
  booktitle = {International Conference on Learning Representations},
  year      = {2022},
}
% arXiv preprint (no journal publication recorded here).
% The arXiv identifier lives in the eprint fields instead of a fake journal
% name; acronym and proper noun in the title are brace-protected against
% sentence-casing styles.
% NOTE(review): primaryclass is believed to be cs.LG -- confirm on the arXiv
% abstract page.
@misc{shazeer2020,
  author        = {Shazeer, Noam},
  title         = {{GLU} Variants Improve {Transformer}},
  year          = {2020},
  eprint        = {2002.05202},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  url           = {https://arxiv.org/abs/2002.05202},
}
% Book cited as a dataset (see the note field).
% A year is supplied because the book entry type requires one.
% NOTE(review): 1869 is the novel's original publication year; if the paper
% should cite the Project Gutenberg e-text edition specifically, replace it
% with that edition's release year.
@book{tolstoy,
  author    = {Tolstoy, Leo},
  title     = {War and Peace},
  publisher = {Project Gutenberg},
  year      = {1869},
  note      = {Dataset},
}
% Dataset reference with a corporate author.
% The author is double-braced so it is treated as one indivisible name rather
% than split into given name "BigCode" and surname "Project"; "Stack" is
% protected so sentence-casing styles keep the dataset name capitalised.
@misc{bigcode,
  author = {{BigCode Project}},
  title  = {The {Stack}},
  year   = {2022},
  note   = {Dataset},
}