clembench-2024: A Challenging, Dynamic, Complementary, Multilingual Benchmark and Underlying Flexible Framework for LLMs as Multi-Action Agents

Beyer, Anne and Chalamalasetti, Kranti and Hakimov, Sherzod and Madureira, Brielen and Sadler, Philipp and Schlangen, David

, 2024
[PDF]
@misc{Beyer-2024,
  title = {clembench-2024: A Challenging, Dynamic, Complementary, Multilingual Benchmark and Underlying Flexible Framework for LLMs as Multi-Action Agents},
  author = {Beyer, Anne and Chalamalasetti, Kranti and Hakimov, Sherzod and Madureira, Brielen and Sadler, Philipp and Schlangen, David},
  year = {2024},
  eprint = {2405.20859},
  archiveprefix = {arXiv},
  primaryclass = {cs.CL}
}