@misc{rezaei2025egonormiabenchmarkingphysicalsocial,title={EgoNormia: Benchmarking Physical Social Norm Understanding},author={Rezaei, MohammadHossein and Fu, Yicheng and Cuvin, Phil and Ziems, Caleb and Zhang, Yanzhe and Zhu, Hao and Yang, Diyi},year={2025},eprint={2502.20490},archiveprefix={arXiv},primaryclass={cs.CV},}
In Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics (Volume 1: Long Papers), Apr 2025
Negation has been a long-standing challenge for language models. Previous studies have shown that they struggle with negation in many natural language understanding tasks. In this work, we propose a self-supervised method to make language models more robust against negation. We introduce a novel task, Next Sentence Polarity Prediction (NSPP), and a variation of the Next Sentence Prediction (NSP) task. We show that RoBERTa and BERT further pre-trained on our tasks outperform the off-the-shelf transformer models on eight negation-related benchmarks. Most notably, our pre-training tasks yield improvements between 1.8% and 9.1% on CondaQA, a large question-answering corpus requiring reasoning over negation.
@inproceedings{rezaei-blanco-2025-making,title={Making Language Models Robust Against Negation},author={Rezaei, MohammadHossein and Blanco, Eduardo},booktitle={Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)},month=apr,year={2025},address={Albuquerque, New Mexico, USA},publisher={Association for Computational Linguistics},url={https://arxiv.org/abs/2502.07717},note={To appear}}
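A minimal sketch of how a next-sentence-polarity pre-training step might look, assuming Hugging Face transformers. The negation-cue lexicon, the labeling heuristic, and the single training step are illustrative assumptions; they are not the NSPP formulation or training setup used in the paper above.

```python
# Illustrative sketch (not the paper's code): further pre-train BERT with a
# self-supervised "next sentence polarity" signal. The label here comes from a
# simple negation-cue lexicon, which is an assumption made for illustration.
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

NEGATION_CUES = {"not", "no", "never", "n't", "none", "nobody", "nothing", "neither", "nor", "without"}

def polarity_label(next_sentence: str) -> int:
    """Return 1 if the next sentence contains a negation cue, else 0 (heuristic)."""
    tokens = next_sentence.lower().replace("n't", " n't").split()
    return int(any(tok.strip(".,!?") in NEGATION_CUES for tok in tokens))

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

# Toy corpus of adjacent sentence pairs; in practice these come from raw text.
pairs = [
    ("She signed the contract.", "She did not read the fine print."),
    ("The experiment finished early.", "The results were conclusive."),
]
encodings = tokenizer([a for a, _ in pairs], [b for _, b in pairs],
                      truncation=True, padding=True, return_tensors="pt")
labels = torch.tensor([polarity_label(b) for _, b in pairs])

optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
model.train()
outputs = model(**encodings, labels=labels)  # one illustrative training step
outputs.loss.backward()
optimizer.step()
```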
Negation is a common linguistic phenomenon. Yet language models face challenges with negation in many natural language understanding tasks such as question answering and natural language inference. In this paper, we experiment with seamless strategies that incorporate affirmative interpretations (i.e., paraphrases without negation) to make models more robust against negation. Crucially, our affirmative interpretations are obtained automatically. We show improvements with CondaQA, a large corpus requiring reasoning with negation, and five natural language understanding tasks.
@inproceedings{rezaei-blanco-2024-paraphrasing,title={Paraphrasing in Affirmative Terms Improves Negation Understanding},author={Rezaei, MohammadHossein and Blanco, Eduardo},editor={Ku, Lun-Wei and Martins, Andre and Srikumar, Vivek},booktitle={Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},month=aug,year={2024},address={Bangkok, Thailand},publisher={Association for Computational Linguistics},url={https://aclanthology.org/2024.acl-short.55/},doi={10.18653/v1/2024.acl-short.55},pages={602--615},}
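A minimal sketch of one "seamless" strategy described in the abstract above: appending an automatically obtained affirmative interpretation to the original input before encoding. The generate_affirmative function is a hypothetical stand-in for the automatic paraphrasing step, and the model choice is an assumption.

```python
# Illustrative sketch: augment the input with an affirmative paraphrase
# (i.e., a paraphrase without negation) before it reaches the task model.
from transformers import AutoTokenizer, AutoModelForSequenceClassification

def generate_affirmative(text: str) -> str:
    # Hypothetical placeholder for the automatic paraphrasing step.
    return "She failed the exam."  # e.g., for "She did not pass the exam."

tokenizer = AutoTokenizer.from_pretrained("roberta-base")
model = AutoModelForSequenceClassification.from_pretrained("roberta-base", num_labels=2)

original = "She did not pass the exam."
augmented = f"{original} {generate_affirmative(original)}"  # input + affirmative interpretation
inputs = tokenizer(augmented, return_tensors="pt")
logits = model(**inputs).logits  # the downstream task head consumes the augmented input
```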
Detecting machine-generated text is a critical task in the era of large language models. In this paper, we present our systems for SemEval-2024 Task 8, which focuses on multi-class classification to discern between human-written texts and texts generated by five state-of-the-art large language models. We propose three different systems: unsupervised text similarity, triplet-loss-trained text similarity, and text classification. We show that the triplet-loss-trained text similarity system outperforms the other systems, achieving 80% accuracy on the test set and surpassing the baseline model for this subtask. Additionally, our text classification system, which takes into account sentence paraphrases generated by the candidate models, also outperforms the unsupervised text similarity system, achieving 74% accuracy.
@inproceedings{rezaei-EtAl:2024:SemEval2024,author={Rezaei, MohammadHossein and Kwon, Yeaeun and Sanayei, Reza and Singh, Abhyuday and Bethard, Steven},title={CLULab-UofA at SemEval-2024 Task 8: Detecting Machine-Generated Text Using Triplet-Loss-Trained Text Similarity and Text Classification},booktitle={Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)},month=jun,year={2024},address={Mexico City, Mexico},publisher={Association for Computational Linguistics},pages={1509--1515},url={https://aclanthology.org/2024.semeval2024-1.213},}
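A minimal sketch of a triplet-loss-trained similarity model of the kind described above, assuming the sentence-transformers library. The base model, the anchor/positive/negative construction, and the hyperparameters are illustrative assumptions, not the system's actual configuration.

```python
# Illustrative sketch: train a sentence encoder with triplet loss so that texts
# from the same source (human or a given LLM) embed closer than texts from
# different sources, then score a query by cosine similarity to exemplars.
from torch.utils.data import DataLoader
from sentence_transformers import SentenceTransformer, InputExample, losses, util

model = SentenceTransformer("all-MiniLM-L6-v2")

# Each example: (anchor, positive from the same source class, negative from another class).
train_examples = [
    InputExample(texts=["A human-written news report.",
                        "Another human-written paragraph.",
                        "A paragraph generated by an LLM."]),
]
train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=1)
train_loss = losses.TripletLoss(model=model)
model.fit(train_objectives=[(train_dataloader, train_loss)], epochs=1, warmup_steps=0)

# At inference, label a query by its cosine similarity to class exemplars.
query_emb = model.encode("Text of unknown origin.", convert_to_tensor=True)
exemplar_emb = model.encode("A known machine-generated paragraph.", convert_to_tensor=True)
print(util.cos_sim(query_emb, exemplar_emb))
```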
The advent of large language models (LLMs) has revolutionized Natural Language Generation (NLG), offering unmatched text generation capabilities. However, this progress introduces significant challenges, notably hallucinations: fluent outputs that are semantically incorrect. This phenomenon undermines content reliability, as traditional detection systems focus more on fluency than accuracy, posing a risk of spreading misinformation. Our study addresses these issues by proposing a unified strategy for detecting hallucinations in neural model-generated text, focusing on the SHROOM task at SemEval-2024. We employ diverse methodologies to identify output divergence from the source content: we use Sentence Transformers to measure cosine similarity between source-hypothesis and source-target embeddings, experiment with omitting the source content from the cosine similarity computations, and leverage LLMs' in-context learning with detailed task prompts. The varying performance of our approaches across the subtasks underscores the complexity of natural language understanding tasks and highlights the importance of addressing the nuances of semantic correctness in the era of advanced language models.
@inproceedings{sanayei-etal-2024-maria,title={{MAR}i{A} at {S}em{E}val 2024 Task-6: Hallucination Detection Through {LLM}s, {MNLI}, and Cosine similarity},author={Sanayei, Reza and Singh, Abhyuday and Rezaei, Mohammadhossein and Bethard, Steven},editor={Ojha, Atul Kr. and Do{\u{g}}ru{\"o}z, A. Seza and Tayyar Madabushi, Harish and Da San Martino, Giovanni and Rosenthal, Sara and Ros{\'a}, Aiala},booktitle={Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)},month=jun,year={2024},address={Mexico City, Mexico},publisher={Association for Computational Linguistics},url={https://aclanthology.org/2024.semeval-1.225/},doi={10.18653/v1/2024.semeval-1.225},pages={1584--1588},}
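A minimal sketch of the cosine-similarity check described above, assuming the sentence-transformers library. The model name and the decision threshold are assumptions for illustration.

```python
# Illustrative sketch: flag a possible hallucination when the model output
# (hypothesis) is dissimilar to the source and target in embedding space.
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("all-MiniLM-L6-v2")

source = "The cat sat quietly on the mat."
hypothesis = "A dog is barking in the yard."   # model output to verify
target = "The cat is sitting on the mat."      # reference, when available

src_emb, hyp_emb, tgt_emb = model.encode([source, hypothesis, target], convert_to_tensor=True)

# Low similarity between the hypothesis and the source/target suggests divergence
# from the source content, i.e., a possible hallucination.
sim_src = util.cos_sim(hyp_emb, src_emb).item()
sim_tgt = util.cos_sim(hyp_emb, tgt_emb).item()
is_hallucination = min(sim_src, sim_tgt) < 0.5  # threshold is an illustrative assumption
print(sim_src, sim_tgt, is_hallucination)
```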
Yes-no questions expect a yes or no for an answer, but people often skip polar keywords. Instead, they answer with long explanations that must be interpreted. In this paper, we focus on this challenging problem and release new benchmarks in eight languages. We present a distant supervision approach to collect training data, and demonstrate that direct answers (i.e., with polar keywords) are useful to train models to interpret indirect answers (i.e., without polar keywords). We show that monolingual fine-tuning is beneficial if training data can be obtained via distant supervision for the language of interest (5 languages). Additionally, we show that cross-lingual fine-tuning is always beneficial (8 languages).
@inproceedings{wang-etal-2023-interpreting,title={Interpreting Indirect Answers to Yes-No Questions in Multiple Languages},author={Wang, Zijie and Hossain, Md and Mathur, Shivam and Melo, Terry and Ozler, Kadir and Park, Keun and Quintero, Jacob and Rezaei, MohammadHossein and Shakya, Shreya and Uddin, Md and Blanco, Eduardo},editor={Bouamor, Houda and Pino, Juan and Bali, Kalika},booktitle={Findings of the Association for Computational Linguistics: EMNLP 2023},month=dec,year={2023},address={Singapore},publisher={Association for Computational Linguistics},url={https://aclanthology.org/2023.findings-emnlp.146/},doi={10.18653/v1/2023.findings-emnlp.146},pages={2210--2227},}
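A minimal sketch of cross-lingual classification of indirect answers to yes-no questions, assuming Hugging Face transformers. The label set, the XLM-RoBERTa backbone, and the example pair are assumptions for illustration; the paper's distant-supervision pipeline and fine-tuning details are not reproduced here.

```python
# Illustrative sketch: encode a (question, answer) pair with a multilingual model
# and predict the underlying polarity of an answer that lacks polar keywords.
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

LABELS = ["yes", "no", "middle"]  # assumed label set for illustration
tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")
model = AutoModelForSequenceClassification.from_pretrained("xlm-roberta-base",
                                                           num_labels=len(LABELS))

# Indirect answer: the explanation implies "no" without a polar keyword.
question = "¿Vas a venir a la fiesta?"      # "Are you coming to the party?"
answer = "Tengo que trabajar hasta tarde."  # "I have to work late."
inputs = tokenizer(question, answer, return_tensors="pt")

with torch.no_grad():
    pred = model(**inputs).logits.argmax(dim=-1).item()
print(LABELS[pred])  # arbitrary until the classification head is fine-tuned
```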
2022
Solving Functional Equations in R+ Using Inequalities and Sequences
@article{rezaei2024solving,title={Solving Functional Equations in R+ Using Inequalities and Sequences},author={Rezaei, MohammadHossein},journal={Mathematical Reflections},number={3},year={2022},publisher={AwesomeMath},note={High School Mathematics Olympiad Journal},}