% In-proceedings paper by Salous et al. (2025).
% The page range uses the BibTeX en-dash form (double hyphen), and the
% previously empty month field has been dropped so validators stay quiet.
@inproceedings{salous_enhancing_2025,
  author    = {Salous, Mazen and Riecken, John-Uwe and Heuten, Wilko and Abdenaboui, Larbi},
  title     = {Enhancing Image Accessibility in Educational Contexts for Blind and Visually Impaired Learners Through Integrated Computer Vision Techniques},
  booktitle = {Technology for Inclusion and Participation for All: Recent Achievements and Future Directions},
  publisher = {Springer Nature Switzerland},
  year      = {2025},
  pages     = {193--200},
  doi       = {10.1007/978-3-032-01632-4_25},
  type      = {inproceedings},
  abstract  = {Blind and visually impaired (BVI) learners face challenges accessing image-based educational content. This paper presents an AI-driven system that generates multimodal representations—raised tactile graphics and descriptive text—by integrating open-vocabulary object detection (YOLO-World), image segmentation (SAM), edge detection (Canny) and natural language generation (GPT-4o). The goal is to improve image accessibility for BVI students. A user study with four BVI participants in two educational scenarios (animal anatomy and biology scales) showed that the multimodal outputs enhanced understanding and were not seen as overly complex. Participants appreciated the dual-modality approach (average rating: 4.25/5), though one participant noted a need for practice or assistance. These results, although preliminary, highlight the system’s potential to make educational images more accessible and underscore the need for personalization and user training.},
}

@COMMENT{Bibtex file generated on }