% Conference paper (IROS 2020 proceedings; record dated 10 Feb 2021).
% Cleaned from an automated export: names normalised to "Last, First",
% acronyms brace-protected, percent signs in the abstract escaped.
% NOTE(review): "address" holds a country, not a publisher city -- confirm
% the intended value against the publisher record before relying on it.
% NOTE(review): the name "Aniko Ekart" may be missing diacritics -- verify.
@inproceedings{ada7133fd9a74c89858d4a7f02fa090b,
  author    = {Bird, Jordan J. and Faria, Diego R. and Premebida, Cristiano and Ekart, Aniko and Vogiatzis, George},
  title     = {Look and listen: A multi-modality late fusion approach to scene classification for autonomous machines},
  booktitle = {2020 {IEEE/RSJ} International Conference on Intelligent Robots and Systems, {IROS} 2020},
  series    = {{IEEE} International Conference on Intelligent Robots and Systems},
  publisher = {IEEE},
  address   = {United States},
  pages     = {10380--10385},
  year      = {2021},
  month     = feb,
  day       = {10},
  doi       = {10.1109/IROS45743.2020.9341557},
  isbn      = {978-1-7281-6213-3},
  language  = {English},
  keywords  = {Image analysis, Neural networks, Urban areas, Forestry, Generators, Intelligent robots, Rivers},
  abstract  = {The novelty of this study consists in a multi-modality approach to scene classification, where image and audio complement each other in a process of deep late fusion. The approach is demonstrated on a difficult classification problem, consisting of two synchronised and balanced datasets of 16,000 data objects, encompassing 4.4 hours of video of 8 environments with varying degrees of similarity. We first extract video frames and accompanying audio at one second intervals. The image and the audio datasets are first classified independently, using a fine-tuned VGG16 and an evolutionary optimised deep neural network, with accuracies of 89.27\% and 93.72\%, respectively. This is followed by late fusion of the two neural networks to enable a higher order function, leading to accuracy of 96.81\% in this multi-modality classifier with synchronised video frames and audio clips. The tertiary neural network implemented for late fusion outperforms classical state-of-the-art classifiers by around 3\% when the two primary networks are considered as feature generators. We show that situations where a single-modality may be confused by anomalous data points are now corrected through an emerging higher order integration. Prominent examples include a water feature in a city misclassified as a river by the audio classifier alone and a densely crowded street misclassified as a forest by the image classifier alone. Both are examples which are correctly classified by our multi-modality approach.},
  note      = {{\textcopyright} 2021 IEEE. Personal use of this material is permitted. Permission from IEEE must be obtained for all other uses, in any current or future media, including reprinting/republishing this material for advertising or promotional purposes, creating new collective works, for resale or redistribution to servers or lists, or reuse of any copyrighted component of this work in other works.; 2020 IEEE/RSJ International Conference on Intelligent Robots and Systems, IROS 2020 ; Conference date: 24-10-2020 Through 24-01-2021},
}