2022
Ríos-Vila, A.; Iñesta, J. M.; Calvo-Zaragoza, J.
End-to-End Full-Page Optical Music Recognition for Mensural Notation Proceedings Article
In: Proceedings of the 23rd International Society for Music Information Retrieval Conference, pp. 226-232, 2022, ISBN: 978-1-7327299-2-6.
Abstract | Links | BibTeX | Tags: Leonardo2021, MultiScore
@inproceedings{Ríos-Vila2022,
  title = {End-to-End Full-Page Optical Music Recognition for Mensural Notation},
  author = {A. Ríos-Vila and J. M. Iñesta and J. Calvo-Zaragoza},
  url = {https://zenodo.org/record/7342678/files/000026.pdf?download=1},
  doi = {10.5281/zenodo.7342678},
  isbn = {978-1-7327299-2-6},
  year = {2022},
  date = {2022-12-04},
  urldate = {2022-12-04},
  booktitle = {Proceedings of the 23rd International Society for Music Information Retrieval Conference},
  pages = {226--232},
  abstract = {Optical Music Recognition (OMR) systems typically consider workflows that include several steps, such as staff detection, symbol recognition, and semantic reconstruction. However, fine-tuning these systems is costly due to the specific data labeling process that has to be performed to train models for each of these steps. In this paper, we present the first segmentation-free full-page OMR system that receives a page image and directly outputs the transcription in a single step. This model requires only the annotations of full score pages, which greatly alleviates the task of manual labeling. The model has been tested with early music written in mensural notation, for which the presented approach is especially beneficial. Results show that this methodology provides a solution with promising results and establishes a new line of research for holistic transcription of music score pages.},
  keywords = {Leonardo2021, MultiScore},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Optical Music Recognition (OMR) systems typically consider workflows that include several steps, such as staff detection, symbol recognition, and semantic reconstruction. However, fine-tuning these systems is costly due to the specific data labeling process that has to be performed to train models for each of these steps. In this paper, we present the first segmentation-free full-page OMR system that receives a page image and directly outputs the transcription in a single step. This model requires only the annotations of full score pages, which greatly alleviates the task of manual labeling. The model has been tested with early music written in mensural notation, for which the presented approach is especially beneficial. Results show that this methodology provides a solution with promising results and establishes a new line of research for holistic transcription of music score pages. Garrido-Munoz, C.; Sánchez-Hernández, A.; Castellanos, F. J.; Calvo-Zaragoza, J.
Continual Learning for Document Image Binarization Proceedings Article
In: International Conference on Pattern Recognition, pp. 1443-1449, IEEE, Montreal, QC, Canada, 2022, ISBN: 978-1-6654-9063-4.
Links | BibTeX | Tags: MultiScore
@inproceedings{Garrido:2022:CL,
  title = {Continual Learning for Document Image Binarization},
  author = {C. Garrido-Munoz and A. Sánchez-Hernández and F. J. Castellanos and J. Calvo-Zaragoza},
  url = {https://ieeexplore.ieee.org/abstract/document/9956669},
  doi = {10.1109/ICPR56361.2022.9956669},
  isbn = {978-1-6654-9063-4},
  year = {2022},
  date = {2022-08-21},
  urldate = {2022-08-21},
  booktitle = {International Conference on Pattern Recognition},
  pages = {1443--1449},
  publisher = {IEEE},
  address = {Montreal, QC, Canada},
  keywords = {MultiScore},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Rizo, D.; Delgado, T.; Calvo-Zaragoza, J.; Madueño, A.; García-Iasci, P.
Speeding-up the encoding of mensural collections from Spanish libraries Journal Article
In: IAML 2022 Prague, 2022.
BibTeX | Tags: MultiScore
@inproceedings{k502,
  title = {Speeding-up the encoding of mensural collections from Spanish libraries},
  author = {D. Rizo and T. Delgado and J. Calvo-Zaragoza and A. Madueño and P. García-Iasci},
  year = {2022},
  date = {2022-07-01},
  booktitle = {IAML 2022 Prague},
  organization = {IAML},
  keywords = {MultiScore},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Alfaro-Contreras, M.; Ríos-Vila, A.; Valero-Mas, J. J.; Iñesta, J. M.; Calvo-Zaragoza, J.
Decoupling music notation to improve end-to-end Optical Music Recognition Journal Article
In: Pattern Recognition Letters, vol. 158, pp. 157-163, 2022, ISSN: 0167-8655.
Abstract | Links | BibTeX | Tags: MultiScore
@article{Alfaro-Contreras2022,
  title = {Decoupling music notation to improve end-to-end Optical Music Recognition},
  author = {M. Alfaro-Contreras and A. Ríos-Vila and J. J. Valero-Mas and J. M. Iñesta and J. Calvo-Zaragoza},
  doi = {10.1016/j.patrec.2022.04.032},
  issn = {0167-8655},
  year = {2022},
  date = {2022-06-01},
  urldate = {2022-06-01},
  journal = {Pattern Recognition Letters},
  volume = {158},
  pages = {157--163},
  abstract = {Inspired by the Text Recognition field, end-to-end schemes based on Convolutional Recurrent Neural Networks (CRNN) trained with the Connectionist Temporal Classification (CTC) loss function are considered one of the current state-of-the-art techniques for staff-level Optical Music Recognition (OMR). Unlike text symbols, music-notation elements may be defined as a combination of (i) a shape primitive located in (ii) a certain position in a staff. However, this double nature is generally neglected in the learning process, as each combination is treated as a single token. In this work, we study whether exploiting such particularity of music notation actually benefits the recognition performance and, if so, which approach is the most appropriate. For that, we thoroughly review existing specific approaches that explore this premise and propose different combinations of them. Furthermore, considering the limitations observed in such approaches, a novel decoding strategy specifically designed for OMR is proposed. The results obtained with four different corpora of historical manuscripts show the relevance of leveraging this double nature of music notation since it outperforms the standard approaches where it is ignored. In addition, the proposed decoding leads to significant reductions in the error rates with respect to the other cases.},
  keywords = {MultiScore},
  pubstate = {published},
  tppubtype = {article}
}
Inspired by the Text Recognition field, end-to-end schemes based on Convolutional Recurrent Neural Networks (CRNN) trained with the Connectionist Temporal Classification (CTC) loss function are considered one of the current state-of-the-art techniques for staff-level Optical Music Recognition (OMR). Unlike text symbols, music-notation elements may be defined as a combination of (i) a shape primitive located in (ii) a certain position in a staff. However, this double nature is generally neglected in the learning process, as each combination is treated as a single token. In this work, we study whether exploiting such particularity of music notation actually benefits the recognition performance and, if so, which approach is the most appropriate. For that, we thoroughly review existing specific approaches that explore this premise and propose different combinations of them. Furthermore, considering the limitations observed in such approaches, a novel decoding strategy specifically designed for OMR is proposed. The results obtained with four different corpora of historical manuscripts show the relevance of leveraging this double nature of music notation since it outperforms the standard approaches where it is ignored. In addition, the proposed decoding leads to significant reductions in the error rates with respect to the other cases. Alfaro-Contreras, M.; Valero-Mas, J. J.; Iñesta, J. M.; Calvo-Zaragoza, J.
Insights into transfer learning between image and audio music transcription Proceedings Article
In: Sound and Music Computing Conference, pp. 295-301, Zenodo, Saint-Étienne, France, 2022.
Abstract | Links | BibTeX | Tags: MultiScore
@inproceedings{Alfaro-Contreras2022b,
  title = {Insights into transfer learning between image and audio music transcription},
  author = {M. Alfaro-Contreras and J. J. Valero-Mas and J. M. Iñesta and J. Calvo-Zaragoza},
  doi = {10.5281/zenodo.6797870},
  year = {2022},
  date = {2022-06-01},
  urldate = {2022-06-01},
  booktitle = {Sound and Music Computing Conference},
  pages = {295--301},
  publisher = {Zenodo},
  address = {Saint-Étienne, France},
  abstract = {Optical Music Recognition (OMR) and Automatic Music Transcription (AMT) stand for the research fields that devise methods to transcribe music sources---documents or audio signals, respectively---into a structured digital format. Historically, they have followed different approaches to achieve the same goal. However, their recent definition in terms of sequence labeling tasks gathers them under a common formulation framework. Under this premise, one may wonder if there exist any synergies between the two fields that could be exploited to improve the individual recognition rates in their respective domains. In this work, we aim to further explore this question from a Transfer Learning (TL) point of view in the context of neural end-to-end recognition models. More precisely, we consider a music transcription system, trained on either image or audio data, and adapt its performance to the unseen domain during the training phase using different TL schemes. Results show that knowledge transfer slightly boosts model performance with sufficient available data, but it is not properly leveraged when the latter condition is not met. This opens up a new promising, yet challenging, research path towards building an effective bridge between two solutions of the same problem.},
  keywords = {MultiScore},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Optical Music Recognition (OMR) and Automatic Music Transcription (AMT) stand for the research fields that devise methods to transcribe music sources---documents or audio signals, respectively---into a structured digital format. Historically, they have followed different approaches to achieve the same goal. However, their recent definition in terms of sequence labeling tasks gathers them under a common formulation framework. Under this premise, one may wonder if there exist any synergies between the two fields that could be exploited to improve the individual recognition rates in their respective domains. In this work, we aim to further explore this question from a Transfer Learning (TL) point of view in the context of neural end-to-end recognition models. More precisely, we consider a music transcription system, trained on either image or audio data, and adapt its performance to the unseen domain during the training phase using different TL schemes. Results show that knowledge transfer slightly boosts model performance with sufficient available data, but it is not properly leveraged when the latter condition is not met. This opens up a new promising, yet challenging, research path towards building an effective bridge between two solutions of the same problem. Iñesta, J. M.; Thomae, M. E.
An On-line Tool for Transcription of Music Scores: MuRET Presentation
Montreal (Canada), 01.05.2022.
Abstract | Links | BibTeX | Tags: HispaMus
@misc{k520,
  title = {An On-line Tool for Transcription of Music Scores: MuRET},
  author = {J. M. Iñesta and M. E. Thomae},
  year = {2022},
  date = {2022-05-01},
  urldate = {2022-05-01},
  booktitle = {1st Int. Conf. The Sound of Future/The Future of Sound},
  address = {Montreal (Canada)},
  organization = {CIRMMT},
  abstract = {MuRET is a Machine-Learning Optical Music Recognition (OMR) research tool. It runs in the browser. It has been created for helping in the transcription of music collections, for experimenting with machine learning algorithms for OMR and it's capable of working well with different notations and writings. Why using Machine Learning? Instead of designing a system to solve the task, we have designed a system to learn how to solve the task from sets of labeled (solved) images. This way it's adaptable to new (previously unseen) collections.},
  key = {OMR, Machine Learning},
  keywords = {HispaMus},
  pubstate = {published},
  tppubtype = {presentation}
}
MuRET is a Machine-Learning Optical Music Recognition (OMR) research tool. It runs in the browser. It has been created for helping in the transcription of music collections, for experimenting with machine learning algorithms for OMR and it's capable of working well with different notations and writings. Why using Machine Learning? Instead of designing a system to solve the task, we have designed a system to learn how to solve the task from sets of labeled (solved) images. This way it's adaptable to new (previously unseen) collections. Sánchez-Ferrer, A.; Gallego, A. J.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
The CleanSea Set: A Benchmark Corpus for Underwater Debris Detection and Recognition Proceedings Article
In: 10th Iberian Conference on Pattern Recognition and Image Analysis (IbPRIA), pp. 616–628, Aveiro, Portugal, 2022, ISBN: 978-3-031-04881-4.
@inproceedings{k512,
  author = {A. Sánchez-Ferrer and A. J. Gallego and J. J. Valero-Mas and J. Calvo-Zaragoza},
  title = {The CleanSea Set: A Benchmark Corpus for Underwater Debris Detection and Recognition},
  booktitle = {10th Iberian Conference on Pattern Recognition and Image Analysis (IbPRIA)},
  address = {Aveiro, Portugal},
  pages = {616--628},
  year = {2022},
  date = {2022-05-01},
  isbn = {978-3-031-04881-4},
  abstract = {In recent years, the large amount of debris scattered throughout the ocean is becoming one of the major pollution problems, causing extinction of species and accelerating the degradation of our planet, among other environmental issues. Since the manual treatment of this waste represents a considerably tedious task, autonomous frameworks are gaining attention. Due to their reported good performance, such frameworks generally rely on Deep Learning techniques. However, the scarcity of data coupled with the inherent difficulties of the field---debris with different shapes and colors due to long-lasting exposure to the ocean, illumination variability or sea conditions---makes detecting underwater objects a particularly challenging task. The contribution of this work to the field is double: on the one hand, we introduce a novel data collection for supervised learning---the CleanSea corpus---annotated at both the bound box and contour levels of the objects to contribute with the research and progress in the field and on the other hand, we devise and optimize a recognition model based on the reference Mask Object-Based Convolutional Neural Network for this set to establish a benchmark for future comparison and assess its performance in both simulated and real-world scenarios. Results show the relevance of the contributions as the devised model is capable of properly addressing the detection and recognition of general debris when trained with the introduced CleanSea corpus.},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
In recent years, the large amount of debris scattered throughout the ocean is becoming one of the major pollution problems, causing extinction of species and accelerating the degradation of our planet, among other environmental issues. Since the manual treatment of this waste represents a considerably tedious task, autonomous frameworks are gaining attention. Due to their reported good performance, such frameworks generally rely on Deep Learning techniques. However, the scarcity of data coupled with the inherent difficulties of the field---debris with different shapes and colors due to long-lasting exposure to the ocean, illumination variability or sea conditions---makes detecting underwater objects a particularly challenging task. The contribution of this work to the field is double: on the one hand, we introduce a novel data collection for supervised learning---the CleanSea corpus---annotated at both the bound box and contour levels of the objects to contribute with the research and progress in the field and on the other hand, we devise and optimize a recognition model based on the reference Mask Object-Based Convolutional Neural Network for this set to establish a benchmark for future comparison and assess its performance in both simulated and real-world scenarios. Results show the relevance of the contributions as the devised model is capable of properly addressing the detection and recognition of general debris when trained with the introduced CleanSea corpus. Desmond, K.; Pugin, L.; Regimbal, J.; Rizo, D.; Sapp, C. S.; Thomae, M. E.
Encoding Polyphony from Medieval Manuscripts Notated in Mensural Notation Proceedings Article
In: Music Encoding Conference Proceedings 2021, pp. 197–219, Humanities Commons, 2022, ISBN: 978-84-1302-173-7.
BibTeX | Tags: MultiScore
@inproceedings{k497,
  title = {Encoding Polyphony from Medieval Manuscripts Notated in Mensural Notation},
  author = {K. Desmond and L. Pugin and J. Regimbal and D. Rizo and C. S. Sapp and M. E. Thomae},
  isbn = {978-84-1302-173-7},
  year = {2022},
  date = {2022-05-01},
  urldate = {2022-05-01},
  booktitle = {Music Encoding Conference Proceedings 2021},
  pages = {197--219},
  publisher = {Humanities Commons},
  keywords = {MultiScore},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Münnich, S.; Rizo, D.
Foreword Proceedings Article
In: Münnich, S.; Rizo, D. (Ed.): Music Encoding Conference Proceedings 2021, pp. vii–viii, Humanities Commons, 2022, ISBN: 978-84-1302-173-7.
BibTeX | Tags: MultiScore
@inproceedings{k496,
  title = {Foreword},
  author = {S. Münnich and D. Rizo},
  editor = {S. Münnich and D. Rizo},
  isbn = {978-84-1302-173-7},
  year = {2022},
  date = {2022-05-01},
  urldate = {2022-05-01},
  booktitle = {Music Encoding Conference Proceedings 2021},
  pages = {vii--viii},
  publisher = {Humanities Commons},
  chapter = {1},
  keywords = {MultiScore},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Münnich, S.; Rizo, D.
Music Encoding Conference Proceedings 2021 Book
Humanities Commons, 2022, ISBN: 978-84-1302-173-7.
BibTeX | Tags: MultiScore
@book{k495,
  title = {Music Encoding Conference Proceedings 2021},
  editor = {S. Münnich and D. Rizo},
  isbn = {978-84-1302-173-7},
  year = {2022},
  date = {2022-05-01},
  urldate = {2022-05-01},
  publisher = {Humanities Commons},
  keywords = {MultiScore},
  pubstate = {published},
  tppubtype = {book}
}
Mas-Candela, E.; Ríos-Vila, A.; Calvo-Zaragoza, J.
A First Approach to Image Transformation Sequence Retrieval Proceedings Article
In: Iberian Pattern Recognition and Image Analysis, IbPRIA 2022, pp. 321-332, Aveiro, Portugal, 2022.
BibTeX | Tags: MultiScore
@inproceedings{k494,
  title = {A First Approach to Image Transformation Sequence Retrieval},
  author = {E. Mas-Candela and A. Ríos-Vila and J. Calvo-Zaragoza},
  year = {2022},
  date = {2022-05-01},
  booktitle = {Iberian Pattern Recognition and Image Analysis, IbPRIA 2022},
  pages = {321--332},
  address = {Aveiro, Portugal},
  keywords = {MultiScore},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Garrido-Munoz, C.; Ríos-Vila, A.; Calvo-Zaragoza, J.
Retrieval of Music-Notation Primitives via Image-to-Sequence Approaches Proceedings Article
In: Iberian Pattern Recognition and Image Analysis, IbPRIA 2022, pp. 482-492, Aveiro, Portugal, 2022, ISBN: 978-3-031-04880-7.
BibTeX | Tags: Leonardo2021
@inproceedings{k493,
  title = {Retrieval of Music-Notation Primitives via Image-to-Sequence Approaches},
  author = {C. Garrido-Munoz and A. Ríos-Vila and J. Calvo-Zaragoza},
  isbn = {978-3-031-04880-7},
  year = {2022},
  date = {2022-05-01},
  booktitle = {Iberian Pattern Recognition and Image Analysis, IbPRIA 2022},
  pages = {482--492},
  address = {Aveiro, Portugal},
  keywords = {Leonardo2021},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Ríos-Vila, A.; Iñesta, J. M.; Calvo-Zaragoza, J.
On the Use of Transformers for End-to-End Optical Music Recognition Proceedings Article
In: Iberian Pattern Recognition and Image Analysis, IbPRIA 2022, pp. 470-481, Aveiro, Portugal, 2022, ISBN: 978-3-031-04880-7.
BibTeX | Tags: MultiScore
@inproceedings{k492,
  title = {On the Use of Transformers for End-to-End Optical Music Recognition},
  author = {A. Ríos-Vila and J. M. Iñesta and J. Calvo-Zaragoza},
  isbn = {978-3-031-04880-7},
  year = {2022},
  date = {2022-05-01},
  urldate = {2022-05-01},
  booktitle = {Iberian Pattern Recognition and Image Analysis, IbPRIA 2022},
  pages = {470--481},
  address = {Aveiro, Portugal},
  keywords = {MultiScore},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Arroyo, V.; Valero-Mas, J. J.; Calvo-Zaragoza, J.; Pertusa, A.
Neural audio-to-score music transcription for unconstrained polyphony using compact output representations Proceedings Article
In: Proc. of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), IEEE, Singapore, 2022.
BibTeX | Tags: MultiScore
@inproceedings{k487,
  title = {Neural audio-to-score music transcription for unconstrained polyphony using compact output representations},
  author = {V. Arroyo and J. J. Valero-Mas and J. Calvo-Zaragoza and A. Pertusa},
  year = {2022},
  date = {2022-05-01},
  booktitle = {Proc. of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  publisher = {IEEE},
  address = {Singapore},
  keywords = {MultiScore},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Garrido-Munoz, C.; Ríos-Vila, A.; Calvo-Zaragoza, J.
A holistic approach for image-to-graph: application to optical music recognition Journal Article
In: International Journal on Document Analysis and Recognition, 2022.
BibTeX | Tags: Leonardo2021
@article{k522,
  author = {C. Garrido-Munoz and A. Ríos-Vila and J. Calvo-Zaragoza},
  title = {A holistic approach for image-to-graph: application to optical music recognition},
  journal = {International Journal on Document Analysis and Recognition},
  year = {2022},
  date = {2022-01-01},
  urldate = {2022-01-01},
  keywords = {Leonardo2021},
  pubstate = {published},
  tppubtype = {article}
}
Alashhab, S.; Gallego, A. J.; Lozano, M. Á.
Efficient gesture recognition for the assistance of visually impaired people using multi-head neural networks Journal Article
In: Engineering Applications of Artificial Intelligence, vol. 114, pp. 105188, 2022, ISSN: 0952-1976.
@article{k511,
  author = {S. Alashhab and A. J. Gallego and M. Á. Lozano},
  title = {Efficient gesture recognition for the assistance of visually impaired people using multi-head neural networks},
  journal = {Engineering Applications of Artificial Intelligence},
  volume = {114},
  pages = {105188},
  year = {2022},
  date = {2022-01-01},
  issn = {0952-1976},
  abstract = {Existing research for the assistance of visually impaired people mainly focus on solving a single task (such as reading a text or detecting an obstacle), hence forcing the user to switch applications to perform other actions. This paper proposes an interactive system for mobile devices controlled by hand gestures that allow the user to control the device and use several assistance tools by making simple static and dynamic hand gestures (e.g., pointing a finger at an object will show a description of it). The system is based on a multi-head neural network, which initially detects and classifies the gestures, and subsequently, depending on the gesture detected, performs a second stage that carries out the corresponding action. This architecture optimizes the resources required to perform different tasks, it takes advantage of the information obtained from an initial backbone to perform different processes in a second stage. To train and evaluate the system, a dataset with about 40k images was manually compiled and labeled including different types of hand gestures, backgrounds (indoors and outdoors), lighting conditions, etc. This dataset contains synthetic gestures (whose objective is to pre-train the system to improve the results) and real images captured using different mobile phones. The comparison made with nearly 50 state-of-the-art methods shows competitive results as regards the different actions performed by the system, such as the accuracy of classification and localization of gestures, or the generation of descriptions for objects and scenes.},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Existing research for the assistance of visually impaired people mainly focus on solving a single task (such as reading a text or detecting an obstacle), hence forcing the user to switch applications to perform other actions. This paper proposes an interactive system for mobile devices controlled by hand gestures that allow the user to control the device and use several assistance tools by making simple static and dynamic hand gestures (e.g., pointing a finger at an object will show a description of it). The system is based on a multi-head neural network, which initially detects and classifies the gestures, and subsequently, depending on the gesture detected, performs a second stage that carries out the corresponding action. This architecture optimizes the resources required to perform different tasks, it takes advantage of the information obtained from an initial backbone to perform different processes in a second stage. To train and evaluate the system, a dataset with about 40k images was manually compiled and labeled including different types of hand gestures, backgrounds (indoors and outdoors), lighting conditions, etc. This dataset contains synthetic gestures (whose objective is to pre-train the system to improve the results) and real images captured using different mobile phones. The comparison made with nearly 50 state-of-the-art methods shows competitive results as regards the different actions performed by the system, such as the accuracy of classification and localization of gestures, or the generation of descriptions for objects and scenes. Gallego, A. J.; Rico-Juan, J. R.; Valero-Mas, J. J.
Efficient k-nearest neighbor search based on clustering and adaptive k values Journal Article
In: Pattern Recognition, vol. 122, pp. 108356, 2022, ISSN: 0031-3203.
@article{k510,
  author = {A. J. Gallego and J. R. Rico-Juan and J. J. Valero-Mas},
  title = {Efficient k-nearest neighbor search based on clustering and adaptive k values},
  journal = {Pattern Recognition},
  volume = {122},
  pages = {108356},
  year = {2022},
  date = {2022-01-01},
  issn = {0031-3203},
  abstract = {The k-Nearest Neighbor (kNN) algorithm is widely used in the supervised learning field and, particularly, in search and classification tasks, owing to its simplicity, competitive performance, and good statistical properties. However, its inherent inefficiency prevents its use in most modern applications due to the vast amount of data that the current technological evolution generates, being thus the optimization of kNN-based search strategies of particular interest. This paper introduces the caKD+ algorithm, which tackles this limitation by combining the use of feature learning techniques, clustering methods, adaptive search parameters per cluster, and the use of pre-calculated K-Dimensional Tree structures, and results in a highly efficient search method. This proposal has been evaluated using 10 datasets and the results show that caKD+ significantly outperforms 16 state-of-the-art efficient search methods while still depicting such an accurate performance as the one by the exhaustive kNN search.},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
The k-Nearest Neighbor (kNN) algorithm is widely used in the supervised learning field and, particularly, in search and classification tasks, owing to its simplicity, competitive performance, and good statistical properties. However, its inherent inefficiency prevents its use in most modern applications due to the vast amount of data that the current technological evolution generates, being thus the optimization of kNN-based search strategies of particular interest. This paper introduces the caKD+ algorithm, which tackles this limitation by combining the use of feature learning techniques, clustering methods, adaptive search parameters per cluster, and the use of pre-calculated K-Dimensional Tree structures, and results in a highly efficient search method. This proposal has been evaluated using 10 datasets and the results show that caKD+ significantly outperforms 16 state-of-the-art efficient search methods while still depicting such an accurate performance as the one by the exhaustive kNN search. Bernabeu, M.
Búsqueda de imágenes similares usando técnicas de aprendizaje automático PhD Thesis
2022.
BibTeX | Tags:
@phdthesis{k509,
  title = {Búsqueda de imágenes similares usando técnicas de aprendizaje automático},
  author = {M. Bernabeu},
  editor = {Antonio Pertusa and Antonio Javier Gallego},
  year = {2022},
  date = {2022-01-01},
  urldate = {2022-01-01},
  school = {Universidad de Alicante},
  keywords = {},
  pubstate = {published},
  tppubtype = {phdthesis}
}
Alashhab, S.
Aplicaciones de visión artificial para ayuda a personas con dificultades visuales PhD Thesis
2022.
BibTeX | Tags:
@phdthesis{k508,
  title = {Aplicaciones de visión artificial para ayuda a personas con dificultades visuales},
  author = {S. Alashhab},
  editor = {Miguel Angel Lozano and Antonio Javier Gallego},
  year = {2022},
  date = {2022-01-01},
  urldate = {2022-01-01},
  school = {Universidad de Alicante},
  keywords = {},
  pubstate = {published},
  tppubtype = {phdthesis}
}
Sáez-Pérez, J.; Gallego, A. J.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Domain Adaptation in Robotics: A Study Case on Kitchen Utensil Recognition Proceedings Article
In: 10th Iberian Conference on Pattern Recognition and Image Analysis (IbPRIA), 2022.
@inproceedings{k506,
  author = {J. Sáez-Pérez and A. J. Gallego and J. J. Valero-Mas and J. Calvo-Zaragoza},
  title = {Domain Adaptation in Robotics: A Study Case on Kitchen Utensil Recognition},
  booktitle = {10th Iberian Conference on Pattern Recognition and Image Analysis (IbPRIA)},
  year = {2022},
  date = {2022-01-01},
  keywords = {ROMA},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Alfaro-Contreras, M.; Ríos-Vila, A.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Few-Shot Music Symbol Classification via Self-Supervised Learning and Nearest Neighbor Proceedings Article
In: Pattern Recognition. ICPR International Workshops and Challenges, 2022.
BibTeX | Tags: MultiScore
@inproceedings{k504,
  author = {M. Alfaro-Contreras and A. Ríos-Vila and J. J. Valero-Mas and J. Calvo-Zaragoza},
  title = {Few-Shot Music Symbol Classification via Self-Supervised Learning and Nearest Neighbor},
  booktitle = {Pattern Recognition. ICPR International Workshops and Challenges},
  year = {2022},
  date = {2022-01-01},
  keywords = {MultiScore},
  pubstate = {published},
  tppubtype = {inproceedings}
}
de la Fuente, C.; Castellanos, F. J.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Multimodal Recognition of Frustration during Game-Play with Deep Neural Networks Journal Article
In: Multimedia Tools and Applications, 2022.
BibTeX | Tags: ACIF/2019/042, APOSTD/2020/256
@article{k501,
  author = {C. de la Fuente and F. J. Castellanos and J. J. Valero-Mas and J. Calvo-Zaragoza},
  title = {Multimodal Recognition of Frustration during Game-Play with Deep Neural Networks},
  journal = {Multimedia Tools and Applications},
  year = {2022},
  date = {2022-01-01},
  urldate = {2022-01-01},
  keywords = {ACIF/2019/042, APOSTD/2020/256},
  pubstate = {published},
  tppubtype = {article}
}
Castellanos, F. J.; Gallego, A. J.; Calvo-Zaragoza, J.; Fujinaga, I.
Domain Adaptation for Staff-Region Retrieval of Music Score Images Journal Article
In: International Journal on Document Analysis and Recognition, vol. 25, iss. Special Issue: ICFHR 2022, pp. 281-292, 2022, ISSN: 1433-2825.
BibTeX | Tags: MultiScore
@article{Castellanos:2022:DAStaff,
  author    = {F. J. Castellanos and A. J. Gallego and J. Calvo-Zaragoza and I. Fujinaga},
  title     = {Domain Adaptation for Staff-Region Retrieval of Music Score Images},
  journal   = {International Journal on Document Analysis and Recognition},
  volume    = {25},
  issue     = {Special Issue: ICFHR 2022},
  pages     = {281--292},
  year      = {2022},
  date      = {2022-01-01},
  urldate   = {2022-01-01},
  issn      = {1433-2825},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {article}
}
Ríos-Vila, A.; Iñesta, J. M.; Calvo-Zaragoza, J.
End-to-End Full-Page Optical Music Recognition for Mensural Notation Proceedings Article
In: Proceedings of the 23rd International Society for Music Information Retrieval Conference, ISMIR, Bangalore, India, 2022.
Abstract | BibTeX | Tags: Leonardo2021, MultiScore
@inproceedings{k499,
  author    = {A. Ríos-Vila and J. M. Iñesta and J. Calvo-Zaragoza},
  title     = {End-to-End Full-Page Optical Music Recognition for Mensural Notation},
  booktitle = {Proceedings of the 23rd International Society for Music Information Retrieval Conference, ISMIR},
  address   = {Bangalore, India},
  pages     = {226--232},
  year      = {2022},
  date      = {2022-01-01},
  urldate   = {2022-01-01},
  isbn      = {978-1-7327299-2-6},
  doi       = {10.5281/zenodo.7342678},
  abstract  = {Optical Music Recognition (OMR) systems typically consider workflows that include several steps, such as staff detection, symbol recognition, and semantic reconstruction. However, fine-tuning these systems is costly due to the specific data labeling process that has to be performed to train models for each of these steps. In this paper, we present the first segmentation-free full-page OMR system that receives a page image and directly outputs the transcription in a single step. This model requires only the annotations of full score pages, which greatly alleviates the task of manual labeling. The model has been tested with early music written in mensural notation, for which the presented approach is especially beneficial. Results show that this methodology provides a solution with promising results and establishes a new line of research for holistic transcription of music score pages.},
  keywords  = {Leonardo2021, MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Optical Music Recognition (OMR) systems typically consider workflows that include several steps, such as staff detection, symbol recognition, and semantic reconstruction. However, fine-tuning these systems is costly due to the specific data labeling process that has to be performed to train models for each of these steps. In this paper, we present the first segmentation-free full-page OMR system that receives a page image and directly outputs the transcription in a single step. This model requires only the annotations of full score pages, which greatly alleviates the task of manual labeling. The model has been tested with early music written in mensural notation, for which the presented approach is especially beneficial. Results show that this methodology provides a solution with promising results and establishes a new line of research for holistic transcription of music score pages.
Rosello, A.; Ayllon, E.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Test Sample Selection for Handwriting Recognition Through Language Modeling Proceedings Article
In: Pattern Recognition and Image Analysis - 10th Iberian Conference, IbPRIA 2022, Aveiro, Portugal, May 4-6, 2022, Proceedings, 2022.
BibTeX | Tags: MultiScore
@inproceedings{k498,
  author    = {A. Rosello and E. Ayllon and J. J. Valero-Mas and J. Calvo-Zaragoza},
  title     = {Test Sample Selection for Handwriting Recognition Through Language Modeling},
  booktitle = {Pattern Recognition and Image Analysis - 10th Iberian Conference, IbPRIA 2022, Aveiro, Portugal, May 4-6, 2022, Proceedings},
  year      = {2022},
  date      = {2022-01-01},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Castellanos, F. J.; Garrido-Munoz, C.; Ríos-Vila, A.; Calvo-Zaragoza, J.
Region-based Layout Analysis of Music Score Images Journal Article
In: Expert Systems with Applications, pp. 118211, 2022, ISSN: 0957-4174.
BibTeX | Tags: MultiScore
@article{k486,
  author    = {F. J. Castellanos and C. Garrido-Munoz and A. Ríos-Vila and J. Calvo-Zaragoza},
  title     = {Region-based Layout Analysis of Music Score Images},
  journal   = {Expert Systems with Applications},
  pages     = {118211},
  year      = {2022},
  date      = {2022-01-01},
  urldate   = {2022-01-01},
  issn      = {0957-4174},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {article}
}
de la Fuente, C.; Valero-Mas, J. J.; Castellanos, F. J.; Calvo-Zaragoza, J.
Multimodal Image and Audio Music Transcription Journal Article
In: International Journal of Multimedia Information Retrieval, vol. 11, pp. 77-84, 2022.
BibTeX | Tags: MultiScore
@article{k479,
  author    = {C. de la Fuente and J. J. Valero-Mas and F. J. Castellanos and J. Calvo-Zaragoza},
  title     = {Multimodal Image and Audio Music Transcription},
  journal   = {International Journal of Multimedia Information Retrieval},
  volume    = {11},
  pages     = {77--84},
  year      = {2022},
  date      = {2022-01-01},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {article}
}
2021
Ríos-Vila, A.; Calvo-Zaragoza, J.; Iñesta, J. M.
CTC-based end-to-end approach for full page Optical Music Recognition Proceedings Article
In: Proceedings of the 14th Machine Learning and Music Workshop, pp. 11, 2021.
BibTeX | Tags: MultiScore
@inproceedings{k488,
  author    = {A. Ríos-Vila and J. Calvo-Zaragoza and J. M. Iñesta},
  title     = {CTC-based end-to-end approach for full page Optical Music Recognition},
  booktitle = {Proceedings of the 14th Machine Learning and Music Workshop},
  pages     = {11},
  year      = {2021},
  date      = {2021-12-01},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Castellanos, F. J.; Gallego, A. J.; Calvo-Zaragoza, J.
An Unsupervised Domain Adaptation framework for Layout Analysis of Music Score Images Proceedings Article
In: Proceedings of the 14th Machine Learning and Music Workshop, pp. 6, 2021.
@inproceedings{k482,
  author    = {F. J. Castellanos and A. J. Gallego and J. Calvo-Zaragoza},
  title     = {An Unsupervised Domain Adaptation framework for Layout Analysis of Music Score Images},
  booktitle = {Proceedings of the 14th Machine Learning and Music Workshop},
  pages     = {6},
  year      = {2021},
  date      = {2021-12-01},
  keywords  = {GRE19-04, ROMA},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Calvo-Zaragoza, J.; Pertusa, A.; Gallego, A. J.; Iñesta, J. M.; Micó, L.; Oncina, J.; Perez-Sancho, C.; Ponce de León, P. J.; Rizo, D.
MultiScore Project: Multimodal Transcription of Music Scores Proceedings Article
In: Proceedings of the 14th Machine Learning and Music Workshop, pp. 3, 2021.
Links | BibTeX | Tags: MultiScore
@inproceedings{k481,
  author    = {J. Calvo-Zaragoza and A. Pertusa and A. J. Gallego and J. M. Iñesta and L. Micó and J. Oncina and C. Perez-Sancho and {Ponce de León}, P. J. and D. Rizo},
  title     = {MultiScore Project: Multimodal Transcription of Music Scores},
  booktitle = {Proceedings of the 14th Machine Learning and Music Workshop},
  pages     = {3},
  year      = {2021},
  date      = {2021-12-01},
  urldate   = {2021-12-01},
  url       = {https://grfia.dlsi.ua.es/repositori/grfia/pubs/481/MML2021__MultiScore_Final.pdf},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Alfaro-Contreras, M.; Rizo, D.; Iñesta, J. M.; Calvo-Zaragoza, J.
OMR-assisted transcription: a case study with early prints Proceedings Article
In: Proceedings of the 22nd International Society for Music Information Retrieval Conference, ISMIR, pp. 35-41, 2021, ISBN: 978-1-7327299-0-2.
BibTeX | Tags: MultiScore
@inproceedings{k483,
  author    = {M. Alfaro-Contreras and D. Rizo and J. M. Iñesta and J. Calvo-Zaragoza},
  title     = {OMR-assisted transcription: a case study with early prints},
  booktitle = {Proceedings of the 22nd International Society for Music Information Retrieval Conference, ISMIR},
  pages     = {35--41},
  year      = {2021},
  date      = {2021-11-01},
  urldate   = {2021-11-01},
  isbn      = {978-1-7327299-0-2},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Castellanos, F. J.; Gallego, A. J.; Calvo-Zaragoza, J.
Unsupervised Neural Domain Adaptation for Document Image Binarization Journal Article
In: Pattern Recognition, vol. 119, pp. 108099, 2021.
BibTeX | Tags: GRE19-04, HispaMus
@article{k467,
  author    = {F. J. Castellanos and A. J. Gallego and J. Calvo-Zaragoza},
  title     = {Unsupervised Neural Domain Adaptation for Document Image Binarization},
  journal   = {Pattern Recognition},
  volume    = {119},
  pages     = {108099},
  year      = {2021},
  date      = {2021-11-01},
  urldate   = {2021-11-01},
  keywords  = {GRE19-04, HispaMus},
  pubstate  = {published},
  tppubtype = {article}
}
Madueño, A.; Ríos-Vila, A.; Rizo, D.
Automatized incipit encoding at the Andalusian Music Documentation Center Proceedings Article
In: Digital Libraries for Musicology / IAML Joint Session, 2021.
BibTeX | Tags: MultiScore
@inproceedings{k484,
  author    = {A. Madueño and A. Ríos-Vila and D. Rizo},
  title     = {Automatized incipit encoding at the Andalusian Music Documentation Center},
  booktitle = {Digital Libraries for Musicology / IAML Joint Session},
  year      = {2021},
  date      = {2021-07-01},
  urldate   = {2021-07-01},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Castellanos, F. J.; Gallego, A. J.
Unsupervised Neural Document Analysis for Music Score Images Proceedings Article
In: Proc. of the 3rd International Workshop on Reading Music Systems, pp. 50-54, 2021.
BibTeX | Tags: GRE19-04, HispaMus
@inproceedings{k468,
  author    = {F. J. Castellanos and A. J. Gallego},
  title     = {Unsupervised Neural Document Analysis for Music Score Images},
  booktitle = {Proc. of the 3rd International Workshop on Reading Music Systems},
  pages     = {50--54},
  year      = {2021},
  date      = {2021-07-01},
  keywords  = {GRE19-04, HispaMus},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Ríos-Vila, A.; Esplà-Gomis, M.; Rizo, D.; Ponce de León, P. J.; Iñesta, J. M.
Applying Automatic Translation for Optical Music Recognition’s Encoding Step Journal Article
In: Applied Sciences, vol. 11, no. 9, 2021, ISSN: 2076-3417.
Abstract | BibTeX | Tags: GV/2020/030, HispaMus
@article{k464,
  author    = {A. Ríos-Vila and M. Esplà-Gomis and D. Rizo and {Ponce de León}, P. J. and J. M. Iñesta},
  title     = {Applying Automatic Translation for Optical Music Recognition’s Encoding Step},
  journal   = {Applied Sciences},
  volume    = {11},
  number    = {9},
  year      = {2021},
  date      = {2021-04-01},
  urldate   = {2021-04-01},
  issn      = {2076-3417},
  abstract  = {Optical music recognition is a research field whose efforts have been mainly focused, due to the difficulties involved in its processes, on document and image recognition. However, there is a final step after the recognition phase that has not been properly addressed or discussed, and which is relevant to obtaining a standard digital score from the recognition process: the step of encoding data into a standard file format. In this paper, we address this task by proposing and evaluating the feasibility of using machine translation techniques, using statistical approaches and neural systems, to automatically convert the results of graphical encoding recognition into a standard semantic format, which can be exported as a digital score. We also discuss the implications, challenges and details to be taken into account when applying machine translation techniques to music languages, which are very different from natural human languages. This needs to be addressed prior to performing experiments and has not been reported in previous works. We also describe and detail experimental results, and conclude that applying machine translation techniques is a suitable solution for this task, as they have proven to obtain robust results.},
  keywords  = {GV/2020/030, HispaMus},
  pubstate  = {published},
  tppubtype = {article}
}
Optical music recognition is a research field whose efforts have been mainly focused, due to the difficulties involved in its processes, on document and image recognition. However, there is a final step after the recognition phase that has not been properly addressed or discussed, and which is relevant to obtaining a standard digital score from the recognition process: the step of encoding data into a standard file format. In this paper, we address this task by proposing and evaluating the feasibility of using machine translation techniques, using statistical approaches and neural systems, to automatically convert the results of graphical encoding recognition into a standard semantic format, which can be exported as a digital score. We also discuss the implications, challenges and details to be taken into account when applying machine translation techniques to music languages, which are very different from natural human languages. This needs to be addressed prior to performing experiments and has not been reported in previous works. We also describe and detail experimental results, and conclude that applying machine translation techniques is a suitable solution for this task, as they have proven to obtain robust results.
Cuevas-Velasquez, H.; Gallego, A. J.; Fisher, R. B.
Two Heads are Better than One: Geometric-Latent Attention for Point Cloud Classification and Segmentation Proceedings Article
In: The 32nd British Machine Vision Conference (BMVC), 2021.
BibTeX | Tags:
@inproceedings{k513,
  author    = {H. Cuevas-Velasquez and A. J. Gallego and R. B. Fisher},
  title     = {Two Heads are Better than One: Geometric-Latent Attention for Point Cloud Classification and Segmentation},
  booktitle = {The 32nd British Machine Vision Conference (BMVC)},
  year      = {2021},
  date      = {2021-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Ortega-Bastida, J.
Aproximación de soluciones multimodales para aplicaciones Deep Learning PhD Thesis
2021.
BibTeX | Tags:
@phdthesis{k507,
  author    = {J. Ortega-Bastida},
  title     = {Aproximación de soluciones multimodales para aplicaciones Deep Learning},
  editor    = {J. Ramón Rico-Juan and A. J. Gallego},
  school    = {Universidad de Alicante},
  year      = {2021},
  date      = {2021-01-01},
  urldate   = {2021-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {phdthesis}
}
Garrido-Munoz, C.; Sánchez-Hernández, A.; Castellanos, F. J.; Calvo-Zaragoza, J.
Domain Adaptation for Document Image Binarization via Domain Classification Proceedings Article
In: Tallón-Ballesteros, A. J. (Ed.): Frontiers in Artificial Intelligence and Applications, pp. 569-582, IOS Press, 2021, ISBN: 978-1-64368-224-2.
BibTeX | Tags: GRE19-04, GV/2020/030
@inproceedings{k480,
  author    = {C. Garrido-Munoz and A. Sánchez-Hernández and F. J. Castellanos and J. Calvo-Zaragoza},
  title     = {Domain Adaptation for Document Image Binarization via Domain Classification},
  editor    = {A. J. Tallón-Ballesteros},
  booktitle = {Frontiers in Artificial Intelligence and Applications},
  pages     = {569--582},
  publisher = {IOS Press},
  year      = {2021},
  date      = {2021-01-01},
  isbn      = {978-1-64368-224-2},
  keywords  = {GRE19-04, GV/2020/030},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Mas-Candela, E.; Alfaro-Contreras, M.; Calvo-Zaragoza, J.
Sequential Next-Symbol Prediction for Optical Music Recognition Proceedings Article
In: 16th International Conference on Document Analysis and Recognition, pp. 708-722, 2021, ISBN: 978-3-030-86334-0.
Links | BibTeX | Tags: GV/2020/030
@inproceedings{k478,
  author    = {E. Mas-Candela and M. Alfaro-Contreras and J. Calvo-Zaragoza},
  title     = {Sequential Next-Symbol Prediction for Optical Music Recognition},
  booktitle = {16th International Conference on Document Analysis and Recognition},
  pages     = {708--722},
  year      = {2021},
  date      = {2021-01-01},
  urldate   = {2021-01-01},
  isbn      = {978-3-030-86334-0},
  url       = {https://link.springer.com/chapter/10.1007/978-3-030-86334-0_46},
  keywords  = {GV/2020/030},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Ríos-Vila, A.; Rizo, D.; Calvo-Zaragoza, J.
Complete Optical Music Recognition via Agnostic Transcription and Machine Translation Proceedings Article
In: 16th International Conference on Document Analysis and Recognition, pp. 661-675, 2021.
BibTeX | Tags: GV/2020/030
@inproceedings{k477,
  author    = {A. Ríos-Vila and D. Rizo and J. Calvo-Zaragoza},
  title     = {Complete Optical Music Recognition via Agnostic Transcription and Machine Translation},
  booktitle = {16th International Conference on Document Analysis and Recognition},
  pages     = {661--675},
  year      = {2021},
  date      = {2021-01-01},
  urldate   = {2021-01-01},
  keywords  = {GV/2020/030},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Castellanos, F. J.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Prototype Generation in the String Space via Approximate Median for Data Reduction in Nearest Neighbor classification Journal Article
In: Soft Computing, vol. 25, pp. 15403-15415, 2021, ISSN: 1432-7643.
BibTeX | Tags: GRE19-04, HispaMus
@article{k476,
  author    = {F. J. Castellanos and J. J. Valero-Mas and J. Calvo-Zaragoza},
  title     = {Prototype Generation in the String Space via Approximate Median for Data Reduction in Nearest Neighbor classification},
  journal   = {Soft Computing},
  volume    = {25},
  pages     = {15403--15415},
  year      = {2021},
  date      = {2021-01-01},
  urldate   = {2021-01-01},
  issn      = {1432-7643},
  keywords  = {GRE19-04, HispaMus},
  pubstate  = {published},
  tppubtype = {article}
}
Castellanos, F. J.; Gallego, A. J.; Calvo-Zaragoza, J.
Unsupervised Domain Adaptation for Document Analysis of Music Score Images Proceedings Article
In: Proc. of the 22nd International Society for Music Information Retrieval Conference, 2021.
@inproceedings{k475,
  author    = {F. J. Castellanos and A. J. Gallego and J. Calvo-Zaragoza},
  title     = {Unsupervised Domain Adaptation for Document Analysis of Music Score Images},
  booktitle = {Proc. of the 22nd International Society for Music Information Retrieval Conference},
  year      = {2021},
  date      = {2021-01-01},
  urldate   = {2021-01-01},
  keywords  = {GRE19-04},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Alfaro-Contreras, M.; Valero-Mas, J. J.; Iñesta, J. M.
Neural architectures for exploiting the components of Agnostic Notation in Optical Music Recognition Proceedings Article
In: Proc. of the 3rd International Workshop on Reading Music Systems, pp. 13-17, 2021.
BibTeX | Tags:
@inproceedings{k474,
  author    = {M. Alfaro-Contreras and J. J. Valero-Mas and J. M. Iñesta},
  title     = {Neural architectures for exploiting the components of Agnostic Notation in Optical Music Recognition},
  booktitle = {Proc. of the 3rd International Workshop on Reading Music Systems},
  pages     = {13--17},
  year      = {2021},
  date      = {2021-01-01},
  urldate   = {2021-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
López-Gutiérrez, J. C.; Valero-Mas, J. J.; Castellanos, F. J.; Calvo-Zaragoza, J.
Data Augmentation for End-to-End Optical Music Recognition Proceedings Article
In: Proceedings of the 14th IAPR International Workshop on Graphics Recognition (GREC), pp. 59-73, Springer, 2021.
BibTeX | Tags: GV/2020/030
@inproceedings{k473,
  author    = {J. C. López-Gutiérrez and J. J. Valero-Mas and F. J. Castellanos and J. Calvo-Zaragoza},
  title     = {Data Augmentation for End-to-End Optical Music Recognition},
  booktitle = {Proceedings of the 14th IAPR International Workshop on Graphics Recognition (GREC)},
  pages     = {59--73},
  publisher = {Springer},
  year      = {2021},
  date      = {2021-01-01},
  urldate   = {2021-01-01},
  keywords  = {GV/2020/030},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Ortega-Bastida, J.; Gallego, A. J.; Rico-Juan, J. R.; Albarrán, P.
A multimodal approach for regional GDP prediction using social media activity and historical information Journal Article
In: Applied Soft Computing, pp. 107693, 2021, ISSN: 1568-4946.
@article{k471,
  author    = {J. Ortega-Bastida and A. J. Gallego and J. R. Rico-Juan and P. Albarrán},
  title     = {A multimodal approach for regional GDP prediction using social media activity and historical information},
  journal   = {Applied Soft Computing},
  pages     = {107693},
  year      = {2021},
  date      = {2021-01-01},
  urldate   = {2021-01-01},
  issn      = {1568-4946},
  abstract  = {This work proposes a multimodal approach with which to predict the regional Gross Domestic Product (GDP) by combining historical GDP values with the embodied information in Twitter messages concerning the current economic condition. This proposal is of great interest, since it delivers forecasts at higher frequencies than both the official statistics (published only annually at the regional level in Spain) and the existing unofficial quarterly predictions (which rely on economic indicators that are available only after months of delay). The proposed method is based on a two-stage architecture. In the first stage, a multi-task autoencoder is initially used to obtain a GDP-related representation of tweets, which are then filtered to remove outliers and to obtain the GDP prediction from the consensus of opinions. In a second stage, this result is combined with the historical GDP values of the region using a multimodal network. The method is evaluated in four different regions of Spain using the tweets written by the most relevant economists, politicians, newspapers and institutions in each one. The results show that our approach successfully learns the evolution of the GDP using only historical information and tweets, thus making it possible to provide earlier forecasts about the regional GDP. This method also makes it possible to establish which the most or least influential opinions regarding this prediction are. As an additional exercise, we have assessed how well our method predicted the effect of the COVID-19 pandemic.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
This work proposes a multimodal approach with which to predict the regional Gross Domestic Product (GDP) by combining historical GDP values with the embodied information in Twitter messages concerning the current economic condition. This proposal is of great interest, since it delivers forecasts at higher frequencies than both the official statistics (published only annually at the regional level in Spain) and the existing unofficial quarterly predictions (which rely on economic indicators that are available only after months of delay). The proposed method is based on a two-stage architecture. In the first stage, a multi-task autoencoder is initially used to obtain a GDP-related representation of tweets, which are then filtered to remove outliers and to obtain the GDP prediction from the consensus of opinions. In a second stage, this result is combined with the historical GDP values of the region using a multimodal network. The method is evaluated in four different regions of Spain using the tweets written by the most relevant economists, politicians, newspapers and institutions in each one. The results show that our approach successfully learns the evolution of the GDP using only historical information and tweets, thus making it possible to provide earlier forecasts about the regional GDP. This method also makes it possible to establish which the most or least influential opinions regarding this prediction are. As an additional exercise, we have assessed how well our method predicted the effect of the COVID-19 pandemic.
Calvo-Zaragoza, J.; Rizo, D.; Iñesta, J. M.
Reconocimiento Óptico de Partituras (OMR) aplicado al Fondo de Música Tradicional IMF-CSIC Book Chapter
In: Gambero-Ustárroz, M.; Ros-Fábregas, E. (Ed.): Musicología en Web. Patrimonio musical y Humanidades Digitales, Chapter 4, pp. 87-109, Edition Reichenberger, 2021, ISBN: 978-3-967280-14-2.
@inbook{k470,
  author    = {J. Calvo-Zaragoza and D. Rizo and J. M. Iñesta},
  title     = {Reconocimiento Óptico de Partituras (OMR) aplicado al Fondo de Música Tradicional IMF-CSIC},
  editor    = {M. Gambero-Ustárroz and E. Ros-Fábregas},
  booktitle = {Musicología en Web. Patrimonio musical y Humanidades Digitales},
  chapter   = {4},
  pages     = {87--109},
  publisher = {Edition Reichenberger},
  year      = {2021},
  date      = {2021-01-01},
  isbn      = {978-3-967280-14-2},
  keywords  = {HispaMus},
  pubstate  = {published},
  tppubtype = {inbook}
}
de la Fuente, C.; Valero-Mas, J. J.; Castellanos, F. J.; Calvo-Zaragoza, J.
Multimodal Audio and Image Music Transcription Proceedings Article
In: Proc. of the 3rd International Workshop on Reading Music Systems, pp. 18-22, 2021.
BibTeX | Tags: ACIF/2019/042, APOSTD/2020/256, MultiScore
@inproceedings{k469,
  author    = {C. de la Fuente and J. J. Valero-Mas and F. J. Castellanos and J. Calvo-Zaragoza},
  title     = {Multimodal Audio and Image Music Transcription},
  booktitle = {Proc. of the 3rd International Workshop on Reading Music Systems},
  pages     = {18--22},
  year      = {2021},
  date      = {2021-01-01},
  urldate   = {2021-01-01},
  keywords  = {ACIF/2019/042, APOSTD/2020/256, MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Alfaro-Contreras, M.; Valero-Mas, J. J.
Exploiting the Two-Dimensional Nature of Agnostic Music Notation for Neural Optical Music Recognition Journal Article
In: Applied Sciences (Special Issue Advances in Music Reading Systems), vol. 11, no. 8, pp. 3621, 2021, ISSN: 2076-3417.
@article{k463,
  author    = {M. Alfaro-Contreras and J. J. Valero-Mas},
  title     = {Exploiting the Two-Dimensional Nature of Agnostic Music Notation for Neural Optical Music Recognition},
  journal   = {Applied Sciences},
  volume    = {11},
  number    = {8},
  pages     = {3621},
  year      = {2021},
  date      = {2021-01-01},
  issn      = {2076-3417},
  note      = {Special Issue: Advances in Music Reading Systems},
  keywords  = {GRE19-04},
  pubstate  = {published},
  tppubtype = {article}
}
Román, M. A.
An End-to-End Framework for Audio-to-Score Music Transcription PhD Thesis
2021.
@phdthesis{k462,
  author    = {M. A. Román},
  title     = {An End-to-End Framework for Audio-to-Score Music Transcription},
  editor    = {J. Calvo-Zaragoza and A. Pertusa},
  school    = {Universidad de Alicante},
  year      = {2021},
  date      = {2021-01-01},
  urldate   = {2021-01-01},
  keywords  = {HispaMus},
  pubstate  = {published},
  tppubtype = {phdthesis}
}
Gallego, A. J.; Calvo-Zaragoza, J.; Fisher, R. B.
Incremental Unsupervised Domain-Adversarial Training of Neural Networks Journal Article
In: IEEE Transactions on Neural Networks and Learning Systems, vol. 32, no. 11, pp. 4864-4878, 2021, ISSN: 2162-2388.
Abstract | Links | BibTeX | Tags: GRE19-04, HispaMus
@article{k455,
  author    = {A. J. Gallego and J. Calvo-Zaragoza and R. B. Fisher},
  title     = {Incremental Unsupervised Domain-Adversarial Training of Neural Networks},
  journal   = {IEEE Transactions on Neural Networks and Learning Systems},
  volume    = {32},
  number    = {11},
  pages     = {4864--4878},
  year      = {2021},
  date      = {2021-01-01},
  urldate   = {2021-01-01},
  issn      = {2162-2388},
  url       = {https://grfia.dlsi.ua.es/repositori/grfia/pubs/455/2001.04129.pdf},
  abstract  = {In the context of supervised statistical learning, it is typically assumed that the training set comes from the same distribution that draws the test samples. When this is not the case, the behavior of the learned model is unpredictable and becomes dependent upon the degree of similarity between the distribution of the training set and the distribution of the test set. One of the research topics that investigates this scenario is referred to as Domain Adaptation (DA). Deep neural networks brought dramatic advances in pattern recognition and that is why there have been many attempts to provide good domain adaptation algorithms for these models. Here we take a different avenue and approach the problem from an incremental point of view, where the model is adapted to the new domain iteratively. We make use of an existing unsupervised domain-adaptation algorithm to identify the target samples on which there is greater confidence about their true label. The output of the model is analyzed in different ways to determine the candidate samples. The selected samples are then added to the source training set by self-labeling, and the process is repeated until all target samples are labeled. This approach implements a form of adversarial training in which, by moving the self-labeled samples from the target to the source set, the DA algorithm is forced to look for new features after each iteration. Our results report a clear improvement with respect to the non-incremental case in several datasets, also outperforming other state-of-the-art domain adaptation algorithms.},
  keywords  = {GRE19-04, HispaMus},
  pubstate  = {published},
  tppubtype = {article}
}
In the context of supervised statistical learning, it is typically assumed that the training set comes from the same distribution that draws the test samples. When this is not the case, the behavior of the learned model is unpredictable and becomes dependent upon the degree of similarity between the distribution of the training set and the distribution of the test set. One of the research topics that investigates this scenario is referred to as Domain Adaptation (DA). Deep neural networks brought dramatic advances in pattern recognition and that is why there have been many attempts to provide good domain adaptation algorithms for these models. Here we take a different avenue and approach the problem from an incremental point of view, where the model is adapted to the new domain iteratively. We make use of an existing unsupervised domain-adaptation algorithm to identify the target samples on which there is greater confidence about their true label. The output of the model is analyzed in different ways to determine the candidate samples. The selected samples are then added to the source training set by self-labeling, and the process is repeated until all target samples are labeled. This approach implements a form of adversarial training in which, by moving the self-labeled samples from the target to the source set, the DA algorithm is forced to look for new features after each iteration. Our results report a clear improvement with respect to the non-incremental case in several datasets, also outperforming other state-of-the-art domain adaptation algorithms.
2022
Ríos-Vila, A.; Iñesta, J. M.; Calvo-Zaragoza, J.
End-to-End Full-Page Optical Music Recognition for Mensural Notation Proceedings Article
In: Proceedings of the 23rd International Society for Music Information Retrieval Conference, pp. 226-232, 2022, ISBN: 978-1-7327299-2-6.
Abstract | Links | BibTeX | Tags: Leonardo2021, MultiScore
@inproceedings{Ríos-Vila2022,
  author    = {A. Ríos-Vila and J. M. Iñesta and J. Calvo-Zaragoza},
  title     = {End-to-End Full-Page Optical Music Recognition for Mensural Notation},
  booktitle = {Proceedings of the 23rd International Society for Music Information Retrieval Conference},
  pages     = {226--232},
  year      = {2022},
  date      = {2022-12-04},
  urldate   = {2022-12-04},
  isbn      = {978-1-7327299-2-6},
  doi       = {10.5281/zenodo.7342678},
  url       = {https://zenodo.org/record/7342678/files/000026.pdf?download=1},
  abstract  = {Optical Music Recognition (OMR) systems typically consider workflows that include several steps, such as staff detection, symbol recognition, and semantic reconstruction. However, fine-tuning these systems is costly due to the specific data labeling process that has to be performed to train models for each of these steps. In this paper, we present the first segmentation-free full-page OMR system that receives a page image and directly outputs the transcription in a single step. This model requires only the annotations of full score pages, which greatly alleviates the task of manual labeling. The model has been tested with early music written in mensural notation, for which the presented approach is especially beneficial. Results show that this methodology provides a solution with promising results and establishes a new line of research for holistic transcription of music score pages.},
  keywords  = {Leonardo2021, MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Garrido-Munoz, C.; Sánchez-Hernández, A.; Castellanos, F. J.; Calvo-Zaragoza, J.
Continual Learning for Document Image Binarization Proceedings Article
In: International Conference on Pattern Recognition, pp. 1443-1449, IEEE, Montreal, QC, Canada, 2022, ISBN: 978-1-6654-9063-4.
Links | BibTeX | Tags: MultiScore
@inproceedings{Garrido:2022:CL,
  author    = {C. Garrido-Munoz and A. Sánchez-Hernández and F. J. Castellanos and J. Calvo-Zaragoza},
  title     = {Continual Learning for Document Image Binarization},
  booktitle = {International Conference on Pattern Recognition},
  pages     = {1443--1449},
  publisher = {IEEE},
  address   = {Montreal, QC, Canada},
  year      = {2022},
  date      = {2022-08-21},
  urldate   = {2022-08-21},
  doi       = {10.1109/ICPR56361.2022.9956669},
  url       = {https://ieeexplore.ieee.org/abstract/document/9956669},
  isbn      = {978-1-6654-9063-4},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Rizo, D.; Delgado, T.; Calvo-Zaragoza, J.; Madueño, A.; García-Iasci, P.
Speeding-up the encoding of mensural collections from Spanish libraries Journal Article
In: IAML 2022 Prague, 2022.
BibTeX | Tags: MultiScore
@article{k502,
  author       = {D. Rizo and T. Delgado and J. Calvo-Zaragoza and A. Madueño and P. García-Iasci},
  title        = {Speeding-up the encoding of mensural collections from Spanish libraries},
  journal      = {IAML 2022 Prague},
  booktitle    = {IAML 2022 Prague},
  organization = {IAML},
  year         = {2022},
  date         = {2022-07-01},
  keywords     = {MultiScore},
  pubstate     = {published},
  tppubtype    = {article}
}
Alfaro-Contreras, M.; Ríos-Vila, A.; Valero-Mas, J. J.; Iñesta, J. M.; Calvo-Zaragoza, J.
Decoupling music notation to improve end-to-end Optical Music Recognition Journal Article
In: Pattern Recognition Letters, vol. 158, pp. 157-163, 2022, ISSN: 0167-8655.
Abstract | Links | BibTeX | Tags: MultiScore
@article{Alfaro-Contreras2022,
  author    = {M. Alfaro-Contreras and A. Ríos-Vila and J. J. Valero-Mas and J. M. Iñesta and J. Calvo-Zaragoza},
  title     = {Decoupling music notation to improve end-to-end Optical Music Recognition},
  journal   = {Pattern Recognition Letters},
  volume    = {158},
  pages     = {157--163},
  year      = {2022},
  date      = {2022-06-01},
  urldate   = {2022-06-01},
  doi       = {10.1016/j.patrec.2022.04.032},
  issn      = {0167-8655},
  abstract  = {Inspired by the Text Recognition field, end-to-end schemes based on Convolutional Recurrent Neural Networks (CRNN) trained with the Connectionist Temporal Classification (CTC) loss function are considered one of the current state-of-the-art techniques for staff-level Optical Music Recognition (OMR). Unlike text symbols, music-notation elements may be defined as a combination of (i) a shape primitive located in (ii) a certain position in a staff. However, this double nature is generally neglected in the learning process, as each combination is treated as a single token. In this work, we study whether exploiting such particularity of music notation actually benefits the recognition performance and, if so, which approach is the most appropriate. For that, we thoroughly review existing specific approaches that explore this premise and propose different combinations of them. Furthermore, considering the limitations observed in such approaches, a novel decoding strategy specifically designed for OMR is proposed. The results obtained with four different corpora of historical manuscripts show the relevance of leveraging this double nature of music notation since it outperforms the standard approaches where it is ignored. In addition, the proposed decoding leads to significant reductions in the error rates with respect to the other cases.},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {article}
}
Alfaro-Contreras, M.; Valero-Mas, J. J.; Iñesta, J. M.; Calvo-Zaragoza, J.
Insights into transfer learning between image and audio music transcription Proceedings Article
In: Sound and Music Computing Conference, pp. 295-301, Zenodo, Saint-Étienne, France, 2022.
Abstract | Links | BibTeX | Tags: MultiScore
@inproceedings{Alfaro-Contreras2022b,
  author    = {M. Alfaro-Contreras and J. J. Valero-Mas and J. M. Iñesta and J. Calvo-Zaragoza},
  title     = {Insights into transfer learning between image and audio music transcription},
  booktitle = {Sound and Music Computing Conference},
  pages     = {295--301},
  publisher = {Zenodo},
  address   = {Saint-Étienne, France},
  year      = {2022},
  date      = {2022-06-01},
  urldate   = {2022-06-01},
  doi       = {10.5281/zenodo.6797870},
  abstract  = {Optical Music Recognition (OMR) and Automatic Music Transcription (AMT) stand for the research fields that devise methods to transcribe music sources---documents or audio signals, respectively---into a structured digital format. Historically, they have followed different approaches to achieve the same goal. However, their recent definition in terms of sequence labeling tasks gathers them under a common formulation framework. Under this premise, one may wonder if there exist any synergies between the two fields that could be exploited to improve the individual recognition rates in their respective domains. In this work, we aim to further explore this question from a Transfer Learning (TL) point of view in the context of neural end-to-end recognition models. More precisely, we consider a music transcription system, trained on either image or audio data, and adapt its performance to the unseen domain during the training phase using different TL schemes. Results show that knowledge transfer slightly boosts model performance with sufficient available data, but it is not properly leveraged when the latter condition is not met. This opens up a new promising, yet challenging, research path towards building an effective bridge between two solutions of the same problem.},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Iñesta, J. M.; Thomae, M. E.
An On-line Tool for Transcription of Music Scores: MuRET Presentation
Montreal (Canada), 01.05.2022.
Abstract | Links | BibTeX | Tags: HispaMus
@misc{k520,
  author       = {J. M. Iñesta and M. E. Thomae},
  title        = {An On-line Tool for Transcription of Music Scores: MuRET},
  booktitle    = {1st Int. Conf. The Sound of Future/The Future of Sound},
  address      = {Montreal (Canada)},
  organization = {CIRMMT},
  year         = {2022},
  date         = {2022-05-01},
  urldate      = {2022-05-01},
  abstract     = {MuRET is a Machine-Learning Optical Music Recognition (OMR) research tool. It runs in the browser. It has been created for helping in the transcription of music collections, for experimenting with machine learning algorithms for OMR and it's capable of working well with different notations and writings. Why using Machine Learning? Instead of designing a system to solve the task, we have designed a system to learn how to solve the task from sets of labeled (solved) images. This way it's adaptable to new (previously unseen) collections.},
  key          = {OMR, Machine Learning},
  keywords     = {HispaMus},
  pubstate     = {published},
  tppubtype    = {presentation}
}
Sánchez-Ferrer, A.; Gallego, A. J.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
The CleanSea Set: A Benchmark Corpus for Underwater Debris Detection and Recognition Proceedings Article
In: 10th Iberian Conference on Pattern Recognition and Image Analysis (IbPRIA), pp. 616–628, Aveiro, Portugal, 2022, ISBN: 978-3-031-04881-4.
@inproceedings{k512,
  author    = {A. Sánchez-Ferrer and A. J. Gallego and J. J. Valero-Mas and J. Calvo-Zaragoza},
  title     = {The CleanSea Set: A Benchmark Corpus for Underwater Debris Detection and Recognition},
  booktitle = {10th Iberian Conference on Pattern Recognition and Image Analysis (IbPRIA)},
  pages     = {616--628},
  address   = {Aveiro, Portugal},
  year      = {2022},
  date      = {2022-05-01},
  isbn      = {978-3-031-04881-4},
  abstract  = {In recent years, the large amount of debris scattered throughout the ocean is becoming one of the major pollution problems, causing extinction of species and accelerating the degradation of our planet, among other environmental issues. Since the manual treatment of this waste represents a considerably tedious task, autonomous frameworks are gaining attention. Due to their reported good performance, such frameworks generally rely on Deep Learning techniques. However, the scarcity of data coupled with the inherent difficulties of the field---debris with different shapes and colors due to long-lasting exposure to the ocean, illumination variability or sea conditions---makes detecting underwater objects a particularly challenging task. The contribution of this work to the field is double: on the one hand, we introduce a novel data collection for supervised learning---the CleanSea corpus---annotated at both the bound box and contour levels of the objects to contribute with the research and progress in the field and on the other hand, we devise and optimize a recognition model based on the reference Mask Object-Based Convolutional Neural Network for this set to establish a benchmark for future comparison and assess its performance in both simulated and real-world scenarios. Results show the relevance of the contributions as the devised model is capable of properly addressing the detection and recognition of general debris when trained with the introduced CleanSea corpus.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Desmond, K.; Pugin, L.; Regimbal, J.; Rizo, D.; Sapp, C. S.; Thomae, M. E.
Encoding Polyphony from Medieval Manuscripts Notated in Mensural Notation Proceedings Article
In: Music Encoding Conference Proceedings 2021, pp. 197–219, Humanities Commons, 2022, ISBN: 978-84-1302-173-7.
BibTeX | Tags: MultiScore
@inproceedings{k497,
  author    = {K. Desmond and L. Pugin and J. Regimbal and D. Rizo and C. S. Sapp and M. E. Thomae},
  title     = {Encoding Polyphony from Medieval Manuscripts Notated in Mensural Notation},
  booktitle = {Music Encoding Conference Proceedings 2021},
  pages     = {197--219},
  publisher = {Humanities Commons},
  year      = {2022},
  date      = {2022-05-01},
  urldate   = {2022-05-01},
  isbn      = {978-84-1302-173-7},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Münnich, S.; Rizo, D.
Foreword Proceedings Article
In: Münnich, S.; Rizo, D. (Ed.): Music Encoding Conference Proceedings 2021, pp. vii–viii, Humanities Commons, 2022, ISBN: 978-84-1302-173-7.
BibTeX | Tags: MultiScore
@inproceedings{k496,
  author    = {S. Münnich and D. Rizo},
  title     = {Foreword},
  editor    = {S. Münnich and D. Rizo},
  booktitle = {Music Encoding Conference Proceedings 2021},
  chapter   = {1},
  pages     = {vii--viii},
  publisher = {Humanities Commons},
  year      = {2022},
  date      = {2022-05-01},
  urldate   = {2022-05-01},
  isbn      = {978-84-1302-173-7},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Münnich, S.; Rizo, D.
Music Encoding Conference Proceedings 2022. Book
Humanities Commons, 2022, ISBN: 978-84-1302-173-7.
BibTeX | Tags: MultiScore
@book{k495,
  author    = {S. Münnich and D. Rizo},
  editor    = {S. Münnich and D. Rizo},
  title     = {Music Encoding Conference Proceedings 2021},
  publisher = {Humanities Commons},
  year      = {2022},
  date      = {2022-05-01},
  urldate   = {2022-05-01},
  isbn      = {978-84-1302-173-7},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {book}
}
Mas-Candela, E.; Ríos-Vila, A.; Calvo-Zaragoza, J.
A First Approach to Image Transformation Sequence Retrieval Proceedings Article
In: Iberian Pattern Recognition and Image Analysis, IbPRIA 2022., pp. 321-332, Aveiro, Portugal, 2022.
BibTeX | Tags: MultiScore
@inproceedings{k494,
  author    = {E. Mas-Candela and A. Ríos-Vila and J. Calvo-Zaragoza},
  title     = {A First Approach to Image Transformation Sequence Retrieval},
  booktitle = {Iberian Pattern Recognition and Image Analysis, IbPRIA 2022},
  pages     = {321--332},
  address   = {Aveiro, Portugal},
  year      = {2022},
  date      = {2022-05-01},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Garrido-Munoz, C.; Ríos-Vila, A.; Calvo-Zaragoza, J.
Retrieval of Music-Notation Primitives via Image-to-Sequence Approaches Proceedings Article
In: Iberian Pattern Recognition and Image Analysis, IbPRIA 2022., pp. 482-492, Aveiro, Portugal, 2022, ISBN: 978-3-031-04880-7.
BibTeX | Tags: Leonardo2021
@inproceedings{k493,
  author    = {C. Garrido-Munoz and A. Ríos-Vila and J. Calvo-Zaragoza},
  title     = {Retrieval of Music-Notation Primitives via Image-to-Sequence Approaches},
  booktitle = {Iberian Pattern Recognition and Image Analysis, IbPRIA 2022},
  pages     = {482--492},
  address   = {Aveiro, Portugal},
  year      = {2022},
  date      = {2022-05-01},
  isbn      = {978-3-031-04880-7},
  keywords  = {Leonardo2021},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Ríos-Vila, A.; Iñesta, J. M.; Calvo-Zaragoza, J.
On the Use of Transformers for End-to-End Optical Music Recognition Proceedings Article
In: Iberian Pattern Recognition and Image Analysis, IbPRIA 2022., pp. 470-481, Aveiro, Portugal, 2022, ISBN: 978-3-031-04880-7.
BibTeX | Tags: MultiScore
@inproceedings{k492,
  author    = {A. Ríos-Vila and J. M. Iñesta and J. Calvo-Zaragoza},
  title     = {On the Use of Transformers for End-to-End Optical Music Recognition},
  booktitle = {Iberian Pattern Recognition and Image Analysis, IbPRIA 2022},
  pages     = {470--481},
  address   = {Aveiro, Portugal},
  year      = {2022},
  date      = {2022-05-01},
  urldate   = {2022-05-01},
  isbn      = {978-3-031-04880-7},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Arroyo, V.; Valero-Mas, J. J.; Calvo-Zaragoza, J.; Pertusa, A.
Neural audio-to-score music transcription for unconstrained polyphony using compact output representations Proceedings Article
In: Proc. of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), IEEE, Singapore, 2022.
BibTeX | Tags: MultiScore
@inproceedings{k487,
  author    = {V. Arroyo and J. J. Valero-Mas and J. Calvo-Zaragoza and A. Pertusa},
  title     = {Neural audio-to-score music transcription for unconstrained polyphony using compact output representations},
  booktitle = {Proc. of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  publisher = {IEEE},
  address   = {Singapore},
  year      = {2022},
  date      = {2022-05-01},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Garrido-Munoz, C.; Ríos-Vila, A.; Calvo-Zaragoza, J.
A holistic approach for image-to-graph: application to optical music recognition Journal Article
In: International Journal on Document Analysis and Recognition, 2022.
BibTeX | Tags: Leonardo2021
@article{k522,
  author    = {C. Garrido-Munoz and A. Ríos-Vila and J. Calvo-Zaragoza},
  title     = {A holistic approach for image-to-graph: application to optical music recognition},
  journal   = {International Journal on Document Analysis and Recognition},
  year      = {2022},
  date      = {2022-01-01},
  urldate   = {2022-01-01},
  keywords  = {Leonardo2021},
  pubstate  = {published},
  tppubtype = {article}
}
Alashhab, S.; Gallego, A. J.; Lozano, M. Á.
Efficient gesture recognition for the assistance of visually impaired people using multi-head neural networks Journal Article
In: Engineering Applications of Artificial Intelligence, vol. 114, pp. 105188, 2022, ISSN: 0952-1976.
@article{k511,
  author    = {S. Alashhab and A. J. Gallego and M. Á. Lozano},
  title     = {Efficient gesture recognition for the assistance of visually impaired people using multi-head neural networks},
  journal   = {Engineering Applications of Artificial Intelligence},
  volume    = {114},
  pages     = {105188},
  year      = {2022},
  date      = {2022-01-01},
  issn      = {0952-1976},
  abstract  = {Existing research for the assistance of visually impaired people mainly focus on solving a single task (such as reading a text or detecting an obstacle), hence forcing the user to switch applications to perform other actions. This paper proposes an interactive system for mobile devices controlled by hand gestures that allow the user to control the device and use several assistance tools by making simple static and dynamic hand gestures (e.g., pointing a finger at an object will show a description of it). The system is based on a multi-head neural network, which initially detects and classifies the gestures, and subsequently, depending on the gesture detected, performs a second stage that carries out the corresponding action. This architecture optimizes the resources required to perform different tasks, it takes advantage of the information obtained from an initial backbone to perform different processes in a second stage. To train and evaluate the system, a dataset with about 40k images was manually compiled and labeled including different types of hand gestures, backgrounds (indoors and outdoors), lighting conditions, etc. This dataset contains synthetic gestures (whose objective is to pre-train the system to improve the results) and real images captured using different mobile phones. The comparison made with nearly 50 state-of-the-art methods shows competitive results as regards the different actions performed by the system, such as the accuracy of classification and localization of gestures, or the generation of descriptions for objects and scenes.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Gallego, A. J.; Rico-Juan, J. R.; Valero-Mas, J. J.
Efficient k-nearest neighbor search based on clustering and adaptive k values Journal Article
In: Pattern Recognition, vol. 122, pp. 108356, 2022, ISSN: 0031-3203.
@article{k510,
  author    = {A. J. Gallego and J. R. Rico-Juan and J. J. Valero-Mas},
  title     = {Efficient k-nearest neighbor search based on clustering and adaptive k values},
  journal   = {Pattern Recognition},
  volume    = {122},
  pages     = {108356},
  year      = {2022},
  date      = {2022-01-01},
  issn      = {0031-3203},
  abstract  = {The k-Nearest Neighbor (kNN) algorithm is widely used in the supervised learning field and, particularly, in search and classification tasks, owing to its simplicity, competitive performance, and good statistical properties. However, its inherent inefficiency prevents its use in most modern applications due to the vast amount of data that the current technological evolution generates, being thus the optimization of kNN-based search strategies of particular interest. This paper introduces the caKD+ algorithm, which tackles this limitation by combining the use of feature learning techniques, clustering methods, adaptive search parameters per cluster, and the use of pre-calculated K-Dimensional Tree structures, and results in a highly efficient search method. This proposal has been evaluated using 10 datasets and the results show that caKD+ significantly outperforms 16 state-of-the-art efficient search methods while still depicting such an accurate performance as the one by the exhaustive kNN search.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Bernabeu, M.
Búsqueda de imágenes similares usando técnicas de aprendizaje automático PhD Thesis
2022.
BibTeX | Tags:
@phdthesis{k509,
  author       = {M. Bernabeu},
  title        = {Búsqueda de imágenes similares usando técnicas de aprendizaje automático},
  editor       = {Antonio Pertusa and Antonio Javier Gallego},
  school       = {Universidad de Alicante},
  organization = {Universidad de Alicante},
  year         = {2022},
  date         = {2022-01-01},
  urldate      = {2022-01-01},
  keywords     = {},
  pubstate     = {published},
  tppubtype    = {phdthesis}
}
Alashhab, S.
Aplicaciones de visión artificial para ayuda a personas con dificultades visuales PhD Thesis
2022.
BibTeX | Tags:
@phdthesis{k508,
  author       = {S. Alashhab},
  title        = {Aplicaciones de visión artificial para ayuda a personas con dificultades visuales},
  editor       = {Miguel Angel Lozano and Antonio Javier Gallego},
  school       = {Universidad de Alicante},
  organization = {Universidad de Alicante},
  year         = {2022},
  date         = {2022-01-01},
  urldate      = {2022-01-01},
  keywords     = {},
  pubstate     = {published},
  tppubtype    = {phdthesis}
}
Sáez-Pérez, J.; Gallego, A. J.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Domain Adaptation in Robotics: A Study Case on Kitchen Utensil Recognition Proceedings Article
In: 10th Iberian Conference on Pattern Recognition and Image Analysis (IbPRIA), 2022.
@inproceedings{k506,
  author    = {J. Sáez-Pérez and A. J. Gallego and J. J. Valero-Mas and J. Calvo-Zaragoza},
  title     = {Domain Adaptation in Robotics: A Study Case on Kitchen Utensil Recognition},
  booktitle = {10th Iberian Conference on Pattern Recognition and Image Analysis (IbPRIA)},
  year      = {2022},
  date      = {2022-01-01},
  keywords  = {ROMA},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Alfaro-Contreras, M.; Ríos-Vila, A.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Few-Shot Music Symbol Classification via Self-Supervised Learning and Nearest Neighbor Proceedings Article
In: Pattern Recognition. ICPR International Workshops and Challenges, 2022.
BibTeX | Tags: MultiScore
@inproceedings{k504,
  author    = {M. Alfaro-Contreras and A. Ríos-Vila and J. J. Valero-Mas and J. Calvo-Zaragoza},
  title     = {Few-Shot Music Symbol Classification via Self-Supervised Learning and Nearest Neighbor},
  booktitle = {Pattern Recognition. ICPR International Workshops and Challenges},
  year      = {2022},
  date      = {2022-01-01},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
de la Fuente, C.; Castellanos, F. J.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Multimodal Recognition of Frustration during Game-Play with Deep Neural Networks Journal Article
In: Multimedia Tools and Applications, 2022.
BibTeX | Tags: ACIF/2019/042, APOSTD/2020/256
@article{k501,
  author    = {C. de la Fuente and F. J. Castellanos and J. J. Valero-Mas and J. Calvo-Zaragoza},
  title     = {Multimodal Recognition of Frustration during Game-Play with Deep Neural Networks},
  journal   = {Multimedia Tools and Applications},
  year      = {2022},
  date      = {2022-01-01},
  urldate   = {2022-01-01},
  keywords  = {ACIF/2019/042, APOSTD/2020/256},
  pubstate  = {published},
  tppubtype = {article}
}
Castellanos, F. J.; Gallego, A. J.; Calvo-Zaragoza, J.; Fujinaga, I.
Domain Adaptation for Staff-Region Retrieval of Music Score Images Journal Article
In: International Journal on Document Analysis and Recognition, vol. 25, iss. Special Issue: ICFHR 2022, pp. 281-292, 2022, ISSN: 1433-2825.
BibTeX | Tags: MultiScore
@article{Castellanos:2022:DAStaff,
  author    = {F. J. Castellanos and A. J. Gallego and J. Calvo-Zaragoza and I. Fujinaga},
  title     = {Domain Adaptation for Staff-Region Retrieval of Music Score Images},
  journal   = {International Journal on Document Analysis and Recognition},
  volume    = {25},
  issue     = {Special Issue: ICFHR 2022},
  pages     = {281--292},
  year      = {2022},
  date      = {2022-01-01},
  urldate   = {2022-01-01},
  issn      = {1433-2825},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {article}
}
Ríos-Vila, A.; Iñesta, J. M.; Calvo-Zaragoza, J.
End-to-End Full-Page Optical Music Recognition for Mensural Notation Proceedings Article
In: Proceedings of the 23rd International Society for Music Information Retrieval Conference, ISMIR, Bangalore, India, 2022.
Abstract | BibTeX | Tags: Leonardo2021, MultiScore
@inproceedings{k499,
  author    = {A. Ríos-Vila and J. M. Iñesta and J. Calvo-Zaragoza},
  title     = {End-to-End Full-Page Optical Music Recognition for Mensural Notation},
  booktitle = {Proceedings of the 23rd International Society for Music Information Retrieval Conference, ISMIR},
  pages     = {226--232},
  address   = {Bangalore, India},
  year      = {2022},
  date      = {2022-01-01},
  urldate   = {2022-01-01},
  doi       = {10.5281/zenodo.7342678},
  url       = {https://zenodo.org/record/7342678/files/000026.pdf?download=1},
  isbn      = {978-1-7327299-2-6},
  abstract  = {Optical Music Recognition (OMR) systems typically consider workflows that include several steps, such as staff detection, symbol recognition, and semantic reconstruction. However, fine-tuning these systems is costly due to the specific data labeling process that has to be performed to train models for each of these steps. In this paper, we present the first segmentation-free full-page OMR system that receives a page image and directly outputs the transcription in a single step. This model requires only the annotations of full score pages, which greatly alleviates the task of manual labeling. The model has been tested with early music written in mensural notation, for which the presented approach is especially beneficial. Results show that this methodology provides a solution with promising results and establishes a new line of research for holistic transcription of music score pages.},
  keywords  = {Leonardo2021, MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Rosello, A.; Ayllon, E.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Test Sample Selection for Handwriting Recognition Through Language Modeling Proceedings Article
In: Pattern Recognition and Image Analysis - 10th Iberian Conference, IbPRIA 2022, Aveiro, Portugal, May 4-6, 2022, Proceedings, 2022.
BibTeX | Tags: MultiScore
@inproceedings{k498,
  author    = {A. Rosello and E. Ayllon and J. J. Valero-Mas and J. Calvo-Zaragoza},
  title     = {Test Sample Selection for Handwriting Recognition Through Language Modeling},
  booktitle = {Pattern Recognition and Image Analysis - 10th Iberian Conference, IbPRIA 2022, Aveiro, Portugal, May 4-6, 2022, Proceedings},
  year      = {2022},
  date      = {2022-01-01},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Castellanos, F. J.; Garrido-Munoz, C.; Ríos-Vila, A.; Calvo-Zaragoza, J.
Region-based Layout Analysis of Music Score Images Journal Article
In: Expert Systems with Applications, pp. 118211, 2022, ISSN: 0957-4174.
BibTeX | Tags: MultiScore
@article{k486,
  author    = {F. J. Castellanos and C. Garrido-Munoz and A. Ríos-Vila and J. Calvo-Zaragoza},
  title     = {Region-based Layout Analysis of Music Score Images},
  journal   = {Expert Systems with Applications},
  pages     = {118211},
  year      = {2022},
  date      = {2022-01-01},
  urldate   = {2022-01-01},
  issn      = {0957-4174},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {article}
}
de la Fuente, C.; Valero-Mas, J. J.; Castellanos, F. J.; Calvo-Zaragoza, J.
Multimodal Image and Audio Music Transcription Journal Article
In: International Journal of Multimedia Information Retrieval, vol. 11, pp. 77-84, 2022.
BibTeX | Tags: MultiScore
@article{k479,
  author    = {C. de la Fuente and J. J. Valero-Mas and F. J. Castellanos and J. Calvo-Zaragoza},
  title     = {Multimodal Image and Audio Music Transcription},
  journal   = {International Journal of Multimedia Information Retrieval},
  volume    = {11},
  pages     = {77--84},
  year      = {2022},
  date      = {2022-01-01},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {article}
}
2021
Ríos-Vila, A.; Calvo-Zaragoza, J.; Iñesta, J. M.
CTC-based end-to-end approach for full page Optical Music Recognition Proceedings Article
In: Proceedings of the 14th Machine Learning and Music Workshop, pp. 11, 2021.
BibTeX | Tags: MultiScore
@inproceedings{k488,
  author    = {A. Ríos-Vila and J. Calvo-Zaragoza and J. M. Iñesta},
  title     = {CTC-based end-to-end approach for full page Optical Music Recognition},
  booktitle = {Proceedings of the 14th Machine Learning and Music Workshop},
  pages     = {11},
  year      = {2021},
  date      = {2021-12-01},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Castellanos, F. J.; Gallego, A. J.; Calvo-Zaragoza, J.
An Unsupervised Domain Adaptation framework for Layout Analysis of Music Score Images Proceedings Article
In: Proceedings of the 14th Machine Learning and Music Workshop, pp. 6, 2021.
@inproceedings{k482,
  author    = {F. J. Castellanos and A. J. Gallego and J. Calvo-Zaragoza},
  title     = {An Unsupervised Domain Adaptation framework for Layout Analysis of Music Score Images},
  booktitle = {Proceedings of the 14th Machine Learning and Music Workshop},
  pages     = {6},
  year      = {2021},
  date      = {2021-12-01},
  keywords  = {GRE19-04, ROMA},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Calvo-Zaragoza, J.; Pertusa, A.; Gallego, A. J.; Iñesta, J. M.; Micó, L.; Oncina, J.; Perez-Sancho, C.; de León, P. J. Ponce; Rizo, D.
MultiScore Project: Multimodal Transcription of Music Scores Proceedings Article
In: Proceedings of the 14th Machine Learning and Music Workshop, pp. 3, 2021.
Links | BibTeX | Tags: MultiScore
@inproceedings{k481,
  author    = {J. Calvo-Zaragoza and A. Pertusa and A. J. Gallego and J. M. Iñesta and L. Micó and J. Oncina and C. Perez-Sancho and P. J. Ponce de León and D. Rizo},
  title     = {MultiScore Project: Multimodal Transcription of Music Scores},
  booktitle = {Proceedings of the 14th Machine Learning and Music Workshop},
  pages     = {3},
  year      = {2021},
  date      = {2021-12-01},
  urldate   = {2021-12-01},
  url       = {https://grfia.dlsi.ua.es/repositori/grfia/pubs/481/MML2021__MultiScore_Final.pdf},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Alfaro-Contreras, M.; Rizo, D.; Iñesta, J. M.; Calvo-Zaragoza, J.
OMR-assisted transcription: a case study with early prints Proceedings Article
In: Proceedings of the 22nd International Society for Music Information Retrieval Conference, ISMIR, pp. 35-41, 2021, ISBN: 978-1-7327299-0-2.
BibTeX | Tags: MultiScore
@inproceedings{k483,
  author    = {M. Alfaro-Contreras and D. Rizo and J. M. Iñesta and J. Calvo-Zaragoza},
  title     = {OMR-assisted transcription: a case study with early prints},
  booktitle = {Proceedings of the 22nd International Society for Music Information Retrieval Conference, ISMIR},
  pages     = {35--41},
  year      = {2021},
  date      = {2021-11-01},
  urldate   = {2021-11-01},
  isbn      = {978-1-7327299-0-2},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Castellanos, F. J.; Gallego, A. J.; Calvo-Zaragoza, J.
Unsupervised Neural Domain Adaptation for Document Image Binarization Journal Article
In: Pattern Recognition, vol. 119, pp. 108099, 2021.
BibTeX | Tags: GRE19-04, HispaMus
@article{k467,
  author    = {F. J. Castellanos and A. J. Gallego and J. Calvo-Zaragoza},
  title     = {Unsupervised Neural Domain Adaptation for Document Image Binarization},
  journal   = {Pattern Recognition},
  volume    = {119},
  pages     = {108099},
  year      = {2021},
  date      = {2021-11-01},
  urldate   = {2021-11-01},
  keywords  = {GRE19-04, HispaMus},
  pubstate  = {published},
  tppubtype = {article}
}
Madueño, A.; Ríos-Vila, A.; Rizo, D.
Automatized incipit encoding at the Andalusian Music Documentation Center Proceedings Article
In: Digital Libraries for Musicology / IAML Joint Session, 2021.
BibTeX | Tags: MultiScore
@inproceedings{k484,
  author    = {A. Madueño and A. Ríos-Vila and D. Rizo},
  title     = {Automatized incipit encoding at the Andalusian Music Documentation Center},
  booktitle = {Digital Libraries for Musicology / IAML Joint Session},
  year      = {2021},
  date      = {2021-07-01},
  urldate   = {2021-07-01},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Castellanos, F. J.; Gallego, A. J.
Unsupervised Neural Document Analysis for Music Score Images Proceedings Article
In: Proc. of the 3rd International Workshop on Reading Music Systems, pp. 50-54, 2021.
BibTeX | Tags: GRE19-04, HispaMus
@inproceedings{k468,
  author    = {F. J. Castellanos and A. J. Gallego},
  title     = {Unsupervised Neural Document Analysis for Music Score Images},
  booktitle = {Proc. of the 3rd International Workshop on Reading Music Systems},
  pages     = {50--54},
  year      = {2021},
  date      = {2021-07-01},
  keywords  = {GRE19-04, HispaMus},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Ríos-Vila, A.; Esplà-Gomis, M.; Rizo, D.; de León, P. J. Ponce; Iñesta, J. M.
Applying Automatic Translation for Optical Music Recognition’s Encoding Step Journal Article
In: Applied Sciences, vol. 11, no. 9, 2021, ISSN: 2076-3417.
Abstract | BibTeX | Tags: GV/2020/030, HispaMus
@article{k464,
  author    = {A. Ríos-Vila and M. Esplà-Gomis and D. Rizo and P. J. Ponce de León and J. M. Iñesta},
  title     = {Applying Automatic Translation for Optical Music Recognition’s Encoding Step},
  journal   = {Applied Sciences},
  volume    = {11},
  number    = {9},
  year      = {2021},
  date      = {2021-04-01},
  urldate   = {2021-04-01},
  issn      = {2076-3417},
  abstract  = {Optical music recognition is a research field whose efforts have been mainly focused, due to the difficulties involved in its processes, on document and image recognition. However, there is a final step after the recognition phase that has not been properly addressed or discussed, and which is relevant to obtaining a standard digital score from the recognition process: the step of encoding data into a standard file format. In this paper, we address this task by proposing and evaluating the feasibility of using machine translation techniques, using statistical approaches and neural systems, to automatically convert the results of graphical encoding recognition into a standard semantic format, which can be exported as a digital score. We also discuss the implications, challenges and details to be taken into account when applying machine translation techniques to music languages, which are very different from natural human languages. This needs to be addressed prior to performing experiments and has not been reported in previous works. We also describe and detail experimental results, and conclude that applying machine translation techniques is a suitable solution for this task, as they have proven to obtain robust results.},
  keywords  = {GV/2020/030, HispaMus},
  pubstate  = {published},
  tppubtype = {article}
}
Cuevas-Velasquez, H.; Gallego, A. J.; Fisher, R. B.
Two Heads are Better than One: Geometric-Latent Attention for Point Cloud Classification and Segmentation Proceedings Article
In: The 32nd British Machine Vision Conference (BMVC), 2021.
BibTeX | Tags:
@inproceedings{k513,
  author    = {H. Cuevas-Velasquez and A. J. Gallego and R. B. Fisher},
  title     = {Two Heads are Better than One: Geometric-Latent Attention for Point Cloud Classification and Segmentation},
  booktitle = {The 32nd British Machine Vision Conference (BMVC)},
  year      = {2021},
  date      = {2021-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Ortega-Bastida, J.
Aproximación de soluciones multimodales para aplicaciones Deep Learning PhD Thesis
2021.
BibTeX | Tags:
@phdthesis{k507,
title = {Aproximación de soluciones multimodales para aplicaciones Deep Learning},
author = {J. Ortega-Bastida},
editor = {J. Ramón Rico-Juan and A. J. Gallego},
year = {2021},
date = {2021-01-01},
urldate = {2021-01-01},
school = {Universidad de Alicante},
keywords = {},
pubstate = {published},
tppubtype = {phdthesis}
}
Garrido-Munoz, C.; Sánchez-Hernández, A.; Castellanos, F. J.; Calvo-Zaragoza, J.
Domain Adaptation for Document Image Binarization via Domain Classification Proceedings Article
In: Tallón-Ballesteros, A. J. (Ed.): Frontiers in Artificial Intelligence and Applications, pp. 569-582, IOS Press, 2021, ISBN: 978-1-64368-224-2.
BibTeX | Tags: GRE19-04, GV/2020/030
@inproceedings{k480,
title = {Domain Adaptation for Document Image Binarization via Domain Classification},
author = {C. Garrido-Munoz and A. Sánchez-Hernández and F. J. Castellanos and J. Calvo-Zaragoza},
editor = {A. J. Tallón-Ballesteros},
isbn = {978-1-64368-224-2},
year = {2021},
date = {2021-01-01},
booktitle = {Frontiers in Artificial Intelligence and Applications},
pages = {569-582},
publisher = {IOS Press},
keywords = {GRE19-04, GV/2020/030},
pubstate = {published},
tppubtype = {inproceedings}
}
Mas-Candela, E.; Alfaro-Contreras, M.; Calvo-Zaragoza, J.
Sequential Next-Symbol Prediction for Optical Music Recognition Proceedings Article
In: 16th International Conference on Document Analysis and Recognition, pp. 708-722, 2021, ISBN: 978-3-030-86334-0.
Links | BibTeX | Tags: GV/2020/030
@inproceedings{k478,
title = {Sequential Next-Symbol Prediction for Optical Music Recognition},
author = {E. Mas-Candela and M. Alfaro-Contreras and J. Calvo-Zaragoza},
url = {https://link.springer.com/chapter/10.1007/978-3-030-86334-0_46},
doi = {10.1007/978-3-030-86334-0_46},
isbn = {978-3-030-86334-0},
year = {2021},
date = {2021-01-01},
urldate = {2021-01-01},
booktitle = {16th International Conference on Document Analysis and Recognition},
pages = {708-722},
keywords = {GV/2020/030},
pubstate = {published},
tppubtype = {inproceedings}
}
Ríos-Vila, A.; Rizo, D.; Calvo-Zaragoza, J.
Complete Optical Music Recognition via Agnostic Transcription and Machine Translation Proceedings Article
In: 16th International Conference on Document Analysis and Recognition, pp. 661-675, 2021.
BibTeX | Tags: GV/2020/030
@inproceedings{k477,
  author    = {A. Ríos-Vila and D. Rizo and J. Calvo-Zaragoza},
  title     = {Complete Optical Music Recognition via Agnostic Transcription and Machine Translation},
  booktitle = {16th International Conference on Document Analysis and Recognition},
  pages     = {661-675},
  year      = {2021},
  date      = {2021-01-01},
  urldate   = {2021-01-01},
  keywords  = {GV/2020/030},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Castellanos, F. J.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Prototype Generation in the String Space via Approximate Median for Data Reduction in Nearest Neighbor Classification Journal Article
In: Soft Computing, vol. 25, pp. 15403-15415, 2021.
BibTeX | Tags: GRE19-04, HispaMus
@article{k476,
title = {Prototype Generation in the String Space via Approximate Median for Data Reduction in Nearest Neighbor Classification},
author = {F. J. Castellanos and J. J. Valero-Mas and J. Calvo-Zaragoza},
year = {2021},
date = {2021-01-01},
urldate = {2021-01-01},
journal = {Soft Computing},
volume = {25},
pages = {15403-15415},
keywords = {GRE19-04, HispaMus},
pubstate = {published},
tppubtype = {article}
}
Castellanos, F. J.; Gallego, A. J.; Calvo-Zaragoza, J.
Unsupervised Domain Adaptation for Document Analysis of Music Score Images Proceedings Article
In: Proc. of the 22nd International Society for Music Information Retrieval Conference, 2021.
@inproceedings{k475,
  author    = {F. J. Castellanos and A. J. Gallego and J. Calvo-Zaragoza},
  title     = {Unsupervised Domain Adaptation for Document Analysis of Music Score Images},
  booktitle = {Proc. of the 22nd International Society for Music Information Retrieval Conference},
  year      = {2021},
  date      = {2021-01-01},
  urldate   = {2021-01-01},
  keywords  = {GRE19-04},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Alfaro-Contreras, M.; Valero-Mas, J. J.; Iñesta, J. M.
Neural architectures for exploiting the components of Agnostic Notation in Optical Music Recognition Proceedings Article
In: Proc. of the 3rd International Workshop on Reading Music Systems, pp. 13-17, 2021.
BibTeX | Tags:
@inproceedings{k474,
  author    = {M. Alfaro-Contreras and J. J. Valero-Mas and J. M. Iñesta},
  title     = {Neural architectures for exploiting the components of Agnostic Notation in Optical Music Recognition},
  booktitle = {Proc. of the 3rd International Workshop on Reading Music Systems},
  pages     = {13-17},
  year      = {2021},
  date      = {2021-01-01},
  urldate   = {2021-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
López-Gutiérrez, J. C.; Valero-Mas, J. J.; Castellanos, F. J.; Calvo-Zaragoza, J.
Data Augmentation for End-to-End Optical Music Recognition Proceedings Article
In: Proceedings of the 14th IAPR International Workshop on Graphics Recognition (GREC), pp. 59-73, Springer, 2021.
BibTeX | Tags: GV/2020/030
@inproceedings{k473,
  author    = {J. C. López-Gutiérrez and J. J. Valero-Mas and F. J. Castellanos and J. Calvo-Zaragoza},
  title     = {Data Augmentation for End-to-End Optical Music Recognition},
  booktitle = {Proceedings of the 14th IAPR International Workshop on Graphics Recognition (GREC)},
  pages     = {59-73},
  publisher = {Springer},
  year      = {2021},
  date      = {2021-01-01},
  urldate   = {2021-01-01},
  keywords  = {GV/2020/030},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Ortega-Bastida, J.; Gallego, A. J.; Rico-Juan, J. R.; Albarrán, P.
A multimodal approach for regional GDP prediction using social media activity and historical information Journal Article
In: Applied Soft Computing, pp. 107693, 2021, ISSN: 1568-4946.
@article{k471,
  author    = {J. Ortega-Bastida and A. J. Gallego and J. R. Rico-Juan and P. Albarrán},
  title     = {A multimodal approach for regional GDP prediction using social media activity and historical information},
  journal   = {Applied Soft Computing},
  pages     = {107693},
  year      = {2021},
  date      = {2021-01-01},
  urldate   = {2021-01-01},
  issn      = {1568-4946},
  abstract  = {This work proposes a multimodal approach with which to predict the regional Gross Domestic Product (GDP) by combining historical GDP values with the embodied information in Twitter messages concerning the current economic condition. This proposal is of great interest, since it delivers forecasts at higher frequencies than both the official statistics (published only annually at the regional level in Spain) and the existing unofficial quarterly predictions (which rely on economic indicators that are available only after months of delay). The proposed method is based on a two-stage architecture. In the first stage, a multi-task autoencoder is initially used to obtain a GDP-related representation of tweets, which are then filtered to remove outliers and to obtain the GDP prediction from the consensus of opinions. In a second stage, this result is combined with the historical GDP values of the region using a multimodal network. The method is evaluated in four different regions of Spain using the tweets written by the most relevant economists, politicians, newspapers and institutions in each one. The results show that our approach successfully learns the evolution of the GDP using only historical information and tweets, thus making it possible to provide earlier forecasts about the regional GDP. This method also makes it possible to establish which the most or least influential opinions regarding this prediction are. As an additional exercise, we have assessed how well our method predicted the effect of the COVID-19 pandemic.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Calvo-Zaragoza, J.; Rizo, D.; Iñesta, J. M.
Reconocimiento Óptico de Partituras (OMR) aplicado al Fondo de Música Tradicional IMF-CSIC Book Chapter
In: Gambero-Ustárroz, M.; Ros-Fábregas, E. (Ed.): Musicología en Web. Patrimonio musical y Humanidades Digitales, Chapter 4, pp. 87-109, Edition Reichenberger, 2021, ISBN: 978-3-967280-14-2.
@inbook{k470,
title = {Reconocimiento Óptico de Partituras (OMR) aplicado al Fondo de Música Tradicional IMF-CSIC},
author = {J. Calvo-Zaragoza and D. Rizo and J. M. Iñesta},
editor = {M. Gambero-Ustárroz and E. Ros-Fábregas},
isbn = {978-3-967280-14-2},
year = {2021},
date = {2021-01-01},
booktitle = {Musicología en Web. Patrimonio musical y Humanidades Digitales},
pages = {87-109},
publisher = {Edition Reichenberger},
chapter = {4},
keywords = {HispaMus},
pubstate = {published},
tppubtype = {inbook}
}
Fuente, C.; Valero-Mas, J. J.; Castellanos, F. J.; Calvo-Zaragoza, J.
Multimodal Audio and Image Music Transcription Proceedings Article
In: Proc. of the 3rd International Workshop on Reading Music Systems, pp. 18-22, 2021.
BibTeX | Tags: ACIF/2019/042, APOSTD/2020/256, MultiScore
@inproceedings{k469,
title = {Multimodal Audio and Image Music Transcription},
author = {C. Fuente and J. J. Valero-Mas and F. J. Castellanos and J. Calvo-Zaragoza},
year = {2021},
date = {2021-01-01},
urldate = {2021-01-01},
booktitle = {Proc. of the 3rd International Workshop on Reading Music Systems},
pages = {18-22},
keywords = {ACIF/2019/042, APOSTD/2020/256, MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Alfaro-Contreras, M.; Valero-Mas, J. J.
Exploiting the Two-Dimensional Nature of Agnostic Music Notation for Neural Optical Music Recognition Journal Article
In: Applied Sciences (Special Issue Advances in Music Reading Systems), vol. 11, no. 8, pp. 3621, 2021, ISSN: 2076-3417.
@article{k463,
title = {Exploiting the Two-Dimensional Nature of Agnostic Music Notation for Neural Optical Music Recognition},
author = {M. Alfaro-Contreras and J. J. Valero-Mas},
issn = {2076-3417},
year = {2021},
date = {2021-01-01},
journal = {Applied Sciences},
volume = {11},
number = {8},
pages = {3621},
note = {Special Issue Advances in Music Reading Systems},
keywords = {GRE19-04},
pubstate = {published},
tppubtype = {article}
}
Román, M. A.
An End-to-End Framework for Audio-to-Score Music Transcription PhD Thesis
2021.
@phdthesis{k462,
title = {An End-to-End Framework for Audio-to-Score Music Transcription},
author = {M. A. Román},
editor = {J. Calvo-Zaragoza and A. Pertusa},
year = {2021},
date = {2021-01-01},
urldate = {2021-01-01},
school = {Universidad de Alicante},
keywords = {HispaMus},
pubstate = {published},
tppubtype = {phdthesis}
}
Gallego, A. J.; Calvo-Zaragoza, J.; Fisher, R. B.
Incremental Unsupervised Domain-Adversarial Training of Neural Networks Journal Article
In: IEEE Transactions on Neural Networks and Learning Systems, vol. 32, no. 11, pp. 4864-4878, 2021, ISSN: 2162-2388.
Abstract | Links | BibTeX | Tags: GRE19-04, HispaMus
@article{k455,
  author    = {A. J. Gallego and J. Calvo-Zaragoza and R. B. Fisher},
  title     = {Incremental Unsupervised Domain-Adversarial Training of Neural Networks},
  journal   = {IEEE Transactions on Neural Networks and Learning Systems},
  volume    = {32},
  number    = {11},
  pages     = {4864-4878},
  year      = {2021},
  date      = {2021-01-01},
  urldate   = {2021-01-01},
  issn      = {2162-2388},
  url       = {https://grfia.dlsi.ua.es/repositori/grfia/pubs/455/2001.04129.pdf},
  abstract  = {In the context of supervised statistical learning, it is typically assumed that the training set comes from the same distribution that draws the test samples. When this is not the case, the behavior of the learned model is unpredictable and becomes dependent upon the degree of similarity between the distribution of the training set and the distribution of the test set. One of the research topics that investigates this scenario is referred to as Domain Adaptation (DA). Deep neural networks brought dramatic advances in pattern recognition and that is why there have been many attempts to provide good domain adaptation algorithms for these models. Here we take a different avenue and approach the problem from an incremental point of view, where the model is adapted to the new domain iteratively. We make use of an existing unsupervised domain-adaptation algorithm to identify the target samples on which there is greater confidence about their true label. The output of the model is analyzed in different ways to determine the candidate samples. The selected samples are then added to the source training set by self-labeling, and the process is repeated until all target samples are labeled. This approach implements a form of adversarial training in which, by moving the self-labeled samples from the target to the source set, the DA algorithm is forced to look for new features after each iteration. Our results report a clear improvement with respect to the non-incremental case in several datasets, also outperforming other state-of-the-art domain adaptation algorithms.},
  keywords  = {GRE19-04, HispaMus},
  pubstate  = {published},
  tppubtype = {article}
}