2024
Ríos-Vila, A.; Calvo-Zaragoza, J.; Paquet, T.
Sheet Music Transformer: End-To-End Optical Music Recognition Beyond Monophonic Transcription Conference
Document Analysis and Recognition - ICDAR 2024, vol. 1, Springer Nature Switzerland, 2024, ISBN: 978-3-031-70552-6.
BibTeX | Tags: MultiScore
@conference{RiosVila:ICDAR:2024,
title = {Sheet Music Transformer: End-To-End Optical Music Recognition Beyond Monophonic Transcription},
author = {A. Ríos-Vila and J. Calvo-Zaragoza and T. Paquet},
isbn = {978-3-031-70552-6},
year = {2024},
date = {2024-09-02},
urldate = {2024-09-02},
booktitle = {Document Analysis and Recognition - ICDAR 2024},
volume = {1},
pages = {20-37},
publisher = {Springer Nature Switzerland},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {conference}
}
Maciá, M.; Rizo, D.
The Impact of UX/UI on Piano-Assisted Learning in Extended Reality Conference
Computer Supported Music Education, Angers, France, 2024.
BibTeX | Tags:
@conference{macia2024,
title = {The Impact of UX/UI on Piano-Assisted Learning in Extended Reality},
author = {M. Maciá and D. Rizo},
year = {2024},
date = {2024-05-04},
urldate = {2024-05-04},
booktitle = {Computer Supported Music Education},
address = {Angers, France},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Alfaro-Contreras, M.; Rios-Vila, A.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
A Transformer Approach for Polyphonic Audio-to-Score Transcription Proceedings Article
In: Proceedings of the International Conference on Acoustics, Speech, and Signal Processing (ICASSP 2024), Seoul (Korea), 2024.
BibTeX | Tags: MultiScore
@inproceedings{Alfaro-Contreras:ICASSP24,
title = {A Transformer Approach for Polyphonic Audio-to-Score Transcription},
author = {M. Alfaro-Contreras and A. Rios-Vila and J. J. Valero-Mas and J. Calvo-Zaragoza},
year = {2024},
date = {2024-04-19},
urldate = {2024-04-19},
booktitle = {Proceedings of the International Conference on Acoustics, Speech, and Signal Processing (ICASSP 2024)},
address = {Seoul (Korea)},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
2023
Martínez-Sevilla, J. C.; Ríos-Vila, A.; Castellanos, F. J.; Calvo-Zaragoza, J.
A Holistic Approach for Aligned Music and Lyrics Transcription Conference
Document Analysis and Recognition - ICDAR 2023, vol. 1, Springer Nature Switzerland, Cham, 2023, ISBN: 978-3-031-41676-7.
Abstract | Links | BibTeX | Tags: REPERTORIUM
@conference{MartinezSevilla:ICDAR:2023,
title = {A Holistic Approach for Aligned Music and Lyrics Transcription},
author = {J.C. Martínez-Sevilla and A. Ríos-Vila and F. J. Castellanos and J. Calvo-Zaragoza },
editor = {Fink, Gernot A. and Jain, Rajiv and Kise, Koichi and Zanibbi, Richard},
doi = {10.1007/978-3-031-41676-7_11},
isbn = {978-3-031-41676-7},
year = {2023},
date = {2023-08-28},
urldate = {2023-08-28},
booktitle = {Document Analysis and Recognition - ICDAR 2023},
volume = {1},
pages = {185--201},
publisher = {Springer Nature Switzerland},
address = {Cham},
abstract = {In this paper, we present the Aligned Music Notation and Lyrics Transcription (AMNLT) challenge, whose goal is to retrieve the content from document images of vocal music. This new research area arises from the need to automatically transcribe notes and lyrics from music scores and align both sources of information conveniently. Although existing methods are able to deal with music notation and text, they work without providing their proper alignment, which is crucial to actually retrieve the content of the piece of vocal music. To overcome this challenge, we consider holistic neural approaches that transcribe music and text in one step, along with an encoding that implicitly aligns the sources of information. The methodology is evaluated on a benchmark specifically designed for AMNLT. The results report that existing methods can obtain high-quality text and music transcriptions, but posterior alignment errors are inevitably found. However, our formulation achieves relative improvements of over 80{%} in the metric that considers both transcription and alignment. We hope that this work will establish itself as a future reference for further research on AMNLT.},
keywords = {REPERTORIUM},
pubstate = {published},
tppubtype = {conference}
}
Martínez-Sevilla, J. C.; Alfaro-Contreras, M.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Insights into end-to-end audio-to-score transcription with real recordings: A case study with saxophone works Proceedings Article
In: INTERSPEECH Conference, pp. 2793-2797, Dublin, Ireland, 2023.
Links | BibTeX | Tags: MultiScore
@inproceedings{Martínez-Sevilla2023,
title = {Insights into end-to-end audio-to-score transcription with real recordings: A case study with saxophone works},
author = {J.C. Martínez-Sevilla and M. Alfaro-Contreras and J. J. Valero-Mas and J. Calvo-Zaragoza},
doi = {10.21437/Interspeech.2023-88},
year = {2023},
date = {2023-08-20},
urldate = {2023-08-20},
booktitle = {INTERSPEECH Conference},
pages = {2793-2797},
address = {Dublin, Ireland},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Alfaro-Contreras, M.; Valero-Mas, J. J.; Iñesta, J. M.; Calvo-Zaragoza, J.
Multimodal Strategies for Image and Audio Music Transcription: A Comparative Study Proceedings Article
In: Pattern Recognition, Computer Vision, and Image Processing. ICPR 2022 International Workshops and Challenges. ICPR 2022. Lecture Notes in Computer Science, pp. 64-77, Springer Nature Switzerland, Cham, 2023, ISBN: 978-3-031-37731-0.
Links | BibTeX | Tags: MultiScore
@inproceedings{k505,
title = {Multimodal Strategies for Image and Audio Music Transcription: A Comparative Study},
author = {M. Alfaro-Contreras and J. J. Valero-Mas and J. M. Iñesta and J. Calvo-Zaragoza},
doi = {10.1007/978-3-031-37731-0_6},
isbn = {978-3-031-37731-0},
year = {2023},
date = {2023-08-10},
urldate = {2022-01-01},
booktitle = {Pattern Recognition, Computer Vision, and Image Processing. ICPR 2022 International Workshops and Challenges. ICPR 2022. Lecture Notes in Computer Science},
volume = {13645},
pages = {64-77},
publisher = {Springer Nature Switzerland},
address = {Cham},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Garrido-Munoz, C.; Alfaro-Contreras, M.; Calvo-Zaragoza, J.
Evaluating Domain Generalization in Kitchen Utensils Classification Proceedings Article
In: Iberian Conference on Pattern Recognition and Image Analysis, pp. 108-118, 2023.
Links | BibTeX | Tags: MultiScore
@inproceedings{Garrido-Munoz2023,
title = {Evaluating Domain Generalization in Kitchen Utensils Classification},
author = {C. Garrido-Munoz and M. Alfaro-Contreras and J. Calvo-Zaragoza},
doi = {10.1007/978-3-031-36616-1_9},
year = {2023},
date = {2023-06-25},
booktitle = {Iberian Conference on Pattern Recognition and Image Analysis},
pages = {108-118},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
González-Barrachina, P.; Alfaro-Contreras, M.; Nieto-Hidalgo, M.; Calvo-Zaragoza, J.
Lifelong Learning for Document Image Binarization: An Experimental Study Proceedings Article
In: Iberian Conference on Pattern Recognition and Image Analysis, pp. 146-157, 2023.
Links | BibTeX | Tags: MultiScore
@inproceedings{González-Barrachina2023,
title = {Lifelong Learning for Document Image Binarization: An Experimental Study},
author = {P. González-Barrachina and M. Alfaro-Contreras and M. Nieto-Hidalgo and J. Calvo-Zaragoza },
doi = {10.1007/978-3-031-36616-1_12},
year = {2023},
date = {2023-06-25},
urldate = {2023-06-25},
booktitle = {Iberian Conference on Pattern Recognition and Image Analysis},
pages = {146-157},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Alfaro-Contreras, M.; Iñesta, J. M.; Calvo-Zaragoza, J.
Optical Music Recognition for Homophonic Scores with Neural Networks and Synthetic Music Generation Journal Article
In: International Journal of Multimedia Information Retrieval, vol. 12, pp. 12-24, 2023.
@article{Alfaro-Contreras2023b,
title = {Optical Music Recognition for Homophonic Scores with Neural Networks and Synthetic Music Generation},
author = {M. Alfaro-Contreras and J. M. Iñesta and J. Calvo-Zaragoza},
doi = {10.1007/s13735-023-00278-5},
year = {2023},
date = {2023-05-26},
urldate = {2023-05-26},
journal = {International Journal of Multimedia Information Retrieval},
volume = {12},
pages = {12-24},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Ríos-Vila, A.; Rizo, D.; Iñesta, J. M.; Calvo-Zaragoza, J.
End-to-end optical music recognition for pianoform sheet music Journal Article
In: International Journal on Document Analysis and Recognition (IJDAR), iss. ICDAR 2023, 2023, ISSN: 1433-2825.
Abstract | Links | BibTeX | Tags: MultiScore
@article{Ríos-Vila2023,
title = {End-to-end optical music recognition for pianoform sheet music},
author = {A. Ríos-Vila and D. Rizo and J. M. Iñesta and J. Calvo-Zaragoza},
url = {https://link.springer.com/content/pdf/10.1007/s10032-023-00432-z.pdf},
doi = {10.1007/s10032-023-00432-z},
issn = {1433-2825},
year = {2023},
date = {2023-05-12},
urldate = {2023-05-12},
journal = {International Journal on Document Analysis and Recognition (IJDAR)},
issue = {ICDAR 2023},
abstract = {End-to-end solutions have brought about significant advances in the field of Optical Music Recognition. These approaches directly provide the symbolic representation of a given image of a musical score. Despite this, several documents, such as pianoform musical scores, cannot yet benefit from these solutions since their structural complexity does not allow their effective transcription. This paper presents a neural method whose objective is to transcribe these musical scores in an end-to-end fashion. We also introduce the GrandStaff dataset, which contains 53,882 single-system piano scores in common western modern notation. The sources are encoded in both a standard digital music representation and its adaptation for current transcription technologies. The method proposed in this paper is trained and evaluated using this dataset. The results show that the approach presented is, for the first time, able to effectively transcribe pianoform notation in an end-to-end manner.},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {article}
}
Alfaro-Contreras, M.; Ríos-Vila, A.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Few-Shot Symbol Classification via Self-Supervised Learning and Nearest Neighbor Journal Article
In: Pattern Recognition Letters, vol. 167, pp. 1-8, 2023.
Links | BibTeX | Tags: MultiScore
@article{Alfaro-Contreras2023,
title = {Few-Shot Symbol Classification via Self-Supervised Learning and Nearest Neighbor},
author = {M. Alfaro-Contreras and A. Ríos-Vila and J. J. Valero-Mas and J. Calvo-Zaragoza},
doi = {10.1016/j.patrec.2023.01.014},
year = {2023},
date = {2023-03-01},
journal = {Pattern Recognition Letters},
volume = {167},
pages = {1-8},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {article}
}
Valero-Mas, J. J.; Gallego, A. J.; Alonso-Jiménez, P.; Serra, X.
Multilabel Prototype Generation for Data Reduction in k-Nearest Neighbour classification Journal Article
In: Pattern Recognition, vol. 135, pp. 109190, 2023, ISSN: 0031-3203.
Abstract | BibTeX | Tags: DOREMI, MultiScore
@article{k519,
title = {Multilabel Prototype Generation for Data Reduction in k-Nearest Neighbour classification},
author = {J. J. Valero-Mas and A. J. Gallego and P. Alonso-Jiménez and X. Serra},
issn = {0031-3203},
year = {2023},
date = {2023-01-01},
journal = {Pattern Recognition},
volume = {135},
pages = {109190},
abstract = {Prototype Generation (PG) methods are typically considered for improving the efficiency of the k-Nearest Neighbour (kNN) classifier when tackling high-size corpora. Such approaches aim at generating a reduced version of the corpus without decreasing the classification performance when compared to the initial set. Despite their large application in multiclass scenarios, very few works have addressed the proposal of PG methods for the multilabel space. In this regard, this work presents the novel adaptation of four multiclass PG strategies to the multilabel case. These proposals are evaluated with three multilabel kNN-based classifiers, 12 corpora comprising a varied range of domains and corpus sizes, and different noise scenarios artificially induced in the data. The results obtained show that the proposed adaptations are capable of significantly improving—both in terms of efficiency and classification performance—the only reference multilabel PG work in the literature as well as the case in which no PG method is applied, also presenting statistically superior robustness in noisy scenarios. Moreover, these novel PG strategies allow prioritising either the efficiency or efficacy criteria through its configuration depending on the target scenario, hence covering a wide area in the solution space not previously filled by other works.},
keywords = {DOREMI, MultiScore},
pubstate = {published},
tppubtype = {article}
}
Sánchez-Ferrer, A.; Valero-Mas, J. J.; Gallego, A. J.; Calvo-Zaragoza, J.
An Experimental Study on Marine Debris Location and Recognition using Object Detection Journal Article
In: Pattern Recognition Letters, 2023, ISSN: 0167-8655.
Abstract | BibTeX | Tags: TADMar
@article{k521,
title = {An Experimental Study on Marine Debris Location and Recognition using Object Detection},
author = {A. Sánchez-Ferrer and J. J. Valero-Mas and A. J. Gallego and J. Calvo-Zaragoza},
issn = {0167-8655},
year = {2023},
date = {2023-01-01},
journal = {Pattern Recognition Letters},
abstract = {The large amount of debris in our oceans is a global problem that dramatically impacts marine fauna and flora. While a large number of human-based campaigns have been proposed to tackle this issue, these efforts have been deemed insufficient due to the insurmountable amount of existing litter. In response to that, there exists a high interest in the use of autonomous underwater vehicles (AUV) that may locate, identify, and collect this garbage automatically. To perform such a task, AUVs consider state-of-the-art object detection techniques based on deep neural networks due to their reported high performance. Nevertheless, these techniques generally require large amounts of data with fine-grained annotations. In this work, we explore the capabilities of the reference object detector Mask Region-based Convolutional Neural Networks for automatic marine debris location and classification in the context of limited data availability. Considering the recent CleanSea corpus, we pose several scenarios regarding the amount of available train data and study the possibility of mitigating the adverse effects of data scarcity with synthetic marine scenes. Our results achieve a new state of the art in the task, establishing a new reference for future research. In addition, it is shown that the task still has room for improvement and that the lack of data can be somehow alleviated, yet to a limited extent.},
keywords = {TADMar},
pubstate = {published},
tppubtype = {article}
}
Alfaro-Contreras, M.; Valero-Mas, J. J.; Iñesta, J. M.; Calvo-Zaragoza, J.
Late multimodal fusion for image and audio music transcription Journal Article
In: Expert Systems With Applications, vol. 216, pp. 119491-119500, 2023.
Links | BibTeX | Tags: MultiScore
@article{Alfaro-Contreras2023c,
title = {Late multimodal fusion for image and audio music transcription},
author = {M. Alfaro-Contreras and J. J. Valero-Mas and J. M. Iñesta and J. Calvo-Zaragoza},
doi = {10.1016/j.eswa.2022.119491},
year = {2023},
date = {2023-01-01},
urldate = {2023-01-01},
journal = {Expert Systems With Applications},
volume = {216},
pages = {119491-119500},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {article}
}
2022
Ríos-Vila, A.; Iñesta, J. M.; Calvo-Zaragoza, J.
End-to-End Full-Page Optical Music Recognition for Mensural Notation Proceedings Article
In: Proceedings of the 23rd International Society for Music Information Retrieval Conference, pp. 226-232, 2022, ISBN: 978-1-7327299-2-6.
Abstract | Links | BibTeX | Tags: Leonardo2021, MultiScore
@inproceedings{Ríos-Vila2022,
title = {End-to-End Full-Page Optical Music Recognition for Mensural Notation},
author = {A. Ríos-Vila and J. M. Iñesta and J. Calvo-Zaragoza},
url = {https://zenodo.org/record/7342678/files/000026.pdf?download=1},
doi = {10.5281/zenodo.7342678},
isbn = {978-1-7327299-2-6},
year = {2022},
date = {2022-12-04},
urldate = {2022-12-04},
booktitle = {Proceedings of the 23rd International Society for Music Information Retrieval Conference},
journal = {Proceedings of the 23rd International Society for Music Information Retrieval Conference},
pages = {226-232},
abstract = {Optical Music Recognition (OMR) systems typically consider workflows that include several steps, such as staff detection, symbol recognition, and semantic reconstruction. However, fine-tuning these systems is costly due to the specific data labeling process that has to be performed to train models for each of these steps. In this paper, we present the first segmentation-free full-page OMR system that receives a page image and directly outputs the transcription in a single step. This model requires only the annotations of full score pages, which greatly alleviates the task of manual labeling. The model has been tested with early music written in mensural notation, for which the presented approach is especially beneficial. Results show that this methodology provides a solution with promising results and establishes a new line of research for holistic transcription of music score pages.},
keywords = {Leonardo2021, MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Rizo, D.; Delgado, T.; Calvo-Zaragoza, J.; Madueño, A.; García-Iasci, P.
Speeding-up the encoding of mensural collections from Spanish libraries Journal Article
In: IAML 2022 Prague, 2022.
BibTeX | Tags: MultiScore
@article{k502,
title = {Speeding-up the encoding of mensural collections from Spanish libraries},
author = {D. Rizo and T. Delgado and J. Calvo-Zaragoza and A. Madueño and P. García-Iasci},
year = {2022},
date = {2022-07-01},
booktitle = {IAML 2022 Prague},
journal = {IAML 2022 Prague},
organization = {IAML},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {article}
}
Alfaro-Contreras, M.; Valero-Mas, J. J.; Iñesta, J. M.; Calvo-Zaragoza, J.
Insights into transfer learning between image and audio music transcription Proceedings Article
In: Sound and Music Computing Conference, pp. 295-301, Zenodo, Saint-Étienne, France, 2022.
Abstract | Links | BibTeX | Tags: MultiScore
@inproceedings{Alfaro-Contreras2022b,
title = {Insights into transfer learning between image and audio music transcription},
author = {M. Alfaro-Contreras and J. J. Valero-Mas and J. M. Iñesta and J. Calvo-Zaragoza},
doi = {10.5281/zenodo.6797870},
year = {2022},
date = {2022-06-01},
urldate = {2022-06-01},
booktitle = {Sound and Music Computing Conference},
pages = {295-301},
publisher = {Zenodo},
address = {Saint-Étienne, France},
abstract = {Optical Music Recognition (OMR) and Automatic Music Transcription (AMT) stand for the research fields that devise methods to transcribe music sources---documents or audio signals, respectively---into a structured digital format. Historically, they have followed different approaches to achieve the same goal. However, their recent definition in terms of sequence labeling tasks gathers them under a common formulation framework. Under this premise, one may wonder if there exist any synergies between the two fields that could be exploited to improve the individual recognition rates in their respective domains. In this work, we aim to further explore this question from a Transfer Learning (TL) point of view in the context of neural end-to-end recognition models. More precisely, we consider a music transcription system, trained on either image or audio data, and adapt its performance to the unseen domain during the training phase using different TL schemes. Results show that knowledge transfer slightly boosts model performance with sufficient available data, but it is not properly leveraged when the latter condition is not met. This opens up a new promising, yet challenging, research path towards building an effective bridge between two solutions of the same problem.},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Alfaro-Contreras, M.; Ríos-Vila, A.; Valero-Mas, J. J.; Iñesta, J. M.; Calvo-Zaragoza, J.
Decoupling music notation to improve end-to-end Optical Music Recognition Journal Article
In: Pattern Recognition Letters, vol. 158, pp. 157-163, 2022, ISSN: 0167-8655.
Abstract | Links | BibTeX | Tags: MultiScore
@article{Alfaro-Contreras2022,
title = {Decoupling music notation to improve end-to-end Optical Music Recognition},
author = {M. Alfaro-Contreras and A. Ríos-Vila and J. J. Valero-Mas and J. M. Iñesta and J. Calvo-Zaragoza},
doi = {10.1016/j.patrec.2022.04.032},
issn = {0167-8655},
year = {2022},
date = {2022-06-01},
urldate = {2022-06-01},
journal = {Pattern Recognition Letters},
volume = {158},
pages = {157-163},
abstract = {Inspired by the Text Recognition field, end-to-end schemes based on Convolutional Recurrent Neural Networks (CRNN) trained with the Connectionist Temporal Classification (CTC) loss function are considered one of the current state-of-the-art techniques for staff-level Optical Music Recognition (OMR). Unlike text symbols, music-notation elements may be defined as a combination of (i) a shape primitive located in (ii) a certain position in a staff. However, this double nature is generally neglected in the learning process, as each combination is treated as a single token. In this work, we study whether exploiting such particularity of music notation actually benefits the recognition performance and, if so, which approach is the most appropriate. For that, we thoroughly review existing specific approaches that explore this premise and propose different combinations of them. Furthermore, considering the limitations observed in such approaches, a novel decoding strategy specifically designed for OMR is proposed. The results obtained with four different corpora of historical manuscripts show the relevance of leveraging this double nature of music notation since it outperforms the standard approaches where it is ignored. In addition, the proposed decoding leads to significant reductions in the error rates with respect to the other cases.},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {article}
}
Arroyo, V.; Valero-Mas, J. J.; Calvo-Zaragoza, J.; Pertusa, A.
Neural audio-to-score music transcription for unconstrained polyphony using compact output representations Proceedings Article
In: Proc. of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), IEEE, Singapore, Singapore, 2022.
BibTeX | Tags: MultiScore
@inproceedings{k487,
title = {Neural audio-to-score music transcription for unconstrained polyphony using compact output representations},
author = {V. Arroyo and J. J. Valero-Mas and J. Calvo-Zaragoza and A. Pertusa},
year = {2022},
date = {2022-05-01},
booktitle = {Proc. of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
publisher = {IEEE},
address = {Singapore, Singapore},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Ríos-Vila, A.; Iñesta, J. M.; Calvo-Zaragoza, J.
On the Use of Transformers for End-to-End Optical Music Recognition Proceedings Article
In: Iberian Pattern Recognition and Image Analysis, IbPRIA 2022., pp. 470-481, Aveiro, Portugal, 2022, ISBN: 978-3-031-04880-7.
BibTeX | Tags: MultiScore
@inproceedings{k492,
title = {On the Use of Transformers for End-to-End Optical Music Recognition},
author = {A. Ríos-Vila and J. M. Iñesta and J. Calvo-Zaragoza},
isbn = {978-3-031-04880-7},
year = {2022},
date = {2022-05-01},
urldate = {2022-05-01},
booktitle = {Iberian Pattern Recognition and Image Analysis, IbPRIA 2022.},
pages = {470-481},
address = {Aveiro, Portugal},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Garrido-Munoz, C.; Ríos-Vila, A.; Calvo-Zaragoza, J.
Retrieval of Music-Notation Primitives via Image-to-Sequence Approaches Proceedings Article
In: Iberian Pattern Recognition and Image Analysis, IbPRIA 2022., pp. 482-492, Aveiro, Portugal, 2022, ISBN: 978-3-031-04880-7.
BibTeX | Tags: Leonardo2021
@inproceedings{k493,
title = {Retrieval of Music-Notation Primitives via Image-to-Sequence Approaches},
author = {C. Garrido-Munoz and A. Ríos-Vila and J. Calvo-Zaragoza},
isbn = {978-3-031-04880-7},
year = {2022},
date = {2022-05-01},
booktitle = {Iberian Pattern Recognition and Image Analysis, IbPRIA 2022.},
pages = {482-492},
address = {Aveiro, Portugal},
keywords = {Leonardo2021},
pubstate = {published},
tppubtype = {inproceedings}
}
Mas-Candela, E.; Ríos-Vila, A.; Calvo-Zaragoza, J.
A First Approach to Image Transformation Sequence Retrieval Proceedings Article
In: Iberian Pattern Recognition and Image Analysis, IbPRIA 2022., pp. 321-332, Aveiro, Portugal, 2022.
BibTeX | Tags: MultiScore
@inproceedings{k494,
title = {A First Approach to Image Transformation Sequence Retrieval},
author = {E. Mas-Candela and A. Ríos-Vila and J. Calvo-Zaragoza},
year = {2022},
date = {2022-05-01},
booktitle = {Iberian Pattern Recognition and Image Analysis, IbPRIA 2022.},
pages = {321-332},
address = {Aveiro, Portugal},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Münnich, S.; Rizo, D.
Music Encoding Conference Proceedings 2022. Book
Humanities Commons, 2022, ISBN: 978-84-1302-173-7.
BibTeX | Tags: MultiScore
@book{k495,
title = {Music Encoding Conference Proceedings 2022.},
author = {S. Münnich and D. Rizo},
editor = {S. Münnich and D. Rizo},
isbn = {978-84-1302-173-7},
year = {2022},
date = {2022-05-01},
urldate = {2022-05-01},
publisher = {Humanities Commons},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {book}
}
Münnich, S.; Rizo, D.
Foreword Proceedings Article
In: Münnich, S.; Rizo, D. (Ed.): Music Encoding Conference Proceedings 2021, pp. vii–viii, Humanities Commons, 2022, ISBN: 978-84-1302-173-7.
BibTeX | Tags: MultiScore
@inproceedings{k496,
title = {Foreword},
author = {S. Münnich and D. Rizo},
editor = {S. Münnich and D. Rizo},
isbn = {978-84-1302-173-7},
year = {2022},
date = {2022-05-01},
urldate = {2022-05-01},
booktitle = {Music Encoding Conference Proceedings 2021},
pages = {vii–viii},
publisher = {Humanities Commons},
chapter = {1},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Desmond, K.; Pugin, L.; Regimbal, J.; Rizo, D.; Sapp, C. S.; Thomae, M. E.
Encoding Polyphony from Medieval Manuscripts Notated in Mensural Notation Proceedings Article
In: Music Encoding Conference Proceedings 2021, pp. 197–219, Humanities Commons, 2022, ISBN: 978-84-1302-173-7.
BibTeX | Tags: MultiScore
@inproceedings{k497,
title = {Encoding Polyphony from Medieval Manuscripts Notated in Mensural Notation},
author = {K. Desmond and L. Pugin and J. Regimbal and D. Rizo and C. S. Sapp and M. E. Thomae},
isbn = {978-84-1302-173-7},
year = {2022},
date = {2022-05-01},
urldate = {2022-05-01},
booktitle = {Music Encoding Conference Proceedings 2021},
pages = {197–219},
publisher = {Humanities Commons},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Sánchez-Ferrer, A.; Gallego, A. J.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
The CleanSea Set: A Benchmark Corpus for Underwater Debris Detection and Recognition Proceedings Article
In: 10th Iberian Conference on Pattern Recognition and Image Analysis (IbPRIA), pp. 616–628, Aveiro, Portugal, 2022, ISBN: 978-3-031-04881-4.
@inproceedings{k512,
title = {The CleanSea Set: A Benchmark Corpus for Underwater Debris Detection and Recognition},
author = {A. Sánchez-Ferrer and A. J. Gallego and J. J. Valero-Mas and J. Calvo-Zaragoza},
isbn = {978-3-031-04881-4},
year = {2022},
date = {2022-05-01},
booktitle = {10th Iberian Conference on Pattern Recognition and Image Analysis (IbPRIA)},
pages = {616--628},
address = {Aveiro, Portugal},
abstract = {In recent years, the large amount of debris scattered throughout the ocean is becoming one of the major pollution problems, causing extinction of species and accelerating the degradation of our planet, among other environmental issues. Since the manual treatment of this waste represents a considerably tedious task, autonomous frameworks are gaining attention. Due to their reported good performance, such frameworks generally rely on Deep Learning techniques. However, the scarcity of data coupled with the inherent difficulties of the field---debris with different shapes and colors due to long-lasting exposure to the ocean, illumination variability or sea conditions---makes detecting underwater objects a particularly challenging task. The contribution of this work to the field is double: on the one hand, we introduce a novel data collection for supervised learning---the CleanSea corpus---annotated at both the bound box and contour levels of the objects to contribute with the research and progress in the field and on the other hand, we devise and optimize a recognition model based on the reference Mask Object-Based Convolutional Neural Network for this set to establish a benchmark for future comparison and assess its performance in both simulated and real-world scenarios. Results show the relevance of the contributions as the devised model is capable of properly addressing the detection and recognition of general debris when trained with the introduced CleanSea corpus.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Iñesta, J. M.; Thomae, M. E.
An On-line Tool for Transcription of Music Scores: MuRET Presentation
Montreal (Canada), 01.05.2022.
Abstract | Links | BibTeX | Tags: HispaMus
@misc{k520,
title = {An On-line Tool for Transcription of Music Scores: MuRET},
author = {J. M. Iñesta and M. E. Thomae},
year = {2022},
date = {2022-05-01},
urldate = {2022-05-01},
booktitle = {1st Int. Conf. The Sound of Future/The Future of Sound},
address = {Montreal (Canada)},
organization = {CIRMMT},
abstract = {MuRET is a Machine-Learning Optical Music Recognition (OMR) research tool. It runs in the browser. It has been created for helping in the transcription of music collections, for experimenting with machine learning algorithms for OMR and it's capable of working well with different notations and writings. Why using Machine Learning? Instead of designing a system to solve the task, we have designed a system to learn how to solve the task from sets of labeled (solved) images. This way it's adaptable to new (previously unseen) collections.},
key = {OMR, Machine Learning},
keywords = {HispaMus},
pubstate = {published},
tppubtype = {presentation}
}
Fuente, C.; Valero-Mas, J. J.; Castellanos, F. J.; Calvo-Zaragoza, J.
Multimodal Image and Audio Music Transcription Journal Article
In: International Journal of Multimedia Information Retrieval, vol. 11, pp. 77-84, 2022.
BibTeX | Tags: MultiScore
@article{k479,
title = {Multimodal Image and Audio Music Transcription},
author = {C. Fuente and J. J. Valero-Mas and F. J. Castellanos and J. Calvo-Zaragoza},
year = {2022},
date = {2022-01-01},
journal = {International Journal of Multimedia Information Retrieval},
volume = {11},
pages = {77-84},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {article}
}
Castellanos, F. J.; Garrido-Munoz, C.; Ríos-Vila, A.; Calvo-Zaragoza, J.
Region-based Layout Analysis of Music Score Images Journal Article
In: Expert Systems with Applications, pp. 118211, 2022, ISSN: 0957-4174.
BibTeX | Tags: MultiScore
@article{k486,
title = {Region-based Layout Analysis of Music Score Images},
author = {F. J. Castellanos and C. Garrido-Munoz and A. Ríos-Vila and J. Calvo-Zaragoza},
issn = {0957-4174},
year = {2022},
date = {2022-01-01},
urldate = {2022-01-01},
journal = {Expert Systems with Applications},
pages = {118211},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {article}
}
Rosello, A.; Ayllon, E.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Test Sample Selection for Handwriting Recognition Through Language Modeling Proceedings Article
In: Pattern Recognition and Image Analysis - 10th Iberian Conference, IbPRIA 2022, Aveiro, Portugal, May 4-6, 2022, Proceedings, 2022.
BibTeX | Tags: MultiScore
@inproceedings{k498,
title = {Test Sample Selection for Handwriting Recognition Through Language Modeling},
author = {A. Rosello and E. Ayllon and J. J. Valero-Mas and J. Calvo-Zaragoza},
year = {2022},
date = {2022-01-01},
booktitle = {Pattern Recognition and Image Analysis - 10th Iberian Conference, IbPRIA 2022, Aveiro, Portugal, May 4-6, 2022, Proceedings},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Ríos-Vila, A.; Iñesta, J. M.; Calvo-Zaragoza, J.
End-to-End Full-Page Optical Music Recognition for Mensural Notation Proceedings Article
In: Proceedings of the 23rd International Society for Music Information Retrieval Conference, ISMIR, Bangalore, India, 2022.
Abstract | BibTeX | Tags: Leonardo2021, MultiScore
@inproceedings{k499,
title = {End-to-End Full-Page Optical Music Recognition for Mensural Notation},
author = {A. Ríos-Vila and J. M. Iñesta and J. Calvo-Zaragoza},
year = {2022},
date = {2022-01-01},
urldate = {2022-01-01},
booktitle = {Proceedings of the 23rd International Society for Music Information Retrieval Conference, ISMIR},
address = {Bangalore, India},
abstract = {Optical Music Recognition (OMR) systems typically consider workflows that include several steps, such as staff detection, symbol recognition, and semantic reconstruction. However, fine-tuning these systems is costly due to the specific data labeling process that has to be performed to train models for each of these steps. In this paper, we present the first segmentation-free full-page OMR system that receives a page image and directly outputs the transcription in a single step. This model requires only the annotations of full score pages, which greatly alleviates the task of manual labeling. The model has been tested with early music written in mensural notation, for which the presented approach is especially beneficial. Results show that this methodology provides a solution with promising results and establishes a new line of research for holistic transcription of music score pages.},
keywords = {Leonardo2021, MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Castellanos, F. J.; Gallego, A. J.; Calvo-Zaragoza, J.; Fujinaga, I.
Domain Adaptation for Staff-Region Retrieval of Music Score Images Journal Article
In: International Journal on Document Analysis and Recognition, 2022, ISSN: 1433-2825.
BibTeX | Tags: MultiScore
@article{k500,
title = {Domain Adaptation for Staff-Region Retrieval of Music Score Images},
author = {F. J. Castellanos and A. J. Gallego and J. Calvo-Zaragoza and I. Fujinaga},
issn = {1433-2825},
year = {2022},
date = {2022-01-01},
journal = {International Journal on Document Analysis and Recognition},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {article}
}
de la Fuente, C.; Castellanos, F. J.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Multimodal Recognition of Frustration during Game-Play with Deep Neural Networks Journal Article
In: Multimedia Tools and Applications, 2022.
BibTeX | Tags:
@article{k501,
title = {Multimodal Recognition of Frustration during Game-Play with Deep Neural Networks},
author = {C. de la Fuente and F. J. Castellanos and J. J. Valero-Mas and J. Calvo-Zaragoza},
year = {2022},
date = {2022-01-01},
urldate = {2022-01-01},
journal = {Multimedia Tools and Applications},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Alfaro-Contreras, M.; Ríos-Vila, A.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Few-Shot Music Symbol Classification via Self-Supervised Learning and Nearest Neighbor Proceedings Article
In: Pattern Recognition. ICPR International Workshops and Challenges, 2022.
BibTeX | Tags: MultiScore
@inproceedings{k504,
title = {Few-Shot Music Symbol Classification via Self-Supervised Learning and Nearest Neighbor},
author = {M. Alfaro-Contreras and A. Ríos-Vila and J. J. Valero-Mas and J. Calvo-Zaragoza},
year = {2022},
date = {2022-01-01},
booktitle = {Pattern Recognition. ICPR International Workshops and Challenges},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Sáez-Pérez, J.; Gallego, A. J.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Domain Adaptation in Robotics: A Study Case on Kitchen Utensil Recognition Proceedings Article
In: 10th Iberian Conference on Pattern Recognition and Image Analysis (IbPRIA), 2022.
@inproceedings{k506,
title = {Domain Adaptation in Robotics: A Study Case on Kitchen Utensil Recognition},
author = {J. Sáez-Pérez and A. J. Gallego and J. J. Valero-Mas and J. Calvo-Zaragoza},
year = {2022},
date = {2022-01-01},
booktitle = {10th Iberian Conference on Pattern Recognition and Image Analysis (IbPRIA)},
keywords = {ROMA},
pubstate = {published},
tppubtype = {inproceedings}
}
Alashhab, S.
Aplicaciones de visión artificial para ayuda a personas con dificultades visuales PhD Thesis
2022.
BibTeX | Tags:
@phdthesis{k508,
title = {Aplicaciones de visión artificial para ayuda a personas con dificultades visuales},
author = {S. Alashhab},
editor = {Miguel Angel Lozano and Antonio Javier Gallego},
year = {2022},
date = {2022-01-01},
urldate = {2022-01-01},
organization = {Universidad de Alicante},
keywords = {},
pubstate = {published},
tppubtype = {phdthesis}
}
Bernabeu, M.
Búsqueda de imágenes similares usando técnicas de aprendizaje automático PhD Thesis
2022.
BibTeX | Tags:
@phdthesis{k509,
title = {Búsqueda de imágenes similares usando técnicas de aprendizaje automático},
author = {M. Bernabeu},
editor = {Antonio Pertusa and Antonio Javier Gallego},
year = {2022},
date = {2022-01-01},
urldate = {2022-01-01},
organization = {Universidad de Alicante},
keywords = {},
pubstate = {published},
tppubtype = {phdthesis}
}
Gallego, A. J.; Rico-Juan, J. R.; Valero-Mas, J. J.
Efficient k-nearest neighbor search based on clustering and adaptive k values Journal Article
In: Pattern Recognition, vol. 122, pp. 108356, 2022, ISSN: 0031-3203.
@article{k510,
title = {Efficient k-nearest neighbor search based on clustering and adaptive k values},
author = {A. J. Gallego and J. R. Rico-Juan and J. J. Valero-Mas},
issn = {0031-3203},
year = {2022},
date = {2022-01-01},
journal = {Pattern Recognition},
volume = {122},
pages = {108356},
abstract = {The k-Nearest Neighbor (kNN) algorithm is widely used in the supervised learning field and, particularly, in search and classification tasks, owing to its simplicity, competitive performance, and good statistical properties. However, its inherent inefficiency prevents its use in most modern applications due to the vast amount of data that the current technological evolution generates, being thus the optimization of kNN-based search strategies of particular interest. This paper introduces the caKD+ algorithm, which tackles this limitation by combining the use of feature learning techniques, clustering methods, adaptive search parameters per cluster, and the use of pre-calculated K-Dimensional Tree structures, and results in a highly efficient search method. This proposal has been evaluated using 10 datasets and the results show that caKD+ significantly outperforms 16 state-of-the-art efficient search methods while still depicting such an accurate performance as the one by the exhaustive kNN search.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Alashhab, S.; Gallego, A. J.; Lozano, M. Á.
Efficient gesture recognition for the assistance of visually impaired people using multi-head neural networks Journal Article
In: Engineering Applications of Artificial Intelligence, vol. 114, pp. 105188, 2022, ISSN: 0952-1976.
@article{k511,
title = {Efficient gesture recognition for the assistance of visually impaired people using multi-head neural networks},
author = {S. Alashhab and A. J. Gallego and M. Á. Lozano},
issn = {0952-1976},
year = {2022},
date = {2022-01-01},
journal = {Engineering Applications of Artificial Intelligence},
volume = {114},
pages = {105188},
abstract = {Existing research for the assistance of visually impaired people mainly focus on solving a single task (such as reading a text or detecting an obstacle), hence forcing the user to switch applications to perform other actions. This paper proposes an interactive system for mobile devices controlled by hand gestures that allow the user to control the device and use several assistance tools by making simple static and dynamic hand gestures (e.g., pointing a finger at an object will show a description of it). The system is based on a multi-head neural network, which initially detects and classifies the gestures, and subsequently, depending on the gesture detected, performs a second stage that carries out the corresponding action. This architecture optimizes the resources required to perform different tasks, it takes advantage of the information obtained from an initial backbone to perform different processes in a second stage. To train and evaluate the system, a dataset with about 40k images was manually compiled and labeled including different types of hand gestures, backgrounds (indoors and outdoors), lighting conditions, etc. This dataset contains synthetic gestures (whose objective is to pre-train the system to improve the results) and real images captured using different mobile phones. The comparison made with nearly 50 state-of-the-art methods shows competitive results as regards the different actions performed by the system, such as the accuracy of classification and localization of gestures, or the generation of descriptions for objects and scenes.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Garrido-Munoz, C.; Ríos-Vila, A.; Calvo-Zaragoza, J.
A holistic approach for image-to-graph: application to optical music recognition Journal Article
In: International Journal on Document Analysis and Recognition, 2022.
BibTeX | Tags: Leonardo2021
@article{k522,
title = {A holistic approach for image-to-graph: application to optical music recognition},
author = {C. Garrido-Munoz and A. Ríos-Vila and J. Calvo-Zaragoza},
year = {2022},
date = {2022-01-01},
urldate = {2022-01-01},
journal = {International Journal on Document Analysis and Recognition},
keywords = {Leonardo2021},
pubstate = {published},
tppubtype = {article}
}
2021
Calvo-Zaragoza, J.; Pertusa, A.; Gallego, A. J.; Iñesta, J. M.; Micó, L.; Oncina, J.; Perez-Sancho, C.; de León, P. J. Ponce; Rizo, D.
MultiScore Project: Multimodal Transcription of Music Scores Proceedings Article
In: Proceedings of the 14th Machine Learning and Music Workshop, pp. 3, 2021.
Links | BibTeX | Tags: MultiScore
@inproceedings{k481,
title = {MultiScore Project: Multimodal Transcription of Music Scores},
author = {J. Calvo-Zaragoza and A. Pertusa and A. J. Gallego and J. M. Iñesta and L. Micó and J. Oncina and C. Perez-Sancho and P. J. Ponce de León and D. Rizo},
url = {https://grfia.dlsi.ua.es/repositori/grfia/pubs/481/MML2021__MultiScore_Final.pdf},
year = {2021},
date = {2021-12-01},
urldate = {2021-12-01},
booktitle = {Proceedings of the 14th Machine Learning and Music Workshop},
pages = {3},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Castellanos, F. J.; Gallego, A. J.; Calvo-Zaragoza, J.
An Unsupervised Domain Adaptation framework for Layout Analysis of Music Score Images Proceedings Article
In: Proceedings of the 14th Machine Learning and Music Workshop, pp. 6, 2021.
@inproceedings{k482,
title = {An Unsupervised Domain Adaptation framework for Layout Analysis of Music Score Images},
author = {F. J. Castellanos and A. J. Gallego and J. Calvo-Zaragoza},
year = {2021},
date = {2021-12-01},
booktitle = {Proceedings of the 14th Machine Learning and Music Workshop},
pages = {6},
keywords = {GRE19-04, ROMA},
pubstate = {published},
tppubtype = {inproceedings}
}
Ríos-Vila, A.; Calvo-Zaragoza, J.; Iñesta, J. M.
CTC-based end-to-end approach for full page Optical Music Recognition Proceedings Article
In: Proceedings of the 14th Machine Learning and Music Workshop, pp. 11, 2021.
BibTeX | Tags: MultiScore
@inproceedings{k488,
title = {CTC-based end-to-end approach for full page Optical Music Recognition},
author = {A. Ríos-Vila and J. Calvo-Zaragoza and J. M. Iñesta},
year = {2021},
date = {2021-12-01},
booktitle = {Proceedings of the 14th Machine Learning and Music Workshop},
pages = {11},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Castellanos, F. J.; Gallego, A. J.; Calvo-Zaragoza, J.
Unsupervised Neural Domain Adaptation for Document Image Binarization Journal Article
In: Pattern Recognition, vol. 119, pp. 108099, 2021.
BibTeX | Tags: GRE19-04, HispaMus
@article{k467,
title = {Unsupervised Neural Domain Adaptation for Document Image Binarization},
author = {F. J. Castellanos and A. J. Gallego and J. Calvo-Zaragoza},
year = {2021},
date = {2021-11-01},
urldate = {2021-11-01},
journal = {Pattern Recognition},
volume = {119},
pages = {108099},
keywords = {GRE19-04, HispaMus},
pubstate = {published},
tppubtype = {article}
}
Alfaro-Contreras, M.; Rizo, D.; Iñesta, J. M.; Calvo-Zaragoza, J.
OMR-assisted transcription: a case study with early prints Proceedings Article
In: Proceedings of the 22nd International Society for Music Information Retrieval Conference, ISMIR, pp. 35-41, 2021, ISBN: 978-1-7327299-0-2.
BibTeX | Tags: MultiScore
@inproceedings{k483,
title = {OMR-assisted transcription: a case study with early prints},
author = {M. Alfaro-Contreras and D. Rizo and J. M. Iñesta and J. Calvo-Zaragoza},
isbn = {978-1-7327299-0-2},
year = {2021},
date = {2021-11-01},
urldate = {2021-11-01},
booktitle = {Proceedings of the 22nd International Society for Music Information Retrieval Conference, ISMIR},
pages = {35-41},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Castellanos, F. J.; Gallego, A. J.
Unsupervised Neural Document Analysis for Music Score Images Proceedings Article
In: Proc. of the 3rd International Workshop on Reading Music Systems, pp. 50-54, 2021.
BibTeX | Tags: GRE19-04, HispaMus
@inproceedings{k468,
title = {Unsupervised Neural Document Analysis for Music Score Images},
author = {F. J. Castellanos and A. J. Gallego},
year = {2021},
date = {2021-07-01},
booktitle = {Proc. of the 3rd International Workshop on Reading Music Systems},
pages = {50-54},
keywords = {GRE19-04, HispaMus},
pubstate = {published},
tppubtype = {inproceedings}
}
Madueño, A.; Ríos-Vila, A.; Rizo, D.
Automatized incipit encoding at the Andalusian Music Documentation Center Proceedings Article
In: Digital Libraries for Musicology / IAML Joint Session, 2021.
BibTeX | Tags: MultiScore
@inproceedings{k484,
title = {Automatized incipit encoding at the Andalusian Music Documentation Center},
author = {A. Madueño and A. Ríos-Vila and D. Rizo},
year = {2021},
date = {2021-07-01},
urldate = {2021-07-01},
booktitle = {Digital Libraries for Musicology / IAML Joint Session},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Ríos-Vila, A.; Esplà-Gomis, M.; Rizo, D.; de León, P. J. Ponce; Iñesta, J. M.
Applying Automatic Translation for Optical Music Recognition’s Encoding Step Journal Article
In: Applied Sciences, vol. 11, no. 9, 2021, ISSN: 2076-3417.
Abstract | BibTeX | Tags: GV/2020/030, HispaMus
@article{k464,
title = {Applying Automatic Translation for Optical Music Recognition’s Encoding Step},
author = {A. Ríos-Vila and M. Esplà-Gomis and D. Rizo and P. J. Ponce de León and J. M. Iñesta},
issn = {2076-3417},
year = {2021},
date = {2021-04-01},
urldate = {2021-04-01},
journal = {Applied Sciences},
volume = {11},
number = {9},
abstract = {Optical music recognition is a research field whose efforts have been mainly focused, due to the difficulties involved in its processes, on document and image recognition. However, there is a final step after the recognition phase that has not been properly addressed or discussed, and which is relevant to obtaining a standard digital score from the recognition process: the step of encoding data into a standard file format. In this paper, we address this task by proposing and evaluating the feasibility of using machine translation techniques, using statistical approaches and neural systems, to automatically convert the results of graphical encoding recognition into a standard semantic format, which can be exported as a digital score. We also discuss the implications, challenges and details to be taken into account when applying machine translation techniques to music languages, which are very different from natural human languages. This needs to be addressed prior to performing experiments and has not been reported in previous works. We also describe and detail experimental results, and conclude that applying machine translation techniques is a suitable solution for this task, as they have proven to obtain robust results.},
keywords = {GV/2020/030, HispaMus},
pubstate = {published},
tppubtype = {article}
}
Gallego, A. J.; Calvo-Zaragoza, J.; Fisher, R. B.
Incremental Unsupervised Domain-Adversarial Training of Neural Networks Journal Article
In: IEEE Transactions on Neural Networks and Learning Systems, vol. 32, no. 11, pp. 4864-4878, 2021, ISSN: 2162-2388.
Abstract | Links | BibTeX | Tags: GRE19-04, HispaMus
@article{k455,
title = {Incremental Unsupervised Domain-Adversarial Training of Neural Networks},
author = {A. J. Gallego and J. Calvo-Zaragoza and R. B. Fisher},
url = {https://grfia.dlsi.ua.es/repositori/grfia/pubs/455/2001.04129.pdf},
issn = {2162-2388},
year = {2021},
date = {2021-01-01},
urldate = {2021-01-01},
journal = {IEEE Transactions on Neural Networks and Learning Systems},
volume = {32},
number = {11},
pages = {4864-4878},
abstract = {In the context of supervised statistical learning, it is typically assumed that the training set comes from the same distribution that draws the test samples. When this is not the case, the behavior of the learned model is unpredictable and becomes dependent upon the degree of similarity between the distribution of the training set and the distribution of the test set. One of the research topics that investigates this scenario is referred to as Domain Adaptation (DA). Deep neural networks brought dramatic advances in pattern recognition and that is why there have been many attempts to provide good domain adaptation algorithms for these models. Here we take a different avenue and approach the problem from an incremental point of view, where the model is adapted to the new domain iteratively. We make use of an existing unsupervised domain-adaptation algorithm to identify the target samples on which there is greater confidence about their true label. The output of the model is analyzed in different ways to determine the candidate samples. The selected samples are then added to the source training set by self-labeling, and the process is repeated until all target samples are labeled. This approach implements a form of adversarial training in which, by moving the self-labeled samples from the target to the source set, the DA algorithm is forced to look for new features after each iteration. Our results report a clear improvement with respect to the non-incremental case in several datasets, also outperforming other state-of-the-art domain adaptation algorithms.},
keywords = {GRE19-04, HispaMus},
pubstate = {published},
tppubtype = {article}
}
Román, M. A.
An End-to-End Framework for Audio-to-Score Music Transcription PhD Thesis
2021.
@phdthesis{k462,
title = {An End-to-End Framework for Audio-to-Score Music Transcription},
author = {M. A. Román},
editor = {J. Calvo-Zaragoza and A. Pertusa},
year = {2021},
date = {2021-01-01},
urldate = {2021-01-01},
organization = {Universidad de Alicante},
keywords = {HispaMus},
pubstate = {published},
tppubtype = {phdthesis}
}
2024
Ríos-Vila, A.; Calvo-Zaragoza, J.; Paquet, T.
Sheet Music Transformer: End-To-End Optical Music Recognition Beyond Monophonic Transcription Conference
Document Analysis and Recognition - ICDAR 2024, vol. 1, Springer Nature Switzerland, 2024, ISBN: 978-3-031-70552-6.
BibTeX | Tags: MultiScore
@conference{RiosVila:ICDAR:2024,
title = {Sheet Music Transformer: End-To-End Optical Music Recognition Beyond Monophonic Transcription},
author = {A. Ríos-Vila and J. Calvo-Zaragoza and T. Paquet},
isbn = {978-3-031-70552-6},
year = {2024},
date = {2024-09-02},
urldate = {2024-09-02},
booktitle = {Document Analysis and Recognition - ICDAR 2024},
volume = {1},
pages = {20-37},
publisher = {Springer Nature Switzerland},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {conference}
}
Maciá, M.; Rizo, D.
The Impact of UX/UI on Piano-Assisted Learning in Extended Reality Conference
Computer Supported Music Education. Angers, France., 2024.
BibTeX | Tags:
@conference{macia2024,
title = {The Impact of UX/UI on Piano-Assisted Learning in Extended Reality},
author = {M. Maciá and D. Rizo},
year = {2024},
date = {2024-05-04},
urldate = {2024-05-04},
booktitle = {Computer Supported Music Education. Angers, France.},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Alfaro-Contreras, M.; Rios-Vila, A.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
A Transformer Approach for Polyphonic Audio-to-Score Transcription Proceedings Article
In: Proceedings of the International Conference on Acoustics, Speech, and Signal Processing (ICASSP 2024), Seoul (Korea), 2024.
BibTeX | Tags: MultiScore
@inproceedings{Alfaro-Contreras:ICASSP24,
title = {A Transformer Approach for Polyphonic Audio-to-Score Transcription},
author = {M. Alfaro-Contreras and A. Rios-Vila and J. J. Valero-Mas and J. Calvo-Zaragoza},
year = {2024},
date = {2024-04-19},
urldate = {2024-04-19},
booktitle = {Proceedings of the International Conference on Acoustics, Speech, and Signal Processing (ICASSP 2024)},
address = {Seoul (Korea)},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
2023
Martínez-Sevilla, J. C.; Ríos-Vila, A.; Castellanos, F. J.; Calvo-Zaragoza, J.
A Holistic Approach for Aligned Music and Lyrics Transcription Conference
Document Analysis and Recognition - ICDAR 2023, vol. 1, Springer Nature Switzerland, Cham, 2023, ISBN: 978-3-031-41676-7.
Abstract | Links | BibTeX | Tags: REPERTORIUM
@conference{MartinezSevilla:ICDAR:2023,
title = {A Holistic Approach for Aligned Music and Lyrics Transcription},
author = {J.C. Martínez-Sevilla and A. Ríos-Vila and F. J. Castellanos and J. Calvo-Zaragoza },
editor = {Fink, Gernot A. and Jain, Rajiv and Kise, Koichi and Zanibbi, Richard},
doi = {https://doi.org/10.1007/978-3-031-41676-7_11},
isbn = {978-3-031-41676-7},
year = {2023},
date = {2023-08-28},
urldate = {2023-08-28},
booktitle = {Document Analysis and Recognition - ICDAR 2023},
volume = {1},
pages = {185--201},
publisher = {Springer Nature Switzerland},
address = {Cham},
abstract = {In this paper, we present the Aligned Music Notation and Lyrics Transcription (AMNLT) challenge, whose goal is to retrieve the content from document images of vocal music. This new research area arises from the need to automatically transcribe notes and lyrics from music scores and align both sources of information conveniently. Although existing methods are able to deal with music notation and text, they work without providing their proper alignment, which is crucial to actually retrieve the content of the piece of vocal music. To overcome this challenge, we consider holistic neural approaches that transcribe music and text in one step, along with an encoding that implicitly aligns the sources of information. The methodology is evaluated on a benchmark specifically designed for AMNLT. The results report that existing methods can obtain high-quality text and music transcriptions, but posterior alignment errors are inevitably found. However, our formulation achieves relative improvements of over 80{%} in the metric that considers both transcription and alignment. We hope that this work will establish itself as a future reference for further research on AMNLT.},
keywords = {REPERTORIUM},
pubstate = {published},
tppubtype = {conference}
}
Martínez-Sevilla, J. C.; Alfaro-Contreras, M.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Insights into end-to-end audio-to-score transcription with real recordings: A case study with saxophone works Proceedings Article
In: INTERSPEECH Conference, pp. 2793-2797, Dublin, Ireland, 2023.
Links | BibTeX | Tags: MultiScore
@inproceedings{Martínez-Sevilla2023,
title = {Insights into end-to-end audio-to-score transcription with real recordings: A case study with saxophone works},
author = {J.C. Martínez-Sevilla and M. Alfaro-Contreras and J. J. Valero-Mas and J. Calvo-Zaragoza},
doi = {10.21437/Interspeech.2023-88},
year = {2023},
date = {2023-08-20},
urldate = {2023-08-20},
booktitle = {INTERSPEECH Conference},
pages = {2793-2797},
address = {Dublin, Ireland},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Alfaro-Contreras, M.; Valero-Mas, J. J.; Iñesta, J. M.; Calvo-Zaragoza, J.
Multimodal Strategies for Image and Audio Music Transcription: A Comparative Study Proceedings Article
In: Pattern Recognition, Computer Vision, and Image Processing. ICPR 2022 International Workshops and Challenges. ICPR 2022. Lecture Notes in Computer Science, pp. 64-77, Springer Nature Switzerland, Cham, 2023, ISBN: 978-3-031-37731-0.
Links | BibTeX | Tags: MultiScore
@inproceedings{k505,
title = {Multimodal Strategies for Image and Audio Music Transcription: A Comparative Study},
author = {M. Alfaro-Contreras and J. J. Valero-Mas and J. M. Iñesta and J. Calvo-Zaragoza},
doi = {10.1007/978-3-031-37731-0_6},
isbn = {978-3-031-37731-0},
year = {2023},
date = {2023-08-10},
urldate = {2022-01-01},
booktitle = {Pattern Recognition, Computer Vision, and Image Processing. ICPR 2022 International Workshops and Challenges. ICPR 2022. Lecture Notes in Computer Science},
volume = {13645},
pages = {64-77},
publisher = {Springer Nature Switzerland},
address = {Cham},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Garrido-Munoz, C.; Alfaro-Contreras, M.; Calvo-Zaragoza, J.
Evaluating Domain Generalization in Kitchen Utensils Classification Proceedings Article
In: Iberian Conference on Pattern Recognition and Image Analysis, pp. 108-118, 2023.
Links | BibTeX | Tags: MultiScore
@inproceedings{Garrido-Munoz2023,
title = {Evaluating Domain Generalization in Kitchen Utensils Classification},
author = {C. Garrido-Munoz and M. Alfaro-Contreras and J. Calvo-Zaragoza},
doi = {10.1007/978-3-031-36616-1_9},
year = {2023},
date = {2023-06-25},
booktitle = {Iberian Conference on Pattern Recognition and Image Analysis},
pages = {108-118},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
González-Barrachina, P.; Alfaro-Contreras, M.; Nieto-Hidalgo, M.; Calvo-Zaragoza, J.
Lifelong Learning for Document Image Binarization: An Experimental Study Proceedings Article
In: Iberian Conference on Pattern Recognition and Image Analysis, pp. 146-157, 2023.
Links | BibTeX | Tags: MultiScore
@inproceedings{González-Barrachina2023,
title = {Lifelong Learning for Document Image Binarization: An Experimental Study},
author = {P. González-Barrachina and M. Alfaro-Contreras and M. Nieto-Hidalgo and J. Calvo-Zaragoza },
doi = {10.1007/978-3-031-36616-1_12},
year = {2023},
date = {2023-06-25},
urldate = {2023-06-25},
booktitle = {Iberian Conference on Pattern Recognition and Image Analysis},
pages = {146-157},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Alfaro-Contreras, M.; Iñesta, J. M.; Calvo-Zaragoza, J.
Optical Music Recognition for Homophonic Scores with Neural Networks and Synthetic Music Generation Journal Article
In: International Journal of Multimedia Information Retrieval, vol. 12, pp. 12-24, 2023.
@article{Alfaro-Contreras2023b,
title = {Optical Music Recognition for Homophonic Scores with Neural Networks and Synthetic Music Generation},
author = {M. Alfaro-Contreras and J. M. Iñesta and J. Calvo-Zaragoza},
doi = {10.1007/s13735-023-00278-5},
year = {2023},
date = {2023-05-26},
urldate = {2023-05-26},
journal = {International Journal of Multimedia Information Retrieval},
volume = {12},
pages = {12-24},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Ríos-Vila, A.; Rizo, D.; Iñesta, J. M.; Calvo-Zaragoza, J.
End-to-end optical music recognition for pianoform sheet music Journal Article
In: International Journal on Document Analysis and Recognition (IJDAR), iss. ICDAR 2023, 2023, ISSN: 1433-2825.
Abstract | Links | BibTeX | Tags: MultiScore
@article{Ríos-Vila2023,
title = {End-to-end optical music recognition for pianoform sheet music},
author = {A. Ríos-Vila and D. Rizo and J. M. Iñesta and J. Calvo-Zaragoza},
url = {https://link.springer.com/content/pdf/10.1007/s10032-023-00432-z.pdf},
doi = {10.1007/s10032-023-00432-z},
issn = {1433-2825},
year = {2023},
date = {2023-05-12},
urldate = {2023-05-12},
journal = {International Journal on Document Analysis and Recognition (IJDAR)},
issue = {ICDAR 2023},
abstract = {End-to-end solutions have brought about significant advances in the field of Optical Music Recognition. These approaches directly provide the symbolic representation of a given image of a musical score. Despite this, several documents, such as pianoform musical scores, cannot yet benefit from these solutions since their structural complexity does not allow their effective transcription. This paper presents a neural method whose objective is to transcribe these musical scores in an end-to-end fashion. We also introduce the GrandStaff dataset, which contains 53,882 single-system piano scores in common western modern notation. The sources are encoded in both a standard digital music representation and its adaptation for current transcription technologies. The method proposed in this paper is trained and evaluated using this dataset. The results show that the approach presented is, for the first time, able to effectively transcribe pianoform notation in an end-to-end manner.},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {article}
}
Alfaro-Contreras, M.; Ríos-Vila, A.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Few-Shot Symbol Classification via Self-Supervised Learning and Nearest Neighbor Journal Article
In: Pattern Recognition Letters, vol. 167, pp. 1-8, 2023.
Links | BibTeX | Tags: MultiScore
@article{Alfaro-Contreras2023,
title = {Few-Shot Symbol Classification via Self-Supervised Learning and Nearest Neighbor},
author = {M. Alfaro-Contreras and A. Ríos-Vila and J. J. Valero-Mas and J. Calvo-Zaragoza},
doi = {10.1016/j.patrec.2023.01.014},
year = {2023},
date = {2023-03-01},
journal = {Pattern Recognition Letters},
volume = {167},
pages = {1-8},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {article}
}
Valero-Mas, J. J.; Gallego, A. J.; Alonso-Jiménez, P.; Serra, X.
Multilabel Prototype Generation for Data Reduction in k-Nearest Neighbour classification Journal Article
In: Pattern Recognition, vol. 135, pp. 109190, 2023, ISSN: 0031-3203.
Abstract | BibTeX | Tags: DOREMI, MultiScore
@article{k519,
title = {Multilabel Prototype Generation for Data Reduction in k-Nearest Neighbour classification},
author = {J. J. Valero-Mas and A. J. Gallego and P. Alonso-Jiménez and X. Serra},
issn = {0031-3203},
year = {2023},
date = {2023-01-01},
journal = {Pattern Recognition},
volume = {135},
pages = {109190},
abstract = {Prototype Generation (PG) methods are typically considered for improving the efficiency of the k-Nearest Neighbour (kNN) classifier when tackling high-size corpora. Such approaches aim at generating a reduced version of the corpus without decreasing the classification performance when compared to the initial set. Despite their large application in multiclass scenarios, very few works have addressed the proposal of PG methods for the multilabel space. In this regard, this work presents the novel adaptation of four multiclass PG strategies to the multilabel case. These proposals are evaluated with three multilabel kNN-based classifiers, 12 corpora comprising a varied range of domains and corpus sizes, and different noise scenarios artificially induced in the data. The results obtained show that the proposed adaptations are capable of significantly improving—both in terms of efficiency and classification performance—the only reference multilabel PG work in the literature as well as the case in which no PG method is applied, also presenting statistically superior robustness in noisy scenarios. Moreover, these novel PG strategies allow prioritising either the efficiency or efficacy criteria through its configuration depending on the target scenario, hence covering a wide area in the solution space not previously filled by other works.},
keywords = {DOREMI, MultiScore},
pubstate = {published},
tppubtype = {article}
}
Sánchez-Ferrer, A.; Valero-Mas, J. J.; Gallego, A. J.; Calvo-Zaragoza, J.
An Experimental Study on Marine Debris Location and Recognition using Object Detection Journal Article
In: Pattern Recognition Letters, 2023, ISSN: 0167-8655.
Abstract | BibTeX | Tags: TADMar
@article{k521,
title = {An Experimental Study on Marine Debris Location and Recognition using Object Detection},
author = {A. Sánchez-Ferrer and J. J. Valero-Mas and A. J. Gallego and J. Calvo-Zaragoza},
issn = {0167-8655},
year = {2023},
date = {2023-01-01},
journal = {Pattern Recognition Letters},
abstract = {The large amount of debris in our oceans is a global problem that dramatically impacts marine fauna and flora. While a large number of human-based campaigns have been proposed to tackle this issue, these efforts have been deemed insufficient due to the insurmountable amount of existing litter. In response to that, there exists a high interest in the use of autonomous underwater vehicles (AUV) that may locate, identify, and collect this garbage automatically. To perform such a task, AUVs consider state-of-the-art object detection techniques based on deep neural networks due to their reported high performance. Nevertheless, these techniques generally require large amounts of data with fine-grained annotations. In this work, we explore the capabilities of the reference object detector Mask Region-based Convolutional Neural Networks for automatic marine debris location and classification in the context of limited data availability. Considering the recent CleanSea corpus, we pose several scenarios regarding the amount of available train data and study the possibility of mitigating the adverse effects of data scarcity with synthetic marine scenes. Our results achieve a new state of the art in the task, establishing a new reference for future research. In addition, it is shown that the task still has room for improvement and that the lack of data can be somehow alleviated, yet to a limited extent.},
keywords = {TADMar},
pubstate = {published},
tppubtype = {article}
}
Alfaro-Contreras, M.; Valero-Mas, J. J.; Iñesta, J. M.; Calvo-Zaragoza, J.
Late multimodal fusion for image and audio music transcription Journal Article
In: Expert Systems With Applications, vol. 216, pp. 119491-119500, 2023.
Links | BibTeX | Tags: MultiScore
@article{Alfaro-Contreras2023c,
title = {Late multimodal fusion for image and audio music transcription},
author = {M. Alfaro-Contreras and J. J. Valero-Mas and J. M. Iñesta and J. Calvo-Zaragoza},
doi = {10.1016/j.eswa.2022.119491},
year = {2023},
date = {2023-01-01},
urldate = {2023-01-01},
journal = {Expert Systems With Applications},
volume = {216},
pages = {119491-119500},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {article}
}
2022
Ríos-Vila, A.; Iñesta, J. M.; Calvo-Zaragoza, J.
End-to-End Full-Page Optical Music Recognition for Mensural Notation Proceedings Article
In: Proceedings of the 23rd International Society for Music Information Retrieval Conference, pp. 226-232, 2022, ISBN: 978-1-7327299-2-6.
Abstract | Links | BibTeX | Tags: Leonardo2021, MultiScore
@inproceedings{Ríos-Vila2022,
title = {End-to-End Full-Page Optical Music Recognition for Mensural Notation},
author = {A. Ríos-Vila and J. M. Iñesta and J. Calvo-Zaragoza},
url = {https://zenodo.org/record/7342678/files/000026.pdf?download=1},
doi = {https://doi.org/10.5281/zenodo.7342678},
isbn = {978-1-7327299-2-6},
year = {2022},
date = {2022-12-04},
urldate = {2022-12-04},
booktitle = {Proceedings of the 23rd International Society for Music Information Retrieval Conference},
journal = {Proceedings of the 23rd International Society for Music Information Retrieval Conference},
pages = {226-232},
abstract = {Optical Music Recognition (OMR) systems typically consider workflows that include several steps, such as staff detection, symbol recognition, and semantic reconstruction. However, fine-tuning these systems is costly due to the specific data labeling process that has to be performed to train models for each of these steps. In this paper, we present the first segmentation-free full-page OMR system that receives a page image and directly outputs the transcription in a single step. This model requires only the annotations of full score pages, which greatly alleviates the task of manual labeling. The model has been tested with early music written in mensural notation, for which the presented approach is especially beneficial. Results show that this methodology provides a solution with promising results and establishes a new line of research for holistic transcription of music score pages.},
keywords = {Leonardo2021, MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Rizo, D.; Delgado, T.; Calvo-Zaragoza, J.; Madueño, A.; García-Iasci, P.
Speeding-up the encoding of mensural collections from Spanish libraries Journal Article
In: IAML 2022 Prague, 2022.
BibTeX | Tags: MultiScore
@article{k502,
title = {Speeding-up the encoding of mensural collections from Spanish libraries},
author = {D. Rizo and T. Delgado and J. Calvo-Zaragoza and A. Madueño and P. García-Iasci},
year = {2022},
date = {2022-07-01},
booktitle = {IAML 2022 Prague},
journal = {IAML 2022 Prague},
organization = {IAML},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {article}
}
Alfaro-Contreras, M.; Valero-Mas, J. J.; Iñesta, J. M.; Calvo-Zaragoza, J.
Insights into transfer learning between image and audio music transcription Proceedings Article
In: Sound and Music Computing Conference, pp. 295-301, Zenodo, Saint-Étienne, France, 2022.
Abstract | Links | BibTeX | Tags: MultiScore
@inproceedings{Alfaro-Contreras2022b,
title = {Insights into transfer learning between image and audio music transcription},
author = {M. Alfaro-Contreras and J. J. Valero-Mas and J. M. Iñesta and J. Calvo-Zaragoza},
doi = {10.5281/zenodo.6797870},
year = {2022},
date = {2022-06-01},
urldate = {2022-06-01},
booktitle = {Sound and Music Computing Conference},
pages = {295-301},
publisher = {Zenodo},
address = {Saint-Étienne, France},
abstract = {Optical Music Recognition (OMR) and Automatic Music Transcription (AMT) stand for the research fields that devise methods to transcribe music sources---documents or audio signals, respectively---into a structured digital format. Historically, they have followed different approaches to achieve the same goal. However, their recent definition in terms of sequence labeling tasks gathers them under a common formulation framework. Under this premise, one may wonder if there exist any synergies between the two fields that could be exploited to improve the individual recognition rates in their respective domains. In this work, we aim to further explore this question from a Transfer Learning (TL) point of view in the context of neural end-to-end recognition models. More precisely, we consider a music transcription system, trained on either image or audio data, and adapt its performance to the unseen domain during the training phase using different TL schemes. Results show that knowledge transfer slightly boosts model performance with sufficient available data, but it is not properly leveraged when the latter condition is not met. This opens up a new promising, yet challenging, research path towards building an effective bridge between two solutions of the same problem.},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Alfaro-Contreras, M.; Ríos-Vila, A.; Valero-Mas, J. J.; Iñesta, J. M.; Calvo-Zaragoza, J.
Decoupling music notation to improve end-to-end Optical Music Recognition Journal Article
In: Pattern Recognition Letters, vol. 158, pp. 157-163, 2022, ISSN: 0167-8655.
Abstract | Links | BibTeX | Tags: MultiScore
@article{Alfaro-Contreras2022,
title = {Decoupling music notation to improve end-to-end Optical Music Recognition},
author = {M. Alfaro-Contreras and A. Ríos-Vila and J. J. Valero-Mas and J. M. Iñesta and J. Calvo-Zaragoza},
doi = {10.1016/j.patrec.2022.04.032},
issn = {0167-8655},
year = {2022},
date = {2022-06-01},
urldate = {2022-06-01},
journal = {Pattern Recognition Letters},
volume = {158},
pages = {157-163},
abstract = {Inspired by the Text Recognition field, end-to-end schemes based on Convolutional Recurrent Neural Networks (CRNN) trained with the Connectionist Temporal Classification (CTC) loss function are considered one of the current state-of-the-art techniques for staff-level Optical Music Recognition (OMR). Unlike text symbols, music-notation elements may be defined as a combination of (i) a shape primitive located in (ii) a certain position in a staff. However, this double nature is generally neglected in the learning process, as each combination is treated as a single token. In this work, we study whether exploiting such particularity of music notation actually benefits the recognition performance and, if so, which approach is the most appropriate. For that, we thoroughly review existing specific approaches that explore this premise and propose different combinations of them. Furthermore, considering the limitations observed in such approaches, a novel decoding strategy specifically designed for OMR is proposed. The results obtained with four different corpora of historical manuscripts show the relevance of leveraging this double nature of music notation since it outperforms the standard approaches where it is ignored. In addition, the proposed decoding leads to significant reductions in the error rates with respect to the other cases.},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {article}
}
Arroyo, V.; Valero-Mas, J. J.; Calvo-Zaragoza, J.; Pertusa, A.
Neural audio-to-score music transcription for unconstrained polyphony using compact output representations Proceedings Article
In: Proc. of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), IEEE, Singapore, 2022.
BibTeX | Tags: MultiScore
@inproceedings{k487,
title = {Neural audio-to-score music transcription for unconstrained polyphony using compact output representations},
author = {V. Arroyo and J. J. Valero-Mas and J. Calvo-Zaragoza and A. Pertusa},
year = {2022},
date = {2022-05-01},
booktitle = {Proc. of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
publisher = {IEEE},
address = {Singapore},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Ríos-Vila, A.; Iñesta, J. M.; Calvo-Zaragoza, J.
On the Use of Transformers for End-to-End Optical Music Recognition Proceedings Article
In: Iberian Pattern Recognition and Image Analysis, IbPRIA 2022., pp. 470-481, Aveiro, Portugal, 2022, ISBN: 978-3-031-04880-7.
BibTeX | Tags: MultiScore
@inproceedings{k492,
title = {On the Use of Transformers for End-to-End Optical Music Recognition},
author = {A. Ríos-Vila and J. M. Iñesta and J. Calvo-Zaragoza},
isbn = {978-3-031-04880-7},
year = {2022},
date = {2022-05-01},
urldate = {2022-05-01},
booktitle = {Iberian Pattern Recognition and Image Analysis, IbPRIA 2022.},
pages = {470-481},
address = {Aveiro, Portugal},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Garrido-Munoz, C.; Ríos-Vila, A.; Calvo-Zaragoza, J.
Retrieval of Music-Notation Primitives via Image-to-Sequence Approaches Proceedings Article
In: Iberian Pattern Recognition and Image Analysis, IbPRIA 2022., pp. 482-492, Aveiro, Portugal, 2022, ISBN: 978-3-031-04880-7.
BibTeX | Tags: Leonardo2021
@inproceedings{k493,
title = {Retrieval of Music-Notation Primitives via Image-to-Sequence Approaches},
author = {C. Garrido-Munoz and A. Ríos-Vila and J. Calvo-Zaragoza},
isbn = {978-3-031-04880-7},
year = {2022},
date = {2022-05-01},
booktitle = {Iberian Pattern Recognition and Image Analysis, IbPRIA 2022.},
pages = {482-492},
address = {Aveiro, Portugal},
keywords = {Leonardo2021},
pubstate = {published},
tppubtype = {inproceedings}
}
Mas-Candela, E.; Ríos-Vila, A.; Calvo-Zaragoza, J.
A First Approach to Image Transformation Sequence Retrieval Proceedings Article
In: Iberian Pattern Recognition and Image Analysis, IbPRIA 2022., pp. 321-332, Aveiro, Portugal, 2022.
BibTeX | Tags: MultiScore
@inproceedings{k494,
title = {A First Approach to Image Transformation Sequence Retrieval},
author = {E. Mas-Candela and A. Ríos-Vila and J. Calvo-Zaragoza},
year = {2022},
date = {2022-05-01},
booktitle = {Iberian Pattern Recognition and Image Analysis, IbPRIA 2022.},
pages = {321-332},
address = {Aveiro, Portugal},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Münnich, S.; Rizo, D.
Music Encoding Conference Proceedings 2022. Book
Humanities Commons, 2022, ISBN: 978-84-1302-173-7.
BibTeX | Tags: MultiScore
@book{k495,
title = {Music Encoding Conference Proceedings 2022.},
author = {S. Münnich and D. Rizo},
editor = {S. Münnich and D. Rizo},
isbn = {978-84-1302-173-7},
year = {2022},
date = {2022-05-01},
urldate = {2022-05-01},
publisher = {Humanities Commons},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {book}
}
Münnich, S.; Rizo, D.
Foreword Proceedings Article
In: Münnich, S.; Rizo, D. (Ed.): Music Encoding Conference Proceedings 2021, pp. vii–viii, Humanities Commons, 2022, ISBN: 978-84-1302-173-7.
BibTeX | Tags: MultiScore
@inproceedings{k496,
title = {Foreword},
author = {S. Münnich and D. Rizo},
editor = {S. Münnich and D. Rizo},
isbn = {978-84-1302-173-7},
year = {2022},
date = {2022-05-01},
urldate = {2022-05-01},
booktitle = {Music Encoding Conference Proceedings 2021},
pages = {vii–viii},
publisher = {Humanities Commons},
chapter = {1},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Desmond, K.; Pugin, L.; Regimbal, J.; Rizo, D.; Sapp, C. S.; Thomae, M. E.
Encoding Polyphony from Medieval Manuscripts Notated in Mensural Notation Proceedings Article
In: Music Encoding Conference Proceedings 2021, pp. 197–219, Humanities Commons, 2022, ISBN: 978-84-1302-173-7.
BibTeX | Tags: MultiScore
@inproceedings{k497,
title = {Encoding Polyphony from Medieval Manuscripts Notated in Mensural Notation},
author = {K. Desmond and L. Pugin and J. Regimbal and D. Rizo and C. S. Sapp and M. E. Thomae},
isbn = {978-84-1302-173-7},
year = {2022},
date = {2022-05-01},
urldate = {2022-05-01},
booktitle = {Music Encoding Conference Proceedings 2021},
pages = {197–219},
publisher = {Humanities Commons},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Sánchez-Ferrer, A.; Gallego, A. J.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
The CleanSea Set: A Benchmark Corpus for Underwater Debris Detection and Recognition Proceedings Article
In: 10th Iberian Conference on Pattern Recognition and Image Analysis (IbPRIA), pp. 616–628, Aveiro, Portugal, 2022, ISBN: 978-3-031-04881-4.
@inproceedings{k512,
title = {The CleanSea Set: A Benchmark Corpus for Underwater Debris Detection and Recognition},
author = {A. Sánchez-Ferrer and A. J. Gallego and J. J. Valero-Mas and J. Calvo-Zaragoza},
isbn = {978-3-031-04881-4},
year = {2022},
date = {2022-05-01},
booktitle = {10th Iberian Conference on Pattern Recognition and Image Analysis (IbPRIA)},
pages = {616--628},
address = {Aveiro, Portugal},
abstract = {In recent years, the large amount of debris scattered throughout the ocean is becoming one of the major pollution problems, causing extinction of species and accelerating the degradation of our planet, among other environmental issues. Since the manual treatment of this waste represents a considerably tedious task, autonomous frameworks are gaining attention. Due to their reported good performance, such frameworks generally rely on Deep Learning techniques. However, the scarcity of data coupled with the inherent difficulties of the field---debris with different shapes and colors due to long-lasting exposure to the ocean, illumination variability or sea conditions---makes detecting underwater objects a particularly challenging task. The contribution of this work to the field is double: on the one hand, we introduce a novel data collection for supervised learning---the CleanSea corpus---annotated at both the bound box and contour levels of the objects to contribute with the research and progress in the field and on the other hand, we devise and optimize a recognition model based on the reference Mask Object-Based Convolutional Neural Network for this set to establish a benchmark for future comparison and assess its performance in both simulated and real-world scenarios. Results show the relevance of the contributions as the devised model is capable of properly addressing the detection and recognition of general debris when trained with the introduced CleanSea corpus.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Iñesta, J. M.; Thomae, M. E.
An On-line Tool for Transcription of Music Scores: MuRET Presentation
Montreal (Canada), 01.05.2022.
Abstract | BibTeX | Tags: HispaMus
@misc{k520,
title = {An On-line Tool for Transcription of Music Scores: MuRET},
author = {J. M. Iñesta and M. E. Thomae},
year = {2022},
date = {2022-05-01},
urldate = {2022-05-01},
booktitle = {1st Int. Conf. The Sound of Future/The Future of Sound},
address = {Montreal (Canada)},
organization = {CIRMMT},
abstract = {MuRET is a Machine-Learning Optical Music Recognition (OMR) research tool. It runs in the browser. It has been created for helping in the transcription of music collections, for experimenting with machine learning algorithms for OMR and it's capable of working well with different notations and writings. Why using Machine Learning? Instead of designing a system to solve the task, we have designed a system to learn how to solve the task from sets of labeled (solved) images. This way it's adaptable to new (previously unseen) collections.},
key = {OMR, Machine Learning},
keywords = {HispaMus},
pubstate = {published},
tppubtype = {presentation}
}
Fuente, C.; Valero-Mas, J. J.; Castellanos, F. J.; Calvo-Zaragoza, J.
Multimodal Image and Audio Music Transcription Journal Article
In: International Journal of Multimedia Information Retrieval, vol. 11, pp. 77-84, 2022.
BibTeX | Tags: MultiScore
@article{k479,
title = {Multimodal Image and Audio Music Transcription},
author = {C. Fuente and J. J. Valero-Mas and F. J. Castellanos and J. Calvo-Zaragoza},
year = {2022},
date = {2022-01-01},
journal = {International Journal of Multimedia Information Retrieval},
volume = {11},
pages = {77-84},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {article}
}
Castellanos, F. J.; Garrido-Munoz, C.; Ríos-Vila, A.; Calvo-Zaragoza, J.
Region-based Layout Analysis of Music Score Images Journal Article
In: Expert Systems with Applications, pp. 118211, 2022, ISSN: 0957-4174.
BibTeX | Tags: MultiScore
@article{k486,
title = {Region-based Layout Analysis of Music Score Images},
author = {F. J. Castellanos and C. Garrido-Munoz and A. Ríos-Vila and J. Calvo-Zaragoza},
issn = {0957-4174},
year = {2022},
date = {2022-01-01},
urldate = {2022-01-01},
journal = {Expert Systems with Applications},
pages = {118211},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {article}
}
Rosello, A.; Ayllon, E.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Test Sample Selection for Handwriting Recognition Through Language Modeling Proceedings Article
In: Pattern Recognition and Image Analysis - 10th Iberian Conference, IbPRIA 2022, Aveiro, Portugal, May 4-6, 2022, Proceedings, 2022.
BibTeX | Tags: MultiScore
@inproceedings{k498,
title = {Test Sample Selection for Handwriting Recognition Through Language Modeling},
author = {A. Rosello and E. Ayllon and J. J. Valero-Mas and J. Calvo-Zaragoza},
year = {2022},
date = {2022-01-01},
booktitle = {Pattern Recognition and Image Analysis - 10th Iberian Conference, IbPRIA 2022, Aveiro, Portugal, May 4-6, 2022, Proceedings},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Ríos-Vila, A.; Iñesta, J. M.; Calvo-Zaragoza, J.
End-to-End Full-Page Optical Music Recognition for Mensural Notation Proceedings Article
In: Proceedings of the 23rd International Society for Music Information Retrieval Conference, ISMIR, Bangalore, India, 2022.
Abstract | BibTeX | Tags: Leonardo2021, MultiScore
@inproceedings{k499,
title = {End-to-End Full-Page Optical Music Recognition for Mensural Notation},
author = {A. Ríos-Vila and J. M. Iñesta and J. Calvo-Zaragoza},
year = {2022},
date = {2022-01-01},
urldate = {2022-01-01},
booktitle = {Proceedings of the 23rd International Society for Music Information Retrieval Conference, ISMIR},
address = {Bangalore, India},
abstract = {Optical Music Recognition (OMR) systems typically consider workflows that include several steps, such as staff detection, symbol recognition, and semantic reconstruction. However, fine-tuning these systems is costly due to the specific data labeling process that has to be performed to train models for each of these steps. In this paper, we present the first segmentation-free full-page OMR system that receives a page image and directly outputs the transcription in a single step. This model requires only the annotations of full score pages, which greatly alleviates the task of manual labeling. The model has been tested with early music written in mensural notation, for which the presented approach is especially beneficial. Results show that this methodology provides a solution with promising results and establishes a new line of research for holistic transcription of music score pages.},
keywords = {Leonardo2021, MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Castellanos, F. J.; Gallego, A. J.; Calvo-Zaragoza, J.; Fujinaga, I.
Domain Adaptation for Staff-Region Retrieval of Music Score Images Journal Article
In: International Journal on Document Analysis and Recognition, 2022, ISSN: 1433-2825.
BibTeX | Tags: MultiScore
@article{k500,
title = {Domain Adaptation for Staff-Region Retrieval of Music Score Images},
author = {F. J. Castellanos and A. J. Gallego and J. Calvo-Zaragoza and I. Fujinaga},
issn = {1433-2825},
year = {2022},
date = {2022-01-01},
journal = {International Journal on Document Analysis and Recognition},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {article}
}
de la Fuente, C.; Castellanos, F. J.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Multimodal Recognition of Frustration during Game-Play with Deep Neural Networks Journal Article
In: Multimedia Tools and Applications, 2022.
BibTeX | Tags:
@article{k501,
title = {Multimodal Recognition of Frustration during Game-Play with Deep Neural Networks},
author = {C. de la Fuente and F. J. Castellanos and J. J. Valero-Mas and J. Calvo-Zaragoza},
year = {2022},
date = {2022-01-01},
urldate = {2022-01-01},
journal = {Multimedia Tools and Applications},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Alfaro-Contreras, M.; Ríos-Vila, A.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Few-Shot Music Symbol Classification via Self-Supervised Learning and Nearest Neighbor Proceedings Article
In: Pattern Recognition. ICPR International Workshops and Challenges, 2022.
BibTeX | Tags: MultiScore
@inproceedings{k504,
title = {Few-Shot Music Symbol Classification via Self-Supervised Learning and Nearest Neighbor},
author = {M. Alfaro-Contreras and A. Ríos-Vila and J. J. Valero-Mas and J. Calvo-Zaragoza},
year = {2022},
date = {2022-01-01},
booktitle = {Pattern Recognition. ICPR International Workshops and Challenges},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Sáez-Pérez, J.; Gallego, A. J.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Domain Adaptation in Robotics: A Study Case on Kitchen Utensil Recognition Proceedings Article
In: 10th Iberian Conference on Pattern Recognition and Image Analysis (IbPRIA), 2022.
@inproceedings{k506,
title = {Domain Adaptation in Robotics: A Study Case on Kitchen Utensil Recognition},
author = {J. Sáez-Pérez and A. J. Gallego and J. J. Valero-Mas and J. Calvo-Zaragoza},
year = {2022},
date = {2022-01-01},
booktitle = {10th Iberian Conference on Pattern Recognition and Image Analysis (IbPRIA)},
keywords = {ROMA},
pubstate = {published},
tppubtype = {inproceedings}
}
Alashhab, S.
Aplicaciones de visión artificial para ayuda a personas con dificultades visuales PhD Thesis
2022.
BibTeX | Tags:
@phdthesis{k508,
title = {Aplicaciones de visión artificial para ayuda a personas con dificultades visuales},
author = {S. Alashhab},
editor = {Miguel Angel Lozano and Antonio Javier Gallego},
year = {2022},
date = {2022-01-01},
urldate = {2022-01-01},
organization = {Universidad de Alicante},
keywords = {},
pubstate = {published},
tppubtype = {phdthesis}
}
Bernabeu, M.
Búsqueda de imágenes similares usando técnicas de aprendizaje automático PhD Thesis
2022.
BibTeX | Tags:
@phdthesis{k509,
title = {Búsqueda de imágenes similares usando técnicas de aprendizaje automático},
author = {M. Bernabeu},
editor = {Antonio Pertusa and Antonio Javier Gallego},
year = {2022},
date = {2022-01-01},
urldate = {2022-01-01},
organization = {Universidad de Alicante},
keywords = {},
pubstate = {published},
tppubtype = {phdthesis}
}
Gallego, A. J.; Rico-Juan, J. R.; Valero-Mas, J. J.
Efficient k-nearest neighbor search based on clustering and adaptive k values Journal Article
In: Pattern Recognition, vol. 122, pp. 108356, 2022, ISSN: 0031-3203.
@article{k510,
title = {Efficient k-nearest neighbor search based on clustering and adaptive k values},
author = {A. J. Gallego and J. R. Rico-Juan and J. J. Valero-Mas},
issn = {0031-3203},
year = {2022},
date = {2022-01-01},
journal = {Pattern Recognition},
volume = {122},
pages = {108356},
abstract = {The k-Nearest Neighbor (kNN) algorithm is widely used in the supervised learning field and, particularly, in search and classification tasks, owing to its simplicity, competitive performance, and good statistical properties. However, its inherent inefficiency prevents its use in most modern applications due to the vast amount of data that the current technological evolution generates, being thus the optimization of kNN-based search strategies of particular interest. This paper introduces the caKD+ algorithm, which tackles this limitation by combining the use of feature learning techniques, clustering methods, adaptive search parameters per cluster, and the use of pre-calculated K-Dimensional Tree structures, and results in a highly efficient search method. This proposal has been evaluated using 10 datasets and the results show that caKD+ significantly outperforms 16 state-of-the-art efficient search methods while still depicting such an accurate performance as the one by the exhaustive kNN search.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Alashhab, S.; Gallego, A. J.; Lozano, M. Á.
Efficient gesture recognition for the assistance of visually impaired people using multi-head neural networks Journal Article
In: Engineering Applications of Artificial Intelligence, vol. 114, pp. 105188, 2022, ISSN: 0952-1976.
@article{k511,
title = {Efficient gesture recognition for the assistance of visually impaired people using multi-head neural networks},
author = {S. Alashhab and A. J. Gallego and M. Á. Lozano},
issn = {0952-1976},
year = {2022},
date = {2022-01-01},
journal = {Engineering Applications of Artificial Intelligence},
volume = {114},
pages = {105188},
abstract = {Existing research for the assistance of visually impaired people mainly focus on solving a single task (such as reading a text or detecting an obstacle), hence forcing the user to switch applications to perform other actions. This paper proposes an interactive system for mobile devices controlled by hand gestures that allow the user to control the device and use several assistance tools by making simple static and dynamic hand gestures (e.g., pointing a finger at an object will show a description of it). The system is based on a multi-head neural network, which initially detects and classifies the gestures, and subsequently, depending on the gesture detected, performs a second stage that carries out the corresponding action. This architecture optimizes the resources required to perform different tasks, it takes advantage of the information obtained from an initial backbone to perform different processes in a second stage. To train and evaluate the system, a dataset with about 40k images was manually compiled and labeled including different types of hand gestures, backgrounds (indoors and outdoors), lighting conditions, etc. This dataset contains synthetic gestures (whose objective is to pre-train the system to improve the results) and real images captured using different mobile phones. The comparison made with nearly 50 state-of-the-art methods shows competitive results as regards the different actions performed by the system, such as the accuracy of classification and localization of gestures, or the generation of descriptions for objects and scenes.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Garrido-Munoz, C.; Ríos-Vila, A.; Calvo-Zaragoza, J.
A holistic approach for image-to-graph: application to optical music recognition Journal Article
In: International Journal on Document Analysis and Recognition, 2022.
BibTeX | Tags: Leonardo2021
@article{k522,
title = {A holistic approach for image-to-graph: application to optical music recognition},
author = {C. Garrido-Munoz and A. Ríos-Vila and J. Calvo-Zaragoza},
year = {2022},
date = {2022-01-01},
urldate = {2022-01-01},
journal = {International Journal on Document Analysis and Recognition},
keywords = {Leonardo2021},
pubstate = {published},
tppubtype = {article}
}
2021
Calvo-Zaragoza, J.; Pertusa, A.; Gallego, A. J.; Iñesta, J. M.; Micó, L.; Oncina, J.; Perez-Sancho, C.; de León, P. J. Ponce; Rizo, D.
MultiScore Project: Multimodal Transcription of Music Scores Proceedings Article
In: Proceedings of the 14th Machine Learning and Music Workshop, pp. 3, 2021.
Links | BibTeX | Tags: MultiScore
@inproceedings{k481,
title = {MultiScore Project: Multimodal Transcription of Music Scores},
author = {J. Calvo-Zaragoza and A. Pertusa and A. J. Gallego and J. M. Iñesta and L. Micó and J. Oncina and C. Perez-Sancho and P. J. Ponce de León and D. Rizo},
url = {https://grfia.dlsi.ua.es/repositori/grfia/pubs/481/MML2021__MultiScore_Final.pdf},
year = {2021},
date = {2021-12-01},
urldate = {2021-12-01},
booktitle = {Proceedings of the 14th Machine Learning and Music Workshop},
pages = {3},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Castellanos, F. J.; Gallego, A. J.; Calvo-Zaragoza, J.
An Unsupervised Domain Adaptation framework for Layout Analysis of Music Score Images Proceedings Article
In: Proceedings of the 14th Machine Learning and Music Workshop, pp. 6, 2021.
@inproceedings{k482,
title = {An Unsupervised Domain Adaptation framework for Layout Analysis of Music Score Images},
author = {F. J. Castellanos and A. J. Gallego and J. Calvo-Zaragoza},
year = {2021},
date = {2021-12-01},
booktitle = {Proceedings of the 14th Machine Learning and Music Workshop},
pages = {6},
keywords = {GRE19-04, ROMA},
pubstate = {published},
tppubtype = {inproceedings}
}
Ríos-Vila, A.; Calvo-Zaragoza, J.; Iñesta, J. M.
CTC-based end-to-end approach for full page Optical Music Recognition Proceedings Article
In: Proceedings of the 14th Machine Learning and Music Workshop, pp. 11, 2021.
BibTeX | Tags: MultiScore
@inproceedings{k488,
title = {CTC-based end-to-end approach for full page Optical Music Recognition},
author = {A. Ríos-Vila and J. Calvo-Zaragoza and J. M. Iñesta},
year = {2021},
date = {2021-12-01},
booktitle = {Proceedings of the 14th Machine Learning and Music Workshop},
pages = {11},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Castellanos, F. J.; Gallego, A. J.; Calvo-Zaragoza, J.
Unsupervised Neural Domain Adaptation for Document Image Binarization Journal Article
In: Pattern Recognition, vol. 119, pp. 108099, 2021.
BibTeX | Tags: GRE19-04, HispaMus
@article{k467,
title = {Unsupervised Neural Domain Adaptation for Document Image Binarization},
author = {F. J. Castellanos and A. J. Gallego and J. Calvo-Zaragoza},
year = {2021},
date = {2021-11-01},
urldate = {2021-11-01},
journal = {Pattern Recognition},
volume = {119},
pages = {108099},
keywords = {GRE19-04, HispaMus},
pubstate = {published},
tppubtype = {article}
}
Alfaro-Contreras, M.; Rizo, D.; Iñesta, J. M.; Calvo-Zaragoza, J.
OMR-assisted transcription: a case study with early prints Proceedings Article
In: Proceedings of the 22nd International Society for Music Information Retrieval Conference, ISMIR, pp. 35-41, 2021, ISBN: 978-1-7327299-0-2.
BibTeX | Tags: MultiScore
@inproceedings{k483,
title = {OMR-assisted transcription: a case study with early prints},
author = {M. Alfaro-Contreras and D. Rizo and J. M. Iñesta and J. Calvo-Zaragoza},
isbn = {978-1-7327299-0-2},
year = {2021},
date = {2021-11-01},
urldate = {2021-11-01},
booktitle = {Proceedings of the 22nd International Society for Music Information Retrieval Conference, ISMIR},
pages = {35-41},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Castellanos, F. J.; Gallego, A. J.
Unsupervised Neural Document Analysis for Music Score Images Proceedings Article
In: Proc. of the 3rd International Workshop on Reading Music Systems, pp. 50-54, 2021.
BibTeX | Tags: GRE19-04, HispaMus
@inproceedings{k468,
title = {Unsupervised Neural Document Analysis for Music Score Images},
author = {F. J. Castellanos and A. J. Gallego},
year = {2021},
date = {2021-07-01},
booktitle = {Proc. of the 3rd International Workshop on Reading Music Systems},
pages = {50-54},
keywords = {GRE19-04, HispaMus},
pubstate = {published},
tppubtype = {inproceedings}
}
Madueño, A.; Ríos-Vila, A.; Rizo, D.
Automatized incipit encoding at the Andalusian Music Documentation Center Proceedings Article
In: Digital Libraries for Musicology / IAML Joint Session, 2021.
BibTeX | Tags: MultiScore
@inproceedings{k484,
title = {Automatized incipit encoding at the Andalusian Music Documentation Center},
author = {A. Madueño and A. Ríos-Vila and D. Rizo},
year = {2021},
date = {2021-07-01},
urldate = {2021-07-01},
booktitle = {Digital Libraries for Musicology / IAML Joint Session},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Ríos-Vila, A.; Esplà-Gomis, M.; Rizo, D.; de León, P. J. Ponce; Iñesta, J. M.
Applying Automatic Translation for Optical Music Recognition’s Encoding Step Journal Article
In: Applied Sciences, vol. 11, no. 9, 2021, ISSN: 2076-3417.
Abstract | BibTeX | Tags: GV/2020/030, HispaMus
@article{k464,
title = {Applying Automatic Translation for Optical Music Recognition’s Encoding Step},
author = {A. Ríos-Vila and M. Esplà-Gomis and D. Rizo and P. J. Ponce de León and J. M. Iñesta},
issn = {2076-3417},
year = {2021},
date = {2021-04-01},
urldate = {2021-04-01},
journal = {Applied Sciences},
volume = {11},
number = {9},
abstract = {Optical music recognition is a research field whose efforts have been mainly focused, due to the difficulties involved in its processes, on document and image recognition. However, there is a final step after the recognition phase that has not been properly addressed or discussed, and which is relevant to obtaining a standard digital score from the recognition process: the step of encoding data into a standard file format. In this paper, we address this task by proposing and evaluating the feasibility of using machine translation techniques, using statistical approaches and neural systems, to automatically convert the results of graphical encoding recognition into a standard semantic format, which can be exported as a digital score. We also discuss the implications, challenges and details to be taken into account when applying machine translation techniques to music languages, which are very different from natural human languages. This needs to be addressed prior to performing experiments and has not been reported in previous works. We also describe and detail experimental results, and conclude that applying machine translation techniques is a suitable solution for this task, as they have proven to obtain robust results.},
keywords = {GV/2020/030, HispaMus},
pubstate = {published},
tppubtype = {article}
}
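As an illustration of treating the encoding step as machine translation, as described in the abstract above, the following minimal sketch frames it as sequence-to-sequence learning over toy token vocabularies. The vocabularies, the model size, and the Seq2Seq class are assumptions made for illustration only; they are not the statistical or neural systems evaluated in the paper.

# Minimal sketch (illustrative only): a graphical token sequence is
# "translated" into a semantic token sequence with an encoder-decoder model.
import torch
import torch.nn as nn

# Toy vocabularies; the real graphical/semantic vocabularies are assumptions here.
SRC_VOCAB = ["<pad>", "<sos>", "<eos>", "clef.G-L2", "note.black-L1", "note.black-S2"]
TGT_VOCAB = ["<pad>", "<sos>", "<eos>", "clef-G2", "note-C4_quarter", "note-D4_quarter"]

class Seq2Seq(nn.Module):
    def __init__(self, src_size, tgt_size, dim=64):
        super().__init__()
        self.src_emb = nn.Embedding(src_size, dim)
        self.tgt_emb = nn.Embedding(tgt_size, dim)
        self.encoder = nn.GRU(dim, dim, batch_first=True)
        self.decoder = nn.GRU(dim, dim, batch_first=True)
        self.out = nn.Linear(dim, tgt_size)

    def forward(self, src, tgt):
        _, h = self.encoder(self.src_emb(src))           # encode graphical tokens
        dec_out, _ = self.decoder(self.tgt_emb(tgt), h)  # decode semantic tokens
        return self.out(dec_out)                         # per-step vocabulary logits

if __name__ == "__main__":
    model = Seq2Seq(len(SRC_VOCAB), len(TGT_VOCAB))
    src = torch.tensor([[1, 3, 4, 5, 2]])  # <sos> clef note note <eos>
    tgt = torch.tensor([[1, 3, 4, 5]])     # teacher-forced target prefix
    print(model(src, tgt).shape)           # torch.Size([1, 4, 6])

Framed this way, the encoding step inherits the standard translation toolbox (parallel corpora of paired encodings, teacher forcing, beam search at inference), which is the analogy the paper explores.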
Gallego, A. J.; Calvo-Zaragoza, J.; Fisher, R. B.
Incremental Unsupervised Domain-Adversarial Training of Neural Networks Journal Article
In: IEEE Transactions on Neural Networks and Learning Systems, vol. 32, no. 11, pp. 4864-4878, 2021, ISSN: 2162-2388.
Abstract | Links | BibTeX | Tags: GRE19-04, HispaMus
@article{k455,
title = {Incremental Unsupervised Domain-Adversarial Training of Neural Networks},
author = {A. J. Gallego and J. Calvo-Zaragoza and R. B. Fisher},
url = {https://grfia.dlsi.ua.es/repositori/grfia/pubs/455/2001.04129.pdf},
issn = {2162-2388},
year = {2021},
date = {2021-01-01},
urldate = {2021-01-01},
journal = {IEEE Transactions on Neural Networks and Learning Systems},
volume = {32},
number = {11},
pages = {4864-4878},
abstract = {In the context of supervised statistical learning, it is typically assumed that the training set comes from the same distribution that draws the test samples. When this is not the case, the behavior of the learned model is unpredictable and becomes dependent upon the degree of similarity between the distribution of the training set and the distribution of the test set. One of the research topics that investigates this scenario is referred to as Domain Adaptation (DA). Deep neural networks brought dramatic advances in pattern recognition and that is why there have been many attempts to provide good domain adaptation algorithms for these models. Here we take a different avenue and approach the problem from an incremental point of view, where the model is adapted to the new domain iteratively. We make use of an existing unsupervised domain-adaptation algorithm to identify the target samples on which there is greater confidence about their true label. The output of the model is analyzed in different ways to determine the candidate samples. The selected samples are then added to the source training set by self-labeling, and the process is repeated until all target samples are labeled. This approach implements a form of adversarial training in which, by moving the self-labeled samples from the target to the source set, the DA algorithm is forced to look for new features after each iteration. Our results report a clear improvement with respect to the non-incremental case in several datasets, also outperforming other state-of-the-art domain adaptation algorithms.},
keywords = {GRE19-04, HispaMus},
pubstate = {published},
tppubtype = {article}
}
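As an illustration of the incremental self-labeling loop summarized in the abstract above, the following sketch uses a plain classifier (scikit-learn's LogisticRegression) as a stand-in for the domain-adversarial model. The function incremental_self_labeling, the confidence criterion, and the synthetic data are assumptions made for illustration only; this is not the paper's algorithm or code.

# Minimal sketch (illustrative only): train on the source set, pick the target
# samples the model is most confident about, add them to the training set with
# their predicted (pseudo) labels, and repeat until all targets are labeled.
import numpy as np
from sklearn.linear_model import LogisticRegression  # stand-in for the DA model

def incremental_self_labeling(X_src, y_src, X_tgt, frac_per_step=0.2):
    X_train, y_train = X_src.copy(), y_src.copy()
    remaining = X_tgt.copy()
    clf = None
    while len(remaining) > 0:
        clf = LogisticRegression(max_iter=1000).fit(X_train, y_train)
        proba = clf.predict_proba(remaining)
        conf = proba.max(axis=1)                         # per-sample confidence
        k = max(1, int(frac_per_step * len(remaining)))
        idx = np.argsort(-conf)[:k]                      # most confident targets
        pseudo = clf.classes_[proba[idx].argmax(axis=1)]  # self-labeled samples
        X_train = np.vstack([X_train, remaining[idx]])
        y_train = np.concatenate([y_train, pseudo])
        remaining = np.delete(remaining, idx, axis=0)
    return clf

if __name__ == "__main__":
    rng = np.random.default_rng(0)
    X_src = rng.normal(size=(100, 5)); y_src = (X_src[:, 0] > 0).astype(int)
    X_tgt = rng.normal(loc=0.5, size=(60, 5))            # shifted target domain
    model = incremental_self_labeling(X_src, y_src, X_tgt)
    print(model.predict(X_tgt[:5]))

Each iteration enlarges the training set with the pseudo-labeled targets, so later iterations see a distribution that has drifted toward the target domain, which is the mechanism the abstract describes.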
Román, M. A.
An End-to-End Framework for Audio-to-Score Music Transcription PhD Thesis
Universidad de Alicante, 2021.
BibTeX | Tags: HispaMus
@phdthesis{k462,
title = {An End-to-End Framework for Audio-to-Score Music Transcription},
author = {M. A. Román},
editor = {J. Calvo-Zaragoza and A. Pertusa},
year = {2021},
date = {2021-01-01},
urldate = {2021-01-01},
organization = {Universidad de Alicante},
keywords = {HispaMus},
pubstate = {published},
tppubtype = {phdthesis}
}