2025
Kim, D.; Han, D.; Jeong, D.; Valero-Mas, J. J.
On the automatic recognition of Jeongganbo music notation: dataset and approach Journal Article
In: Journal on Computing and Cultural Heritage, 2025, ISSN: 1556-4673.
@article{nokey,
title = {On the automatic recognition of Jeongganbo music notation: dataset and approach},
author = {D. Kim and D. Han and D. Jeong and J. J. Valero-Mas},
issn = {1556-4673},
year = {2025},
date = {2025-01-16},
urldate = {2025-01-16},
journal = {Journal on Computing and Cultural Heritage},
abstract = {The Jeongganbo notation, the first music representation system in East Asia capable of jointly expressing pitch and duration, has been extensively used---and still is---in the Korean music tradition since its inception in the 15th century. In this regard, there exists a plethora of music works that exclusively endure as physical sheets, which not only constitutes a heritage preservation challenge due to the inherent degradation of this format but also impedes the use of computational tools to study and exploit this music tradition. While the Optical Music Recognition (OMR) field, which represents the research area devoted to devising methods capable of automatically transcribing music sheets into digital formats, has addressed this issue in a number of music notations from the Western tradition, no previous research has considered the preservation of Jeongganbo scores. In this context, this work presents the following contributions: (i) the first data assortment of real Jeongganbo scores for OMR tasks; (ii) a collection of synthetic data generation and augmentation mechanisms to alleviate the scarcity of manual annotation; and (iii) a neural-based transcription scheme based on state-of-the-art OMR strategies specifically tailored to Jeongganbo scores. The experiments performed prove the validity of the approach---performance rates close to 90% success---and open new research avenues for under-resourced yet challenging music notations.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
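The augmentation mechanisms mentioned in the abstract above can be pictured with a minimal image-augmentation pipeline. The sketch below uses torchvision; the specific transforms and their magnitudes are assumptions for illustration, not the synthetic-data pipeline of the paper.

# Generic augmentation sketch for score-page images (illustrative only; the transforms
# and parameter values are assumptions, not the paper's synthetic-data pipeline).
from torchvision import transforms

score_augment = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),   # printed scores are near-binary
    transforms.RandomRotation(degrees=2),          # slight skew, as in scanned pages
    transforms.RandomAffine(degrees=0, translate=(0.02, 0.02), scale=(0.95, 1.05)),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.GaussianBlur(kernel_size=3),
    transforms.ToTensor(),
])
# Usage (hypothetical): augmented = score_augment(pil_page_image)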
2024
Galan-Cuenca, A.; Valero-Mas, J. J.; Martinez-Sevilla, J. C.; Hidalgo-Centeno, A.; Pertusa, A.; Calvo-Zaragoza, J.
MUSCAT: a Multimodal mUSic Collection for Automatic Transcription of real recordings and image scores Conference
Proceedings of the 32nd ACM International Conference on Multimedia, Association for Computing Machinery, 2024, ISBN: 979-8-4007-0686-8.
@conference{nokey,
title = {MUSCAT: a Multimodal mUSic Collection for Automatic Transcription of real recordings and image scores},
author = {A. Galan-Cuenca and J. J. Valero-Mas and J. C. Martinez-Sevilla and A. Hidalgo-Centeno and A. Pertusa and J. Calvo-Zaragoza},
doi = {10.1145/3664647.3681572},
isbn = {979-8-4007-0686-8},
year = {2024},
date = {2024-10-28},
booktitle = {Proceedings of the 32nd ACM International Conference on Multimedia},
pages = {583-591},
publisher = {Association for Computing Machinery},
abstract = {Multimodal audio-image music transcription has been recently posed as a means of retrieving a digital score representation by leveraging the individual estimations from Automatic Music Transcription (AMT)---acoustic recordings---and Optical Music Recognition (OMR)---image scores---systems. Nevertheless, while proven to outperform single-modality recognition rates, this approach has been exclusively validated under controlled scenarios---monotimbral and monophonic synthetic data---mainly due to a lack of collections with symbolic score-level annotations for both recordings and graphical sheets. To promote research on this topic, this work presents the Multimodal mUSic Collection for Automatic Transcription (MUSCAT) assortment of acoustic recordings, image sheets, and their score-level annotations in several notation formats. This dataset comprises almost 80 hours of real recordings with varied instrumentation and polyphony degrees---ranging from piano to orchestral music---, 1251 scanned sheets, and 880 symbolic scores from 37 composers, which may also be used in other tasks involving metadata such as instrument identification or composer recognition. A fragmented subset of this collection solely focused on acoustic data for score-level AMT---the MUSic Collection for aUtomatic Transcription - fragmented Subset (MUSCUTS) assortment---is also presented together with a baseline experimentation, concluding the need to foster research on this field with real recordings. Finally, a web-based service is also provided to increase the size of the collections collaboratively.},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Penarrubia, C.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Contrastive Self-Supervised Learning for Optical Music Recognition Conference
International Workshop on Document Analysis Systems, 2024, ISBN: 978-3-031-70442-0.
@conference{nokey,
title = {Contrastive Self-Supervised Learning for Optical Music Recognition},
author = {C. Penarrubia and J. J. Valero-Mas and J. Calvo-Zaragoza},
doi = {10.1007/978-3-031-70442-0_19},
isbn = {978-3-031-70442-0},
year = {2024},
date = {2024-09-11},
urldate = {2024-09-11},
booktitle = {International Workshop on Document Analysis Systems},
pages = {312-326},
abstract = {Optical Music Recognition (OMR) is the research area focused on transcribing images of musical scores. In recent years, this field has seen great development thanks to the emergence of Deep Learning. However, these types of solutions require large volumes of labeled data. To alleviate this problem, Contrastive Self-Supervised Learning (SSL) has emerged as a paradigm that leverages large amounts of unlabeled data to train neural networks, yielding meaningful and robust representations. In this work, we explore its first application to the field of OMR. By utilizing three datasets that represent the heterogeneity of musical scores in notations and graphic styles, and through multiple evaluation protocols, we demonstrate that contrastive SSL delivers promising results, significantly reducing data scarcity challenges in OMR. To the best of our knowledge, this is the first study that integrates these two fields. We hope this research serves as a baseline and stimulates further exploration.},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
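For reference, the contrastive objective behind SimCLR-style self-supervision summarized above can be written as an NT-Xent loss. The sketch below is a generic PyTorch formulation under assumed embedding shapes; it is not the training code of the paper.

# Generic NT-Xent (SimCLR-style) contrastive loss; illustrative, not the paper's code.
import torch
import torch.nn.functional as F

def nt_xent(z1, z2, temperature=0.5):
    # z1, z2: (N, D) embeddings of two augmented views of the same N score crops.
    z = F.normalize(torch.cat([z1, z2], dim=0), dim=1)      # (2N, D)
    sim = z @ z.t() / temperature                           # scaled cosine similarities
    n = z1.size(0)
    mask = torch.eye(2 * n, dtype=torch.bool, device=z.device)
    sim = sim.masked_fill(mask, float("-inf"))              # discard self-similarity
    targets = torch.cat([torch.arange(n, 2 * n), torch.arange(0, n)]).to(z.device)
    return F.cross_entropy(sim, targets)                    # positives are the paired views

# Usage (hypothetical): loss = nt_xent(encoder(view_a), encoder(view_b))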
Ríos-Vila, A.; Calvo-Zaragoza, J.; Paquet, T.
Sheet Music Transformer: End-To-End Optical Music Recognition Beyond Monophonic Transcription Conference
Document Analysis and Recognition - ICDAR 2024, vol. 1, Springer Nature Switzerland, 2024, ISBN: 978-3-031-70552-6.
@conference{RiosVila:ICDAR:2024,
title = {Sheet Music Transformer: End-To-End Optical Music Recognition Beyond Monophonic Transcription},
author = {A. Ríos-Vila and J. Calvo-Zaragoza and T. Paquet},
isbn = {978-3-031-70552-6},
year = {2024},
date = {2024-09-02},
urldate = {2024-09-02},
booktitle = {Document Analysis and Recognition - ICDAR 2024},
volume = {1},
pages = {20-37},
publisher = {Springer Nature Switzerland},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {conference}
}
Maciá, M.; Rizo, D.
The Impact of UX/UI on Piano-Assisted Learning in Extended Reality Conference
Computer Supported Music Education, Angers, France, 2024.
@conference{macia2024,
title = {The Impact of UX/UI on Piano-Assisted Learning in Extended Reality},
author = {M. Maciá and D. Rizo},
year = {2024},
date = {2024-05-04},
urldate = {2024-05-04},
booktitle = {Computer Supported Music Education. Angers, France.},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Alfaro-Contreras, M.; Rios-Vila, A.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
A Transformer Approach for Polyphonic Audio-to-Score Transcription Proceedings Article
In: Proceedings of the International Conference on Acoustics, Speech, and Signal Processing (ICASSP 2024), Seoul (Korea), 2024.
@inproceedings{Alfaro-Contreras:ICASSP24,
title = {A Transformer Approach for Polyphonic Audio-to-Score Transcription},
author = {M. Alfaro-Contreras and A. Rios-Vila and J. J. Valero-Mas and J. Calvo-Zaragoza},
doi = {10.1109/ICASSP48485.2024.10447162},
year = {2024},
date = {2024-04-19},
urldate = {2024-04-19},
booktitle = {Proceedings of the International Conference on Acoustics, Speech, and Signal Processing (ICASSP 2024)},
address = {Seoul (Korea)},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Valero-Mas, J. J.; Gallego, A. J.; Rico-Juan, J. R.
An overview of ensemble and feature learning in few-shot image classification using siamese networks Journal Article
In: Multimedia Tools and Applications, vol. 83, pp. 19929–19952, 2024, ISSN: 1380-7501.
@article{nokey,
title = {An overview of ensemble and feature learning in few-shot image classification using siamese networks},
author = {J. J. Valero-Mas and A. J. Gallego and J. R. Rico-Juan },
doi = {10.1007/s11042-023-15607-3},
issn = {1380-7501},
year = {2024},
date = {2024-02-01},
urldate = {2023-07-29},
journal = {Multimedia Tools and Applications},
volume = {83},
pages = {19929–19952},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2023
Ramoneda, P.; Jeong, D.; Valero-Mas, J. J.; Serra, X.
Predicting performance difficulty from piano sheet music images Conference
Proceedings of the 24th International Society for Music Information Retrieval Conference, Milan, Italy, 2023, ISBN: 978-1-7327299-3-3.
@conference{nokey,
title = {Predicting performance difficulty from piano sheet music images},
author = {P. Ramoneda and D. Jeong and J. J. Valero-Mas and X. Serra},
doi = {10.5281/zenodo.10265386},
isbn = {978-1-7327299-3-3},
year = {2023},
date = {2023-11-04},
urldate = {2023-11-04},
booktitle = {Proceedings of the 24th International Society for Music Information Retrieval Conference},
pages = {708-715},
address = {Milan, Italy},
abstract = {Estimating the performance difficulty of a musical score is crucial in music education for adequately designing the learning curriculum of the students. Although the music information retrieval community has recently shown interest in this task, existing approaches mainly use machine-readable scores, leaving the broader case of sheet music images unaddressed. Based on previous works involving sheet music images, we use a mid-level representation, bootleg score, describing notehead positions relative to staff lines coupled with a transformer model. This architecture is adapted to our task by introducing a different encoding scheme that reduces the encoded sequence length to one-eighth of the original size. In terms of evaluation, we consider five datasets---more than 7500 scores with up to 9 difficulty levels---, two being mainly compiled for this work. The results obtained when pretraining the scheme on the IMSLP corpus and fine-tuning it on the considered datasets prove the proposal's validity, achieving the best-performing model with a balanced accuracy of 40.3% and a mean square error of 1.3. Finally, we provide access to our code, data, and models for transparency and reproducibility.},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
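The abstract above mentions an encoding that shrinks the bootleg-score sequence to one-eighth of its length before the transformer. One generic way to obtain such a factor is to merge groups of consecutive columns into single input vectors; the grouping below is an assumption for illustration, not necessarily the paper's exact scheme.

# Illustrative only: shorten a binary bootleg-score matrix by feeding 8 consecutive
# columns per model step. The grouping factor and layout are assumptions.
import numpy as np

def group_columns(bootleg, group=8):
    # bootleg: (H, T) binary matrix (staff positions x time).
    # Returns (T // group, H * group): one merged feature vector per group of columns.
    h, t = bootleg.shape
    t_trim = (t // group) * group
    return bootleg[:, :t_trim].T.reshape(-1, group * h)

seq = group_columns(np.random.randint(0, 2, size=(62, 160)))
print(seq.shape)   # (20, 496): 160 time steps reduced to 20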
Penarrubia, C.; Garrido-Munoz, C.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Efficient notation assembly in optical music recognition Conference
Proceedings of the 24th International Society for Music Information Retrieval Conference, Milan, Italy, 2023, ISBN: 978-1-7327299-3-3.
@conference{nokey,
title = {Efficient notation assembly in optical music recognition},
author = {C. Penarrubia and C. Garrido-Munoz and J. J. Valero-Mas and J. Calvo-Zaragoza},
isbn = {978-1-7327299-3-3},
year = {2023},
date = {2023-10-30},
booktitle = {Proceedings of the 24th International Society for Music Information Retrieval Conference},
pages = {182-189},
address = {Milan, Italy},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Martínez-Sevilla, J. C.; Ríos-Vila, A.; Castellanos, F. J.; Calvo-Zaragoza, J.
A Holistic Approach for Aligned Music and Lyrics Transcription Conference
Document Analysis and Recognition - ICDAR 2023, vol. 1, Springer Nature Switzerland, Cham, 2023, ISBN: 978-3-031-41676-7.
@conference{MartinezSevilla:ICDAR:2023,
title = {A Holistic Approach for Aligned Music and Lyrics Transcription},
author = {J.C. Martínez-Sevilla and A. Ríos-Vila and F. J. Castellanos and J. Calvo-Zaragoza },
editor = {Fink, Gernot A. and Jain, Rajiv and Kise, Koichi and Zanibbi, Richard},
doi = {10.1007/978-3-031-41676-7_11},
isbn = {978-3-031-41676-7},
year = {2023},
date = {2023-08-28},
urldate = {2023-08-28},
booktitle = {Document Analysis and Recognition - ICDAR 2023},
volume = {1},
pages = {185--201},
publisher = {Springer Nature Switzerland},
address = {Cham},
abstract = {In this paper, we present the Aligned Music Notation and Lyrics Transcription (AMNLT) challenge, whose goal is to retrieve the content from document images of vocal music. This new research area arises from the need to automatically transcribe notes and lyrics from music scores and align both sources of information conveniently. Although existing methods are able to deal with music notation and text, they work without providing their proper alignment, which is crucial to actually retrieve the content of the piece of vocal music. To overcome this challenge, we consider holistic neural approaches that transcribe music and text in one step, along with an encoding that implicitly aligns the sources of information. The methodology is evaluated on a benchmark specifically designed for AMNLT. The results report that existing methods can obtain high-quality text and music transcriptions, but posterior alignment errors are inevitably found. However, our formulation achieves relative improvements of over 80{%} in the metric that considers both transcription and alignment. We hope that this work will establish itself as a future reference for further research on AMNLT.},
keywords = {REPERTORIUM},
pubstate = {published},
tppubtype = {conference}
}
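The "encoding that implicitly aligns the sources of information" described above can be pictured as a single target sequence that interleaves music tokens with the syllables sung on them. The token names below are invented for illustration and are not the AMNLT vocabulary.

# Hypothetical aligned encoding: each vocal unit emits its note token(s) followed by the
# syllable they carry, so a single output sequence conveys both sources already aligned.
units = [
    {"notes": ["note-C4_quarter"], "syllable": "Ky"},
    {"notes": ["note-D4_eighth", "note-E4_eighth"], "syllable": "ri"},
    {"notes": ["note-F4_half"], "syllable": "e"},
]

def encode_aligned(units):
    sequence = []
    for unit in units:
        sequence.extend(unit["notes"])
        sequence.append("lyric:" + unit["syllable"])
    return sequence

print(encode_aligned(units))
# ['note-C4_quarter', 'lyric:Ky', 'note-D4_eighth', 'note-E4_eighth', 'lyric:ri', ...]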
Martínez-Sevilla, J. C.; Alfaro-Contreras, M.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Insights into end-to-end audio-to-score transcription with real recordings: A case study with saxophone works Proceedings Article
In: INTERSPEECH Conference, pp. 2793-2797, Dublin, Ireland, 2023.
@inproceedings{Martínez-Sevilla2023,
title = {Insights into end-to-end audio-to-score transcription with real recordings: A case study with saxophone works},
author = {J.C. Martínez-Sevilla and M. Alfaro-Contreras and J. J. Valero-Mas and J. Calvo-Zaragoza},
doi = {10.21437/Interspeech.2023-88},
year = {2023},
date = {2023-08-20},
urldate = {2023-08-20},
booktitle = {INTERSPEECH Conference},
pages = {2793-2797},
address = {Dublin, Ireland},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Alfaro-Contreras, M.; Valero-Mas, J. J.; Iñesta, J. M.; Calvo-Zaragoza, J.
Multimodal Strategies for Image and Audio Music Transcription: A Comparative Study Proceedings Article
In: Pattern Recognition, Computer Vision, and Image Processing. ICPR 2022 International Workshops and Challenges. ICPR 2022. Lecture Notes in Computer Science, pp. 64-77, Springer Nature Switzerland, Cham, 2023, ISBN: 978-3-031-37731-0.
@inproceedings{k505,
title = {Multimodal Strategies for Image and Audio Music Transcription: A Comparative Study},
author = {M. Alfaro-Contreras and J. J. Valero-Mas and J. M. Iñesta and J. Calvo-Zaragoza},
doi = {10.1007/978-3-031-37731-0_6},
isbn = {978-3-031-37731-0},
year = {2023},
date = {2023-08-10},
urldate = {2022-01-01},
booktitle = {Pattern Recognition, Computer Vision, and Image Processing. ICPR 2022 International Workshops and Challenges. ICPR 2022. Lecture Notes in Computer Science},
volume = {13645},
pages = {64-77},
publisher = {Springer Nature Switzerland},
address = {Cham},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Garrido-Munoz, C.; Alfaro-Contreras, M.; Calvo-Zaragoza, J.
Evaluating Domain Generalization in Kitchen Utensils Classification Proceedings Article
In: Iberian Conference on Pattern Recognition and Image Analysis, pp. 108-118, 2023.
@inproceedings{Garrido-Munoz2023,
title = {Evaluating Domain Generalization in Kitchen Utensils Classification},
author = {C. Garrido-Munoz and M. Alfaro-Contreras and J. Calvo-Zaragoza},
doi = {10.1007/978-3-031-36616-1_9},
year = {2023},
date = {2023-06-25},
booktitle = {Iberian Conference on Pattern Recognition and Image Analysis},
pages = {108-118},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
González-Barrachina, P.; Alfaro-Contreras, M.; Nieto-Hidalgo, M.; Calvo-Zaragoza, J.
Lifelong Learning for Document Image Binarization: An Experimental Study Proceedings Article
In: Iberian Conference on Pattern Recognition and Image Analysis, pp. 146-157, 2023.
@inproceedings{González-Barrachina2023,
title = {Lifelong Learning for Document Image Binarization: An Experimental Study},
author = {P. González-Barrachina and M. Alfaro-Contreras and M. Nieto-Hidalgo and J. Calvo-Zaragoza },
doi = {10.1007/978-3-031-36616-1_12},
year = {2023},
date = {2023-06-25},
urldate = {2023-06-25},
booktitle = {Iberian Conference on Pattern Recognition and Image Analysis},
pages = {146-157},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Penarrubia, C.; Valero-Mas, J. J.; Gallego, A. J.; Calvo-Zaragoza, J.
Addressing Class Imbalance in Multilabel Prototype Generation for k-Nearest Neighbor Classification Conference
Iberian Conference on Pattern Recognition and Image Analysis, Alicante, Spain, 2023, ISBN: 978-3-031-36616-1.
@conference{nokey,
title = {Addressing Class Imbalance in Multilabel Prototype Generation for k-Nearest Neighbor Classification},
author = {C. Penarrubia and J. J. Valero-Mas and A. J. Gallego and J. Calvo-Zaragoza},
doi = {10.1007/978-3-031-36616-1_2},
isbn = {978-3-031-36616-1},
year = {2023},
date = {2023-06-25},
booktitle = {Iberian Conference on Pattern Recognition and Image Analysis},
pages = {15-27},
address = {Alicante, Spain},
abstract = {Prototype Generation (PG) methods seek to improve the efficiency of the k-Nearest Neighbor (kNN) classifier by obtaining a reduced version of a given reference dataset following certain heuristics. Despite being largely addressed topic in multiclass scenarios, few works deal with PG in multilabel environments. Hence, the existing proposals exhibit a number of limitations, being label imbalance one of paramount relevance as it constitutes a typical challenge of multilabel datasets. This work proposes two novel merging policies for multilabel PG schemes specifically devised for label imbalance, as well as a mechanism to prevent inappropriate samples from undergoing a reduction process. These proposals are applied to three existing multilabel PG methods—Multilabel Reduction through Homogeneous Clustering, Multilabel Chen, and Multilabel Reduction through Space Partitioning—and evaluated on 12 different data assortments with different degrees of label imbalance. The results prove that the proposals overcome—in some cases in a significant manner—those obtained with the original methods, hence validating the presented approaches and enabling further research lines on this topic.},
keywords = {DOREMI},
pubstate = {published},
tppubtype = {conference}
}
Alfaro-Contreras, M.; Iñesta, J. M.; Calvo-Zaragoza, J.
Optical Music Recognition for Homophonic Scores with Neural Networks and Synthetic Music Generation Journal Article
In: International Journal of Multimedia Information Retrieval, vol. 12, pp. 12-24, 2023.
@article{Alfaro-Contreras2023b,
title = {Optical Music Recognition for Homophonic Scores with Neural Networks and Synthetic Music Generation},
author = {M. Alfaro-Contreras and J. M. Iñesta and J. Calvo-Zaragoza},
doi = {10.1007/s13735-023-00278-5},
year = {2023},
date = {2023-05-26},
urldate = {2023-05-26},
journal = {International Journal of Multimedia Information Retrieval},
volume = {12},
pages = {12-24},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Ríos-Vila, A.; Rizo, D.; Iñesta, J. M.; Calvo-Zaragoza, J.
End-to-end optical music recognition for pianoform sheet music Journal Article
In: International Journal on Document Analysis and Recognition (IJDAR), iss. ICDAR 2023, 2023, ISSN: 1433-2825.
@article{Ríos-Vila2023,
title = {End-to-end optical music recognition for pianoform sheet music},
author = {A. Ríos-Vila and D. Rizo and J. M. Iñesta and J. Calvo-Zaragoza},
url = {https://link.springer.com/content/pdf/10.1007/s10032-023-00432-z.pdf},
doi = {10.1007/s10032-023-00432-z},
issn = {1433-2825},
year = {2023},
date = {2023-05-12},
urldate = {2023-05-12},
journal = {International Journal on Document Analysis and Recognition (IJDAR)},
issue = {ICDAR 2023},
abstract = {End-to-end solutions have brought about significant advances in the field of Optical Music Recognition. These approaches directly provide the symbolic representation of a given image of a musical score. Despite this, several documents, such as pianoform musical scores, cannot yet benefit from these solutions since their structural complexity does not allow their effective transcription. This paper presents a neural method whose objective is to transcribe these musical scores in an end-to-end fashion. We also introduce the GrandStaff dataset, which contains 53,882 single-system piano scores in common western modern notation. The sources are encoded in both a standard digital music representation and its adaptation for current transcription technologies. The method proposed in this paper is trained and evaluated using this dataset. The results show that the approach presented is, for the first time, able to effectively transcribe pianoform notation in an end-to-end manner.},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {article}
}
Alfaro-Contreras, M.; Ríos-Vila, A.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Few-Shot Symbol Classification via Self-Supervised Learning and Nearest Neighbor Journal Article
In: Pattern Recognition Letters, vol. 167, pp. 1-8, 2023.
@article{Alfaro-Contreras2023,
title = {Few-Shot Symbol Classification via Self-Supervised Learning and Nearest Neighbor},
author = {M. Alfaro-Contreras and A. Ríos-Vila and J. J. Valero-Mas and J. Calvo-Zaragoza},
doi = {10.1016/j.patrec.2023.01.014},
year = {2023},
date = {2023-03-01},
journal = {Pattern Recognition Letters},
volume = {167},
pages = {1-8},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {article}
}
Rico-Juan, J. R.; Sánchez-Cartagena, V. M.; Valero-Mas, J. J.; Gallego, A. J.
Identifying student profiles within online judge systems using explainable artificial intelligence Journal Article
In: IEEE Transactions on Learning Technologies, vol. 16, no. 6, pp. 955-969, 2023, ISSN: 1939-1382.
@article{nokey,
title = {Identifying student profiles within online judge systems using explainable artificial intelligence},
author = {J. R. Rico-Juan and V. M. Sánchez-Cartagena and J. J. Valero-Mas and A. J. Gallego},
doi = {10.1109/TLT.2023.3239110},
issn = {1939-1382},
year = {2023},
date = {2023-01-23},
urldate = {2023-01-23},
journal = {IEEE Transactions on Learning Technologies},
volume = {16},
number = {6},
pages = {955-969},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Valero-Mas, J. J.; Gallego, A. J.; Alonso-Jiménez, P.; Serra, X.
Multilabel Prototype Generation for Data Reduction in k-Nearest Neighbour classification Journal Article
In: Pattern Recognition, vol. 135, pp. 109190, 2023, ISSN: 0031-3203.
@article{k519,
title = {Multilabel Prototype Generation for Data Reduction in k-Nearest Neighbour classification},
author = {J. J. Valero-Mas and A. J. Gallego and P. Alonso-Jiménez and X. Serra},
doi = {10.1016/j.patcog.2022.109190},
issn = {0031-3203},
year = {2023},
date = {2023-01-01},
urldate = {2023-01-01},
journal = {Pattern Recognition},
volume = {135},
pages = {109190},
abstract = {Prototype Generation (PG) methods are typically considered for improving the efficiency of the k-Nearest Neighbour (kNN) classifier when tackling high-size corpora. Such approaches aim at generating a reduced version of the corpus without decreasing the classification performance when compared to the initial set. Despite their large application in multiclass scenarios, very few works have addressed the proposal of PG methods for the multilabel space. In this regard, this work presents the novel adaptation of four multiclass PG strategies to the multilabel case. These proposals are evaluated with three multilabel kNN-based classifiers, 12 corpora comprising a varied range of domains and corpus sizes, and different noise scenarios artificially induced in the data. The results obtained show that the proposed adaptations are capable of significantly improving—both in terms of efficiency and classification performance—the only reference multilabel PG work in the literature as well as the case in which no PG method is applied, also presenting statistically superior robustness in noisy scenarios. Moreover, these novel PG strategies allow prioritising either the efficiency or efficacy criteria through its configuration depending on the target scenario, hence covering a wide area in the solution space not previously filled by other works.},
keywords = {DOREMI, MultiScore},
pubstate = {published},
tppubtype = {article}
}
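As a rough picture of clustering-based Prototype Generation in a multilabel setting, as discussed above, the sketch below clusters the training features and keeps one prototype per cluster whose label set aggregates its members. It is a generic reduction sketch with assumed parameters and merging rule, not one of the four adapted strategies evaluated in the paper.

# Illustrative multilabel prototype generation: cluster the features and emit one centroid
# per cluster, labelled with every label present in at least half of its members.
import numpy as np
from sklearn.cluster import KMeans

def reduce_multilabel(X, Y, n_prototypes=50, label_ratio=0.5):
    # X: (N, D) features; Y: (N, L) binary label matrix. Returns (P, D) and (P, L).
    km = KMeans(n_clusters=n_prototypes, n_init=10, random_state=0).fit(X)
    prototypes, labels = [], []
    for c in range(n_prototypes):
        members = km.labels_ == c
        if not members.any():
            continue
        prototypes.append(X[members].mean(axis=0))
        labels.append((Y[members].mean(axis=0) >= label_ratio).astype(int))
    return np.vstack(prototypes), np.vstack(labels)

Xr, Yr = reduce_multilabel(np.random.rand(1000, 16), np.random.randint(0, 2, (1000, 5)))
print(Xr.shape, Yr.shape)   # e.g. (50, 16) (50, 5): 1000 samples reduced to 50 prototypes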
Sánchez-Ferrer, A.; Valero-Mas, J. J.; Gallego, A. J.; Calvo-Zaragoza, J.
An Experimental Study on Marine Debris Location and Recognition using Object Detection Journal Article
In: Pattern Recognition Letters, 2023, ISSN: 0167-8655.
@article{k521,
title = {An Experimental Study on Marine Debris Location and Recognition using Object Detection},
author = {A. Sánchez-Ferrer and J. J. Valero-Mas and A. J. Gallego and J. Calvo-Zaragoza},
doi = {10.1016/j.patrec.2022.12.019},
issn = {0167-8655},
year = {2023},
date = {2023-01-01},
urldate = {2023-01-01},
journal = {Pattern Recognition Letters},
abstract = {The large amount of debris in our oceans is a global problem that dramatically impacts marine fauna and flora. While a large number of human-based campaigns have been proposed to tackle this issue, these efforts have been deemed insufficient due to the insurmountable amount of existing litter. In response to that, there exists a high interest in the use of autonomous underwater vehicles (AUV) that may locate, identify, and collect this garbage automatically. To perform such a task, AUVs consider state-of-the-art object detection techniques based on deep neural networks due to their reported high performance. Nevertheless, these techniques generally require large amounts of data with fine-grained annotations. In this work, we explore the capabilities of the reference object detector Mask Region-based Convolutional Neural Networks for automatic marine debris location and classification in the context of limited data availability. Considering the recent CleanSea corpus, we pose several scenarios regarding the amount of available train data and study the possibility of mitigating the adverse effects of data scarcity with synthetic marine scenes. Our results achieve a new state of the art in the task, establishing a new reference for future research. In addition, it is shown that the task still has room for improvement and that the lack of data can be somehow alleviated, yet to a limited extent.},
keywords = {TADMar},
pubstate = {published},
tppubtype = {article}
}
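The reference detector named above belongs to the Mask R-CNN family. A hedged fine-tuning setup with torchvision is sketched below; the number of debris classes, the pretrained weights, and the head sizes are assumptions, not the configuration used in the study.

# Illustrative Mask R-CNN fine-tuning with torchvision (assumed class count and settings;
# requires torchvision >= 0.13 for the weights= argument, older versions use pretrained=True).
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

num_classes = 20  # assumed: debris categories + background

model = torchvision.models.detection.maskrcnn_resnet50_fpn(weights="DEFAULT")

# Replace the box-classification head for the debris classes.
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Replace the mask head as well, since instance masks are also predicted.
in_channels = model.roi_heads.mask_predictor.conv5_mask.in_channels
model.roi_heads.mask_predictor = MaskRCNNPredictor(in_channels, 256, num_classes)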
Alfaro-Contreras, M.; Valero-Mas, J. J.; Iñesta, J. M.; Calvo-Zaragoza, J.
Late multimodal fusion for image and audio music transcription Journal Article
In: Expert Systems With Applications, vol. 216, pp. 119491-119500, 2023.
@article{Alfaro-Contreras2023c,
title = {Late multimodal fusion for image and audio music transcription},
author = {M. Alfaro-Contreras and J. J. Valero-Mas and J. M. Iñesta and J. Calvo-Zaragoza},
doi = {10.1016/j.eswa.2022.119491},
year = {2023},
date = {2023-01-01},
urldate = {2023-01-01},
journal = {Expert Systems With Applications},
volume = {216},
pages = {119491-119500},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {article}
}
2022
Ríos-Vila, A.; Iñesta, J. M.; Calvo-Zaragoza, J.
End-to-End Full-Page Optical Music Recognition for Mensural Notation Proceedings Article
In: Proceedings of the 23rd International Society for Music Information Retrieval Conference, pp. 226-232, 2022, ISBN: 978-1-7327299-2-6.
@inproceedings{Ríos-Vila2022,
title = {End-to-End Full-Page Optical Music Recognition for Mensural Notation},
author = {A. Ríos-Vila and J. M. Iñesta and J. Calvo-Zaragoza},
url = {https://zenodo.org/record/7342678/files/000026.pdf?download=1},
doi = {10.5281/zenodo.7342678},
isbn = {978-1-7327299-2-6},
year = {2022},
date = {2022-12-04},
urldate = {2022-12-04},
booktitle = {Proceedings of the 23rd International Society for Music Information Retrieval Conference},
journal = {Proceedings of the 23rd International Society for Music Information Retrieval Conference},
pages = {226-232},
abstract = {Optical Music Recognition (OMR) systems typically consider workflows that include several steps, such as staff detection, symbol recognition, and semantic reconstruction. However, fine-tuning these systems is costly due to the specific data labeling process that has to be performed to train models for each of these steps. In this paper, we present the first segmentation-free full-page OMR system that receives a page image and directly outputs the transcription in a single step. This model requires only the annotations of full score pages, which greatly alleviates the task of manual labeling. The model has been tested with early music written in mensural notation, for which the presented approach is especially beneficial. Results show that this methodology provides a solution with promising results and establishes a new line of research for holistic transcription of music score pages.},
keywords = {Leonardo2021, MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Rizo, D.; Delgado, T.; Calvo-Zaragoza, J.; Madueño, A.; García-Iasci, P.
Speeding-up the encoding of mensural collections from Spanish libraries Journal Article
In: IAML 2022 Prague, 2022.
@article{k502,
title = {Speeding-up the encoding of mensural collections from Spanish libraries},
author = {D. Rizo and T. Delgado and J. Calvo-Zaragoza and A. Madueño and P. García-Iasci},
year = {2022},
date = {2022-07-01},
booktitle = {IAML 2022 Prague},
journal = {IAML 2022 Prague},
organization = {IAML},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {article}
}
Alfaro-Contreras, M.; Valero-Mas, J. J.; Iñesta, J. M.; Calvo-Zaragoza, J.
Insights into transfer learning between image and audio music transcription Proceedings Article
In: Sound and Music Computing Conference, pp. 295-301, Zenodo, Saint-Étienne, France, 2022.
@inproceedings{Alfaro-Contreras2022b,
title = {Insights into transfer learning between image and audio music transcription},
author = {M. Alfaro-Contreras and J. J. Valero-Mas and J. M. Iñesta and J. Calvo-Zaragoza},
doi = {10.5281/zenodo.6797870},
year = {2022},
date = {2022-06-01},
urldate = {2022-06-01},
booktitle = {Sound and Music Computing Conference},
pages = {295-301},
publisher = {Zenodo},
address = {Saint-Étienne, France},
abstract = {Optical Music Recognition (OMR) and Automatic Music Transcription (AMT) stand for the research fields that devise methods to transcribe music sources---documents or audio signals, respectively---into a structured digital format. Historically, they have followed different approaches to achieve the same goal. However, their recent definition in terms of sequence labeling tasks gathers them under a common formulation framework. Under this premise, one may wonder if there exist any synergies between the two fields that could be exploited to improve the individual recognition rates in their respective domains. In this work, we aim to further explore this question from a Transfer Learning (TL) point of view in the context of neural end-to-end recognition models. More precisely, we consider a music transcription system, trained on either image or audio data, and adapt its performance to the unseen domain during the training phase using different TL schemes. Results show that knowledge transfer slightly boosts model performance with sufficient available data, but it is not properly leveraged when the latter condition is not met. This opens up a new promising, yet challenging, research path towards building an effective bridge between two solutions of the same problem.},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
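One of the transfer learning schemes compared above amounts to initialising the target-modality model with source-modality weights and freezing part of it before fine-tuning. The sketch below illustrates that idea with a toy CRNN; the architecture, layer names, and freezing policy are placeholders, not the models used in the paper.

# Generic cross-modality transfer sketch (placeholder architecture and freezing policy).
import torch
import torch.nn as nn

class CRNN(nn.Module):
    def __init__(self, vocab_size, in_channels=1):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(in_channels, 32, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
        )
        self.recurrent = nn.LSTM(64, 128, batch_first=True, bidirectional=True)
        self.head = nn.Linear(256, vocab_size + 1)   # +1 for the CTC blank symbol

source = CRNN(vocab_size=200)   # stands in for a model trained on the source modality
target = CRNN(vocab_size=200)
target.load_state_dict(source.state_dict())          # transfer the learned weights
for p in target.encoder.parameters():                 # freeze the convolutional front-end
    p.requires_grad = False
optimizer = torch.optim.Adam((p for p in target.parameters() if p.requires_grad), lr=1e-4)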
Alfaro-Contreras, M.; Ríos-Vila, A.; Valero-Mas, J. J.; Iñesta, J. M.; Calvo-Zaragoza, J.
Decoupling music notation to improve end-to-end Optical Music Recognition Journal Article
In: Pattern Recognition Letters, vol. 158, pp. 157-163, 2022, ISSN: 0167-8655.
@article{Alfaro-Contreras2022,
title = {Decoupling music notation to improve end-to-end Optical Music Recognition},
author = {M. Alfaro-Contreras and A. Ríos-Vila and J. J. Valero-Mas and J. M. Iñesta and J. Calvo-Zaragoza},
doi = {10.1016/j.patrec.2022.04.032},
issn = {0167-8655},
year = {2022},
date = {2022-06-01},
urldate = {2022-06-01},
journal = {Pattern Recognition Letters},
volume = {158},
pages = {157-163},
abstract = {Inspired by the Text Recognition field, end-to-end schemes based on Convolutional Recurrent Neural Networks (CRNN) trained with the Connectionist Temporal Classification (CTC) loss function are considered one of the current state-of-the-art techniques for staff-level Optical Music Recognition (OMR). Unlike text symbols, music-notation elements may be defined as a combination of (i) a shape primitive located in (ii) a certain position in a staff. However, this double nature is generally neglected in the learning process, as each combination is treated as a single token. In this work, we study whether exploiting such particularity of music notation actually benefits the recognition performance and, if so, which approach is the most appropriate. For that, we thoroughly review existing specific approaches that explore this premise and propose different combinations of them. Furthermore, considering the limitations observed in such approaches, a novel decoding strategy specifically designed for OMR is proposed. The results obtained with four different corpora of historical manuscripts show the relevance of leveraging this double nature of music notation since it outperforms the standard approaches where it is ignored. In addition, the proposed decoding leads to significant reductions in the error rates with respect to the other cases.},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {article}
}
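The "double nature" discussed above (a shape primitive placed at a staff position) can be made explicit by splitting each combined agnostic token into two sub-tokens. The token syntax below is hypothetical and only illustrates the idea, not the corpora's exact encoding.

# Hypothetical agnostic tokens of the form "<shape>-<position>"; splitting them yields two
# small vocabularies (shapes and positions) instead of one large combined vocabulary.
combined = ["clef.G-L2", "note.quarter-S4", "note.eighth-L3", "rest.quarter-L3"]

def decouple(tokens):
    shapes, positions = [], []
    for token in tokens:
        shape, position = token.rsplit("-", 1)
        shapes.append(shape)
        positions.append(position)
    return shapes, positions

print(decouple(combined))
# (['clef.G', 'note.quarter', 'note.eighth', 'rest.quarter'], ['L2', 'S4', 'L3', 'L3'])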
Arroyo, V.; Valero-Mas, J. J.; Calvo-Zaragoza, J.; Pertusa, A.
Neural audio-to-score music transcription for unconstrained polyphony using compact output representations Proceedings Article
In: Proc. of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), IEEE, Singapore, Singapore, 2022.
@inproceedings{k487,
title = {Neural audio-to-score music transcription for unconstrained polyphony using compact output representations},
author = {V. Arroyo and J. J. Valero-Mas and J. Calvo-Zaragoza and A. Pertusa},
year = {2022},
date = {2022-05-01},
booktitle = {Proc. of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
publisher = {IEEE},
address = {Singapore, Singapore},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Ríos-Vila, A.; Iñesta, J. M.; Calvo-Zaragoza, J.
On the Use of Transformers for End-to-End Optical Music Recognition Proceedings Article
In: Iberian Pattern Recognition and Image Analysis, IbPRIA 2022., pp. 470-481, Aveiro, Portugal, 2022, ISBN: 978-3-031-04880-7.
@inproceedings{k492,
title = {On the Use of Transformers for End-to-End Optical Music Recognition},
author = {A. Ríos-Vila and J. M. Iñesta and J. Calvo-Zaragoza},
isbn = {978-3-031-04880-7},
year = {2022},
date = {2022-05-01},
urldate = {2022-05-01},
booktitle = {Iberian Pattern Recognition and Image Analysis, IbPRIA 2022.},
pages = {470-481},
address = {Aveiro, Portugal},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Garrido-Munoz, C.; Ríos-Vila, A.; Calvo-Zaragoza, J.
Retrieval of Music-Notation Primitives via Image-to-Sequence Approaches Proceedings Article
In: Iberian Pattern Recognition and Image Analysis, IbPRIA 2022., pp. 482-492, Aveiro, Portugal, 2022, ISBN: 978-3-031-04880-7.
@inproceedings{k493,
title = {Retrieval of Music-Notation Primitives via Image-to-Sequence Approaches},
author = {C. Garrido-Munoz and A. Ríos-Vila and J. Calvo-Zaragoza},
isbn = {978-3-031-04880-7},
year = {2022},
date = {2022-05-01},
booktitle = {Iberian Pattern Recognition and Image Analysis, IbPRIA 2022.},
pages = {482-492},
address = {Aveiro, Portugal},
keywords = {Leonardo2021},
pubstate = {published},
tppubtype = {inproceedings}
}
Mas-Candela, E.; Ríos-Vila, A.; Calvo-Zaragoza, J.
A First Approach to Image Transformation Sequence Retrieval Proceedings Article
In: Iberian Pattern Recognition and Image Analysis, IbPRIA 2022., pp. 321-332, Aveiro, Portugal, 2022.
@inproceedings{k494,
title = {A First Approach to Image Transformation Sequence Retrieval},
author = {E. Mas-Candela and A. Ríos-Vila and J. Calvo-Zaragoza},
year = {2022},
date = {2022-05-01},
booktitle = {Iberian Pattern Recognition and Image Analysis, IbPRIA 2022.},
pages = {321-332},
address = {Aveiro, Portugal},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Münnich, S.; Rizo, D.
Music Encoding Conference Proceedings 2022. Book
Humanities Commons, 2022, ISBN: 978-84-1302-173-7.
@book{k495,
title = {Music Encoding Conference Proceedings 2022.},
author = {S. Münnich and D. Rizo},
editor = {S. Münnich and D. Rizo},
isbn = {978-84-1302-173-7},
year = {2022},
date = {2022-05-01},
urldate = {2022-05-01},
publisher = {Humanities Commons},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {book}
}
Münnich, S.; Rizo, D.
Foreword Proceedings Article
In: Münnich, S.; Rizo, D. (Ed.): Music Encoding Conference Proceedings 2021, pp. vii–viii, Humanities Commons, 2022, ISBN: 978-84-1302-173-7.
@inproceedings{k496,
title = {Foreword},
author = {S. Münnich and D. Rizo},
editor = {S. Münnich and D. Rizo},
isbn = {978-84-1302-173-7},
year = {2022},
date = {2022-05-01},
urldate = {2022-05-01},
booktitle = {Music Encoding Conference Proceedings 2021},
pages = {vii–viii},
publisher = {Humanities Commons},
chapter = {1},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Desmond, K.; Pugin, L.; Regimbal, J.; Rizo, D.; Sapp, C. S.; Thomae, M. E.
Encoding Polyphony from Medieval Manuscripts Notated in Mensural Notation Proceedings Article
In: Music Encoding Conference Proceedings 2021, pp. 197–219, Humanities Commons, 2022, ISBN: 978-84-1302-173-7.
@inproceedings{k497,
title = {Encoding Polyphony from Medieval Manuscripts Notated in Mensural Notation},
author = {K. Desmond and L. Pugin and J. Regimbal and D. Rizo and C. S. Sapp and M. E. Thomae},
isbn = {978-84-1302-173-7},
year = {2022},
date = {2022-05-01},
urldate = {2022-05-01},
booktitle = {Music Encoding Conference Proceedings 2021},
pages = {197–219},
publisher = {Humanities Commons},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Sánchez-Ferrer, A.; Gallego, A. J.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
The CleanSea Set: A Benchmark Corpus for Underwater Debris Detection and Recognition Proceedings Article
In: 10th Iberian Conference on Pattern Recognition and Image Analysis (IbPRIA), pp. 616–628, Aveiro, Portugal, 2022, ISBN: 978-3-031-04881-4.
@inproceedings{k512,
title = {The CleanSea Set: A Benchmark Corpus for Underwater Debris Detection and Recognition},
author = {A. Sánchez-Ferrer and A. J. Gallego and J. J. Valero-Mas and J. Calvo-Zaragoza},
isbn = {978-3-031-04881-4},
year = {2022},
date = {2022-05-01},
booktitle = {10th Iberian Conference on Pattern Recognition and Image Analysis (IbPRIA)},
pages = {616--628},
address = {Aveiro, Portugal},
abstract = {In recent years, the large amount of debris scattered throughout the ocean is becoming one of the major pollution problems, causing extinction of species and accelerating the degradation of our planet, among other environmental issues. Since the manual treatment of this waste represents a considerably tedious task, autonomous frameworks are gaining attention. Due to their reported good performance, such frameworks generally rely on Deep Learning techniques. However, the scarcity of data coupled with the inherent difficulties of the field---debris with different shapes and colors due to long-lasting exposure to the ocean, illumination variability or sea conditions---makes detecting underwater objects a particularly challenging task. The contribution of this work to the field is double: on the one hand, we introduce a novel data collection for supervised learning---the CleanSea corpus---annotated at both the bound box and contour levels of the objects to contribute with the research and progress in the field and on the other hand, we devise and optimize a recognition model based on the reference Mask Object-Based Convolutional Neural Network for this set to establish a benchmark for future comparison and assess its performance in both simulated and real-world scenarios. Results show the relevance of the contributions as the devised model is capable of properly addressing the detection and recognition of general debris when trained with the introduced CleanSea corpus.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
In recent years, the large amount of debris scattered throughout the ocean is becoming one of the major pollution problems, causing extinction of species and accelerating the degradation of our planet, among other environmental issues. Since the manual treatment of this waste represents a considerably tedious task, autonomous frameworks are gaining attention. Due to their reported good performance, such frameworks generally rely on Deep Learning techniques. However, the scarcity of data coupled with the inherent difficulties of the field---debris with different shapes and colors due to long-lasting exposure to the ocean, illumination variability or sea conditions---makes detecting underwater objects a particularly challenging task. The contribution of this work to the field is double: on the one hand, we introduce a novel data collection for supervised learning---the CleanSea corpus---annotated at both the bound box and contour levels of the objects to contribute with the research and progress in the field and on the other hand, we devise and optimize a recognition model based on the reference Mask Object-Based Convolutional Neural Network for this set to establish a benchmark for future comparison and assess its performance in both simulated and real-world scenarios. Results show the relevance of the contributions as the devised model is capable of properly addressing the detection and recognition of general debris when trained with the introduced CleanSea corpus.
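The detector described above builds on Mask R-CNN. Purely as an illustrative sketch of that general recipe, and not the authors' actual code, the snippet below adapts an off-the-shelf torchvision Mask R-CNN to a hypothetical number of debris classes and runs it on a dummy image; the class count, the confidence threshold and the input tensor are invented placeholders.

import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

NUM_CLASSES = 20  # hypothetical: 19 debris categories + background

# Start from a COCO-pretrained Mask R-CNN and swap its prediction heads.
model = torchvision.models.detection.maskrcnn_resnet50_fpn(weights="DEFAULT")
in_feats = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_feats, NUM_CLASSES)
mask_feats = model.roi_heads.mask_predictor.conv5_mask.in_channels
model.roi_heads.mask_predictor = MaskRCNNPredictor(mask_feats, 256, NUM_CLASSES)

# After fine-tuning on annotated images, inference returns boxes, labels, scores and masks.
model.eval()
with torch.no_grad():
    image = torch.rand(3, 480, 640)      # stand-in for a real underwater photo
    pred = model([image])[0]
    keep = pred["scores"] > 0.5          # simple confidence threshold
    print(pred["boxes"][keep], pred["labels"][keep])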
Iñesta, J. M.; Thomae, M. E.
An On-line Tool for Transcription of Music Scores: MuRET Presentation
Montreal (Canada), 01.05.2022.
Abstract | Links | BibTeX | Tags: HispaMus
@misc{k520,
title = {An On-line Tool for Transcription of Music Scores: MuRET},
author = {J. M. Iñesta and M. E. Thomae},
year = {2022},
date = {2022-05-01},
urldate = {2022-05-01},
booktitle = {1st Int. Conf. The Sound of Future/The Future of Sound},
address = {Montreal (Canada)},
organization = {CIRMMT},
abstract = {MuRET is a Machine-Learning Optical Music Recognition (OMR) research tool that runs in the browser. It has been created to help in the transcription of music collections and to experiment with machine learning algorithms for OMR, and it is capable of working well with different notations and writing styles. Why use Machine Learning? Instead of designing a system to solve the task, we have designed a system that learns how to solve the task from sets of labeled (solved) images. This way it is adaptable to new (previously unseen) collections.},
key = {OMR, Machine Learning},
keywords = {HispaMus},
pubstate = {published},
tppubtype = {presentation}
}
MuRET is a Machine-Learning Optical Music Recognition (OMR) research tool that runs in the browser. It has been created to help in the transcription of music collections and to experiment with machine learning algorithms for OMR, and it is capable of working well with different notations and writing styles. Why use Machine Learning? Instead of designing a system to solve the task, we have designed a system that learns how to solve the task from sets of labeled (solved) images. This way it is adaptable to new (previously unseen) collections.
Fuente, C.; Valero-Mas, J. J.; Castellanos, F. J.; Calvo-Zaragoza, J.
Multimodal Image and Audio Music Transcription Journal Article
In: International Journal of Multimedia Information Retrieval, vol. 11, pp. 77-84, 2022.
BibTeX | Tags: MultiScore
@article{k479,
title = {Multimodal Image and Audio Music Transcription},
author = {C. Fuente and J. J. Valero-Mas and F. J. Castellanos and J. Calvo-Zaragoza},
year = {2022},
date = {2022-01-01},
journal = {International Journal of Multimedia Information Retrieval},
volume = {11},
pages = {77-84},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {article}
}
Castellanos, F. J.; Garrido-Munoz, C.; Ríos-Vila, A.; Calvo-Zaragoza, J.
Region-based Layout Analysis of Music Score Images Journal Article
In: Expert Systems with Applications, pp. 118211, 2022, ISSN: 0957-4174.
BibTeX | Tags: MultiScore
@article{k486,
title = {Region-based Layout Analysis of Music Score Images},
author = {F. J. Castellanos and C. Garrido-Munoz and A. Ríos-Vila and J. Calvo-Zaragoza},
issn = {0957-4174},
year = {2022},
date = {2022-01-01},
urldate = {2022-01-01},
journal = {Expert Systems with Applications},
pages = {118211},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {article}
}
Rosello, A.; Ayllon, E.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Test Sample Selection for Handwriting Recognition Through Language Modeling Proceedings Article
In: Pattern Recognition and Image Analysis - 10th Iberian Conference, IbPRIA 2022, Aveiro, Portugal, May 4-6, 2022, Proceedings, 2022.
BibTeX | Tags: MultiScore
@inproceedings{k498,
title = {Test Sample Selection for Handwriting Recognition Through Language Modeling},
author = {A. Rosello and E. Ayllon and J. J. Valero-Mas and J. Calvo-Zaragoza},
year = {2022},
date = {2022-01-01},
booktitle = {Pattern Recognition and Image Analysis - 10th Iberian Conference, IbPRIA 2022, Aveiro, Portugal, May 4-6, 2022, Proceedings},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Ríos-Vila, A.; Iñesta, J. M.; Calvo-Zaragoza, J.
End-to-End Full-Page Optical Music Recognition for Mensural Notation Proceedings Article
In: Proceedings of the 23rd International Society for Music Information Retrieval Conference, ISMIR, Bangalore, India, 2022.
Abstract | BibTeX | Tags: Leonardo2021, MultiScore
@inproceedings{k499,
title = {End-to-End Full-Page Optical Music Recognition for Mensural Notation},
author = {A. Ríos-Vila and J. M. Iñesta and J. Calvo-Zaragoza},
year = {2022},
date = {2022-01-01},
urldate = {2022-01-01},
booktitle = {Proceedings of the 23rd International Society for Music Information Retrieval Conference, ISMIR},
address = {Bangalore, India},
abstract = {Optical Music Recognition (OMR) systems typically consider workflows that include several steps, such as staff detection, symbol recognition, and semantic reconstruction. However, fine-tuning these systems is costly due to the specific data labeling process that has to be performed to train models for each of these steps. In this paper, we present the first segmentation-free full-page OMR system that receives a page image and directly outputs the transcription in a single step. This model requires only the annotations of full score pages, which greatly alleviates the task of manual labeling. The model has been tested with early music written in mensural notation, for which the presented approach is especially beneficial. Results show that this methodology provides a solution with promising results and establishes a new line of research for holistic transcription of music score pages.},
keywords = {Leonardo2021, MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Optical Music Recognition (OMR) systems typically consider workflows that include several steps, such as staff detection, symbol recognition, and semantic reconstruction. However, fine-tuning these systems is costly due to the specific data labeling process that has to be performed to train models for each of these steps. In this paper, we present the first segmentation-free full-page OMR system that receives a page image and directly outputs the transcription in a single step. This model requires only the annotations of full score pages, which greatly alleviates the task of manual labeling. The model has been tested with early music written in mensural notation, for which the presented approach is especially beneficial. Results show that this methodology provides a solution with promising results and establishes a new line of research for holistic transcription of music score pages.
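Full-page systems such as this one extend the staff-level image-to-sequence pipelines commonly trained with the CTC loss. The sketch below is only a generic, minimal PyTorch illustration of that kind of pipeline; the layer sizes, vocabulary size and dummy data are invented, and it is not the architecture reported in the paper.

import torch
import torch.nn as nn

class TinyCRNN(nn.Module):
    """Toy image-to-sequence model: CNN features -> BiLSTM -> per-frame symbol logits."""
    def __init__(self, num_symbols, img_height=64):
        super().__init__()
        self.cnn = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
        )
        feat = 64 * (img_height // 4)                # channels x remaining height
        self.rnn = nn.LSTM(feat, 128, bidirectional=True, batch_first=True)
        self.fc = nn.Linear(256, num_symbols + 1)    # +1 for the CTC blank symbol

    def forward(self, x):                            # x: (batch, 1, H, W)
        f = self.cnn(x)                              # (batch, 64, H/4, W/4)
        b, c, h, w = f.shape
        f = f.permute(0, 3, 1, 2).reshape(b, w, c * h)   # one feature vector per image column
        out, _ = self.rnn(f)
        return self.fc(out).log_softmax(-1)          # (batch, W/4, num_symbols + 1)

model = TinyCRNN(num_symbols=100)
images = torch.rand(2, 1, 64, 256)                   # dummy score strips
log_probs = model(images).permute(1, 0, 2)           # CTC expects (time, batch, classes)
targets = torch.randint(1, 101, (2, 10))             # dummy symbol sequences
loss = nn.CTCLoss(blank=0)(log_probs, targets,
                           input_lengths=torch.full((2,), log_probs.size(0)),
                           target_lengths=torch.full((2,), 10))
loss.backward()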
Castellanos, F. J.; Gallego, A. J.; Calvo-Zaragoza, J.; Fujinaga, I.
Domain Adaptation for Staff-Region Retrieval of Music Score Images Journal Article
In: International Journal on Document Analysis and Recognition, 2022, ISSN: 1433-2825.
BibTeX | Tags: MultiScore
@article{k500,
title = {Domain Adaptation for Staff-Region Retrieval of Music Score Images},
author = {F. J. Castellanos and A. J. Gallego and J. Calvo-Zaragoza and I. Fujinaga},
issn = {1433-2825},
year = {2022},
date = {2022-01-01},
journal = {International Journal on Document Analysis and Recognition},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {article}
}
de la Fuente, C.; Castellanos, F. J.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Multimodal Recognition of Frustration during Game-Play with Deep Neural Networks Journal Article
In: Multimedia Tools and Applications, 2022.
BibTeX | Tags:
@article{k501,
title = {Multimodal Recognition of Frustration during Game-Play with Deep Neural Networks},
author = {C. de la Fuente and F. J. Castellanos and J. J. Valero-Mas and J. Calvo-Zaragoza},
year = {2022},
date = {2022-01-01},
urldate = {2022-01-01},
journal = {Multimedia Tools and Applications},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Alfaro-Contreras, M.; Ríos-Vila, A.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Few-Shot Music Symbol Classification via Self-Supervised Learning and Nearest Neighbor Proceedings Article
In: Pattern Recognition. ICPR International Workshops and Challenges, 2022.
BibTeX | Tags: MultiScore
@inproceedings{k504,
title = {Few-Shot Music Symbol Classification via Self-Supervised Learning and Nearest Neighbor},
author = {M. Alfaro-Contreras and A. Ríos-Vila and J. J. Valero-Mas and J. Calvo-Zaragoza},
year = {2022},
date = {2022-01-01},
booktitle = {Pattern Recognition. ICPR International Workshops and Challenges},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Sáez-Pérez, J.; Gallego, A. J.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Domain Adaptation in Robotics: A Study Case on Kitchen Utensil Recognition Proceedings Article
In: 10th Iberian Conference on Pattern Recognition and Image Analysis (IbPRIA), 2022.
@inproceedings{k506,
title = {Domain Adaptation in Robotics: A Study Case on Kitchen Utensil Recognition},
author = {J. Sáez-Pérez and A. J. Gallego and J. J. Valero-Mas and J. Calvo-Zaragoza},
year = {2022},
date = {2022-01-01},
booktitle = {10th Iberian Conference on Pattern Recognition and Image Analysis (IbPRIA)},
keywords = {ROMA},
pubstate = {published},
tppubtype = {inproceedings}
}
Alashhab, S.
Aplicaciones de visión artificial para ayuda a personas con dificultades visuales PhD Thesis
2022.
BibTeX | Tags:
@phdthesis{k508,
title = {Aplicaciones de visión artificial para ayuda a personas con dificultades visuales},
author = {S. Alashhab},
editor = {Miguel Angel Lozano and Antonio Javier Gallego},
year = {2022},
date = {2022-01-01},
urldate = {2022-01-01},
organization = {Universidad de Alicante},
keywords = {},
pubstate = {published},
tppubtype = {phdthesis}
}
Bernabeu, M.
Búsqueda de imágenes similares usando técnicas de aprendizaje automático PhD Thesis
2022.
BibTeX | Tags:
@phdthesis{k509,
title = {Búsqueda de imágenes similares usando técnicas de aprendizaje automático},
author = {M. Bernabeu},
editor = {Antonio Pertusa and Antonio Javier Gallego},
year = {2022},
date = {2022-01-01},
urldate = {2022-01-01},
organization = {Universidad de Alicante},
keywords = {},
pubstate = {published},
tppubtype = {phdthesis}
}
Gallego, A. J.; Rico-Juan, J. R.; Valero-Mas, J. J.
Efficient k-nearest neighbor search based on clustering and adaptive k values Journal Article
In: Pattern Recognition, vol. 122, pp. 108356, 2022, ISSN: 0031-3203.
@article{k510,
title = {Efficient k-nearest neighbor search based on clustering and adaptive k values},
author = {A. J. Gallego and J. R. Rico-Juan and J. J. Valero-Mas},
issn = {0031-3203},
year = {2022},
date = {2022-01-01},
journal = {Pattern Recognition},
volume = {122},
pages = {108356},
abstract = {The k-Nearest Neighbor (kNN) algorithm is widely used in the supervised learning field and, particularly, in search and classification tasks, owing to its simplicity, competitive performance, and good statistical properties. However, its inherent inefficiency prevents its use in most modern applications due to the vast amount of data that the current technological evolution generates, being thus the optimization of kNN-based search strategies of particular interest. This paper introduces the caKD+ algorithm, which tackles this limitation by combining the use of feature learning techniques, clustering methods, adaptive search parameters per cluster, and the use of pre-calculated K-Dimensional Tree structures, and results in a highly efficient search method. This proposal has been evaluated using 10 datasets and the results show that caKD+ significantly outperforms 16 state-of-the-art efficient search methods while still depicting such an accurate performance as the one by the exhaustive kNN search.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
The k-Nearest Neighbor (kNN) algorithm is widely used in the supervised learning field and, particularly, in search and classification tasks, owing to its simplicity, competitive performance, and good statistical properties. However, its inherent inefficiency prevents its use in most modern applications due to the vast amount of data that the current technological evolution generates, being thus the optimization of kNN-based search strategies of particular interest. This paper introduces the caKD+ algorithm, which tackles this limitation by combining the use of feature learning techniques, clustering methods, adaptive search parameters per cluster, and the use of pre-calculated K-Dimensional Tree structures, and results in a highly efficient search method. This proposal has been evaluated using 10 datasets and the results show that caKD+ significantly outperforms 16 state-of-the-art efficient search methods while still depicting such an accurate performance as the one by the exhaustive kNN search.
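As a loose, simplified illustration of the general strategy combined in caKD+ (clustering plus per-cluster K-Dimensional Trees), and not the published algorithm itself, the sketch below partitions a synthetic reference set offline so that each query only searches the KD-tree of its nearest cluster; the data, the number of clusters and the fixed k are all made-up placeholders.

import numpy as np
from sklearn.cluster import KMeans
from sklearn.neighbors import KDTree

rng = np.random.default_rng(0)
X = rng.normal(size=(10_000, 16))            # hypothetical reference set
y = rng.integers(0, 5, size=len(X))          # hypothetical class labels

# Offline stage: partition the data and build one KD-tree per cluster.
kmeans = KMeans(n_clusters=32, n_init=10, random_state=0).fit(X)
trees = {c: KDTree(X[kmeans.labels_ == c]) for c in range(32)}
labels = {c: y[kmeans.labels_ == c] for c in range(32)}

def knn_classify(q, k=5):
    # Online stage: search only the KD-tree of the closest cluster centroid.
    c = int(np.argmin(np.linalg.norm(kmeans.cluster_centers_ - q, axis=1)))
    _, idx = trees[c].query(q.reshape(1, -1), k=min(k, len(labels[c])))
    votes = labels[c][idx[0]]
    return np.bincount(votes).argmax()

print(knn_classify(rng.normal(size=16)))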
Alashhab, S.; Gallego, A. J.; Lozano, M. Á.
Efficient gesture recognition for the assistance of visually impaired people using multi-head neural networks Journal Article
In: Engineering Applications of Artificial Intelligence, vol. 114, pp. 105188, 2022, ISSN: 0952-1976.
@article{k511,
title = {Efficient gesture recognition for the assistance of visually impaired people using multi-head neural networks},
author = {S. Alashhab and A. J. Gallego and M. Á. Lozano},
issn = {0952-1976},
year = {2022},
date = {2022-01-01},
journal = {Engineering Applications of Artificial Intelligence},
volume = {114},
pages = {105188},
abstract = {Existing research for the assistance of visually impaired people mainly focus on solving a single task (such as reading a text or detecting an obstacle), hence forcing the user to switch applications to perform other actions. This paper proposes an interactive system for mobile devices controlled by hand gestures that allow the user to control the device and use several assistance tools by making simple static and dynamic hand gestures (e.g., pointing a finger at an object will show a description of it). The system is based on a multi-head neural network, which initially detects and classifies the gestures, and subsequently, depending on the gesture detected, performs a second stage that carries out the corresponding action. This architecture optimizes the resources required to perform different tasks, it takes advantage of the information obtained from an initial backbone to perform different processes in a second stage. To train and evaluate the system, a dataset with about 40k images was manually compiled and labeled including different types of hand gestures, backgrounds (indoors and outdoors), lighting conditions, etc. This dataset contains synthetic gestures (whose objective is to pre-train the system to improve the results) and real images captured using different mobile phones. The comparison made with nearly 50 state-of-the-art methods shows competitive results as regards the different actions performed by the system, such as the accuracy of classification and localization of gestures, or the generation of descriptions for objects and scenes.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Existing research for the assistance of visually impaired people mainly focus on solving a single task (such as reading a text or detecting an obstacle), hence forcing the user to switch applications to perform other actions. This paper proposes an interactive system for mobile devices controlled by hand gestures that allow the user to control the device and use several assistance tools by making simple static and dynamic hand gestures (e.g., pointing a finger at an object will show a description of it). The system is based on a multi-head neural network, which initially detects and classifies the gestures, and subsequently, depending on the gesture detected, performs a second stage that carries out the corresponding action. This architecture optimizes the resources required to perform different tasks, it takes advantage of the information obtained from an initial backbone to perform different processes in a second stage. To train and evaluate the system, a dataset with about 40k images was manually compiled and labeled including different types of hand gestures, backgrounds (indoors and outdoors), lighting conditions, etc. This dataset contains synthetic gestures (whose objective is to pre-train the system to improve the results) and real images captured using different mobile phones. The comparison made with nearly 50 state-of-the-art methods shows competitive results as regards the different actions performed by the system, such as the accuracy of classification and localization of gestures, or the generation of descriptions for objects and scenes.
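The multi-head design mentioned above amounts to a single backbone whose features are computed once and then reused by task-specific heads. The following schematic PyTorch sketch illustrates that pattern only; the backbone, the layer sizes and the two heads (gesture classification plus a single box regression) are hypothetical and not the authors' architecture.

import torch
import torch.nn as nn

class MultiHeadGestureNet(nn.Module):
    """Shared CNN backbone with one head per task (classification + localization)."""
    def __init__(self, num_gestures=10):
        super().__init__()
        self.backbone = nn.Sequential(                     # shared feature extractor
            nn.Conv2d(3, 32, 3, stride=2, padding=1), nn.ReLU(),
            nn.Conv2d(32, 64, 3, stride=2, padding=1), nn.ReLU(),
            nn.AdaptiveAvgPool2d(1), nn.Flatten(),
        )
        self.gesture_head = nn.Linear(64, num_gestures)    # which gesture is shown
        self.location_head = nn.Linear(64, 4)              # where it is: (x, y, w, h)

    def forward(self, x):
        features = self.backbone(x)                        # computed once, reused by both heads
        return self.gesture_head(features), self.location_head(features)

model = MultiHeadGestureNet()
logits, boxes = model(torch.rand(4, 3, 224, 224))          # dummy batch of phone frames
print(logits.shape, boxes.shape)                           # torch.Size([4, 10]) torch.Size([4, 4])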
Garrido-Munoz, C.; Ríos-Vila, A.; Calvo-Zaragoza, J.
A holistic approach for image-to-graph: application to optical music recognition Journal Article
In: International Journal on Document Analysis and Recognition, 2022.
BibTeX | Tags: Leonardo2021
@article{k522,
title = {A holistic approach for image-to-graph: application to optical music recognition},
author = {C. Garrido-Munoz and A. Ríos-Vila and J. Calvo-Zaragoza},
year = {2022},
date = {2022-01-01},
urldate = {2022-01-01},
journal = {International Journal on Document Analysis and Recognition},
keywords = {Leonardo2021},
pubstate = {published},
tppubtype = {article}
}
2021
Calvo-Zaragoza, J.; Pertusa, A.; Gallego, A. J.; Iñesta, J. M.; Micó, L.; Oncina, J.; Perez-Sancho, C.; de León, P. J. Ponce; Rizo, D.
MultiScore Project: Multimodal Transcription of Music Scores Proceedings Article
In: Proceedings of the 14th Machine Learning and Music Workshop, pp. 3, 2021.
Links | BibTeX | Tags: MultiScore
@inproceedings{k481,
title = {MultiScore Project: Multimodal Transcription of Music Scores},
author = {J. Calvo-Zaragoza and A. Pertusa and A. J. Gallego and J. M. Iñesta and L. Micó and J. Oncina and C. Perez-Sancho and P. J. Ponce de León and D. Rizo},
url = {https://grfia.dlsi.ua.es/repositori/grfia/pubs/481/MML2021__MultiScore_Final.pdf},
year = {2021},
date = {2021-12-01},
urldate = {2021-12-01},
booktitle = {Proceedings of the 14th Machine Learning and Music Workshop},
pages = {3},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Castellanos, F. J.; Gallego, A. J.; Calvo-Zaragoza, J.
An Unsupervised Domain Adaptation framework for Layout Analysis of Music Score Images Proceedings Article
In: Proceedings of the 14th Machine Learning and Music Workshop, pp. 6, 2021.
@inproceedings{k482,
title = {An Unsupervised Domain Adaptation framework for Layout Analysis of Music Score Images},
author = {F. J. Castellanos and A. J. Gallego and J. Calvo-Zaragoza},
year = {2021},
date = {2021-12-01},
booktitle = {Proceedings of the 14th Machine Learning and Music Workshop},
pages = {6},
keywords = {GRE19-04, ROMA},
pubstate = {published},
tppubtype = {inproceedings}
}
2024
Galan-Cuenca, A.; Valero-Mas, J. J.; Martinez-Sevilla, J. C.; Hidalgo-Centeno, A.; Pertusa, A.; Calvo-Zaragoza, J.
Proceedings of the 32nd ACM International Conference on Multimedia, Association for Computing Machinery, 2024, ISBN: 979-8-4007-0686-8.
Abstract | Links | BibTeX | Tags:
@conference{nokey,
title = {MUSCAT: a Multimodal mUSic Collection for Automatic Transcription of real recordings and image scores},
author = {A. Galan-Cuenca and J. J. Valero-Mas and J. C. Martinez-Sevilla and A. Hidalgo-Centeno and A. Pertusa and J. Calvo-Zaragoza},
doi = {https://doi.org/10.1145/3664647.3681572},
isbn = {979-8-4007-0686-8},
year = {2024},
date = {2024-10-28},
booktitle = {Proceedings of the 32nd ACM International Conference on Multimedia},
pages = {583-591},
publisher = {Association for Computing Machinery},
abstract = {Multimodal audio-image music transcription has been recently posed as a means of retrieving a digital score representation by leveraging the individual estimations from Automatic Music Transcription (AMT)---acoustic recordings---and Optical Music Recognition (OMR)---image scores---systems. Nevertheless, while proven to outperform single-modality recognition rates, this approach has been exclusively validated under controlled scenarios---monotimbral and monophonic synthetic data---mainly due to a lack of collections with symbolic score-level annotations for both recordings and graphical sheets. To promote research on this topic, this work presents the Multimodal mUSic Collection for Automatic Transcription (MUSCAT) assortment of acoustic recordings, image sheets, and their score-level annotations in several notation formats. This dataset comprises almost 80 hours of real recordings with varied instrumentation and polyphony degrees---ranging from piano to orchestral music---, 1251 scanned sheets, and 880 symbolic scores from 37 composers, which may also be used in other tasks involving metadata such as instrument identification or composer recognition. A fragmented subset of this collection solely focused on acoustic data for score-level AMT---the MUSic Collection for aUtomatic Transcription - fragmented Subset (MUSCUTS) assortment---is also presented together with a baseline experimentation, concluding the need to foster research on this field with real recordings. Finally, a web-based service is also provided to increase the size of the collections collaboratively.},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Penarrubia, C.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Contrastive Self-Supervised Learning for Optical Music Recognition Conference
International Workshop on Document Analysis Systems, 2024, ISBN: 978-3-031-70442-0.
Abstract | Links | BibTeX | Tags:
@conference{nokey,
title = {Contrastive Self-Supervised Learning for Optical Music Recognition},
author = {C. Penarrubia and J. J. Valero-Mas and J. Calvo-Zaragoza},
doi = {https://doi.org/10.1007/978-3-031-70442-0_19},
isbn = {978-3-031-70442-0},
year = {2024},
date = {2024-09-11},
urldate = {2024-09-11},
booktitle = {International Workshop on Document Analysis Systems},
pages = {312-326},
abstract = {Optical Music Recognition (OMR) is the research area focused on transcribing images of musical scores. In recent years, this field has seen great development thanks to the emergence of Deep Learning. However, these types of solutions require large volumes of labeled data. To alleviate this problem, Contrastive Self-Supervised Learning (SSL) has emerged as a paradigm that leverages large amounts of unlabeled data to train neural networks, yielding meaningful and robust representations. In this work, we explore its first application to the field of OMR. By utilizing three datasets that represent the heterogeneity of musical scores in notations and graphic styles, and through multiple evaluation protocols, we demonstrate that contrastive SSL delivers promising results, significantly reducing data scarcity challenges in OMR. To the best of our knowledge, this is the first study that integrates these two fields. We hope this research serves as a baseline and stimulates further exploration.},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Ríos-Vila, A.; Calvo-Zaragoza, J.; Paquet, T.
Sheet Music Transformer: End-To-End Optical Music Recognition Beyond Monophonic Transcription Conference
Document Analysis and Recognition - ICDAR 2024, vol. 1, Springer Nature Switzerland, 2024, ISBN: 978-3-031-70552-6.
BibTeX | Tags: MultiScore
@conference{RiosVila:ICDAR:2024,
title = {Sheet Music Transformer: End-To-End Optical Music Recognition Beyond Monophonic Transcription},
author = {A. Ríos-Vila and J. Calvo-Zaragoza and T. Paquet},
isbn = {978-3-031-70552-6},
year = {2024},
date = {2024-09-02},
urldate = {2024-09-02},
booktitle = {Document Analysis and Recognition - ICDAR 2024},
volume = {1},
pages = {20-37},
publisher = {Springer Nature Switzerland},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {conference}
}
Maciá, M.; Rizo, D.
The Impact of UX/UI on Piano-Assisted Learning in Extended Reality Conference
Computer Supported Music Education. Angers, France., 2024.
BibTeX | Tags:
@conference{macia2024,
title = {The Impact of UX/UI on Piano-Assisted Learning in Extended Reality},
author = {M. Maciá and D. Rizo},
year = {2024},
date = {2024-05-04},
urldate = {2024-05-04},
booktitle = {Computer Supported Music Education. Angers, France.},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Alfaro-Contreras, M.; Rios-Vila, A.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
A Transformer Approach for Polyphonic Audio-to-Score Transcription Proceedings Article
In: Proceedings of the International Conference on Acoustics, Speech, and Signal Processing (ICASSP 2024), Seul (Korea), 2024.
Links | BibTeX | Tags: MultiScore
@inproceedings{Alfaro-Contreras:ICASSP24,
title = {A Transformer Approach for Polyphonic Audio-to-Score Transcription},
author = {M. Alfaro-Contreras and A. Rios-Vila and J. J. Valero-Mas and J. Calvo-Zaragoza},
doi = {10.1109/ICASSP48485.2024.10447162},
year = {2024},
date = {2024-04-19},
urldate = {2024-04-19},
booktitle = {Proceedings of the International Conference on Acoustics, Speech, and Signal Processing (ICASSP 2024)},
address = {Seul (Korea)},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Valero-Mas, J. J.; Gallego, A. J.; Rico-Juan, J. R.
An overview of ensemble and feature learning in few-shot image classification using siamese networks Journal Article
In: Multimedia Tools and Applications, vol. 83, pp. 19929–19952, 2024, ISSN: 1380-7501.
@article{nokey,
title = {An overview of ensemble and feature learning in few-shot image classification using siamese networks},
author = {J. J. Valero-Mas and A. J. Gallego and J. R. Rico-Juan },
doi = {https://doi.org/10.1007/s11042-023-15607-3},
issn = {1380-7501},
year = {2024},
date = {2024-02-01},
urldate = {2023-07-29},
journal = {Multimedia Tools and Applications},
volume = {83},
pages = {19929–19952},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2023
Ramoneda, P.; Jeong, D.; Valero-Mas, J. J.; Serra, X.
Predicting performance difficulty from piano sheet music images Conference
Proceedings of the 24th International Society for Music Information Retrieval Conference, Milan, Italy, 2023, ISBN: 978-1-7327299-3-3.
Abstract | Links | BibTeX | Tags:
@conference{nokey,
title = {Predicting performance difficulty from piano sheet music images},
author = {P. Ramoneda and D. Jeong and J. J. Valero-Mas and X. Serra},
doi = {10.5281/zenodo.10265386},
isbn = {978-1-7327299-3-3},
year = {2023},
date = {2023-11-04},
urldate = {2023-11-04},
booktitle = {Proceedings of the 24th International Society for Music Information Retrieval Conference},
pages = {708-715},
address = {Milan, Italy},
abstract = {Estimating the performance difficulty of a musical score is crucial in music education for adequately designing the learning curriculum of the students. Although the music information retrieval community has recently shown interest in this task, existing approaches mainly use machine-readable scores, leaving the broader case of sheet music images unaddressed. Based on previous works involving sheet music images, we use a mid-level representation, bootleg score, describing notehead positions relative to staff lines coupled with a transformer model. This architecture is adapted to our task by introducing a different encoding scheme that reduces the encoded sequence length to one-eighth of the original size. In terms of evaluation, we consider five datasets---more than 7500 scores with up to 9 difficulty levels---, two being mainly compiled for this work. The results obtained when pretraining the scheme on the IMSLP corpus and fine-tuning it on the considered datasets prove the proposal's validity, achieving the best-performing model with a balanced accuracy of 40.3% and a mean square error of 1.3. Finally, we provide access to our code, data, and models for transparency and reproducibility.},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Penarrubia, C.; Garrido-Munoz, C.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Efficient notation assembly in optical music recognition Conference
Proceedings of the 24th International Society for Music Information Retrieval Conference, Milan, Italy, 2023, ISBN: 978-1-7327299-3-3.
BibTeX | Tags:
@conference{nokey,
title = {Efficient notation assembly in optical music recognition},
author = {C. Penarrubia and C. Garrido-Munoz and J. J. Valero-Mas and J. Calvo-Zaragoza},
isbn = {978-1-7327299-3-3},
year = {2023},
date = {2023-10-30},
booktitle = {Proceedings of the 24th International Society for Music Information Retrieval Conference},
pages = {182-189},
address = {Milan, Italy},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Martínez-Sevilla, J. C.; Ríos-Vila, A.; Castellanos, F. J.; Calvo-Zaragoza, J.
A Holistic Approach for Aligned Music and Lyrics Transcription Conference
Document Analysis and Recognition - ICDAR 2023, vol. 1, Springer Nature Switzerland, Cham, 2023, ISBN: 978-3-031-41676-7.
Abstract | Links | BibTeX | Tags: REPERTORIUM
@conference{MartinezSevilla:ICDAR:2023,
title = {A Holistic Approach for Aligned Music and Lyrics Transcription},
author = {J.C. Martínez-Sevilla and A. Ríos-Vila and F. J. Castellanos and J. Calvo-Zaragoza },
editor = {Fink, Gernot A. and Jain, Rajiv and Kise, Koichi and Zanibbi, Richard},
doi = {https://doi.org/10.1007/978-3-031-41676-7_11},
isbn = {978-3-031-41676-7},
year = {2023},
date = {2023-08-28},
urldate = {2023-08-28},
booktitle = {Document Analysis and Recognition - ICDAR 2023},
volume = {1},
pages = {185--201},
publisher = {Springer Nature Switzerland},
address = {Cham},
abstract = {In this paper, we present the Aligned Music Notation and Lyrics Transcription (AMNLT) challenge, whose goal is to retrieve the content from document images of vocal music. This new research area arises from the need to automatically transcribe notes and lyrics from music scores and align both sources of information conveniently. Although existing methods are able to deal with music notation and text, they work without providing their proper alignment, which is crucial to actually retrieve the content of the piece of vocal music. To overcome this challenge, we consider holistic neural approaches that transcribe music and text in one step, along with an encoding that implicitly aligns the sources of information. The methodology is evaluated on a benchmark specifically designed for AMNLT. The results report that existing methods can obtain high-quality text and music transcriptions, but posterior alignment errors are inevitably found. However, our formulation achieves relative improvements of over 80{%} in the metric that considers both transcription and alignment. We hope that this work will establish itself as a future reference for further research on AMNLT.},
keywords = {REPERTORIUM},
pubstate = {published},
tppubtype = {conference}
}
Martínez-Sevilla, J. C.; Alfaro-Contreras, M.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Insights into end-to-end audio-to-score transcription with real recordings: A case study with saxophone works Proceedings Article
In: INTERSPEECH Conference, pp. 2793-2797, Dublin, Ireland, 2023.
Links | BibTeX | Tags: MultiScore
@inproceedings{Martínez-Sevilla2023,
title = {Insights into end-to-end audio-to-score transcription with real recordings: A case study with saxophone works},
author = {J.C. Martínez-Sevilla and M. Alfaro-Contreras and J. J. Valero-Mas and J. Calvo-Zaragoza
},
doi = {10.21437/Interspeech.2023-88},
year = {2023},
date = {2023-08-20},
urldate = {2023-08-20},
booktitle = {INTERSPEECH Conference},
pages = {2793-2797},
address = {Dublin, Ireland},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Alfaro-Contreras, M.; Valero-Mas, J. J.; Iñesta, J. M.; Calvo-Zaragoza, J.
Multimodal Strategies for Image and Audio Music Transcription: A Comparative Study Proceedings Article
In: Pattern Recognition, Computer Vision, and Image Processing. ICPR 2022 International Workshops and Challenges. ICPR 2022. Lecture Notes in Computer Science, pp. 64-77, Springer Nature Switzerland, Cham, 2023, ISBN: 978-3-031-37731-0.
Links | BibTeX | Tags: MultiScore
@inproceedings{k505,
title = {Multimodal Strategies for Image and Audio Music Transcription: A Comparative Study},
author = {M. Alfaro-Contreras and J. J. Valero-Mas and J. M. Iñesta and J. Calvo-Zaragoza},
doi = {10.1007/978-3-031-37731-0_6},
isbn = {978-3-031-37731-0},
year = {2023},
date = {2023-08-10},
urldate = {2022-01-01},
booktitle = {Pattern Recognition, Computer Vision, and Image Processing. ICPR 2022 International Workshops and Challenges. ICPR 2022. Lecture Notes in Computer Science},
volume = {13645},
pages = {64-77},
publisher = {Springer Nature Switzerland},
address = {Cham},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Garrido-Munoz, C.; Alfaro-Contreras, M.; Calvo-Zaragoza, J.
Evaluating Domain Generalization in Kitchen Utensils Classification Proceedings Article
In: Iberian Conference on Pattern Recognition and Image Analysis, pp. 108-118, 2023.
Links | BibTeX | Tags: MultiScore
@inproceedings{Garrido-Munoz2023,
title = {Evaluating Domain Generalization in Kitchen Utensils Classification},
author = {C. Garrido-Munoz and M. Alfaro-Contreras and J. Calvo-Zaragoza},
doi = {10.1007/978-3-031-36616-1_9},
year = {2023},
date = {2023-06-25},
booktitle = {Iberian Conference on Pattern Recognition and Image Analysis},
pages = {108-118},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
González-Barrachina, P.; Alfaro-Contreras, M.; Nieto-Hidalgo, M.; Calvo-Zaragoza, J.
Lifelong Learning for Document Image Binarization: An Experimental Study Proceedings Article
In: Iberian Conference on Pattern Recognition and Image Analysis, pp. 146-157, 2023.
Links | BibTeX | Tags: MultiScore
@inproceedings{González-Barrachina2023,
title = {Lifelong Learning for Document Image Binarization: An Experimental Study},
author = {P. González-Barrachina and M. Alfaro-Contreras and M. Nieto-Hidalgo and J. Calvo-Zaragoza },
doi = {10.1007/978-3-031-36616-1_12},
year = {2023},
date = {2023-06-25},
urldate = {2023-06-25},
booktitle = {Iberian Conference on Pattern Recognition and Image Analysis},
pages = {146-157},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Penarrubia, C.; Valero-Mas, J. J.; Gallego, A. J.; Calvo-Zaragoza, J.
Addressing Class Imbalance in Multilabel Prototype Generation for k-Nearest Neighbor Classification Conference
Iberian Conference on Pattern Recognition and Image Analysis, Alicante, Spain, 2023, ISBN: 978-3-031-36616-1.
Abstract | Links | BibTeX | Tags: DOREMI
@conference{nokey,
title = {Addressing Class Imbalance in Multilabel Prototype Generation for k-Nearest Neighbor Classification},
author = {C. Penarrubia and J. J. Valero-Mas and A. J. Gallego and J. Calvo-Zaragoza},
doi = {https://doi.org/10.1007/978-3-031-36616-1_2},
isbn = {978-3-031-36616-1},
year = {2023},
date = {2023-06-25},
booktitle = {Iberian Conference on Pattern Recognition and Image Analysis},
pages = {15.27},
address = {Alicante, Spain},
abstract = {Prototype Generation (PG) methods seek to improve the efficiency of the k-Nearest Neighbor (kNN) classifier by obtaining a reduced version of a given reference dataset following certain heuristics. Despite being largely addressed topic in multiclass scenarios, few works deal with PG in multilabel environments. Hence, the existing proposals exhibit a number of limitations, being label imbalance one of paramount relevance as it constitutes a typical challenge of multilabel datasets. This work proposes two novel merging policies for multilabel PG schemes specifically devised for label imbalance, as well as a mechanism to prevent inappropriate samples from undergoing a reduction process. These proposals are applied to three existing multilabel PG methods—Multilabel Reduction through Homogeneous Clustering, Multilabel Chen, and Multilabel Reduction through Space Partitioning—and evaluated on 12 different data assortments with different degrees of label imbalance. The results prove that the proposals overcome—in some cases in a significant manner—those obtained with the original methods, hence validating the presented approaches and enabling further research lines on this topic.},
keywords = {DOREMI},
pubstate = {published},
tppubtype = {conference}
}
Alfaro-Contreras, M.; Iñesta, J. M.; Calvo-Zaragoza, J.
Optical Music Recognition for Homophonic Scores with Neural Networks and Synthetic Music Generation Journal Article
In: International Journal of Multimedia Information Retrieval, vol. 12, pp. 12-24, 2023.
@article{Alfaro-Contreras2023b,
title = {Optical Music Recognition for Homophonic Scores with Neural Networks and Synthetic Music Generation},
author = {M. Alfaro-Contreras and J. M. Iñesta and J. Calvo-Zaragoza},
doi = {10.1007/s13735-023-00278-5},
year = {2023},
date = {2023-05-26},
urldate = {2023-05-26},
journal = {International Journal of Multimedia Information Retrieval},
volume = {12},
pages = {12-24},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Ríos-Vila, A.; Rizo, D.; Iñesta, J. M.; Calvo-Zaragoza, J.
End-to-end optical music recognition for pianoform sheet music Journal Article
In: International Journal on Document Analysis and Recognition (IJDAR), iss. ICDAR 2023, 2023, ISSN: 1433-2825.
Abstract | Links | BibTeX | Tags: MultiScore
@article{Ríos-Vila2023,
title = {End-to-end optical music recognition for pianoform sheet music},
author = {A. Ríos-Vila and D. Rizo and J. M. Iñesta and J. Calvo-Zaragoza},
url = {https://link.springer.com/content/pdf/10.1007/s10032-023-00432-z.pdf},
doi = {10.1007/s10032-023-00432-z},
issn = {1433-2825},
year = {2023},
date = {2023-05-12},
urldate = {2023-05-12},
journal = {International Journal on Document Analysis and Recognition (IJDAR)},
issue = {ICDAR 2023},
abstract = {End-to-end solutions have brought about significant advances in the field of Optical Music Recognition. These approaches directly provide the symbolic representation of a given image of a musical score. Despite this, several documents, such as pianoform musical scores, cannot yet benefit from these solutions since their structural complexity does not allow their effective transcription. This paper presents a neural method whose objective is to transcribe these musical scores in an end-to-end fashion. We also introduce the GrandStaff dataset, which contains 53,882 single-system piano scores in common western modern notation. The sources are encoded in both a standard digital music representation and its adaptation for current transcription technologies. The method proposed in this paper is trained and evaluated using this dataset. The results show that the approach presented is, for the first time, able to effectively transcribe pianoform notation in an end-to-end manner.},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {article}
}
Alfaro-Contreras, M.; Ríos-Vila, A.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Few-Shot Symbol Classification via Self-Supervised Learning and Nearest Neighbor Journal Article
In: Pattern Recognition Letters, vol. 167, pp. 1-8, 2023.
Links | BibTeX | Tags: MultiScore
@article{Alfaro-Contreras2023,
title = {Few-Shot Symbol Classification via Self-Supervised Learning and Nearest Neighbor},
author = {M. Alfaro-Contreras and A. Ríos-Vila and J. J. Valero-Mas and J. Calvo-Zaragoza},
doi = {10.1016/j.patrec.2023.01.014},
year = {2023},
date = {2023-03-01},
journal = {Pattern Recognition Letters},
volume = {167},
pages = {1-8},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {article}
}
Rico-Juan, J. R.; Sánchez-Cartagena, V. M.; Valero-Mas, J. J.; Gallego, A. J.
Identifying student profiles within online judge systems using explainable artificial intelligence Journal Article
In: IEEE Transactions on Learning Technologies, vol. 16, no. 6, pp. 955-969, 2023, ISSN: 1939-1382.
@article{nokey,
title = {Identifying student profiles within online judge systems using explainable artificial intelligence},
author = {J. R. Rico-Juan and V. M. Sánchez-Cartagena and J. J. Valero-Mas and A. J. Gallego},
doi = {10.1109/TLT.2023.3239110},
issn = {1939-1382},
year = {2023},
date = {2023-01-23},
urldate = {2023-01-23},
journal = {IEEE Transactions on Learning Technologies},
volume = {16},
number = {6},
pages = {955-969},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Valero-Mas, J. J.; Gallego, A. J.; Alonso-Jiménez, P.; Serra, X.
Multilabel Prototype Generation for Data Reduction in k-Nearest Neighbour classification Journal Article
In: Pattern Recognition, vol. 135, pp. 109190, 2023, ISSN: 0031-3203.
Abstract | Links | BibTeX | Tags: DOREMI, MultiScore
@article{k519,
title = {Multilabel Prototype Generation for Data Reduction in k-Nearest Neighbour classification},
author = {J. J. Valero-Mas and A. J. Gallego and P. Alonso-Jiménez and X. Serra},
doi = {https://doi.org/10.1016/j.patcog.2022.109190},
issn = {0031-3203},
year = {2023},
date = {2023-01-01},
urldate = {2023-01-01},
journal = {Pattern Recognition},
volume = {135},
pages = {109190},
abstract = {Prototype Generation (PG) methods are typically considered for improving the efficiency of the k-Nearest Neighbour (kNN) classifier when tackling high-size corpora. Such approaches aim at generating a reduced version of the corpus without decreasing the classification performance when compared to the initial set. Despite their large application in multiclass scenarios, very few works have addressed the proposal of PG methods for the multilabel space. In this regard, this work presents the novel adaptation of four multiclass PG strategies to the multilabel case. These proposals are evaluated with three multilabel kNN-based classifiers, 12 corpora comprising a varied range of domains and corpus sizes, and different noise scenarios artificially induced in the data. The results obtained show that the proposed adaptations are capable of significantly improving—both in terms of efficiency and classification performance—the only reference multilabel PG work in the literature as well as the case in which no PG method is applied, also presenting statistically superior robustness in noisy scenarios. Moreover, these novel PG strategies allow prioritising either the efficiency or efficacy criteria through its configuration depending on the target scenario, hence covering a wide area in the solution space not previously filled by other works.},
keywords = {DOREMI, MultiScore},
pubstate = {published},
tppubtype = {article}
}
Sánchez-Ferrer, A.; Valero-Mas, J. J.; Gallego, A. J.; Calvo-Zaragoza, J.
An Experimental Study on Marine Debris Location and Recognition using Object Detection Journal Article
In: Pattern Recognition Letters, 2023, ISSN: 0167-8655.
Abstract | Links | BibTeX | Tags: TADMar
@article{k521,
title = {An Experimental Study on Marine Debris Location and Recognition using Object Detection},
author = {A. Sánchez-Ferrer and J. J. Valero-Mas and A. J. Gallego and J. Calvo-Zaragoza},
doi = {https://doi.org/10.1016/j.patrec.2022.12.019},
issn = {0167-8655},
year = {2023},
date = {2023-01-01},
urldate = {2023-01-01},
journal = {Pattern Recognition Letters},
abstract = {The large amount of debris in our oceans is a global problem that dramatically impacts marine fauna and flora. While a large number of human-based campaigns have been proposed to tackle this issue, these efforts have been deemed insufficient due to the insurmountable amount of existing litter. In response to that, there exists a high interest in the use of autonomous underwater vehicles (AUV) that may locate, identify, and collect this garbage automatically. To perform such a task, AUVs consider state-of-the-art object detection techniques based on deep neural networks due to their reported high performance. Nevertheless, these techniques generally require large amounts of data with fine-grained annotations. In this work, we explore the capabilities of the reference object detector Mask Region-based Convolutional Neural Networks for automatic marine debris location and classification in the context of limited data availability. Considering the recent CleanSea corpus, we pose several scenarios regarding the amount of available train data and study the possibility of mitigating the adverse effects of data scarcity with synthetic marine scenes. Our results achieve a new state of the art in the task, establishing a new reference for future research. In addition, it is shown that the task still has room for improvement and that the lack of data can be somehow alleviated, yet to a limited extent.},
keywords = {TADMar},
pubstate = {published},
tppubtype = {article}
}
Alfaro-Contreras, M.; Valero-Mas, J. J.; Iñesta, J. M.; Calvo-Zaragoza, J.
Late multimodal fusion for image and audio music transcription Journal Article
In: Expert Systems With Applications, vol. 216, pp. 119491-119500, 2023.
Links | BibTeX | Tags: MultiScore
@article{Alfaro-Contreras2023c,
title = {Late multimodal fusion for image and audio music transcription},
author = {M. Alfaro-Contreras and J. J. Valero-Mas and J. M. Iñesta and J. Calvo-Zaragoza},
doi = {10.1016/j.eswa.2022.119491},
year = {2023},
date = {2023-01-01},
urldate = {2023-01-01},
journal = {Expert Systems With Applications},
volume = {216},
pages = {119491-119500},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {article}
}
2022
Ríos-Vila, A.; Iñesta, J. M.; Calvo-Zaragoza, J.
End-to-End Full-Page Optical Music Recognition for Mensural Notation Proceedings Article
In: Proceedings of the 23rd International Society for Music Information Retrieval Conference, pp. 226-232, 2022, ISBN: 978-1-7327299-2-6.
Abstract | Links | BibTeX | Tags: Leonardo2021, MultiScore
@inproceedings{Ríos-Vila2022,
title = {End-to-End Full-Page Optical Music Recognition for Mensural Notation},
author = {A. Ríos-Vila and J. M. Iñesta and J. Calvo-Zaragoza},
url = {https://zenodo.org/record/7342678/files/000026.pdf?download=1},
doi = {https://doi.org/10.5281/zenodo.7342678},
isbn = {978-1-7327299-2-6},
year = {2022},
date = {2022-12-04},
urldate = {2022-12-04},
booktitle = {Proceedings of the 23rd International Society for Music Information Retrieval Conference},
journal = {Proceedings of the 23nd International Society for Music Information Retrieval Conference},
pages = {226-232},
abstract = {Optical Music Recognition (OMR) systems typically consider workflows that include several steps, such as staff detection, symbol recognition, and semantic reconstruction. However, fine-tuning these systems is costly due to the specific data labeling process that has to be performed to train models for each of these steps. In this paper, we present the first segmentation-free full-page OMR system that receives a page image and directly outputs the transcription in a single step. This model requires only the annotations of full score pages, which greatly alleviates the task of manual labeling. The model has been tested with early music written in mensural notation, for which the presented approach is especially beneficial. Results show that this methodology provides a solution with promising results and establishes a new line of research for holistic transcription of music score pages.},
keywords = {Leonardo2021, MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Rizo, D.; Delgado, T.; Calvo-Zaragoza, J.; Madueño, A.; García-Iasci, P.
Speeding-up the encoding of mensural collections from Spanish libraries Journal Article
In: IAML 2022 Prague, 2022.
BibTeX | Tags: MultiScore
@article{k502,
title = {Speeding-up the encoding of mensural collections from Spanish libraries},
author = {D. Rizo and T. Delgado and J. Calvo-Zaragoza and A. Madueño and P. García-Iasci},
year = {2022},
date = {2022-07-01},
booktitle = {IAML 2022 Prague},
journal = {IAML 2022 Prague},
organization = {IAML},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {article}
}
Alfaro-Contreras, M.; Valero-Mas, J. J.; Iñesta, J. M.; Calvo-Zaragoza, J.
Insights into transfer learning between image and audio music transcription Proceedings Article
In: Sound and Music Computing Conference, pp. 295-301, Zenodo, Saint-Étienne, France, 2022.
Abstract | Links | BibTeX | Tags: MultiScore
@inproceedings{Alfaro-Contreras2022b,
title = {Insights into transfer learning between image and audio music transcription},
author = {M. Alfaro-Contreras and J. J. Valero-Mas and J. M. Iñesta and J. Calvo-Zaragoza},
doi = {10.5281/zenodo.6797870},
year = {2022},
date = {2022-06-01},
urldate = {2022-06-01},
booktitle = {Sound and Music Computing Conference},
pages = {295-301},
publisher = {Zenodo},
address = {Saint-Étienne, France},
abstract = {Optical Music Recognition (OMR) and Automatic Music Transcription (AMT) stand for the research fields that devise methods to transcribe music sources---documents or audio signals, respectively---into a structured digital format. Historically, they have followed different approaches to achieve the same goal. However, their recent definition in terms of sequence labeling tasks gathers them under a common formulation framework. Under this premise, one may wonder if there exist any synergies between the two fields that could be exploited to improve the individual recognition rates in their respective domains. In this work, we aim to further explore this question from a Transfer Learning (TL) point of view in the context of neural end-to-end recognition models. More precisely, we consider a music transcription system, trained on either image or audio data, and adapt its performance to the unseen domain during the training phase using different TL schemes. Results show that knowledge transfer slightly boosts model performance with sufficient available data, but it is not properly leveraged when the latter condition is not met. This opens up a new promising, yet challenging, research path towards building an effective bridge between two solutions of the same problem.},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Alfaro-Contreras, M.; Ríos-Vila, A.; Valero-Mas, J. J.; Iñesta, J. M.; Calvo-Zaragoza, J.
Decoupling music notation to improve end-to-end Optical Music Recognition Journal Article
In: Pattern Recognition Letters, vol. 158, pp. 157-163, 2022, ISSN: 0167-8655.
Abstract | Links | BibTeX | Tags: MultiScore
@article{Alfaro-Contreras2022,
title = {Decoupling music notation to improve end-to-end Optical Music Recognition},
author = {M. Alfaro-Contreras and A. Ríos-Vila and J. J. Valero-Mas and J. M. Iñesta and J. Calvo-Zaragoza},
doi = {10.1016/j.patrec.2022.04.032},
issn = {0167-8655},
year = {2022},
date = {2022-06-01},
urldate = {2022-06-01},
journal = {Pattern Recognition Letters},
volume = {158},
pages = {157-163},
abstract = {Inspired by the Text Recognition field, end-to-end schemes based on Convolutional Recurrent Neural Networks (CRNN) trained with the Connectionist Temporal Classification (CTC) loss function are considered one of the current state-of-the-art techniques for staff-level Optical Music Recognition (OMR). Unlike text symbols, music-notation elements may be defined as a combination of (i) a shape primitive located in (ii) a certain position in a staff. However, this double nature is generally neglected in the learning process, as each combination is treated as a single token. In this work, we study whether exploiting such particularity of music notation actually benefits the recognition performance and, if so, which approach is the most appropriate. For that, we thoroughly review existing specific approaches that explore this premise and propose different combinations of them. Furthermore, considering the limitations observed in such approaches, a novel decoding strategy specifically designed for OMR is proposed. The results obtained with four different corpora of historical manuscripts show the relevance of leveraging this double nature of music notation since it outperforms the standard approaches where it is ignored. In addition, the proposed decoding leads to significant reductions in the error rates with respect to the other cases.},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {article}
}
Arroyo, V.; Valero-Mas, J. J.; Calvo-Zaragoza, J.; Pertusa, A.
Neural audio-to-score music transcription for unconstrained polyphony using compact output representations Proceedings Article
In: Proc. of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), IEEE, Singapur, Singapur, 2022.
BibTeX | Tags: MultiScore
@inproceedings{k487,
title = {Neural audio-to-score music transcription for unconstrained polyphony using compact output representations},
author = {V. Arroyo and J. J. Valero-Mas and J. Calvo-Zaragoza and A. Pertusa},
year = {2022},
date = {2022-05-01},
booktitle = {Proc. of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
publisher = {IEEE},
address = {Singapur, Singapur},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Ríos-Vila, A.; Iñesta, J. M.; Calvo-Zaragoza, J.
On the Use of Transformers for End-to-End Optical Music Recognition Proceedings Article
In: Iberian Pattern Recognition and Image Analysis, IbPRIA 2022., pp. 470-481, Aveiro, Portugal, 2022, ISBN: 978-3-031-04880-7.
BibTeX | Tags: MultiScore
@inproceedings{k492,
title = {On the Use of Transformers for End-to-End Optical Music Recognition},
author = {A. Ríos-Vila and J. M. Iñesta and J. Calvo-Zaragoza},
isbn = {978-3-031-04880-7},
year = {2022},
date = {2022-05-01},
urldate = {2022-05-01},
booktitle = {Iberian Pattern Recognition and Image Analysis, IbPRIA 2022.},
pages = {470-481},
address = {Aveiro, Portugal},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Garrido-Munoz, C.; Ríos-Vila, A.; Calvo-Zaragoza, J.
Retrieval of Music-Notation Primitives via Image-to-Sequence Approaches Proceedings Article
In: Iberian Pattern Recognition and Image Analysis, IbPRIA 2022., pp. 482-492, Aveiro, Portugal, 2022, ISBN: 978-3-031-04880-7.
BibTeX | Tags: Leonardo2021
@inproceedings{k493,
title = {Retrieval of Music-Notation Primitives via Image-to-Sequence Approaches},
author = {C. Garrido-Munoz and A. Ríos-Vila and J. Calvo-Zaragoza},
isbn = {978-3-031-04880-7},
year = {2022},
date = {2022-05-01},
booktitle = {Iberian Pattern Recognition and Image Analysis, IbPRIA 2022.},
pages = {482-492},
address = {Aveiro, Portugal},
keywords = {Leonardo2021},
pubstate = {published},
tppubtype = {inproceedings}
}
Mas-Candela, E.; Ríos-Vila, A.; Calvo-Zaragoza, J.
A First Approach to Image Transformation Sequence Retrieval Proceedings Article
In: Iberian Pattern Recognition and Image Analysis, IbPRIA 2022., pp. 321-332, Aveiro, Portugal, 2022.
BibTeX | Tags: MultiScore
@inproceedings{k494,
title = {A First Approach to Image Transformation Sequence Retrieval},
author = {E. Mas-Candela and A. Ríos-Vila and J. Calvo-Zaragoza},
year = {2022},
date = {2022-05-01},
booktitle = {Iberian Pattern Recognition and Image Analysis, IbPRIA 2022.},
pages = {321-332},
address = {Aveiro, Portugal},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Münnich, S.; Rizo, D.
Music Encoding Conference Proceedings 2021 Book
Humanities Commons, 2022, ISBN: 978-84-1302-173-7.
BibTeX | Tags: MultiScore
@book{k495,
title = {Music Encoding Conference Proceedings 2021},
author = {S. Münnich and D. Rizo},
editor = {S. Münnich and D. Rizo},
isbn = {978-84-1302-173-7},
year = {2022},
date = {2022-05-01},
urldate = {2022-05-01},
publisher = {Humanities Commons},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {book}
}
Münnich, S.; Rizo, D.
Foreword Proceedings Article
In: Münnich, S.; Rizo, D. (Ed.): Music Encoding Conference Proceedings 2021, pp. vii–viii, Humanities Commons, 2022, ISBN: 978-84-1302-173-7.
BibTeX | Tags: MultiScore
@inproceedings{k496,
title = {Foreword},
author = {S. Münnich and D. Rizo},
editor = {S. Münnich and D. Rizo},
isbn = {978-84-1302-173-7},
year = {2022},
date = {2022-05-01},
urldate = {2022-05-01},
booktitle = {Music Encoding Conference Proceedings 2021},
pages = {vii–viii},
publisher = {Humanities Commons},
chapter = {1},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Desmond, K.; Pugin, L.; Regimbal, J.; Rizo, D.; Sapp, C. S.; Thomae, M. E.
Encoding Polyphony from Medieval Manuscripts Notated in Mensural Notation Proceedings Article
In: Music Encoding Conference Proceedings 2021, pp. 197–219, Humanities Commons, 2022, ISBN: 978-84-1302-173-7.
BibTeX | Tags: MultiScore
@inproceedings{k497,
title = {Encoding Polyphony from Medieval Manuscripts Notated in Mensural Notation},
author = {K. Desmond and L. Pugin and J. Regimbal and D. Rizo and C. S. Sapp and M. E. Thomae},
isbn = {978-84-1302-173-7},
year = {2022},
date = {2022-05-01},
urldate = {2022-05-01},
booktitle = {Music Encoding Conference Proceedings 2021},
pages = {197–219},
publisher = {Humanities Commons},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Sánchez-Ferrer, A.; Gallego, A. J.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
The CleanSea Set: A Benchmark Corpus for Underwater Debris Detection and Recognition Proceedings Article
In: 10th Iberian Conference on Pattern Recognition and Image Analysis (IbPRIA), pp. 616–628, Aveiro, Portugal, 2022, ISBN: 978-3-031-04881-4.
@inproceedings{k512,
title = {The CleanSea Set: A Benchmark Corpus for Underwater Debris Detection and Recognition},
author = {A. Sánchez-Ferrer and A. J. Gallego and J. J. Valero-Mas and J. Calvo-Zaragoza},
isbn = {978-3-031-04881-4},
year = {2022},
date = {2022-05-01},
booktitle = {10th Iberian Conference on Pattern Recognition and Image Analysis (IbPRIA)},
pages = {616--628},
address = {Aveiro, Portugal},
abstract = {In recent years, the large amount of debris scattered throughout the ocean is becoming one of the major pollution problems, causing the extinction of species and accelerating the degradation of our planet, among other environmental issues. Since the manual treatment of this waste represents a considerably tedious task, autonomous frameworks are gaining attention. Due to their reported good performance, such frameworks generally rely on Deep Learning techniques. However, the scarcity of data coupled with the inherent difficulties of the field---debris with different shapes and colors due to long-lasting exposure to the ocean, illumination variability, or sea conditions---makes detecting underwater objects a particularly challenging task. The contribution of this work to the field is twofold: on the one hand, we introduce a novel data collection for supervised learning---the CleanSea corpus---annotated at both the bounding-box and contour levels of the objects to contribute to the research and progress in the field; on the other hand, we devise and optimize a recognition model based on the reference Mask Object-Based Convolutional Neural Network for this set to establish a benchmark for future comparison and assess its performance in both simulated and real-world scenarios. Results show the relevance of the contributions, as the devised model is capable of properly addressing the detection and recognition of general debris when trained with the introduced CleanSea corpus.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
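As a rough illustration of the kind of recognition model benchmarked in the entry above, the snippet below adapts torchvision's off-the-shelf Mask R-CNN to a custom number of debris classes. The class count and the dataset wiring are placeholders, not the paper's setup, and the recipe assumes torchvision 0.13 or later.
# Adapting torchvision's off-the-shelf Mask R-CNN to a custom set of debris
# classes (placeholder class count; requires torchvision >= 0.13 for the
# "weights" argument). This is a generic recipe, not the paper's exact setup.
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

def build_maskrcnn(num_classes):
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(weights="DEFAULT")
    # Swap the box-classification head for the new label set (background included).
    in_feats = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_feats, num_classes)
    # Swap the mask head as well.
    in_feats_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_feats_mask, 256, num_classes)
    return model

model = build_maskrcnn(num_classes=20)         # e.g. 19 debris categories + background
model.eval()
with torch.no_grad():
    preds = model([torch.rand(3, 480, 640)])   # dicts with boxes, labels, scores, masks
print(preds[0]["boxes"].shape)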
Iñesta, J. M.; Thomae, M. E.
An On-line Tool for Transcription of Music Scores: MuRET Presentation
Montreal (Canada), 01.05.2022.
Abstract | BibTeX | Tags: HispaMus
@misc{k520,
title = {An On-line Tool for Transcription of Music Scores: MuRET},
author = {J. M. Iñesta and M. E. Thomae},
year = {2022},
date = {2022-05-01},
urldate = {2022-05-01},
booktitle = {1st Int. Conf. The Sound of Future/The Future of Sound},
address = {Montreal (Canada)},
organization = {CIRMMT},
abstract = {MuRET is a machine-learning Optical Music Recognition (OMR) research tool that runs in the browser. It has been created to help in the transcription of music collections and to experiment with machine learning algorithms for OMR, and it is capable of working with different notations and writing styles. Why use machine learning? Instead of designing a system to solve the task, we have designed a system that learns how to solve the task from sets of labeled (solved) images. In this way, it is adaptable to new (previously unseen) collections.},
key = {OMR, Machine Learning},
keywords = {HispaMus},
pubstate = {published},
tppubtype = {presentation}
}
de la Fuente, C.; Valero-Mas, J. J.; Castellanos, F. J.; Calvo-Zaragoza, J.
Multimodal Image and Audio Music Transcription Journal Article
In: International Journal of Multimedia Information Retrieval, vol. 11, pp. 77-84, 2022.
BibTeX | Tags: MultiScore
@article{k479,
title = {Multimodal Image and Audio Music Transcription},
author = {C. de la Fuente and J. J. Valero-Mas and F. J. Castellanos and J. Calvo-Zaragoza},
year = {2022},
date = {2022-01-01},
journal = {International Journal of Multimedia Information Retrieval},
volume = {11},
pages = {77-84},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {article}
}
Castellanos, F. J.; Garrido-Munoz, C.; Ríos-Vila, A.; Calvo-Zaragoza, J.
Region-based Layout Analysis of Music Score Images Journal Article
In: Expert Systems with Applications, pp. 118211, 2022, ISSN: 0957-4174.
BibTeX | Tags: MultiScore
@article{k486,
title = {Region-based Layout Analysis of Music Score Images},
author = {F. J. Castellanos and C. Garrido-Munoz and A. Ríos-Vila and J. Calvo-Zaragoza},
issn = {0957-4174},
year = {2022},
date = {2022-01-01},
urldate = {2022-01-01},
journal = {Expert Systems with Applications},
pages = {118211},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {article}
}
Rosello, A.; Ayllon, E.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Test Sample Selection for Handwriting Recognition Through Language Modeling Proceedings Article
In: Pattern Recognition and Image Analysis - 10th Iberian Conference, IbPRIA 2022, Aveiro, Portugal, May 4-6, 2022, Proceedings, 2022.
BibTeX | Tags: MultiScore
@inproceedings{k498,
title = {Test Sample Selection for Handwriting Recognition Through Language Modeling},
author = {A. Rosello and E. Ayllon and J. J. Valero-Mas and J. Calvo-Zaragoza},
year = {2022},
date = {2022-01-01},
booktitle = {Pattern Recognition and Image Analysis - 10th Iberian Conference, IbPRIA 2022, Aveiro, Portugal, May 4-6, 2022, Proceedings},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Ríos-Vila, A.; Iñesta, J. M.; Calvo-Zaragoza, J.
End-to-End Full-Page Optical Music Recognition for Mensural Notation Proceedings Article
In: Proceedings of the 23rd International Society for Music Information Retrieval Conference, ISMIR, Bangalore, India, 2022.
Abstract | BibTeX | Tags: Leonardo2021, MultiScore
@inproceedings{k499,
title = {End-to-End Full-Page Optical Music Recognition for Mensural Notation},
author = {A. Ríos-Vila and J. M. Iñesta and J. Calvo-Zaragoza},
year = {2022},
date = {2022-01-01},
urldate = {2022-01-01},
booktitle = {Proceedings of the 23rd International Society for Music Information Retrieval Conference, ISMIR},
address = {Bangalore, India},
abstract = {Optical Music Recognition (OMR) systems typically consider workflows that include several steps, such as staff detection, symbol recognition, and semantic reconstruction. However, fine-tuning these systems is costly due to the specific data labeling process that has to be performed to train models for each of these steps. In this paper, we present the first segmentation-free full-page OMR system that receives a page image and directly outputs the transcription in a single step. This model requires only the annotations of full score pages, which greatly alleviates the task of manual labeling. The model has been tested with early music written in mensural notation, for which the presented approach is especially beneficial. Results show that this methodology provides a solution with promising results and establishes a new line of research for holistic transcription of music score pages.},
keywords = {Leonardo2021, MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
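The segmentation-free idea described in the abstract above can be sketched as a plain image-to-sequence model: a convolutional encoder flattens the page into visual tokens and an autoregressive Transformer decoder emits the transcription. The sketch below assumes PyTorch; the sizes and vocabulary are illustrative and do not reproduce the model of the paper.
# Minimal image-to-sequence sketch (CNN encoder + autoregressive Transformer
# decoder) of the segmentation-free, page-level idea; sizes and vocabulary are
# illustrative and do not reproduce the model of the paper.
import torch
import torch.nn as nn

class Page2Seq(nn.Module):
    def __init__(self, vocab_size, d_model=256):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(1, 64, 3, stride=2, padding=1), nn.ReLU(),
            nn.Conv2d(64, d_model, 3, stride=2, padding=1), nn.ReLU(),
        )
        self.embed = nn.Embedding(vocab_size, d_model)
        layer = nn.TransformerDecoderLayer(d_model, nhead=8, batch_first=True)
        self.decoder = nn.TransformerDecoder(layer, num_layers=3)
        self.out = nn.Linear(d_model, vocab_size)

    def forward(self, page, tokens):
        # Flatten the 2-D feature map into a sequence of "visual tokens".
        mem = self.encoder(page).flatten(2).transpose(1, 2)   # (B, H'*W', d)
        tgt = self.embed(tokens)                              # (B, L, d)
        L = tokens.size(1)
        causal = torch.triu(torch.ones(L, L, dtype=torch.bool,
                                       device=tokens.device), diagonal=1)
        dec = self.decoder(tgt, mem, tgt_mask=causal)
        return self.out(dec)                                  # (B, L, vocab)

model = Page2Seq(vocab_size=100)
logits = model(torch.rand(2, 1, 128, 96), torch.randint(0, 100, (2, 20)))
print(logits.shape)    # torch.Size([2, 20, 100])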
Castellanos, F. J.; Gallego, A. J.; Calvo-Zaragoza, J.; Fujinaga, I.
Domain Adaptation for Staff-Region Retrieval of Music Score Images Journal Article
In: International Journal on Document Analysis and Recognition, 2022, ISSN: 1433-2825.
BibTeX | Tags: MultiScore
@article{k500,
title = {Domain Adaptation for Staff-Region Retrieval of Music Score Images},
author = {F. J. Castellanos and A. J. Gallego and J. Calvo-Zaragoza and I. Fujinaga},
issn = {1433-2825},
year = {2022},
date = {2022-01-01},
journal = {International Journal on Document Analysis and Recognition},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {article}
}
de la Fuente, C.; Castellanos, F. J.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Multimodal Recognition of Frustration during Game-Play with Deep Neural Networks Journal Article
In: Multimedia Tools and Applications, 2022.
BibTeX | Tags:
@article{k501,
title = {Multimodal Recognition of Frustration during Game-Play with Deep Neural Networks},
author = {C. de la Fuente and F. J. Castellanos and J. J. Valero-Mas and J. Calvo-Zaragoza},
year = {2022},
date = {2022-01-01},
urldate = {2022-01-01},
journal = {Multimedia Tools and Applications},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Alfaro-Contreras, M.; Ríos-Vila, A.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Few-Shot Music Symbol Classification via Self-Supervised Learning and Nearest Neighbor Proceedings Article
In: Pattern Recognition. ICPR International Workshops and Challenges, 2022.
BibTeX | Tags: MultiScore
@inproceedings{k504,
title = {Few-Shot Music Symbol Classification via Self-Supervised Learning and Nearest Neighbor},
author = {M. Alfaro-Contreras and A. Ríos-Vila and J. J. Valero-Mas and J. Calvo-Zaragoza},
year = {2022},
date = {2022-01-01},
booktitle = {Pattern Recognition. ICPR International Workshops and Challenges},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Sáez-Pérez, J.; Gallego, A. J.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Domain Adaptation in Robotics: A Study Case on Kitchen Utensil Recognition Proceedings Article
In: 10th Iberian Conference on Pattern Recognition and Image Analysis (IbPRIA), 2022.
@inproceedings{k506,
title = {Domain Adaptation in Robotics: A Study Case on Kitchen Utensil Recognition},
author = {J. Sáez-Pérez and A. J. Gallego and J. J. Valero-Mas and J. Calvo-Zaragoza},
year = {2022},
date = {2022-01-01},
booktitle = {10th Iberian Conference on Pattern Recognition and Image Analysis (IbPRIA)},
keywords = {ROMA},
pubstate = {published},
tppubtype = {inproceedings}
}
Alashhab, S.
Aplicaciones de visión artificial para ayuda a personas con dificultades visuales [Computer vision applications to assist people with visual impairments] PhD Thesis
2022.
BibTeX | Tags:
@phdthesis{k508,
title = {Aplicaciones de visión artificial para ayuda a personas con dificultades visuales},
author = {S. Alashhab},
editor = {Miguel Angel Lozano and Antonio Javier Gallego},
year = {2022},
date = {2022-01-01},
urldate = {2022-01-01},
organization = {Universidad de Alicante},
keywords = {},
pubstate = {published},
tppubtype = {phdthesis}
}
Bernabeu, M.
Búsqueda de imágenes similares usando técnicas de aprendizaje automático [Similar-image search using machine learning techniques] PhD Thesis
2022.
BibTeX | Tags:
@phdthesis{k509,
title = {Búsqueda de imágenes similares usando técnicas de aprendizaje automático},
author = {M. Bernabeu},
editor = {Antonio Pertusa and Antonio Javier Gallego},
year = {2022},
date = {2022-01-01},
urldate = {2022-01-01},
organization = {Universidad de Alicante},
keywords = {},
pubstate = {published},
tppubtype = {phdthesis}
}
Gallego, A. J.; Rico-Juan, J. R.; Valero-Mas, J. J.
Efficient k-nearest neighbor search based on clustering and adaptive k values Journal Article
In: Pattern Recognition, vol. 122, pp. 108356, 2022, ISSN: 0031-3203.
@article{k510,
title = {Efficient k-nearest neighbor search based on clustering and adaptive k values},
author = {A. J. Gallego and J. R. Rico-Juan and J. J. Valero-Mas},
issn = {0031-3203},
year = {2022},
date = {2022-01-01},
journal = {Pattern Recognition},
volume = {122},
pages = {108356},
abstract = {The k-Nearest Neighbor (kNN) algorithm is widely used in the supervised learning field and, particularly, in search and classification tasks, owing to its simplicity, competitive performance, and good statistical properties. However, its inherent inefficiency prevents its use in most modern applications due to the vast amount of data that the current technological evolution generates, which makes the optimization of kNN-based search strategies of particular interest. This paper introduces the caKD+ algorithm, which tackles this limitation by combining feature learning techniques, clustering methods, adaptive search parameters per cluster, and pre-calculated K-Dimensional Tree structures, resulting in a highly efficient search method. This proposal has been evaluated using 10 datasets, and the results show that caKD+ significantly outperforms 16 state-of-the-art efficient search methods while remaining as accurate as the exhaustive kNN search.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
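To make the general strategy of the abstract above concrete, the sketch below combines clustering with one pre-built KD-tree per cluster and routes each query to its nearest centroid. The adaptive per-cluster k values and the feature-learning stage of caKD+ are deliberately omitted, so this only approximates the idea rather than reproducing the published algorithm.
# Rough sketch of kNN search accelerated with clustering plus one pre-built
# KD-tree per cluster; the adaptive per-cluster k values and the feature
# learning of caKD+ are omitted, so this only approximates the idea.
import numpy as np
from scipy.spatial import cKDTree
from sklearn.cluster import KMeans

class ClusteredKNN:
    def __init__(self, n_clusters=16):
        self.n_clusters = n_clusters

    def fit(self, X, y):
        self.kmeans = KMeans(n_clusters=self.n_clusters, n_init=10).fit(X)
        self.trees, self.labels = [], []
        for c in range(self.n_clusters):
            idx = np.where(self.kmeans.labels_ == c)[0]
            self.trees.append(cKDTree(X[idx]))          # pre-calculated tree
            self.labels.append(y[idx])
        return self

    def predict(self, X, k=5):
        # Route each query to its closest centroid and search only that tree.
        clusters = self.kmeans.predict(X)
        out = np.empty(len(X), dtype=self.labels[0].dtype)
        for i, (x, c) in enumerate(zip(X, clusters)):
            kk = min(k, self.trees[c].n)                # guard against tiny clusters
            _, neigh = self.trees[c].query(x, k=kk)
            votes = self.labels[c][np.atleast_1d(neigh)]
            out[i] = np.bincount(votes).argmax()        # majority vote
        return out

# Toy usage with random data.
rng = np.random.default_rng(0)
X, y = rng.normal(size=(2000, 8)), rng.integers(0, 3, 2000)
clf = ClusteredKNN().fit(X[:1500], y[:1500])
print((clf.predict(X[1500:]) == y[1500:]).mean())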
Alashhab, S.; Gallego, A. J.; Lozano, M. Á.
Efficient gesture recognition for the assistance of visually impaired people using multi-head neural networks Journal Article
In: Engineering Applications of Artificial Intelligence, vol. 114, pp. 105188, 2022, ISSN: 0952-1976.
@article{k511,
title = {Efficient gesture recognition for the assistance of visually impaired people using multi-head neural networks},
author = {S. Alashhab and A. J. Gallego and M. Á. Lozano},
issn = {0952-1976},
year = {2022},
date = {2022-01-01},
journal = {Engineering Applications of Artificial Intelligence},
volume = {114},
pages = {105188},
abstract = {Existing research for the assistance of visually impaired people mainly focuses on solving a single task (such as reading a text or detecting an obstacle), hence forcing the user to switch applications to perform other actions. This paper proposes an interactive system for mobile devices controlled by hand gestures that allows the user to control the device and use several assistance tools by making simple static and dynamic hand gestures (e.g., pointing a finger at an object will show a description of it). The system is based on a multi-head neural network, which initially detects and classifies the gestures and, subsequently, depending on the gesture detected, performs a second stage that carries out the corresponding action. This architecture optimizes the resources required to perform different tasks, as it takes advantage of the information obtained from an initial backbone to perform different processes in a second stage. To train and evaluate the system, a dataset with about 40k images was manually compiled and labeled, including different types of hand gestures, backgrounds (indoors and outdoors), lighting conditions, etc. This dataset contains synthetic gestures (whose objective is to pre-train the system to improve the results) and real images captured using different mobile phones. The comparison made with nearly 50 state-of-the-art methods shows competitive results regarding the different actions performed by the system, such as the accuracy of classification and localization of gestures, or the generation of descriptions for objects and scenes.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
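A multi-head architecture of the kind described above can be summarized as a shared backbone whose features feed several task-specific heads, so the expensive computation is done once per image. The toy model below assumes PyTorch; the head names, class counts, and layer sizes are invented for illustration and are not the architecture of the paper.
# Toy multi-head network: a shared backbone computed once per image feeds
# several task-specific heads; head names, class counts and layer sizes are
# invented for illustration and are not the architecture of the paper.
import torch
import torch.nn as nn

class MultiHeadNet(nn.Module):
    def __init__(self, n_gestures=10, n_aux_classes=5):
        super().__init__()
        self.backbone = nn.Sequential(                 # shared feature extractor
            nn.Conv2d(3, 32, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(),
            nn.AdaptiveAvgPool2d(1), nn.Flatten(),
        )
        self.gesture_head = nn.Linear(64, n_gestures)  # first-stage classification
        self.aux_head = nn.Linear(64, n_aux_classes)   # second-stage task head

    def forward(self, x):
        feats = self.backbone(x)                       # reused by every head
        return self.gesture_head(feats), self.aux_head(feats)

model = MultiHeadNet()
gesture_logits, aux_logits = model(torch.rand(4, 3, 224, 224))
print(gesture_logits.shape, aux_logits.shape)          # [4, 10] and [4, 5]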
Garrido-Munoz, C.; Ríos-Vila, A.; Calvo-Zaragoza, J.
A holistic approach for image-to-graph: application to optical music recognition Journal Article
In: International Journal on Document Analysis and Recognition, 2022.
BibTeX | Tags: Leonardo2021
@article{k522,
title = {A holistic approach for image-to-graph: application to optical music recognition},
author = {C. Garrido-Munoz and A. Ríos-Vila and J. Calvo-Zaragoza},
year = {2022},
date = {2022-01-01},
urldate = {2022-01-01},
journal = {International Journal on Document Analysis and Recognition},
keywords = {Leonardo2021},
pubstate = {published},
tppubtype = {article}
}
2021
Calvo-Zaragoza, J.; Pertusa, A.; Gallego, A. J.; Iñesta, J. M.; Micó, L.; Oncina, J.; Perez-Sancho, C.; de León, P. J. Ponce; Rizo, D.
MultiScore Project: Multimodal Transcription of Music Scores Proceedings Article
In: Proceedings of the 14th Machine Learning and Music Workshop, pp. 3, 2021.
Links | BibTeX | Tags: MultiScore
@inproceedings{k481,
title = {MultiScore Project: Multimodal Transcription of Music Scores},
author = {J. Calvo-Zaragoza and A. Pertusa and A. J. Gallego and J. M. Iñesta and L. Micó and J. Oncina and C. Perez-Sancho and P. J. Ponce de León and D. Rizo},
url = {https://grfia.dlsi.ua.es/repositori/grfia/pubs/481/MML2021__MultiScore_Final.pdf},
year = {2021},
date = {2021-12-01},
urldate = {2021-12-01},
booktitle = {Proceedings of the 14th Machine Learning and Music Workshop},
pages = {3},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Castellanos, F. J.; Gallego, A. J.; Calvo-Zaragoza, J.
An Unsupervised Domain Adaptation framework for Layout Analysis of Music Score Images Proceedings Article
In: Proceedings of the 14th Machine Learning and Music Workshop, pp. 6, 2021.
@inproceedings{k482,
title = {An Unsupervised Domain Adaptation framework for Layout Analysis of Music Score Images},
author = {F. J. Castellanos and A. J. Gallego and J. Calvo-Zaragoza},
year = {2021},
date = {2021-12-01},
booktitle = {Proceedings of the 14th Machine Learning and Music Workshop},
pages = {6},
keywords = {GRE19-04, ROMA},
pubstate = {published},
tppubtype = {inproceedings}
}