2025
Martinez-Esteso, J. P.; Castellanos, F. J.; Calvo-Zaragoza, J.; Gallego, A. J.
Maritime search and rescue missions with aerial images: A survey Journal Article
In: Computer Science Review, vol. 57, pp. 100736, 2025, ISSN: 1574-0137.
@article{Martinez-Esteso:2025:survey,
  author = {Martinez-Esteso, J. P. and Castellanos, F. J. and Calvo-Zaragoza, J. and Gallego, A. J.},
  title = {Maritime search and rescue missions with aerial images: A survey},
  journal = {Computer Science Review},
  volume = {57},
  pages = {100736},
  year = {2025},
  date = {2025-02-25},
  urldate = {2025-02-25},
  doi = {10.1016/j.cosrev.2025.100736},
  issn = {1574-0137},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Kim, D.; Han, D.; Jeong, D.; Valero-Mas, J. J.
On the automatic recognition of Jeongganbo music notation: dataset and approach Journal Article
In: Journal on Computing and Cultural Heritage, 2025, ISSN: 1556-4673.
@article{nokey,
  author = {Kim, D. and Han, D. and Jeong, D. and Valero-Mas, J. J.},
  title = {On the automatic recognition of {Jeongganbo} music notation: dataset and approach},
  journal = {Journal on Computing and Cultural Heritage},
  year = {2025},
  date = {2025-01-16},
  urldate = {2025-01-16},
  issn = {1556-4673},
  abstract = {The Jeongganbo notation, the first music representation system in East Asia capable of jointly expressing pitch and duration, has been extensively used---and still is---in the Korean music tradition since its inception in the 15th century. In this regard, there exists a plethora of music works that exclusively endure as physical sheets, which not only constitutes a heritage preservation challenge due to the inherent degradation of this format but also impedes the use of computational tools to study and exploit this music tradition. While the Optical Music Recognition (OMR) field, which represents the research area devoted to devising methods capable of automatically transcribing music sheets into digital formats, has addressed this issue in a number of music notations from the Western tradition, no previous research has considered the preservation of Jeongganbo scores. In this context, this work presents the following contributions: (i) the first data assortment of real Jeongganbo scores for OMR tasks; (ii) a collection of synthetic data generation and augmentation mechanisms to alleviate the scarcity of manual annotation; and (iii) a neural-based transcription scheme based on state-of-the-art OMR strategies specifically tailored to Jeongganbo scores. The experiments performed prove the validity of the approach---performance rates close to a 90\% of success---and open new research avenues for under-resourced yet challenging music notations.},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
The Jeongganbo notation, the first music representation system in East Asia capable of jointly expressing pitch and duration, has been extensively used---and still is---in the Korean music tradition since its inception in the 15th century. In this regard, there exists a plethora of music works that exclusively endure as physical sheets, which not only constitutes a heritage preservation challenge due to the inherent degradation of this format but also impedes the use of computational tools to study and exploit this music tradition. While the Optical Music Recognition (OMR) field, which represents the research area devoted to devising methods capable of automatically transcribing music sheets into digital formats, has addressed this issue in a number of music notations from the Western tradition, no previous research has considered the preservation of Jeongganbo scores. In this context, this work presents the following contributions: (i) the first data assortment of real Jeongganbo scores for OMR tasks; (ii) a collection of synthetic data generation and augmentation mechanisms to alleviate the scarcity of manual annotation; and (iii) a neural-based transcription scheme based on state-of-the-art OMR strategies specifically tailored to Jeongganbo scores. The experiments performed prove the validity of the approach---performance rates close to a 90% of success---and open new research avenues for under-resourced yet challenging music notations.
2024
Martinez-Esteso, J. P.; Castellanos, F. J.; Rosello, A.; Calvo-Zaragoza, J.; Gallego, A. J.
On the use of synthetic data for body detection in maritime search and rescue operations Journal Article
In: Engineering Applications of Artificial Intelligence, vol. 139, pp. 109586, 2024, ISSN: 0952-1976.
@article{Martinez-Esteso:2024:synthetic,
  author = {Martinez-Esteso, J. P. and Castellanos, F. J. and Rosello, A. and Calvo-Zaragoza, J. and Gallego, A. J.},
  title = {On the use of synthetic data for body detection in maritime search and rescue operations},
  journal = {Engineering Applications of Artificial Intelligence},
  volume = {139},
  pages = {109586},
  year = {2024},
  date = {2024-11-07},
  urldate = {2024-11-07},
  doi = {10.1016/j.engappai.2024.109586},
  url = {https://www.sciencedirect.com/science/article/pii/S0952197624017445},
  issn = {0952-1976},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Galan-Cuenca, A.; Valero-Mas, J. J.; Martinez-Sevilla, J. C.; Hidalgo-Centeno, A.; Pertusa, A.; Calvo-Zaragoza, J.
Proceedings of the 32nd ACM International Conference on Multimedia, Association for Computing Machinery, 2024, ISBN: 979-8-4007-0686-8.
Abstract | Links | BibTeX | Tags:
@conference{nokey,
  author = {Galan-Cuenca, A. and Valero-Mas, J. J. and Martinez-Sevilla, J. C. and Hidalgo-Centeno, A. and Pertusa, A. and Calvo-Zaragoza, J.},
  title = {{MUSCAT}: a Multimodal {mUSic} Collection for Automatic Transcription of real recordings and image scores},
  booktitle = {Proceedings of the 32nd ACM International Conference on Multimedia},
  pages = {583--591},
  publisher = {Association for Computing Machinery},
  year = {2024},
  date = {2024-10-28},
  doi = {10.1145/3664647.3681572},
  isbn = {979-8-4007-0686-8},
  abstract = {Multimodal audio-image music transcription has been recently posed as a means of retrieving a digital score representation by leveraging the individual estimations from Automatic Music Transcription (AMT)---acoustic recordings---and Optical Music Recognition (OMR)---image scores---systems. Nevertheless, while proven to outperform single-modality recognition rates, this approach has been exclusively validated under controlled scenarios---monotimbral and monophonic synthetic data---mainly due to a lack of collections with symbolic score-level annotations for both recordings and graphical sheets. To promote research on this topic, this work presents the Multimodal mUSic Collection for Automatic Transcription (MUSCAT) assortment of acoustic recordings, image sheets, and their score-level annotations in several notation formats. This dataset comprises almost 80 hours of real recordings with varied instrumentation and polyphony degrees---ranging from piano to orchestral music---, 1251 scanned sheets, and 880 symbolic scores from 37 composers, which may also be used in other tasks involving metadata such as instrument identification or composer recognition. A fragmented subset of this collection solely focused on acoustic data for score-level AMT---the MUSic Collection for aUtomatic Transcription - fragmented Subset (MUSCUTS) assortment---is also presented together with a baseline experimentation, concluding the need to foster research on this field with real recordings. Finally, a web-based service is also provided to increase the size of the collections collaboratively.},
  keywords = {},
  pubstate = {published},
  tppubtype = {conference}
}
Multimodal audio-image music transcription has been recently posed as a means of retrieving a digital score representation by leveraging the individual estimations from Automatic Music Transcription (AMT)---acoustic recordings---and Optical Music Recognition (OMR)---image scores---systems. Nevertheless, while proven to outperform single-modality recognition rates, this approach has been exclusively validated under controlled scenarios---monotimbral and monophonic synthetic data---mainly due to a lack of collections with symbolic score-level annotations for both recordings and graphical sheets. To promote research on this topic, this work presents the Multimodal mUSic Collection for Automatic Transcription (MUSCAT) assortment of acoustic recordings, image sheets, and their score-level annotations in several notation formats. This dataset comprises almost 80 hours of real recordings with varied instrumentation and polyphony degrees---ranging from piano to orchestral music---, 1251 scanned sheets, and 880 symbolic scores from 37 composers, which may also be used in other tasks involving metadata such as instrument identification or composer recognition. A fragmented subset of this collection solely focused on acoustic data for score-level AMT---the MUSic Collection for aUtomatic Transcription - fragmented Subset (MUSCUTS) assortment---is also presented together with a baseline experimentation, concluding the need to foster research on this field with real recordings. Finally, a web-based service is also provided to increase the size of the collections collaboratively.J. J Valero-Mas A. Galan-Cuenca, J. C. Martinez-Sevilla
MM '24: Proceedings of the 32nd ACM International Conference on Multimedia, Association for Computing Machinery, New York, NY, USA, 2024, ISBN: 979-8-400-70686-8.
Abstract | Links | BibTeX | Tags: MultiScore
@conference{nokey,
  author = {Galan-Cuenca, A. and Valero-Mas, J. J. and Martinez-Sevilla, J. C. and Hidalgo-Centeno, A. and Pertusa, A. and Calvo-Zaragoza, J.},
  title = {{MUSCAT}: A Multimodal {mUSic} Collection for Automatic Transcription of Real Recordings and Image Scores},
  booktitle = {MM '24: Proceedings of the 32nd ACM International Conference on Multimedia},
  pages = {583--591},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  year = {2024},
  date = {2024-10-28},
  urldate = {2024-10-28},
  doi = {10.1145/3664647.3681572},
  url = {https://doi.org/10.1145/3664647.3681572},
  isbn = {979-8-4007-0686-8},
  abstract = {Multimodal audio-image music transcription has been recently posed as a means of retrieving a digital score representation by leveraging the individual estimations from Automatic Music Transcription (AMT)---acoustic recordings---and Optical Music Recognition (OMR)---image scores---systems. Nevertheless, while proven to outperform single-modality recognition rates, this approach has been exclusively validated under controlled scenarios---monotimbral and monophonic synthetic data---mainly due to a lack of collections with symbolic score-level annotations for both recordings and graphical sheets. To promote research on this topic, this work presents the Multimodal mUSic Collection for Automatic Transcription (MUSCAT) assortment of acoustic recordings, image sheets, and their score-level annotations in several notation formats. This dataset comprises almost 80 hours of real recordings with varied instrumentation and polyphony degrees---ranging from piano to orchestral music---, 1251 scanned sheets, and 880 symbolic scores from 37 composers, which may also be used in other tasks involving metadata such as instrument identification or composer recognition. A fragmented subset of this collection solely focused on acoustic data for score-level AMT---the MUSic Collection for aUtomatic Transcription - fragmented Subset (MUSCUTS) assortment---is also presented together with a baseline experimentation, concluding the need to foster research on this field with real recordings. Finally, a web-based service is also provided to increase the size of the collections collaboratively.},
  keywords = {MultiScore},
  pubstate = {published},
  tppubtype = {conference}
}
Multimodal audio-image music transcription has been recently posed as a means of retrieving a digital score representation by leveraging the individual estimations from Automatic Music Transcription (AMT)---acoustic recordings---and Optical Music Recognition (OMR)---image scores---systems. Nevertheless, while proven to outperform single-modality recognition rates, this approach has been exclusively validated under controlled scenarios---monotimbral and monophonic synthetic data---mainly due to a lack of collections with symbolic score-level annotations for both recordings and graphical sheets. To promote research on this topic, this work presents the Multimodal mUSic Collection for Automatic Transcription (MUSCAT) assortment of acoustic recordings, image sheets, and their score-level annotations in several notation formats. This dataset comprises almost 80 hours of real recordings with varied instrumentation and polyphony degrees---ranging from piano to orchestral music---, 1251 scanned sheets, and 880 symbolic scores from 37 composers, which may also be used in other tasks involving metadata such as instrument identification or composer recognition. A fragmented subset of this collection solely focused on acoustic data for score-level AMT---the MUSic Collection for aUtomatic Transcription - fragmented Subset (MUSCUTS) assortment---is also presented together with a baseline experimentation, concluding the need to foster research on this field with real recordings. Finally, a web-based service is also provided to increase the size of the collections collaboratively. Penarrubia, C.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Contrastive Self-Supervised Learning for Optical Music Recognition Conference
International Workshop on Document Analysis Systems, 2024, ISBN: 978-3-031-70442-0.
Abstract | Links | BibTeX | Tags:
@conference{nokey,
  author = {Penarrubia, C. and Valero-Mas, J. J. and Calvo-Zaragoza, J.},
  title = {Contrastive Self-Supervised Learning for Optical Music Recognition},
  booktitle = {International Workshop on Document Analysis Systems},
  pages = {312--326},
  year = {2024},
  date = {2024-09-11},
  urldate = {2024-09-11},
  doi = {10.1007/978-3-031-70442-0_19},
  isbn = {978-3-031-70442-0},
  abstract = {Optical Music Recognition (OMR) is the research area focused on transcribing images of musical scores. In recent years, this field has seen great development thanks to the emergence of Deep Learning. However, these types of solutions require large volumes of labeled data. To alleviate this problem, Contrastive Self-Supervised Learning (SSL) has emerged as a paradigm that leverages large amounts of unlabeled data to train neural networks, yielding meaningful and robust representations. In this work, we explore its first application to the field of OMR. By utilizing three datasets that represent the heterogeneity of musical scores in notations and graphic styles, and through multiple evaluation protocols, we demonstrate that contrastive SSL delivers promising results, significantly reducing data scarcity challenges in OMR. To the best of our knowledge, this is the first study that integrates these two fields. We hope this research serves as a baseline and stimulates further exploration.},
  keywords = {},
  pubstate = {published},
  tppubtype = {conference}
}
Optical Music Recognition (OMR) is the research area focused on transcribing images of musical scores. In recent years, this field has seen great development thanks to the emergence of Deep Learning. However, these types of solutions require large volumes of labeled data. To alleviate this problem, Contrastive Self-Supervised Learning (SSL) has emerged as a paradigm that leverages large amounts of unlabeled data to train neural networks, yielding meaningful and robust representations. In this work, we explore its first application to the field of OMR. By utilizing three datasets that represent the heterogeneity of musical scores in notations and graphic styles, and through multiple evaluation protocols, we demonstrate that contrastive SSL delivers promising results, significantly reducing data scarcity challenges in OMR. To the best of our knowledge, this is the first study that integrates these two fields. We hope this research serves as a baseline and stimulates further exploration.J. P. Martinez-Esteso F. J. Castellanos, A. Galán-Cuenca
A Region-Based Approach for Layout Analysis of Music Score Images in Scarce Data Scenarios Proceedings Article
In: Lecture Notes in Computer Science - International Conference on Document Analysis and Recognition, pp. 58-75, Springer, Athens, Greece, 2024, ISBN: 978-3-031-70545-8.
@inproceedings{Castellanos:2024:scarceLA,
  author = {Castellanos, F. J. and Martinez-Esteso, J. P. and Galán-Cuenca, A. and Gallego, A. J.},
  title = {A Region-Based Approach for Layout Analysis of Music Score Images in Scarce Data Scenarios},
  booktitle = {International Conference on Document Analysis and Recognition},
  series = {Lecture Notes in Computer Science},
  volume = {14807},
  pages = {58--75},
  publisher = {Springer},
  address = {Athens, Greece},
  year = {2024},
  date = {2024-09-11},
  urldate = {2024-09-11},
  doi = {10.1007/978-3-031-70546-5_4},
  url = {https://link.springer.com/chapter/10.1007/978-3-031-70546-5_4},
  isbn = {978-3-031-70545-8},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Ayllon, E.; Castellanos, F. J.; Calvo-Zaragoza, J.
Analysis of the Calibration of Handwriting Text Recognition Models Proceedings Article
In: Lecture Notes in Computer Science - International Conference on Document Analysis and Recognition, pp. 139-155, Springer, Athens, Greece, 2024, ISBN: 978-3-031-70535-9.
@inproceedings{Ayllon:2024:calibration,
  author = {Ayllon, E. and Castellanos, F. J. and Calvo-Zaragoza, J.},
  title = {Analysis of the Calibration of Handwriting Text Recognition Models},
  booktitle = {International Conference on Document Analysis and Recognition},
  series = {Lecture Notes in Computer Science},
  volume = {14805},
  pages = {139--155},
  publisher = {Springer},
  address = {Athens, Greece},
  year = {2024},
  date = {2024-09-03},
  urldate = {2024-09-03},
  doi = {10.1007/978-3-031-70536-6_9},
  url = {https://link.springer.com/chapter/10.1007/978-3-031-70536-6_9},
  isbn = {978-3-031-70535-9},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Ríos-Vila, A.; Calvo-Zaragoza, J.; Paquet, T.
Sheet Music Transformer: End-To-End Optical Music Recognition Beyond Monophonic Transcription Conference
Document Analysis and Recognition - ICDAR 2024, vol. 1, Springer Nature Switzerland, 2024, ISBN: 978-3-031-70552-6.
BibTeX | Tags: MultiScore
@conference{RiosVila:ICDAR:2024,
  author = {Ríos-Vila, A. and Calvo-Zaragoza, J. and Paquet, T.},
  title = {Sheet Music Transformer: End-To-End Optical Music Recognition Beyond Monophonic Transcription},
  booktitle = {Document Analysis and Recognition - ICDAR 2024},
  volume = {1},
  pages = {20--37},
  publisher = {Springer Nature Switzerland},
  year = {2024},
  date = {2024-09-02},
  urldate = {2024-09-02},
  isbn = {978-3-031-70552-6},
  keywords = {MultiScore},
  pubstate = {published},
  tppubtype = {conference}
}
Maciá, M.; Rizo, D.
The Impact of UX/UI on Piano-Assisted Learning in Extended Reality Conference
Computer Supported Music Education. Angers, France., 2024.
BibTeX | Tags:
@conference{macia2024,
  author = {Maciá, M. and Rizo, D.},
  title = {The Impact of {UX/UI} on Piano-Assisted Learning in Extended Reality},
  booktitle = {Computer Supported Music Education},
  address = {Angers, France},
  year = {2024},
  date = {2024-05-04},
  urldate = {2024-05-04},
  keywords = {},
  pubstate = {published},
  tppubtype = {conference}
}
Alfaro-Contreras, M.; Rios-Vila, A.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
A Transformer Approach for Polyphonic Audio-to-Score Transcription Proceedings Article
In: Proceedings of the International Conference on Acoustics, Speech, and Signal Processing (ICASSP 2024), Seoul, Korea, 2024.
Links | BibTeX | Tags: MultiScore
@inproceedings{Alfaro-Contreras:ICASSP24,
  author = {Alfaro-Contreras, M. and Rios-Vila, A. and Valero-Mas, J. J. and Calvo-Zaragoza, J.},
  title = {A Transformer Approach for Polyphonic Audio-to-Score Transcription},
  booktitle = {Proceedings of the International Conference on Acoustics, Speech, and Signal Processing (ICASSP 2024)},
  address = {Seoul, Korea},
  year = {2024},
  date = {2024-04-19},
  urldate = {2024-04-19},
  doi = {10.1109/ICASSP48485.2024.10447162},
  keywords = {MultiScore},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Valero-Mas, J. J.; Gallego, A. J.; Rico-Juan, J. R.
An overview of ensemble and feature learning in few-shot image classification using siamese networks Journal Article
In: Multimedia Tools and Applications, vol. 83, pp. 19929–19952, 2024, ISSN: 1380-7501.
@article{nokey,
  author = {Valero-Mas, J. J. and Gallego, A. J. and Rico-Juan, J. R.},
  title = {An overview of ensemble and feature learning in few-shot image classification using siamese networks},
  journal = {Multimedia Tools and Applications},
  volume = {83},
  pages = {19929--19952},
  year = {2024},
  date = {2024-02-01},
  urldate = {2023-07-29},
  doi = {10.1007/s11042-023-15607-3},
  issn = {1380-7501},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Thomae, Martha E.; Rizo, David; Fuentes-Martínez, Eliseo; Alís Raurich, Cristina; De Luca, Elsa; Calvo-Zaragoza, Jorge
A Preliminary Proposal for a Systematic GABC Encoding of Gregorian Chant Proceedings Article
In: ACM International Conference Proceeding Series, pp. 45-53, Association for Computing Machinery, 2024, ISBN: 9798400717208.
Abstract | Links | BibTeX | Tags: Aquitanian neumes, GABC, Gregorian chant, MEI, music encoding, Plainchant, REPERTORIUM, square notation
@inproceedings{Thomae2024,
  author = {Thomae, Martha E. and Rizo, David and Fuentes-Martínez, Eliseo and Alís Raurich, Cristina and De Luca, Elsa and Calvo-Zaragoza, Jorge},
  title = {A Preliminary Proposal for a Systematic {GABC} Encoding of {Gregorian} Chant},
  booktitle = {ACM International Conference Proceeding Series},
  pages = {45--53},
  publisher = {Association for Computing Machinery},
  year = {2024},
  date = {2024-01-01},
  urldate = {2024-01-01},
  doi = {10.1145/3660570.3660581},
  isbn = {979-8-4007-1720-8},
  abstract = {In the last years, several approaches have addressed the encoding of the different music scripts used for plainchant. One of these approaches is the GABC format. While being a comprehensive symbolic representation of square notation, the lack of a formal specification for GABC usually leads to ambiguities, which must be avoided in the specification of any encoding format. Sometimes, the simple trial-and-error approach of entering the GABC code into an engraving system - such as Illuminare, Scrib.io, or GABC Transcription Tool - can solve this ambiguity. However, these engraving systems have shown some inconsistency among themselves when rendering GABC, sometimes displaying different music for the same code snippet. This paper presents a systematic approach to encoding Gregorian chant originally written in Aquitanian neumes and square notation to eliminate ambiguities inherent in the GABC specification. By formalizing the grammar of GABC, we address the challenges of inaccurate renderings in current music notation software. Our methodology includes developing a "Systematic GABC" (S-GABC) following a critical and scientific mentality to ensure the endurance of the notation. This paper demonstrates our system's effectiveness in standardizing Gregorian chant encoding, offering significant contributions to digital musicology and enhancing the accuracy of musical heritage digitization.},
  keywords = {Aquitanian neumes, GABC, Gregorian chant, MEI, music encoding, Plainchant, REPERTORIUM, square notation},
  pubstate = {published},
  tppubtype = {inproceedings}
}
In the last years, several approaches have addressed the encoding of the different music scripts used for plainchant. One of these approaches is the GABC format. While being a comprehensive symbolic representation of square notation, the lack of a formal specification for GABC usually leads to ambiguities, which must be avoided in the specification of any encoding format. Sometimes, the simple trial-and-error approach of entering the GABC code into an engraving system - such as Illuminare, Scrib.io, or GABC Transcription Tool - can solve this ambiguity. However, these engraving systems have shown some inconsistency among themselves when rendering GABC, sometimes displaying different music for the same code snippet. This paper presents a systematic approach to encoding Gregorian chant originally written in Aquitanian neumes and square notation to eliminate ambiguities inherent in the GABC specification. By formalizing the grammar of GABC, we address the challenges of inaccurate renderings in current music notation software. Our methodology includes developing a "Systematic GABC"(S-GABC) following a critical and scientific mentality to ensure the endurance of the notation. This paper demonstrates our system's effectiveness in standardizing Gregorian chant encoding, offering significant contributions to digital musicology and enhancing the accuracy of musical heritage digitization. Roselló, Adrián; Fuentes-Martínez, Eliseo; Alfaro-Contreras, María; Rizo, David; Calvo-Zaragoza, Jorge
Source-Free Domain Adaptation for Optical Music Recognition Book Chapter
In: pp. 3-19, 2024, ISSN: 16113349.
@inproceedings{Rosell2024,
  author = {Roselló, Adrián and Fuentes-Martínez, Eliseo and Alfaro-Contreras, María and Rizo, David and Calvo-Zaragoza, Jorge},
  title = {Source-Free Domain Adaptation for Optical Music Recognition},
  booktitle = {Document Analysis and Recognition - ICDAR 2024},
  pages = {3--19},
  publisher = {Springer Nature Switzerland},
  year = {2024},
  date = {2024-01-01},
  doi = {10.1007/978-3-031-70552-6_1},
  isbn = {978-3-031-70552-6},
  issn = {1611-3349},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Rizo, David; Calvo-Zaragoza, Jorge; García-Iasci, Patricia; Delgado-Sánchez, Teresa
Lessons Learned From a Project to Encode Mensural Music on a Large
Scale With Optical Music Recognition Proceedings Article
In: Kaneshiro, Blair; Mysore, Gautham J; Nieto, Oriol; Donahue, Chris; Huang, Cheng-Zhi Anna; Lee, Jin Ha; McFee, Brian; McCallum, Matthew C (Ed.): Proceedings of the 25th International Society for Music Information
Retrieval Conference, ISMIR 2024, San Francisco, California, USA
and Online, November 10-14, 2024, pp. 225-231, 2024.
@inproceedings{Rizo2024,
  author = {Rizo, David and Calvo-Zaragoza, Jorge and García-Iasci, Patricia and Delgado-Sánchez, Teresa},
  title = {Lessons Learned From a Project to Encode Mensural Music on a Large Scale With Optical Music Recognition},
  editor = {Kaneshiro, Blair and Mysore, Gautham J. and Nieto, Oriol and Donahue, Chris and Huang, Cheng-Zhi Anna and Lee, Jin Ha and McFee, Brian and McCallum, Matthew C.},
  booktitle = {Proceedings of the 25th International Society for Music Information Retrieval Conference, ISMIR 2024, San Francisco, California, USA and Online, November 10-14, 2024},
  pages = {225--231},
  year = {2024},
  date = {2024-01-01},
  doi = {10.5281/zenodo.14877315},
  url = {https://doi.org/10.5281/zenodo.14877315},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Luna-Barahona, Noelia N; Rosello, Adrian; Alfaro-Contreras, María; Rizo, David; Calvo-Zaragoza, Jorge
Unsupervised Synthetic-to-Real Adaptation for Optical Music Recognition Proceedings Article
In: Kaneshiro, Blair; Mysore, Gautham J; Nieto, Oriol; Donahue, Chris; Huang, Cheng-Zhi Anna; Lee, Jin Ha; McFee, Brian; McCallum, Matthew C (Ed.): Proceedings of the 25th International Society for Music Information
Retrieval Conference, ISMIR 2024, San Francisco, California, USA
and Online, November 10-14, 2024, pp. 462-469, 2024.
@inproceedings{Luna-Barahona2024,
  author = {Luna-Barahona, Noelia N. and Rosello, Adrian and Alfaro-Contreras, María and Rizo, David and Calvo-Zaragoza, Jorge},
  title = {Unsupervised Synthetic-to-Real Adaptation for Optical Music Recognition},
  editor = {Kaneshiro, Blair and Mysore, Gautham J. and Nieto, Oriol and Donahue, Chris and Huang, Cheng-Zhi Anna and Lee, Jin Ha and McFee, Brian and McCallum, Matthew C.},
  booktitle = {Proceedings of the 25th International Society for Music Information Retrieval Conference, ISMIR 2024, San Francisco, California, USA and Online, November 10-14, 2024},
  pages = {462--469},
  year = {2024},
  date = {2024-01-01},
  doi = {10.5281/zenodo.14877375},
  url = {https://doi.org/10.5281/zenodo.14877375},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Martinez-Sevilla, Juan Carlos; Rizo, David; Calvo-Zaragoza, Jorge
Towards Universal Optical Music Recognition: A Case Study on Notation
Types Proceedings Article
In: Kaneshiro, Blair; Mysore, Gautham J; Nieto, Oriol; Donahue, Chris; Huang, Cheng-Zhi Anna; Lee, Jin Ha; McFee, Brian; McCallum, Matthew C (Ed.): Proceedings of the 25th International Society for Music Information
Retrieval Conference, ISMIR 2024, San Francisco, California, USA
and Online, November 10-14, 2024, pp. 914-921, 2024.
@inproceedings{Martinez-Sevilla2024,
  author = {Martinez-Sevilla, Juan Carlos and Rizo, David and Calvo-Zaragoza, Jorge},
  title = {Towards Universal Optical Music Recognition: A Case Study on Notation Types},
  editor = {Kaneshiro, Blair and Mysore, Gautham J. and Nieto, Oriol and Donahue, Chris and Huang, Cheng-Zhi Anna and Lee, Jin Ha and McFee, Brian and McCallum, Matthew C.},
  booktitle = {Proceedings of the 25th International Society for Music Information Retrieval Conference, ISMIR 2024, San Francisco, California, USA and Online, November 10-14, 2024},
  pages = {914--921},
  year = {2024},
  date = {2024-01-01},
  doi = {10.5281/zenodo.14877479},
  url = {https://doi.org/10.5281/zenodo.14877479},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Ríos-Vila, Antonio; Calvo-Zaragoza, Jorge; Rizo, David; Paquet, Thierry
Sheet Music Transformer ++: End-to-End Full-Page Optical Music Recognition
for Pianoform Sheet Music Journal Article
In: CoRR, vol. abs/2405.12105, 2024.
@article{Ros-Vila2024,
  author = {Ríos-Vila, Antonio and Calvo-Zaragoza, Jorge and Rizo, David and Paquet, Thierry},
  title = {Sheet Music Transformer ++: End-to-End Full-Page Optical Music Recognition for Pianoform Sheet Music},
  journal = {CoRR},
  volume = {abs/2405.12105},
  year = {2024},
  date = {2024-01-01},
  eprint = {2405.12105},
  eprinttype = {arXiv},
  doi = {10.48550/arXiv.2405.12105},
  url = {https://doi.org/10.48550/arXiv.2405.12105},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Fuentes-Martínez, Eliseo; Ríos-Vila, Antonio; Martinez-Sevilla, Juan Carlos; Rizo, David; Calvo-Zaragoza, Jorge
Aligned Music Notation and Lyrics Transcription Journal Article
In: CoRR, vol. abs/2412.04217, 2024.
@article{Fuentes-Martnez2024,
  author = {Fuentes-Martínez, Eliseo and Ríos-Vila, Antonio and Martinez-Sevilla, Juan Carlos and Rizo, David and Calvo-Zaragoza, Jorge},
  title = {Aligned Music Notation and Lyrics Transcription},
  journal = {CoRR},
  volume = {abs/2412.04217},
  year = {2024},
  date = {2024-01-01},
  eprint = {2412.04217},
  eprinttype = {arXiv},
  doi = {10.48550/arXiv.2412.04217},
  url = {https://doi.org/10.48550/arXiv.2412.04217},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
García-Iasci, Patricia; Martínez-Sevilla, Juan Carlos; Rizo, David; Calvo-Zaragoza, Jorge
Towards a standardization of lead sheet encoding:
an experience in OMR Proceedings Article
In: Music Encoding Conference 2024, 2024.
@inproceedings{Garca-Iasci2024,
  author = {García-Iasci, Patricia and Martínez-Sevilla, Juan Carlos and Rizo, David and Calvo-Zaragoza, Jorge},
  title = {Towards a standardization of lead sheet encoding: an experience in {OMR}},
  booktitle = {Music Encoding Conference 2024},
  year = {2024},
  date = {2024-01-01},
  doi = {10.17613/fbrj-k426},
  url = {https://doi.org/10.17613/fbrj-k426},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Rizo, David; López-Rocamora, Pablo; Pardo-Cayuela, Antonio
A workflow for Attribution Issues using Language Models Proceedings Article
In: International Association of Music Libraries, 2024.
BibTeX | Tags:
@inproceedings{Rizo2024b,
  author    = {David Rizo and Pablo López-Rocamora and Antonio Pardo-Cayuela},
  title     = {A workflow for Attribution Issues using Language Models},
  booktitle = {International Association of Music Libraries},
  year      = {2024},
  date      = {2024-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings},
}
García-Iasci, Patricia; Rizo, David
EA-Digifolk: Digitizing and encoding Irish Traditional Music at ITMA Proceedings Article
In: International Association of Music Libraries, 2024.
@inproceedings{Garca-Iasci2024b,
  title     = {EA-Digifolk: Digitizing and encoding Irish Traditional Music at ITMA},
  author    = {Patricia García-Iasci and David Rizo},
  year      = {2024},
  date      = {2024-01-01},
  urldate   = {2024-01-01},
  booktitle = {International Association of Music Libraries},
  keywords  = {PolifonIA},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Rizo, David; Delgado-Sánchez, Teresa; Calvo-Zaragoza, Jorge; García-Iasci, Patricia; Madueño-Madueño, Antonio
Insights into AI to encode a whole mensural collection with limited resources Proceedings Article
In: International Association of Music Libraries, 2024.
BibTeX | Tags:
@inproceedings{Rizo2024c,
  author    = {David Rizo and Teresa Delgado-Sánchez and Jorge Calvo-Zaragoza and Patricia García-Iasci and Antonio Madueño-Madueño},
  title     = {Insights into AI to encode a whole mensural collection with limited resources},
  booktitle = {International Association of Music Libraries},
  year      = {2024},
  date      = {2024-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
González-Barrachina, Pedro; Alfaro-Contreras, María; Calvo-Zaragoza, Jorge
Continual Learning for Music Classification Proceedings Article
In: International Society for Music Information Retrieval Conference, ISMIR, pp. 596-602, 2024.
Abstract | Links | BibTeX | Tags:
@inproceedings{gonzalez2024continual,
  title     = {Continual Learning for Music Classification},
  author    = {Pedro González-Barrachina and María Alfaro-Contreras and Jorge Calvo-Zaragoza},
  doi       = {10.5281/zenodo.14877406},
  year      = {2024},
  date      = {2024-01-01},
  booktitle = {International Society for Music Information Retrieval Conference, ISMIR},
  pages     = {596--602},
  abstract  = {Music classification is a prominent research area within Music Information Retrieval. While Deep Learning methods can adequately perform this task, their classification space remains fixed once trained, which conflicts with the dynamic nature of the ever-evolving music landscape. This work explores, for the first time, the application of Continual Learning (CL) in the context of music classification. Specifically, we thoroughly evaluate five state-of-the-art CL approaches across four different music classification tasks. Additionally, we showcase that a foundation model might be the key to CL in music classification. To that end, we study a new approach called Pre-trained Class Centers, which leverages pre-trained features to create fixed class-center spaces. Our results reveal that existing CL methods struggle when applied to music classification tasks, whereas this simple method consistently out-performs them. This highlights the need for CL methods tailored specifically for music classification.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Music classification is a prominent research area within Music Information Retrieval. While Deep Learning methods can adequately perform this task, their classification space remains fixed once trained, which conflicts with the dynamic nature of the ever-evolving music landscape. This work explores, for the first time, the application of Continual Learning (CL) in the context of music classification. Specifically, we thoroughly evaluate five state-of-the-art CL approaches across four different music classification tasks. Additionally, we showcase that a foundation model might be the key to CL in music classification. To that end, we study a new approach called Pre-trained Class Centers, which leverages pre-trained features to create fixed class-center spaces. Our results reveal that existing CL methods struggle when applied to music classification tasks, whereas this simple method consistently out-performs them. This highlights the need for CL methods tailored specifically for music classification.
2023
Castellanos, F. J.; Gallego, A. J.; Fujinaga, I.
A Few-Shot Neural Approach for Layout Analysis of Music Score Images Proceedings Article
In: Proceedings of the 24th International Society for Music Information Retrieval Conference, pp. 106-113, Milan, Italy, 2023, ISBN: 978-1-7327299-3-3.
@inproceedings{Castellanos:2023:few,
  title     = {A Few-Shot Neural Approach for Layout Analysis of Music Score Images},
  author    = {F. J. Castellanos and A. J. Gallego and I. Fujinaga},
  url       = {https://archives.ismir.net/ismir2023/paper/000011.pdf},
  isbn      = {978-1-7327299-3-3},
  year      = {2023},
  date      = {2023-11-05},
  urldate   = {2023-11-05},
  booktitle = {Proceedings of the 24th International Society for Music Information Retrieval Conference},
  pages     = {106--113},
  address   = {Milan, Italy},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Martinez-Sevilla, Juan C; Rios-Vila, A; Castellanos, FJ; Calvo-Zaragoza, J
Towards Music Notation and Lyrics Alignment: Gregorian Chants as Case Study Working paper
2023.
Abstract | Links | BibTeX | Tags:
@workingpaper{Martinez:2023:lyrics,
  title     = {Towards Music Notation and Lyrics Alignment: Gregorian Chants as Case Study},
  author    = {Juan C Martinez-Sevilla and A Rios-Vila and FJ Castellanos and J Calvo-Zaragoza},
  url       = {https://arxiv.org/pdf/2311.04091#page=16},
  year      = {2023},
  date      = {2023-11-04},
  issue     = {WORMS},
  abstract  = {Proceedings of the 5th International Workshop on Reading Music Systems, Milan, Italy},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {workingpaper}
}
Proceedings of the 5th International Workshop on Reading Music Systems, Milan, Italy
Castellanos, F. J.; Gallego, A. J.; Fujinaga, I.
A Preliminary Study of Few-shot Learning for Layout Analysis of Music Scores Working paper
2023.
Abstract | Links | BibTeX | Tags:
@workingpaper{Castellanos:2023:preFew,
  title     = {A Preliminary Study of Few-shot Learning for Layout Analysis of Music Scores},
  author    = {F. J. Castellanos and A. J. Gallego and I. Fujinaga},
  url       = {https://arxiv.org/pdf/2311.04091#page=45},
  year      = {2023},
  date      = {2023-11-04},
  issue     = {WORMS},
  abstract  = {Proceedings of the 5th International Workshop on Reading Music Systems, Milan, Italy},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {workingpaper}
}
Proceedings of the 5th International Workshop on Reading Music Systems, Milan, Italy
Ramoneda, P.; Jeong, D.; Valero-Mas, J. J.; Serra, X.
Predicting performance difficulty from piano sheet music images Conference
Proceedings of the 24th International Society for Music Information Retrieval Conference, Milan, Italy, 2023, ISBN: 978-1-7327299-3-3.
Abstract | Links | BibTeX | Tags:
@conference{nokey,
  title     = {Predicting performance difficulty from piano sheet music images},
  author    = {P. Ramoneda and D. Jeong and J. J. Valero-Mas and X. Serra},
  doi       = {10.5281/zenodo.10265386},
  isbn      = {978-1-7327299-3-3},
  year      = {2023},
  date      = {2023-11-04},
  urldate   = {2023-11-04},
  booktitle = {Proceedings of the 24th International Society for Music Information Retrieval Conference},
  pages     = {708--715},
  address   = {Milan, Italy},
  abstract  = {Estimating the performance difficulty of a musical score is crucial in music education for adequately designing the learning curriculum of the students. Although the music information retrieval community has recently shown interest in this task, existing approaches mainly use machine-readable scores, leaving the broader case of sheet music images unaddressed. Based on previous works involving sheet music images, we use a mid-level representation, bootleg score, describing notehead positions relative to staff lines coupled with a transformer model. This architecture is adapted to our task by introducing a different encoding scheme that reduces the encoded sequence length to one-eighth of the original size. In terms of evaluation, we consider five datasets---more than 7500 scores with up to 9 difficulty levels---, two being mainly compiled for this work. The results obtained when pretraining the scheme on the IMSLP corpus and fine-tuning it on the considered datasets prove the proposal's validity, achieving the best-performing model with a balanced accuracy of 40.3\% and a mean square error of 1.3. Finally, we provide access to our code, data, and models for transparency and reproducibility.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
}
Estimating the performance difficulty of a musical score is crucial in music education for adequately designing the learning curriculum of the students. Although the music information retrieval community has recently shown interest in this task, existing approaches mainly use machine-readable scores, leaving the broader case of sheet music images unaddressed. Based on previous works involving sheet music images, we use a mid-level representation, bootleg score, describing notehead positions relative to staff lines coupled with a transformer model. This architecture is adapted to our task by introducing a different encoding scheme that reduces the encoded sequence length to one-eighth of the original size. In terms of evaluation, we consider five datasets---more than 7500 scores with up to 9 difficulty levels---, two being mainly compiled for this work. The results obtained when pretraining the scheme on the IMSLP corpus and fine-tuning it on the considered datasets prove the proposal's validity, achieving the best-performing model with a balanced accuracy of 40.3% and a mean square error of 1.3. Finally, we provide access to our code, data, and models for transparency and reproducibility.
Penarrubia, C.; Garrido-Munoz, C.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Efficient notation assembly in optical music recognition Conference
Proceedings of the 24th International Society for Music Information Retrieval Conference, Milan, Italy, 2023, ISBN: 978-1-7327299-3-3.
BibTeX | Tags:
@conference{nokey,
  title     = {Efficient notation assembly in optical music recognition},
  author    = {C. Penarrubia and C. Garrido-Munoz and J. J. Valero-Mas and J. Calvo-Zaragoza},
  isbn      = {978-1-7327299-3-3},
  year      = {2023},
  date      = {2023-10-30},
  booktitle = {Proceedings of the 24th International Society for Music Information Retrieval Conference},
  pages     = {182--189},
  address   = {Milan, Italy},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
}
Martínez-Sevilla, J. C.; Ríos-Vila, A.; Castellanos, F. J.; Calvo-Zaragoza, J.
A Holistic Approach for Aligned Music and Lyrics Transcription Conference
Document Analysis and Recognition - ICDAR 2023, vol. 1, Springer Nature Switzerland, Cham, 2023, ISBN: 978-3-031-41676-7.
Abstract | Links | BibTeX | Tags: REPERTORIUM
@conference{MartinezSevilla:ICDAR:2023,
  title     = {A Holistic Approach for Aligned Music and Lyrics Transcription},
  author    = {J. C. Martínez-Sevilla and A. Ríos-Vila and F. J. Castellanos and J. Calvo-Zaragoza},
  editor    = {Fink, Gernot A. and Jain, Rajiv and Kise, Koichi and Zanibbi, Richard},
  doi       = {10.1007/978-3-031-41676-7_11},
  isbn      = {978-3-031-41676-7},
  year      = {2023},
  date      = {2023-08-28},
  urldate   = {2023-08-28},
  booktitle = {Document Analysis and Recognition - ICDAR 2023},
  volume    = {1},
  pages     = {185--201},
  publisher = {Springer Nature Switzerland},
  address   = {Cham},
  abstract  = {In this paper, we present the Aligned Music Notation and Lyrics Transcription (AMNLT) challenge, whose goal is to retrieve the content from document images of vocal music. This new research area arises from the need to automatically transcribe notes and lyrics from music scores and align both sources of information conveniently. Although existing methods are able to deal with music notation and text, they work without providing their proper alignment, which is crucial to actually retrieve the content of the piece of vocal music. To overcome this challenge, we consider holistic neural approaches that transcribe music and text in one step, along with an encoding that implicitly aligns the sources of information. The methodology is evaluated on a benchmark specifically designed for AMNLT. The results report that existing methods can obtain high-quality text and music transcriptions, but posterior alignment errors are inevitably found. However, our formulation achieves relative improvements of over 80\% in the metric that considers both transcription and alignment. We hope that this work will establish itself as a future reference for further research on AMNLT.},
  keywords  = {REPERTORIUM},
  pubstate  = {published},
  tppubtype = {conference}
}
In this paper, we present the Aligned Music Notation and Lyrics Transcription (AMNLT) challenge, whose goal is to retrieve the content from document images of vocal music. This new research area arises from the need to automatically transcribe notes and lyrics from music scores and align both sources of information conveniently. Although existing methods are able to deal with music notation and text, they work without providing their proper alignment, which is crucial to actually retrieve the content of the piece of vocal music. To overcome this challenge, we consider holistic neural approaches that transcribe music and text in one step, along with an encoding that implicitly aligns the sources of information. The methodology is evaluated on a benchmark specifically designed for AMNLT. The results report that existing methods can obtain high-quality text and music transcriptions, but posterior alignment errors are inevitably found. However, our formulation achieves relative improvements of over 80% in the metric that considers both transcription and alignment. We hope that this work will establish itself as a future reference for further research on AMNLT.
Martínez-Sevilla, J. C.; Alfaro-Contreras, M.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Insights into end-to-end audio-to-score transcription with real recordings: A case study with saxophone works Proceedings Article
In: INTERSPEECH Conference, pp. 2793-2797, Dublin, Ireland, 2023.
Links | BibTeX | Tags: MultiScore
@inproceedings{Martínez-Sevilla2023,
  title     = {Insights into end-to-end audio-to-score transcription with real recordings: A case study with saxophone works},
  author    = {J. C. Martínez-Sevilla and M. Alfaro-Contreras and J. J. Valero-Mas and J. Calvo-Zaragoza},
  doi       = {10.21437/Interspeech.2023-88},
  year      = {2023},
  date      = {2023-08-20},
  urldate   = {2023-08-20},
  booktitle = {INTERSPEECH Conference},
  pages     = {2793--2797},
  address   = {Dublin, Ireland},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Alfaro-Contreras, M.; Valero-Mas, J. J.; Iñesta, J. M.; Calvo-Zaragoza, J.
Multimodal Strategies for Image and Audio Music Transcription: A Comparative Study Proceedings Article
In: Pattern Recognition, Computer Vision, and Image Processing. ICPR 2022 International Workshops and Challenges. ICPR 2022. Lecture Notes in Computer Science, pp. 64-77, Springer Nature Switzerland, Cham, 2023, ISBN: 978-3-031-37731-0.
Links | BibTeX | Tags: MultiScore
@inproceedings{k505,
  title     = {Multimodal Strategies for Image and Audio Music Transcription: A Comparative Study},
  author    = {M. Alfaro-Contreras and J. J. Valero-Mas and J. M. Iñesta and J. Calvo-Zaragoza},
  doi       = {10.1007/978-3-031-37731-0_6},
  isbn      = {978-3-031-37731-0},
  year      = {2023},
  date      = {2023-08-10},
  urldate   = {2022-01-01},
  booktitle = {Pattern Recognition, Computer Vision, and Image Processing. ICPR 2022 International Workshops and Challenges},
  series    = {Lecture Notes in Computer Science},
  volume    = {13645},
  pages     = {64--77},
  publisher = {Springer Nature Switzerland},
  address   = {Cham},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Garrido-Munoz, C.; Alfaro-Contreras, M.; Calvo-Zaragoza, J.
Evaluating Domain Generalization in Kitchen Utensils Classification Proceedings Article
In: Iberian Conference on Pattern Recognition and Image Analysis, pp. 108-118, 2023.
Links | BibTeX | Tags: MultiScore
@inproceedings{Garrido-Munoz2023,
  title     = {Evaluating Domain Generalization in Kitchen Utensils Classification},
  author    = {C. Garrido-Munoz and M. Alfaro-Contreras and J. Calvo-Zaragoza},
  doi       = {10.1007/978-3-031-36616-1_9},
  year      = {2023},
  date      = {2023-06-25},
  booktitle = {Iberian Conference on Pattern Recognition and Image Analysis},
  pages     = {108--118},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
González-Barrachina, P.; Alfaro-Contreras, M.; Nieto-Hidalgo, M.; Calvo-Zaragoza, J.
Lifelong Learning for Document Image Binarization: An Experimental Study Proceedings Article
In: Iberian Conference on Pattern Recognition and Image Analysis, pp. 146-157, 2023.
Links | BibTeX | Tags: MultiScore
@inproceedings{González-Barrachina2023,
  title     = {Lifelong Learning for Document Image Binarization: An Experimental Study},
  author    = {P. González-Barrachina and M. Alfaro-Contreras and M. Nieto-Hidalgo and J. Calvo-Zaragoza},
  doi       = {10.1007/978-3-031-36616-1_12},
  year      = {2023},
  date      = {2023-06-25},
  urldate   = {2023-06-25},
  booktitle = {Iberian Conference on Pattern Recognition and Image Analysis},
  pages     = {146--157},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Penarrubia, C.; Valero-Mas, J. J.; Gallego, A. J.; Calvo-Zaragoza, J.
Addressing Class Imbalance in Multilabel Prototype Generation for k-Nearest Neighbor Classification Conference
Iberian Conference on Pattern Recognition and Image Analysis, Alicante, Spain, 2023, ISBN: 978-3-031-36616-1.
Abstract | Links | BibTeX | Tags: DOREMI
@conference{nokey,
  title     = {Addressing Class Imbalance in Multilabel Prototype Generation for k-Nearest Neighbor Classification},
  author    = {C. Penarrubia and J. J. Valero-Mas and A. J. Gallego and J. Calvo-Zaragoza},
  doi       = {10.1007/978-3-031-36616-1_2},
  isbn      = {978-3-031-36616-1},
  year      = {2023},
  date      = {2023-06-25},
  booktitle = {Iberian Conference on Pattern Recognition and Image Analysis},
  pages     = {15--27},
  address   = {Alicante, Spain},
  abstract  = {Prototype Generation (PG) methods seek to improve the efficiency of the k-Nearest Neighbor (kNN) classifier by obtaining a reduced version of a given reference dataset following certain heuristics. Despite being largely addressed topic in multiclass scenarios, few works deal with PG in multilabel environments. Hence, the existing proposals exhibit a number of limitations, being label imbalance one of paramount relevance as it constitutes a typical challenge of multilabel datasets. This work proposes two novel merging policies for multilabel PG schemes specifically devised for label imbalance, as well as a mechanism to prevent inappropriate samples from undergoing a reduction process. These proposals are applied to three existing multilabel PG methods—Multilabel Reduction through Homogeneous Clustering, Multilabel Chen, and Multilabel Reduction through Space Partitioning—and evaluated on 12 different data assortments with different degrees of label imbalance. The results prove that the proposals overcome—in some cases in a significant manner—those obtained with the original methods, hence validating the presented approaches and enabling further research lines on this topic.},
  keywords  = {DOREMI},
  pubstate  = {published},
  tppubtype = {conference}
}
Prototype Generation (PG) methods seek to improve the efficiency of the k-Nearest Neighbor (kNN) classifier by obtaining a reduced version of a given reference dataset following certain heuristics. Despite being largely addressed topic in multiclass scenarios, few works deal with PG in multilabel environments. Hence, the existing proposals exhibit a number of limitations, being label imbalance one of paramount relevance as it constitutes a typical challenge of multilabel datasets. This work proposes two novel merging policies for multilabel PG schemes specifically devised for label imbalance, as well as a mechanism to prevent inappropriate samples from undergoing a reduction process. These proposals are applied to three existing multilabel PG methods—Multilabel Reduction through Homogeneous Clustering, Multilabel Chen, and Multilabel Reduction through Space Partitioning—and evaluated on 12 different data assortments with different degrees of label imbalance. The results prove that the proposals overcome—in some cases in a significant manner—those obtained with the original methods, hence validating the presented approaches and enabling further research lines on this topic.
Ayllon, E.; Castellanos, F. J.; Calvo-Zaragoza, J.
A Weakly-Supervised Approach for Layout Analysis in Music Score Images Proceedings Article
In: Iberian Conference on Pattern Recognition and Image Analysis, pp. 170-181, Springer Nature Switzerland, 2023, ISBN: 978-3-031-36615-4.
@inproceedings{Ayllon2023:weakly,
  title     = {A Weakly-Supervised Approach for Layout Analysis in Music Score Images},
  author    = {E. Ayllon and F. J. Castellanos and J. Calvo-Zaragoza},
  url       = {https://link.springer.com/chapter/10.1007/978-3-031-36616-1_14},
  doi       = {10.1007/978-3-031-36616-1_14},
  isbn      = {978-3-031-36615-4},
  year      = {2023},
  date      = {2023-06-25},
  urldate   = {2023-06-25},
  booktitle = {Iberian Conference on Pattern Recognition and Image Analysis},
  volume    = {14062},
  pages     = {170--181},
  publisher = {Springer Nature Switzerland},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Rosello, A.; Castellanos, F. J.; Martinez-Esteso, J. P.; Gallego, A. J.; Calvo-Zaragoza, J.
Test-Time Augmentation for Document Image Binarization Proceedings Article
In: Iberian Conference on Pattern Recognition and Image Analysis, pp. 158-169, Springer Nature Switzerland, 2023, ISBN: 978-3-031-36615-4.
@inproceedings{Rosello2023:test-time,
  title     = {Test-Time Augmentation for Document Image Binarization},
  author    = {A. Rosello and F. J. Castellanos and J. P. Martinez-Esteso and A. J. Gallego and J. Calvo-Zaragoza},
  doi       = {10.1007/978-3-031-36616-1_13},
  isbn      = {978-3-031-36615-4},
  year      = {2023},
  date      = {2023-06-25},
  urldate   = {2023-06-25},
  booktitle = {Iberian Conference on Pattern Recognition and Image Analysis},
  volume    = {14062},
  pages     = {158--169},
  publisher = {Springer Nature Switzerland},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Alfaro-Contreras, M.; Iñesta, J. M.; Calvo-Zaragoza, J.
Optical Music Recognition for Homophonic Scores with Neural Networks and Synthetic Music Generation Journal Article
In: International Journal of Multimedia Information Retrieval, vol. 12, pp. 12-24, 2023.
Links | BibTeX | Tags: MultiScore
@article{Alfaro-Contreras2023b,
  title     = {Optical Music Recognition for Homophonic Scores with Neural Networks and Synthetic Music Generation},
  author    = {M. Alfaro-Contreras and J. M. Iñesta and J. Calvo-Zaragoza},
  doi       = {10.1007/s13735-023-00278-5},
  year      = {2023},
  date      = {2023-05-26},
  urldate   = {2023-05-26},
  journal   = {International Journal of Multimedia Information Retrieval},
  volume    = {12},
  pages     = {12--24},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {article}
}
Ríos-Vila, A.; Rizo, D.; Iñesta, J. M.; Calvo-Zaragoza, J.
End-to-end optical music recognition for pianoform sheet music Journal Article
In: International Journal on Document Analysis and Recognition (IJDAR), iss. ICDAR 2023, 2023, ISSN: 1433-2825.
Abstract | Links | BibTeX | Tags: MultiScore
@article{Ríos-Vila2023,
  author    = {A. Ríos-Vila and D. Rizo and J. M. Iñesta and J. Calvo-Zaragoza},
  title     = {End-to-end optical music recognition for pianoform sheet music},
  journal   = {International Journal on Document Analysis and Recognition (IJDAR)},
  issue     = {ICDAR 2023},
  year      = {2023},
  date      = {2023-05-12},
  urldate   = {2023-05-12},
  issn      = {1433-2825},
  doi       = {10.1007/s10032-023-00432-z},
  url       = {https://link.springer.com/content/pdf/10.1007/s10032-023-00432-z.pdf},
  abstract  = {End-to-end solutions have brought about significant advances in the field of Optical Music Recognition. These approaches directly provide the symbolic representation of a given image of a musical score. Despite this, several documents, such as pianoform musical scores, cannot yet benefit from these solutions since their structural complexity does not allow their effective transcription. This paper presents a neural method whose objective is to transcribe these musical scores in an end-to-end fashion. We also introduce the GrandStaff dataset, which contains 53,882 single-system piano scores in common western modern notation. The sources are encoded in both a standard digital music representation and its adaptation for current transcription technologies. The method proposed in this paper is trained and evaluated using this dataset. The results show that the approach presented is, for the first time, able to effectively transcribe pianoform notation in an end-to-end manner.},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {article}
}
End-to-end solutions have brought about significant advances in the field of Optical Music Recognition. These approaches directly provide the symbolic representation of a given image of a musical score. Despite this, several documents, such as pianoform musical scores, cannot yet benefit from these solutions since their structural complexity does not allow their effective transcription. This paper presents a neural method whose objective is to transcribe these musical scores in an end-to-end fashion. We also introduce the GrandStaff dataset, which contains 53,882 single-system piano scores in common western modern notation. The sources are encoded in both a standard digital music representation and its adaptation for current transcription technologies. The method proposed in this paper is trained and evaluated using this dataset. The results show that the approach presented is, for the first time, able to effectively transcribe pianoform notation in an end-to-end manner.
Alfaro-Contreras, M.; Ríos-Vila, A.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Few-Shot Symbol Classification via Self-Supervised Learning and Nearest Neighbor Journal Article
In: Pattern Recognition Letters, vol. 167, pp. 1-8, 2023.
Links | BibTeX | Tags: MultiScore
@article{Alfaro-Contreras2023,
  title     = {Few-Shot Symbol Classification via Self-Supervised Learning and Nearest Neighbor},
  author    = {M. Alfaro-Contreras and A. Ríos-Vila and J. J. Valero-Mas and J. Calvo-Zaragoza},
  doi       = {10.1016/j.patrec.2023.01.014},
  year      = {2023},
  date      = {2023-03-01},
  journal   = {Pattern Recognition Letters},
  volume    = {167},
  pages     = {1--8},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {article}
}
Rico-Juan, J. R.; Sánchez-Cartagena, V. M.; Valero-Mas, J. J.; Gallego, A. J.
Identifying student profiles within online judge systems using explainable artificial intelligence Journal Article
In: IEEE Transactions on Learning Technologies, vol. 16, no. 6, pp. 955-969, 2023, ISSN: 1939-1382.
@article{nokey,
  title     = {Identifying student profiles within online judge systems using explainable artificial intelligence},
  author    = {J. R. Rico-Juan and V. M. Sánchez-Cartagena and J. J. Valero-Mas and A. J. Gallego},
  doi       = {10.1109/TLT.2023.3239110},
  issn      = {1939-1382},
  year      = {2023},
  date      = {2023-01-23},
  urldate   = {2023-01-23},
  journal   = {IEEE Transactions on Learning Technologies},
  volume    = {16},
  number    = {6},
  pages     = {955--969},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Valero-Mas, J. J.; Gallego, A. J.; Alonso-Jiménez, P.; Serra, X.
Multilabel Prototype Generation for Data Reduction in k-Nearest Neighbour classification Journal Article
In: Pattern Recognition, vol. 135, pp. 109190, 2023, ISSN: 0031-3203.
Abstract | Links | BibTeX | Tags: DOREMI, MultiScore
@article{k519,
  title     = {Multilabel Prototype Generation for Data Reduction in k-Nearest Neighbour classification},
  author    = {J. J. Valero-Mas and A. J. Gallego and P. Alonso-Jiménez and X. Serra},
  doi       = {10.1016/j.patcog.2022.109190},
  issn      = {0031-3203},
  year      = {2023},
  date      = {2023-01-01},
  urldate   = {2023-01-01},
  journal   = {Pattern Recognition},
  volume    = {135},
  pages     = {109190},
  abstract  = {Prototype Generation (PG) methods are typically considered for improving the efficiency of the k-Nearest Neighbour (kNN) classifier when tackling high-size corpora. Such approaches aim at generating a reduced version of the corpus without decreasing the classification performance when compared to the initial set. Despite their large application in multiclass scenarios, very few works have addressed the proposal of PG methods for the multilabel space. In this regard, this work presents the novel adaptation of four multiclass PG strategies to the multilabel case. These proposals are evaluated with three multilabel kNN-based classifiers, 12 corpora comprising a varied range of domains and corpus sizes, and different noise scenarios artificially induced in the data. The results obtained show that the proposed adaptations are capable of significantly improving—both in terms of efficiency and classification performance—the only reference multilabel PG work in the literature as well as the case in which no PG method is applied, also presenting statistically superior robustness in noisy scenarios. Moreover, these novel PG strategies allow prioritising either the efficiency or efficacy criteria through its configuration depending on the target scenario, hence covering a wide area in the solution space not previously filled by other works.},
  keywords  = {DOREMI, MultiScore},
  pubstate  = {published},
  tppubtype = {article}
}
Prototype Generation (PG) methods are typically considered for improving the efficiency of the k-Nearest Neighbour (kNN) classifier when tackling high-size corpora. Such approaches aim at generating a reduced version of the corpus without decreasing the classification performance when compared to the initial set. Despite their large application in multiclass scenarios, very few works have addressed the proposal of PG methods for the multilabel space. In this regard, this work presents the novel adaptation of four multiclass PG strategies to the multilabel case. These proposals are evaluated with three multilabel kNN-based classifiers, 12 corpora comprising a varied range of domains and corpus sizes, and different noise scenarios artificially induced in the data. The results obtained show that the proposed adaptations are capable of significantly improving—both in terms of efficiency and classification performance—the only reference multilabel PG work in the literature as well as the case in which no PG method is applied, also presenting statistically superior robustness in noisy scenarios. Moreover, these novel PG strategies allow prioritising either the efficiency or efficacy criteria through its configuration depending on the target scenario, hence covering a wide area in the solution space not previously filled by other works.
Sánchez-Ferrer, A.; Valero-Mas, J. J.; Gallego, A. J.; Calvo-Zaragoza, J.
An Experimental Study on Marine Debris Location and Recognition using Object Detection Journal Article
In: Pattern Recognition Letters, 2023, ISSN: 0167-8655.
Abstract | Links | BibTeX | Tags: TADMar
@article{k521,
  title     = {An Experimental Study on Marine Debris Location and Recognition using Object Detection},
  author    = {A. Sánchez-Ferrer and J. J. Valero-Mas and A. J. Gallego and J. Calvo-Zaragoza},
  doi       = {10.1016/j.patrec.2022.12.019},
  issn      = {0167-8655},
  year      = {2023},
  date      = {2023-01-01},
  urldate   = {2023-01-01},
  journal   = {Pattern Recognition Letters},
  abstract  = {The large amount of debris in our oceans is a global problem that dramatically impacts marine fauna and flora. While a large number of human-based campaigns have been proposed to tackle this issue, these efforts have been deemed insufficient due to the insurmountable amount of existing litter. In response to that, there exists a high interest in the use of autonomous underwater vehicles (AUV) that may locate, identify, and collect this garbage automatically. To perform such a task, AUVs consider state-of-the-art object detection techniques based on deep neural networks due to their reported high performance. Nevertheless, these techniques generally require large amounts of data with fine-grained annotations. In this work, we explore the capabilities of the reference object detector Mask Region-based Convolutional Neural Networks for automatic marine debris location and classification in the context of limited data availability. Considering the recent CleanSea corpus, we pose several scenarios regarding the amount of available train data and study the possibility of mitigating the adverse effects of data scarcity with synthetic marine scenes. Our results achieve a new state of the art in the task, establishing a new reference for future research. In addition, it is shown that the task still has room for improvement and that the lack of data can be somehow alleviated, yet to a limited extent.},
  keywords  = {TADMar},
  pubstate  = {published},
  tppubtype = {article}
}
The large amount of debris in our oceans is a global problem that dramatically impacts marine fauna and flora. While a large number of human-based campaigns have been proposed to tackle this issue, these efforts have been deemed insufficient due to the insurmountable amount of existing litter. In response to that, there exists a high interest in the use of autonomous underwater vehicles (AUV) that may locate, identify, and collect this garbage automatically. To perform such a task, AUVs consider state-of-the-art object detection techniques based on deep neural networks due to their reported high performance. Nevertheless, these techniques generally require large amounts of data with fine-grained annotations. In this work, we explore the capabilities of the reference object detector Mask Region-based Convolutional Neural Networks for automatic marine debris location and classification in the context of limited data availability. Considering the recent CleanSea corpus, we pose several scenarios regarding the amount of available train data and study the possibility of mitigating the adverse effects of data scarcity with synthetic marine scenes. Our results achieve a new state of the art in the task, establishing a new reference for future research. In addition, it is shown that the task still has room for improvement and that the lack of data can be somehow alleviated, yet to a limited extent.
Alfaro-Contreras, M.; Valero-Mas, J. J.; Iñesta, J. M.; Calvo-Zaragoza, J.
Late multimodal fusion for image and audio music transcription Journal Article
In: Expert Systems With Applications, vol. 216, pp. 119491-119500, 2023.
Links | BibTeX | Tags: MultiScore
@article{Alfaro-Contreras2023c,
  title = {Late multimodal fusion for image and audio music transcription},
  author = {M. Alfaro-Contreras and J. J. Valero-Mas and J. M. Iñesta and J. Calvo-Zaragoza},
  doi = {10.1016/j.eswa.2022.119491},
  year = {2023},
  date = {2023-01-01},
  urldate = {2023-01-01},
  journal = {Expert Systems With Applications},
  volume = {216},
  pages = {119491--119500},
  keywords = {MultiScore},
  pubstate = {published},
  tppubtype = {article}
}
Rizo, David; Calvo-Zaragoza, Jorge; Martínez-Sevilla, Juan C; Roselló, Adrián; Fuentes-Martínez, Eliseo
Design of a music recognition, encoding, and
transcription online tool Proceedings Article
In: Proceedings of the 16th International Symposium on
Computer Music Multidisciplinary Research, pp. 18-29, Zenodo, 2023.
@inproceedings{Rizo2023,
  title = {Design of a music recognition, encoding, and transcription online tool},
  author = {David Rizo and Jorge Calvo-Zaragoza and Juan C Martínez-Sevilla and Adrián Roselló and Eliseo Fuentes-Martínez},
  url = {https://doi.org/10.5281/zenodo.10109915},
  doi = {10.5281/zenodo.10109915},
  year = {2023},
  date = {2023-01-01},
  booktitle = {Proceedings of the 16th International Symposium on Computer Music Multidisciplinary Research},
  pages = {18--29},
  publisher = {Zenodo},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Martínez-Sevilla, Juan Carlos; Roselló, Adrián; Rizo, David; Calvo-Zaragoza, Jorge
On the Performance of Optical Music Recognition in the Absence of
Specific Training Data Proceedings Article
In: Sarti, Augusto; Antonacci, Fabio; Sandler, Mark; Bestagini, Paolo; Dixon, Simon; Liang, Beici; Richard, Gaël; Pauwels, Johan (Ed.): Proceedings of the 24th International Society for Music Information
Retrieval Conference, ISMIR 2023, Milan, Italy, November 5-9, 2023, pp. 319-326, 2023.
@inproceedings{Martnez-Sevilla2023,
  title = {On the Performance of Optical Music Recognition in the Absence of Specific Training Data},
  author = {Juan Carlos Martínez-Sevilla and Adrián Roselló and David Rizo and Jorge Calvo-Zaragoza},
  editor = {Augusto Sarti and Fabio Antonacci and Mark Sandler and Paolo Bestagini and Simon Dixon and Beici Liang and Gaël Richard and Johan Pauwels},
  url = {https://doi.org/10.5281/zenodo.10265289},
  doi = {10.5281/ZENODO.10265289},
  year = {2023},
  date = {2023-01-01},
  booktitle = {Proceedings of the 24th International Society for Music Information Retrieval Conference, ISMIR 2023, Milan, Italy, November 5-9, 2023},
  pages = {319--326},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Rizo, David; Calvo-Zaragoza, Jorge; Martínez-Sevilla, Juan Carlos; Madueño, Antonio; García-Iasci, Patricia; Delgado-Sánchez, Teresa
Encoding in human centered machine learning workflows: case study on mensural ligature recognition Proceedings Article
In: Joint MEC TEI conference 2023, 2023.
BibTeX | Tags:
@inproceedings{Rizo2023b,
  author    = {David Rizo and Jorge Calvo-Zaragoza and Juan Carlos Martínez-Sevilla and Antonio Madueño and Patricia García-Iasci and Teresa Delgado-Sánchez},
  title     = {Encoding in human centered machine learning workflows: case study on mensural ligature recognition},
  booktitle = {Joint MEC TEI conference 2023},
  year      = {2023},
  date      = {2023-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Rizo, David; Delgado-Sánchez, Teresa; Calvo-Zaragoza, Jorge
Self-organization of sheet music through graphical patterns Proceedings Article
In: International Association of Music Libraries Congress, 2023.
BibTeX | Tags:
@inproceedings{Rizo2023c,
  author    = {David Rizo and Teresa Delgado-Sánchez and Jorge Calvo-Zaragoza},
  title     = {Self-organization of sheet music through graphical patterns},
  booktitle = {International Association of Music Libraries Congress},
  year      = {2023},
  date      = {2023-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2022
Ríos-Vila, A.; Iñesta, J. M.; Calvo-Zaragoza, J.
End-to-End Full-Page Optical Music Recognition for Mensural Notation Proceedings Article
In: Proceedings of the 23rd International Society for Music Information Retrieval Conference, pp. 226-232, 2022, ISBN: 978-1-7327299-2-6.
Abstract | Links | BibTeX | Tags: Leonardo2021, MultiScore
@inproceedings{Ríos-Vila2022,
  title = {End-to-End Full-Page Optical Music Recognition for Mensural Notation},
  author = {A. Ríos-Vila and J. M. Iñesta and J. Calvo-Zaragoza},
  url = {https://zenodo.org/record/7342678/files/000026.pdf?download=1},
  doi = {10.5281/zenodo.7342678},
  isbn = {978-1-7327299-2-6},
  year = {2022},
  date = {2022-12-04},
  urldate = {2022-12-04},
  booktitle = {Proceedings of the 23rd International Society for Music Information Retrieval Conference},
  pages = {226--232},
  abstract = {Optical Music Recognition (OMR) systems typically consider workflows that include several steps, such as staff detection, symbol recognition, and semantic reconstruction. However, fine-tuning these systems is costly due to the specific data labeling process that has to be performed to train models for each of these steps. In this paper, we present the first segmentation-free full-page OMR system that receives a page image and directly outputs the transcription in a single step. This model requires only the annotations of full score pages, which greatly alleviates the task of manual labeling. The model has been tested with early music written in mensural notation, for which the presented approach is especially beneficial. Results show that this methodology provides a solution with promising results and establishes a new line of research for holistic transcription of music score pages.},
  keywords = {Leonardo2021, MultiScore},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Optical Music Recognition (OMR) systems typically consider workflows that include several steps, such as staff detection, symbol recognition, and semantic reconstruction. However, fine-tuning these systems is costly due to the specific data labeling process that has to be performed to train models for each of these steps. In this paper, we present the first segmentation-free full-page OMR system that receives a page image and directly outputs the transcription in a single step. This model requires only the annotations of full score pages, which greatly alleviates the task of manual labeling. The model has been tested with early music written in mensural notation, for which the presented approach is especially beneficial. Results show that this methodology provides a solution with promising results and establishes a new line of research for holistic transcription of music score pages.
Garrido-Munoz, C.; Sánchez-Hernández, A.; Castellanos, F. J.; Calvo-Zaragoza, J.
Continual Learning for Document Image Binarization Proceedings Article
In: International Conference on Pattern Recognition, pp. 1443-1449, IEEE, Montreal, QC, Canada, 2022, ISBN: 978-1-6654-9063-4.
@inproceedings{Garrido:2022:CL,
  title = {Continual Learning for Document Image Binarization},
  author = {C. Garrido-Munoz and A. Sánchez-Hernández and F. J. Castellanos and J. Calvo-Zaragoza},
  url = {https://ieeexplore.ieee.org/abstract/document/9956669},
  doi = {10.1109/ICPR56361.2022.9956669},
  isbn = {978-1-6654-9063-4},
  year = {2022},
  date = {2022-08-21},
  urldate = {2022-08-21},
  booktitle = {International Conference on Pattern Recognition},
  pages = {1443--1449},
  publisher = {IEEE},
  address = {Montreal, QC, Canada},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
2025
Martinez-Esteso, J. P.; Castellanos, F. J.; Calvo-Zaragoza, J.; Gallego, A. J.
Maritime search and rescue missions with aerial images: A survey Journal Article
In: Computer Science Review, vol. 57, pp. 100736, 2025, ISSN: 1574-0137.
@article{Martinez-Esteso:2025:survey,
  title = {Maritime search and rescue missions with aerial images: A survey},
  author = {J. P. Martinez-Esteso and F. J. Castellanos and J. Calvo-Zaragoza and A. J. Gallego},
  doi = {10.1016/j.cosrev.2025.100736},
  issn = {1574-0137},
  year = {2025},
  date = {2025-02-25},
  urldate = {2025-02-25},
  journal = {Computer Science Review},
  volume = {57},
  pages = {100736},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Kim, D.; Han, D.; Jeong, D.; Valero-Mas, J. J.
On the automatic recognition of Jeongganbo music notation: dataset and approach Journal Article
In: Journal on Computing and Cultural Heritage, 2025, ISSN: 1556-4673.
@article{nokey,
  title = {On the automatic recognition of Jeongganbo music notation: dataset and approach},
  author = {D. Kim and D. Han and D. Jeong and J. J. Valero-Mas},
  issn = {1556-4673},
  year = {2025},
  date = {2025-01-16},
  urldate = {2025-01-16},
  journal = {Journal on Computing and Cultural Heritage},
  abstract = {The Jeongganbo notation, the first music representation system in East Asia capable of jointly expressing pitch and duration, has been extensively used---and still is---in the Korean music tradition since its inception in the 15th century. In this regard, there exists a plethora of music works that exclusively endure as physical sheets, which not only constitutes a heritage preservation challenge due to the inherent degradation of this format but also impedes the use of computational tools to study and exploit this music tradition. While the Optical Music Recognition (OMR) field, which represents the research area devoted to devising methods capable of automatically transcribing music sheets into digital formats, has addressed this issue in a number of music notations from the Western tradition, no previous research has considered the preservation of Jeonganbo scores. In this context, this work presents the following contributions: (i) the first data assortment of real Jeongganbo scores for OMR tasks; (ii) a collection of synthetic data generation and augmentation mechanisms to alleviate the scarcity of manual annotation; and (iii) a neural-based transcription scheme based on state-of-the-art OMR strategies specifically tailored to Jeongganbo scores. The experiments performed prove the validity of the approach---performance rates close to a 90\% of success---and open new research avenues for under-resourced yet challenging music notations.},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
2024
Martinez-Esteso, J. P.; Castellanos, F. J.; Rosello, A.; Calvo-Zaragoza, J.; Gallego, A. J.
On the use of synthetic data for body detection in maritime search and rescue operations Journal Article
In: Engineering Applications of Artificial Intelligence, vol. 139, pp. 109586, 2024, ISSN: 0952-1976.
@article{Martinez-Esteso:2024:synthetic,
  title = {On the use of synthetic data for body detection in maritime search and rescue operations},
  author = {J. P. Martinez-Esteso and F. J. Castellanos and A. Rosello and J. Calvo-Zaragoza and A. J. Gallego},
  url = {https://www.sciencedirect.com/science/article/pii/S0952197624017445},
  doi = {10.1016/j.engappai.2024.109586},
  issn = {0952-1976},
  year = {2024},
  date = {2024-11-07},
  urldate = {2024-11-07},
  journal = {Engineering Applications of Artificial Intelligence},
  volume = {139},
  pages = {109586},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Galan-Cuenca, A.; Valero-Mas, J. J.; Martinez-Sevilla, J. C.; Hidalgo-Centeno, A.; Pertusa, A.; Calvo-Zaragoza, J.
Proceedings of the 32nd ACM International Conference on Multimedia, Association for Computing Machinery, 2024, ISBN: 979-8-4007-0686-8.
Abstract | Links | BibTeX | Tags:
@conference{nokey,
  title = {MUSCAT: a Multimodal mUSic Collection for Automatic Transcription of real recordings and image scores},
  author = {A. Galan-Cuenca and J. J. Valero-Mas and J. C. Martinez-Sevilla and A. Hidalgo-Centeno and A. Pertusa and J. Calvo-Zaragoza},
  doi = {10.1145/3664647.3681572},
  isbn = {979-8-4007-0686-8},
  year = {2024},
  date = {2024-10-28},
  booktitle = {Proceedings of the 32nd ACM International Conference on Multimedia},
  pages = {583--591},
  publisher = {Association for Computing Machinery},
  abstract = {Multimodal audio-image music transcription has been recently posed as a means of retrieving a digital score representation by leveraging the individual estimations from Automatic Music Transcription (AMT)---acoustic recordings---and Optical Music Recognition (OMR)---image scores---systems. Nevertheless, while proven to outperform single-modality recognition rates, this approach has been exclusively validated under controlled scenarios---monotimbral and monophonic synthetic data---mainly due to a lack of collections with symbolic score-level annotations for both recordings and graphical sheets. To promote research on this topic, this work presents the Multimodal mUSic Collection for Automatic Transcription (MUSCAT) assortment of acoustic recordings, image sheets, and their score-level annotations in several notation formats. This dataset comprises almost 80 hours of real recordings with varied instrumentation and polyphony degrees---ranging from piano to orchestral music---, 1251 scanned sheets, and 880 symbolic scores from 37 composers, which may also be used in other tasks involving metadata such as instrument identification or composer recognition. A fragmented subset of this collection solely focused on acoustic data for score-level AMT---the MUSic Collection for aUtomatic Transcription - fragmented Subset (MUSCUTS) assortment---is also presented together with a baseline experimentation, concluding the need to foster research on this field with real recordings. Finally, a web-based service is also provided to increase the size of the collections collaboratively.},
  keywords = {},
  pubstate = {published},
  tppubtype = {conference}
}
Galan-Cuenca, A.; Valero-Mas, J. J.; Martinez-Sevilla, J. C.; Hidalgo-Centeno, A.; Pertusa, A.; Calvo-Zaragoza, J.
MM '24: Proceedings of the 32nd ACM International Conference on Multimedia, Association for Computing Machinery, New York, NY, USA, 2024, ISBN: 979-8-400-70686-8.
Abstract | Links | BibTeX | Tags: MultiScore
@conference{nokey,
  title = {MUSCAT: A Multimodal mUSic Collection for Automatic Transcription of Real Recordings and Image Scores},
  author = {A. Galan-Cuenca and J. J. Valero-Mas and J. C. Martinez-Sevilla and A. Hidalgo-Centeno and A. Pertusa and J. Calvo-Zaragoza},
  url = {https://doi.org/10.1145/3664647.3681572},
  doi = {10.1145/3664647.3681572},
  isbn = {979-8-4007-0686-8},
  year = {2024},
  date = {2024-10-28},
  urldate = {2024-10-28},
  booktitle = {MM '24: Proceedings of the 32nd ACM International Conference on Multimedia},
  pages = {583--591},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  abstract = {Multimodal audio-image music transcription has been recently posed as a means of retrieving a digital score representation by leveraging the individual estimations from Automatic Music Transcription (AMT)---acoustic recordings---and Optical Music Recognition (OMR)---image scores---systems. Nevertheless, while proven to outperform single-modality recognition rates, this approach has been exclusively validated under controlled scenarios---monotimbral and monophonic synthetic data---mainly due to a lack of collections with symbolic score-level annotations for both recordings and graphical sheets. To promote research on this topic, this work presents the Multimodal mUSic Collection for Automatic Transcription (MUSCAT) assortment of acoustic recordings, image sheets, and their score-level annotations in several notation formats. This dataset comprises almost 80 hours of real recordings with varied instrumentation and polyphony degrees---ranging from piano to orchestral music---, 1251 scanned sheets, and 880 symbolic scores from 37 composers, which may also be used in other tasks involving metadata such as instrument identification or composer recognition. A fragmented subset of this collection solely focused on acoustic data for score-level AMT---the MUSic Collection for aUtomatic Transcription - fragmented Subset (MUSCUTS) assortment---is also presented together with a baseline experimentation, concluding the need to foster research on this field with real recordings. Finally, a web-based service is also provided to increase the size of the collections collaboratively.},
  keywords = {MultiScore},
  pubstate = {published},
  tppubtype = {conference}
}
Penarrubia, C.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Contrastive Self-Supervised Learning for Optical Music Recognition Conference
International Workshop on Document Analysis Systems, 2024, ISBN: 978-3-031-70442-0.
Abstract | Links | BibTeX | Tags:
@conference{nokey,
  title = {Contrastive Self-Supervised Learning for Optical Music Recognition},
  author = {C. Penarrubia and J. J. Valero-Mas and J. Calvo-Zaragoza},
  doi = {10.1007/978-3-031-70442-0_19},
  isbn = {978-3-031-70442-0},
  year = {2024},
  date = {2024-09-11},
  urldate = {2024-09-11},
  booktitle = {International Workshop on Document Analysis Systems},
  pages = {312--326},
  abstract = {Optical Music Recognition (OMR) is the research area focused on transcribing images of musical scores. In recent years, this field has seen great development thanks to the emergence of Deep Learning. However, these types of solutions require large volumes of labeled data. To alleviate this problem, Contrastive Self-Supervised Learning (SSL) has emerged as a paradigm that leverages large amounts of unlabeled data to train neural networks, yielding meaningful and robust representations. In this work, we explore its first application to the field of OMR. By utilizing three datasets that represent the heterogeneity of musical scores in notations and graphic styles, and through multiple evaluation protocols, we demonstrate that contrastive SSL delivers promising results, significantly reducing data scarcity challenges in OMR. To the best of our knowledge, this is the first study that integrates these two fields. We hope this research serves as a baseline and stimulates further exploration.},
  keywords = {},
  pubstate = {published},
  tppubtype = {conference}
}
Castellanos, F. J.; Martinez-Esteso, J. P.; Galán-Cuenca, A.; Gallego, A. J.
A Region-Based Approach for Layout Analysis of Music Score Images in Scarce Data Scenarios Proceedings Article
In: Lecture Notes in Computer Science - International Conference on Document Analysis and Recognition, pp. 58-75, Springer, Athens, Greece, 2024, ISBN: 978-3-031-70545-8.
@inproceedings{Castellanos:2024:scarceLA,
  title = {A Region-Based Approach for Layout Analysis of Music Score Images in Scarce Data Scenarios},
  author = {F. J. Castellanos and J. P. Martinez-Esteso and A. Galán-Cuenca and A. J. Gallego},
  url = {https://link.springer.com/chapter/10.1007/978-3-031-70546-5_4},
  doi = {10.1007/978-3-031-70546-5_4},
  isbn = {978-3-031-70545-8},
  year = {2024},
  date = {2024-09-11},
  urldate = {2024-09-11},
  booktitle = {Lecture Notes in Computer Science - International Conference on Document Analysis and Recognition},
  volume = {14807},
  pages = {58--75},
  publisher = {Springer},
  address = {Athens, Greece},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Ayllon, E.; Castellanos, F. J.; Calvo-Zaragoza, J.
Analysis of the Calibration of Handwriting Text Recognition Models Proceedings Article
In: Lecture Notes in Computer Science - International Conference on Document Analysis and Recognition, pp. 139-155, Springer, Athens, Greece, 2024, ISBN: 978-3-031-70535-9.
@inproceedings{Ayllon:2024:calibration,
  title = {Analysis of the Calibration of Handwriting Text Recognition Models},
  author = {E. Ayllon and F. J. Castellanos and J. Calvo-Zaragoza},
  url = {https://link.springer.com/chapter/10.1007/978-3-031-70536-6_9},
  doi = {10.1007/978-3-031-70536-6_9},
  isbn = {978-3-031-70535-9},
  year = {2024},
  date = {2024-09-03},
  urldate = {2024-09-03},
  booktitle = {Lecture Notes in Computer Science - International Conference on Document Analysis and Recognition},
  volume = {14805},
  pages = {139--155},
  publisher = {Springer},
  address = {Athens, Greece},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Ríos-Vila, A.; Calvo-Zaragoza, J.; Paquet, T.
Sheet Music Transformer: End-To-End Optical Music Recognition Beyond Monophonic Transcription Conference
Document Analysis and Recognition - ICDAR 2024, vol. 1, Springer Nature Switzerland, 2024, ISBN: 978-3-031-70552-6.
BibTeX | Tags: MultiScore
@conference{RiosVila:ICDAR:2024,
  title = {Sheet Music Transformer: End-To-End Optical Music Recognition Beyond Monophonic Transcription},
  author = {A. Ríos-Vila and J. Calvo-Zaragoza and T. Paquet},
  isbn = {978-3-031-70552-6},
  year = {2024},
  date = {2024-09-02},
  urldate = {2024-09-02},
  booktitle = {Document Analysis and Recognition - ICDAR 2024},
  volume = {1},
  pages = {20--37},
  publisher = {Springer Nature Switzerland},
  keywords = {MultiScore},
  pubstate = {published},
  tppubtype = {conference}
}
Maciá, M.; Rizo, D.
The Impact of UX/UI on Piano-Assisted Learning in Extended Reality Conference
Computer Supported Music Education. Angers, France., 2024.
BibTeX | Tags:
@conference{macia2024,
  author    = {M. Maciá and D. Rizo},
  title     = {The Impact of UX/UI on Piano-Assisted Learning in Extended Reality},
  booktitle = {Computer Supported Music Education. Angers, France.},
  year      = {2024},
  date      = {2024-05-04},
  urldate   = {2024-05-04},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
}
Alfaro-Contreras, M.; Rios-Vila, A.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
A Transformer Approach for Polyphonic Audio-to-Score Transcription Proceedings Article
In: Proceedings of the International Conference on Acoustics, Speech, and Signal Processing (ICASSP 2024), Seoul (Korea), 2024.
Links | BibTeX | Tags: MultiScore
@inproceedings{Alfaro-Contreras:ICASSP24,
  title = {A Transformer Approach for Polyphonic Audio-to-Score Transcription},
  author = {M. Alfaro-Contreras and A. Rios-Vila and J. J. Valero-Mas and J. Calvo-Zaragoza},
  doi = {10.1109/ICASSP48485.2024.10447162},
  year = {2024},
  date = {2024-04-19},
  urldate = {2024-04-19},
  booktitle = {Proceedings of the International Conference on Acoustics, Speech, and Signal Processing (ICASSP 2024)},
  address = {Seoul (Korea)},
  keywords = {MultiScore},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Valero-Mas, J. J.; Gallego, A. J.; Rico-Juan, J. R.
An overview of ensemble and feature learning in few-shot image classification using siamese networks Journal Article
In: Multimedia Tools and Applications, vol. 83, pp. 19929–19952, 2024, ISSN: 1380-7501.
@article{nokey,
  title = {An overview of ensemble and feature learning in few-shot image classification using siamese networks},
  author = {J. J. Valero-Mas and A. J. Gallego and J. R. Rico-Juan},
  doi = {10.1007/s11042-023-15607-3},
  issn = {1380-7501},
  year = {2024},
  date = {2024-02-01},
  urldate = {2023-07-29},
  journal = {Multimedia Tools and Applications},
  volume = {83},
  pages = {19929--19952},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Thomae, Martha E.; Rizo, David; Fuentes-Martínez, Eliseo; Raurich, Cristina Alís; Luca, Elsa De; Calvo-Zaragoza, Jorge
A Preliminary Proposal for a Systematic GABC Encoding of Gregorian Chant Proceedings Article
In: ACM International Conference Proceeding Series, pp. 45-53, Association for Computing Machinery, 2024, ISBN: 9798400717208.
Abstract | Links | BibTeX | Tags: Aquitanian neumes, GABC, Gregorian chant, MEI, music encoding, Plainchant, REPERTORIUM, square notation
@inproceedings{Thomae2024,
  title = {A Preliminary Proposal for a Systematic GABC Encoding of Gregorian Chant},
  author = {Martha E. Thomae and David Rizo and Eliseo Fuentes-Martínez and Cristina Alís Raurich and Elsa De Luca and Jorge Calvo-Zaragoza},
  doi = {10.1145/3660570.3660581},
  isbn = {9798400717208},
  year = {2024},
  date = {2024-01-01},
  urldate = {2024-01-01},
  booktitle = {ACM International Conference Proceeding Series},
  pages = {45--53},
  publisher = {Association for Computing Machinery},
  abstract = {In the last years, several approaches have addressed the encoding of the different music scripts used for plainchant. One of these approaches is the GABC format. While being a comprehensive symbolic representation of square notation, the lack of a formal specification for GABC usually leads to ambiguities, which must be avoided in the specification of any encoding format. Sometimes, the simple trial-and-error approach of entering the GABC code into an engraving system - such as Illuminare, Scrib.io, or GABC Transcription Tool - can solve this ambiguity. However, these engraving systems have shown some inconsistency among themselves when rendering GABC, sometimes displaying different music for the same code snippet. This paper presents a systematic approach to encoding Gregorian chant originally written in Aquitanian neumes and square notation to eliminate ambiguities inherent in the GABC specification. By formalizing the grammar of GABC, we address the challenges of inaccurate renderings in current music notation software. Our methodology includes developing a "Systematic GABC"(S-GABC) following a critical and scientific mentality to ensure the endurance of the notation. This paper demonstrates our system's effectiveness in standardizing Gregorian chant encoding, offering significant contributions to digital musicology and enhancing the accuracy of musical heritage digitization.},
  keywords = {Aquitanian neumes, GABC, Gregorian chant, MEI, music encoding, Plainchant, REPERTORIUM, square notation},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Roselló, Adrián; Fuentes-Martínez, Eliseo; Alfaro-Contreras, María; Rizo, David; Calvo-Zaragoza, Jorge
Source-Free Domain Adaptation for Optical Music Recognition Book Chapter
In: pp. 3-19, 2024, ISSN: 16113349.
@inbook{Rosell2024,
  title = {Source-Free Domain Adaptation for Optical Music Recognition},
  author = {Adrián Roselló and Eliseo Fuentes-Martínez and María Alfaro-Contreras and David Rizo and Jorge Calvo-Zaragoza},
  doi = {10.1007/978-3-031-70552-6_1},
  issn = {1611-3349},
  year = {2024},
  date = {2024-01-01},
  booktitle = {Document Analysis and Recognition - ICDAR 2024},
  pages = {3--19},
  keywords = {},
  pubstate = {published},
  tppubtype = {inbook}
}
Rizo, David; Calvo-Zaragoza, Jorge; García-Iasci, Patricia; Delgado-Sánchez, Teresa
Lessons Learned From a Project to Encode Mensural Music on a Large Scale With Optical Music Recognition Proceedings Article
In: Kaneshiro, Blair; Mysore, Gautham J; Nieto, Oriol; Donahue, Chris; Huang, Cheng-Zhi Anna; Lee, Jin Ha; McFee, Brian; McCallum, Matthew C (Ed.): Proceedings of the 25th International Society for Music Information Retrieval Conference, ISMIR 2024, San Francisco, California, USA and Online, November 10-14, 2024, pp. 225-231, 2024.
@inproceedings{Rizo2024,
  title = {Lessons Learned From a Project to Encode Mensural Music on a Large Scale With Optical Music Recognition},
  author = {David Rizo and Jorge Calvo-Zaragoza and Patricia García-Iasci and Teresa Delgado-Sánchez},
  editor = {Blair Kaneshiro and Gautham J Mysore and Oriol Nieto and Chris Donahue and Cheng-Zhi Anna Huang and Jin Ha Lee and Brian McFee and Matthew C McCallum},
  url = {https://doi.org/10.5281/zenodo.14877315},
  doi = {10.5281/ZENODO.14877315},
  year = {2024},
  date = {2024-01-01},
  booktitle = {Proceedings of the 25th International Society for Music Information Retrieval Conference, ISMIR 2024, San Francisco, California, USA and Online, November 10-14, 2024},
  pages = {225--231},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Luna-Barahona, Noelia N; Rosello, Adrian; Alfaro-Contreras, María; Rizo, David; Calvo-Zaragoza, Jorge
Unsupervised Synthetic-to-Real Adaptation for Optical Music Recognition Proceedings Article
In: Kaneshiro, Blair; Mysore, Gautham J; Nieto, Oriol; Donahue, Chris; Huang, Cheng-Zhi Anna; Lee, Jin Ha; McFee, Brian; McCallum, Matthew C (Ed.): Proceedings of the 25th International Society for Music Information Retrieval Conference, ISMIR 2024, San Francisco, California, USA and Online, November 10-14, 2024, pp. 462-469, 2024.
@inproceedings{Luna-Barahona2024,
  title = {Unsupervised Synthetic-to-Real Adaptation for Optical Music Recognition},
  author = {Noelia N Luna-Barahona and Adrian Rosello and María Alfaro-Contreras and David Rizo and Jorge Calvo-Zaragoza},
  editor = {Blair Kaneshiro and Gautham J Mysore and Oriol Nieto and Chris Donahue and Cheng-Zhi Anna Huang and Jin Ha Lee and Brian McFee and Matthew C McCallum},
  url = {https://doi.org/10.5281/zenodo.14877375},
  doi = {10.5281/ZENODO.14877375},
  year = {2024},
  date = {2024-01-01},
  booktitle = {Proceedings of the 25th International Society for Music Information Retrieval Conference, ISMIR 2024, San Francisco, California, USA and Online, November 10-14, 2024},
  pages = {462--469},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Martinez-Sevilla, Juan Carlos; Rizo, David; Calvo-Zaragoza, Jorge
Towards Universal Optical Music Recognition: A Case Study on Notation Types Proceedings Article
In: Kaneshiro, Blair; Mysore, Gautham J; Nieto, Oriol; Donahue, Chris; Huang, Cheng-Zhi Anna; Lee, Jin Ha; McFee, Brian; McCallum, Matthew C (Ed.): Proceedings of the 25th International Society for Music Information Retrieval Conference, ISMIR 2024, San Francisco, California, USA and Online, November 10-14, 2024, pp. 914-921, 2024.
@inproceedings{Martinez-Sevilla2024,
  title = {Towards Universal Optical Music Recognition: A Case Study on Notation Types},
  author = {Juan Carlos Martinez-Sevilla and David Rizo and Jorge Calvo-Zaragoza},
  editor = {Blair Kaneshiro and Gautham J Mysore and Oriol Nieto and Chris Donahue and Cheng-Zhi Anna Huang and Jin Ha Lee and Brian McFee and Matthew C McCallum},
  url = {https://doi.org/10.5281/zenodo.14877479},
  doi = {10.5281/ZENODO.14877479},
  year = {2024},
  date = {2024-01-01},
  booktitle = {Proceedings of the 25th International Society for Music Information Retrieval Conference, ISMIR 2024, San Francisco, California, USA and Online, November 10-14, 2024},
  pages = {914--921},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Ríos-Vila, Antonio; Calvo-Zaragoza, Jorge; Rizo, David; Paquet, Thierry
Sheet Music Transformer ++: End-to-End Full-Page Optical Music Recognition for Pianoform Sheet Music Journal Article
In: CoRR, vol. abs/2405.12105, 2024.
@article{Ros-Vila2024,
  author    = {Antonio Ríos-Vila and Jorge Calvo-Zaragoza and David Rizo and Thierry Paquet},
  title     = {Sheet Music Transformer ++: End-to-End Full-Page Optical Music Recognition for Pianoform Sheet Music},
  journal   = {CoRR},
  volume    = {abs/2405.12105},
  year      = {2024},
  date      = {2024-01-01},
  doi       = {10.48550/ARXIV.2405.12105},
  url       = {https://doi.org/10.48550/arXiv.2405.12105},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Fuentes-Martínez, Eliseo; Ríos-Vila, Antonio; Martinez-Sevilla, Juan Carlos; Rizo, David; Calvo-Zaragoza, Jorge
Aligned Music Notation and Lyrics Transcription Journal Article
In: CoRR, vol. abs/2412.04217, 2024.
@article{Fuentes-Martnez2024,
  author    = {Eliseo Fuentes-Martínez and Antonio Ríos-Vila and Juan Carlos Martinez-Sevilla and David Rizo and Jorge Calvo-Zaragoza},
  title     = {Aligned Music Notation and Lyrics Transcription},
  journal   = {CoRR},
  volume    = {abs/2412.04217},
  year      = {2024},
  date      = {2024-01-01},
  doi       = {10.48550/ARXIV.2412.04217},
  url       = {https://doi.org/10.48550/arXiv.2412.04217},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
García-Iasci, Patricia; Martínez-Sevilla, Juan Carlos; Rizo, David; Calvo-Zaragoza, Jorge
Towards a standardization of lead sheet encoding: an experience in OMR Proceedings Article
In: Music Encoding Conference 2024, unknown, 2024.
@inproceedings{Garca-Iasci2024,
  title = {Towards a standardization of lead sheet encoding: an experience in OMR},
  author = {Patricia García-Iasci and Juan Carlos Martínez-Sevilla and David Rizo and Jorge Calvo-Zaragoza},
  url = {https://doi.org/10.17613/fbrj-k426},
  doi = {10.17613/fbrj-k426},
  year = {2024},
  date = {2024-01-01},
  booktitle = {Music Encoding Conference 2024},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Rizo, David; López-Rocamora, Pablo; Pardo-Cayuela, Antonio
A workflow for Attribution Issues using Language Models Proceedings Article
In: International Association of Music Libraries, 2024.
BibTeX | Tags:
@inproceedings{Rizo2024b,
  author     = {David Rizo and Pablo López-Rocamora and Antonio Pardo-Cayuela},
  title      = {A workflow for Attribution Issues using Language Models},
  booktitle  = {International Association of Music Libraries},
  year       = {2024},
  date       = {2024-01-01},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
García-Iasci, Patricia; Rizo, David
EA-Digifolk: Digitizing and encoding Irish Traditional Music at ITMA Proceedings Article
In: International Association of Music Libraries, 2024.
@inproceedings{Garca-Iasci2024b,
  author     = {Patricia García-Iasci and David Rizo},
  title      = {EA-Digifolk: Digitizing and encoding Irish Traditional Music at ITMA},
  booktitle  = {International Association of Music Libraries},
  year       = {2024},
  date       = {2024-01-01},
  urldate    = {2024-01-01},
  keywords   = {PolifonIA},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
Rizo, David; Delgado-Sánchez, Teresa; Calvo-Zaragoza, Jorge; García-Iasci, Patricia; Madueño-Madueño, Antonio
Insights into AI to encode a whole mensural collection with limited resources Proceedings Article
In: International Association of Music Libraries, 2024.
BibTeX | Tags:
@inproceedings{Rizo2024c,
  author     = {David Rizo and Teresa Delgado-Sánchez and Jorge Calvo-Zaragoza and Patricia García-Iasci and Antonio Madueño-Madueño},
  title      = {Insights into AI to encode a whole mensural collection with limited resources},
  booktitle  = {International Association of Music Libraries},
  year       = {2024},
  date       = {2024-01-01},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
González-Barrachina, Pedro; Alfaro-Contreras, María; Calvo-Zaragoza, Jorge
Continual Learning for Music Classification Proceedings Article
In: International Society for Music Information Retrieval Conference, ISMIR, pp. 596-602, 2024.
Abstract | Links | BibTeX | Tags:
@inproceedings{gonzalez2024continual,
  author     = {Pedro González-Barrachina and María Alfaro-Contreras and Jorge Calvo-Zaragoza},
  title      = {Continual Learning for Music Classification},
  booktitle  = {International Society for Music Information Retrieval Conference, ISMIR},
  pages      = {596--602},
  year       = {2024},
  date       = {2024-01-01},
  doi        = {10.5281/zenodo.14877406},
  abstract   = {Music classification is a prominent research area within Music Information Retrieval. While Deep Learning methods can adequately perform this task, their classification space remains fixed once trained, which conflicts with the dynamic nature of the ever-evolving music landscape. This work explores, for the first time, the application of Continual Learning (CL) in the context of music classification. Specifically, we thoroughly evaluate five state-of-the-art CL approaches across four different music classification tasks. Additionally, we showcase that a foundation model might be the key to CL in music classification. To that end, we study a new approach called Pre-trained Class Centers, which leverages pre-trained features to create fixed class-center spaces. Our results reveal that existing CL methods struggle when applied to music classification tasks, whereas this simple method consistently out-performs them. This highlights the need for CL methods tailored specifically for music classification.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
2023
Castellanos, F. J.; Gallego, A. J.; Fujinaga, I.
A Few-Shot Neural Approach for Layout Analysis of Music Score Images Proceedings Article
In: Proceedings of the 24th International Society for Music Information Retrieval Conference, pp. 106-113, Milan, Italy, 2023, ISBN: 978-1-7327299-3-3.
@inproceedings{Castellanos:2023:few,
  author     = {F. J. Castellanos and A. J. Gallego and I. Fujinaga},
  title      = {A Few-Shot Neural Approach for Layout Analysis of Music Score Images},
  booktitle  = {Proceedings of the 24th International Society for Music Information Retrieval Conference},
  pages      = {106--113},
  address    = {Milan, Italy},
  year       = {2023},
  date       = {2023-11-05},
  urldate    = {2023-11-05},
  url        = {https://archives.ismir.net/ismir2023/paper/000011.pdf},
  isbn       = {978-1-7327299-3-3},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
Martinez-Sevilla, Juan C; Rios-Vila, A; Castellanos, FJ; Calvo-Zaragoza, J
Towards Music Notation and Lyrics Alignment: Gregorian Chants as Case Study Working paper
2023.
Abstract | Links | BibTeX | Tags:
@workingpaper{Martinez:2023:lyrics,
  author     = {Juan C. Martinez-Sevilla and A. Rios-Vila and F. J. Castellanos and J. Calvo-Zaragoza},
  title      = {Towards Music Notation and Lyrics Alignment: Gregorian Chants as Case Study},
  issue      = {WORMS},
  year       = {2023},
  date       = {2023-11-04},
  url        = {https://arxiv.org/pdf/2311.04091#page=16},
  abstract   = {Proceedings of the 5th International Workshop on Reading Music Systems, Milan, Italy},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {workingpaper}
}
Castellanos, F. J.; Gallego, A. J.; Fujinaga, I.
A Preliminary Study of Few-shot Learning for Layout Analysis of Music Scores Working paper
2023.
Abstract | Links | BibTeX | Tags:
@workingpaper{Castellanos:2023:preFew,
  author     = {F. J. Castellanos and A. J. Gallego and I. Fujinaga},
  title      = {A Preliminary Study of Few-shot Learning for Layout Analysis of Music Scores},
  issue      = {WORMS},
  year       = {2023},
  date       = {2023-11-04},
  url        = {https://arxiv.org/pdf/2311.04091#page=45},
  abstract   = {Proceedings of the 5th International Workshop on Reading Music Systems, Milan, Italy},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {workingpaper}
}
Ramoneda, P.; Jeong, D.; Valero-Mas, J. J.; Serra, X.
Predicting performance difficulty from piano sheet music images Conference
Proceedings of the 24th International Society for Music Information Retrieval Conference, Milan, Italy, 2023, ISBN: 978-1-7327299-3-3.
Abstract | Links | BibTeX | Tags:
@conference{nokey,
  author     = {P. Ramoneda and D. Jeong and J. J. Valero-Mas and X. Serra},
  title      = {Predicting performance difficulty from piano sheet music images},
  booktitle  = {Proceedings of the 24th International Society for Music Information Retrieval Conference},
  pages      = {708--715},
  address    = {Milan, Italy},
  year       = {2023},
  date       = {2023-11-04},
  urldate    = {2023-11-04},
  doi        = {10.5281/zenodo.10265386},
  isbn       = {978-1-7327299-3-3},
  abstract   = {Estimating the performance difficulty of a musical score is crucial in music education for adequately designing the learning curriculum of the students. Although the music information retrieval community has recently shown interest in this task, existing approaches mainly use machine-readable scores, leaving the broader case of sheet music images unaddressed. Based on previous works involving sheet music images, we use a mid-level representation, bootleg score, describing notehead positions relative to staff lines coupled with a transformer model. This architecture is adapted to our task by introducing a different encoding scheme that reduces the encoded sequence length to one-eighth of the original size. In terms of evaluation, we consider five datasets---more than 7500 scores with up to 9 difficulty levels---, two being mainly compiled for this work. The results obtained when pretraining the scheme on the IMSLP corpus and fine-tuning it on the considered datasets prove the proposal's validity, achieving the best-performing model with a balanced accuracy of 40.3% and a mean square error of 1.3. Finally, we provide access to our code, data, and models for transparency and reproducibility.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {conference}
}
Penarrubia, C.; Garrido-Munoz, C.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Efficient notation assembly in optical music recognition Conference
Proceedings of the 24th International Society for Music Information Retrieval Conference, Milan, Italy, 2023, ISBN: 978-1-7327299-3-3.
BibTeX | Tags:
@conference{nokey,
  author     = {C. Penarrubia and C. Garrido-Munoz and J. J. Valero-Mas and J. Calvo-Zaragoza},
  title      = {Efficient notation assembly in optical music recognition},
  booktitle  = {Proceedings of the 24th International Society for Music Information Retrieval Conference},
  pages      = {182--189},
  address    = {Milan, Italy},
  year       = {2023},
  date       = {2023-10-30},
  isbn       = {978-1-7327299-3-3},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {conference}
}
Martínez-Sevilla, J. C.; Ríos-Vila, A.; Castellanos, F. J.; Calvo-Zaragoza, J.
A Holistic Approach for Aligned Music and Lyrics Transcription Conference
Document Analysis and Recognition - ICDAR 2023, vol. 1, Springer Nature Switzerland, Cham, 2023, ISBN: 978-3-031-41676-7.
Abstract | Links | BibTeX | Tags: REPERTORIUM
@conference{MartinezSevilla:ICDAR:2023,
  author     = {J. C. Martínez-Sevilla and A. Ríos-Vila and F. J. Castellanos and J. Calvo-Zaragoza},
  title      = {A Holistic Approach for Aligned Music and Lyrics Transcription},
  booktitle  = {Document Analysis and Recognition - ICDAR 2023},
  editor     = {Fink, Gernot A. and Jain, Rajiv and Kise, Koichi and Zanibbi, Richard},
  volume     = {1},
  pages      = {185--201},
  publisher  = {Springer Nature Switzerland},
  address    = {Cham},
  year       = {2023},
  date       = {2023-08-28},
  urldate    = {2023-08-28},
  doi        = {10.1007/978-3-031-41676-7_11},
  isbn       = {978-3-031-41676-7},
  abstract   = {In this paper, we present the Aligned Music Notation and Lyrics Transcription (AMNLT) challenge, whose goal is to retrieve the content from document images of vocal music. This new research area arises from the need to automatically transcribe notes and lyrics from music scores and align both sources of information conveniently. Although existing methods are able to deal with music notation and text, they work without providing their proper alignment, which is crucial to actually retrieve the content of the piece of vocal music. To overcome this challenge, we consider holistic neural approaches that transcribe music and text in one step, along with an encoding that implicitly aligns the sources of information. The methodology is evaluated on a benchmark specifically designed for AMNLT. The results report that existing methods can obtain high-quality text and music transcriptions, but posterior alignment errors are inevitably found. However, our formulation achieves relative improvements of over 80{%} in the metric that considers both transcription and alignment. We hope that this work will establish itself as a future reference for further research on AMNLT.},
  keywords   = {REPERTORIUM},
  pubstate   = {published},
  tppubtype  = {conference}
}
Martínez-Sevilla, J. C.; Alfaro-Contreras, M.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Insights into end-to-end audio-to-score transcription with real recordings: A case study with saxophone works Proceedings Article
In: INTERSPEECH Conference, pp. 2793-2797, Dublin, Ireland, 2023.
Links | BibTeX | Tags: MultiScore
@inproceedings{Martínez-Sevilla2023,
  author     = {J. C. Martínez-Sevilla and M. Alfaro-Contreras and J. J. Valero-Mas and J. Calvo-Zaragoza},
  title      = {Insights into end-to-end audio-to-score transcription with real recordings: A case study with saxophone works},
  booktitle  = {INTERSPEECH Conference},
  pages      = {2793--2797},
  address    = {Dublin, Ireland},
  year       = {2023},
  date       = {2023-08-20},
  urldate    = {2023-08-20},
  doi        = {10.21437/Interspeech.2023-88},
  keywords   = {MultiScore},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
Alfaro-Contreras, M.; Valero-Mas, J. J.; Iñesta, J. M.; Calvo-Zaragoza, J.
Multimodal Strategies for Image and Audio Music Transcription: A Comparative Study Proceedings Article
In: Pattern Recognition, Computer Vision, and Image Processing. ICPR 2022 International Workshops and Challenges. ICPR 2022. Lecture Notes in Computer Science, pp. 64-77, Springer Nature Switzerland, Cham, 2023, ISBN: 978-3-031-37731-0.
Links | BibTeX | Tags: MultiScore
@inproceedings{k505,
  author     = {M. Alfaro-Contreras and J. J. Valero-Mas and J. M. Iñesta and J. Calvo-Zaragoza},
  title      = {Multimodal Strategies for Image and Audio Music Transcription: A Comparative Study},
  booktitle  = {Pattern Recognition, Computer Vision, and Image Processing. ICPR 2022 International Workshops and Challenges. ICPR 2022. Lecture Notes in Computer Science},
  volume     = {13645},
  pages      = {64--77},
  publisher  = {Springer Nature Switzerland},
  address    = {Cham},
  year       = {2023},
  date       = {2023-08-10},
  urldate    = {2022-01-01},
  doi        = {10.1007/978-3-031-37731-0_6},
  isbn       = {978-3-031-37731-0},
  keywords   = {MultiScore},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
Garrido-Munoz, C.; Alfaro-Contreras, M.; Calvo-Zaragoza, J.
Evaluating Domain Generalization in Kitchen Utensils Classification Proceedings Article
In: Iberian Conference on Pattern Recognition and Image Analysis, pp. 108-118, 2023.
Links | BibTeX | Tags: MultiScore
@inproceedings{Garrido-Munoz2023,
  author     = {C. Garrido-Munoz and M. Alfaro-Contreras and J. Calvo-Zaragoza},
  title      = {Evaluating Domain Generalization in Kitchen Utensils Classification},
  booktitle  = {Iberian Conference on Pattern Recognition and Image Analysis},
  pages      = {108--118},
  year       = {2023},
  date       = {2023-06-25},
  doi        = {10.1007/978-3-031-36616-1_9},
  keywords   = {MultiScore},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
González-Barrachina, P.; Alfaro-Contreras, M.; Nieto-Hidalgo, M.; Calvo-Zaragoza, J.
Lifelong Learning for Document Image Binarization: An Experimental Study Proceedings Article
In: Iberian Conference on Pattern Recognition and Image Analysis, pp. 146-157, 2023.
Links | BibTeX | Tags: MultiScore
@inproceedings{González-Barrachina2023,
  author     = {P. González-Barrachina and M. Alfaro-Contreras and M. Nieto-Hidalgo and J. Calvo-Zaragoza},
  title      = {Lifelong Learning for Document Image Binarization: An Experimental Study},
  booktitle  = {Iberian Conference on Pattern Recognition and Image Analysis},
  pages      = {146--157},
  year       = {2023},
  date       = {2023-06-25},
  urldate    = {2023-06-25},
  doi        = {10.1007/978-3-031-36616-1_12},
  keywords   = {MultiScore},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
Penarrubia, C.; Valero-Mas, J. J.; Gallego, A. J.; Calvo-Zaragoza, J.
Addressing Class Imbalance in Multilabel Prototype Generation for k-Nearest Neighbor Classification Conference
Iberian Conference on Pattern Recognition and Image Analysis, Alicante, Spain, 2023, ISBN: 978-3-031-36616-1.
Abstract | Links | BibTeX | Tags: DOREMI
@conference{nokey,
  author     = {C. Penarrubia and J. J. Valero-Mas and A. J. Gallego and J. Calvo-Zaragoza},
  title      = {Addressing Class Imbalance in Multilabel Prototype Generation for k-Nearest Neighbor Classification},
  booktitle  = {Iberian Conference on Pattern Recognition and Image Analysis},
  pages      = {15--27},
  address    = {Alicante, Spain},
  year       = {2023},
  date       = {2023-06-25},
  doi        = {10.1007/978-3-031-36616-1_2},
  isbn       = {978-3-031-36616-1},
  abstract   = {Prototype Generation (PG) methods seek to improve the efficiency of the k-Nearest Neighbor (kNN) classifier by obtaining a reduced version of a given reference dataset following certain heuristics. Despite being largely addressed topic in multiclass scenarios, few works deal with PG in multilabel environments. Hence, the existing proposals exhibit a number of limitations, being label imbalance one of paramount relevance as it constitutes a typical challenge of multilabel datasets. This work proposes two novel merging policies for multilabel PG schemes specifically devised for label imbalance, as well as a mechanism to prevent inappropriate samples from undergoing a reduction process. These proposals are applied to three existing multilabel PG methods—Multilabel Reduction through Homogeneous Clustering, Multilabel Chen, and Multilabel Reduction through Space Partitioning—and evaluated on 12 different data assortments with different degrees of label imbalance. The results prove that the proposals overcome—in some cases in a significant manner—those obtained with the original methods, hence validating the presented approaches and enabling further research lines on this topic.},
  keywords   = {DOREMI},
  pubstate   = {published},
  tppubtype  = {conference}
}
Ayllon, E.; Castellanos, F. J.; Calvo-Zaragoza, J.
A Weakly-Supervised Approach for Layout Analysis in Music Score Images Proceedings Article
In: Iberian Conference on Pattern Recognition and Image Analysis, pp. 170-181, Springer Nature Switzerland, 2023, ISBN: 978-3-031-36615-4.
@inproceedings{Ayllon2023:weakly,
  author     = {E. Ayllon and F. J. Castellanos and J. Calvo-Zaragoza},
  title      = {A Weakly-Supervised Approach for Layout Analysis in Music Score Images},
  booktitle  = {Iberian Conference on Pattern Recognition and Image Analysis},
  volume     = {14062},
  pages      = {170--181},
  publisher  = {Springer Nature Switzerland},
  year       = {2023},
  date       = {2023-06-25},
  urldate    = {2023-06-25},
  doi        = {10.1007/978-3-031-36616-1_14},
  url        = {https://link.springer.com/chapter/10.1007/978-3-031-36616-1_14},
  isbn       = {978-3-031-36615-4},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
Rosello, A.; Castellanos, F. J.; Martinez-Esteso, J. P.; Gallego, A. J.; Calvo-Zaragoza, J.
Test-Time Augmentation for Document Image Binarization Proceedings Article
In: Iberian Conference on Pattern Recognition and Image Analysis, pp. 158-169, Springer Nature Switzerland, 2023, ISBN: 978-3-031-36615-4.
@inproceedings{Rosello2023:test-time,
  author     = {A. Rosello and F. J. Castellanos and J. P. Martinez-Esteso and A. J. Gallego and J. Calvo-Zaragoza},
  title      = {Test-Time Augmentation for Document Image Binarization},
  booktitle  = {Iberian Conference on Pattern Recognition and Image Analysis},
  volume     = {14062},
  pages      = {158--169},
  publisher  = {Springer Nature Switzerland},
  year       = {2023},
  date       = {2023-06-25},
  urldate    = {2023-06-25},
  doi        = {10.1007/978-3-031-36616-1_13},
  isbn       = {978-3-031-36615-4},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
Alfaro-Contreras, M.; Iñesta, J. M.; Calvo-Zaragoza, J.
Optical Music Recognition for Homophonic Scores with Neural Networks and Synthetic Music Generation Journal Article
In: International Journal of Multimedia Information Retrieval, vol. 12, pp. 12-24, 2023.
Links | BibTeX | Tags: MultiScore
@article{Alfaro-Contreras2023b,
  author     = {M. Alfaro-Contreras and J. M. Iñesta and J. Calvo-Zaragoza},
  title      = {Optical Music Recognition for Homophonic Scores with Neural Networks and Synthetic Music Generation},
  journal    = {International Journal of Multimedia Information Retrieval},
  volume     = {12},
  pages      = {12--24},
  year       = {2023},
  date       = {2023-05-26},
  urldate    = {2023-05-26},
  doi        = {10.1007/s13735-023-00278-5},
  keywords   = {MultiScore},
  pubstate   = {published},
  tppubtype  = {article}
}
Ríos-Vila, A.; Rizo, D.; Iñesta, J. M.; Calvo-Zaragoza, J.
End-to-end optical music recognition for pianoform sheet music Journal Article
In: International Journal on Document Analysis and Recognition (IJDAR), iss. ICDAR 2023, 2023, ISSN: 1433-2825.
Abstract | Links | BibTeX | Tags: MultiScore
@article{Ríos-Vila2023,
  author     = {A. Ríos-Vila and D. Rizo and J. M. Iñesta and J. Calvo-Zaragoza},
  title      = {End-to-end optical music recognition for pianoform sheet music},
  journal    = {International Journal on Document Analysis and Recognition (IJDAR)},
  issue      = {ICDAR 2023},
  year       = {2023},
  date       = {2023-05-12},
  urldate    = {2023-05-12},
  doi        = {10.1007/s10032-023-00432-z},
  url        = {https://link.springer.com/content/pdf/10.1007/s10032-023-00432-z.pdf},
  issn       = {1433-2825},
  abstract   = {End-to-end solutions have brought about significant advances in the field of Optical Music Recognition. These approaches directly provide the symbolic representation of a given image of a musical score. Despite this, several documents, such as pianoform musical scores, cannot yet benefit from these solutions since their structural complexity does not allow their effective transcription. This paper presents a neural method whose objective is to transcribe these musical scores in an end-to-end fashion. We also introduce the GrandStaff dataset, which contains 53,882 single-system piano scores in common western modern notation. The sources are encoded in both a standard digital music representation and its adaptation for current transcription technologies. The method proposed in this paper is trained and evaluated using this dataset. The results show that the approach presented is, for the first time, able to effectively transcribe pianoform notation in an end-to-end manner.},
  keywords   = {MultiScore},
  pubstate   = {published},
  tppubtype  = {article}
}
Alfaro-Contreras, M.; Ríos-Vila, A.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Few-Shot Symbol Classification via Self-Supervised Learning and Nearest Neighbor Journal Article
In: Pattern Recognition Letters, vol. 167, pp. 1-8, 2023.
Links | BibTeX | Tags: MultiScore
@article{Alfaro-Contreras2023,
  author     = {M. Alfaro-Contreras and A. Ríos-Vila and J. J. Valero-Mas and J. Calvo-Zaragoza},
  title      = {Few-Shot Symbol Classification via Self-Supervised Learning and Nearest Neighbor},
  journal    = {Pattern Recognition Letters},
  volume     = {167},
  pages      = {1--8},
  year       = {2023},
  date       = {2023-03-01},
  doi        = {10.1016/j.patrec.2023.01.014},
  keywords   = {MultiScore},
  pubstate   = {published},
  tppubtype  = {article}
}
Rico-Juan, J. R.; Sánchez-Cartagena, V. M.; Valero-Mas, J. J.; Gallego, A. J.
Identifying student profiles within online judge systems using explainable artificial intelligence Journal Article
In: IEEE Transactions on Learning Technologies, vol. 16, no. 6, pp. 955-969, 2023, ISSN: 1939-1382.
@article{nokey,
  author     = {J. R. Rico-Juan and V. M. Sánchez-Cartagena and J. J. Valero-Mas and A. J. Gallego},
  title      = {Identifying student profiles within online judge systems using explainable artificial intelligence},
  journal    = {IEEE Transactions on Learning Technologies},
  volume     = {16},
  number     = {6},
  pages      = {955--969},
  year       = {2023},
  date       = {2023-01-23},
  urldate    = {2023-01-23},
  doi        = {10.1109/TLT.2023.3239110},
  issn       = {1939-1382},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {article}
}
Valero-Mas, J. J.; Gallego, A. J.; Alonso-Jiménez, P.; Serra, X.
Multilabel Prototype Generation for Data Reduction in k-Nearest Neighbour classification Journal Article
In: Pattern Recognition, vol. 135, pp. 109190, 2023, ISSN: 0031-3203.
Abstract | Links | BibTeX | Tags: DOREMI, MultiScore
@article{k519,
  author     = {J. J. Valero-Mas and A. J. Gallego and P. Alonso-Jiménez and X. Serra},
  title      = {Multilabel Prototype Generation for Data Reduction in k-Nearest Neighbour classification},
  journal    = {Pattern Recognition},
  volume     = {135},
  pages      = {109190},
  year       = {2023},
  date       = {2023-01-01},
  urldate    = {2023-01-01},
  doi        = {10.1016/j.patcog.2022.109190},
  issn       = {0031-3203},
  abstract   = {Prototype Generation (PG) methods are typically considered for improving the efficiency of the k-Nearest Neighbour (kNN) classifier when tackling high-size corpora. Such approaches aim at generating a reduced version of the corpus without decreasing the classification performance when compared to the initial set. Despite their large application in multiclass scenarios, very few works have addressed the proposal of PG methods for the multilabel space. In this regard, this work presents the novel adaptation of four multiclass PG strategies to the multilabel case. These proposals are evaluated with three multilabel kNN-based classifiers, 12 corpora comprising a varied range of domains and corpus sizes, and different noise scenarios artificially induced in the data. The results obtained show that the proposed adaptations are capable of significantly improving—both in terms of efficiency and classification performance—the only reference multilabel PG work in the literature as well as the case in which no PG method is applied, also presenting statistically superior robustness in noisy scenarios. Moreover, these novel PG strategies allow prioritising either the efficiency or efficacy criteria through its configuration depending on the target scenario, hence covering a wide area in the solution space not previously filled by other works.},
  keywords   = {DOREMI, MultiScore},
  pubstate   = {published},
  tppubtype  = {article}
}
Sánchez-Ferrer, A.; Valero-Mas, J. J.; Gallego, A. J.; Calvo-Zaragoza, J.
An Experimental Study on Marine Debris Location and Recognition using Object Detection Journal Article
In: Pattern Recognition Letters, 2023, ISSN: 0167-8655.
Abstract | Links | BibTeX | Tags: TADMar
@article{k521,
  author     = {A. Sánchez-Ferrer and J. J. Valero-Mas and A. J. Gallego and J. Calvo-Zaragoza},
  title      = {An Experimental Study on Marine Debris Location and Recognition using Object Detection},
  journal    = {Pattern Recognition Letters},
  year       = {2023},
  date       = {2023-01-01},
  urldate    = {2023-01-01},
  doi        = {10.1016/j.patrec.2022.12.019},
  issn       = {0167-8655},
  abstract   = {The large amount of debris in our oceans is a global problem that dramatically impacts marine fauna and flora. While a large number of human-based campaigns have been proposed to tackle this issue, these efforts have been deemed insufficient due to the insurmountable amount of existing litter. In response to that, there exists a high interest in the use of autonomous underwater vehicles (AUV) that may locate, identify, and collect this garbage automatically. To perform such a task, AUVs consider state-of-the-art object detection techniques based on deep neural networks due to their reported high performance. Nevertheless, these techniques generally require large amounts of data with fine-grained annotations. In this work, we explore the capabilities of the reference object detector Mask Region-based Convolutional Neural Networks for automatic marine debris location and classification in the context of limited data availability. Considering the recent CleanSea corpus, we pose several scenarios regarding the amount of available train data and study the possibility of mitigating the adverse effects of data scarcity with synthetic marine scenes. Our results achieve a new state of the art in the task, establishing a new reference for future research. In addition, it is shown that the task still has room for improvement and that the lack of data can be somehow alleviated, yet to a limited extent.},
  keywords   = {TADMar},
  pubstate   = {published},
  tppubtype  = {article}
}
Alfaro-Contreras, M.; Valero-Mas, J. J.; Iñesta, J. M.; Calvo-Zaragoza, J.
Late multimodal fusion for image and audio music transcription Journal Article
In: Expert Systems With Applications, vol. 216, pp. 119491-119500, 2023.
Links | BibTeX | Tags: MultiScore
@article{Alfaro-Contreras2023c,
  author     = {M. Alfaro-Contreras and J. J. Valero-Mas and J. M. Iñesta and J. Calvo-Zaragoza},
  title      = {Late multimodal fusion for image and audio music transcription},
  journal    = {Expert Systems With Applications},
  volume     = {216},
  pages      = {119491--119500},
  year       = {2023},
  date       = {2023-01-01},
  urldate    = {2023-01-01},
  doi        = {10.1016/j.eswa.2022.119491},
  keywords   = {MultiScore},
  pubstate   = {published},
  tppubtype  = {article}
}
Rizo, David; Calvo-Zaragoza, Jorge; Martínez-Sevilla, Juan C; Roselló, Adrián; Fuentes-Martínez, Eliseo
Design of a music recognition, encoding, and transcription online tool Proceedings Article
In: Proceedings of the 16th International Symposium on Computer Music Multidisciplinary Research, pp. 18-29, Zenodo, 2023.
@inproceedings{Rizo2023,
  author     = {David Rizo and Jorge Calvo-Zaragoza and Juan C Martínez-Sevilla and Adrián Roselló and Eliseo Fuentes-Martínez},
  title      = {Design of a music recognition, encoding, and transcription online tool},
  booktitle  = {Proceedings of the 16th International Symposium on Computer Music Multidisciplinary Research},
  pages      = {18--29},
  publisher  = {Zenodo},
  year       = {2023},
  date       = {2023-01-01},
  doi        = {10.5281/zenodo.10109915},
  url        = {https://doi.org/10.5281/zenodo.10109915},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
Martínez-Sevilla, Juan Carlos; Roselló, Adrián; Rizo, David; Calvo-Zaragoza, Jorge
On the Performance of Optical Music Recognition in the Absence of Specific Training Data Proceedings Article
In: Sarti, Augusto; Antonacci, Fabio; Sandler, Mark; Bestagini, Paolo; Dixon, Simon; Liang, Beici; Richard, Gaël; Pauwels, Johan (Ed.): Proceedings of the 24th International Society for Music Information Retrieval Conference, ISMIR 2023, Milan, Italy, November 5-9, 2023, pp. 319-326, 2023.
@inproceedings{Martnez-Sevilla2023,
  author     = {Juan Carlos Martínez-Sevilla and Adrián Roselló and David Rizo and Jorge Calvo-Zaragoza},
  title      = {On the Performance of Optical Music Recognition in the Absence of Specific Training Data},
  booktitle  = {Proceedings of the 24th International Society for Music Information Retrieval Conference, ISMIR 2023, Milan, Italy, November 5-9, 2023},
  editor     = {Augusto Sarti and Fabio Antonacci and Mark Sandler and Paolo Bestagini and Simon Dixon and Beici Liang and Gaël Richard and Johan Pauwels},
  pages      = {319--326},
  year       = {2023},
  date       = {2023-01-01},
  doi        = {10.5281/ZENODO.10265289},
  url        = {https://doi.org/10.5281/zenodo.10265289},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
Rizo, David; Calvo-Zaragoza, Jorge; Martínez-Sevilla, Juan Carlos; Madueño, Antonio; García-Iasci, Patricia; Delgado-Sánchez, Teresa
Encoding in human centered machine learning workflows: case study on mensural ligature recognition Proceedings Article
In: Joint MEC TEI conference 2023, 2023.
BibTeX | Tags:
@inproceedings{Rizo2023b,
  author     = {David Rizo and Jorge Calvo-Zaragoza and Juan Carlos Martínez-Sevilla and Antonio Madueño and Patricia García-Iasci and Teresa Delgado-Sánchez},
  title      = {Encoding in human centered machine learning workflows: case study on mensural ligature recognition},
  booktitle  = {Joint MEC TEI conference 2023},
  year       = {2023},
  date       = {2023-01-01},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
Rizo, David; Delgado-Sánchez, Teresa; Calvo-Zaragoza, Jorge
Self-organization of sheet music through graphical patterns Proceedings Article
In: International Association of Music Libraries Congress, 2023.
BibTeX | Tags:
@inproceedings{Rizo2023c,
  author     = {David Rizo and Teresa Delgado-Sánchez and Jorge Calvo-Zaragoza},
  title      = {Self-organization of sheet music through graphical patterns},
  booktitle  = {International Association of Music Libraries Congress},
  year       = {2023},
  date       = {2023-01-01},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
2022
Ríos-Vila, A.; Iñesta, J. M.; Calvo-Zaragoza, J.
End-to-End Full-Page Optical Music Recognition for Mensural Notation Proceedings Article
In: Proceedings of the 23rd International Society for Music Information Retrieval Conference, pp. 226-232, 2022, ISBN: 978-1-7327299-2-6.
Abstract | Links | BibTeX | Tags: Leonardo2021, MultiScore
@inproceedings{Ríos-Vila2022,
  author     = {A. Ríos-Vila and J. M. Iñesta and J. Calvo-Zaragoza},
  title      = {End-to-End Full-Page Optical Music Recognition for Mensural Notation},
  booktitle  = {Proceedings of the 23rd International Society for Music Information Retrieval Conference},
  pages      = {226--232},
  year       = {2022},
  date       = {2022-12-04},
  urldate    = {2022-12-04},
  doi        = {10.5281/zenodo.7342678},
  url        = {https://zenodo.org/record/7342678/files/000026.pdf?download=1},
  isbn       = {978-1-7327299-2-6},
  abstract   = {Optical Music Recognition (OMR) systems typically consider workflows that include several steps, such as staff detection, symbol recognition, and semantic reconstruction. However, fine-tuning these systems is costly due to the specific data labeling process that has to be performed to train models for each of these steps. In this paper, we present the first segmentation-free full-page OMR system that receives a page image and directly outputs the transcription in a single step. This model requires only the annotations of full score pages, which greatly alleviates the task of manual labeling. The model has been tested with early music written in mensural notation, for which the presented approach is especially beneficial. Results show that this methodology provides a solution with promising results and establishes a new line of research for holistic transcription of music score pages.},
  keywords   = {Leonardo2021, MultiScore},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
Garrido-Munoz, C.; Sánchez-Hernández, A.; Castellanos, F. J.; Calvo-Zaragoza, J.
Continual Learning for Document Image Binarization Proceedings Article
In: International Conference on Pattern Recognition, pp. 1443-1449, IEEE, Montreal, QC, Canada, 2022, ISBN: 978-1-6654-9063-4.
@inproceedings{Garrido:2022:CL,
  author     = {C. Garrido-Munoz and A. Sánchez-Hernández and F. J. Castellanos and J. Calvo-Zaragoza},
  title      = {Continual Learning for Document Image Binarization},
  booktitle  = {International Conference on Pattern Recognition},
  pages      = {1443--1449},
  publisher  = {IEEE},
  address    = {Montreal, QC, Canada},
  year       = {2022},
  date       = {2022-08-21},
  urldate    = {2022-08-21},
  doi        = {10.1109/ICPR56361.2022.9956669},
  url        = {https://ieeexplore.ieee.org/abstract/document/9956669},
  isbn       = {978-1-6654-9063-4},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}