2024
Galan-Cuenca, A.; Valero-Mas, J. J.; Martinez-Sevilla, J. C.; Hidalgo-Centeno, A.; Pertusa, A.; Calvo-Zaragoza, J.
Proceedings of the 32nd ACM International Conference on Multimedia, Association for Computing Machinery, 2024, ISBN: 979-8-4007-0686-8.
Abstract | Links | BibTeX | Tags:
@conference{nokey,
  author    = {A. Galan-Cuenca and J. J. Valero-Mas and J. C. Martinez-Sevilla and A. Hidalgo-Centeno and A. Pertusa and J. Calvo-Zaragoza},
  title     = {{MUSCAT}: a Multimodal {mUSic} Collection for Automatic Transcription of real recordings and image scores},
  booktitle = {Proceedings of the 32nd ACM International Conference on Multimedia},
  pages     = {583--591},
  publisher = {Association for Computing Machinery},
  year      = {2024},
  date      = {2024-10-28},
  doi       = {10.1145/3664647.3681572},
  isbn      = {979-8-4007-0686-8},
  abstract  = {Multimodal audio-image music transcription has been recently posed as a means of retrieving a digital score representation by leveraging the individual estimations from Automatic Music Transcription (AMT)---acoustic recordings---and Optical Music Recognition (OMR)---image scores---systems. Nevertheless, while proven to outperform single-modality recognition rates, this approach has been exclusively validated under controlled scenarios---monotimbral and monophonic synthetic data---mainly due to a lack of collections with symbolic score-level annotations for both recordings and graphical sheets. To promote research on this topic, this work presents the Multimodal mUSic Collection for Automatic Transcription (MUSCAT) assortment of acoustic recordings, image sheets, and their score-level annotations in several notation formats. This dataset comprises almost 80 hours of real recordings with varied instrumentation and polyphony degrees---ranging from piano to orchestral music---, 1251 scanned sheets, and 880 symbolic scores from 37 composers, which may also be used in other tasks involving metadata such as instrument identification or composer recognition. A fragmented subset of this collection solely focused on acoustic data for score-level AMT---the MUSic Collection for aUtomatic Transcription - fragmented Subset (MUSCUTS) assortment---is also presented together with a baseline experimentation, concluding the need to foster research on this field with real recordings. Finally, a web-based service is also provided to increase the size of the collections collaboratively.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
}
Multimodal audio-image music transcription has been recently posed as a means of retrieving a digital score representation by leveraging the individual estimations from Automatic Music Transcription (AMT)---acoustic recordings---and Optical Music Recognition (OMR)---image scores---systems. Nevertheless, while proven to outperform single-modality recognition rates, this approach has been exclusively validated under controlled scenarios---monotimbral and monophonic synthetic data---mainly due to a lack of collections with symbolic score-level annotations for both recordings and graphical sheets. To promote research on this topic, this work presents the Multimodal mUSic Collection for Automatic Transcription (MUSCAT) assortment of acoustic recordings, image sheets, and their score-level annotations in several notation formats. This dataset comprises almost 80 hours of real recordings with varied instrumentation and polyphony degrees---ranging from piano to orchestral music---, 1251 scanned sheets, and 880 symbolic scores from 37 composers, which may also be used in other tasks involving metadata such as instrument identification or composer recognition. A fragmented subset of this collection solely focused on acoustic data for score-level AMT---the MUSic Collection for aUtomatic Transcription - fragmented Subset (MUSCUTS) assortment---is also presented together with a baseline experimentation, concluding the need to foster research on this field with real recordings. Finally, a web-based service is also provided to increase the size of the collections collaboratively. Penarrubia, C.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Contrastive Self-Supervised Learning for Optical Music Recognition Conference
International Workshop on Document Analysis Systems, 2024, ISBN: 978-3-031-70442-0.
Abstract | Links | BibTeX | Tags:
@conference{nokey,
  author    = {C. Penarrubia and J. J. Valero-Mas and J. Calvo-Zaragoza},
  title     = {Contrastive Self-Supervised Learning for Optical Music Recognition},
  booktitle = {International Workshop on Document Analysis Systems},
  pages     = {312--326},
  year      = {2024},
  date      = {2024-09-11},
  urldate   = {2024-09-11},
  doi       = {10.1007/978-3-031-70442-0_19},
  isbn      = {978-3-031-70442-0},
  abstract  = {Optical Music Recognition (OMR) is the research area focused on transcribing images of musical scores. In recent years, this field has seen great development thanks to the emergence of Deep Learning. However, these types of solutions require large volumes of labeled data. To alleviate this problem, Contrastive Self-Supervised Learning (SSL) has emerged as a paradigm that leverages large amounts of unlabeled data to train neural networks, yielding meaningful and robust representations. In this work, we explore its first application to the field of OMR. By utilizing three datasets that represent the heterogeneity of musical scores in notations and graphic styles, and through multiple evaluation protocols, we demonstrate that contrastive SSL delivers promising results, significantly reducing data scarcity challenges in OMR. To the best of our knowledge, this is the first study that integrates these two fields. We hope this research serves as a baseline and stimulates further exploration.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
}
Optical Music Recognition (OMR) is the research area focused on transcribing images of musical scores. In recent years, this field has seen great development thanks to the emergence of Deep Learning. However, these types of solutions require large volumes of labeled data. To alleviate this problem, Contrastive Self-Supervised Learning (SSL) has emerged as a paradigm that leverages large amounts of unlabeled data to train neural networks, yielding meaningful and robust representations. In this work, we explore its first application to the field of OMR. By utilizing three datasets that represent the heterogeneity of musical scores in notations and graphic styles, and through multiple evaluation protocols, we demonstrate that contrastive SSL delivers promising results, significantly reducing data scarcity challenges in OMR. To the best of our knowledge, this is the first study that integrates these two fields. We hope this research serves as a baseline and stimulates further exploration. Ríos-Vila, A.; Calvo-Zaragoza, J.; Paquet, T.
Sheet Music Transformer: End-To-End Optical Music Recognition Beyond Monophonic Transcription Conference
Document Analysis and Recognition - ICDAR 2024, vol. 1, Springer Nature Switzerland, 2024, ISBN: 978-3-031-70552-6.
BibTeX | Tags: MultiScore
@conference{RiosVila:ICDAR:2024,
  author    = {A. Ríos-Vila and J. Calvo-Zaragoza and T. Paquet},
  title     = {Sheet Music Transformer: End-To-End Optical Music Recognition Beyond Monophonic Transcription},
  booktitle = {Document Analysis and Recognition - ICDAR 2024},
  volume    = {1},
  pages     = {20--37},
  publisher = {Springer Nature Switzerland},
  year      = {2024},
  date      = {2024-09-02},
  urldate   = {2024-09-02},
  isbn      = {978-3-031-70552-6},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {conference}
}
Alfaro-Contreras, M.; Rios-Vila, A.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
A Transformer Approach for Polyphonic Audio-to-Score Transcription Proceedings Article
In: Proceedings of the International Conference on Acoustics, Speech, and Signal Processing (ICASSP 2024), Seoul (Korea), 2024.
Links | BibTeX | Tags: MultiScore
@inproceedings{Alfaro-Contreras:ICASSP24,
  author    = {M. Alfaro-Contreras and A. Rios-Vila and J. J. Valero-Mas and J. Calvo-Zaragoza},
  title     = {A Transformer Approach for Polyphonic Audio-to-Score Transcription},
  booktitle = {Proceedings of the International Conference on Acoustics, Speech, and Signal Processing (ICASSP 2024)},
  address   = {Seoul (Korea)},
  year      = {2024},
  date      = {2024-04-19},
  urldate   = {2024-04-19},
  doi       = {10.1109/ICASSP48485.2024.10447162},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2023
Penarrubia, C.; Garrido-Munoz, C.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Efficient notation assembly in optical music recognition Conference
Proceedings of the 24th International Society for Music Information Retrieval Conference, Milan, Italy, 2023, ISBN: 978-1-7327299-3-3.
BibTeX | Tags:
@conference{nokey,
  author    = {C. Penarrubia and C. Garrido-Munoz and J. J. Valero-Mas and J. Calvo-Zaragoza},
  title     = {Efficient notation assembly in optical music recognition},
  booktitle = {Proceedings of the 24th International Society for Music Information Retrieval Conference},
  pages     = {182--189},
  address   = {Milan, Italy},
  year      = {2023},
  date      = {2023-10-30},
  isbn      = {978-1-7327299-3-3},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
}
Martínez-Sevilla, J. C.; Ríos-Vila, A.; Castellanos, F. J.; Calvo-Zaragoza, J.
A Holistic Approach for Aligned Music and Lyrics Transcription Conference
Document Analysis and Recognition - ICDAR 2023, vol. 1, Springer Nature Switzerland, Cham, 2023, ISBN: 978-3-031-41676-7.
Abstract | Links | BibTeX | Tags: REPERTORIUM
@conference{MartinezSevilla:ICDAR:2023,
  author    = {J. C. Martínez-Sevilla and A. Ríos-Vila and F. J. Castellanos and J. Calvo-Zaragoza},
  editor    = {Fink, Gernot A. and Jain, Rajiv and Kise, Koichi and Zanibbi, Richard},
  title     = {A Holistic Approach for Aligned Music and Lyrics Transcription},
  booktitle = {Document Analysis and Recognition - ICDAR 2023},
  volume    = {1},
  pages     = {185--201},
  publisher = {Springer Nature Switzerland},
  address   = {Cham},
  year      = {2023},
  date      = {2023-08-28},
  urldate   = {2023-08-28},
  doi       = {10.1007/978-3-031-41676-7_11},
  isbn      = {978-3-031-41676-7},
  abstract  = {In this paper, we present the Aligned Music Notation and Lyrics Transcription (AMNLT) challenge, whose goal is to retrieve the content from document images of vocal music. This new research area arises from the need to automatically transcribe notes and lyrics from music scores and align both sources of information conveniently. Although existing methods are able to deal with music notation and text, they work without providing their proper alignment, which is crucial to actually retrieve the content of the piece of vocal music. To overcome this challenge, we consider holistic neural approaches that transcribe music and text in one step, along with an encoding that implicitly aligns the sources of information. The methodology is evaluated on a benchmark specifically designed for AMNLT. The results report that existing methods can obtain high-quality text and music transcriptions, but posterior alignment errors are inevitably found. However, our formulation achieves relative improvements of over 80\% in the metric that considers both transcription and alignment. We hope that this work will establish itself as a future reference for further research on AMNLT.},
  keywords  = {REPERTORIUM},
  pubstate  = {published},
  tppubtype = {conference}
}
In this paper, we present the Aligned Music Notation and Lyrics Transcription (AMNLT) challenge, whose goal is to retrieve the content from document images of vocal music. This new research area arises from the need to automatically transcribe notes and lyrics from music scores and align both sources of information conveniently. Although existing methods are able to deal with music notation and text, they work without providing their proper alignment, which is crucial to actually retrieve the content of the piece of vocal music. To overcome this challenge, we consider holistic neural approaches that transcribe music and text in one step, along with an encoding that implicitly aligns the sources of information. The methodology is evaluated on a benchmark specifically designed for AMNLT. The results report that existing methods can obtain high-quality text and music transcriptions, but posterior alignment errors are inevitably found. However, our formulation achieves relative improvements of over 80% in the metric that considers both transcription and alignment. We hope that this work will establish itself as a future reference for further research on AMNLT. Martínez-Sevilla, J. C.; Alfaro-Contreras, M.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Insights into end-to-end audio-to-score transcription with real recordings: A case study with saxophone works Proceedings Article
In: INTERSPEECH Conference, pp. 2793-2797, Dublin, Ireland, 2023.
Links | BibTeX | Tags: MultiScore
@inproceedings{Martínez-Sevilla2023,
  author    = {J. C. Martínez-Sevilla and M. Alfaro-Contreras and J. J. Valero-Mas and J. Calvo-Zaragoza},
  title     = {Insights into end-to-end audio-to-score transcription with real recordings: A case study with saxophone works},
  booktitle = {INTERSPEECH Conference},
  pages     = {2793--2797},
  address   = {Dublin, Ireland},
  year      = {2023},
  date      = {2023-08-20},
  urldate   = {2023-08-20},
  doi       = {10.21437/Interspeech.2023-88},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Alfaro-Contreras, M.; Valero-Mas, J. J.; Iñesta, J. M.; Calvo-Zaragoza, J.
Multimodal Strategies for Image and Audio Music Transcription: A Comparative Study Proceedings Article
In: Pattern Recognition, Computer Vision, and Image Processing. ICPR 2022 International Workshops and Challenges. ICPR 2022. Lecture Notes in Computer Science, pp. 64-77, Springer Nature Switzerland, Cham, 2023, ISBN: 978-3-031-37731-0.
Links | BibTeX | Tags: MultiScore
@inproceedings{k505,
  author    = {M. Alfaro-Contreras and J. J. Valero-Mas and J. M. Iñesta and J. Calvo-Zaragoza},
  title     = {Multimodal Strategies for Image and Audio Music Transcription: A Comparative Study},
  booktitle = {Pattern Recognition, Computer Vision, and Image Processing. ICPR 2022 International Workshops and Challenges},
  series    = {Lecture Notes in Computer Science},
  volume    = {13645},
  pages     = {64--77},
  publisher = {Springer Nature Switzerland},
  address   = {Cham},
  year      = {2023},
  date      = {2023-08-10},
  urldate   = {2022-01-01},
  doi       = {10.1007/978-3-031-37731-0_6},
  isbn      = {978-3-031-37731-0},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Garrido-Munoz, C.; Alfaro-Contreras, M.; Calvo-Zaragoza, J.
Evaluating Domain Generalization in Kitchen Utensils Classification Proceedings Article
In: Iberian Conference on Pattern Recognition and Image Analysis, pp. 108-118, 2023.
Links | BibTeX | Tags: MultiScore
@inproceedings{Garrido-Munoz2023,
  author    = {C. Garrido-Munoz and M. Alfaro-Contreras and J. Calvo-Zaragoza},
  title     = {Evaluating Domain Generalization in Kitchen Utensils Classification},
  booktitle = {Iberian Conference on Pattern Recognition and Image Analysis},
  pages     = {108--118},
  year      = {2023},
  date      = {2023-06-25},
  doi       = {10.1007/978-3-031-36616-1_9},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
González-Barrachina, P.; Alfaro-Contreras, M.; Nieto-Hidalgo, M.; Calvo-Zaragoza, J.
Lifelong Learning for Document Image Binarization: An Experimental Study Proceedings Article
In: Iberian Conference on Pattern Recognition and Image Analysis, pp. 146-157, 2023.
Links | BibTeX | Tags: MultiScore
@inproceedings{González-Barrachina2023,
  author    = {P. González-Barrachina and M. Alfaro-Contreras and M. Nieto-Hidalgo and J. Calvo-Zaragoza},
  title     = {Lifelong Learning for Document Image Binarization: An Experimental Study},
  booktitle = {Iberian Conference on Pattern Recognition and Image Analysis},
  pages     = {146--157},
  year      = {2023},
  date      = {2023-06-25},
  urldate   = {2023-06-25},
  doi       = {10.1007/978-3-031-36616-1_12},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Penarrubia, C.; Valero-Mas, J. J.; Gallego, A. J.; Calvo-Zaragoza, J.
Addressing Class Imbalance in Multilabel Prototype Generation for k-Nearest Neighbor Classification Conference
Iberian Conference on Pattern Recognition and Image Analysis, Alicante, Spain, 2023, ISBN: 978-3-031-36616-1.
Abstract | Links | BibTeX | Tags: DOREMI
@conference{nokey,
  author    = {C. Penarrubia and J. J. Valero-Mas and A. J. Gallego and J. Calvo-Zaragoza},
  title     = {Addressing Class Imbalance in Multilabel Prototype Generation for k-Nearest Neighbor Classification},
  booktitle = {Iberian Conference on Pattern Recognition and Image Analysis},
  pages     = {15--27},
  address   = {Alicante, Spain},
  year      = {2023},
  date      = {2023-06-25},
  doi       = {10.1007/978-3-031-36616-1_2},
  isbn      = {978-3-031-36616-1},
  abstract  = {Prototype Generation (PG) methods seek to improve the efficiency of the k-Nearest Neighbor (kNN) classifier by obtaining a reduced version of a given reference dataset following certain heuristics. Despite being largely addressed topic in multiclass scenarios, few works deal with PG in multilabel environments. Hence, the existing proposals exhibit a number of limitations, being label imbalance one of paramount relevance as it constitutes a typical challenge of multilabel datasets. This work proposes two novel merging policies for multilabel PG schemes specifically devised for label imbalance, as well as a mechanism to prevent inappropriate samples from undergoing a reduction process. These proposals are applied to three existing multilabel PG methods—Multilabel Reduction through Homogeneous Clustering, Multilabel Chen, and Multilabel Reduction through Space Partitioning—and evaluated on 12 different data assortments with different degrees of label imbalance. The results prove that the proposals overcome—in some cases in a significant manner—those obtained with the original methods, hence validating the presented approaches and enabling further research lines on this topic.},
  keywords  = {DOREMI},
  pubstate  = {published},
  tppubtype = {conference}
}
Prototype Generation (PG) methods seek to improve the efficiency of the k-Nearest Neighbor (kNN) classifier by obtaining a reduced version of a given reference dataset following certain heuristics. Despite being largely addressed topic in multiclass scenarios, few works deal with PG in multilabel environments. Hence, the existing proposals exhibit a number of limitations, being label imbalance one of paramount relevance as it constitutes a typical challenge of multilabel datasets. This work proposes two novel merging policies for multilabel PG schemes specifically devised for label imbalance, as well as a mechanism to prevent inappropriate samples from undergoing a reduction process. These proposals are applied to three existing multilabel PG methods—Multilabel Reduction through Homogeneous Clustering, Multilabel Chen, and Multilabel Reduction through Space Partitioning—and evaluated on 12 different data assortments with different degrees of label imbalance. The results prove that the proposals overcome—in some cases in a significant manner—those obtained with the original methods, hence validating the presented approaches and enabling further research lines on this topic. Alfaro-Contreras, M.; Iñesta, J. M.; Calvo-Zaragoza, J.
Optical Music Recognition for Homophonic Scores with Neural Networks and Synthetic Music Generation Journal Article
In: International Journal of Multimedia Information Retrieval, vol. 12, pp. 12-24, 2023.
@article{Alfaro-Contreras2023b,
  author    = {M. Alfaro-Contreras and J. M. Iñesta and J. Calvo-Zaragoza},
  title     = {Optical Music Recognition for Homophonic Scores with Neural Networks and Synthetic Music Generation},
  journal   = {International Journal of Multimedia Information Retrieval},
  volume    = {12},
  pages     = {12--24},
  year      = {2023},
  date      = {2023-05-26},
  urldate   = {2023-05-26},
  doi       = {10.1007/s13735-023-00278-5},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Ríos-Vila, A.; Rizo, D.; Iñesta, J. M.; Calvo-Zaragoza, J.
End-to-end optical music recognition for pianoform sheet music Journal Article
In: International Journal on Document Analysis and Recognition (IJDAR), iss. ICDAR 2023, 2023, ISSN: 1433-2825.
Abstract | Links | BibTeX | Tags: MultiScore
@article{Ríos-Vila2023,
  author    = {A. Ríos-Vila and D. Rizo and J. M. Iñesta and J. Calvo-Zaragoza},
  title     = {End-to-end optical music recognition for pianoform sheet music},
  journal   = {International Journal on Document Analysis and Recognition (IJDAR)},
  issue     = {ICDAR 2023},
  year      = {2023},
  date      = {2023-05-12},
  urldate   = {2023-05-12},
  doi       = {10.1007/s10032-023-00432-z},
  url       = {https://link.springer.com/content/pdf/10.1007/s10032-023-00432-z.pdf},
  issn      = {1433-2825},
  abstract  = {End-to-end solutions have brought about significant advances in the field of Optical Music Recognition. These approaches directly provide the symbolic representation of a given image of a musical score. Despite this, several documents, such as pianoform musical scores, cannot yet benefit from these solutions since their structural complexity does not allow their effective transcription. This paper presents a neural method whose objective is to transcribe these musical scores in an end-to-end fashion. We also introduce the GrandStaff dataset, which contains 53,882 single-system piano scores in common western modern notation. The sources are encoded in both a standard digital music representation and its adaptation for current transcription technologies. The method proposed in this paper is trained and evaluated using this dataset. The results show that the approach presented is, for the first time, able to effectively transcribe pianoform notation in an end-to-end manner.},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {article}
}
End-to-end solutions have brought about significant advances in the field of Optical Music Recognition. These approaches directly provide the symbolic representation of a given image of a musical score. Despite this, several documents, such as pianoform musical scores, cannot yet benefit from these solutions since their structural complexity does not allow their effective transcription. This paper presents a neural method whose objective is to transcribe these musical scores in an end-to-end fashion. We also introduce the GrandStaff dataset, which contains 53,882 single-system piano scores in common western modern notation. The sources are encoded in both a standard digital music representation and its adaptation for current transcription technologies. The method proposed in this paper is trained and evaluated using this dataset. The results show that the approach presented is, for the first time, able to effectively transcribe pianoform notation in an end-to-end manner. Alfaro-Contreras, M.; Ríos-Vila, A.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Few-Shot Symbol Classification via Self-Supervised Learning and Nearest Neighbor Journal Article
In: Pattern Recognition Letters, vol. 167, pp. 1-8, 2023.
Links | BibTeX | Tags: MultiScore
@article{Alfaro-Contreras2023,
  author    = {M. Alfaro-Contreras and A. Ríos-Vila and J. J. Valero-Mas and J. Calvo-Zaragoza},
  title     = {Few-Shot Symbol Classification via Self-Supervised Learning and Nearest Neighbor},
  journal   = {Pattern Recognition Letters},
  volume    = {167},
  pages     = {1--8},
  year      = {2023},
  date      = {2023-03-01},
  doi       = {10.1016/j.patrec.2023.01.014},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {article}
}
Alfaro-Contreras, M.; Valero-Mas, J. J.; Iñesta, J. M.; Calvo-Zaragoza, J.
Late multimodal fusion for image and audio music transcription Journal Article
In: Expert Systems With Applications, vol. 216, pp. 119491-119500, 2023.
Links | BibTeX | Tags: MultiScore
@article{Alfaro-Contreras2023c,
  author    = {M. Alfaro-Contreras and J. J. Valero-Mas and J. M. Iñesta and J. Calvo-Zaragoza},
  title     = {Late multimodal fusion for image and audio music transcription},
  journal   = {Expert Systems With Applications},
  volume    = {216},
  pages     = {119491--119500},
  year      = {2023},
  date      = {2023-01-01},
  urldate   = {2023-01-01},
  doi       = {10.1016/j.eswa.2022.119491},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {article}
}
Sánchez-Ferrer, A.; Valero-Mas, J. J.; Gallego, A. J.; Calvo-Zaragoza, J.
An Experimental Study on Marine Debris Location and Recognition using Object Detection Journal Article
In: Pattern Recognition Letters, 2023, ISSN: 0167-8655.
Abstract | Links | BibTeX | Tags: TADMar
@article{k521,
  author    = {A. Sánchez-Ferrer and J. J. Valero-Mas and A. J. Gallego and J. Calvo-Zaragoza},
  title     = {An Experimental Study on Marine Debris Location and Recognition using Object Detection},
  journal   = {Pattern Recognition Letters},
  year      = {2023},
  date      = {2023-01-01},
  urldate   = {2023-01-01},
  doi       = {10.1016/j.patrec.2022.12.019},
  issn      = {0167-8655},
  abstract  = {The large amount of debris in our oceans is a global problem that dramatically impacts marine fauna and flora. While a large number of human-based campaigns have been proposed to tackle this issue, these efforts have been deemed insufficient due to the insurmountable amount of existing litter. In response to that, there exists a high interest in the use of autonomous underwater vehicles (AUV) that may locate, identify, and collect this garbage automatically. To perform such a task, AUVs consider state-of-the-art object detection techniques based on deep neural networks due to their reported high performance. Nevertheless, these techniques generally require large amounts of data with fine-grained annotations. In this work, we explore the capabilities of the reference object detector Mask Region-based Convolutional Neural Networks for automatic marine debris location and classification in the context of limited data availability. Considering the recent CleanSea corpus, we pose several scenarios regarding the amount of available train data and study the possibility of mitigating the adverse effects of data scarcity with synthetic marine scenes. Our results achieve a new state of the art in the task, establishing a new reference for future research. In addition, it is shown that the task still has room for improvement and that the lack of data can be somehow alleviated, yet to a limited extent.},
  keywords  = {TADMar},
  pubstate  = {published},
  tppubtype = {article}
}
The large amount of debris in our oceans is a global problem that dramatically impacts marine fauna and flora. While a large number of human-based campaigns have been proposed to tackle this issue, these efforts have been deemed insufficient due to the insurmountable amount of existing litter. In response to that, there exists a high interest in the use of autonomous underwater vehicles (AUV) that may locate, identify, and collect this garbage automatically. To perform such a task, AUVs consider state-of-the-art object detection techniques based on deep neural networks due to their reported high performance. Nevertheless, these techniques generally require large amounts of data with fine-grained annotations. In this work, we explore the capabilities of the reference object detector Mask Region-based Convolutional Neural Networks for automatic marine debris location and classification in the context of limited data availability. Considering the recent CleanSea corpus, we pose several scenarios regarding the amount of available train data and study the possibility of mitigating the adverse effects of data scarcity with synthetic marine scenes. Our results achieve a new state of the art in the task, establishing a new reference for future research. In addition, it is shown that the task still has room for improvement and that the lack of data can be somehow alleviated, yet to a limited extent.
2022
Ríos-Vila, A.; Iñesta, J. M.; Calvo-Zaragoza, J.
End-to-End Full-Page Optical Music Recognition for Mensural Notation Proceedings Article
In: Proceedings of the 23rd International Society for Music Information Retrieval Conference, pp. 226-232, 2022, ISBN: 978-1-7327299-2-6.
Abstract | Links | BibTeX | Tags: Leonardo2021, MultiScore
@inproceedings{Ríos-Vila2022,
  author    = {A. Ríos-Vila and J. M. Iñesta and J. Calvo-Zaragoza},
  title     = {End-to-End Full-Page Optical Music Recognition for Mensural Notation},
  booktitle = {Proceedings of the 23rd International Society for Music Information Retrieval Conference},
  pages     = {226--232},
  year      = {2022},
  date      = {2022-12-04},
  urldate   = {2022-12-04},
  doi       = {10.5281/zenodo.7342678},
  url       = {https://zenodo.org/record/7342678/files/000026.pdf?download=1},
  isbn      = {978-1-7327299-2-6},
  abstract  = {Optical Music Recognition (OMR) systems typically consider workflows that include several steps, such as staff detection, symbol recognition, and semantic reconstruction. However, fine-tuning these systems is costly due to the specific data labeling process that has to be performed to train models for each of these steps. In this paper, we present the first segmentation-free full-page OMR system that receives a page image and directly outputs the transcription in a single step. This model requires only the annotations of full score pages, which greatly alleviates the task of manual labeling. The model has been tested with early music written in mensural notation, for which the presented approach is especially beneficial. Results show that this methodology provides a solution with promising results and establishes a new line of research for holistic transcription of music score pages.},
  keywords  = {Leonardo2021, MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Optical Music Recognition (OMR) systems typically consider workflows that include several steps, such as staff detection, symbol recognition, and semantic reconstruction. However, fine-tuning these systems is costly due to the specific data labeling process that has to be performed to train models for each of these steps. In this paper, we present the first segmentation-free full-page OMR system that receives a page image and directly outputs the transcription in a single step. This model requires only the annotations of full score pages, which greatly alleviates the task of manual labeling. The model has been tested with early music written in mensural notation, for which the presented approach is especially beneficial. Results show that this methodology provides a solution with promising results and establishes a new line of research for holistic transcription of music score pages. Rizo, D.; Delgado, T.; Calvo-Zaragoza, J.; Madueño, A.; García-Iasci, P.
Speeding-up the encoding of mensural collections from Spanish libraries Journal Article
In: IAML 2022 Prague, 2022.
BibTeX | Tags: MultiScore
@article{k502,
  author       = {D. Rizo and T. Delgado and J. Calvo-Zaragoza and A. Madueño and P. García-Iasci},
  title        = {Speeding-up the encoding of mensural collections from Spanish libraries},
  journal      = {IAML 2022 Prague},
  booktitle    = {IAML 2022 Prague},
  organization = {IAML},
  year         = {2022},
  date         = {2022-07-01},
  keywords     = {MultiScore},
  pubstate     = {published},
  tppubtype    = {article}
}
Alfaro-Contreras, M.; Ríos-Vila, A.; Valero-Mas, J. J.; Iñesta, J. M.; Calvo-Zaragoza, J.
Decoupling music notation to improve end-to-end Optical Music Recognition Journal Article
In: Pattern Recognition Letters, vol. 158, pp. 157-163, 2022, ISSN: 0167-8655.
Abstract | Links | BibTeX | Tags: MultiScore
@article{Alfaro-Contreras2022,
  author    = {M. Alfaro-Contreras and A. Ríos-Vila and J. J. Valero-Mas and J. M. Iñesta and J. Calvo-Zaragoza},
  title     = {Decoupling music notation to improve end-to-end Optical Music Recognition},
  journal   = {Pattern Recognition Letters},
  volume    = {158},
  pages     = {157--163},
  year      = {2022},
  date      = {2022-06-01},
  urldate   = {2022-06-01},
  doi       = {10.1016/j.patrec.2022.04.032},
  issn      = {0167-8655},
  abstract  = {Inspired by the Text Recognition field, end-to-end schemes based on Convolutional Recurrent Neural Networks (CRNN) trained with the Connectionist Temporal Classification (CTC) loss function are considered one of the current state-of-the-art techniques for staff-level Optical Music Recognition (OMR). Unlike text symbols, music-notation elements may be defined as a combination of (i) a shape primitive located in (ii) a certain position in a staff. However, this double nature is generally neglected in the learning process, as each combination is treated as a single token. In this work, we study whether exploiting such particularity of music notation actually benefits the recognition performance and, if so, which approach is the most appropriate. For that, we thoroughly review existing specific approaches that explore this premise and propose different combinations of them. Furthermore, considering the limitations observed in such approaches, a novel decoding strategy specifically designed for OMR is proposed. The results obtained with four different corpora of historical manuscripts show the relevance of leveraging this double nature of music notation since it outperforms the standard approaches where it is ignored. In addition, the proposed decoding leads to significant reductions in the error rates with respect to the other cases.},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {article}
}
Inspired by the Text Recognition field, end-to-end schemes based on Convolutional Recurrent Neural Networks (CRNN) trained with the Connectionist Temporal Classification (CTC) loss function are considered one of the current state-of-the-art techniques for staff-level Optical Music Recognition (OMR). Unlike text symbols, music-notation elements may be defined as a combination of (i) a shape primitive located in (ii) a certain position in a staff. However, this double nature is generally neglected in the learning process, as each combination is treated as a single token. In this work, we study whether exploiting such particularity of music notation actually benefits the recognition performance and, if so, which approach is the most appropriate. For that, we thoroughly review existing specific approaches that explore this premise and propose different combinations of them. Furthermore, considering the limitations observed in such approaches, a novel decoding strategy specifically designed for OMR is proposed. The results obtained with four different corpora of historical manuscripts show the relevance of leveraging this double nature of music notation since it outperforms the standard approaches where it is ignored. In addition, the proposed decoding leads to significant reductions in the error rates with respect to the other cases. Alfaro-Contreras, M.; Valero-Mas, J. J.; Iñesta, J. M.; Calvo-Zaragoza, J.
Insights into transfer learning between image and audio music transcription Proceedings Article
In: Sound and Music Computing Conference, pp. 295-301, Zenodo, Saint-Étienne, France, 2022.
Abstract | Links | BibTeX | Tags: MultiScore
@inproceedings{Alfaro-Contreras2022b,
title = {Insights into transfer learning between image and audio music transcription},
author = {M. Alfaro-Contreras and J. J. Valero-Mas and J. M. Iñesta and J. Calvo-Zaragoza},
doi = {10.5281/zenodo.6797870},
year = {2022},
date = {2022-06-01},
urldate = {2022-06-01},
booktitle = {Sound and Music Computing Conference},
pages = {295--301},
publisher = {Zenodo},
address = {Saint-Étienne, France},
abstract = {Optical Music Recognition (OMR) and Automatic Music Transcription (AMT) stand for the research fields that devise methods to transcribe music sources---documents or audio signals, respectively---into a structured digital format. Historically, they have followed different approaches to achieve the same goal. However, their recent definition in terms of sequence labeling tasks gathers them under a common formulation framework. Under this premise, one may wonder if there exist any synergies between the two fields that could be exploited to improve the individual recognition rates in their respective domains. In this work, we aim to further explore this question from a Transfer Learning (TL) point of view in the context of neural end-to-end recognition models. More precisely, we consider a music transcription system, trained on either image or audio data, and adapt its performance to the unseen domain during the training phase using different TL schemes. Results show that knowledge transfer slightly boosts model performance with sufficient available data, but it is not properly leveraged when the latter condition is not met. This opens up a new promising, yet challenging, research path towards building an effective bridge between two solutions of the same problem.},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Optical Music Recognition (OMR) and Automatic Music Transcription (AMT) stand for the research fields that devise methods to transcribe music sources---documents or audio signals, respectively---into a structured digital format. Historically, they have followed different approaches to achieve the same goal. However, their recent definition in terms of sequence labeling tasks gathers them under a common formulation framework. Under this premise, one may wonder if there exist any synergies between the two fields that could be exploited to improve the individual recognition rates in their respective domains. In this work, we aim to further explore this question from a Transfer Learning (TL) point of view in the context of neural end-to-end recognition models. More precisely, we consider a music transcription system, trained on either image or audio data, and adapt its performance to the unseen domain during the training phase using different TL schemes. Results show that knowledge transfer slightly boosts model performance with sufficient available data, but it is not properly leveraged when the latter condition is not met. This opens up a new promising, yet challenging, research path towards building an effective bridge between two solutions of the same problem. Arroyo, V.; Valero-Mas, J. J.; Calvo-Zaragoza, J.; Pertusa, A.
Neural audio-to-score music transcription for unconstrained polyphony using compact output representations Proceedings Article
In: Proc. of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), IEEE, Singapore, Singapore, 2022.
BibTeX | Tags: MultiScore
@inproceedings{k487,
title = {Neural audio-to-score music transcription for unconstrained polyphony using compact output representations},
author = {V. Arroyo and J. J. Valero-Mas and J. Calvo-Zaragoza and A. Pertusa},
year = {2022},
date = {2022-05-01},
booktitle = {Proc. of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
publisher = {IEEE},
address = {Singapore, Singapore},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Garrido-Munoz, C.; Ríos-Vila, A.; Calvo-Zaragoza, J.
Retrieval of Music-Notation Primitives via Image-to-Sequence Approaches Proceedings Article
In: Iberian Pattern Recognition and Image Analysis, IbPRIA 2022, pp. 482-492, Aveiro, Portugal, 2022, ISBN: 978-3-031-04880-7.
BibTeX | Tags: Leonardo2021
@inproceedings{k493,
title = {Retrieval of Music-Notation Primitives via Image-to-Sequence Approaches},
author = {C. Garrido-Munoz and A. Ríos-Vila and J. Calvo-Zaragoza},
isbn = {978-3-031-04880-7},
year = {2022},
date = {2022-05-01},
booktitle = {Iberian Pattern Recognition and Image Analysis, IbPRIA 2022},
pages = {482--492},
address = {Aveiro, Portugal},
keywords = {Leonardo2021},
pubstate = {published},
tppubtype = {inproceedings}
}
Mas-Candela, E.; Ríos-Vila, A.; Calvo-Zaragoza, J.
A First Approach to Image Transformation Sequence Retrieval Proceedings Article
In: Iberian Pattern Recognition and Image Analysis, IbPRIA 2022, pp. 321-332, Aveiro, Portugal, 2022.
BibTeX | Tags: MultiScore
@inproceedings{k494,
title = {A First Approach to Image Transformation Sequence Retrieval},
author = {E. Mas-Candela and A. Ríos-Vila and J. Calvo-Zaragoza},
year = {2022},
date = {2022-05-01},
booktitle = {Iberian Pattern Recognition and Image Analysis, IbPRIA 2022},
pages = {321--332},
address = {Aveiro, Portugal},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Sánchez-Ferrer, A.; Gallego, A. J.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
The CleanSea Set: A Benchmark Corpus for Underwater Debris Detection and Recognition Proceedings Article
In: 10th Iberian Conference on Pattern Recognition and Image Analysis (IbPRIA), pp. 616–628, Aveiro, Portugal, 2022, ISBN: 978-3-031-04881-4.
@inproceedings{k512,
title = {The CleanSea Set: A Benchmark Corpus for Underwater Debris Detection and Recognition},
author = {A. Sánchez-Ferrer and A. J. Gallego and J. J. Valero-Mas and J. Calvo-Zaragoza},
isbn = {978-3-031-04881-4},
year = {2022},
date = {2022-05-01},
booktitle = {10th Iberian Conference on Pattern Recognition and Image Analysis (IbPRIA)},
pages = {616--628},
address = {Aveiro, Portugal},
abstract = {In recent years, the large amount of debris scattered throughout the ocean is becoming one of the major pollution problems, causing extinction of species and accelerating the degradation of our planet, among other environmental issues. Since the manual treatment of this waste represents a considerably tedious task, autonomous frameworks are gaining attention. Due to their reported good performance, such frameworks generally rely on Deep Learning techniques. However, the scarcity of data coupled with the inherent difficulties of the field---debris with different shapes and colors due to long-lasting exposure to the ocean, illumination variability or sea conditions---makes detecting underwater objects a particularly challenging task. The contribution of this work to the field is double: on the one hand, we introduce a novel data collection for supervised learning---the CleanSea corpus---annotated at both the bound box and contour levels of the objects to contribute with the research and progress in the field and on the other hand, we devise and optimize a recognition model based on the reference Mask Object-Based Convolutional Neural Network for this set to establish a benchmark for future comparison and assess its performance in both simulated and real-world scenarios. Results show the relevance of the contributions as the devised model is capable of properly addressing the detection and recognition of general debris when trained with the introduced CleanSea corpus.},
pubstate = {published},
tppubtype = {inproceedings}
}
In recent years, the large amount of debris scattered throughout the ocean is becoming one of the major pollution problems, causing extinction of species and accelerating the degradation of our planet, among other environmental issues. Since the manual treatment of this waste represents a considerably tedious task, autonomous frameworks are gaining attention. Due to their reported good performance, such frameworks generally rely on Deep Learning techniques. However, the scarcity of data coupled with the inherent difficulties of the field---debris with different shapes and colors due to long-lasting exposure to the ocean, illumination variability or sea conditions---makes detecting underwater objects a particularly challenging task. The contribution of this work to the field is double: on the one hand, we introduce a novel data collection for supervised learning---the CleanSea corpus---annotated at both the bound box and contour levels of the objects to contribute with the research and progress in the field and on the other hand, we devise and optimize a recognition model based on the reference Mask Object-Based Convolutional Neural Network for this set to establish a benchmark for future comparison and assess its performance in both simulated and real-world scenarios. Results show the relevance of the contributions as the devised model is capable of properly addressing the detection and recognition of general debris when trained with the introduced CleanSea corpus. Ríos-Vila, A.; Iñesta, J. M.; Calvo-Zaragoza, J.
On the Use of Transformers for End-to-End Optical Music Recognition Proceedings Article
In: Iberian Pattern Recognition and Image Analysis, IbPRIA 2022, pp. 470-481, Aveiro, Portugal, 2022, ISBN: 978-3-031-04880-7.
BibTeX | Tags: MultiScore
@inproceedings{k492,
title = {On the Use of Transformers for End-to-End Optical Music Recognition},
author = {A. Ríos-Vila and J. M. Iñesta and J. Calvo-Zaragoza},
isbn = {978-3-031-04880-7},
year = {2022},
date = {2022-05-01},
urldate = {2022-05-01},
booktitle = {Iberian Pattern Recognition and Image Analysis, IbPRIA 2022},
pages = {470--481},
address = {Aveiro, Portugal},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Fuente, C.; Valero-Mas, J. J.; Castellanos, F. J.; Calvo-Zaragoza, J.
Multimodal Image and Audio Music Transcription Journal Article
In: International Journal of Multimedia Information Retrieval, vol. 11, pp. 77-84, 2022.
BibTeX | Tags: MultiScore
@article{k479,
title = {Multimodal Image and Audio Music Transcription},
author = {C. Fuente and J. J. Valero-Mas and F. J. Castellanos and J. Calvo-Zaragoza},
year = {2022},
date = {2022-01-01},
journal = {International Journal of Multimedia Information Retrieval},
volume = {11},
pages = {77--84},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {article}
}
Rosello, A.; Ayllon, E.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Test Sample Selection for Handwriting Recognition Through Language Modeling Proceedings Article
In: Pattern Recognition and Image Analysis - 10th Iberian Conference, IbPRIA 2022, Aveiro, Portugal, May 4-6, 2022, Proceedings, 2022.
BibTeX | Tags: MultiScore
@inproceedings{k498,
  author    = {A. Rosello and E. Ayllon and J. J. Valero-Mas and J. Calvo-Zaragoza},
  title     = {Test Sample Selection for Handwriting Recognition Through Language Modeling},
  booktitle = {Pattern Recognition and Image Analysis - 10th Iberian Conference, IbPRIA 2022, Aveiro, Portugal, May 4-6, 2022, Proceedings},
  year      = {2022},
  date      = {2022-01-01},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Castellanos, F. J.; Gallego, A. J.; Calvo-Zaragoza, J.; Fujinaga, I.
Domain Adaptation for Staff-Region Retrieval of Music Score Images Journal Article
In: International Journal on Document Analysis and Recognition, 2022, ISSN: 1433-2825.
BibTeX | Tags: MultiScore
@article{k500,
  author    = {F. J. Castellanos and A. J. Gallego and J. Calvo-Zaragoza and I. Fujinaga},
  title     = {Domain Adaptation for Staff-Region Retrieval of Music Score Images},
  journal   = {International Journal on Document Analysis and Recognition},
  year      = {2022},
  date      = {2022-01-01},
  issn      = {1433-2825},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {article}
}
Alfaro-Contreras, M.; Ríos-Vila, A.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Few-Shot Music Symbol Classification via Self-Supervised Learning and Nearest Neighbor Proceedings Article
In: Pattern Recognition. ICPR International Workshops and Challenges, 2022.
BibTeX | Tags: MultiScore
@inproceedings{k504,
  author    = {M. Alfaro-Contreras and A. Ríos-Vila and J. J. Valero-Mas and J. Calvo-Zaragoza},
  title     = {Few-Shot Music Symbol Classification via Self-Supervised Learning and Nearest Neighbor},
  booktitle = {Pattern Recognition. ICPR International Workshops and Challenges},
  year      = {2022},
  date      = {2022-01-01},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Sáez-Pérez, J.; Gallego, A. J.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Domain Adaptation in Robotics: A Study Case on Kitchen Utensil Recognition Proceedings Article
In: 10th Iberian Conference on Pattern Recognition and Image Analysis (IbPRIA), 2022.
@inproceedings{k506,
  author    = {J. Sáez-Pérez and A. J. Gallego and J. J. Valero-Mas and J. Calvo-Zaragoza},
  title     = {Domain Adaptation in Robotics: A Study Case on Kitchen Utensil Recognition},
  booktitle = {10th Iberian Conference on Pattern Recognition and Image Analysis (IbPRIA)},
  year      = {2022},
  date      = {2022-01-01},
  keywords  = {ROMA},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Garrido-Munoz, C.; Ríos-Vila, A.; Calvo-Zaragoza, J.
A holistic approach for image-to-graph: application to optical music recognition Journal Article
In: International Journal on Document Analysis and Recognition, 2022.
BibTeX | Tags: Leonardo2021
@article{k522,
  author    = {C. Garrido-Munoz and A. Ríos-Vila and J. Calvo-Zaragoza},
  title     = {A holistic approach for image-to-graph: application to optical music recognition},
  journal   = {International Journal on Document Analysis and Recognition},
  year      = {2022},
  date      = {2022-01-01},
  urldate   = {2022-01-01},
  keywords  = {Leonardo2021},
  pubstate  = {published},
  tppubtype = {article}
}
Ríos-Vila, A.; Iñesta, J. M.; Calvo-Zaragoza, J.
End-to-End Full-Page Optical Music Recognition for Mensural Notation Proceedings Article
In: Proceedings of the 23rd International Society for Music Information Retrieval Conference, ISMIR, Bangalore, India, 2022.
Abstract | BibTeX | Tags: Leonardo2021, MultiScore
@inproceedings{k499,
  author    = {A. Ríos-Vila and J. M. Iñesta and J. Calvo-Zaragoza},
  title     = {End-to-End Full-Page Optical Music Recognition for Mensural Notation},
  booktitle = {Proceedings of the 23rd International Society for Music Information Retrieval Conference, ISMIR},
  address   = {Bangalore, India},
  year      = {2022},
  date      = {2022-01-01},
  urldate   = {2022-01-01},
  abstract  = {Optical Music Recognition (OMR) systems typically consider workflows that include several steps, such as staff detection, symbol recognition, and semantic reconstruction. However, fine-tuning these systems is costly due to the specific data labeling process that has to be performed to train models for each of these steps. In this paper, we present the first segmentation-free full-page OMR system that receives a page image and directly outputs the transcription in a single step. This model requires only the annotations of full score pages, which greatly alleviates the task of manual labeling. The model has been tested with early music written in mensural notation, for which the presented approach is especially beneficial. Results show that this methodology provides a solution with promising results and establishes a new line of research for holistic transcription of music score pages.},
  keywords  = {Leonardo2021, MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Optical Music Recognition (OMR) systems typically consider workflows that include several steps, such as staff detection, symbol recognition, and semantic reconstruction. However, fine-tuning these systems is costly due to the specific data labeling process that has to be performed to train models for each of these steps. In this paper, we present the first segmentation-free full-page OMR system that receives a page image and directly outputs the transcription in a single step. This model requires only the annotations of full score pages, which greatly alleviates the task of manual labeling. The model has been tested with early music written in mensural notation, for which the presented approach is especially beneficial. Results show that this methodology provides a solution with promising results and establishes a new line of research for holistic transcription of music score pages. Castellanos, F. J.; Garrido-Munoz, C.; Ríos-Vila, A.; Calvo-Zaragoza, J.
Region-based Layout Analysis of Music Score Images Journal Article
In: Expert Systems with Applications, pp. 118211, 2022, ISSN: 0957-4174.
BibTeX | Tags: MultiScore
@article{k486,
  author    = {F. J. Castellanos and C. Garrido-Munoz and A. Ríos-Vila and J. Calvo-Zaragoza},
  title     = {Region-based Layout Analysis of Music Score Images},
  journal   = {Expert Systems with Applications},
  pages     = {118211},
  year      = {2022},
  date      = {2022-01-01},
  urldate   = {2022-01-01},
  issn      = {0957-4174},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {article}
}
de la Fuente, C.; Castellanos, F. J.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Multimodal Recognition of Frustration during Game-Play with Deep Neural Networks Journal Article
In: Multimedia Tools and Applications, 2022.
BibTeX | Tags:
@article{k501,
title = {Multimodal Recognition of Frustration during Game-Play with Deep Neural Networks},
author = {C. de la Fuente and F. J. Castellanos and J. J. Valero-Mas and J. Calvo-Zaragoza},
year = {2022},
date = {2022-01-01},
urldate = {2022-01-01},
journal = {Multimedia Tools and Applications},
pubstate = {published},
tppubtype = {article}
}
2021
Castellanos, F. J.; Gallego, A. J.; Calvo-Zaragoza, J.
An Unsupervised Domain Adaptation framework for Layout Analysis of Music Score Images Proceedings Article
In: Proceedings of the 14th Machine Learning and Music Workshop, pp. 6, 2021.
@inproceedings{k482,
  author    = {F. J. Castellanos and A. J. Gallego and J. Calvo-Zaragoza},
  title     = {An Unsupervised Domain Adaptation framework for Layout Analysis of Music Score Images},
  booktitle = {Proceedings of the 14th Machine Learning and Music Workshop},
  pages     = {6},
  year      = {2021},
  date      = {2021-12-01},
  keywords  = {GRE19-04, ROMA},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Ríos-Vila, A.; Calvo-Zaragoza, J.; Iñesta, J. M.
CTC-based end-to-end approach for full page Optical Music Recognition Proceedings Article
In: Proceedings of the 14th Machine Learning and Music Workshop, pp. 11, 2021.
BibTeX | Tags: MultiScore
@inproceedings{k488,
  author    = {A. Ríos-Vila and J. Calvo-Zaragoza and J. M. Iñesta},
  title     = {CTC-based end-to-end approach for full page Optical Music Recognition},
  booktitle = {Proceedings of the 14th Machine Learning and Music Workshop},
  pages     = {11},
  year      = {2021},
  date      = {2021-12-01},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Calvo-Zaragoza, J.; Pertusa, A.; Gallego, A. J.; Iñesta, J. M.; Micó, L.; Oncina, J.; Perez-Sancho, C.; Ponce de León, P. J.; Rizo, D.
MultiScore Project: Multimodal Transcription of Music Scores Proceedings Article
In: Proceedings of the 14th Machine Learning and Music Workshop, pp. 3, 2021.
Links | BibTeX | Tags: MultiScore
@inproceedings{k481,
title = {MultiScore Project: Multimodal Transcription of Music Scores},
author = {J. Calvo-Zaragoza and A. Pertusa and A. J. Gallego and J. M. Iñesta and L. Micó and J. Oncina and C. Perez-Sancho and {Ponce de León}, P. J. and D. Rizo},
url = {https://grfia.dlsi.ua.es/repositori/grfia/pubs/481/MML2021__MultiScore_Final.pdf},
year = {2021},
date = {2021-12-01},
urldate = {2021-12-01},
booktitle = {Proceedings of the 14th Machine Learning and Music Workshop},
pages = {3},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Castellanos, F. J.; Gallego, A. J.; Calvo-Zaragoza, J.
Unsupervised Neural Domain Adaptation for Document Image Binarization Journal Article
In: Pattern Recognition, vol. 119, pp. 108099, 2021.
BibTeX | Tags: GRE19-04, HispaMus
@article{k467,
  author    = {F. J. Castellanos and A. J. Gallego and J. Calvo-Zaragoza},
  title     = {Unsupervised Neural Domain Adaptation for Document Image Binarization},
  journal   = {Pattern Recognition},
  volume    = {119},
  pages     = {108099},
  year      = {2021},
  date      = {2021-11-01},
  urldate   = {2021-11-01},
  keywords  = {GRE19-04, HispaMus},
  pubstate  = {published},
  tppubtype = {article}
}
Alfaro-Contreras, M.; Rizo, D.; Iñesta, J. M.; Calvo-Zaragoza, J.
OMR-assisted transcription: a case study with early prints Proceedings Article
In: Proceedings of the 22nd International Society for Music Information Retrieval Conference, ISMIR, pp. 35-41, 2021, ISBN: 978-1-7327299-0-2.
BibTeX | Tags: MultiScore
@inproceedings{k483,
title = {OMR-assisted transcription: a case study with early prints},
author = {M. Alfaro-Contreras and D. Rizo and J. M. Iñesta and J. Calvo-Zaragoza},
isbn = {978-1-7327299-0-2},
year = {2021},
date = {2021-11-01},
urldate = {2021-11-01},
booktitle = {Proceedings of the 22nd International Society for Music Information Retrieval Conference, ISMIR},
pages = {35--41},
keywords = {MultiScore},
pubstate = {published},
tppubtype = {inproceedings}
}
Fuente, C.; Valero-Mas, J. J.; Castellanos, F. J.; Calvo-Zaragoza, J.
Multimodal Audio and Image Music Transcription Proceedings Article
In: Proc. of the 3rd International Workshop on Reading Music Systems, pp. 18-22, 2021.
BibTeX | Tags:
@inproceedings{k469,
title = {Multimodal Audio and Image Music Transcription},
author = {C. Fuente and J. J. Valero-Mas and F. J. Castellanos and J. Calvo-Zaragoza},
year = {2021},
date = {2021-01-01},
booktitle = {Proc. of the 3rd International Workshop on Reading Music Systems},
pages = {18--22},
pubstate = {published},
tppubtype = {inproceedings}
}
Calvo-Zaragoza, J.; Rizo, D.; Iñesta, J. M.
Reconocimiento Óptico de Partituras (OMR) aplicado al Fondo de Música Tradicional IMF-CSIC Book Chapter
In: Gambero-Ustárroz, M.; Ros-Fábregas, E. (Ed.): Musicología en Web. Patrimonio musical y Humanidades Digitales, Chapter 4, pp. 87-109, Edition Reichenberger, 2021, ISBN: 978-3-967280-14-2.
@inbook{k470,
title = {Reconocimiento Óptico de Partituras (OMR) aplicado al Fondo de Música Tradicional IMF-CSIC},
author = {J. Calvo-Zaragoza and D. Rizo and J. M. Iñesta},
editor = {M. Gambero-Ustárroz and E. Ros-Fábregas},
isbn = {978-3-967280-14-2},
year = {2021},
date = {2021-01-01},
booktitle = {Musicología en Web. Patrimonio musical y Humanidades Digitales},
pages = {87--109},
publisher = {Edition Reichenberger},
chapter = {4},
keywords = {HispaMus},
pubstate = {published},
tppubtype = {inbook}
}
Garrido-Munoz, C.; Sánchez-Hernández, A.; Castellanos, F. J.; Calvo-Zaragoza, J.
Domain Adaptation for Document Image Binarization via Domain Classification Proceedings Article
In: Tallón-Ballesteros, A. J. (Ed.): Frontiers in Artificial Intelligence and Applications, pp. 569-582, IOS Press, 2021, ISBN: 978-1-64368-224-2.
BibTeX | Tags: GRE19-04, GV/2020/030
@inproceedings{k480,
title = {Domain Adaptation for Document Image Binarization via Domain Classification},
author = {C. Garrido-Munoz and A. Sánchez-Hernández and F. J. Castellanos and J. Calvo-Zaragoza},
editor = {A. J. Tallón-Ballesteros},
isbn = {978-1-64368-224-2},
year = {2021},
date = {2021-01-01},
booktitle = {Frontiers in Artificial Intelligence and Applications},
pages = {569--582},
publisher = {IOS Press},
keywords = {GRE19-04, GV/2020/030},
pubstate = {published},
tppubtype = {inproceedings}
}
Gallego, A. J.; Calvo-Zaragoza, J.; Fisher, R. B.
Incremental Unsupervised Domain-Adversarial Training of Neural Networks Journal Article
In: IEEE Transactions on Neural Networks and Learning Systems, vol. 32, no. 11, pp. 4864-4878, 2021, ISSN: 2162-2388.
Abstract | Links | BibTeX | Tags: GRE19-04, HispaMus
@article{k455,
title = {Incremental Unsupervised Domain-Adversarial Training of Neural Networks},
author = {A. J. Gallego and J. Calvo-Zaragoza and R. B. Fisher},
url = {https://grfia.dlsi.ua.es/repositori/grfia/pubs/455/2001.04129.pdf},
issn = {2162-2388},
year = {2021},
date = {2021-01-01},
urldate = {2021-01-01},
journal = {IEEE Transactions on Neural Networks and Learning Systems},
volume = {32},
number = {11},
pages = {4864--4878},
abstract = {In the context of supervised statistical learning, it is typically assumed that the training set comes from the same distribution that draws the test samples. When this is not the case, the behavior of the learned model is unpredictable and becomes dependent upon the degree of similarity between the distribution of the training set and the distribution of the test set. One of the research topics that investigates this scenario is referred to as Domain Adaptation (DA). Deep neural networks brought dramatic advances in pattern recognition and that is why there have been many attempts to provide good domain adaptation algorithms for these models. Here we take a different avenue and approach the problem from an incremental point of view, where the model is adapted to the new domain iteratively. We make use of an existing unsupervised domain-adaptation algorithm to identify the target samples on which there is greater confidence about their true label. The output of the model is analyzed in different ways to determine the candidate samples. The selected samples are then added to the source training set by self-labeling, and the process is repeated until all target samples are labeled. This approach implements a form of adversarial training in which, by moving the self-labeled samples from the target to the source set, the DA algorithm is forced to look for new features after each iteration. Our results report a clear improvement with respect to the non-incremental case in several datasets, also outperforming other state-of-the-art domain adaptation algorithms.},
keywords = {GRE19-04, HispaMus},
pubstate = {published},
tppubtype = {article}
}
In the context of supervised statistical learning, it is typically assumed that the training set comes from the same distribution that draws the test samples. When this is not the case, the behavior of the learned model is unpredictable and becomes dependent upon the degree of similarity between the distribution of the training set and the distribution of the test set. One of the research topics that investigates this scenario is referred to as Domain Adaptation (DA). Deep neural networks brought dramatic advances in pattern recognition and that is why there have been many attempts to provide good domain adaptation algorithms for these models. Here we take a different avenue and approach the problem from an incremental point of view, where the model is adapted to the new domain iteratively. We make use of an existing unsupervised domain-adaptation algorithm to identify the target samples on which there is greater confidence about their true label. The output of the model is analyzed in different ways to determine the candidate samples. The selected samples are then added to the source training set by self-labeling, and the process is repeated until all target samples are labeled. This approach implements a form of adversarial training in which, by moving the self-labeled samples from the target to the source set, the DA algorithm is forced to look for new features after each iteration. Our results report a clear improvement with respect to the non-incremental case in several datasets, also outperforming other state-of-the-art domain adaptation algorithms. Ríos-Vila, A.; Rizo, D.; Calvo-Zaragoza, J.
Complete Optical Music Recognition via Agnostic Transcription and Machine Translation Proceedings Article
In: 16th International Conference on Document Analysis and Recognition, pp. 661-675, 2021.
BibTeX | Tags: GV/2020/030
@inproceedings{k477,
title = {Complete Optical Music Recognition via Agnostic Transcription and Machine Translation},
author = {A. Ríos-Vila and D. Rizo and J. Calvo-Zaragoza},
year = {2021},
date = {2021-01-01},
urldate = {2021-01-01},
booktitle = {16th International Conference on Document Analysis and Recognition},
pages = {661--675},
keywords = {GV/2020/030},
pubstate = {published},
tppubtype = {inproceedings}
}
Castellanos, F. J.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Prototype Generation in the String Space via Approximate Median for Data Reduction in Nearest Neighbor classification Journal Article
In: Soft Computing, vol. 25, pp. 15403-15415, 2021.
BibTeX | Tags: GRE19-04, HispaMus
@article{k476,
title = {Prototype Generation in the String Space via Approximate Median for Data Reduction in Nearest Neighbor classification},
author = {F. J. Castellanos and J. J. Valero-Mas and J. Calvo-Zaragoza},
year = {2021},
date = {2021-01-01},
urldate = {2021-01-01},
journal = {Soft Computing},
volume = {25},
pages = {15403--15415},
keywords = {GRE19-04, HispaMus},
pubstate = {published},
tppubtype = {article}
}
Castellanos, F. J.; Gallego, A. J.; Calvo-Zaragoza, J.
Unsupervised Domain Adaptation for Document Analysis of Music Score Images Proceedings Article
In: Proc. of the 22nd International Society for Music Information Retrieval Conference, 2021.
@inproceedings{k475,
  author    = {F. J. Castellanos and A. J. Gallego and J. Calvo-Zaragoza},
  title     = {Unsupervised Domain Adaptation for Document Analysis of Music Score Images},
  booktitle = {Proc. of the 22nd International Society for Music Information Retrieval Conference},
  year      = {2021},
  date      = {2021-01-01},
  urldate   = {2021-01-01},
  keywords  = {GRE19-04},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Román, M. A.
An End-to-End Framework for Audio-to-Score Music Transcription PhD Thesis
2021.
@phdthesis{k462,
title = {An End-to-End Framework for Audio-to-Score Music Transcription},
author = {M. A. Román},
editor = {J. Calvo-Zaragoza and A. Pertusa},
year = {2021},
date = {2021-01-01},
urldate = {2021-01-01},
school = {Universidad de Alicante},
keywords = {HispaMus},
pubstate = {published},
tppubtype = {phdthesis}
}
Mas-Candela, E.; Alfaro-Contreras, M.; Calvo-Zaragoza, J.
Sequential Next-Symbol Prediction for Optical Music Recognition Proceedings Article
In: 16th International Conference on Document Analysis and Recognition, pp. 708-722, 2021, ISBN: 978-3-030-86334-0.
Links | BibTeX | Tags: GV/2020/030
@inproceedings{k478,
title = {Sequential Next-Symbol Prediction for Optical Music Recognition},
author = {E. Mas-Candela and M. Alfaro-Contreras and J. Calvo-Zaragoza},
url = {https://link.springer.com/chapter/10.1007/978-3-030-86334-0_46},
isbn = {978-3-030-86334-0},
year = {2021},
date = {2021-01-01},
urldate = {2021-01-01},
booktitle = {16th International Conference on Document Analysis and Recognition},
pages = {708--722},
keywords = {GV/2020/030},
pubstate = {published},
tppubtype = {inproceedings}
}
López-Gutiérrez, J. C.; Valero-Mas, J. J.; Castellanos, F. J.; Calvo-Zaragoza, J.
Data Augmentation for End-to-End Optical Music Recognition Proceedings Article
In: Proceedings of the 14th IAPR International Workshop on Graphics Recognition (GREC), pp. 59-73, Springer, 2021.
BibTeX | Tags: GV/2020/030
@inproceedings{k473,
title = {Data Augmentation for End-to-End Optical Music Recognition},
author = {J. C. López-Gutiérrez and J. J. Valero-Mas and F. J. Castellanos and J. Calvo-Zaragoza},
year = {2021},
date = {2021-01-01},
urldate = {2021-01-01},
booktitle = {Proceedings of the 14th IAPR International Workshop on Graphics Recognition (GREC)},
pages = {59--73},
publisher = {Springer},
keywords = {GV/2020/030},
pubstate = {published},
tppubtype = {inproceedings}
}
2020
Ríos-Vila, A.; Calvo-Zaragoza, J.; Rizo, D.
Evaluating Simultaneous Recognition and Encoding for Optical Music Recognition Proceedings Article
In: DLfM 2020: 7th International Conference on Digital Libraries for Musicology, pp. 10-17, Association for Computing Machinery, 2020, ISBN: 978-1-4503-8760-6.
@inproceedings{k456,
  title = {Evaluating Simultaneous Recognition and Encoding for Optical Music Recognition},
  author = {A. Ríos-Vila and J. Calvo-Zaragoza and D. Rizo},
  isbn = {978-1-4503-8760-6},
  year = {2020},
  date = {2020-10-01},
  booktitle = {DLfM 2020: 7th International Conference on Digital Libraries for Musicology},
  pages = {10--17},
  publisher = {Association for Computing Machinery},
  keywords = {HispaMus},
  pubstate = {published},
  tppubtype = {inproceedings}
}
2024
Galan-Cuenca, A.; Valero-Mas, J. J.; Martinez-Sevilla, J. C.; Hidalgo-Centeno, A.; Pertusa, A.; Calvo-Zaragoza, J.
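MUSCAT: a Multimodal mUSic Collection for Automatic Transcription of real recordings and image scores Conference
Proceedings of the 32nd ACM International Conference on Multimedia, Association for Computing Machinery, 2024, ISBN: 979-8-4007-0686-8.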
Abstract | Links | BibTeX | Tags:
@conference{nokey,
  title = {MUSCAT: a Multimodal mUSic Collection for Automatic Transcription of real recordings and image scores},
  author = {A. Galan-Cuenca and J. J. Valero-Mas and J. C. Martinez-Sevilla and A. Hidalgo-Centeno and A. Pertusa and J. Calvo-Zaragoza},
  doi = {10.1145/3664647.3681572},
  isbn = {979-8-4007-0686-8},
  year = {2024},
  date = {2024-10-28},
  booktitle = {Proceedings of the 32nd ACM International Conference on Multimedia},
  pages = {583--591},
  publisher = {Association for Computing Machinery},
  abstract = {Multimodal audio-image music transcription has been recently posed as a means of retrieving a digital score representation by leveraging the individual estimations from Automatic Music Transcription (AMT)---acoustic recordings---and Optical Music Recognition (OMR)---image scores---systems. Nevertheless, while proven to outperform single-modality recognition rates, this approach has been exclusively validated under controlled scenarios---monotimbral and monophonic synthetic data---mainly due to a lack of collections with symbolic score-level annotations for both recordings and graphical sheets. To promote research on this topic, this work presents the Multimodal mUSic Collection for Automatic Transcription (MUSCAT) assortment of acoustic recordings, image sheets, and their score-level annotations in several notation formats. This dataset comprises almost 80 hours of real recordings with varied instrumentation and polyphony degrees---ranging from piano to orchestral music---, 1251 scanned sheets, and 880 symbolic scores from 37 composers, which may also be used in other tasks involving metadata such as instrument identification or composer recognition. A fragmented subset of this collection solely focused on acoustic data for score-level AMT---the MUSic Collection for aUtomatic Transcription - fragmented Subset (MUSCUTS) assortment---is also presented together with a baseline experimentation, concluding the need to foster research on this field with real recordings. Finally, a web-based service is also provided to increase the size of the collections collaboratively.},
  keywords = {},
  pubstate = {published},
  tppubtype = {conference}
}
Penarrubia, C.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Contrastive Self-Supervised Learning for Optical Music Recognition Conference
International Workshop on Document Analysis Systems, 2024, ISBN: 978-3-031-70442-0.
Abstract | Links | BibTeX | Tags:
@conference{nokey,
  title = {Contrastive Self-Supervised Learning for Optical Music Recognition},
  author = {C. Penarrubia and J. J. Valero-Mas and J. Calvo-Zaragoza},
  doi = {10.1007/978-3-031-70442-0_19},
  isbn = {978-3-031-70442-0},
  year = {2024},
  date = {2024-09-11},
  urldate = {2024-09-11},
  booktitle = {International Workshop on Document Analysis Systems},
  pages = {312--326},
  abstract = {Optical Music Recognition (OMR) is the research area focused on transcribing images of musical scores. In recent years, this field has seen great development thanks to the emergence of Deep Learning. However, these types of solutions require large volumes of labeled data. To alleviate this problem, Contrastive Self-Supervised Learning (SSL) has emerged as a paradigm that leverages large amounts of unlabeled data to train neural networks, yielding meaningful and robust representations. In this work, we explore its first application to the field of OMR. By utilizing three datasets that represent the heterogeneity of musical scores in notations and graphic styles, and through multiple evaluation protocols, we demonstrate that contrastive SSL delivers promising results, significantly reducing data scarcity challenges in OMR. To the best of our knowledge, this is the first study that integrates these two fields. We hope this research serves as a baseline and stimulates further exploration.},
  keywords = {},
  pubstate = {published},
  tppubtype = {conference}
}
Ríos-Vila, A.; Calvo-Zaragoza, J.; Paquet, T.
Sheet Music Transformer: End-To-End Optical Music Recognition Beyond Monophonic Transcription Conference
Document Analysis and Recognition - ICDAR 2024, vol. 1, Springer Nature Switzerland, 2024, ISBN: 978-3-031-70552-6.
BibTeX | Tags: MultiScore
@conference{RiosVila:ICDAR:2024,
  title = {Sheet Music Transformer: End-To-End Optical Music Recognition Beyond Monophonic Transcription},
  author = {A. Ríos-Vila and J. Calvo-Zaragoza and T. Paquet},
  isbn = {978-3-031-70552-6},
  year = {2024},
  date = {2024-09-02},
  urldate = {2024-09-02},
  booktitle = {Document Analysis and Recognition - ICDAR 2024},
  volume = {1},
  pages = {20--37},
  publisher = {Springer Nature Switzerland},
  keywords = {MultiScore},
  pubstate = {published},
  tppubtype = {conference}
}
Alfaro-Contreras, M.; Rios-Vila, A.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
A Transformer Approach for Polyphonic Audio-to-Score Transcription Proceedings Article
In: Proceedings of the International Conference on Acoustics, Speech, and Signal Processing (ICASSP 2024), Seoul (Korea), 2024.
Links | BibTeX | Tags: MultiScore
@inproceedings{Alfaro-Contreras:ICASSP24,
  title = {A Transformer Approach for Polyphonic Audio-to-Score Transcription},
  author = {M. Alfaro-Contreras and A. Rios-Vila and J. J. Valero-Mas and J. Calvo-Zaragoza},
  doi = {10.1109/ICASSP48485.2024.10447162},
  year = {2024},
  date = {2024-04-19},
  urldate = {2024-04-19},
  booktitle = {Proceedings of the International Conference on Acoustics, Speech, and Signal Processing (ICASSP 2024)},
  address = {Seoul (Korea)},
  keywords = {MultiScore},
  pubstate = {published},
  tppubtype = {inproceedings}
}
2023
Penarrubia, C.; Garrido-Munoz, C.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Efficient notation assembly in optical music recognition Conference
Proceedings of the 24th International Society for Music Information Retrieval Conference, Milan, Italy, 2023, ISBN: 978-1-7327299-3-3.
BibTeX | Tags:
@conference{nokey,
  title = {Efficient notation assembly in optical music recognition},
  author = {C. Penarrubia and C. Garrido-Munoz and J. J. Valero-Mas and J. Calvo-Zaragoza},
  isbn = {978-1-7327299-3-3},
  year = {2023},
  date = {2023-10-30},
  booktitle = {Proceedings of the 24th International Society for Music Information Retrieval Conference},
  pages = {182--189},
  address = {Milan, Italy},
  keywords = {},
  pubstate = {published},
  tppubtype = {conference}
}
Martínez-Sevilla, J. C.; Ríos-Vila, A.; Castellanos, F. J.; Calvo-Zaragoza, J.
A Holistic Approach for Aligned Music and Lyrics Transcription Conference
Document Analysis and Recognition - ICDAR 2023, vol. 1, Springer Nature Switzerland, Cham, 2023, ISBN: 978-3-031-41676-7.
Abstract | Links | BibTeX | Tags: REPERTORIUM
@conference{MartinezSevilla:ICDAR:2023,
  title = {A Holistic Approach for Aligned Music and Lyrics Transcription},
  author = {J. C. Martínez-Sevilla and A. Ríos-Vila and F. J. Castellanos and J. Calvo-Zaragoza},
  editor = {Fink, Gernot A. and Jain, Rajiv and Kise, Koichi and Zanibbi, Richard},
  doi = {10.1007/978-3-031-41676-7_11},
  isbn = {978-3-031-41676-7},
  year = {2023},
  date = {2023-08-28},
  urldate = {2023-08-28},
  booktitle = {Document Analysis and Recognition - ICDAR 2023},
  volume = {1},
  pages = {185--201},
  publisher = {Springer Nature Switzerland},
  address = {Cham},
  abstract = {In this paper, we present the Aligned Music Notation and Lyrics Transcription (AMNLT) challenge, whose goal is to retrieve the content from document images of vocal music. This new research area arises from the need to automatically transcribe notes and lyrics from music scores and align both sources of information conveniently. Although existing methods are able to deal with music notation and text, they work without providing their proper alignment, which is crucial to actually retrieve the content of the piece of vocal music. To overcome this challenge, we consider holistic neural approaches that transcribe music and text in one step, along with an encoding that implicitly aligns the sources of information. The methodology is evaluated on a benchmark specifically designed for AMNLT. The results report that existing methods can obtain high-quality text and music transcriptions, but posterior alignment errors are inevitably found. However, our formulation achieves relative improvements of over 80\% in the metric that considers both transcription and alignment. We hope that this work will establish itself as a future reference for further research on AMNLT.},
  keywords = {REPERTORIUM},
  pubstate = {published},
  tppubtype = {conference}
}
Martínez-Sevilla, J. C.; Alfaro-Contreras, M.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Insights into end-to-end audio-to-score transcription with real recordings: A case study with saxophone works Proceedings Article
In: INTERSPEECH Conference, pp. 2793-2797, Dublin, Ireland, 2023.
Links | BibTeX | Tags: MultiScore
@inproceedings{Martínez-Sevilla2023,
  title = {Insights into end-to-end audio-to-score transcription with real recordings: A case study with saxophone works},
  author = {J. C. Martínez-Sevilla and M. Alfaro-Contreras and J. J. Valero-Mas and J. Calvo-Zaragoza},
  doi = {10.21437/Interspeech.2023-88},
  year = {2023},
  date = {2023-08-20},
  urldate = {2023-08-20},
  booktitle = {INTERSPEECH Conference},
  pages = {2793--2797},
  address = {Dublin, Ireland},
  keywords = {MultiScore},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Alfaro-Contreras, M.; Valero-Mas, J. J.; Iñesta, J. M.; Calvo-Zaragoza, J.
Multimodal Strategies for Image and Audio Music Transcription: A Comparative Study Proceedings Article
In: Pattern Recognition, Computer Vision, and Image Processing. ICPR 2022 International Workshops and Challenges. ICPR 2022. Lecture Notes in Computer Science, pp. 64-77, Springer Nature Switzerland, Cham, 2023, ISBN: 978-3-031-37731-0.
Links | BibTeX | Tags: MultiScore
@inproceedings{k505,
  title = {Multimodal Strategies for Image and Audio Music Transcription: A Comparative Study},
  author = {M. Alfaro-Contreras and J. J. Valero-Mas and J. M. Iñesta and J. Calvo-Zaragoza},
  doi = {10.1007/978-3-031-37731-0_6},
  isbn = {978-3-031-37731-0},
  year = {2023},
  date = {2023-08-10},
  urldate = {2022-01-01},
  booktitle = {Pattern Recognition, Computer Vision, and Image Processing. ICPR 2022 International Workshops and Challenges},
  series = {Lecture Notes in Computer Science},
  volume = {13645},
  pages = {64--77},
  publisher = {Springer Nature Switzerland},
  address = {Cham},
  keywords = {MultiScore},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Garrido-Munoz, C.; Alfaro-Contreras, M.; Calvo-Zaragoza, J.
Evaluating Domain Generalization in Kitchen Utensils Classification Proceedings Article
In: Iberian Conference on Pattern Recognition and Image Analysis, pp. 108-118, 2023.
Links | BibTeX | Tags: MultiScore
@inproceedings{Garrido-Munoz2023,
  title = {Evaluating Domain Generalization in Kitchen Utensils Classification},
  author = {C. Garrido-Munoz and M. Alfaro-Contreras and J. Calvo-Zaragoza},
  doi = {10.1007/978-3-031-36616-1_9},
  year = {2023},
  date = {2023-06-25},
  booktitle = {Iberian Conference on Pattern Recognition and Image Analysis},
  pages = {108--118},
  keywords = {MultiScore},
  pubstate = {published},
  tppubtype = {inproceedings}
}
González-Barrachina, P.; Alfaro-Contreras, M.; Nieto-Hidalgo, M.; Calvo-Zaragoza, J.
Lifelong Learning for Document Image Binarization: An Experimental Study Proceedings Article
In: Iberian Conference on Pattern Recognition and Image Analysis, pp. 146-157, 2023.
Links | BibTeX | Tags: MultiScore
@inproceedings{González-Barrachina2023,
  title = {Lifelong Learning for Document Image Binarization: An Experimental Study},
  author = {P. González-Barrachina and M. Alfaro-Contreras and M. Nieto-Hidalgo and J. Calvo-Zaragoza},
  doi = {10.1007/978-3-031-36616-1_12},
  year = {2023},
  date = {2023-06-25},
  urldate = {2023-06-25},
  booktitle = {Iberian Conference on Pattern Recognition and Image Analysis},
  pages = {146--157},
  keywords = {MultiScore},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Penarrubia, C.; Valero-Mas, J. J.; Gallego, A. J.; Calvo-Zaragoza, J.
Addressing Class Imbalance in Multilabel Prototype Generation for k-Nearest Neighbor Classification Conference
Iberian Conference on Pattern Recognition and Image Analysis, Alicante, Spain, 2023, ISBN: 978-3-031-36616-1.
Abstract | Links | BibTeX | Tags: DOREMI
@conference{nokey,
  title = {Addressing Class Imbalance in Multilabel Prototype Generation for k-Nearest Neighbor Classification},
  author = {C. Penarrubia and J. J. Valero-Mas and A. J. Gallego and J. Calvo-Zaragoza},
  doi = {10.1007/978-3-031-36616-1_2},
  isbn = {978-3-031-36616-1},
  year = {2023},
  date = {2023-06-25},
  booktitle = {Iberian Conference on Pattern Recognition and Image Analysis},
  pages = {15--27},
  address = {Alicante, Spain},
  abstract = {Prototype Generation (PG) methods seek to improve the efficiency of the k-Nearest Neighbor (kNN) classifier by obtaining a reduced version of a given reference dataset following certain heuristics. Despite being largely addressed topic in multiclass scenarios, few works deal with PG in multilabel environments. Hence, the existing proposals exhibit a number of limitations, being label imbalance one of paramount relevance as it constitutes a typical challenge of multilabel datasets. This work proposes two novel merging policies for multilabel PG schemes specifically devised for label imbalance, as well as a mechanism to prevent inappropriate samples from undergoing a reduction process. These proposals are applied to three existing multilabel PG methods—Multilabel Reduction through Homogeneous Clustering, Multilabel Chen, and Multilabel Reduction through Space Partitioning—and evaluated on 12 different data assortments with different degrees of label imbalance. The results prove that the proposals overcome—in some cases in a significant manner—those obtained with the original methods, hence validating the presented approaches and enabling further research lines on this topic.},
  keywords = {DOREMI},
  pubstate = {published},
  tppubtype = {conference}
}
Alfaro-Contreras, M.; Iñesta, J. M.; Calvo-Zaragoza, J.
Optical Music Recognition for Homophonic Scores with Neural Networks and Synthetic Music Generation Journal Article
In: International Journal of Multimedia Information Retrieval, vol. 12, pp. 12-24, 2023.
@article{Alfaro-Contreras2023b,
  title = {Optical Music Recognition for Homophonic Scores with Neural Networks and Synthetic Music Generation},
  author = {M. Alfaro-Contreras and J. M. Iñesta and J. Calvo-Zaragoza},
  doi = {10.1007/s13735-023-00278-5},
  year = {2023},
  date = {2023-05-26},
  urldate = {2023-05-26},
  journal = {International Journal of Multimedia Information Retrieval},
  volume = {12},
  pages = {12--24},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Ríos-Vila, A.; Rizo, D.; Iñesta, J. M.; Calvo-Zaragoza, J.
End-to-end optical music recognition for pianoform sheet music Journal Article
In: International Journal on Document Analysis and Recognition (IJDAR), iss. ICDAR 2023, 2023, ISSN: 1433-2825.
Abstract | Links | BibTeX | Tags: MultiScore
@article{Ríos-Vila2023,
  author = {A. Ríos-Vila and D. Rizo and J. M. Iñesta and J. Calvo-Zaragoza},
  title = {End-to-end optical music recognition for pianoform sheet music},
  journal = {International Journal on Document Analysis and Recognition (IJDAR)},
  issue = {ICDAR 2023},
  year = {2023},
  date = {2023-05-12},
  urldate = {2023-05-12},
  issn = {1433-2825},
  doi = {10.1007/s10032-023-00432-z},
  url = {https://link.springer.com/content/pdf/10.1007/s10032-023-00432-z.pdf},
  abstract = {End-to-end solutions have brought about significant advances in the field of Optical Music Recognition. These approaches directly provide the symbolic representation of a given image of a musical score. Despite this, several documents, such as pianoform musical scores, cannot yet benefit from these solutions since their structural complexity does not allow their effective transcription. This paper presents a neural method whose objective is to transcribe these musical scores in an end-to-end fashion. We also introduce the GrandStaff dataset, which contains 53,882 single-system piano scores in common western modern notation. The sources are encoded in both a standard digital music representation and its adaptation for current transcription technologies. The method proposed in this paper is trained and evaluated using this dataset. The results show that the approach presented is, for the first time, able to effectively transcribe pianoform notation in an end-to-end manner.},
  keywords = {MultiScore},
  pubstate = {published},
  tppubtype = {article}
}
Alfaro-Contreras, M.; Ríos-Vila, A.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Few-Shot Symbol Classification via Self-Supervised Learning and Nearest Neighbor Journal Article
In: Pattern Recognition Letters, vol. 167, pp. 1-8, 2023.
Links | BibTeX | Tags: MultiScore
@article{Alfaro-Contreras2023,
  title = {Few-Shot Symbol Classification via Self-Supervised Learning and Nearest Neighbor},
  author = {M. Alfaro-Contreras and A. Ríos-Vila and J. J. Valero-Mas and J. Calvo-Zaragoza},
  doi = {10.1016/j.patrec.2023.01.014},
  year = {2023},
  date = {2023-03-01},
  journal = {Pattern Recognition Letters},
  volume = {167},
  pages = {1--8},
  keywords = {MultiScore},
  pubstate = {published},
  tppubtype = {article}
}
Alfaro-Contreras, M.; Valero-Mas, J. J.; Iñesta, J. M.; Calvo-Zaragoza, J.
Late multimodal fusion for image and audio music transcription Journal Article
In: Expert Systems With Applications, vol. 216, pp. 119491-119500, 2023.
Links | BibTeX | Tags: MultiScore
@article{Alfaro-Contreras2023c,
  title = {Late multimodal fusion for image and audio music transcription},
  author = {M. Alfaro-Contreras and J. J. Valero-Mas and J. M. Iñesta and J. Calvo-Zaragoza},
  doi = {10.1016/j.eswa.2022.119491},
  year = {2023},
  date = {2023-01-01},
  urldate = {2023-01-01},
  journal = {Expert Systems With Applications},
  volume = {216},
  pages = {119491--119500},
  keywords = {MultiScore},
  pubstate = {published},
  tppubtype = {article}
}
Sánchez-Ferrer, A.; Valero-Mas, J. J.; Gallego, A. J.; Calvo-Zaragoza, J.
An Experimental Study on Marine Debris Location and Recognition using Object Detection Journal Article
In: Pattern Recognition Letters, 2023, ISSN: 0167-8655.
Abstract | Links | BibTeX | Tags: TADMar
@article{k521,
  title = {An Experimental Study on Marine Debris Location and Recognition using Object Detection},
  author = {A. Sánchez-Ferrer and J. J. Valero-Mas and A. J. Gallego and J. Calvo-Zaragoza},
  doi = {10.1016/j.patrec.2022.12.019},
  issn = {0167-8655},
  year = {2023},
  date = {2023-01-01},
  urldate = {2023-01-01},
  journal = {Pattern Recognition Letters},
  abstract = {The large amount of debris in our oceans is a global problem that dramatically impacts marine fauna and flora. While a large number of human-based campaigns have been proposed to tackle this issue, these efforts have been deemed insufficient due to the insurmountable amount of existing litter. In response to that, there exists a high interest in the use of autonomous underwater vehicles (AUV) that may locate, identify, and collect this garbage automatically. To perform such a task, AUVs consider state-of-the-art object detection techniques based on deep neural networks due to their reported high performance. Nevertheless, these techniques generally require large amounts of data with fine-grained annotations. In this work, we explore the capabilities of the reference object detector Mask Region-based Convolutional Neural Networks for automatic marine debris location and classification in the context of limited data availability. Considering the recent CleanSea corpus, we pose several scenarios regarding the amount of available train data and study the possibility of mitigating the adverse effects of data scarcity with synthetic marine scenes. Our results achieve a new state of the art in the task, establishing a new reference for future research. In addition, it is shown that the task still has room for improvement and that the lack of data can be somehow alleviated, yet to a limited extent.},
  keywords = {TADMar},
  pubstate = {published},
  tppubtype = {article}
}
2022
Ríos-Vila, A.; Iñesta, J. M.; Calvo-Zaragoza, J.
End-to-End Full-Page Optical Music Recognition for Mensural Notation Proceedings Article
In: Proceedings of the 23rd International Society for Music Information Retrieval Conference, pp. 226-232, 2022, ISBN: 978-1-7327299-2-6.
Abstract | Links | BibTeX | Tags: Leonardo2021, MultiScore
@inproceedings{Ríos-Vila2022,
  title = {End-to-End Full-Page Optical Music Recognition for Mensural Notation},
  author = {A. Ríos-Vila and J. M. Iñesta and J. Calvo-Zaragoza},
  url = {https://zenodo.org/record/7342678/files/000026.pdf?download=1},
  doi = {10.5281/zenodo.7342678},
  isbn = {978-1-7327299-2-6},
  year = {2022},
  date = {2022-12-04},
  urldate = {2022-12-04},
  booktitle = {Proceedings of the 23rd International Society for Music Information Retrieval Conference},
  pages = {226--232},
  abstract = {Optical Music Recognition (OMR) systems typically consider workflows that include several steps, such as staff detection, symbol recognition, and semantic reconstruction. However, fine-tuning these systems is costly due to the specific data labeling process that has to be performed to train models for each of these steps. In this paper, we present the first segmentation-free full-page OMR system that receives a page image and directly outputs the transcription in a single step. This model requires only the annotations of full score pages, which greatly alleviates the task of manual labeling. The model has been tested with early music written in mensural notation, for which the presented approach is especially beneficial. Results show that this methodology provides a solution with promising results and establishes a new line of research for holistic transcription of music score pages.},
  keywords = {Leonardo2021, MultiScore},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Rizo, D.; Delgado, T.; Calvo-Zaragoza, J.; Madueño, A.; García-Iasci, P.
Speeding-up the encoding of mensural collections from Spanish libraries Journal Article
In: IAML 2022 Prague, 2022.
BibTeX | Tags: MultiScore
@article{k502,
  author = {D. Rizo and T. Delgado and J. Calvo-Zaragoza and A. Madueño and P. García-Iasci},
  title = {Speeding-up the encoding of mensural collections from Spanish libraries},
  journal = {IAML 2022 Prague},
  booktitle = {IAML 2022 Prague},
  organization = {IAML},
  year = {2022},
  date = {2022-07-01},
  keywords = {MultiScore},
  pubstate = {published},
  tppubtype = {article}
}
Alfaro-Contreras, M.; Ríos-Vila, A.; Valero-Mas, J. J.; Iñesta, J. M.; Calvo-Zaragoza, J.
Decoupling music notation to improve end-to-end Optical Music Recognition Journal Article
In: Pattern Recognition Letters, vol. 158, pp. 157-163, 2022, ISSN: 0167-8655.
Abstract | Links | BibTeX | Tags: MultiScore
@article{Alfaro-Contreras2022,
  title = {Decoupling music notation to improve end-to-end Optical Music Recognition},
  author = {M. Alfaro-Contreras and A. Ríos-Vila and J. J. Valero-Mas and J. M. Iñesta and J. Calvo-Zaragoza},
  doi = {10.1016/j.patrec.2022.04.032},
  issn = {0167-8655},
  year = {2022},
  date = {2022-06-01},
  urldate = {2022-06-01},
  journal = {Pattern Recognition Letters},
  volume = {158},
  pages = {157--163},
  abstract = {Inspired by the Text Recognition field, end-to-end schemes based on Convolutional Recurrent Neural Networks (CRNN) trained with the Connectionist Temporal Classification (CTC) loss function are considered one of the current state-of-the-art techniques for staff-level Optical Music Recognition (OMR). Unlike text symbols, music-notation elements may be defined as a combination of (i) a shape primitive located in (ii) a certain position in a staff. However, this double nature is generally neglected in the learning process, as each combination is treated as a single token. In this work, we study whether exploiting such particularity of music notation actually benefits the recognition performance and, if so, which approach is the most appropriate. For that, we thoroughly review existing specific approaches that explore this premise and propose different combinations of them. Furthermore, considering the limitations observed in such approaches, a novel decoding strategy specifically designed for OMR is proposed. The results obtained with four different corpora of historical manuscripts show the relevance of leveraging this double nature of music notation since it outperforms the standard approaches where it is ignored. In addition, the proposed decoding leads to significant reductions in the error rates with respect to the other cases.},
  keywords = {MultiScore},
  pubstate = {published},
  tppubtype = {article}
}
Alfaro-Contreras, M.; Valero-Mas, J. J.; Iñesta, J. M.; Calvo-Zaragoza, J.
Insights into transfer learning between image and audio music transcription Proceedings Article
In: Sound and Music Computing Conference, pp. 295-301, Zenodo, Saint-Étienne, France, 2022.
Abstract | Links | BibTeX | Tags: MultiScore
@inproceedings{Alfaro-Contreras2022b,
  title = {Insights into transfer learning between image and audio music transcription},
  author = {M. Alfaro-Contreras and J. J. Valero-Mas and J. M. Iñesta and J. Calvo-Zaragoza},
  doi = {10.5281/zenodo.6797870},
  year = {2022},
  date = {2022-06-01},
  urldate = {2022-06-01},
  booktitle = {Sound and Music Computing Conference},
  pages = {295--301},
  publisher = {Zenodo},
  address = {Saint-Étienne, France},
  abstract = {Optical Music Recognition (OMR) and Automatic Music Transcription (AMT) stand for the research fields that devise methods to transcribe music sources---documents or audio signals, respectively---into a structured digital format. Historically, they have followed different approaches to achieve the same goal. However, their recent definition in terms of sequence labeling tasks gathers them under a common formulation framework. Under this premise, one may wonder if there exist any synergies between the two fields that could be exploited to improve the individual recognition rates in their respective domains. In this work, we aim to further explore this question from a Transfer Learning (TL) point of view in the context of neural end-to-end recognition models. More precisely, we consider a music transcription system, trained on either image or audio data, and adapt its performance to the unseen domain during the training phase using different TL schemes. Results show that knowledge transfer slightly boosts model performance with sufficient available data, but it is not properly leveraged when the latter condition is not met. This opens up a new promising, yet challenging, research path towards building an effective bridge between two solutions of the same problem.},
  keywords = {MultiScore},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Arroyo, V.; Valero-Mas, J. J.; Calvo-Zaragoza, J.; Pertusa, A.
Neural audio-to-score music transcription for unconstrained polyphony using compact output representations Proceedings Article
In: Proc. of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), IEEE, Singapore, Singapore, 2022.
BibTeX | Tags: MultiScore
@inproceedings{k487,
  title = {Neural audio-to-score music transcription for unconstrained polyphony using compact output representations},
  author = {V. Arroyo and J. J. Valero-Mas and J. Calvo-Zaragoza and A. Pertusa},
  year = {2022},
  date = {2022-05-01},
  booktitle = {Proc. of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  publisher = {IEEE},
  address = {Singapore, Singapore},
  keywords = {MultiScore},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Garrido-Munoz, C.; Ríos-Vila, A.; Calvo-Zaragoza, J.
Retrieval of Music-Notation Primitives via Image-to-Sequence Approaches Proceedings Article
In: Iberian Pattern Recognition and Image Analysis, IbPRIA 2022., pp. 482-492, Aveiro, Portugal, 2022, ISBN: 978-3-031-04880-7.
BibTeX | Tags: Leonardo2021
@inproceedings{k493,
  title = {Retrieval of Music-Notation Primitives via Image-to-Sequence Approaches},
  author = {C. Garrido-Munoz and A. Ríos-Vila and J. Calvo-Zaragoza},
  isbn = {978-3-031-04880-7},
  year = {2022},
  date = {2022-05-01},
  booktitle = {Iberian Pattern Recognition and Image Analysis, IbPRIA 2022},
  pages = {482--492},
  address = {Aveiro, Portugal},
  keywords = {Leonardo2021},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Mas-Candela, E.; Ríos-Vila, A.; Calvo-Zaragoza, J.
A First Approach to Image Transformation Sequence Retrieval Proceedings Article
In: Iberian Pattern Recognition and Image Analysis, IbPRIA 2022., pp. 321-332, Aveiro, Portugal, 2022.
BibTeX | Tags: MultiScore
@inproceedings{k494,
  title = {A First Approach to Image Transformation Sequence Retrieval},
  author = {E. Mas-Candela and A. Ríos-Vila and J. Calvo-Zaragoza},
  year = {2022},
  date = {2022-05-01},
  booktitle = {Iberian Pattern Recognition and Image Analysis, IbPRIA 2022},
  pages = {321--332},
  address = {Aveiro, Portugal},
  keywords = {MultiScore},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Sánchez-Ferrer, A.; Gallego, A. J.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
The CleanSea Set: A Benchmark Corpus for Underwater Debris Detection and Recognition Proceedings Article
In: 10th Iberian Conference on Pattern Recognition and Image Analysis (IbPRIA), pp. 616–628, Aveiro, Portugal, 2022, ISBN: 978-3-031-04881-4.
@inproceedings{k512,
  author = {A. Sánchez-Ferrer and A. J. Gallego and J. J. Valero-Mas and J. Calvo-Zaragoza},
  title = {The CleanSea Set: A Benchmark Corpus for Underwater Debris Detection and Recognition},
  booktitle = {10th Iberian Conference on Pattern Recognition and Image Analysis (IbPRIA)},
  pages = {616--628},
  address = {Aveiro, Portugal},
  year = {2022},
  date = {2022-05-01},
  isbn = {978-3-031-04881-4},
  abstract = {In recent years, the large amount of debris scattered throughout the ocean is becoming one of the major pollution problems, causing extinction of species and accelerating the degradation of our planet, among other environmental issues. Since the manual treatment of this waste represents a considerably tedious task, autonomous frameworks are gaining attention. Due to their reported good performance, such frameworks generally rely on Deep Learning techniques. However, the scarcity of data coupled with the inherent difficulties of the field---debris with different shapes and colors due to long-lasting exposure to the ocean, illumination variability or sea conditions---makes detecting underwater objects a particularly challenging task. The contribution of this work to the field is double: on the one hand, we introduce a novel data collection for supervised learning---the CleanSea corpus---annotated at both the bound box and contour levels of the objects to contribute with the research and progress in the field and on the other hand, we devise and optimize a recognition model based on the reference Mask Object-Based Convolutional Neural Network for this set to establish a benchmark for future comparison and assess its performance in both simulated and real-world scenarios. Results show the relevance of the contributions as the devised model is capable of properly addressing the detection and recognition of general debris when trained with the introduced CleanSea corpus.},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Ríos-Vila, A.; Iñesta, J. M.; Calvo-Zaragoza, J.
On the Use of Transformers for End-to-End Optical Music Recognition Proceedings Article
In: Iberian Pattern Recognition and Image Analysis, IbPRIA 2022., pp. 470-481, Aveiro, Portugal, 2022, ISBN: 978-3-031-04880-7.
BibTeX | Tags: MultiScore
@inproceedings{k492,
  author    = {A. Ríos-Vila and J. M. Iñesta and J. Calvo-Zaragoza},
  title     = {On the Use of Transformers for End-to-End Optical Music Recognition},
  booktitle = {Iberian Pattern Recognition and Image Analysis, IbPRIA 2022},
  pages     = {470--481},
  address   = {Aveiro, Portugal},
  year      = {2022},
  date      = {2022-05-01},
  urldate   = {2022-05-01},
  isbn      = {978-3-031-04880-7},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Fuente, C.; Valero-Mas, J. J.; Castellanos, F. J.; Calvo-Zaragoza, J.
Multimodal Image and Audio Music Transcription Journal Article
In: International Journal of Multimedia Information Retrieval, vol. 11, pp. 77-84, 2022.
BibTeX | Tags: MultiScore
@article{k479,
  author    = {C. Fuente and J. J. Valero-Mas and F. J. Castellanos and J. Calvo-Zaragoza},
  title     = {Multimodal Image and Audio Music Transcription},
  journal   = {International Journal of Multimedia Information Retrieval},
  volume    = {11},
  pages     = {77--84},
  year      = {2022},
  date      = {2022-01-01},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {article}
}
Rosello, A.; Ayllon, E.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Test Sample Selection for Handwriting Recognition Through Language Modeling Proceedings Article
In: Pattern Recognition and Image Analysis - 10th Iberian Conference, IbPRIA 2022, Aveiro, Portugal, May 4-6, 2022, Proceedings, 2022.
BibTeX | Tags: MultiScore
@inproceedings{k498,
  author    = {A. Rosello and E. Ayllon and J. J. Valero-Mas and J. Calvo-Zaragoza},
  title     = {Test Sample Selection for Handwriting Recognition Through Language Modeling},
  booktitle = {Pattern Recognition and Image Analysis - 10th Iberian Conference, IbPRIA 2022, Aveiro, Portugal, May 4-6, 2022, Proceedings},
  year      = {2022},
  date      = {2022-01-01},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Castellanos, F. J.; Gallego, A. J.; Calvo-Zaragoza, J.; Fujinaga, I.
Domain Adaptation for Staff-Region Retrieval of Music Score Images Journal Article
In: International Journal on Document Analysis and Recognition, 2022, ISSN: 1433-2825.
BibTeX | Tags: MultiScore
@article{k500,
  author    = {F. J. Castellanos and A. J. Gallego and J. Calvo-Zaragoza and I. Fujinaga},
  title     = {Domain Adaptation for Staff-Region Retrieval of Music Score Images},
  journal   = {International Journal on Document Analysis and Recognition},
  year      = {2022},
  date      = {2022-01-01},
  issn      = {1433-2825},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {article}
}
Alfaro-Contreras, M.; Ríos-Vila, A.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Few-Shot Music Symbol Classification via Self-Supervised Learning and Nearest Neighbor Proceedings Article
In: Pattern Recognition. ICPR International Workshops and Challenges, 2022.
BibTeX | Tags: MultiScore
@inproceedings{k504,
  author    = {M. Alfaro-Contreras and A. Ríos-Vila and J. J. Valero-Mas and J. Calvo-Zaragoza},
  title     = {Few-Shot Music Symbol Classification via Self-Supervised Learning and Nearest Neighbor},
  booktitle = {Pattern Recognition. ICPR International Workshops and Challenges},
  year      = {2022},
  date      = {2022-01-01},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Sáez-Pérez, J.; Gallego, A. J.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Domain Adaptation in Robotics: A Study Case on Kitchen Utensil Recognition Proceedings Article
In: 10th Iberian Conference on Pattern Recognition and Image Analysis (IbPRIA), 2022.
@inproceedings{k506,
  author    = {J. Sáez-Pérez and A. J. Gallego and J. J. Valero-Mas and J. Calvo-Zaragoza},
  title     = {Domain Adaptation in Robotics: A Study Case on Kitchen Utensil Recognition},
  booktitle = {10th Iberian Conference on Pattern Recognition and Image Analysis (IbPRIA)},
  year      = {2022},
  date      = {2022-01-01},
  keywords  = {ROMA},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Garrido-Munoz, C.; Ríos-Vila, A.; Calvo-Zaragoza, J.
A holistic approach for image-to-graph: application to optical music recognition Journal Article
In: International Journal on Document Analysis and Recognition, 2022.
BibTeX | Tags: Leonardo2021
@article{k522,
  author    = {C. Garrido-Munoz and A. Ríos-Vila and J. Calvo-Zaragoza},
  title     = {A holistic approach for image-to-graph: application to optical music recognition},
  journal   = {International Journal on Document Analysis and Recognition},
  year      = {2022},
  date      = {2022-01-01},
  urldate   = {2022-01-01},
  keywords  = {Leonardo2021},
  pubstate  = {published},
  tppubtype = {article}
}
Ríos-Vila, A.; Iñesta, J. M.; Calvo-Zaragoza, J.
End-to-End Full-Page Optical Music Recognition for Mensural Notation Proceedings Article
In: Proceedings of the 23rd International Society for Music Information Retrieval Conference, ISMIR, Bangalore, India, 2022.
Abstract | BibTeX | Tags: Leonardo2021, MultiScore
@inproceedings{k499,
  author    = {A. Ríos-Vila and J. M. Iñesta and J. Calvo-Zaragoza},
  title     = {End-to-End Full-Page Optical Music Recognition for Mensural Notation},
  booktitle = {Proceedings of the 23rd International Society for Music Information Retrieval Conference, ISMIR},
  address   = {Bangalore, India},
  year      = {2022},
  date      = {2022-01-01},
  urldate   = {2022-01-01},
  abstract  = {Optical Music Recognition (OMR) systems typically consider workflows that include several steps, such as staff detection, symbol recognition, and semantic reconstruction. However, fine-tuning these systems is costly due to the specific data labeling process that has to be performed to train models for each of these steps. In this paper, we present the first segmentation-free full-page OMR system that receives a page image and directly outputs the transcription in a single step. This model requires only the annotations of full score pages, which greatly alleviates the task of manual labeling. The model has been tested with early music written in mensural notation, for which the presented approach is especially beneficial. Results show that this methodology provides a solution with promising results and establishes a new line of research for holistic transcription of music score pages.},
  keywords  = {Leonardo2021, MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Castellanos, F. J.; Garrido-Munoz, C.; Ríos-Vila, A.; Calvo-Zaragoza, J.
Region-based Layout Analysis of Music Score Images Journal Article
In: Expert Systems with Applications, pp. 118211, 2022, ISSN: 0957-4174.
BibTeX | Tags: MultiScore
@article{k486,
  author    = {F. J. Castellanos and C. Garrido-Munoz and A. Ríos-Vila and J. Calvo-Zaragoza},
  title     = {Region-based Layout Analysis of Music Score Images},
  journal   = {Expert Systems with Applications},
  pages     = {118211},
  year      = {2022},
  date      = {2022-01-01},
  urldate   = {2022-01-01},
  issn      = {0957-4174},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {article}
}
de la Fuente, C.; Castellanos, F. J.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Multimodal Recognition of Frustration during Game-Play with Deep Neural Networks Journal Article
In: Multimedia Tools and Applications, 2022.
BibTeX | Tags:
@article{k501,
  author    = {C. de la Fuente and F. J. Castellanos and J. J. Valero-Mas and J. Calvo-Zaragoza},
  title     = {Multimodal Recognition of Frustration during Game-Play with Deep Neural Networks},
  journal   = {Multimedia Tools and Applications},
  year      = {2022},
  date      = {2022-01-01},
  urldate   = {2022-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
2021
Castellanos, F. J.; Gallego, A. J.; Calvo-Zaragoza, J.
An Unsupervised Domain Adaptation framework for Layout Analysis of Music Score Images Proceedings Article
In: Proceedings of the 14th Machine Learning and Music Workshop, pp. 6, 2021.
@inproceedings{k482,
  author    = {F. J. Castellanos and A. J. Gallego and J. Calvo-Zaragoza},
  title     = {An Unsupervised Domain Adaptation framework for Layout Analysis of Music Score Images},
  booktitle = {Proceedings of the 14th Machine Learning and Music Workshop},
  pages     = {6},
  year      = {2021},
  date      = {2021-12-01},
  keywords  = {GRE19-04, ROMA},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Ríos-Vila, A.; Calvo-Zaragoza, J.; Iñesta, J. M.
CTC-based end-to-end approach for full page Optical Music Recognition Proceedings Article
In: Proceedings of the 14th Machine Learning and Music Workshop, pp. 11, 2021.
BibTeX | Tags: MultiScore
@inproceedings{k488,
  author    = {A. Ríos-Vila and J. Calvo-Zaragoza and J. M. Iñesta},
  title     = {CTC-based end-to-end approach for full page Optical Music Recognition},
  booktitle = {Proceedings of the 14th Machine Learning and Music Workshop},
  pages     = {11},
  year      = {2021},
  date      = {2021-12-01},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Calvo-Zaragoza, J.; Pertusa, A.; Gallego, A. J.; Iñesta, J. M.; Micó, L.; Oncina, J.; Perez-Sancho, C.; Ponce de León, P. J.; Rizo, D.
MultiScore Project: Multimodal Transcription of Music Scores Proceedings Article
In: Proceedings of the 14th Machine Learning and Music Workshop, pp. 3, 2021.
Links | BibTeX | Tags: MultiScore
@inproceedings{k481,
  author    = {J. Calvo-Zaragoza and A. Pertusa and A. J. Gallego and J. M. Iñesta and L. Micó and J. Oncina and C. Perez-Sancho and {Ponce de León}, P. J. and D. Rizo},
  title     = {MultiScore Project: Multimodal Transcription of Music Scores},
  booktitle = {Proceedings of the 14th Machine Learning and Music Workshop},
  pages     = {3},
  year      = {2021},
  date      = {2021-12-01},
  urldate   = {2021-12-01},
  url       = {https://grfia.dlsi.ua.es/repositori/grfia/pubs/481/MML2021__MultiScore_Final.pdf},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Castellanos, F. J.; Gallego, A. J.; Calvo-Zaragoza, J.
Unsupervised Neural Domain Adaptation for Document Image Binarization Journal Article
In: Pattern Recognition, vol. 119, pp. 108099, 2021.
BibTeX | Tags: GRE19-04, HispaMus
@article{k467,
  author    = {F. J. Castellanos and A. J. Gallego and J. Calvo-Zaragoza},
  title     = {Unsupervised Neural Domain Adaptation for Document Image Binarization},
  journal   = {Pattern Recognition},
  volume    = {119},
  pages     = {108099},
  year      = {2021},
  date      = {2021-11-01},
  urldate   = {2021-11-01},
  keywords  = {GRE19-04, HispaMus},
  pubstate  = {published},
  tppubtype = {article}
}
Alfaro-Contreras, M.; Rizo, D.; Iñesta, J. M.; Calvo-Zaragoza, J.
OMR-assisted transcription: a case study with early prints Proceedings Article
In: Proceedings of the 22nd International Society for Music Information Retrieval Conference, ISMIR, pp. 35-41, 2021, ISBN: 978-1-7327299-0-2.
BibTeX | Tags: MultiScore
@inproceedings{k483,
  author    = {M. Alfaro-Contreras and D. Rizo and J. M. Iñesta and J. Calvo-Zaragoza},
  title     = {OMR-assisted transcription: a case study with early prints},
  booktitle = {Proceedings of the 22nd International Society for Music Information Retrieval Conference, ISMIR},
  pages     = {35--41},
  year      = {2021},
  date      = {2021-11-01},
  urldate   = {2021-11-01},
  isbn      = {978-1-7327299-0-2},
  keywords  = {MultiScore},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Fuente, C.; Valero-Mas, J. J.; Castellanos, F. J.; Calvo-Zaragoza, J.
Multimodal Audio and Image Music Transcription Proceedings Article
In: Proc. of the 3rd International Workshop on Reading Music Systems, pp. 18-22, 2021.
BibTeX | Tags:
@inproceedings{k469,
  author    = {C. Fuente and J. J. Valero-Mas and F. J. Castellanos and J. Calvo-Zaragoza},
  title     = {Multimodal Audio and Image Music Transcription},
  booktitle = {Proc. of the 3rd International Workshop on Reading Music Systems},
  pages     = {18--22},
  year      = {2021},
  date      = {2021-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Calvo-Zaragoza, J.; Rizo, D.; Iñesta, J. M.
Reconocimiento Óptico de Partituras (OMR) aplicado al Fondo de Música Tradicional IMF-CSIC Book Chapter
In: Gambero-Ustárroz, M.; Ros-Fábregas, E. (Ed.): Musicología en Web. Patrimonio musical y Humanidades Digitales, Chapter 4, pp. 87-109, Edition Reichenberger, 2021, ISBN: 978-3-967280-14-2.
@inbook{k470,
  author    = {J. Calvo-Zaragoza and D. Rizo and J. M. Iñesta},
  title     = {Reconocimiento Óptico de Partituras (OMR) aplicado al Fondo de Música Tradicional IMF-CSIC},
  editor    = {M. Gambero-Ustárroz and E. Ros-Fábregas},
  booktitle = {Musicología en Web. Patrimonio musical y Humanidades Digitales},
  chapter   = {4},
  pages     = {87--109},
  publisher = {Edition Reichenberger},
  year      = {2021},
  date      = {2021-01-01},
  isbn      = {978-3-967280-14-2},
  keywords  = {HispaMus},
  pubstate  = {published},
  tppubtype = {inbook}
}
Garrido-Munoz, C.; Sánchez-Hernández, A.; Castellanos, F. J.; Calvo-Zaragoza, J.
Domain Adaptation for Document Image Binarization via Domain Classification Proceedings Article
In: Tallón-Ballesteros, A. J. (Ed.): Frontiers in Artificial Intelligence and Applications, pp. 569-582, IOS Press, 2021, ISBN: 978-1-64368-224-2.
BibTeX | Tags: GRE19-04, GV/2020/030
@inproceedings{k480,
  author    = {C. Garrido-Munoz and A. Sánchez-Hernández and F. J. Castellanos and J. Calvo-Zaragoza},
  title     = {Domain Adaptation for Document Image Binarization via Domain Classification},
  editor    = {A. J. Tallón-Ballesteros},
  booktitle = {Frontiers in Artificial Intelligence and Applications},
  pages     = {569--582},
  publisher = {IOS Press},
  year      = {2021},
  date      = {2021-01-01},
  isbn      = {978-1-64368-224-2},
  keywords  = {GRE19-04, GV/2020/030},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Gallego, A. J.; Calvo-Zaragoza, J.; Fisher, R. B.
Incremental Unsupervised Domain-Adversarial Training of Neural Networks Journal Article
In: IEEE Transactions on Neural Networks and Learning Systems, vol. 32, no. 11, pp. 4864-4878, 2021, ISSN: 2162-2388.
Abstract | Links | BibTeX | Tags: GRE19-04, HispaMus
@article{k455,
  author    = {A. J. Gallego and J. Calvo-Zaragoza and R. B. Fisher},
  title     = {Incremental Unsupervised Domain-Adversarial Training of Neural Networks},
  journal   = {IEEE Transactions on Neural Networks and Learning Systems},
  volume    = {32},
  number    = {11},
  pages     = {4864--4878},
  year      = {2021},
  date      = {2021-01-01},
  urldate   = {2021-01-01},
  issn      = {2162-2388},
  url       = {https://grfia.dlsi.ua.es/repositori/grfia/pubs/455/2001.04129.pdf},
  abstract  = {In the context of supervised statistical learning, it is typically assumed that the training set comes from the same distribution that draws the test samples. When this is not the case, the behavior of the learned model is unpredictable and becomes dependent upon the degree of similarity between the distribution of the training set and the distribution of the test set. One of the research topics that investigates this scenario is referred to as Domain Adaptation (DA). Deep neural networks brought dramatic advances in pattern recognition and that is why there have been many attempts to provide good domain adaptation algorithms for these models. Here we take a different avenue and approach the problem from an incremental point of view, where the model is adapted to the new domain iteratively. We make use of an existing unsupervised domain-adaptation algorithm to identify the target samples on which there is greater confidence about their true label. The output of the model is analyzed in different ways to determine the candidate samples. The selected samples are then added to the source training set by self-labeling, and the process is repeated until all target samples are labeled. This approach implements a form of adversarial training in which, by moving the self-labeled samples from the target to the source set, the DA algorithm is forced to look for new features after each iteration. Our results report a clear improvement with respect to the non-incremental case in several datasets, also outperforming other state-of-the-art domain adaptation algorithms.},
  keywords  = {GRE19-04, HispaMus},
  pubstate  = {published},
  tppubtype = {article}
}
Ríos-Vila, A.; Rizo, D.; Calvo-Zaragoza, J.
Complete Optical Music Recognition via Agnostic Transcription and Machine Translation Proceedings Article
In: 16th International Conference on Document Analysis and Recognition, pp. 661-675, 2021.
BibTeX | Tags: GV/2020/030
@inproceedings{k477,
  author    = {A. Ríos-Vila and D. Rizo and J. Calvo-Zaragoza},
  title     = {Complete Optical Music Recognition via Agnostic Transcription and Machine Translation},
  booktitle = {16th International Conference on Document Analysis and Recognition},
  pages     = {661--675},
  year      = {2021},
  date      = {2021-01-01},
  urldate   = {2021-01-01},
  keywords  = {GV/2020/030},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Castellanos, F. J.; Valero-Mas, J. J.; Calvo-Zaragoza, J.
Prototype Generation in the String Space via Approximate Median for Data Reduction in Nearest Neighbor classification Journal Article
In: Soft Computing, vol. 25, pp. 15403-15415, 2021.
BibTeX | Tags: GRE19-04, HispaMus
@article{k476,
  author    = {F. J. Castellanos and J. J. Valero-Mas and J. Calvo-Zaragoza},
  title     = {Prototype Generation in the String Space via Approximate Median for Data Reduction in Nearest Neighbor classification},
  journal   = {Soft Computing},
  volume    = {25},
  pages     = {15403--15415},
  year      = {2021},
  date      = {2021-01-01},
  urldate   = {2021-01-01},
  keywords  = {GRE19-04, HispaMus},
  pubstate  = {published},
  tppubtype = {article}
}
Castellanos, F. J.; Gallego, A. J.; Calvo-Zaragoza, J.
Unsupervised Domain Adaptation for Document Analysis of Music Score Images Proceedings Article
In: Proc. of the 22nd International Society for Music Information Retrieval Conference, 2021.
@inproceedings{k475,
  author    = {F. J. Castellanos and A. J. Gallego and J. Calvo-Zaragoza},
  title     = {Unsupervised Domain Adaptation for Document Analysis of Music Score Images},
  booktitle = {Proc. of the 22nd International Society for Music Information Retrieval Conference},
  year      = {2021},
  date      = {2021-01-01},
  urldate   = {2021-01-01},
  keywords  = {GRE19-04},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Román, M. A.
An End-to-End Framework for Audio-to-Score Music Transcription PhD Thesis
2021.
@phdthesis{k462,
  author    = {M. A. Román},
  title     = {An End-to-End Framework for Audio-to-Score Music Transcription},
  editor    = {J. Calvo-Zaragoza and A. Pertusa},
  school    = {Universidad de Alicante},
  year      = {2021},
  date      = {2021-01-01},
  urldate   = {2021-01-01},
  keywords  = {HispaMus},
  pubstate  = {published},
  tppubtype = {phdthesis}
}
Mas-Candela, E.; Alfaro-Contreras, M.; Calvo-Zaragoza, J.
Sequential Next-Symbol Prediction for Optical Music Recognition Proceedings Article
In: 16th International Conference on Document Analysis and Recognition, pp. 708-722, 2021, ISBN: 978-3-030-86334-0.
Links | BibTeX | Tags: GV/2020/030
@inproceedings{k478,
  author    = {E. Mas-Candela and M. Alfaro-Contreras and J. Calvo-Zaragoza},
  title     = {Sequential Next-Symbol Prediction for Optical Music Recognition},
  booktitle = {16th International Conference on Document Analysis and Recognition},
  pages     = {708--722},
  year      = {2021},
  date      = {2021-01-01},
  urldate   = {2021-01-01},
  isbn      = {978-3-030-86334-0},
  url       = {https://link.springer.com/chapter/10.1007/978-3-030-86334-0_46},
  keywords  = {GV/2020/030},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
López-Gutiérrez, J. C.; Valero-Mas, J. J.; Castellanos, F. J.; Calvo-Zaragoza, J.
Data Augmentation for End-to-End Optical Music Recognition Proceedings Article
In: Proceedings of the 14th IAPR International Workshop on Graphics Recognition (GREC), pp. 59-73, Springer, 2021.
BibTeX | Tags: GV/2020/030
@inproceedings{k473,
  author    = {J. C. López-Gutiérrez and J. J. Valero-Mas and F. J. Castellanos and J. Calvo-Zaragoza},
  title     = {Data Augmentation for End-to-End Optical Music Recognition},
  booktitle = {Proceedings of the 14th IAPR International Workshop on Graphics Recognition (GREC)},
  pages     = {59--73},
  publisher = {Springer},
  year      = {2021},
  date      = {2021-01-01},
  urldate   = {2021-01-01},
  keywords  = {GV/2020/030},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2020
Ríos-Vila, A.; Calvo-Zaragoza, J.; Rizo, D.
Evaluating Simultaneous Recognition and Encoding for Optical Music Recognition Proceedings Article
In: DLfM 2020: 7th International Conference on Digital Libraries for Musicology, pp. 10-17, Association for Computing Machinery, 2020, ISBN: 978-1-4503-8760-6.
@inproceedings{k456,
  author    = {A. Ríos-Vila and J. Calvo-Zaragoza and D. Rizo},
  title     = {Evaluating Simultaneous Recognition and Encoding for Optical Music Recognition},
  booktitle = {DLfM 2020: 7th International Conference on Digital Libraries for Musicology},
  pages     = {10--17},
  publisher = {Association for Computing Machinery},
  year      = {2020},
  date      = {2020-10-01},
  isbn      = {978-1-4503-8760-6},
  keywords  = {HispaMus},
  pubstate  = {published},
  tppubtype = {inproceedings}
}