2019
Calvo-Zaragoza, J.; Gallego, A. J.
A selectional auto-encoder approach for document image binarization Journal Article
In: Pattern Recognition, vol. 86, pp. 37–47, 2019, ISSN: 0031-3203.
Abstract | Links | BibTeX | Tags: GRE16-14
@article{k395,
  author     = {J. Calvo-Zaragoza and A. J. Gallego},
  title      = {A selectional auto-encoder approach for document image binarization},
  journal    = {Pattern Recognition},
  volume     = {86},
  pages      = {37--47},
  year       = {2019},
  date       = {2019-01-01},
  issn       = {0031-3203},
  url        = {https://grfia.dlsi.ua.es/repositori/grfia/pubs/395/1706.10241.pdf},
  abstract   = {Binarization plays a key role in the automatic information retrieval from document images. This process is usually performed in the first stages of document analysis systems, and serves as a basis for subsequent steps. Hence it has to be robust in order to allow the full analysis workflow to be successful. Several methods for document image binarization have been proposed so far, most of which are based on hand-crafted image processing strategies. Recently, Convolutional Neural Networks have shown an amazing performance in many disparate duties related to computer vision. In this paper we discuss the use of convolutional auto-encoders devoted to learning an end-to-end map from an input image to its selectional output, in which activations indicate the likelihood of pixels to be either foreground or background. Once trained, documents can therefore be binarized by parsing them through the model and applying a global threshold. This approach has proven to outperform existing binarization strategies in a number of document types.},
  keywords   = {GRE16-14},
  pubstate   = {published},
  tppubtype  = {article}
}
Binarization plays a key role in the automatic information retrieval from document images. This process is usually performed in the first stages of document analysis systems, and serves as a basis for subsequent steps. Hence it has to be robust in order to allow the full analysis workflow to be successful. Several methods for document image binarization have been proposed so far, most of which are based on hand-crafted image processing strategies. Recently, Convolutional Neural Networks have shown an amazing performance in many disparate duties related to computer vision. In this paper we discuss the use of convolutional auto-encoders devoted to learning an end-to-end map from an input image to its selectional output, in which activations indicate the likelihood of pixels to be either foreground or background. Once trained, documents can therefore be binarized by parsing them through the model and applying a global threshold. This approach has proven to outperform existing binarization strategies in a number of document types.
2018
Román, M. A.; Pertusa, A.; Calvo-Zaragoza, J.
An End-to-End Framework for Audio-to-Score Music Transcription on Monophonic Excerpts Proceedings Article
In: Proc. of the 19th International Society for Music Information Retrieval Conference (ISMIR), Paris, France, 2018.
BibTeX | Tags: GRE16-14, HispaMus
@inproceedings{k389,
  author     = {M. A. Román and A. Pertusa and J. Calvo-Zaragoza},
  title      = {An End-to-End Framework for Audio-to-Score Music Transcription on Monophonic Excerpts},
  booktitle  = {Proc. of the 19th International Society for Music Information Retrieval Conference (ISMIR)},
  address    = {Paris, France},
  year       = {2018},
  date       = {2018-09-01},
  urldate    = {2018-09-01},
  keywords   = {GRE16-14, HispaMus},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
Castellanos, F. J.; Valero-Mas, J. J.; Calvo-Zaragoza, J.; Rico-Juan, J. R.
Oversampling imbalanced data in the string space Journal Article
In: Pattern Recognition Letters, vol. 103, pp. 32–38, 2018, ISSN: 0167-8655.
Abstract | BibTeX | Tags: GRE16-14
@article{k382,
  author     = {F. J. Castellanos and J. J. Valero-Mas and J. Calvo-Zaragoza and J. R. Rico-Juan},
  title      = {Oversampling imbalanced data in the string space},
  journal    = {Pattern Recognition Letters},
  volume     = {103},
  pages      = {32--38},
  year       = {2018},
  date       = {2018-02-01},
  issn       = {0167-8655},
  abstract   = {Imbalanced data is a typical problem in the supervised classification field, which occurs when the different classes are not equally represented. This fact typically results in the classifier biasing its performance towards the class representing the majority of the elements. Many methods have been proposed to alleviate this scenario, yet all of them assume that data is represented as feature vectors. In this paper we propose a strategy to balance a dataset whose samples are encoded as strings. Our approach is based on adapting the well-known Synthetic Minority Over-sampling Technique (SMOTE) algorithm to the string space. More precisely, data generation is achieved with an iterative approach to create artificial strings within the segment between two given samples of the training set. Results with several datasets and imbalance ratios show that the proposed strategy properly deals with the problem in all cases considered.},
  keywords   = {GRE16-14},
  pubstate   = {published},
  tppubtype  = {article}
}
Imbalanced data is a typical problem in the supervised classification field, which occurs when the different classes are not equally represented. This fact typically results in the classifier biasing its performance towards the class representing the majority of the elements. Many methods have been proposed to alleviate this scenario, yet all of them assume that data is represented as feature vectors. In this paper we propose a strategy to balance a dataset whose samples are encoded as strings. Our approach is based on adapting the well-known Synthetic Minority Over-sampling Technique (SMOTE) algorithm to the string space. More precisely, data generation is achieved with an iterative approach to create artificial strings within the segment between two given samples of the training set. Results with several datasets and imbalance ratios show that the proposed strategy properly deals with the problem in all cases considered.
2017
Calvo-Zaragoza, J.; Gallego, A. J.; Pertusa, A.
Recognition of Handwritten Music Symbols with Convolutional Neural Codes Proceedings Article
In: 14th IAPR International Conference on Document Analysis and Recognition (ICDAR), pp. 691–696, Kyoto, Japan, 2017.
BibTeX | Tags: GRE16-14, TIMuL
@inproceedings{k376,
  author     = {J. Calvo-Zaragoza and A. J. Gallego and A. Pertusa},
  title      = {Recognition of Handwritten Music Symbols with Convolutional Neural Codes},
  booktitle  = {14th IAPR International Conference on Document Analysis and Recognition (ICDAR)},
  pages      = {691--696},
  address    = {Kyoto, Japan},
  year       = {2017},
  date       = {2017-11-01},
  urldate    = {2017-11-01},
  keywords   = {GRE16-14, TIMuL},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
Calvo-Zaragoza, J.; Valero-Mas, J. J.; Pertusa, A.
End-To-End Optical Music Recognition using Neural Networks Proceedings Article
In: Proc. of International Society for Music Information Retrieval Conference (ISMIR), Suzhou, China, 2017.
Abstract | BibTeX | Tags: GRE16-14, TIMuL
@inproceedings{k374,
  author     = {J. Calvo-Zaragoza and J. J. Valero-Mas and A. Pertusa},
  title      = {End-To-End Optical Music Recognition using Neural Networks},
  booktitle  = {Proc. of International Society for Music Information Retrieval Conference (ISMIR)},
  address    = {Suzhou, China},
  year       = {2017},
  date       = {2017-10-01},
  abstract   = {This work addresses the Optical Music Recognition (OMR) task in an end-to-end fashion using neural networks. The proposed architecture is based on a Recurrent Convolutional Neural Network topology that takes as input an image of a monophonic score and retrieves a sequence of music symbols as output. In the first stage, a series of convolutional filters are trained to extract meaningful features of the input image, and then a recurrent block models the sequential nature of music. The system is trained using a Connectionist Temporal Classification loss function, which avoids the need for a frame-by-frame alignment between the image and the ground-truth music symbols. Experimentation has been carried on a set of 90,000 synthetic monophonic music scores with more than 50 different possible labels. Results obtained depict classification error rates around 2 \% at symbol level, thus proving the potential of the proposed end-to-end architecture for OMR. The source code, dataset, and trained models are publicly released for reproducible research and future comparison purposes.},
  keywords   = {GRE16-14, TIMuL},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
This work addresses the Optical Music Recognition (OMR) task in an end-to-end fashion using neural networks. The proposed architecture is based on a Recurrent Convolutional Neural Network topology that takes as input an image of a monophonic score and retrieves a sequence of music symbols as output. In the first stage, a series of convolutional filters are trained to extract meaningful features of the input image, and then a recurrent block models the sequential nature of music. The system is trained using a Connectionist Temporal Classification loss function, which avoids the need for a frame-by-frame alignment between the image and the ground-truth music symbols. Experimentation has been carried on a set of 90,000 synthetic monophonic music scores with more than 50 different possible labels. Results obtained depict classification error rates around 2 % at symbol level, thus proving the potential of the proposed end-to-end architecture for OMR. The source code, dataset, and trained models are publicly released for reproducible research and future comparison purposes.
2019
Calvo-Zaragoza, J.; Gallego, A. J.
A selectional auto-encoder approach for document image binarization Journal Article
In: Pattern Recognition, vol. 86, pp. 37–47, 2019, ISSN: 0031-3203.
Abstract | Links | BibTeX | Tags: GRE16-14
@article{k395,
  author     = {J. Calvo-Zaragoza and A. J. Gallego},
  title      = {A selectional auto-encoder approach for document image binarization},
  journal    = {Pattern Recognition},
  volume     = {86},
  pages      = {37--47},
  year       = {2019},
  date       = {2019-01-01},
  issn       = {0031-3203},
  url        = {https://grfia.dlsi.ua.es/repositori/grfia/pubs/395/1706.10241.pdf},
  abstract   = {Binarization plays a key role in the automatic information retrieval from document images. This process is usually performed in the first stages of document analysis systems, and serves as a basis for subsequent steps. Hence it has to be robust in order to allow the full analysis workflow to be successful. Several methods for document image binarization have been proposed so far, most of which are based on hand-crafted image processing strategies. Recently, Convolutional Neural Networks have shown an amazing performance in many disparate duties related to computer vision. In this paper we discuss the use of convolutional auto-encoders devoted to learning an end-to-end map from an input image to its selectional output, in which activations indicate the likelihood of pixels to be either foreground or background. Once trained, documents can therefore be binarized by parsing them through the model and applying a global threshold. This approach has proven to outperform existing binarization strategies in a number of document types.},
  keywords   = {GRE16-14},
  pubstate   = {published},
  tppubtype  = {article}
}
2018
Román, M. A.; Pertusa, A.; Calvo-Zaragoza, J.
An End-to-End Framework for Audio-to-Score Music Transcription on Monophonic Excerpts Proceedings Article
In: Proc. of the 19th International Society for Music Information Retrieval Conference (ISMIR), Paris, France, 2018.
BibTeX | Tags: GRE16-14, HispaMus
@inproceedings{k389,
  author     = {M. A. Román and A. Pertusa and J. Calvo-Zaragoza},
  title      = {An End-to-End Framework for Audio-to-Score Music Transcription on Monophonic Excerpts},
  booktitle  = {Proc. of the 19th International Society for Music Information Retrieval Conference (ISMIR)},
  address    = {Paris, France},
  year       = {2018},
  date       = {2018-09-01},
  urldate    = {2018-09-01},
  keywords   = {GRE16-14, HispaMus},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
Castellanos, F. J.; Valero-Mas, J. J.; Calvo-Zaragoza, J.; Rico-Juan, J. R.
Oversampling imbalanced data in the string space Journal Article
In: Pattern Recognition Letters, vol. 103, pp. 32–38, 2018, ISSN: 0167-8655.
Abstract | BibTeX | Tags: GRE16-14
@article{k382,
  author     = {F. J. Castellanos and J. J. Valero-Mas and J. Calvo-Zaragoza and J. R. Rico-Juan},
  title      = {Oversampling imbalanced data in the string space},
  journal    = {Pattern Recognition Letters},
  volume     = {103},
  pages      = {32--38},
  year       = {2018},
  date       = {2018-02-01},
  issn       = {0167-8655},
  abstract   = {Imbalanced data is a typical problem in the supervised classification field, which occurs when the different classes are not equally represented. This fact typically results in the classifier biasing its performance towards the class representing the majority of the elements. Many methods have been proposed to alleviate this scenario, yet all of them assume that data is represented as feature vectors. In this paper we propose a strategy to balance a dataset whose samples are encoded as strings. Our approach is based on adapting the well-known Synthetic Minority Over-sampling Technique (SMOTE) algorithm to the string space. More precisely, data generation is achieved with an iterative approach to create artificial strings within the segment between two given samples of the training set. Results with several datasets and imbalance ratios show that the proposed strategy properly deals with the problem in all cases considered.},
  keywords   = {GRE16-14},
  pubstate   = {published},
  tppubtype  = {article}
}
2017
Calvo-Zaragoza, J.; Gallego, A. J.; Pertusa, A.
Recognition of Handwritten Music Symbols with Convolutional Neural Codes Proceedings Article
In: 14th IAPR International Conference on Document Analysis and Recognition (ICDAR), pp. 691–696, Kyoto, Japan, 2017.
BibTeX | Tags: GRE16-14, TIMuL
@inproceedings{k376,
  author     = {J. Calvo-Zaragoza and A. J. Gallego and A. Pertusa},
  title      = {Recognition of Handwritten Music Symbols with Convolutional Neural Codes},
  booktitle  = {14th IAPR International Conference on Document Analysis and Recognition (ICDAR)},
  pages      = {691--696},
  address    = {Kyoto, Japan},
  year       = {2017},
  date       = {2017-11-01},
  urldate    = {2017-11-01},
  keywords   = {GRE16-14, TIMuL},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
Calvo-Zaragoza, J.; Valero-Mas, J. J.; Pertusa, A.
End-To-End Optical Music Recognition using Neural Networks Proceedings Article
In: Proc. of International Society for Music Information Retrieval Conference (ISMIR), Suzhou, China, 2017.
Abstract | BibTeX | Tags: GRE16-14, TIMuL
@inproceedings{k374,
  author     = {J. Calvo-Zaragoza and J. J. Valero-Mas and A. Pertusa},
  title      = {End-To-End Optical Music Recognition using Neural Networks},
  booktitle  = {Proc. of International Society for Music Information Retrieval Conference (ISMIR)},
  address    = {Suzhou, China},
  year       = {2017},
  date       = {2017-10-01},
  abstract   = {This work addresses the Optical Music Recognition (OMR) task in an end-to-end fashion using neural networks. The proposed architecture is based on a Recurrent Convolutional Neural Network topology that takes as input an image of a monophonic score and retrieves a sequence of music symbols as output. In the first stage, a series of convolutional filters are trained to extract meaningful features of the input image, and then a recurrent block models the sequential nature of music. The system is trained using a Connectionist Temporal Classification loss function, which avoids the need for a frame-by-frame alignment between the image and the ground-truth music symbols. Experimentation has been carried on a set of 90,000 synthetic monophonic music scores with more than 50 different possible labels. Results obtained depict classification error rates around 2 \% at symbol level, thus proving the potential of the proposed end-to-end architecture for OMR. The source code, dataset, and trained models are publicly released for reproducible research and future comparison purposes.},
  keywords   = {GRE16-14, TIMuL},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}