2016 |
||
| 1. | Wicker, Jörg; Fenner, Kathrin; Kramer, Stefan A Hybrid Machine Learning and Knowledge Based Approach to Limit Combinatorial Explosion in Biodegradation Prediction Incollection In: Lässig, Jörg; Kersting, Kristian; Morik, Katharina (Ed.): Computational Sustainability, pp. 75-97, Springer International Publishing, Cham, 2016, ISBN: 978-3-319-31858-5. Abstract | Links | BibTeX | Altmetric | Tags: application, biodegradation, cheminformatics, computational sustainability, enviPath, machine learning, metabolic pathways, multi-label classification @incollection{wicker2016ahybrid, title = {A Hybrid Machine Learning and Knowledge Based Approach to Limit Combinatorial Explosion in Biodegradation Prediction}, author = {Jörg Wicker and Kathrin Fenner and Stefan Kramer}, editor = {Jörg Lässig and Kristian Kersting and Katharina Morik}, url = {http://dx.doi.org/10.1007/978-3-319-31858-5_5}, doi = {10.1007/978-3-319-31858-5_5}, isbn = {978-3-319-31858-5}, year = {2016}, date = {2016-04-21}, booktitle = {Computational Sustainability}, pages = {75-97}, publisher = {Springer International Publishing}, address = {Cham}, abstract = {One of the main tasks in chemical industry regarding the sustainability of a product is the prediction of its environmental fate, i.e., its degradation products and pathways. Current methods for the prediction of biodegradation products and pathways of organic environmental pollutants either do not take into account domain knowledge or do not provide probability estimates. In this chapter, we propose a hybrid knowledge-based and machine learning-based approach to overcome these limitations in the context of the University of Minnesota Pathway Prediction System (UM-PPS). The proposed solution performs relative reasoning in a machine learning framework, and obtains one probability estimate for each biotransformation rule of the system. 
Since the application of a rule then depends on a threshold for the probability estimate, the trade-off between recall (sensitivity) and precision (selectivity) can be addressed and leveraged in practice. Results from leave-one-out cross-validation show that a recall and precision of approximately 0.8 can be achieved for a subset of 13 transformation rules. The set of used rules is further extended using multi-label classification, where dependencies among the transformation rules are exploited to improve the predictions. While the results regarding recall and precision vary, the area under the ROC curve can be improved using multi-label classification. Therefore, it is possible to optimize precision without compromising recall. Recently, we integrated the presented approach into enviPath, a complete redesign and re-implementation of UM-PPS.}, keywords = {application, biodegradation, cheminformatics, computational sustainability, enviPath, machine learning, metabolic pathways, multi-label classification}, pubstate = {published}, tppubtype = {incollection} } One of the main tasks in chemical industry regarding the sustainability of a product is the prediction of its environmental fate, i.e., its degradation products and pathways. Current methods for the prediction of biodegradation products and pathways of organic environmental pollutants either do not take into account domain knowledge or do not provide probability estimates. In this chapter, we propose a hybrid knowledge-based and machine learning-based approach to overcome these limitations in the context of the University of Minnesota Pathway Prediction System (UM-PPS). The proposed solution performs relative reasoning in a machine learning framework, and obtains one probability estimate for each biotransformation rule of the system. 
Since the application of a rule then depends on a threshold for the probability estimate, the trade-off between recall (sensitivity) and precision (selectivity) can be addressed and leveraged in practice. Results from leave-one-out cross-validation show that a recall and precision of approximately 0.8 can be achieved for a subset of 13 transformation rules. The set of used rules is further extended using multi-label classification, where dependencies among the transformation rules are exploited to improve the predictions. While the results regarding recall and precision vary, the area under the ROC curve can be improved using multi-label classification. Therefore, it is possible to optimize precision without compromising recall. Recently, we integrated the presented approach into enviPath, a complete redesign and re-implementation of UM-PPS. | |
| 2. | Wicker, Jörg; Lorsbach, Tim; Gütlein, Martin; Schmid, Emanuel; Latino, Diogo; Kramer, Stefan; Fenner, Kathrin enviPath - The Environmental Contaminant Biotransformation Pathway Resource Journal Article In: Nucleic Acids Research, 44 (D1), pp. D502-D508, 2016. Abstract | Links | BibTeX | Altmetric | Tags: application, biodegradation, cheminformatics, computational sustainability, data mining, enviPath, linked data, machine learning, metabolic pathways, multi-label classification @article{wicker2016envipath, title = {enviPath - The Environmental Contaminant Biotransformation Pathway Resource}, author = {Jörg Wicker and Tim Lorsbach and Martin Gütlein and Emanuel Schmid and Diogo Latino and Stefan Kramer and Kathrin Fenner}, editor = {Michael Galperin}, url = {http://nar.oxfordjournals.org/content/44/D1/D502.abstract}, doi = {10.1093/nar/gkv1229}, year = {2016}, date = {2016-01-01}, journal = {Nucleic Acids Research}, volume = {44}, number = {D1}, pages = {D502-D508}, abstract = {The University of Minnesota Biocatalysis/Biodegradation Database and Pathway Prediction System (UM-BBD/PPS) has been a unique resource covering microbial biotransformation pathways of primarily xenobiotic chemicals for over 15 years. This paper introduces the successor system, enviPath (The Environmental Contaminant Biotransformation Pathway Resource), which is a complete redesign and reimplementation of UM-BBD/PPS. enviPath uses the database from the UM-BBD/PPS as a basis, extends the use of this database, and allows users to include their own data to support multiple use cases. Relative reasoning is supported for the refinement of predictions and to allow its extensions in terms of previously published, but not implemented machine learning models. User access is simplified by providing a REST API that simplifies the inclusion of enviPath into existing workflows. An RDF database is used to enable simple integration with other databases. 
enviPath is publicly available at https://envipath.org with free and open access to its core data.}, keywords = {application, biodegradation, cheminformatics, computational sustainability, data mining, enviPath, linked data, machine learning, metabolic pathways, multi-label classification}, pubstate = {published}, tppubtype = {article} } The University of Minnesota Biocatalysis/Biodegradation Database and Pathway Prediction System (UM-BBD/PPS) has been a unique resource covering microbial biotransformation pathways of primarily xenobiotic chemicals for over 15 years. This paper introduces the successor system, enviPath (The Environmental Contaminant Biotransformation Pathway Resource), which is a complete redesign and reimplementation of UM-BBD/PPS. enviPath uses the database from the UM-BBD/PPS as a basis, extends the use of this database, and allows users to include their own data to support multiple use cases. Relative reasoning is supported for the refinement of predictions and to allow its extensions in terms of previously published, but not implemented machine learning models. User access is simplified by providing a REST API that simplifies the inclusion of enviPath into existing workflows. An RDF database is used to enable simple integration with other databases. enviPath is publicly available at https://envipath.org with free and open access to its core data. | |
2010 |
||
| 3. | Wicker, Jörg; Fenner, Kathrin; Ellis, Lynda; Wackett, Larry; Kramer, Stefan Predicting biodegradation products and pathways: a hybrid knowledge- and machine learning-based approach Journal Article In: Bioinformatics, 26 (6), pp. 814-821, 2010. Abstract | Links | BibTeX | Altmetric | Tags: application, biodegradation, cheminformatics, computational sustainability, enviPath, machine learning, metabolic pathways @article{wicker2010predicting, title = {Predicting biodegradation products and pathways: a hybrid knowledge- and machine learning-based approach}, author = {Jörg Wicker and Kathrin Fenner and Lynda Ellis and Larry Wackett and Stefan Kramer}, url = {http://bioinformatics.oxfordjournals.org/content/26/6/814.full}, doi = {10.1093/bioinformatics/btq024}, year = {2010}, date = {2010-01-01}, journal = {Bioinformatics}, volume = {26}, number = {6}, pages = {814-821}, publisher = {Oxford University Press}, abstract = {Motivation: Current methods for the prediction of biodegradation products and pathways of organic environmental pollutants either do not take into account domain knowledge or do not provide probability estimates. In this article, we propose a hybrid knowledge- and machine learning-based approach to overcome these limitations in the context of the University of Minnesota Pathway Prediction System (UM-PPS). The proposed solution performs relative reasoning in a machine learning framework, and obtains one probability estimate for each biotransformation rule of the system. As the application of a rule then depends on a threshold for the probability estimate, the trade-off between recall (sensitivity) and precision (selectivity) can be addressed and leveraged in practice. Results: Results from leave-one-out cross-validation show that a recall and precision of ∼0.8 can be achieved for a subset of 13 transformation rules. Therefore, it is possible to optimize precision without compromising recall. 
We are currently integrating the results into an experimental version of the UM-PPS server. Availability: The program is freely available on the web at http://wwwkramer.in.tum.de/research/applications/biodegradation/data. Contact: kramer@in.tum.de}, keywords = {application, biodegradation, cheminformatics, computational sustainability, enviPath, machine learning, metabolic pathways}, pubstate = {published}, tppubtype = {article} } Motivation: Current methods for the prediction of biodegradation products and pathways of organic environmental pollutants either do not take into account domain knowledge or do not provide probability estimates. In this article, we propose a hybrid knowledge- and machine learning-based approach to overcome these limitations in the context of the University of Minnesota Pathway Prediction System (UM-PPS). The proposed solution performs relative reasoning in a machine learning framework, and obtains one probability estimate for each biotransformation rule of the system. As the application of a rule then depends on a threshold for the probability estimate, the trade-off between recall (sensitivity) and precision (selectivity) can be addressed and leveraged in practice. Results: Results from leave-one-out cross-validation show that a recall and precision of ∼0.8 can be achieved for a subset of 13 transformation rules. Therefore, it is possible to optimize precision without compromising recall. We are currently integrating the results into an experimental version of the UM-PPS server. Availability: The program is freely available on the web at http://wwwkramer.in.tum.de/research/applications/biodegradation/data. Contact: kramer@in.tum.de | |
2008 |
||
| 4. | Wicker, Jörg; Fenner, Kathrin; Ellis, Lynda; Wackett, Larry; Kramer, Stefan Machine Learning and Data Mining Approaches to Biodegradation Pathway Prediction Inproceedings In: Bridewell, Will; Calders, Toon; de Medeiros, Ana Karla; Kramer, Stefan; Pechenizkiy, Mykola; Todorovski, Ljupco (Ed.): Proceedings of the Second International Workshop on the Induction of Process Models at ECML PKDD 2008, 2008. Links | BibTeX | Tags: application, biodegradation, cheminformatics, computational sustainability, enviPath, machine learning, metabolic pathways @inproceedings{wicker2008machine, title = {Machine Learning and Data Mining Approaches to Biodegradation Pathway Prediction}, author = {Jörg Wicker and Kathrin Fenner and Lynda Ellis and Larry Wackett and Stefan Kramer}, editor = {Will Bridewell and Toon Calders and Ana Karla de Medeiros and Stefan Kramer and Mykola Pechenizkiy and Ljupco Todorovski}, url = {http://www.ecmlpkdd2008.org/files/pdf/workshops/ipm/9.pdf}, year = {2008}, date = {2008-01-01}, booktitle = {Proceedings of the Second International Workshop on the Induction of Process Models at ECML PKDD 2008}, keywords = {application, biodegradation, cheminformatics, computational sustainability, enviPath, machine learning, metabolic pathways}, pubstate = {published}, tppubtype = {inproceedings} } | |
2016 |
||
| 1. | A Hybrid Machine Learning and Knowledge Based Approach to Limit Combinatorial Explosion in Biodegradation Prediction Incollection In: Lässig, Jörg; Kersting, Kristian; Morik, Katharina (Ed.): Computational Sustainability, pp. 75-97, Springer International Publishing, Cham, 2016, ISBN: 978-3-319-31858-5. | |
| 2. | enviPath - The Environmental Contaminant Biotransformation Pathway Resource Journal Article In: Nucleic Acids Research, 44 (D1), pp. D502-D508, 2016. | |
2010 |
||
| 3. | Predicting biodegradation products and pathways: a hybrid knowledge- and machine learning-based approach Journal Article In: Bioinformatics, 26 (6), pp. 814-821, 2010. | |
2008 |
||
| 4. | Machine Learning and Data Mining Approaches to Biodegradation Pathway Prediction Inproceedings In: Bridewell, Will; Calders, Toon; de Medeiros, Ana Karla; Kramer, Stefan; Pechenizkiy, Mykola; Todorovski, Ljupco (Ed.): Proceedings of the Second International Workshop on the Induction of Process Models at ECML PKDD 2008, 2008. | |