% Sabando, Ponzoni & Soto (2019), Applied Soft Computing 85, article no. 105777.
% Notes: `pages` holds the article number (the journal uses article IDs, not a
% page range); `doi` is stored bare, without the resolver prefix, so styles can
% build the link themselves.
@article{SABANDO2019105777,
  author   = {Sabando, Mar{\'\i}a Virginia and Ponzoni, Ignacio and Soto, Axel J.},
  title    = {Neural-based approaches to overcome feature selection and applicability domain in drug-related property prediction},
  journal  = {Applied Soft Computing},
  volume   = {85},
  pages    = {105777},
  year     = {2019},
  issn     = {1568-4946},
  doi      = {10.1016/j.asoc.2019.105777},
  url      = {http://www.sciencedirect.com/science/article/pii/S1568494619305587},
  keywords = {Neural networks, QSAR modeling, Model interpretability, Applicability domain, Feature selection},
  abstract = {In the fields of pharmaceutical research and biomedical sciences, QSAR modeling is an established approach during drug discovery for prediction of biological activity of drug candidates. Yet, QSAR modeling poses a series of open challenges. First, chemical compounds are represented on a high-dimensional space and thus feature selection is typically applied, although this task entails a challenging combinatorial problem with potential loss of information. Second, the definition of the applicability domain of a QSAR model is a desirable aspect to determine the reliability of predictions on unseen chemicals, which is often difficult to assess due to the extent of the chemical space. Finally, interpretability of these models is also a critical issue for drug designers. The purpose of this work is to thoroughly assess the application of neural-based methods and recent advances in deep learning for QSAR modeling. We hypothesize that neural-based methods can overcome the need to perform a descriptor selection phase. We developed three QSAR models based on neural networks for prediction of relevant chemical and biomedical properties that, in the absence of any feature selection step, can outperform the state-of-the-art models for such properties. We also implemented an embedded applicability domain technique based on network output probabilities that proved to be effective; its application improved the predictive performance of the model. Finally, we proposed the use of a post hoc feature analysis technique based on an aggregation of network weights, which enabled effective detection of relevant features in the model.},
}