diff --git a/qml/aglaia/aglaia.py b/qml/aglaia/aglaia.py index a50d5708c..084839576 100644 --- a/qml/aglaia/aglaia.py +++ b/qml/aglaia/aglaia.py @@ -982,6 +982,22 @@ def _get_classes(self, indices): return np.asarray(zs, dtype=np.float32) + def _generate_compounds_from_data(self, xyz, classes): + """ + This function generates the compounds from xyz data and nuclear charges. + + :param xyz: cartesian coordinates + :type xyz: numpy array of shape (n_samples, n_atoms, 3) + :param classes: classes for atomic decomposition + :type classes: numpy array of shape (n_samples, n_atoms) + :return: array of compound objects + """ + compounds = np.empty(xyz.shape[0], dtype=object) + for i in range(xyz.shape[0]): + compounds[i] = Compound() + compounds[i].set_compounds(xyz=xyz[i], zs=classes[i]) + return compounds + def predict(self, x, classes=None): """ This function calls the predict function for either ARMP or MRMP. @@ -1088,8 +1104,13 @@ def _generate_representations_from_data(self, xyz, classes, method): :type method: string :return: numpy array of shape (n_samples, n_features) and None """ - # TODO implement - raise InputError("Not implemented yet. 
Use compounds.") + + if method != "fortran": + raise NotImplementedError + + self.compounds = self._generate_compounds_from_data(xyz, classes) + + return self._generate_representations_from_compounds('fortran') def _generate_representations_from_compounds(self, method): """ @@ -1238,9 +1259,9 @@ def _fit(self, x, y, dy, classes): opt, c = self.session.run([optimisation_op, cost], feed_dict=feed_dict) avg_cost += c * batch_x.shape[0] / x_approved.shape[0] - if self.tensorboard: + if self.tensorboard and j == 0: if i % self.tensorboard_logger_training.store_frequency == 0: - self.tensorboard_logger_training.write_summary(self.session, feed_dict, i, j) + self.tensorboard_logger_training.write_summary(self.session, i, feed_dict=feed_dict) self.training_cost.append(avg_cost) @@ -1642,8 +1663,8 @@ def _generate_representations_from_data(self, xyz, classes, method): representation = None if self.representation_name == 'slatm': - # TODO implement - raise InputError("Slatm from data has not been implemented yet. 
Use Compounds.") + self.compounds = self._generate_compounds_from_data(xyz, classes) + representation, classes = self._generate_representations_from_compounds('fortran') elif self.representation_name == 'acsf': if method == 'tf': @@ -1651,11 +1672,7 @@ def _generate_representations_from_data(self, xyz, classes, method): else: representation = self._generate_acsf_fortran(xyz, classes) - # Hotfix t make sure the representation is single precision - single_precision_representation = representation.astype(dtype=np.float32) - del representation - - return single_precision_representation, classes + return representation, classes def _generate_acsf_tf(self, xyz, classes): """ @@ -1776,7 +1793,11 @@ def _generate_acsf_fortran(self, xyz, classes): padded_g = np.zeros((initial_natoms, g.shape[-1])) padded_g[:g.shape[0], :] = g - representation.append(padded_g) + # Hotfix to make sure the representation is single precision + single_precision_g = padded_g.astype(dtype=np.float32) + del padded_g + + representation.append(single_precision_g) else: @@ -1790,7 +1811,10 @@ def _generate_acsf_fortran(self, xyz, classes): eta3=self.acsf_parameters['eta'], zeta=self.acsf_parameters['zeta']) - representation.append(g) + single_precision_g = g.astype(dtype=np.float32) + del g + + representation.append(single_precision_g) return np.asarray(representation) @@ -2275,7 +2299,8 @@ def _fit_from_scratch(self, x, y, dy, classes): init = tf.global_variables_initializer() iterator_init = iterator.make_initializer(dataset, name="dataset_init") - self._build_model_from_xyz(self.n_atoms, element_weights, element_biases) + if self.representation_name == "acsf": + self._build_model_from_xyz(self.n_atoms, element_weights, element_biases) self.session = tf.Session() diff --git a/qml/aglaia/tf_utils.py b/qml/aglaia/tf_utils.py index 5dc73aea5..4bf6e963e 100644 --- a/qml/aglaia/tf_utils.py +++ b/qml/aglaia/tf_utils.py @@ -55,10 +55,13 @@ def set_store_frequency(self, freq): def set_summary_writer(self, 
sess): self.summary_writer = tf.summary.FileWriter(logdir=self.path, graph=sess.graph) - def write_summary(self, sess, iteration): + def write_summary(self, sess, iteration, feed_dict=None): self.merged_summary = tf.summary.merge_all() - summary = sess.run(self.merged_summary) + if not isinstance(feed_dict, type(None)): + summary = sess.run(self.merged_summary, feed_dict) + else: + summary = sess.run(self.merged_summary) self.summary_writer.add_summary(summary, iteration) self.summary_writer.add_run_metadata(self.run_metadata, 'iteration %d' % (iteration)) diff --git a/qml/qmlearn/models.py b/qml/qmlearn/models.py index d4a0b93aa..c810fe59b 100644 --- a/qml/qmlearn/models.py +++ b/qml/qmlearn/models.py @@ -734,8 +734,8 @@ def _padding(self, representation, nuclear_charges): print("Trying to predict on larger molecules than given by the 'size' parameter at initialization") raise SystemExit - padded_rep = np.zeros((len(representation), max_n_atoms, representation[0].shape[1])) - padded_zs = np.zeros((len(representation), max_n_atoms)) + padded_rep = np.zeros((len(representation), self.size, representation[0].shape[1])) + padded_zs = np.zeros((len(representation), self.size)) for i in range(len(representation)): n_atoms = representation[i].shape[0] diff --git a/qml/qmlearn/representations.py b/qml/qmlearn/representations.py index 510311892..73cf33d2b 100644 --- a/qml/qmlearn/representations.py +++ b/qml/qmlearn/representations.py @@ -710,6 +710,11 @@ def transform(self, X): fgenerate_acsf(xyz, charge, self.elements, Rs, Rs, Ts, eta, eta, zeta, self.cutoff, self.cutoff, n, size))) + # Check to make sure there are no NANs + # if np.any(np.isnan(representations)): + # print("There are NANs in the representations.") + # exit() + data._representations = np.asarray(representations) return data diff --git a/qml/utils/compound.py b/qml/utils/compound.py index b2b7d9f5c..46a3660bc 100644 --- a/qml/utils/compound.py +++ b/qml/utils/compound.py @@ -373,3 +373,22 @@ def 
read_xyz(self, filename): self.coordinates[i] = np.asarray(tokens[1:4], dtype=float) self.natypes = dict([(key, len(value)) for key,value in self.atomtype_indices.items()]) + + def set_compounds(self, xyz, zs): + """ + Populate this compound straight from coordinates and nuclear charges rather than from an xyz file. + + :param xyz: coordinates + :type xyz: np array of shape (n_atoms, 3) + :param zs: nuclear charges + :type zs: np array of shape (n_atoms,) + :return: None + """ + + self.natoms = xyz.shape[0] + self.nuclear_charges = zs + self.coordinates = xyz + self.atomtypes = np.unique(zs) + + self.name = "Compound" + diff --git a/test/test_armp.py b/test/test_armp.py index eeaf0688e..bd5c26106 100644 --- a/test/test_armp.py +++ b/test/test_armp.py @@ -162,6 +162,28 @@ def test_fit_3(): estimator = ARMP() estimator.fit(x=descriptor, y=energies, classes=classes) +def test_fit_4(): + """ + This function tests the second way of fitting the descriptor: the data is passed by storing the representations, + classes and properties in the class. + """ + test_dir = os.path.dirname(os.path.realpath(__file__)) + + data = np.load(test_dir + "/data/local_slatm_ch4cn_light.npz") + descriptor = data["arr_0"] + classes = data["arr_1"] + energies = data["arr_2"] + + estimator = ARMP(tensorboard=True, tensorboard_subdir="./tb_test_4") + estimator.set_representations(representations=descriptor) + estimator.set_classes(classes=classes) + estimator.set_properties(energies) + + idx = np.arange(0, 100) + estimator.fit(idx) + + shutil.rmtree("./tb_test_4") + def test_score_3(): """ This function tests that all the scoring functions work. 
@@ -303,6 +325,7 @@ def test_retraining(): test_fit_1() test_fit_2() test_fit_3() + test_fit_4() test_score_3() test_predict_3() test_predict_fromxyz() diff --git a/test/test_mrmp.py b/test/test_mrmp.py index 2a54c3aec..3a5e6dea8 100644 --- a/test/test_mrmp.py +++ b/test/test_mrmp.py @@ -169,6 +169,22 @@ def test_fit_3(): estimator = MRMP() estimator.fit(descriptor, energies) +def test_fit_4(): + """ + This function tests a third way of fitting the descriptor: + The data is passed directly to the fit function. + """ + test_dir = os.path.dirname(os.path.realpath(__file__)) + + data = np.load(test_dir + "/data/CN_isopent_light_UCM.npz") + descriptor = data["arr_0"] + energies = data["arr_1"] + + estimator = MRMP(tensorboard=True, tensorboard_subdir="./tb_test_4") + estimator.fit(descriptor, energies) + + shutil.rmtree("./tb_test_4") + def test_score(): """ This function tests that all the scoring functions work. @@ -264,6 +280,7 @@ def test_load_external(): test_fit_1() test_fit_2() test_fit_3() + test_fit_4() test_score() test_load_external() # test_get_params()