Source code for derdava.data_source

import numpy as np


def generate_random_data_sources(X: np.ndarray, y: np.ndarray, num_of_data_sources: int = 10) -> dict:
    """Splits a given dataset into a specified number of equally sized data sources.

    The samples are assigned in their existing order: data source ``i`` receives the
    ``i``-th contiguous chunk of ``len(X) // num_of_data_sources`` samples. No shuffling
    is performed by this function, so shuffle ``X`` and ``y`` jointly beforehand if a
    random assignment is required. When ``len(X)`` is not divisible by
    ``num_of_data_sources``, the trailing remainder samples are not assigned to any
    data source.

    :param X: Feature set of the given dataset, indexed as ``X[rows, :]`` (at least 2-D).
    :param y: Label set of the given dataset, aligned with the rows of ``X``.
    :param num_of_data_sources: Number of data sources to be generated (default: ``10``).
    :return: A dictionary containing mappings between data source indices
        ``0 .. num_of_data_sources - 1`` and independent copies of their data ``(X, y)``.
    :raises ValueError: If ``num_of_data_sources`` is not a positive integer.
    """
    # Reject non-positive counts explicitly: 0 would otherwise fail with an opaque
    # ZeroDivisionError and a negative count would silently yield an empty dictionary.
    if num_of_data_sources < 1:
        raise ValueError("Number of data sources must be a positive integer.")

    chunk_size = len(X) // num_of_data_sources
    data_sources = {}
    for i in range(num_of_data_sources):
        start = chunk_size * i
        stop = start + chunk_size
        # Copy the slices so that later in-place edits to a data source never write
        # through to the caller's arrays.
        data_sources[i] = (X[start:stop, :].copy(), y[start:stop].copy())

    return data_sources
def add_classification_noise(y: np.ndarray, noise_level: float = 0.2) -> np.ndarray:
    """Adds noise to classification labels by randomly choosing one from the remaining label set.

    Every label is selected independently with probability ``noise_level``; each selected
    label is replaced by a label drawn uniformly from the other distinct values found in
    ``y``. The array is modified **in place** and also returned for convenience.

    If ``y`` contains fewer than two distinct labels there is no alternative label to
    switch to, so ``y`` is returned unchanged.

    :param y: Labels of target dataset (1-D array of hashable label values).
    :param noise_level: Amount of noise to be added (defaults ``0.2``).
    :return: The same array ``y``, after the noisy labels were written into it.
    :raises ValueError: If ``noise_level`` is not in the range ``[0, 1]``.
    """
    if not 0 <= noise_level <= 1:
        raise ValueError("Noise level must be between 0 and 1.")

    original_labels = y.tolist()
    labels = list(set(original_labels))

    # One Bernoulli draw per sample decides which labels get replaced.
    has_noises = np.random.binomial(1, noise_level, len(y))

    # With a single class (or an empty array) no other label exists; drawing from an
    # empty candidate list would make np.random.randint(0) raise.
    if len(labels) < 2:
        return y

    # Candidate replacements per label, built once instead of once per noisy sample.
    # The candidate order follows ``labels`` so seeded runs pick the same replacements.
    alternatives = {
        label: [candidate for candidate in labels if candidate != label]
        for label in labels
    }

    for i in np.flatnonzero(has_noises):
        label_candidates = alternatives[original_labels[i]]
        y[i] = label_candidates[np.random.randint(len(label_candidates))]

    return y