diff --git a/src/AlizeLib/GeneralizationModel.php b/src/AlizeLib/GeneralizationModel.php
new file mode 100644
index 0000000..b9162bb
--- /dev/null
+++ b/src/AlizeLib/GeneralizationModel.php
@@ -0,0 +1,158 @@
+labels = [];
+        $this->size = $size;
+    }
+
+    /**
+     * Returns the label with the given name
+     *
+     * @param string $label Name of the label to look up
+     * @return Label|null The label, or null when the model holds no label with that name
+     */
+    public function getLabel(string $label): ?Label
+    {
+        return $this->labels[$label] ?? null;
+    }
+
+    /**
+     * Returns the size that add() passes to every label it creates
+     *
+     * @return int
+     */
+    public function getSize(): int
+    {
+        return $this->size;
+    }
+
+    /**
+     * Resizes the model, and every label it already holds, to the given size
+     *
+     * @param int $size
+     * @return void
+     */
+    public function resize(int $size): void
+    {
+        // Remember the new size as well: add() creates new labels with $this->size
+        // and getSize() reports it, so both have to follow the resize.
+        $this->size = $size;
+
+        foreach($this->labels as $label)
+        {
+            $label->resize($size);
+        }
+    }
+
+    /**
+     * Adds a value to the label with the given name, creating the label on first use
+     *
+     * @param string $label
+     * @param $value
+     * @return void
+     */
+    public function add(string $label, $value): void
+    {
+        if(!isset($this->labels[$label]))
+        {
+            $this->labels[$label] = new Label($label, $this->size);
+        }
+
+        $this->labels[$label]->addValue($value);
+    }
+
+    /**
+     * Returns the data array for the label with the given name
+     *
+     * @param string $label
+     * @return array The label's data, or an empty array when the label does not exist
+     */
+    public function getData(string $label): array
+    {
+        if(!isset($this->labels[$label]))
+        {
+            return [];
+        }
+
+        return $this->labels[$label]->getData();
+    }
+
+    /**
+     * Calculates the mean for each label
+     *
+     * @return array Mean of each label's data, keyed by label name
+     */
+    public function calculateMean(): array
+    {
+        $means = [];
+        /** @var Label $label */
+        foreach($this->labels as $label)
+        {
+            $means[$label->getLabel()] = $label->calculateMean();
+        }
+
+        return $means;
+    }
+
+    /**
+     * Calculates the standard deviation for each label
+     *
+     * @return array Standard deviation of each label's data, keyed by label name
+     */
+    public function calculateStandardDeviation(): array
+    {
+        $stds = [];
+        /** @var Label $label */
+        foreach($this->labels as $label)
+        {
+            // Label::calculateConfidence() returns the variance; the standard
+            // deviation is its square root.
+            $stds[$label->getLabel()] = sqrt($label->calculateConfidence());
+        }
+
+        return $stds;
+    }
+
+    /**
+     * Ranks the labels by the average of their values that are not low outliers
+     *
+     * For each label, the values lower than (mean - standard deviation) are discarded
+     * and the remaining values are averaged; a label with no remaining values scores 0.
+     *
+     * @return array Scores keyed by label name, sorted from highest to lowest
+     */
+    public function calculateBestLabels(): array
+    {
+        $means = $this->calculateMean();
+        $stds = $this->calculateStandardDeviation();
+        $results = [];
+
+        foreach($this->labels as $name => $label)
+        {
+            // Computed once per label instead of once per value
+            $threshold = $means[$name] - $stds[$name];
+            $filtered_data = array_filter($label->getData(), static function($value) use ($threshold) {
+                return $value >= $threshold;
+            });
+
+            if(count($filtered_data) === 0)
+            {
+                $results[$name] = 0;
+                continue;
+            }
+
+            $results[$name] = array_sum($filtered_data) / count($filtered_data);
+        }
+
+        // Highest score first, keeping the label names as keys
+        arsort($results);
+        return $results;
+    }
+
+}
\ No newline at end of file
diff --git a/src/AlizeLib/Objects/Label.php b/src/AlizeLib/Objects/Label.php
new file mode 100644
index 0000000..304f4aa
--- /dev/null
+++ b/src/AlizeLib/Objects/Label.php
@@ -0,0 +1,202 @@
+label = $label;
+        $this->data = [];
+        $this->size = $size;
+        $this->pointer = 0;
+    }
+
+    /**
+     * Returns the name of the label
+     *
+     * @return string
+     */
+    public function getLabel(): string
+    {
+        return $this->label;
+    }
+
+    /**
+     * Returns the data array for this label
+     *
+     * @return array
+     */
+    public function getData(): array
+    {
+        return $this->data;
+    }
+
+    /**
+     * Returns the current size set for the data array (not the actual size of the array)
+     *
+     * @return int
+     */
+    public function getSize(): int
+    {
+        return $this->size;
+    }
+
+    /**
+     * Resizes the data array to the given size
+     *
+     * Values stored beyond the new size are discarded and the write pointer is
+     * clamped to the new size.
+     *
+     * @param int $size
+     * @return void
+     */
+    public function resize(int $size): void
+    {
+        $this->size = $size;
+        if($this->pointer > $size)
+            $this->pointer = $size;
+        if(count($this->data) > $size)
+            $this->data = array_slice($this->data, 0, $size);
+    }
+
+    /**
+     * Adds a value to the data array
+     *
+     * Values are written at the pointer position; once the pointer reaches the
+     * configured size it wraps to 0 and earlier values are overwritten.
+     *
+     * @param $value
+     * @return void
+     */
+    public function addValue($value): void
+    {
+        // Valid slots are 0 .. size - 1, so wrap as soon as the pointer reaches size
+        if($this->pointer >= $this->size)
+            $this->pointer = 0;
+
+        $this->data[$this->pointer] = $value;
+        $this->pointer += 1;
+    }
+
+    /**
+     * Adds an array of values to the data array
+     *
+     * @param array $values
+     * @return void
+     */
+    public function addValues(array $values): void
+    {
+        foreach($values as $value)
+            $this->addValue($value);
+    }
+
+    /**
+     * Resets the data array and pointer to empty and 0 respectively
+     *
+     * @return void
+     */
+    public function clear(): void
+    {
+        $this->data = [];
+        $this->pointer = 0;
+    }
+
+    /**
+     * Calculates the mean of the data array by adding all the values and dividing by the number of values
+     *
+     * @return float|int The mean, or 0 when the data array is empty
+     */
+    public function calculateMean(): float|int
+    {
+        if(count($this->data) === 0)
+            return 0;
+
+        return array_sum($this->data) / count($this->data);
+    }
+
+    /**
+     * Calculates the variance of the data array by subtracting the mean from each value, squaring the result, adding
+     * all the results together and dividing by the number of values in the array (n) to get the variance of the
+     * data set.
+     *
+     * Note that this is the variance, not the standard deviation; take the square root of the result to obtain
+     * the standard deviation.
+     *
+     * @return float|int The variance, or 0 when the data array is empty
+     */
+    public function calculateConfidence(): float|int
+    {
+        if(count($this->data) === 0)
+            return 0;
+
+        $mean = $this->calculateMean();
+        $variance = 0;
+        foreach($this->data as $value)
+        {
+            $variance += pow($value - $mean, 2);
+        }
+        return $variance / count($this->data);
+    }
+
+    /**
+     * Returns the sum of all the values in the data array
+     *
+     * @return float|int
+     */
+    public function total(): float|int
+    {
+        return array_sum($this->data);
+    }
+
+    /**
+     * Returns an array representation of the label
+     *
+     * @return array
+     */
+    public function toArray(): array
+    {
+        return [
+            'label' => $this->label,
+            'data' => $this->data,
+            'size' => $this->size,
+            'pointer' => $this->pointer
+        ];
+    }
+
+    /**
+     * Constructs a label from an array representation
+     *
+     * @param array $array Array with the keys 'label', 'size', 'data' and 'pointer', as produced by toArray()
+     * @return Label
+     */
+    public static function fromArray(array $array): Label
+    {
+        $label = new Label($array['label'], $array['size']);
+        $label->data = $array['data'];
+        $label->pointer = $array['pointer'];
+        return $label;
+    }
+}
\ No newline at end of file
diff --git a/tests/model_test.php b/tests/model_test.php
new file mode 100644
index 0000000..86860fb
--- /dev/null
+++ b/tests/model_test.php
@@ -0,0 +1,39 @@
+add("en", random_float(0.5, 1));
+    }
+
+    // Add random low float values to the model under label "zh" x 150 times
+    for($i = 0; $i < 150; $i++)
+    {
+        $model->add("zh", random_float(0, 0.5));
+    }
+
+    // Add random very low float values to the model under label "fr" x 150 times
+    for($i = 0; $i < 150; $i++)
+    {
+        $model->add("fr", random_float(0, 0.1));
+    }
+
+    // Add random very high float values to the model under label "de" x 150 times
+    for($i = 0; $i < 150; $i++)
+    {
+        $model->add("de", random_float(0.9, 1));
+    }
+
+    var_dump($model->calculateBestLabels());