This commit is contained in:
Netkas 2023-02-07 19:23:00 -05:00
parent 6f11ff674d
commit b74c906a12
3 changed files with 399 additions and 0 deletions

View file

@ -0,0 +1,158 @@
<?php
/** @noinspection PhpMissingFieldTypeInspection */
namespace AlizeLib;
use AlizeLib\Objects\Label;
class GeneralizationModel
{
    /**
     * The labels tracked by this model, keyed by label name
     *
     * @var Label[]
     */
    private $labels;

    /**
     * The size handed to every label this model creates
     *
     * @var int
     */
    private $size;

    /**
     * Creates an empty model
     *
     * @param int $size The size passed to each label created by add()
     */
    public function __construct(int $size)
    {
        $this->labels = [];
        $this->size = $size;
    }

    /**
     * Returns the label with the given name, or null when no value has been added under that name yet
     *
     * @param string $label
     * @return Label|null
     */
    public function getLabel(string $label): ?Label
    {
        return $this->labels[$label] ?? null;
    }

    /**
     * Returns the size currently configured for this model
     *
     * @return int
     */
    public function getSize(): int
    {
        return $this->size;
    }

    /**
     * Resizes the model and every existing label to the given size
     *
     * @param int $size
     * @return void
     */
    public function resize(int $size): void
    {
        // Remember the new size as well; otherwise getSize() keeps reporting the old value and
        // labels created later by add() would still be constructed with the old size
        $this->size = $size;

        foreach($this->labels as $label)
        {
            $label->resize($size);
        }
    }

    /**
     * Adds a value to the label with the given name, creating the label on first use
     *
     * @param string $label
     * @param $value
     * @return void
     */
    public function add(string $label, $value): void
    {
        if(!isset($this->labels[$label]))
        {
            $this->labels[$label] = new Label($label, $this->size);
        }

        $this->labels[$label]->addValue($value);
    }

    /**
     * Returns the data array for the label with the given name, or an empty array if the label does not exist
     *
     * @param string $label
     * @return array
     */
    public function getData(string $label): array
    {
        if(!isset($this->labels[$label]))
        {
            return [];
        }

        return $this->labels[$label]->getData();
    }

    /**
     * Calculates the mean for each label
     *
     * @return array The means, keyed by label name
     */
    public function calculateMean(): array
    {
        $means = [];

        /** @var Label $label */
        foreach($this->labels as $label)
        {
            $means[$label->getLabel()] = $label->calculateMean();
        }

        return $means;
    }

    /**
     * Calculates the standard deviation for each label
     *
     * @return array The standard deviations, keyed by label name
     */
    public function calculateStandardDeviation(): array
    {
        $stds = [];

        /** @var Label $label */
        foreach($this->labels as $label)
        {
            // Label::calculateConfidence() yields the variance; the standard deviation is its square root
            $stds[$label->getLabel()] = sqrt($label->calculateConfidence());
        }

        return $stds;
    }

    /**
     * Ranks the labels based on their mean and standard deviation
     *
     * For every label only the values that are at least (mean - standard deviation) are kept and averaged;
     * a label with no remaining values scores 0. The scores are returned keyed by label name and sorted
     * from the highest score to the lowest.
     *
     * @return array
     */
    public function calculateBestLabels(): array
    {
        $means = $this->calculateMean();
        $stds = $this->calculateStandardDeviation();
        $results = [];

        foreach($this->labels as $name => $label)
        {
            // Values below this threshold are ignored when scoring the label
            $threshold = $means[$name] - $stds[$name];

            $filtered_data = array_filter($label->getData(), static function($value) use ($threshold) {
                return $value >= $threshold;
            });

            if(count($filtered_data) === 0)
            {
                $results[$name] = 0;
                continue;
            }

            $results[$name] = array_sum($filtered_data) / count($filtered_data);
        }

        // Highest score first, keeping the label names as keys
        arsort($results);
        return $results;
    }
}

View file

@ -0,0 +1,202 @@
<?php
/** @noinspection PhpMissingFieldTypeInspection */
namespace AlizeLib\Objects;
class Label
{
    /**
     * The name of the label
     *
     * @var string
     */
    private $label;

    /**
     * An array of the data for this label
     *
     * @var float[]|int[]|double[]
     */
    private $data;

    /**
     * The index in the data array that the next added value is written to
     *
     * @var int
     */
    private $pointer;

    /**
     * The maximum number of values kept in the data array
     *
     * @var int
     */
    private $size;

    /**
     * Creates an empty label
     *
     * @param string $label
     * @param int $size
     */
    public function __construct(string $label, int $size)
    {
        $this->label = $label;
        $this->data = [];
        $this->size = $size;
        $this->pointer = 0;
    }

    /**
     * Returns the name of the label
     *
     * @return string
     */
    public function getLabel(): string
    {
        return $this->label;
    }

    /**
     * Returns the data array for this label
     *
     * @return array
     */
    public function getData(): array
    {
        return $this->data;
    }

    /**
     * Returns the current size set for the data array (not the actual size of the array)
     *
     * @return int
     */
    public function getSize(): int
    {
        return $this->size;
    }

    /**
     * Resizes the data array to the given size, dropping any values stored beyond the new size
     *
     * @param int $size
     * @return void
     */
    public function resize(int $size): void
    {
        $this->size = $size;

        // A pointer left equal to the size wraps back to 0 on the next addValue() call
        if($this->pointer > $size)
            $this->pointer = $size;

        if(count($this->data) > $size)
            $this->data = array_slice($this->data, 0, $size);
    }

    /**
     * Adds a value to the data array, overwriting the oldest slots once the configured size is reached
     *
     * @param $value
     * @return void
     */
    public function addValue($value): void
    {
        // Valid slots are 0 .. size - 1, so wrap as soon as the pointer reaches the size;
        // wrapping only when it exceeds the size would let the array hold size + 1 values
        if($this->pointer >= $this->size)
            $this->pointer = 0;

        $this->data[$this->pointer] = $value;
        $this->pointer += 1;
    }

    /**
     * Adds an array of values to the data array, one value at a time in iteration order
     *
     * @param array $values
     * @return void
     */
    public function addValues(array $values): void
    {
        foreach($values as $value)
            $this->addValue($value);
    }

    /**
     * Resets the data array and pointer to empty and 0 respectively
     *
     * @return void
     */
    public function clear(): void
    {
        $this->data = [];
        $this->pointer = 0;
    }

    /**
     * Calculates the mean of the data array by adding all the values and dividing by the number of values.
     * Returns 0 when the data array is empty.
     *
     * @return float|int
     */
    public function calculateMean(): float|int
    {
        if(count($this->data) === 0)
            return 0;

        return array_sum($this->data) / count($this->data);
    }

    /**
     * Calculates the variance of the data array by subtracting the mean from each value, squaring the result, adding
     * all the results together and dividing by the number of values in the array (n) to get the variance of the
     * data set. Returns 0 when the data array is empty.
     *
     * @return float|int
     */
    public function calculateConfidence(): float|int
    {
        if(count($this->data) === 0)
            return 0;

        $mean = $this->calculateMean();
        $variance = 0;

        foreach($this->data as $value)
        {
            $variance += pow($value - $mean, 2);
        }

        return $variance / count($this->data);
    }

    /**
     * Returns the sum of all the values in the data array
     *
     * @return float|int
     */
    public function total(): float|int
    {
        return array_sum($this->data);
    }

    /**
     * Returns an array representation of the label
     *
     * @return array
     */
    public function toArray(): array
    {
        return [
            'label' => $this->label,
            'data' => $this->data,
            'size' => $this->size,
            'pointer' => $this->pointer
        ];
    }

    /**
     * Constructs a label from an array representation as produced by toArray()
     *
     * @param array $array Must contain the keys 'label', 'size', 'data' and 'pointer'
     * @return Label
     */
    public static function fromArray(array $array): Label
    {
        $label = new Label($array['label'], $array['size']);
        $label->data = $array['data'];
        $label->pointer = $array['pointer'];

        return $label;
    }
}

39
tests/model_test.php Normal file
View file

@ -0,0 +1,39 @@
<?php
// Load the file named 'ncc'; presumably this is what provides the import() function used below (TODO confirm)
require 'ncc';
// Import the package under test by its identifier
import('net.nosial.alizelib');
// The model used by the rest of this script, constructed with a size of 100
$model = new \AlizeLib\GeneralizationModel(100);
/**
 * Returns a pseudo-random float between the two given bounds (inclusive).
 *
 * The bounds may be passed in either order; the result always lies between the smaller and the larger one.
 *
 * @param float|int $min One end of the range
 * @param float|int $max The other end of the range
 * @return float|int
 */
function random_float ($min,$max)
{
    // Normalise the bounds first: adding abs($max - $min) to a $min that is the larger bound
    // would produce values outside of the requested range
    $low = min($min, $max);
    $high = max($min, $max);

    // mt_rand() / mt_getrandmax() gives a fraction in [0, 1]; lcg_value() is deprecated as of PHP 8.4
    return ($low + (mt_rand() / mt_getrandmax()) * ($high - $low));
}
// Feed the model 150 random samples per label, one label after the other. Each label draws from its own range:
//   "en" high values, "zh" low values, "fr" very low values, "de" very high values
$sample_ranges = [
    "en" => [0.5, 1],
    "zh" => [0, 0.5],
    "fr" => [0, 0.1],
    "de" => [0.9, 1],
];

foreach($sample_ranges as $language => $bounds)
{
    for($i = 0; $i < 150; $i++)
    {
        $model->add($language, random_float($bounds[0], $bounds[1]));
    }
}

// Dump the resulting ranking of the labels
var_dump($model->calculateBestLabels());