progress
This commit is contained in:
parent
6f11ff674d
commit
b74c906a12
3 changed files with 399 additions and 0 deletions
158
src/AlizeLib/GeneralizationModel.php
Normal file
158
src/AlizeLib/GeneralizationModel.php
Normal file
|
@ -0,0 +1,158 @@
|
|||
<?php
|
||||
|
||||
/** @noinspection PhpMissingFieldTypeInspection */
|
||||
|
||||
namespace AlizeLib;
|
||||
|
||||
use AlizeLib\Objects\Label;
|
||||
|
||||
class GeneralizationModel
{
    /**
     * The labels tracked by this model, keyed by label name
     *
     * @var Label[]
     */
    private $labels;

    /**
     * The size handed to every label this model creates
     *
     * @var int
     */
    private $size;

    /**
     * Creates an empty model
     *
     * @param int $size The size passed to each label created by add()
     */
    public function __construct(int $size)
    {
        $this->labels = [];
        $this->size = $size;
    }

    /**
     * Returns the label with the given name, or null if no value was ever added under that name
     *
     * @param string $label
     * @return Label|null
     */
    public function getLabel(string $label): ?Label
    {
        return $this->labels[$label] ?? null;
    }

    /**
     * Returns the size currently configured for this model
     *
     * @return int
     */
    public function getSize(): int
    {
        return $this->size;
    }

    /**
     * Resizes the model and every existing label to the given size
     *
     * @param int $size
     * @return void
     */
    public function resize(int $size): void
    {
        // Remember the new size as well: without this, getSize() would keep reporting the old value and
        // labels created after the resize would still be constructed with the old size.
        $this->size = $size;

        foreach($this->labels as $label)
        {
            $label->resize($size);
        }
    }

    /**
     * Adds a value to the label with the given name, creating the label on first use
     *
     * @param string $label
     * @param $value
     * @return void
     */
    public function add(string $label, $value): void
    {
        if(!isset($this->labels[$label]))
        {
            $this->labels[$label] = new Label($label, $this->size);
        }

        $this->labels[$label]->addValue($value);
    }

    /**
     * Returns the data array for the label with the given name, or an empty array if the label does not exist
     *
     * @param string $label
     * @return array
     */
    public function getData(string $label): array
    {
        if(!isset($this->labels[$label]))
        {
            return [];
        }

        return $this->labels[$label]->getData();
    }

    /**
     * Calculates the mean for each label
     *
     * @return array The means, keyed by label name
     */
    public function calculateMean(): array
    {
        $means = [];

        /** @var Label $label */
        foreach($this->labels as $label)
        {
            $means[$label->getLabel()] = $label->calculateMean();
        }

        return $means;
    }

    /**
     * Calculates the standard deviation for each label
     *
     * @return array The standard deviations, keyed by label name
     */
    public function calculateStandardDeviation(): array
    {
        $stds = [];

        /** @var Label $label */
        foreach($this->labels as $label)
        {
            // Label::calculateConfidence() yields the variance; the standard deviation is its square root
            $stds[$label->getLabel()] = sqrt($label->calculateConfidence());
        }

        return $stds;
    }

    /**
     * Scores every label and returns the scores ordered from best to worst
     *
     * A label's score is the average of its values that are at least (mean - standard deviation), so values
     * far below the mean do not drag the score down. A label without any qualifying value scores 0.
     *
     * @return array The scores keyed by label name, sorted from highest to lowest
     */
    public function calculateBestLabels(): array
    {
        $means = $this->calculateMean();
        $stds = $this->calculateStandardDeviation();
        $results = [];

        foreach($this->labels as $name => $label)
        {
            // Lower cut-off for this label; anything below it is ignored when scoring
            $threshold = $means[$name] - $stds[$name];
            $filtered_data = array_filter($label->getData(), static function($value) use ($threshold) {
                return $value >= $threshold;
            });

            if(count($filtered_data) === 0)
            {
                $results[$name] = 0;
                continue;
            }

            $results[$name] = array_sum($filtered_data) / count($filtered_data);
        }

        // Highest score first, keeping the label names as keys
        arsort($results);
        return $results;
    }

}
|
202
src/AlizeLib/Objects/Label.php
Normal file
202
src/AlizeLib/Objects/Label.php
Normal file
|
@ -0,0 +1,202 @@
|
|||
<?php
|
||||
|
||||
/** @noinspection PhpMissingFieldTypeInspection */
|
||||
|
||||
namespace AlizeLib\Objects;
|
||||
|
||||
class Label
{
    /**
     * The name of the label
     *
     * @var string
     */
    private $label;

    /**
     * The values stored for this label; once the configured size is reached the oldest slots are overwritten
     *
     * @var float[]|int[]|double[]
     */
    private $data;

    /**
     * The index in the data array that the next added value is written to
     *
     * @var int
     */
    private $pointer;

    /**
     * The maximum number of values kept in the data array
     *
     * @var int
     */
    private $size;

    /**
     * @param string $label The name of the label
     * @param int $size The maximum number of values to keep
     */
    public function __construct(string $label, int $size)
    {
        $this->label = $label;
        $this->data = [];
        $this->size = $size;
        $this->pointer = 0;
    }

    /**
     * Returns the name of the label
     *
     * @return string
     */
    public function getLabel(): string
    {
        return $this->label;
    }

    /**
     * Returns the data array for this label
     *
     * @return array
     */
    public function getData(): array
    {
        return $this->data;
    }

    /**
     * Returns the current size set for the data array (not the actual size of the array)
     *
     * @return int
     */
    public function getSize(): int
    {
        return $this->size;
    }

    /**
     * Resizes the data array to the given size, dropping any values beyond the new size
     *
     * @param int $size
     * @return void
     */
    public function resize(int $size): void
    {
        $this->size = $size;

        // A negative size cannot hold anything; treat it as zero so array_slice() is never given a negative
        // length and the write pointer never becomes negative.
        $capacity = max(0, $size);

        if($this->pointer > $capacity)
        {
            $this->pointer = $capacity;
        }

        if(count($this->data) > $capacity)
        {
            $this->data = array_slice($this->data, 0, $capacity);
        }
    }

    /**
     * Adds a value to the data array, overwriting the oldest slot once the configured size is reached
     *
     * Nothing is stored while the configured size is smaller than 1.
     *
     * @param $value
     * @return void
     */
    public function addValue($value): void
    {
        if($this->size < 1)
        {
            return;
        }

        // Valid slots are 0 .. size - 1, so wrap as soon as the pointer reaches the size. Wrapping only when
        // the pointer exceeded the size let the array grow to size + 1 values.
        if($this->pointer >= $this->size)
        {
            $this->pointer = 0;
        }

        $this->data[$this->pointer] = $value;
        $this->pointer += 1;
    }

    /**
     * Adds an array of values to the data array, one by one and in order
     *
     * @param array $values
     * @return void
     */
    public function addValues(array $values): void
    {
        foreach($values as $value)
        {
            $this->addValue($value);
        }
    }

    /**
     * Resets the data array and pointer to empty and 0 respectively
     *
     * @return void
     */
    public function clear(): void
    {
        $this->data = [];
        $this->pointer = 0;
    }

    /**
     * Calculates the mean of the data array by adding all the values and dividing by the number of values
     *
     * @return float|int The mean, or 0 when there is no data
     */
    public function calculateMean(): float|int
    {
        $count = count($this->data);

        if($count === 0)
        {
            return 0;
        }

        return array_sum($this->data) / $count;
    }

    /**
     * Calculates the variance of the data array by subtracting the mean from each value, squaring the result,
     * adding all the results together and dividing by the number of values in the array (n).
     *
     * Note that this is the variance, not the standard deviation; take the square root of the result to get
     * the standard deviation.
     *
     * @return float|int The variance, or 0 when there is no data
     */
    public function calculateConfidence(): float|int
    {
        $count = count($this->data);

        if($count === 0)
        {
            return 0;
        }

        $mean = $this->calculateMean();
        $variance = 0;

        foreach($this->data as $value)
        {
            $variance += pow($value - $mean, 2);
        }

        return $variance / $count;
    }

    /**
     * Calculates the sum of all the values in the data array
     *
     * @return float|int
     */
    public function total(): float|int
    {
        return array_sum($this->data);
    }

    /**
     * Returns an array representation of the label
     *
     * @return array An array with the keys 'label', 'data', 'size' and 'pointer'
     */
    public function toArray(): array
    {
        return [
            'label' => $this->label,
            'data' => $this->data,
            'size' => $this->size,
            'pointer' => $this->pointer
        ];
    }

    /**
     * Constructs a label from an array representation as produced by toArray()
     *
     * @param array $array An array with the keys 'label', 'data', 'size' and 'pointer'
     * @return Label
     */
    public static function fromArray(array $array): Label
    {
        $label = new Label($array['label'], $array['size']);
        $label->data = $array['data'];
        $label->pointer = $array['pointer'];
        return $label;
    }
}
|
39
tests/model_test.php
Normal file
39
tests/model_test.php
Normal file
|
@ -0,0 +1,39 @@
|
|||
<?php
|
||||
|
||||
|
||||
// Load ncc; presumably this is what defines the import() function used below (verify against ncc)
require 'ncc';

// Make the AlizeLib package available to this script
import('net.nosial.alizelib');

// The model under test, constructed with a size of 100
$model = new \AlizeLib\GeneralizationModel(size: 100);
|
||||
|
||||
/**
 * Returns a pseudo-random float that starts at $min and reaches at most the absolute distance between the
 * two bounds above it.
 *
 * Because the distance is taken as an absolute value, swapping the bounds does not produce values below
 * $min; the result is always $min plus a non-negative offset.
 *
 * @param int|float $min The lower bound
 * @param int|float $max The upper bound
 * @return float|int
 */
function random_float($min, $max)
{
    // Pseudo-random fraction supplied by PHP's lcg_value()
    $fraction = lcg_value();
    $span = abs($max - $min);

    return $min + $fraction * $span;
}
|
||||
|
||||
// Value range to draw from for each label, listed in the order the labels are fed to the model
$ranges = [
    'en' => [0.5, 1],   // high values
    'zh' => [0, 0.5],   // low values
    'fr' => [0, 0.1],   // very low values
    'de' => [0.9, 1],   // very high values
];

// Add 150 random float values to the model under each label
foreach($ranges as $name => [$low, $high])
{
    for($i = 0; $i < 150; $i++)
    {
        $model->add($name, random_float($low, $high));
    }
}

// Print the resulting scores per label
var_dump($model->calculateBestLabels());
|
Loading…
Add table
Reference in a new issue