Compare commits
4 commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
a0e2becaf0 | ||
![]() |
3a77ed7b0a | ||
![]() |
56ded60b98 | ||
![]() |
b74c906a12 |
7 changed files with 497 additions and 20 deletions
6
.idea/alizelib.iml
generated
6
.idea/alizelib.iml
generated
|
@ -1,7 +1,11 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="WEB_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$" />
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/tests" isTestSource="true" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/build" />
|
||||
</content>
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
|
|
1
.idea/php.xml
generated
1
.idea/php.xml
generated
|
@ -9,6 +9,7 @@
|
|||
<component name="PHPCodeSnifferOptionsConfiguration">
|
||||
<option name="transferred" value="true" />
|
||||
</component>
|
||||
<component name="PhpProjectSharedConfiguration" php_language_level="8.2" />
|
||||
<component name="PhpStanOptionsConfiguration">
|
||||
<option name="transferred" value="true" />
|
||||
</component>
|
||||
|
|
88
README.md
88
README.md
|
@ -1,6 +1,92 @@
|
|||
# AlizeLib
|
||||
|
||||
Coming soon...
|
||||
Calculate the averages, total & best of labelled data, import and export the model for later use.
|
||||
|
||||
## Installation
|
||||
|
||||
The library can be installed using ncc:
|
||||
|
||||
```bash
|
||||
ncc install -p "nosial/libs.alize=latest@n64"
|
||||
```
|
||||
|
||||
or by adding the following to your project.json file under
|
||||
|
||||
the `build.dependencies` section:
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "net.nosial.alizelib",
|
||||
"version": "latest",
|
||||
"source_type": "remote",
|
||||
"source": "nosial/libs.alize=latest@n64"
|
||||
}
|
||||
```
|
||||
|
||||
If you don't have the n64 source configured, you can add it
|
||||
|
||||
by running the following command:
|
||||
|
||||
```bash
|
||||
ncc source add --name n64 --type gitlab --host git.n64.cc
|
||||
```
|
||||
|
||||
## Compiling from source
|
||||
|
||||
The library can be compiled from source using ncc:
|
||||
|
||||
```bash
|
||||
ncc build --config release
|
||||
```
|
||||
|
||||
or by running the following command:
|
||||
|
||||
```bash
|
||||
make release
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
```php
|
||||
// ncc
|
||||
require 'ncc';
|
||||
import('net.nosial.alizelib');
|
||||
|
||||
// Create a new model
|
||||
$size = 100; // The size of the model
|
||||
$model = new \AlizeLib\GeneralizationModel($size);
|
||||
|
||||
// We are intentionally adding more data to the model
|
||||
// than the model can hold, this is to demonstrate
|
||||
// how old data is removed from the model.
|
||||
|
||||
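// Note: random_float() is not a built-in PHP function; tests/model_test.php contains a sample helper.
// Add random high float values to the model under label "en" x 150 times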
|
||||
for($i = 0; $i < 150; $i++)
|
||||
{
|
||||
$model->add("en", random_float(0.5, 1));
|
||||
}
|
||||
|
||||
// Add random low float values to the model under label "zh" x 150 times
|
||||
for($i = 0; $i < 150; $i++)
|
||||
{
|
||||
$model->add("zh", random_float(0, 0.5));
|
||||
}
|
||||
|
||||
// Add random very low float values to the model under label "fr" x 150 times
|
||||
for($i = 0; $i < 150; $i++)
|
||||
{
|
||||
$model->add("fr", random_float(0, 0.1));
|
||||
}
|
||||
|
||||
// Add random very high float values to the model under label "de" x 150 times
|
||||
for($i = 0; $i < 150; $i++)
|
||||
{
|
||||
$model->add("de", random_float(0.9, 1));
|
||||
}
|
||||
|
||||
// The best label should be "de", followed by "en" as the second best
|
||||
var_dump($model->calculateBestLabels());
|
||||
```
|
||||
|
||||
# License
|
||||
|
||||
|
|
23
project.json
23
project.json
|
@ -10,32 +10,19 @@
|
|||
"assembly": {
|
||||
"name": "AlizeLib",
|
||||
"package": "net.nosial.alizelib",
|
||||
"description": "Calculate the averages, total & best of labelled values, import & export the model for later use with low overhead performance",
|
||||
"company": "Nosial",
|
||||
"copyright": "2022-2023 (c) Nosial. All Rights Reserved",
|
||||
"version": "1.0.0",
|
||||
"uuid": "4ae05ff6-a738-11ed-82fc-cf26dde64833"
|
||||
},
|
||||
"build": {
|
||||
"source_path": "src",
|
||||
"default_configuration": "debug",
|
||||
"define_constants": {
|
||||
"ASSEMBLY_NAME": "%ASSEMBLY.NAME%",
|
||||
"ASSEMBLY_PACKAGE": "%ASSEMBLY.PACKAGE%",
|
||||
"ASSEMBLY_VERSION": "%ASSEMBLY.VERSION%",
|
||||
"ASSEMBLY_UID": "%ASSEMBLY.UID%"
|
||||
},
|
||||
"default_configuration": "release",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "debug",
|
||||
"output_path": "build/debug",
|
||||
"define_constants": {
|
||||
"DEBUG": "1"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "release",
|
||||
"output_path": "build/release",
|
||||
"define_constants": {
|
||||
"DEBUG": "0"
|
||||
}
|
||||
"output_path": "build/release"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
158
src/AlizeLib/GeneralizationModel.php
Normal file
158
src/AlizeLib/GeneralizationModel.php
Normal file
|
@ -0,0 +1,158 @@
|
|||
<?php
|
||||
|
||||
/** @noinspection PhpMissingFieldTypeInspection */
|
||||
|
||||
namespace AlizeLib;
|
||||
|
||||
use AlizeLib\Objects\Label;
|
||||
|
||||
class GeneralizationModel
{
    /**
     * The labels tracked by this model, keyed by label name
     *
     * @var Label[]
     */
    private $labels;

    /**
     * The size handed to every label created by this model
     *
     * @var int
     */
    private $size;

    /**
     * Creates an empty model
     *
     * @param int $size The size passed to each label when it is first created
     */
    public function __construct(int $size)
    {
        $this->labels = [];
        $this->size = $size;
    }

    /**
     * Returns the label with the given name, or null if no value was ever added under that name
     *
     * @param string $label
     * @return Label|null
     */
    public function getLabel(string $label): ?Label
    {
        return $this->labels[$label] ?? null;
    }

    /**
     * Returns the size currently used by this model
     *
     * @return int
     */
    public function getSize(): int
    {
        return $this->size;
    }

    /**
     * Resizes the model and every existing label to the given size
     *
     * @param int $size
     * @return void
     */
    public function resize(int $size): void
    {
        // Remember the new size so getSize() reports it and labels created later use it too
        $this->size = $size;

        foreach($this->labels as $label)
        {
            $label->resize($size);
        }
    }

    /**
     * Adds a value to the label with the given name, creating the label on first use
     *
     * @param string $label
     * @param $value
     * @return void
     */
    public function add(string $label, $value): void
    {
        if(!isset($this->labels[$label]))
        {
            $this->labels[$label] = new Label($label, $this->size);
        }

        $this->labels[$label]->addValue($value);
    }

    /**
     * Returns the data array for the label with the given name, or an empty array if the label does not exist
     *
     * @param string $label
     * @return array
     */
    public function getData(string $label): array
    {
        if(!isset($this->labels[$label]))
        {
            return [];
        }

        return $this->labels[$label]->getData();
    }

    /**
     * Calculates the mean for each label
     *
     * @return array The means keyed by label name
     */
    public function calculateMean(): array
    {
        $means = [];

        /** @var Label $label */
        foreach($this->labels as $label)
        {
            $means[$label->getLabel()] = $label->calculateMean();
        }

        return $means;
    }

    /**
     * Calculates the standard deviation for each label
     *
     * @return array The standard deviations keyed by label name
     */
    public function calculateStandardDeviation(): array
    {
        $stds = [];

        /** @var Label $label */
        foreach($this->labels as $label)
        {
            // Label::calculateConfidence() yields the variance, the standard deviation is its square root
            $stds[$label->getLabel()] = sqrt($label->calculateConfidence());
        }

        return $stds;
    }

    /**
     * Scores every label and returns the scores sorted from highest to lowest.
     *
     * The score of a label is the average of its values that are greater than or equal to
     * (mean - standard deviation) of that label; a label with no such values scores 0.
     *
     * @return array The scores keyed by label name, best label first
     */
    public function calculateBestLabels(): array
    {
        $means = $this->calculateMean();
        $stds = $this->calculateStandardDeviation();
        $results = [];

        foreach($this->labels as $name => $label)
        {
            // Values below this threshold are ignored when scoring the label
            $threshold = $means[$name] - $stds[$name];
            $filtered_data = array_filter($label->getData(), static function($value) use ($threshold) {
                return $value >= $threshold;
            });

            if(count($filtered_data) === 0)
            {
                $results[$name] = 0;
                continue;
            }

            $results[$name] = array_sum($filtered_data) / count($filtered_data);
        }

        // Highest score first, label names are kept as keys
        arsort($results);
        return $results;
    }

}
|
202
src/AlizeLib/Objects/Label.php
Normal file
202
src/AlizeLib/Objects/Label.php
Normal file
|
@ -0,0 +1,202 @@
|
|||
<?php
|
||||
|
||||
/** @noinspection PhpMissingFieldTypeInspection */
|
||||
|
||||
namespace AlizeLib\Objects;
|
||||
|
||||
class Label
{
    /**
     * The name of the label
     *
     * @var string
     */
    private $label;

    /**
     * The values recorded for this label, written in a circular fashion starting at index 0
     *
     * @var float[]|int[]|double[]
     */
    private $data;

    /**
     * The index of the data array that the next added value is written to
     *
     * @var int
     */
    private $pointer;

    /**
     * The maximum number of values the data array may hold
     *
     * @var int
     */
    private $size;

    /**
     * Creates an empty label
     *
     * @param string $label The name of the label
     * @param int $size The maximum number of values to keep
     */
    public function __construct(string $label, int $size)
    {
        $this->label = $label;
        $this->data = [];
        $this->size = $size;
        $this->pointer = 0;
    }

    /**
     * Returns the name of the label
     *
     * @return string
     */
    public function getLabel(): string
    {
        return $this->label;
    }

    /**
     * Returns the data array for this label
     *
     * @return array
     */
    public function getData(): array
    {
        return $this->data;
    }

    /**
     * Returns the current size set for the data array (not the actual size of the array)
     *
     * @return int
     */
    public function getSize(): int
    {
        return $this->size;
    }

    /**
     * Resizes the data array to the given size, dropping the values stored beyond the new size
     *
     * @param int $size
     * @return void
     */
    public function resize(int $size): void
    {
        $this->size = $size;

        // A negative size cannot hold anything, treat it as an empty capacity when trimming
        $limit = max($size, 0);

        // Keep the write pointer inside the new bounds; addValue() wraps it back to 0 once it reaches the size
        if($this->pointer > $limit)
        {
            $this->pointer = $limit;
        }

        if(count($this->data) > $limit)
        {
            $this->data = array_slice($this->data, 0, $limit);
        }
    }

    /**
     * Adds a value to the data array, overwriting the values from index 0 onwards once the array is full
     *
     * @param $value
     * @return void
     */
    public function addValue($value): void
    {
        // A label without capacity cannot store anything
        if($this->size <= 0)
        {
            return;
        }

        // Valid indexes are 0 to size - 1, so wrap as soon as the pointer reaches the size
        if($this->pointer >= $this->size)
        {
            $this->pointer = 0;
        }

        $this->data[$this->pointer] = $value;
        $this->pointer += 1;
    }

    /**
     * Adds an array of values to the data array, one at a time and in the given order
     *
     * @param array $values
     * @return void
     */
    public function addValues(array $values): void
    {
        foreach($values as $value)
        {
            $this->addValue($value);
        }
    }

    /**
     * Resets the data array and pointer to empty and 0 respectively
     *
     * @return void
     */
    public function clear(): void
    {
        $this->data = [];
        $this->pointer = 0;
    }

    /**
     * Calculates the mean of the data array by adding all the values and dividing by the number of values
     *
     * @return float|int The mean, or 0 when there is no data
     */
    public function calculateMean(): float|int
    {
        $count = count($this->data);

        if($count === 0)
        {
            return 0;
        }

        return array_sum($this->data) / $count;
    }

    /**
     * Calculates the variance of the data array by subtracting the mean from each value, squaring the result, adding
     * all the results together and dividing by the number of values in the array (n) to get the variance of the
     * data set.
     *
     * @return float|int The variance, or 0 when there is no data
     */
    public function calculateConfidence(): float|int
    {
        $count = count($this->data);

        if($count === 0)
        {
            return 0;
        }

        $mean = $this->calculateMean();
        $variance = 0;

        foreach($this->data as $value)
        {
            $variance += ($value - $mean) ** 2;
        }

        return $variance / $count;
    }

    /**
     * Calculates the total of the data array by adding all the values together
     *
     * @return float|int The sum of all values, or 0 when there is no data
     */
    public function total(): float|int
    {
        return array_sum($this->data);
    }

    /**
     * Returns an array representation of the label
     *
     * @return array An array with the keys 'label', 'data', 'size' and 'pointer'
     */
    public function toArray(): array
    {
        return [
            'label' => $this->label,
            'data' => $this->data,
            'size' => $this->size,
            'pointer' => $this->pointer
        ];
    }

    /**
     * Constructs a label from an array representation as produced by toArray()
     *
     * @param array $array An array with the keys 'label', 'data', 'size' and 'pointer'
     * @return Label
     */
    public static function fromArray(array $array): Label
    {
        $label = new Label($array['label'], $array['size']);
        $label->data = $array['data'];
        $label->pointer = $array['pointer'];
        return $label;
    }
}
|
39
tests/model_test.php
Normal file
39
tests/model_test.php
Normal file
|
@ -0,0 +1,39 @@
|
|||
<?php
|
||||
|
||||
|
||||
require 'ncc';
|
||||
|
||||
import('net.nosial.alizelib');
|
||||
|
||||
// The model under test; 100 is the size handed to its constructor
$model = new \AlizeLib\GeneralizationModel(100);
|
||||
|
||||
/**
 * Returns a pseudo-random float starting at $min and spanning the absolute
 * distance between $min and $max, using lcg_value() as the random source.
 *
 * @param float|int $min The value the result starts from
 * @param float|int $max The other end of the range
 * @return float|int
 */
function random_float ($min, $max)
{
    $fraction = lcg_value();
    $span = abs($max - $min);

    return $min + $fraction * $span;
}
|
||||
|
||||
// Every label receives 150 random floats drawn from its own range:
// "en" high values, "zh" low values, "fr" very low values and "de" very high values.
$ranges = [
    "en" => [0.5, 1],
    "zh" => [0, 0.5],
    "fr" => [0, 0.1],
    "de" => [0.9, 1],
];

foreach($ranges as $name => [$low, $high])
{
    for($i = 0; $i < 150; $i++)
    {
        $model->add($name, random_float($low, $high));
    }
}

// Print the score of every label, best label first
var_dump($model->calculateBestLabels());
|
Loading…
Add table
Reference in a new issue