Compare commits

...
Sign in to create a new pull request.

4 commits
master ... dev

Author SHA1 Message Date
Netkas
a0e2becaf0 Updated README.md 2023-02-21 00:48:56 -05:00
Netkas
3a77ed7b0a Updated intellij files 2023-02-07 19:50:20 -05:00
Netkas
56ded60b98 Updated project.json 2023-02-07 19:49:57 -05:00
Netkas
b74c906a12 progress 2023-02-07 19:23:00 -05:00
7 changed files with 497 additions and 20 deletions

6
.idea/alizelib.iml generated
View file

@ -1,7 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="WEB_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/tests" isTestSource="true" />
<excludeFolder url="file://$MODULE_DIR$/build" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>

1
.idea/php.xml generated
View file

@ -9,6 +9,7 @@
<component name="PHPCodeSnifferOptionsConfiguration">
<option name="transferred" value="true" />
</component>
<component name="PhpProjectSharedConfiguration" php_language_level="8.2" />
<component name="PhpStanOptionsConfiguration">
<option name="transferred" value="true" />
</component>

View file

@ -1,6 +1,92 @@
# AlizeLib
Coming soon...
Calculate the averages, totals and best labels of labelled data, and import or export the model for later use.
## Installation
The library can be installed using ncc:
```bash
ncc install -p "nosial/libs.alize=latest@n64"
```
or by adding the following to your project.json file under
the `build.dependencies` section:
```json
{
"name": "net.nosial.alizelib",
"version": "latest",
"source_type": "remote",
"source": "nosial/libs.alize=latest@n64"
}
```
If you don't have the n64 source configured, you can add it
by running the following command:
```bash
ncc source add --name n64 --type gitlab --host git.n64.cc
```
## Compiling from source
The library can be compiled from source using ncc:
```bash
ncc build --config release
```
or by running the following command:
```bash
make release
```
## Usage
```php
// ncc
require 'ncc';
import('net.nosial.alizelib');
// Create a new model
$size = 100; // The size of the model
$model = new \AlizeLib\GeneralizationModel($size);
// We are intentionally adding more data to the model
// than the model can hold, this is to demonstrate
// how old data is removed from the model.
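// Note: random_float() is not a PHP built-in; define a helper such as the one in tests/model_test.php
// Add random high float values to the model under label "en" x 150 times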
for($i = 0; $i < 150; $i++)
{
$model->add("en", random_float(0.5, 1));
}
// Add random low float values to the model under label "zh" x 150 times
for($i = 0; $i < 150; $i++)
{
$model->add("zh", random_float(0, 0.5));
}
// Add random very low float values to the model under label "fr" x 150 times
for($i = 0; $i < 150; $i++)
{
$model->add("fr", random_float(0, 0.1));
}
// Add random very high float values to the model under label "de" x 150 times
for($i = 0; $i < 150; $i++)
{
$model->add("de", random_float(0.9, 1));
}
// The best label should be "de", with "en" as the second best
var_dump($model->calculateBestLabels());
```
# License

View file

@ -10,32 +10,19 @@
"assembly": {
"name": "AlizeLib",
"package": "net.nosial.alizelib",
"description": "Calculate the averages, total & best of labelled values, import & export the model for later use with low overhead performance",
"company": "Nosial",
"copyright": "2022-2023 (c) Nosial. All Rights Reserved",
"version": "1.0.0",
"uuid": "4ae05ff6-a738-11ed-82fc-cf26dde64833"
},
"build": {
"source_path": "src",
"default_configuration": "debug",
"define_constants": {
"ASSEMBLY_NAME": "%ASSEMBLY.NAME%",
"ASSEMBLY_PACKAGE": "%ASSEMBLY.PACKAGE%",
"ASSEMBLY_VERSION": "%ASSEMBLY.VERSION%",
"ASSEMBLY_UID": "%ASSEMBLY.UID%"
},
"default_configuration": "release",
"configurations": [
{
"name": "debug",
"output_path": "build/debug",
"define_constants": {
"DEBUG": "1"
}
},
{
"name": "release",
"output_path": "build/release",
"define_constants": {
"DEBUG": "0"
}
"output_path": "build/release"
}
]
}

View file

@ -0,0 +1,158 @@
<?php
/** @noinspection PhpMissingFieldTypeInspection */
namespace AlizeLib;
use AlizeLib\Objects\Label;
/**
 * Keeps one value buffer per label and derives per-label statistics from them.
 *
 * Each label is backed by a Label object that is created the first time a
 * value is added under that name, using the model's current size.
 */
class GeneralizationModel
{
    /**
     * The labels of this model, indexed by label name
     *
     * @var Label[]
     */
    private $labels;

    /**
     * The size handed to every label of this model
     *
     * @var int
     */
    private $size;

    /**
     * @param int $size The size passed on to every label created by this model
     */
    public function __construct(int $size)
    {
        $this->labels = [];
        $this->size = $size;
    }

    /**
     * Returns the label with the given name, or null if no value was added under that name yet
     *
     * @param string $label
     * @return Label|null
     */
    public function getLabel(string $label): ?Label
    {
        return $this->labels[$label] ?? null;
    }

    /**
     * Returns the size currently configured for this model
     *
     * @return int
     */
    public function getSize(): int
    {
        return $this->size;
    }

    /**
     * Resizes the model and all of its existing labels to the given size
     *
     * @param int $size
     * @return void
     */
    public function resize(int $size): void
    {
        // Remember the new size as well, otherwise getSize() and labels created
        // after this call would still use the size given to the constructor.
        $this->size = $size;

        foreach($this->labels as $label)
        {
            $label->resize($size);
        }
    }

    /**
     * Adds a value to the label with the given name, creating the label on first use
     *
     * @param string $label
     * @param $value
     * @return void
     */
    public function add(string $label, $value): void
    {
        if(!isset($this->labels[$label]))
        {
            $this->labels[$label] = new Label($label, $this->size);
        }

        $this->labels[$label]->addValue($value);
    }

    /**
     * Returns the data array for the label with the given name,
     * or an empty array if the label does not exist
     *
     * @param string $label
     * @return array
     */
    public function getData(string $label): array
    {
        if(!isset($this->labels[$label]))
        {
            return [];
        }

        return $this->labels[$label]->getData();
    }

    /**
     * Calculates the mean for each label
     *
     * @return array The means indexed by label name
     */
    public function calculateMean(): array
    {
        $means = [];

        /** @var Label $label */
        foreach($this->labels as $label)
        {
            $means[$label->getLabel()] = $label->calculateMean();
        }

        return $means;
    }

    /**
     * Calculates the standard deviation for each label
     *
     * @return array The standard deviations indexed by label name
     */
    public function calculateStandardDeviation(): array
    {
        $stds = [];

        /** @var Label $label */
        foreach($this->labels as $label)
        {
            // Label::calculateConfidence() yields the variance; the standard
            // deviation is its square root.
            $stds[$label->getLabel()] = sqrt($label->calculateConfidence());
        }

        return $stds;
    }

    /**
     * Calculates a score for each label and returns the labels ordered from best to worst.
     *
     * The score of a label is the mean of those of its values that are greater than or
     * equal to (mean - standard deviation) of that label. A label without any such value
     * scores 0. The result is sorted by score in descending order.
     *
     * @return array The scores indexed by label name, highest score first
     */
    public function calculateBestLabels(): array
    {
        $means = $this->calculateMean();
        $stds = $this->calculateStandardDeviation();
        $results = [];

        foreach($this->labels as $name => $label)
        {
            // Values below this threshold are ignored when scoring the label
            $threshold = $means[$name] - $stds[$name];

            $filtered_data = array_filter($label->getData(), static function($value) use ($threshold) {
                return $value >= $threshold;
            });

            if(count($filtered_data) === 0)
            {
                $results[$name] = 0;
                continue;
            }

            $results[$name] = array_sum($filtered_data) / count($filtered_data);
        }

        arsort($results);
        return $results;
    }
}

View file

@ -0,0 +1,202 @@
<?php
/** @noinspection PhpMissingFieldTypeInspection */
namespace AlizeLib\Objects;
/**
 * A named, fixed-capacity buffer of numeric values.
 *
 * Values are written at a moving pointer; once the pointer reaches the
 * configured size it wraps around to the start, so new values overwrite
 * the oldest ones.
 */
class Label
{
    /**
     * The name of the label
     *
     * @var string
     */
    private $label;

    /**
     * An array of the data for this label
     *
     * @var float[]|int[]|double[]
     */
    private $data;

    /**
     * The index in the data array that the next value is written to
     *
     * @var int
     */
    private $pointer;

    /**
     * The maximum number of values the data array should hold
     *
     * @var int
     */
    private $size;

    /**
     * @param string $label
     * @param int $size
     */
    public function __construct(string $label, int $size)
    {
        $this->label = $label;
        $this->data = [];
        $this->size = $size;
        $this->pointer = 0;
    }

    /**
     * Returns the name of the label
     *
     * @return string
     */
    public function getLabel(): string
    {
        return $this->label;
    }

    /**
     * Returns the data array for this label
     *
     * @return array
     */
    public function getData(): array
    {
        return $this->data;
    }

    /**
     * Returns the current size set for the data array (not the actual size of the array)
     *
     * @return int
     */
    public function getSize(): int
    {
        return $this->size;
    }

    /**
     * Resizes the data array to the given size, dropping the values
     * stored beyond the new size
     *
     * @param int $size
     * @return void
     */
    public function resize(int $size): void
    {
        $this->size = $size;

        if($this->pointer > $size)
        {
            $this->pointer = $size;
        }

        if(count($this->data) > $size)
        {
            $this->data = array_slice($this->data, 0, $size);
        }
    }

    /**
     * Adds a value to the data array, overwriting the oldest slot once the array is full
     *
     * @param $value
     * @return void
     */
    public function addValue($value): void
    {
        // Valid indexes are 0 .. size - 1, so the pointer has to wrap as soon as
        // it reaches the size; wrapping only after passing it would let the
        // array grow to size + 1 values.
        if($this->pointer >= $this->size)
        {
            $this->pointer = 0;
        }

        $this->data[$this->pointer] = $value;
        $this->pointer += 1;
    }

    /**
     * Adds an array of values to the data array, one after another
     *
     * @param array $values
     * @return void
     */
    public function addValues(array $values): void
    {
        foreach($values as $value)
        {
            $this->addValue($value);
        }
    }

    /**
     * Resets the data array and pointer to empty and 0 respectively
     *
     * @return void
     */
    public function clear(): void
    {
        $this->data = [];
        $this->pointer = 0;
    }

    /**
     * Calculates the mean of the data array by adding all the values and dividing by the number of values
     *
     * @return float|int The mean, or 0 when there is no data
     */
    public function calculateMean(): float|int
    {
        $count = count($this->data);

        if($count === 0)
        {
            return 0;
        }

        return array_sum($this->data) / $count;
    }

    /**
     * Calculates the variance of the data array by subtracting the mean from each value, squaring the result, adding
     * all the results together and dividing by the number of values in the array (n) to get the variance of the
     * data set.
     *
     * @return float|int The variance, or 0 when there is no data
     */
    public function calculateConfidence(): float|int
    {
        $count = count($this->data);

        if($count === 0)
        {
            return 0;
        }

        $mean = $this->calculateMean();
        $variance = 0;

        foreach($this->data as $value)
        {
            $variance += pow($value - $mean, 2);
        }

        return $variance / $count;
    }

    /**
     * Calculates the standard deviation of the data array by taking the square root of the variance
     *
     * @return float The standard deviation, 0.0 when there is no data
     */
    public function calculateStandardDeviation(): float
    {
        return sqrt($this->calculateConfidence());
    }

    /**
     * Calculates the total of the data array by adding all the values together
     *
     * @return float|int
     */
    public function total(): float|int
    {
        return array_sum($this->data);
    }

    /**
     * Returns an array representation of the label
     *
     * @return array
     */
    public function toArray(): array
    {
        return [
            'label' => $this->label,
            'data' => $this->data,
            'size' => $this->size,
            'pointer' => $this->pointer
        ];
    }

    /**
     * Constructs a label from an array representation as produced by toArray()
     *
     * @param array $array
     * @return Label
     */
    public static function fromArray(array $array): Label
    {
        $label = new self($array['label'], $array['size']);
        $label->data = $array['data'];
        $label->pointer = $array['pointer'];

        return $label;
    }
}

39
tests/model_test.php Normal file
View file

@ -0,0 +1,39 @@
<?php
require 'ncc';
import('net.nosial.alizelib');
// Create the model under test; 100 is the size the model hands to each label it creates
$model = new \AlizeLib\GeneralizationModel(100);
/**
 * Returns a pseudo-random float that starts at $min and is offset by a random
 * fraction of the absolute distance between $min and $max.
 *
 * @param $min
 * @param $max
 * @return float
 */
function random_float ($min,$max)
{
    $span = abs($max - $min);
    $fraction = lcg_value();

    return $min + $fraction * $span;
}
// Value range fed to each label, in the order the labels are filled:
// "en" gets high values, "zh" low values, "fr" very low values and "de" very high values.
$ranges = [
    "en" => [0.5, 1],
    "zh" => [0, 0.5],
    "fr" => [0, 0.1],
    "de" => [0.9, 1],
];

// Add 150 random float values from the label's range to the model under each label
foreach($ranges as $name => [$low, $high])
{
    for($round = 0; $round < 150; $round++)
    {
        $model->add($name, random_float($low, $high));
    }
}

var_dump($model->calculateBestLabels());