keybert/tests/model_test.php
2023-07-29 15:47:20 -04:00

39 lines
No EOL
1.8 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
require 'ncc';
import('net.nosial.keybert');
// Sample documents fed to both timing runs below.
$documents = [
    "The quick brown fox jumps over the lazy dog",
    "Lorem ipsum dolor sit amet, consectetur adipiscing elit",
    "PHP is a popular general-purpose scripting language",
    "Machine Learning provides computers the ability to learn without being explicitly programmed",
    "OpenAI is an artificial intelligence research lab",
    "Blockchain is a decentralized ledger of all transactions across a peer-to-peer network",
    "Python is a popular language for data science",
    "Artificial Intelligence is a branch of computer science that aims to create intelligent machines",
    "Big data is a term that describes the large volume of data both structured and unstructured that inundates a business on a day-to-day basis",
    "Quantum computing is the use of quantum-mechanical phenomena such as superposition and entanglement to perform computation",
];

// Model name passed as the first argument to every extractKeywords() call.
$model = 'paraphrase-multilingual-MiniLM-L12-v2';

// Run 1 ("multi"): pass the whole document list in a single call.
$keybert = new \Keybert\Keybert();
$multi_start = microtime(true);
$keywords = $keybert->extractKeywords($model, $documents);
$multi_end = microtime(true);
unset($keybert);

// Run 2 ("single"): fresh instance, one call per document.
$keybert = new \Keybert\Keybert();
$single_results = [];
$single_start = microtime(true);
foreach ($documents as $document) {
    // Only collect the result here; dumping inside the timed region would
    // charge output I/O to the "single" run but not to the "multi" run.
    $single_results[] = $keybert->extractKeywords($model, $document);
}
$single_end = microtime(true);

// Print the per-document results after the clock has stopped. They are still
// emitted before the timing summary, so the output order is unchanged.
foreach ($single_results as $single_result) {
    var_dump($single_result);
}

// Elapsed wall-clock time of each run, in seconds (microtime(true) floats).
$multi_elapsed = $multi_end - $multi_start;
$single_elapsed = $single_end - $single_start;

echo "Multi: " . $multi_elapsed . PHP_EOL;
echo "Single: " . $single_elapsed . PHP_EOL;
// Which is faster? Multi or single? (A tie is reported as "Single".)
echo "Faster is: " . ($multi_elapsed < $single_elapsed ? "Multi" : "Single") . PHP_EOL;
var_dump($keywords);