Fixed an issue where multi-document queries would throw an unexpected exception due to the way tuples are handled in the wrapper class.

This commit is contained in:
Netkas 2023-07-29 15:46:22 -04:00
parent 9fd3ab3c47
commit d6f5d1108a
No known key found for this signature in database
GPG key ID: 5DAF58535614062B
5 changed files with 94 additions and 9 deletions

View file

@ -3,9 +3,39 @@
require 'ncc';
import('net.nosial.keybert');
// Manual benchmark: compares one batched extractKeywords() call over a list of
// documents against one extractKeywords() call per document, then reports
// which of the two took less wall-clock time.

// Model name passed to every extractKeywords() call in this script.
$model = 'paraphrase-multilingual-MiniLM-L12-v2';

// Client built with explicit constructor arguments.
// NOTE(review): the arguments look like a host and port of a local Keybert
// server -- confirm against the Keybert constructor signature.
$keybert = new \Keybert\Keybert(null, '127.0.0.1', 2131);
//$keybert = new \Keybert\Keybert(null, 'power.chan.int.n64.cc', 2131);

// Single-document call made before the benchmark starts.
// NOTE(review): both $keybert and $keywords are reassigned below before they
// are read again, so this call only matters for its side effects (if any).
// It may be left over from the previous version of this script -- confirm
// before removing.
$document = 'The history of natural language processing (NLP) generally started in the 1950s, although work can be found from earlier periods. In 1950, Alan Turing published an article titled "Computing Machinery and Intelligence" which proposed what is now called the Turing test as a criterion of intelligence.';
$keywords = $keybert->extractKeywords($model, $document);

// Sample inputs shared by both measurements.
$documents = [
    "The quick brown fox jumps over the lazy dog",
    "Lorem ipsum dolor sit amet, consectetur adipiscing elit",
    "PHP is a popular general-purpose scripting language",
    "Machine Learning provides computers the ability to learn without being explicitly programmed",
    "OpenAI is an artificial intelligence research lab",
    "Blockchain is a decentralized ledger of all transactions across a peer-to-peer network",
    "Python is a popular language for data science",
    "Artificial Intelligence is a branch of computer science that aims to create intelligent machines",
    "Big data is a term that describes the large volume of data both structured and unstructured that inundates a business on a day-to-day basis",
    "Quantum computing is the use of quantum-mechanical phenomena such as superposition and entanglement to perform computation",
];

// Measurement 1: every document in a single call.
$keybert = new \Keybert\Keybert();
$multi_start = microtime(true);
$keywords = $keybert->extractKeywords($model, $documents);
$multi_end = microtime(true);

// Measurement 2: one call per document, on a freshly constructed client so
// the two measurements do not share an instance.
unset($keybert);
$keybert = new \Keybert\Keybert();
$single_results = [];
$single_start = microtime(true);
foreach ($documents as $document)
{
    // Only collect here: dumping inside the timed region would charge the
    // output cost to the per-document measurement but not to the batched one.
    $single_results[] = $keybert->extractKeywords($model, $document);
}
$single_end = microtime(true);

// Print the per-document results now that the clock has stopped; they still
// appear before the timing summary, as they did when dumped inside the loop.
foreach ($single_results as $single_result)
{
    var_dump($single_result);
}

$multi_elapsed = $multi_end - $multi_start;
$single_elapsed = $single_end - $single_start;

echo "Multi: " . $multi_elapsed . PHP_EOL;
echo "Single: " . $single_elapsed . PHP_EOL;
// Which is faster? Multi or single? (A tie is reported as "Single".)
echo "Faster is: " . ($multi_elapsed < $single_elapsed ? "Multi" : "Single") . PHP_EOL;

// Result of the batched call.
var_dump($keywords);