Compare commits

...

4 commits

5 changed files with 105 additions and 9 deletions

View file

@ -5,6 +5,20 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [1.0.2] - 2023-07-29
### Fixed
* Minor bug fix
## [1.0.1] - 2023-07-29
### Fixed
* Fixed issue where multi-document queries would throw an unexpected exception due to the way tuples are handled in the
wrapper class.
## [1.0.0] - 2023-07-23

View file

@ -12,7 +12,7 @@
"assembly": {
"name": "Keybert",
"package": "net.nosial.keybert",
"version": "1.0.0",
"version": "1.0.2",
"uuid": "1695515c-2857-11ee-a7a6-6d740ea6cd07"
},
"execution_policies": [

View file

@ -39,6 +39,7 @@
private $preload;
/**
* @param array|null $preload
* @param string|null $host
* @param int|null $port
*/
@ -319,7 +320,7 @@
int $nr_candidates = 20,
array $seed_keywords = null
): array {
return $this->invoke('extract_keywords', [
$results = $this->invoke('extract_keywords', [
'model_name' => $model,
'docs' => $docs, // And this line
'candidates' => $candidates,
@ -333,6 +334,60 @@
'nr_candidates' => $nr_candidates,
'seed_keywords' => $seed_keywords,
]);
return $this->parseResults($results);
}
/**
 * Parses the results from the internal Keybert process.
 *
 * Two payload shapes are recognised:
 *  - single document: a list of [keyword, score] pairs, returned as a
 *    keyword => score map;
 *  - multiple documents: a list holding one such list per document,
 *    returned as a list of keyword => score maps in the same order.
 *
 * Any other payload (not an array, nothing at index 0, or a first element
 * that is not an array) yields an empty array.
 *
 * @param mixed $results Raw payload to normalise
 * @return array
 */
private function parseResults($results): array
{
    if (!is_array($results) || !isset($results[0]) || !is_array($results[0]))
    {
        return [];
    }

    // Decide the shape from the first NON-EMPTY entry rather than from
    // $results[0] alone: in a multi-document payload the first document may
    // have produced no keywords, and its empty list must not cause the whole
    // batch to be mistaken for a single result.
    // When every entry is empty the payload is treated as multi-document,
    // because an empty array can never be a [keyword, score] pair.
    $isMulti = true;
    foreach ($results as $entry)
    {
        if (is_array($entry) && count($entry) > 0)
        {
            $isMulti = isset($entry[0]) && is_array($entry[0]);
            break;
        }
    }

    if (!$isMulti)
    {
        return $this->parseSingleResult($results);
    }

    // One map per document, so a document without keywords keeps its
    // position in the output as an empty map.
    $parsedResults = [];
    foreach ($results as $entry)
    {
        $parsedResults[] = is_array($entry) ? $this->parseSingleResult($entry) : [];
    }

    return $parsedResults;
}
/**
 * Parses a single result from the internal Keybert process.
 *
 * Converts a list of [keyword, score] pairs into a keyword => score map.
 * Entries are skipped when they are not arrays of exactly two elements,
 * when they lack index 0 or index 1, or when the keyword at index 0 is
 * neither a string nor an integer (such a value cannot be used as an array
 * key). If the same keyword occurs more than once, the last score wins.
 *
 * @param mixed $result List of [keyword, score] pairs; null or scalar input yields an empty array
 * @return array
 */
private function parseSingleResult($result): array
{
    $parsedResult = [];

    // foreach only accepts arrays and objects; bail out early on anything
    // else instead of emitting a warning.
    if (!is_array($result) && !is_object($result))
    {
        return $parsedResult;
    }

    foreach ($result as $keywordScore)
    {
        if (!is_array($keywordScore) || count($keywordScore) !== 2)
        {
            continue;
        }

        // A two-element array is not necessarily indexed 0 and 1.
        if (!array_key_exists(0, $keywordScore) || !array_key_exists(1, $keywordScore))
        {
            continue;
        }

        $keyword = $keywordScore[0];

        // Arrays and objects are illegal array offsets, so only accept
        // keywords that are valid keys as-is.
        if (!is_string($keyword) && !is_int($keyword))
        {
            continue;
        }

        $parsedResult[$keyword] = $keywordScore[1];
    }

    return $parsedResult;
}
/**

View file

@ -48,7 +48,7 @@ async def extract_keywords(request):
models[model_name] = KeyBERT(model=model_name)
if docs:
keywords = models[model_name].extract_keywords(
all_keywords = models[model_name].extract_keywords(
docs,
candidates=candidates,
keyphrase_ngram_range=keyphrase_ngram_range,
@ -61,9 +61,8 @@ async def extract_keywords(request):
nr_candidates=nr_candidates,
seed_keywords=seed_keywords,
)
# Transform the result from a list of tuples to a dictionary
keywords_dict = {word: score for word, score in keywords}
return web.json_response({'status': True, 'message': None, 'data': keywords_dict})
return web.json_response({'status': True, 'message': None, 'data': all_keywords})
else:
return web.json_response(
{'status': False, 'message': 'No document provided.', 'data': None})

View file

@ -3,9 +3,37 @@
// Pull in ncc, then import the Keybert package by its package name.
require 'ncc';
import('net.nosial.keybert');
// Client built with no preload list and an explicit host and port.
$keybert = new \Keybert\Keybert(null, '127.0.0.1', 2131);
// Sample corpus used by both the batch run and the per-document run below.
$documents = array(
"The quick brown fox jumps over the lazy dog",
"Lorem ipsum dolor sit amet, consectetur adipiscing elit",
"PHP is a popular general-purpose scripting language",
"Machine Learning provides computers the ability to learn without being explicitly programmed",
"OpenAI is an artificial intelligence research lab",
"Blockchain is a decentralized ledger of all transactions across a peer-to-peer network",
"Python is a popular language for data science",
"Artificial Intelligence is a branch of computer science that aims to create intelligent machines",
"Big data is a term that describes the large volume of data both structured and unstructured that inundates a business on a day-to-day basis",
"Quantum computing is the use of quantum-mechanical phenomena such as superposition and entanglement to perform computation"
);
// One longer document, extracted once with the client created above.
// NOTE: $keywords is overwritten by the batch call further down, and
// $document is reused as the loop variable in the per-document run.
$document = 'The history of natural language processing (NLP) generally started in the 1950s, although work can be found from earlier periods. In 1950, Alan Turing published an article titled "Computing Machinery and Intelligence" which proposed what is now called the Turing test as a criterion of intelligence.';
$keywords = $keybert->extractKeywords('paraphrase-multilingual-MiniLM-L12-v2', $document);
// Replace the client with a default-constructed one before timing.
$keybert = new \Keybert\Keybert();
// Batch run: the whole $documents array goes through a single
// extractKeywords() call; only that call is inside the timed window.
$multi_start = microtime(true);
$keywords = $keybert->extractKeywords('paraphrase-multilingual-MiniLM-L12-v2', $documents);
$multi_end = microtime(true);
// Drop the client and construct a new one before the per-document run.
unset($keybert);
$keybert = new \Keybert\Keybert();
// Per-document run: one extractKeywords() call per entry. The timed window
// also includes the var_dump() output of every result.
$single_start = microtime(true);
foreach($documents as $document)
{
var_dump($keybert->extractKeywords('paraphrase-multilingual-MiniLM-L12-v2', $document));
}
$single_end = microtime(true);
// Elapsed wall-clock time of each run, in seconds (microtime(true) deltas).
echo "Multi: " . ($multi_end - $multi_start) . PHP_EOL;
echo "Single: " . ($single_end - $single_start) . PHP_EOL;
// Which is faster? Multi or single?
// A tie is reported as "Single" because the comparison is a strict less-than.
echo "Faster is: " . (($multi_end - $multi_start) < ($single_end - $single_start) ? "Multi" : "Single") . PHP_EOL;
// $keywords was last assigned by the batch call, so this dumps the batch result.
var_dump($keywords);