Initial Commit

This commit is contained in:
Netkas 2023-07-23 16:01:28 -04:00
commit 402c3b5cb6
No known key found for this signature in database
GPG key ID: 5DAF58535614062B
27 changed files with 911 additions and 0 deletions

1
.gitignore vendored Normal file
View file

@ -0,0 +1 @@
build/

65
.gitlab-ci.yml Normal file
View file

@ -0,0 +1,65 @@
---
# Default image for every job that does not declare its own.
image: php:8.1

# Default preparation steps, run before each job's `script` unless the job
# defines its own `before_script`.
before_script:
  # Install some stuff that the image doesn't come with.
  # apt-get is used instead of apt because apt's command-line interface is
  # not guaranteed to be stable for use in scripts.
  - apt-get update -yqq
  - apt-get install git libpq-dev libzip-dev zip make wget gnupg -yqq

  # Install phive (the download is verified against its detached signature)
  - wget -O phive.phar https://phar.io/releases/phive.phar
  - wget -O phive.phar.asc https://phar.io/releases/phive.phar.asc
  - gpg --keyserver hkps://keys.openpgp.org --recv-keys 0x9D8A98B29B2D5D79
  - gpg --verify phive.phar.asc phive.phar
  - chmod +x phive.phar
  - mv phive.phar /usr/local/bin/phive

  # Install phpab
  - phive install phpab --global --trust-gpg-keys 0x2A8299CE842DD38C

  # Install the latest version of ncc (Nosial Code Compiler)
  - git clone https://git.n64.cc/nosial/ncc.git
  - cd ncc
  - make redist
  - php build/src/INSTALL --auto --install-composer
  - cd .. && rm -rf ncc

  # Prepare submodules (rewrite git.n64.cc URLs so they carry the CI credentials)
  - git config --global url."https://${CI_ACCESS_USERNAME}:${CI_ACCESS_TOKEN}@git.n64.cc/".insteadOf "https://git.n64.cc/"
  - git submodule sync --recursive
  - git submodule update --init --recursive
# Branch pipelines: compile the release package and keep the build output.
build:
  stage: build
  rules:
    - if: $CI_COMMIT_BRANCH
  script:
    - ncc build --config release --log-level debug
  artifacts:
    paths:
      - build/
# Tag pipelines: rebuild the package and upload it to the project's generic
# package registry under the tag name.
release:
  stage: deploy
  rules:
    - if: $CI_COMMIT_TAG
  script:
    - ncc build --config release --log-level debug
    # Folded scalar: the lines below are joined with single spaces into one
    # curl command.
    - >
      curl --header "JOB-TOKEN: $CI_JOB_TOKEN"
      --upload-file build/release/net.nosial.keybert.ncc
      "$CI_API_V4_URL/projects/$CI_PROJECT_ID/packages/generic/net.nosial.keybert/$CI_COMMIT_REF_NAME/net.nosial.keybert.ncc"
  artifacts:
    paths:
      - build/
# Branch and tag pipelines: build the Docker image and push it to the
# project's container registry, tagged with the branch or tag name.
docker-build:
  stage: deploy
  image: docker:latest
  services:
    - docker:dind
  # This job-level before_script replaces the global one; the docker image
  # only needs a registry login.
  before_script:
    # Feed the password through stdin so it does not appear in the process
    # arguments (docker warns that -p on the command line is insecure).
    - echo "$CI_REGISTRY_PASSWORD" | docker login -u "$CI_REGISTRY_USER" --password-stdin "$CI_REGISTRY"
  script:
    - docker build --no-cache -t "$CI_REGISTRY_IMAGE:$CI_COMMIT_REF_NAME" .
    - docker push "$CI_REGISTRY_IMAGE:$CI_COMMIT_REF_NAME"
  # Same trigger as the former `only: [branches, tags]`, expressed with
  # `rules` like the other jobs in this file.
  rules:
    - if: $CI_COMMIT_BRANCH
    - if: $CI_COMMIT_TAG

8
.idea/.gitignore generated vendored Normal file
View file

@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

1
.idea/.name generated Normal file
View file

@ -0,0 +1 @@
Keybert

11
.idea/Keybert.iml generated Normal file
View file

@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="WEB_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/tests" isTestSource="true" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

View file

@ -0,0 +1,29 @@
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="IncorrectHttpHeaderInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="customHeaders">
<set>
<option value="Subject" />
<option value="Reply-To" />
<option value="X-JSON-Schema" />
<option value="X-JSON-Type" />
<option value="X-JSON-Path" />
<option value="X-Java-Type" />
<option value="X-Region-Id" />
<option value="X-GraphQL-Variables" />
<option value="X-SSH-Private-Key" />
<option value="X-Temperature" />
<option value="X-Model" />
<option value="X-OPENAI-API-KEY" />
<option value="X-Args-0" />
<option value="X-Args-1" />
<option value="X-Args-2" />
<option value="X-Args-3" />
<option value="X-Args-4" />
<option value="X-Args-5" />
</set>
</option>
</inspection_tool>
</profile>
</component>

4
.idea/misc.xml generated Normal file
View file

@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10" project-jdk-type="Python SDK" />
</project>

8
.idea/modules.xml generated Normal file
View file

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/Keybert.iml" filepath="$PROJECT_DIR$/.idea/Keybert.iml" />
</modules>
</component>
</project>

28
.idea/php.xml generated Normal file
View file

@ -0,0 +1,28 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="MessDetectorOptionsConfiguration">
<option name="transferred" value="true" />
</component>
<component name="PHPCSFixerOptionsConfiguration">
<option name="transferred" value="true" />
</component>
<component name="PHPCodeSnifferOptionsConfiguration">
<option name="highlightLevel" value="WARNING" />
<option name="transferred" value="true" />
</component>
<component name="PhpIncludePathManager">
<include_path>
<path value="/usr/share/php" />
<path value="/etc/ncc" />
<path value="/var/ncc/packages/com.symfony.process=6.2.10" />
<path value="/var/ncc/packages/net.nosial.optslib=1.0.0" />
</include_path>
</component>
<component name="PhpProjectSharedConfiguration" php_language_level="8.2" />
<component name="PhpStanOptionsConfiguration">
<option name="transferred" value="true" />
</component>
<component name="PsalmOptionsConfiguration">
<option name="transferred" value="true" />
</component>
</project>

10
.idea/runConfigurations/Build.xml generated Normal file
View file

@ -0,0 +1,10 @@
<component name="ProjectRunConfigurationManager">
<configuration default="false" name="Build" type="MAKEFILE_TARGET_RUN_CONFIGURATION" factoryName="Makefile" activateToolWindowBeforeRun="false">
<makefile filename="$PROJECT_DIR$/Makefile" target="build" workingDirectory="" arguments="">
<envs />
</makefile>
<method v="2">
<option name="RunConfigurationTask" enabled="false" run_configuration_name="Clean" run_configuration_type="MAKEFILE_TARGET_RUN_CONFIGURATION" />
</method>
</configuration>
</component>

8
.idea/runConfigurations/Clean.xml generated Normal file
View file

@ -0,0 +1,8 @@
<component name="ProjectRunConfigurationManager">
<configuration default="false" name="Clean" type="MAKEFILE_TARGET_RUN_CONFIGURATION" factoryName="Makefile" activateToolWindowBeforeRun="false">
<makefile filename="$PROJECT_DIR$/Makefile" target="clean" workingDirectory="" arguments="">
<envs />
</makefile>
<method v="2" />
</configuration>
</component>

10
.idea/runConfigurations/Install.xml generated Normal file
View file

@ -0,0 +1,10 @@
<component name="ProjectRunConfigurationManager">
<configuration default="false" name="Install" type="MAKEFILE_TARGET_RUN_CONFIGURATION" factoryName="Makefile" activateToolWindowBeforeRun="false">
<makefile filename="$PROJECT_DIR$/Makefile" target="install" workingDirectory="" arguments="">
<envs />
</makefile>
<method v="2">
<option name="RunConfigurationTask" enabled="false" run_configuration_name="Build" run_configuration_type="MAKEFILE_TARGET_RUN_CONFIGURATION" />
</method>
</configuration>
</component>

6
.idea/vcs.xml generated Normal file
View file

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
</component>
</project>

12
CHANGELOG.md Executable file
View file

@ -0,0 +1,12 @@
# Changelog
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [1.0.0] - 2023-07-23
### Added
* First Release

50
Dockerfile Normal file
View file

@ -0,0 +1,50 @@
# Dockerfile
#
# Builds an image that has the Keybert ncc package installed together with
# the Python dependencies (aiohttp, keybert) used by its wrapper script.

# Use an existing docker image as a base
FROM repo.n64.cc:443/nosial/ncc:debian AS builder

# Metadata
LABEL maintainer="Netkas <netkas@nosial.net>"
LABEL version="1.0"
LABEL description="Keybert"

# Increase PHP memory limit
RUN echo "memory_limit=-1" > /usr/local/etc/php/conf.d/memory-limit.ini

# Install build dependencies for numpy and scipy.
# This happens before the source is copied so the layer stays cached when
# only project files change; the apt lists are removed to keep the layer small.
RUN apt-get update && apt-get install -y \
        zip \
        unzip \
        build-essential \
        gfortran \
        python3 \
        python3-pip \
        python3-dev \
        python3-venv \
    && rm -rf /var/lib/apt/lists/*

# Create a Python virtual environment
RUN python3 -m venv /opt/venv

# Make sure we use the virtualenv:
ENV PATH="/opt/venv/bin:$PATH"

# Install aiohttp and Keybert (no pip cache is kept in the image)
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir aiohttp keybert

# Set working directory in the container
WORKDIR /keybert

# Copy local code to the container
COPY . ./

# Build and install Keybert, then remove the sources in the same layer so
# they do not remain in the image.
RUN ncc build --build-configuration release --log-level debug \
    && ncc package install -p "build/release/net.nosial.keybert.ncc" --log-level debug -y \
    && cd / \
    && rm -rf /keybert

# Set working directory in the container
WORKDIR /

# Run the Keybert package's "main" execution unit
CMD ["ncc", "exec", "--package", "net.nosial.keybert", "--exec-unit", "main", "--exec-args", "run"]

14
LICENSE Normal file
View file

@ -0,0 +1,14 @@
Copyright 2022-2023 Nosial
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the “Software”), to deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

8
Makefile Normal file
View file

@ -0,0 +1,8 @@
# None of these targets produce a file named after themselves. In particular
# "build" is also the name of the output directory, so without .PHONY make
# would consider the target up to date as soon as that directory exists.
.PHONY: clean build install

# Remove the build output directory (rm -rf succeeds if it is already gone).
clean:
	rm -rf build

# Compile the release package into build/release.
build:
	ncc build --build-configuration release --log-level debug

# Install (or reinstall) the previously built package system-wide.
install:
	sudo ncc package install -p "build/release/net.nosial.keybert.ncc" --reinstall --skip-dependencies --log-level debug -y

14
README.md Normal file
View file

@ -0,0 +1,14 @@
# Keybert
KeyBERT is a minimal and easy-to-use keyword extraction technique that leverages BERT embeddings to create keywords and
keyphrases that are most similar to a document.
This library is a PHP wrapper for the [KeyBERT](https://github.com/MaartenGr/KeyBERT) Python library, allowing
you to use it in your PHP projects or spawn it as an individual process to use from another machine on your network.
This documentation is a work in progress. Please check back later for more information.
# License
This project is licensed under MIT. Please see the [LICENSE](LICENSE) file for more information.

15
docker-compose.yml Normal file
View file

@ -0,0 +1,15 @@
---
version: '3'

services:
  # Single service built from the Dockerfile in this directory.
  keybert:
    container_name: keybert
    build:
      context: .
      dockerfile: Dockerfile
    restart: unless-stopped
    tty: true
    # Publish the container port on the same host port.
    ports:
      - '2131:2131'
    # Values are quoted so they stay strings.
    environment:
      KEYBERT_PORT: '2131'
      KEYBERT_PRELOAD: '["paraphrase-multilingual-MiniLM-L12-v2"]'

6
main Normal file
View file

@ -0,0 +1,6 @@
<?php

    // Load the ncc runtime (presumably what provides import() below).
    require 'ncc';

    // Import the Keybert package.
    import('net.nosial.keybert');

    // Parse the command-line arguments and hand them to the program entry point.
    $arguments = \OptsLib\Parse::getArguments();
    \Keybert\Program::main($arguments);

54
project.json Normal file
View file

@ -0,0 +1,54 @@
{
"project": {
"compiler": {
"extension": "php",
"minimum_version": "8.0",
"maximum_version": "8.2"
},
"options": {
"create_symlink": true
}
},
"assembly": {
"name": "Keybert",
"package": "net.nosial.keybert",
"version": "1.0.0",
"uuid": "1695515c-2857-11ee-a7a6-6d740ea6cd07"
},
"execution_policies": [
{
"name": "main",
"runner": "php",
"execute": {
"target": "main",
"working_directory": "%CWD%",
"tty": true
}
}
],
"build": {
"source_path": "src",
"default_configuration": "release",
"main": "main",
"dependencies": [
{
"name": "com.symfony.process",
"version": "latest",
"source_type": "remote",
"source": "symfony/process=latest@composer"
},
{
"name": "net.nosial.optslib",
"version": "latest",
"source_type": "remote",
"source": "nosial/libs.opts=latest@n64"
}
],
"configurations": [
{
"name": "release",
"output_path": "build/release"
}
]
}
}

View file

@ -0,0 +1,13 @@
<?php
namespace Keybert\Exceptions;
use Throwable;
/**
 * Exception type used by this library for Keybert-specific failures.
 */
class KeybertException extends \Exception
{
    /**
     * Creates the exception; all arguments are passed straight to \Exception.
     *
     * @param string $message The exception message
     * @param int $code The exception code
     * @param Throwable|null $previous The previous throwable, if any
     */
    public function __construct(
        string $message = "",
        int $code = 0,
        ?Throwable $previous = null
    )
    {
        parent::__construct($message, $code, $previous);
    }
}

346
src/Keybert/Keybert.php Normal file
View file

@ -0,0 +1,346 @@
<?php
/** @noinspection PhpMissingFieldTypeInspection */
namespace Keybert;
use Exception;
use JsonException;
use Keybert\Exceptions\KeybertException;
use RuntimeException;
use Symfony\Component\Process\ExecutableFinder;
use Symfony\Component\Process\Process;
/**
 * Client for the Keybert HTTP wrapper (wrapper.py).
 *
 * When no host is given the class starts wrapper.py as a local child process
 * and talks to it on 127.0.0.1; otherwise it talks to the given host and port.
 */
class Keybert
{
    /**
     * Number of seconds to wait for the internal wrapper process to answer a ping.
     */
    private const STARTUP_TIMEOUT = 120;

    /**
     * The internal wrapper process, or null when none has been started.
     *
     * @var Process|null
     */
    private $process;

    /**
     * The host the wrapper is reached on.
     *
     * @var string|null
     */
    private $host;

    /**
     * The port the wrapper is reached on.
     *
     * @var int
     */
    private $port;

    /**
     * True when this instance manages its own local wrapper process.
     *
     * @var bool
     */
    private $internal;

    /**
     * Names of the models handed to the internal wrapper for preloading.
     *
     * @var array
     */
    private $preload;

    /**
     * Creates the client and, when no host is given, starts the internal wrapper process.
     *
     * @param array|null $preload Models to preload; passed to the internal wrapper process
     * @param string|null $host The remote host, or null to start a local wrapper process
     * @param int|null $port The port to use; required when a host is given, otherwise a free port is picked when null
     * @throws RuntimeException If a host is given without a port, or the internal process fails to start
     */
    public function __construct(?array $preload=null, ?string $host=null, ?int $port=null)
    {
        $this->internal = ($host === null);
        // Re-index so the list is always JSON-encoded as an array, never as an object.
        $this->preload = array_values($preload ?? []);

        if(!$this->internal && $port === null)
        {
            throw new RuntimeException('If Keybert is running remotely, a port must be specified. (Host is not empty, but the port is.)');
        }

        $this->host = $this->internal ? '127.0.0.1' : $host;
        $this->port = $port ?? self::getAvailablePort();

        // Start the internal Keybert process if we're running locally.
        if($this->internal)
        {
            $this->start();
        }
    }

    /**
     * Sends a JSON POST request to the wrapper and returns the "data" field of its response.
     *
     * @param string $method The wrapper endpoint to call, e.g. "ping"
     * @param array|null $parameters The parameters to send as the JSON body
     * @return mixed The "data" field of the response
     * @throws KeybertException If the wrapper reports a failure ("status" missing or false)
     * @throws RuntimeException If the request fails, returns a non-200 status or invalid JSON
     * @noinspection HttpUrlsUsage
     */
    private function invoke(string $method, ?array $parameters = null): mixed
    {
        $ch = null;

        try
        {
            $ch = curl_init(sprintf('http://%s:%d/%s', $this->host, $this->port, $method));

            if($ch === false)
            {
                throw new RuntimeException('Failed to initialize cURL.');
            }

            curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
            curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($parameters ?? [], JSON_THROW_ON_ERROR));
            curl_setopt($ch, CURLOPT_HTTPHEADER, [
                'Content-Type: application/json',
            ]);

            $result = curl_exec($ch);

            if($result === false)
            {
                throw new RuntimeException(sprintf('cURL error %d: %s', curl_errno($ch), curl_error($ch)));
            }

            // Check the status before decoding: an error page is not necessarily JSON, and
            // decoding it first would hide the status code behind a JSON parsing error.
            $http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);

            if($http_code !== 200)
            {
                throw new RuntimeException('HTTP request failed with status code ' . $http_code);
            }

            $json_result = json_decode($result, true, 512, JSON_THROW_ON_ERROR);
        }
        catch(Exception $e)
        {
            // JsonException extends Exception, so encoding/decoding failures are covered here too.
            throw new RuntimeException('Failed to invoke Keybert method "' . $method . '".', 0, $e);
        }
        finally
        {
            if($ch)
            {
                curl_close($ch);
            }

            $this->printUpdates();
        }

        if(!isset($json_result['status']) || !$json_result['status'])
        {
            if(isset($json_result['message']))
            {
                throw new KeybertException($json_result['message']);
            }

            throw new KeybertException(sprintf('Failed to invoke Keybert method "%s", %s.', $method, $result));
        }

        return $json_result['data'] ?? null;
    }

    /**
     * Starts the internal Keybert process and waits until it answers a ping.
     *
     * @return void
     * @throws RuntimeException If the process cannot be created, exits early or does not respond in time
     */
    private function start(): void
    {
        if(!$this->internal)
        {
            return;
        }

        try
        {
            $python = (new ExecutableFinder())->find('python3');

            if($python === null)
            {
                throw new RuntimeException('Unable to locate the "python3" executable.');
            }

            $this->process = new Process([
                $python,
                __DIR__ . DIRECTORY_SEPARATOR . 'wrapper.py',
            ]);

            // The wrapper is a long-running server, so no run-time limit applies to it.
            $this->process->setTimeout(null);

            $this->process->setEnv([
                'KEYBERT_ENABLED' => '1',
                'KEYBERT_PORT' => (string)$this->port,
                'KEYBERT_PRELOAD' => json_encode($this->preload, JSON_THROW_ON_ERROR),
            ]);
        }
        catch(Exception $e)
        {
            throw new RuntimeException('Failed to start Keybert process.', 0, $e);
        }

        // Start the process and wait for it to be ready. (2-Minutes timeout)
        $this->process->start();
        $start_time = time();

        while($this->process->getExitCode() === null)
        {
            $this->printUpdates();
            $last_error = null;

            try
            {
                if($this->invoke('ping'))
                {
                    break;
                }
            }
            catch(Exception $e)
            {
                $last_error = $e;
            }

            // Checked on every unsuccessful attempt, so neither repeated errors nor a falsy
            // ping result can keep this loop running forever or spinning without a pause.
            if(time() - $start_time > self::STARTUP_TIMEOUT)
            {
                throw new RuntimeException('Keybert wrapper process failed to start.', 0, $last_error);
            }

            sleep(1);
        }

        if($this->process->getExitCode() !== null)
        {
            throw new RuntimeException(sprintf('Keybert wrapper process exited with code %d.', $this->process->getExitCode()));
        }
    }

    /**
     * Stops the internal Keybert process, if there is one.
     *
     * @return void
     */
    private function stop(): void
    {
        if($this->process === null)
        {
            return;
        }

        if($this->process->isRunning())
        {
            $this->process->stop();
        }

        $this->process = null;
    }

    /**
     * Prints out the updates from the internal Keybert process.
     *
     * @return void
     */
    private function printUpdates(): void
    {
        if(!$this->internal || !$this->process)
        {
            return;
        }

        print($this->process->getIncrementalOutput());
        print($this->process->getIncrementalErrorOutput());
    }

    /**
     * Returns an available port in the given range.
     *
     * Ports are probed in random order; the first one that does not accept a TCP
     * connection on the given host is returned.
     *
     * @param string $host The host to probe
     * @param int $start The first port of the range
     * @param int $end The last port of the range
     * @return int
     * @throws RuntimeException If every port in the range accepted a connection
     */
    private static function getAvailablePort(string $host='127.0.0.1', int $start=1024, int $end=65535): int
    {
        $range = range($start, $end);
        shuffle($range);

        foreach($range as $port)
        {
            // A short explicit timeout keeps a single probe from blocking for the whole
            // default socket timeout.
            $connection = @stream_socket_client('tcp://' . $host . ':' . $port, $error_code, $error_message, 1.0);

            if(!is_resource($connection))
            {
                return $port;
            }

            fclose($connection);
        }

        throw new RuntimeException('No available port found in range ' . $start . ' to ' . $end . '.');
    }

    /**
     * Blocks while the internal Keybert process is running and prints its output.
     *
     * Does nothing when this instance does not manage an internal process.
     *
     * @return void
     */
    public function run(): void
    {
        if(!$this->internal || !$this->process)
        {
            return;
        }

        while($this->process->isRunning())
        {
            $this->printUpdates();
            sleep(1);
        }

        // Flush whatever the process wrote just before it exited.
        $this->printUpdates();
    }

    /**
     * Loads a model into Keybert.
     *
     * @param string $model The model to load, e.g. "paraphrase-multilingual-MiniLM-L12-v2"
     * @return void
     * @throws KeybertException
     */
    public function loadModel(string $model): void
    {
        $this->invoke('load_model', [
            'model_name' => $model,
        ]);
    }

    /**
     * Extracts keywords from the given documents.
     *
     * @param string $model The model to use, e.g. "paraphrase-multilingual-MiniLM-L12-v2"
     * @param string|array $docs The documents or document to extract keywords from.
     * @param array|null $candidates The candidates to use for the keywords (eg; ["NOUN", "PROPN", "ADJ"])
     * @param array $keyphrase_ngram_range The ngram range to use for the keyphrases. (eg; [1, 2])
     * @param string $stop_words The stop words to use. (eg; "english")
     * @param int $top_n The number of keywords to return.
     * @param int $min_df The minimum document frequency.
     * @param bool $use_maxsum Whether to use the MaxSum algorithm.
     * @param bool $use_mmr Whether to use the Maximal Marginal Relevance algorithm.
     * @param float $diversity The diversity of the MMR algorithm.
     * @param int $nr_candidates The number of candidates to use for the MMR algorithm.
     * @param array|null $seed_keywords The seed keywords to use for the MMR algorithm.
     * @return array The extracted keywords.
     * @throws KeybertException If the extraction failed.
     */
    public function extractKeywords(
        string $model,
        string|array $docs,
        ?array $candidates = null,
        array $keyphrase_ngram_range = [1, 1],
        string $stop_words = 'english',
        int $top_n = 5,
        int $min_df = 1,
        bool $use_maxsum = false,
        bool $use_mmr = false,
        float $diversity = 0.5,
        int $nr_candidates = 20,
        ?array $seed_keywords = null
    ): array {
        return $this->invoke('extract_keywords', [
            'model_name' => $model,
            'docs' => $docs,
            'candidates' => $candidates,
            'keyphrase_ngram_range' => $keyphrase_ngram_range,
            'stop_words' => $stop_words,
            'top_n' => $top_n,
            'min_df' => $min_df,
            'use_maxsum' => $use_maxsum,
            'use_mmr' => $use_mmr,
            'diversity' => $diversity,
            'nr_candidates' => $nr_candidates,
            'seed_keywords' => $seed_keywords,
        ]);
    }

    /**
     * Destructor. Stops the internal Keybert process if one is running.
     */
    public function __destruct()
    {
        $this->stop();
    }
}

59
src/Keybert/Program.php Normal file
View file

@ -0,0 +1,59 @@
<?php
namespace Keybert;
use ncc\Runtime;
/**
 * Command-line entry point that starts a local Keybert instance.
 */
class Program
{
    /**
     * Main entry point for the CLI
     *
     * The port and the models to preload are read from the KEYBERT_PORT and
     * KEYBERT_PRELOAD environment variables when set, otherwise from the
     * --port|-p and --preload|-l arguments.
     *
     * @param array $args The parsed command-line arguments
     * @return void
     * @throws \InvalidArgumentException If the port or the preload list is malformed
     */
    public static function main(array $args=[]): void
    {
        $port = getenv('KEYBERT_PORT');
        if($port === false)
        {
            $port = $args['port'] ?? $args['p'] ?? null;
        }

        $preload = getenv('KEYBERT_PRELOAD');
        if($preload === false)
        {
            $preload = $args['preload'] ?? $args['l'] ?? null;
        }

        // Both values arrive as strings, while the Keybert constructor expects ?array and
        // ?int; convert them explicitly instead of failing with a TypeError.
        $keybert = new Keybert(self::parsePreload($preload), null, self::parsePort($port));
        $keybert->run();
    }

    /**
     * Converts a raw port value into an integer port number.
     *
     * @param mixed $value The raw value from the environment or the arguments
     * @return int|null The port, or null when no port was given
     * @throws \InvalidArgumentException If the value is not an integer between 1 and 65535
     */
    private static function parsePort(mixed $value): ?int
    {
        if($value === null || $value === '')
        {
            return null;
        }

        $port = false;
        if(is_int($value) || is_string($value))
        {
            $port = filter_var($value, FILTER_VALIDATE_INT, ['options' => ['min_range' => 1, 'max_range' => 65535]]);
        }

        if($port === false)
        {
            throw new \InvalidArgumentException('The port must be an integer between 1 and 65535.');
        }

        return $port;
    }

    /**
     * Converts a raw preload value into a list of model names.
     *
     * Accepts an array, a JSON array string (e.g. '["model-a","model-b"]') or a
     * comma-separated string (e.g. 'model-a,model-b').
     *
     * @param mixed $value The raw value from the environment or the arguments
     * @return array|null The model names, or null when none were given
     * @throws \InvalidArgumentException If a JSON array was given but could not be decoded
     */
    private static function parsePreload(mixed $value): ?array
    {
        if(is_array($value))
        {
            $models = $value;
        }
        elseif(is_string($value) && trim($value) !== '')
        {
            $value = trim($value);

            if($value[0] === '[')
            {
                $models = json_decode($value, true);

                if(!is_array($models))
                {
                    throw new \InvalidArgumentException('The preload list is not a valid JSON array.');
                }
            }
            else
            {
                $models = explode(',', $value);
            }
        }
        else
        {
            return null;
        }

        $names = [];
        foreach($models as $model)
        {
            // Skip anything that is not a non-empty model name.
            if(is_string($model) && trim($model) !== '')
            {
                $names[] = trim($model);
            }
        }

        return $names;
    }

    /**
     * Displays the help message
     *
     * @return void
     */
    public static function help(): void
    {
        print('Keybert v' . Runtime::getConstant('net.nosial.keybert', 'version') . PHP_EOL . PHP_EOL);

        print('Usage: Keybert [options]' . PHP_EOL);
        print('Options:' . PHP_EOL);
        print(' --port|-p - The port to listen on' . PHP_EOL);
        print(' --preload|-l - Optional. Models to preload, separated by commas' . PHP_EOL);
        print(PHP_EOL);
        print('Environment Variables:' . PHP_EOL);
        print(' KEYBERT_PORT - The port to listen on' . PHP_EOL);
        print(' KEYBERT_PRELOAD - Optional. Models to preload, separated by commas or as a JSON array' . PHP_EOL);

        exit(0);
    }
}

97
src/Keybert/wrapper.py Normal file
View file

@ -0,0 +1,97 @@
from aiohttp import web
from keybert import KeyBERT
import json
import os
# aiohttp application on which the routes are registered when run as a script.
app = web.Application()
# Cache of KeyBERT instances, keyed by model name.
models = {}
# Ping
async def ping(request):
    """Answer with a fixed success payload; the request content is not read."""
    payload = {'status': True, 'message': None, 'data': True}
    return web.json_response(payload)
# Load a model
async def load_model(request):
    """Create a KeyBERT instance for the requested model and cache it.

    Reads "model_name" from the JSON request body. The response is a JSON
    object with "status", "message" and "data"; any exception is reported
    through "message" with "status" set to False.
    """
    try:
        payload = await request.json()
        name = payload.get('model_name', '')

        # Guard clause: nothing to load without a model name.
        if not name:
            return web.json_response({'status': False, 'message': 'No model name provided.', 'data': None})

        models[name] = KeyBERT(model=name)
        return web.json_response({'status': True, 'message': None, 'data': f'Model {name} loaded.'})
    except Exception as error:
        return web.json_response({'status': False, 'message': str(error), 'data': None})
# Extract keywords
async def extract_keywords(request):
    """Extract keywords from one or several documents.

    Reads the model name, the document(s) and the optional KeyBERT settings
    from the JSON request body. The model is loaded and cached on first use.

    The response is a JSON object with "status", "message" and "data". For a
    single document "data" is a {keyword: score} object; for several documents
    it is a list of such objects, one per document. Any exception is reported
    through "message" with "status" set to False.
    """
    try:
        data = await request.json()
        model_name = data.get('model_name', '')
        docs = data.get('docs', [])
        candidates = data.get('candidates', None)
        keyphrase_ngram_range = tuple(data.get('keyphrase_ngram_range', (1, 1)))
        stop_words = data.get('stop_words', 'english')
        top_n = data.get('top_n', 5)
        min_df = data.get('min_df', 1)
        use_maxsum = data.get('use_maxsum', False)
        use_mmr = data.get('use_mmr', False)
        diversity = data.get('diversity', 0.5)
        nr_candidates = data.get('nr_candidates', 20)
        seed_keywords = data.get('seed_keywords', None)

        # Validate the cheap inputs before (possibly) loading a model.
        if not model_name:
            return web.json_response({'status': False, 'message': 'No model name provided.', 'data': None})

        if not docs:
            return web.json_response(
                {'status': False, 'message': 'No document provided.', 'data': None})

        # Check if the model is loaded, if not, load it
        if model_name not in models:
            models[model_name] = KeyBERT(model=model_name)

        keywords = models[model_name].extract_keywords(
            docs,
            candidates=candidates,
            keyphrase_ngram_range=keyphrase_ngram_range,
            stop_words=stop_words,
            top_n=top_n,
            min_df=min_df,
            use_maxsum=use_maxsum,
            use_mmr=use_mmr,
            diversity=diversity,
            nr_candidates=nr_candidates,
            seed_keywords=seed_keywords,
        )

        # KeyBERT returns a flat list of (keyword, score) tuples for a single
        # document and a list of such lists for several documents. Unpacking
        # the nested form as (word, score) pairs would fail, so each shape is
        # converted separately.
        if keywords and isinstance(keywords[0], list):
            result = [dict(doc_keywords) for doc_keywords in keywords]
        else:
            result = dict(keywords)

        return web.json_response({'status': True, 'message': None, 'data': result})
    except Exception as e:
        return web.json_response({'status': False, 'message': str(e), 'data': None})
if __name__ == '__main__':
    # Check for required environment variables
    if os.environ.get('KEYBERT_ENABLED') != '1':
        print('Error: KEYBERT_ENABLED environment variable not set or not "1".')
        raise SystemExit(1)

    if 'KEYBERT_PORT' not in os.environ:
        print('Error: KEYBERT_PORT environment variable not set.')
        raise SystemExit(1)

    try:
        port = int(os.environ['KEYBERT_PORT'])
    except ValueError:
        print('Error: KEYBERT_PORT environment variable is not a valid port number.')
        raise SystemExit(1)

    # Preload models if KEYBERT_PRELOAD is set (expected to be a JSON array of model names)
    preload_raw = os.environ.get('KEYBERT_PRELOAD', '')
    if preload_raw.strip():
        try:
            preload_models = json.loads(preload_raw)
        except json.JSONDecodeError:
            print('Error: KEYBERT_PRELOAD environment variable is not valid JSON.')
            raise SystemExit(1)

        # A bare JSON string is treated as a single model name; iterating it
        # directly would try to load one "model" per character.
        if isinstance(preload_models, str):
            preload_models = [preload_models]

        if not isinstance(preload_models, list):
            print('Error: KEYBERT_PRELOAD environment variable must be a JSON array of model names.')
            raise SystemExit(1)

        for model_name in preload_models:
            models[model_name] = KeyBERT(model=model_name)
            print(f'Model {model_name} preloaded.')

    # Add routes
    app.router.add_post('/ping', ping)
    app.router.add_post('/load_model', load_model)
    app.router.add_post('/extract_keywords', extract_keywords)

    # Start server (listens on all interfaces)
    web.run_app(app, host='0.0.0.0', port=port)

11
tests/model_test.php Normal file
View file

@ -0,0 +1,11 @@
<?php
// Manual smoke test: requests keywords for a sample document and dumps the result.
require 'ncc';
import('net.nosial.keybert');
// A host is passed, so the client talks to 127.0.0.1:1241 rather than starting its own
// wrapper process; a wrapper must already be listening there.
$keybert = new \Keybert\Keybert(null, '127.0.0.1', 1241);
// Sample text to extract keywords from.
$document = 'The history of natural language processing (NLP) generally started in the 1950s, although work can be found from earlier periods. In 1950, Alan Turing published an article titled "Computing Machinery and Intelligence" which proposed what is now called the Turing test as a criterion of intelligence.';
// All optional extraction settings are left at their defaults.
$keywords = $keybert->extractKeywords('paraphrase-multilingual-MiniLM-L12-v2', $document);
var_dump($keywords);

23
tests/wrapper.http Normal file
View file

@ -0,0 +1,23 @@
###
POST http://0.0.0.0:5000/load_model
Content-Type: application/json
{
"model_name": "paraphrase-multilingual-mpnet-base-v2"
}
###
POST http://0.0.0.0:5000/extract_keywords
Content-Type: application/json
{
"model_name": "paraphrase-multilingual-mpnet-base-v2",
"docs": "The history of natural language processing (NLP) generally started in the 1950s, although work can be found from earlier periods. In 1950, Alan Turing published an article titled \"Computing Machinery and Intelligence\" which proposed what is now called the Turing test as a criterion of intelligence.",
"keyphrase_ngram_range": [1, 1],
"top_n": 5,
"min_df": 1,
"use_maxsum": false,
"use_mmr": false,
"diversity": 0.5,
"nr_candidates": 20
}