Skip to content

Commit

Permalink
Merge pull request #132 from scrapnode/feature/upgrade-to-es-8.x
Browse files Browse the repository at this point in the history
upgrade: docker-compose with version 8.x
  • Loading branch information
duydo authored Apr 14, 2023
2 parents 8117287 + dd6362b commit 85d45ac
Show file tree
Hide file tree
Showing 5 changed files with 94 additions and 19 deletions.
2 changes: 1 addition & 1 deletion .env.sample
Original file line number Diff line number Diff line change
@@ -1 +1 @@
ES_VERSION=7.5.1
ES_VERSION=8.3.3
48 changes: 48 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Global build arguments. An ARG declared before the first FROM is only visible
# to FROM lines; each stage that needs one redeclares it (without a value) to
# inherit the default given here.
ARG ES_VERSION=8.3.3
ARG DEBIAN_FRONTEND=noninteractive

# ---- builder stage ---------------------------------------------------------
# Builds the coccoc C++ tokenizer (with its JNI binding) and packages the
# Elasticsearch plugin zip. Nothing from this stage reaches the final image
# except what the runtime stage explicitly copies out of it.
# thanks to https://github.com/cpfriend1721994/docker-es-cococ-tokenizer
FROM docker.elastic.co/elasticsearch/elasticsearch:$ES_VERSION AS builder
ARG ES_VERSION
ARG DEBIAN_FRONTEND
# Maven release to download; kept in one place so the URL and file name agree.
ARG MAVEN_VERSION=3.8.8
# Package installation and `make install` below need root.
USER root

# Native + Java toolchain, installed in a single layer so `apt-get update` and
# `install` can never be cached separately; the apt lists are removed in the
# same layer. `gcc --version` is kept as a build-log sanity check.
RUN apt-get update -y && \
    apt-get install -y \
        build-essential \
        cmake \
        git \
        make \
        openjdk-17-jdk \
        pkg-config \
        software-properties-common \
        wget && \
    rm -rf /var/lib/apt/lists/* && \
    gcc --version
# NOTE(review): this path is amd64-specific; building on another architecture
# (e.g. arm64) would need the matching directory name - confirm if multi-arch
# builds are required.
ENV JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64

# Install Maven from the permanent Apache archive (the dlcdn mirror only keeps
# current releases, so a pinned version can vanish from it). The tarball is
# unpacked straight into /usr/share/maven and deleted in the same layer.
RUN wget -O /tmp/apache-maven-${MAVEN_VERSION}-bin.tar.gz \
        https://archive.apache.org/dist/maven/maven-3/${MAVEN_VERSION}/binaries/apache-maven-${MAVEN_VERSION}-bin.tar.gz && \
    mkdir -p /usr/share/maven && \
    tar -xzf /tmp/apache-maven-${MAVEN_VERSION}-bin.tar.gz -C /usr/share/maven --strip-components=1 && \
    rm -f /tmp/apache-maven-${MAVEN_VERSION}-bin.tar.gz
ENV PATH=/usr/share/maven/bin:$PATH

# Build the tokenizer before touching any plugin sources: it does not depend on
# them, so edits to pom.xml or the plugin code leave this layer cached.
# NOTE(review): the clone tracks the default branch head; pin a tag or commit
# if reproducible builds are needed.
RUN git clone --depth 1 https://github.com/coccoc/coccoc-tokenizer.git /coccoc-tokenizer
# WORKDIR creates the build directory if it does not exist.
WORKDIR /coccoc-tokenizer/build
RUN cmake -DBUILD_JAVA=1 .. && make install

# Warm the local Maven repository using only pom.xml so dependency downloads
# are cached until the pom changes. --fail-never lets the run finish even
# though the sources have not been copied yet.
WORKDIR /elasticsearch-analysis-vietnamese
COPY pom.xml .
RUN mvn verify clean --fail-never

# Copy the full build context and package the plugin (tests skipped); the
# runtime stage picks the zip up from target/releases/.
COPY . /elasticsearch-analysis-vietnamese
RUN mvn package -Dmaven.test.skip -e

# ---- runtime stage ---------------------------------------------------------
# Stock Elasticsearch image plus the tokenizer dictionaries, the JNI shared
# library and the Vietnamese analysis plugin produced by the builder stage.
FROM docker.elastic.co/elasticsearch/elasticsearch:$ES_VERSION
# Redeclared so $ES_VERSION can be used in the plugin file name below.
ARG ES_VERSION

# Tokenizer dictionaries: the contents of both source directories and the
# generated *.dump files all land in the same dicts directory.
COPY --from=builder /coccoc-tokenizer/dicts/tokenizer /usr/local/share/tokenizer/dicts
COPY --from=builder /coccoc-tokenizer/dicts/vn_lang_tool /usr/local/share/tokenizer/dicts
COPY --from=builder /coccoc-tokenizer/build/multiterm_trie.dump /usr/local/share/tokenizer/dicts
COPY --from=builder /coccoc-tokenizer/build/nontone_pair_freq_map.dump /usr/local/share/tokenizer/dicts
COPY --from=builder /coccoc-tokenizer/build/syllable_trie.dump /usr/local/share/tokenizer/dicts

# JNI binding built by the tokenizer's cmake/make step in the builder stage.
COPY --from=builder /coccoc-tokenizer/build/libcoccoc_tokenizer_jni.so /usr/lib

# Plugin archive built by Maven in the builder stage.
COPY --from=builder /elasticsearch-analysis-vietnamese/target/releases/elasticsearch-analysis-vietnamese-$ES_VERSION.zip /
# --batch already answers every confirmation prompt, so no "Y" needs to be
# piped in (a pipe without pipefail would also report only the last command's
# exit status).
RUN /usr/share/elasticsearch/bin/elasticsearch-plugin install --batch file:///elasticsearch-analysis-vietnamese-$ES_VERSION.zip
26 changes: 24 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,28 @@ The above example produces the following terms:
```

## Use Docker

Make sure both Docker and Docker Compose are installed.

### Use Docker Compose

```bash
# build the image
cp .env.sample .env
docker compose build
docker compose up

# test
curl -k http://elastic:changemenow@localhost:9200/_analyze -H 'Content-Type: application/json' -d '
{
"analyzer": "vi_analyzer",
"text": "Cộng hòa Xã hội chủ nghĩa Việt Nam"
}'
# example result
{"tokens":[{"token":"cộng hòa","start_offset":0,"end_offset":8,"type":"<WORD>","position":0},{"token":"xã hội","start_offset":9,"end_offset":15,"type":"<WORD>","position":1},{"token":"chủ nghĩa","start_offset":16,"end_offset":25,"type":"<WORD>","position":2},{"token":"việt nam","start_offset":26,"end_offset":34,"type":"<WORD>","position":3}]}
```

## Build from Source
### Step 1: Build C++ tokenizer for Vietnamese library
```sh
Expand Down Expand Up @@ -136,7 +158,7 @@ Optionally, edit the `elasticsearch-analysis-vietnamese/pom.xml` to change the v

```xml
...
<version>7.17.1</version>
<version>8.3.3</version>
...
```

Expand All @@ -149,7 +171,7 @@ mvn package
### Step 3: Install the plugin on Elasticsearch

```sh
bin/elasticsearch-plugin install file://target/releases/elasticsearch-analysis-vietnamese-7.17.1.zip
bin/elasticsearch-plugin install file://target/releases/elasticsearch-analysis-vietnamese-8.3.3.zip
```

## Compatible Versions
Expand Down
29 changes: 21 additions & 8 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -1,15 +1,28 @@
# Single-node Elasticsearch with the Vietnamese analysis plugin baked into the
# image by the Dockerfile in this directory. The plugin is installed at build
# time, so no plugin zip or install script is mounted at runtime.
version: '3.4'

services:
  elasticsearch:
    build:
      context: .
      args:
        # Forward the version from .env to the Dockerfile's ES_VERSION ARG;
        # falls back to the Dockerfile default when .env does not set it.
        ES_VERSION: ${ES_VERSION:-8.3.3}
    restart: on-failure
    ports:
      - "9200:9200"
    ulimits:
      nofile:
        soft: 65536
        hard: 65536
      # Unlimited memlock is required for bootstrap.memory_lock below.
      memlock:
        hard: -1
        soft: -1
    environment:
      ES_JAVA_OPTS: "-Xmx2g -Xms2g"
      # Sample credentials for local development only; change before exposing
      # the node anywhere else.
      ELASTIC_USERNAME: elastic
      ELASTIC_PASSWORD: changemenow
      # Boolean values are quoted so YAML keeps them as strings, as required
      # for environment mappings.
      bootstrap.memory_lock: "true"
      discovery.type: single-node
      xpack.security.enabled: "true"
    networks:
      - elastic

networks:
  elastic:
    driver: bridge
8 changes: 0 additions & 8 deletions install-es-plugin.sh

This file was deleted.

0 comments on commit 85d45ac

Please sign in to comment.