Skip to content

Commit

Permalink
Adding pyspark and monitoring stack.
Browse files Browse the repository at this point in the history
  • Loading branch information
giacuong171 committed Dec 30, 2024
1 parent cd3ce7d commit f38ff55
Show file tree
Hide file tree
Showing 37 changed files with 27,993 additions and 2 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -168,4 +168,6 @@ airflow/plugins
airflow/data
datastorage/datalake/minio_storage
datastorage/kafka_connect
utils
utils

jars/
16 changes: 15 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,18 @@ airflow_down:
# Convenience targets that start/stop the project's Docker Compose stacks.
# None of these targets produces a file, so they are declared phony: make will
# always run the recipe, even if a file with the same name appears in the repo.
.PHONY: kafka_up kafka_down
.PHONY: monitoring_up monitoring_down monitoring_restart
.PHONY: elk_up elk_down
.PHONY: warehouse_up warehouse_down

# Kafka stack (stream processing).
kafka_up:
	docker compose -f stream_processing/kafka/docker-compose.yml up -d
kafka_down:
	docker compose -f stream_processing/kafka/docker-compose.yml down

# Monitoring stack defined in monitoring/prom-graf-docker-compose.yaml.
monitoring_up:
	docker compose -f monitoring/prom-graf-docker-compose.yaml up -d
monitoring_down:
	docker compose -f monitoring/prom-graf-docker-compose.yaml down
monitoring_restart:
	docker compose -f monitoring/prom-graf-docker-compose.yaml restart

# ELK stack plus the Filebeat extension. The compose files use paths relative
# to monitoring/elk (build contexts, config mounts), so change into that
# directory first.
elk_up:
	cd monitoring/elk && docker compose -f elk-docker-compose.yml -f extensions/filebeat/filebeat-compose.yml up -d
elk_down:
	cd monitoring/elk && docker compose -f elk-docker-compose.yml -f extensions/filebeat/filebeat-compose.yml down

# Data warehouse stack defined in postgresql-docker-compose.yaml at the repo root.
warehouse_up:
	docker compose -f postgresql-docker-compose.yaml up -d
warehouse_down:
	docker compose -f postgresql-docker-compose.yaml down
21 changes: 21 additions & 0 deletions monitoring/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
This repo is shared by 3 lessons, so it is a bit complicated — but, as Prof. Andrew Ng says, don't worry about it.

## How-to Guide

### Up and running services
Start Prometheus, Grafana (to see metrics), and Jaeger Tracing (to see traces) as follows

```shell
docker compose -f prom-graf-docker-compose.yaml up -d
```

Start ELK stack to see container logs by the following command:
```shell
cd elk
docker compose -f elk-docker-compose.yml -f extensions/filebeat/filebeat-compose.yml up -d
```

### Access services
- Grafana: http://localhost:3000 (`username/password`: `admin/admin`)
- Kibana: http://localhost:5601 (`username/password`: `elastic/changeme`)
- Jaeger: http://localhost:16686
6 changes: 6 additions & 0 deletions monitoring/elk/elasticsearch/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Ignore Docker build files
Dockerfile
.dockerignore

# Ignore OS artifacts
**/.DS_Store
7 changes: 7 additions & 0 deletions monitoring/elk/elasticsearch/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Elasticsearch image for the ELK stack.
# ELASTIC_VERSION has no default here, so it must be supplied as a build
# argument (elk-docker-compose.yml passes it under build.args). Declaring the
# ARG before FROM is what makes it usable in the FROM line.
ARG ELASTIC_VERSION

# Official Elastic image registry: https://www.docker.elastic.co/
FROM docker.elastic.co/elasticsearch/elasticsearch:${ELASTIC_VERSION}

# Add your elasticsearch plugins setup here
# Example: RUN elasticsearch-plugin install analysis-icu
12 changes: 12 additions & 0 deletions monitoring/elk/elasticsearch/config/elasticsearch.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
---
# Elasticsearch node settings, derived from the default configuration shipped
# in the Elasticsearch base image:
# https://github.com/elastic/elasticsearch/blob/master/distribution/docker/src/docker/config/elasticsearch.yml

# Cluster name and bind address.
cluster.name: docker-cluster
network.host: '0.0.0.0'

# X-Pack licensing and security settings.
# Reference: https://www.elastic.co/guide/en/elasticsearch/reference/current/security-settings.html
xpack.license.self_generated.type: 'trial'
xpack.security.enabled: false
75 changes: 75 additions & 0 deletions monitoring/elk/elk-docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Docker Compose definition of the ELK stack: a one-off 'setup' job,
# Elasticsearch and Kibana, all attached to the 'elk' bridge network.
# Paths are relative to the directory containing this file.
# Source: https://github.com/deviantony/docker-elk/blob/main/docker-compose.yml
version: '3.7'

services:

  # The 'setup' service runs a one-off script which initializes the
  # 'logstash_internal' and 'kibana_system' users inside Elasticsearch with the
  # values of the passwords defined in the '.env' file.
  #
  # This task is only performed during the *initial* startup of the stack. On all
  # subsequent runs, the service simply returns immediately, without performing
  # any modification to existing users.
  #
  # NOTE(review): elasticsearch/config/elasticsearch.yml sets
  # 'xpack.security.enabled: false'; presumably these passwords only take
  # effect once security is enabled — confirm.
  setup:
    build:
      context: setup/
      args:
        ELASTIC_VERSION: ${ELASTIC_VERSION}
    init: true
    volumes:
      # Named volume holding the setup state between runs.
      - setup:/state:Z
    environment:
      # ':-' substitutes an empty string when the variable is unset.
      ELASTIC_PASSWORD: ${ELASTIC_PASSWORD:-}
      LOGSTASH_INTERNAL_PASSWORD: ${LOGSTASH_INTERNAL_PASSWORD:-}
      KIBANA_SYSTEM_PASSWORD: ${KIBANA_SYSTEM_PASSWORD:-}
    networks:
      - elk
    depends_on:
      - elasticsearch

  elasticsearch:
    build:
      context: elasticsearch/
      args:
        ELASTIC_VERSION: ${ELASTIC_VERSION}
    volumes:
      # Read-only configuration file plus a named volume for index data.
      - ./elasticsearch/config/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml:ro,z
      - elasticsearch:/usr/share/elasticsearch/data:z
    ports:
      - "9200:9200"
      - "9300:9300"
    environment:
      ES_JAVA_OPTS: -Xms512m -Xmx512m
      # Bootstrap password.
      # Used to initialize the keystore during the initial startup of
      # Elasticsearch. Ignored on subsequent runs.
      ELASTIC_PASSWORD: ${ELASTIC_PASSWORD:-}
      # Use single node discovery in order to disable production mode and avoid bootstrap checks.
      # see: https://www.elastic.co/guide/en/elasticsearch/reference/current/bootstrap-checks.html
      discovery.type: single-node
    networks:
      - elk

  kibana:
    build:
      context: kibana/
      args:
        ELASTIC_VERSION: ${ELASTIC_VERSION}
    volumes:
      - ./kibana/config/kibana.yml:/usr/share/kibana/config/kibana.yml:ro,Z
    ports:
      - "5601:5601"
    environment:
      KIBANA_SYSTEM_PASSWORD: ${KIBANA_SYSTEM_PASSWORD:-}
    networks:
      - elk
    depends_on:
      - elasticsearch

networks:
  elk:
    driver: bridge

volumes:
  setup:
  elasticsearch:
3 changes: 3 additions & 0 deletions monitoring/elk/extensions/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Extensions

Third-party extensions that enable extra integrations with the Elastic stack.
6 changes: 6 additions & 0 deletions monitoring/elk/extensions/filebeat/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Ignore Docker build files
Dockerfile
.dockerignore

# Ignore OS artifacts
**/.DS_Store
3 changes: 3 additions & 0 deletions monitoring/elk/extensions/filebeat/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Filebeat image for the ELK stack extension.
# ELASTIC_VERSION has no default here, so it must be supplied as a build
# argument (filebeat-compose.yml passes it under build.args). Declaring the
# ARG before FROM is what makes it usable in the FROM line.
ARG ELASTIC_VERSION

FROM docker.elastic.co/beats/filebeat:${ELASTIC_VERSION}
36 changes: 36 additions & 0 deletions monitoring/elk/extensions/filebeat/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Filebeat

Filebeat is a lightweight shipper for forwarding and centralizing log data. Installed as an agent on your servers,
Filebeat monitors the log files or locations that you specify, collects log events, and forwards them either to
Elasticsearch or Logstash for indexing.

## Usage

To include Filebeat in the stack, run Docker Compose from the `monitoring/elk` directory with an additional command line
argument referencing the `filebeat-compose.yml` file:

```console
$ docker compose -f elk-docker-compose.yml -f extensions/filebeat/filebeat-compose.yml up
```

## Configuring Filebeat

The Filebeat configuration is stored in [`config/filebeat.yml`](./config/filebeat.yml). You can modify this file with
the help of the [Configuration reference][filebeat-config].

Any change to the Filebeat configuration requires a restart of the Filebeat container:

```console
$ docker compose -f elk-docker-compose.yml -f extensions/filebeat/filebeat-compose.yml restart filebeat
```

Please refer to the following documentation page for more details about how to configure Filebeat inside a Docker
container: [Run Filebeat on Docker][filebeat-docker].

## See also

[Filebeat documentation][filebeat-doc]

[filebeat-config]: https://www.elastic.co/guide/en/beats/filebeat/current/filebeat-reference-yml.html
[filebeat-docker]: https://www.elastic.co/guide/en/beats/filebeat/current/running-on-docker.html
[filebeat-doc]: https://www.elastic.co/guide/en/beats/filebeat/current/index.html
30 changes: 30 additions & 0 deletions monitoring/elk/extensions/filebeat/config/filebeat.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
## Filebeat configuration
## https://github.com/elastic/beats/blob/master/deploy/docker/filebeat.docker.yml
#
# Collects logs from Docker containers through the autodiscover provider and
# sends them to the 'elasticsearch' host over HTTP.

# Module configuration files are read from modules.d; live reload is off.
filebeat.config:
  modules:
    path: ${path.config}/modules.d/*.yml
    reload.enabled: false

filebeat.autodiscover:
  providers:
    # The Docker autodiscover provider automatically retrieves logs from Docker
    # containers as they start and stop.
    - type: docker
      hints.enabled: true

processors:
  - add_cloud_metadata: ~

output.elasticsearch:
  hosts: ['http://elasticsearch:9200']
  username: elastic
  # Taken from the ELASTIC_PASSWORD environment variable of the container.
  password: ${ELASTIC_PASSWORD}

## HTTP endpoint for health checking
## https://www.elastic.co/guide/en/beats/filebeat/current/http-endpoint.html
#

http.enabled: true
http.host: 0.0.0.0
34 changes: 34 additions & 0 deletions monitoring/elk/extensions/filebeat/filebeat-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Compose overlay that adds the Filebeat service to the ELK stack. It is meant
# to be combined with the main compose file (it references the 'elk' network
# and the 'elasticsearch' service, neither of which is defined here), and its
# relative paths assume Compose is run from the directory that contains
# 'extensions/'.
version: '3.7'

services:
  filebeat:
    build:
      context: extensions/filebeat/
      args:
        ELASTIC_VERSION: ${ELASTIC_VERSION}
    # Run as 'root' instead of 'filebeat' (uid 1000) to allow reading
    # 'docker.sock' and the host's filesystem.
    user: root
    command:
      # Log to stderr.
      - -e
      # Disable config file permissions checks. Allows mounting
      # 'config/filebeat.yml' even if it's not owned by root.
      # see: https://www.elastic.co/guide/en/beats/libbeat/current/config-file-permissions.html
      - --strict.perms=false
    volumes:
      - ./extensions/filebeat/config/filebeat.yml:/usr/share/filebeat/filebeat.yml:ro,Z
      # Host container log directory, mounted read-only.
      - type: bind
        source: /var/lib/docker/containers
        target: /var/lib/docker/containers
        read_only: true
      # Docker socket, mounted read-only.
      - type: bind
        source: /var/run/docker.sock
        target: /var/run/docker.sock
        read_only: true
    environment:
      # ':-' substitutes an empty string when the variable is unset.
      ELASTIC_PASSWORD: ${ELASTIC_PASSWORD:-}
    networks:
      - elk
    depends_on:
      - elasticsearch
6 changes: 6 additions & 0 deletions monitoring/elk/kibana/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Ignore Docker build files
Dockerfile
.dockerignore

# Ignore OS artifacts
**/.DS_Store
7 changes: 7 additions & 0 deletions monitoring/elk/kibana/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Kibana image for the ELK stack.
# ELASTIC_VERSION has no default here, so it must be supplied as a build
# argument (elk-docker-compose.yml passes it under build.args). Declaring the
# ARG before FROM is what makes it usable in the FROM line.
ARG ELASTIC_VERSION

# Official Elastic image registry: https://www.docker.elastic.co/
FROM docker.elastic.co/kibana/kibana:${ELASTIC_VERSION}

# Add your kibana plugins setup here
# Example: RUN kibana-plugin install <name|url>
13 changes: 13 additions & 0 deletions monitoring/elk/kibana/config/kibana.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
---
# Kibana server settings, derived from the default configuration shipped in
# the Kibana base image:
# https://github.com/elastic/kibana/blob/master/src/dev/build/tasks/os_packages/docker_generator/templates/kibana_yml.template.ts

server.name: 'kibana'
server.host: '0.0.0.0'
elasticsearch.hosts:
  - 'http://elasticsearch:9200'
monitoring.ui.container.elasticsearch.enabled: true

# X-Pack security credentials used for the connection to Elasticsearch; the
# password comes from the KIBANA_SYSTEM_PASSWORD environment variable.
elasticsearch.username: 'kibana_system'
elasticsearch.password: ${KIBANA_SYSTEM_PASSWORD}
Empty file added monitoring/elk/run_env/.gitkeep
Empty file.
12 changes: 12 additions & 0 deletions monitoring/elk/setup/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Ignore Docker build files
Dockerfile
.dockerignore

# Ignore OS artifacts
**/.DS_Store

# Ignore Git files
.gitignore

# Ignore setup state
state/
1 change: 1 addition & 0 deletions monitoring/elk/setup/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/state/
17 changes: 17 additions & 0 deletions monitoring/elk/setup/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Image for the one-off 'setup' service of the ELK stack, built on top of the
# Elasticsearch image.
# ELASTIC_VERSION has no default here, so it must be supplied as a build
# argument (elk-docker-compose.yml passes it under build.args).
ARG ELASTIC_VERSION

# Official Elastic image registry: https://www.docker.elastic.co/
FROM docker.elastic.co/elasticsearch/elasticsearch:${ELASTIC_VERSION}

# Become root for the filesystem changes below.
USER root

# Copy the whole build context into the image root.
# NOTE(review): the context is expected to provide entrypoint.sh (not visible
# here); the chmod and ENTRYPOINT below depend on it — confirm.
COPY . /

# Create /state, hand it to the 'elasticsearch' user, and make the entrypoint
# script executable. 'set -eux' aborts the build on the first failing command.
RUN set -eux; \
mkdir /state; \
chown elasticsearch /state; \
chmod +x /entrypoint.sh

# Drop root: run as user 'elasticsearch' with group 'root'.
USER elasticsearch:root

ENTRYPOINT ["/entrypoint.sh"]
Loading

0 comments on commit f38ff55

Please sign in to comment.