-
option |
previous steps
(system setup) |
custom-opensearch.yml
(sudo chown
1000:1000 custom-opensearch*.yml) |
docker-compose.yml
(docker compose up) |
verify |
|
2 nodes + dashboards (demo certs) |
- set demo password
echo "OPENSEARCH_INITIAL_ADMIN_PASSWORD=xxxxxx"
>.env
|
|
docker-compose.yml
services:
opensearch-node1: # This is also the
hostname of the container within the Docker
network (i.e. https://opensearch-node1/)
image:
opensearchproject/opensearch:latest
container_name:
opensearch-node1
environment:
-
cluster.name=opensearch-cluster # Name the cluster
-
node.name=opensearch-node1 # Name the node that
will run in this container
-
discovery.seed_hosts=opensearch-node1,opensearch-node2
# Nodes to look for when discovering the cluster
-
cluster.initial_cluster_manager_nodes=opensearch-node1,opensearch-node2
# Nodes eligible to serve as cluster manager
-
bootstrap.memory_lock=true # Disable JVM heap
memory swapping
-
"OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" # Set min
and max JVM heap sizes to at least 50% of system
RAM
- OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_INITIAL_ADMIN_PASSWORD}
# Sets the demo admin user password when using
demo configuration (for OpenSearch 2.12 and later)
ulimits:
memlock:
soft:
-1 # Set memlock to unlimited (no soft or hard
limit)
hard:
-1
nofile:
soft:
65536 # Maximum number of open files for the
opensearch user - set to at least 65536
hard:
65536
volumes:
-
opensearch-data1:/usr/share/opensearch/data #
Creates volume called opensearch-data1 and mounts
it to the container
ports:
- 9200:9200 # REST
API
- 9600:9600 #
Performance Analyzer
networks:
- opensearch-net #
All of the containers will join the same Docker
bridge network
opensearch-node2:
image:
opensearchproject/opensearch:latest # This should
be the same image used for opensearch-node1 to
avoid issues
container_name:
opensearch-node2
environment:
-
cluster.name=opensearch-cluster
-
node.name=opensearch-node2
-
discovery.seed_hosts=opensearch-node1,opensearch-node2
-
cluster.initial_cluster_manager_nodes=opensearch-node1,opensearch-node2
-
bootstrap.memory_lock=true
-
"OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m"
-
OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_INITIAL_ADMIN_PASSWORD}
ulimits:
memlock:
soft:
-1
hard:
-1
nofile:
soft:
65536
hard:
65536
volumes:
-
opensearch-data2:/usr/share/opensearch/data
networks:
- opensearch-net
opensearch-dashboards:
image:
opensearchproject/opensearch-dashboards:latest #
Make sure the version of opensearch-dashboards
matches the version of opensearch installed on
other nodes
container_name:
opensearch-dashboards
ports:
- 5601:5601 # Map
host port 5601 to container port 5601
expose:
- "5601" # Expose
port 5601 for web access to OpenSearch Dashboards
environment:
OPENSEARCH_HOSTS:
'["https://opensearch-node1:9200","https://opensearch-node2:9200"]'
# Define the OpenSearch nodes that OpenSearch
Dashboards will query
networks:
- opensearch-net
volumes:
opensearch-data1:
opensearch-data2:
networks:
opensearch-net:
|
|
|
2 nodes + dashboards (own certs) |
- generate certificates (for: root, node1, node2,
admin user)
|
custom-opensearch-node1.yml
---
cluster.name: docker-cluster
# Bind to all interfaces because we don't know
what IP address Docker will assign to us.
network.host: 0.0.0.0
# # minimum_master_nodes need to be explicitly set
when bound on a public IP
# # set to 1 to allow single node clusters
# discovery.zen.minimum_master_nodes: 1
# Setting network.host to a non-loopback address
enables the annoying bootstrap checks.
"Single-node" mode disables them again.
# discovery.type: single-node
plugins.security.ssl.transport.pemcert_filepath: node1.pem
plugins.security.ssl.transport.pemkey_filepath: node1-key.pem
plugins.security.ssl.transport.pemtrustedcas_filepath:
root-ca.pem
plugins.security.ssl.transport.enforce_hostname_verification:
false
plugins.security.ssl.http.enabled: true
plugins.security.ssl.http.pemcert_filepath: node1.pem
plugins.security.ssl.http.pemkey_filepath: node1-key.pem
plugins.security.ssl.http.pemtrustedcas_filepath:
root-ca.pem
plugins.security.allow_unsafe_democertificates:
true
plugins.security.allow_default_init_securityindex:
true
plugins.security.authcz.admin_dn:
-
CN=A,OU=UNIT,O=ORG,L=TORONTO,ST=ONTARIO,C=CA
plugins.security.nodes_dn:
-
"CN=node1.dns.a-record,OU=UNIT,O=ORG,L=TORONTO,ST=ONTARIO,C=CA"
-
"CN=node2.dns.a-record,OU=UNIT,O=ORG,L=TORONTO,ST=ONTARIO,C=CA"
plugins.security.audit.type: internal_opensearch
plugins.security.enable_snapshot_restore_privilege:
true
plugins.security.check_snapshot_restore_write_privileges:
true
plugins.security.restapi.roles_enabled:
["all_access", "security_rest_api_access"]
#cluster.routing.allocation.disk.threshold_enabled:
false
#opendistro_security.audit.config.disabled_rest_categories:
NONE
#opendistro_security.audit.config.disabled_transport_categories:
NONE
plugins.security.system_indices.enabled: true
plugins.security.system_indices.indices:
[.plugins-ml-agent, .plugins-ml-config,
.plugins-ml-connector, .plugins-ml-controller,
.plugins-ml-model-group, .plugins-ml-model,
.plugins-ml-task, .plugins-ml-conversation-meta,
.plugins-ml-conversation-interactions,
.plugins-ml-memory-meta,
.plugins-ml-memory-message,
.plugins-ml-stop-words,
.opendistro-alerting-config,
.opendistro-alerting-alert*,
.opendistro-anomaly-results*,
.opendistro-anomaly-detector*,
.opendistro-anomaly-checkpoints,
.opendistro-anomaly-detection-state,
.opendistro-reports-*,
.opensearch-notifications-*,
.opensearch-notebooks, .opensearch-observability,
.ql-datasources,
.opendistro-asynchronous-search-response*,
.replication-metadata-store,
.opensearch-knn-models, .geospatial-ip2geo-data*,
.plugins-flow-framework-config,
.plugins-flow-framework-templates,
.plugins-flow-framework-state]
node.max_local_storage_nodes: 3
custom-opensearch-node2.yml
---
cluster.name: docker-cluster
# Bind to all interfaces because we don't know
what IP address Docker will assign to us.
network.host: 0.0.0.0
# # minimum_master_nodes need to be explicitly set
when bound on a public IP
# # set to 1 to allow single node clusters
# discovery.zen.minimum_master_nodes: 1
# Setting network.host to a non-loopback address
enables the annoying bootstrap checks.
"Single-node" mode disables them again.
# discovery.type: single-node
plugins.security.ssl.transport.pemcert_filepath: node2.pem
plugins.security.ssl.transport.pemkey_filepath: node2-key.pem
plugins.security.ssl.transport.pemtrustedcas_filepath:
root-ca.pem
plugins.security.ssl.transport.enforce_hostname_verification:
false
plugins.security.ssl.http.enabled: true
plugins.security.ssl.http.pemcert_filepath: node2.pem
plugins.security.ssl.http.pemkey_filepath: node2-key.pem
plugins.security.ssl.http.pemtrustedcas_filepath:
root-ca.pem
plugins.security.allow_unsafe_democertificates:
true
plugins.security.allow_default_init_securityindex:
true
plugins.security.authcz.admin_dn:
-
CN=A,OU=UNIT,O=ORG,L=TORONTO,ST=ONTARIO,C=CA
plugins.security.nodes_dn:
-
"CN=node1.dns.a-record,OU=UNIT,O=ORG,L=TORONTO,ST=ONTARIO,C=CA"
-
"CN=node2.dns.a-record,OU=UNIT,O=ORG,L=TORONTO,ST=ONTARIO,C=CA"
plugins.security.audit.type: internal_opensearch
plugins.security.enable_snapshot_restore_privilege:
true
plugins.security.check_snapshot_restore_write_privileges:
true
plugins.security.restapi.roles_enabled:
["all_access", "security_rest_api_access"]
#cluster.routing.allocation.disk.threshold_enabled:
false
#opendistro_security.audit.config.disabled_rest_categories:
NONE
#opendistro_security.audit.config.disabled_transport_categories:
NONE
plugins.security.system_indices.enabled: true
plugins.security.system_indices.indices:
[.plugins-ml-agent, .plugins-ml-config,
.plugins-ml-connector, .plugins-ml-controller,
.plugins-ml-model-group, .plugins-ml-model,
.plugins-ml-task, .plugins-ml-conversation-meta,
.plugins-ml-conversation-interactions,
.plugins-ml-memory-meta,
.plugins-ml-memory-message,
.plugins-ml-stop-words,
.opendistro-alerting-config,
.opendistro-alerting-alert*,
.opendistro-anomaly-results*,
.opendistro-anomaly-detector*,
.opendistro-anomaly-checkpoints,
.opendistro-anomaly-detection-state,
.opendistro-reports-*,
.opensearch-notifications-*,
.opensearch-notebooks, .opensearch-observability,
.ql-datasources,
.opendistro-asynchronous-search-response*,
.replication-metadata-store,
.opensearch-knn-models, .geospatial-ip2geo-data*,
.plugins-flow-framework-config,
.plugins-flow-framework-templates,
.plugins-flow-framework-state]
node.max_local_storage_nodes: 3
|
docker-compose.yml
services:
opensearch-node1: # This is also the
hostname of the container within the Docker
network (i.e. https://opensearch-node1/)
image:
opensearchproject/opensearch:latest
container_name:
opensearch-node1
environment:
-
cluster.name=opensearch-cluster # Name the cluster
-
node.name=opensearch-node1 # Name the node that
will run in this container
-
discovery.seed_hosts=opensearch-node1,opensearch-node2
# Nodes to look for when discovering the cluster
-
cluster.initial_cluster_manager_nodes=opensearch-node1,opensearch-node2
# Nodes eligible to serve as cluster manager
-
bootstrap.memory_lock=true # Disable JVM heap
memory swapping
-
"OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" # Set min
and max JVM heap sizes to at least 50% of system
RAM
#-
OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_INITIAL_ADMIN_PASSWORD}
# Sets the demo admin user password when using
demo configuration (for OpenSearch 2.12 and later)
- "DISABLE_INSTALL_DEMO_CONFIG=true"
# Prevents execution of bundled demo script which
installs demo certificates and security
configurations to OpenSearch
#-
"DISABLE_SECURITY_PLUGIN=true" # Disables Security
plugin
ulimits:
memlock:
soft:
-1 # Set memlock to unlimited (no soft or hard
limit)
hard:
-1
nofile:
soft:
65536 # Maximum number of open files for the
opensearch user - set to at least 65536
hard:
65536
volumes:
-
opensearch-data1:/usr/share/opensearch/data #
Creates volume called opensearch-data1 and mounts
it to the container
-
./root-ca.pem:/usr/share/opensearch/config/root-ca.pem
-
./admin.pem:/usr/share/opensearch/config/admin.pem
-
./admin-key.pem:/usr/share/opensearch/config/admin-key.pem
- ./node1.pem:/usr/share/opensearch/config/node1.pem
- ./node1-key.pem:/usr/share/opensearch/config/node1-key.pem
- ./custom-opensearch-node1.yml:/usr/share/opensearch/config/opensearch.yml
ports:
- 9200:9200 # REST
API
- 9600:9600 #
Performance Analyzer
networks:
- opensearch-net #
All of the containers will join the same Docker
bridge network
opensearch-node2:
image:
opensearchproject/opensearch:latest # This should
be the same image used for opensearch-node1 to
avoid issues
container_name:
opensearch-node2
environment:
-
cluster.name=opensearch-cluster
-
node.name=opensearch-node2
-
discovery.seed_hosts=opensearch-node1,opensearch-node2
-
cluster.initial_cluster_manager_nodes=opensearch-node1,opensearch-node2
-
bootstrap.memory_lock=true
-
"OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m"
#-
OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_INITIAL_ADMIN_PASSWORD}
- "DISABLE_INSTALL_DEMO_CONFIG=true"
# Prevents execution of bundled demo script which
installs demo certificates and security
configurations to OpenSearch
#-
"DISABLE_SECURITY_PLUGIN=true" # Disables Security
plugin
ulimits:
memlock:
soft:
-1
hard:
-1
nofile:
soft:
65536
hard:
65536
volumes:
-
opensearch-data2:/usr/share/opensearch/data
-
./root-ca.pem:/usr/share/opensearch/config/root-ca.pem
-
./admin.pem:/usr/share/opensearch/config/admin.pem
-
./admin-key.pem:/usr/share/opensearch/config/admin-key.pem
- ./node2.pem:/usr/share/opensearch/config/node2.pem
- ./node2-key.pem:/usr/share/opensearch/config/node2-key.pem
- ./custom-opensearch-node2.yml:/usr/share/opensearch/config/opensearch.yml
networks:
- opensearch-net
opensearch-dashboards:
image:
opensearchproject/opensearch-dashboards:latest #
Make sure the version of opensearch-dashboards
matches the version of opensearch installed on
other nodes
container_name:
opensearch-dashboards
ports:
- 5601:5601 # Map
host port 5601 to container port 5601
expose:
- "5601" # Expose
port 5601 for web access to OpenSearch Dashboards
environment:
OPENSEARCH_HOSTS:
'["https://opensearch-node1:9200","https://opensearch-node2:9200"]'
# Define the OpenSearch nodes that OpenSearch
Dashboards will query
networks:
- opensearch-net
volumes:
opensearch-data1:
opensearch-data2:
networks:
opensearch-net:
|
sudo cp admin.pem admin.myuser.pem
sudo chown myuser:mygroup admin.myuser.pem
sudo cp admin-key.pem admin-key.myuser.pem
sudo chown myuser:mygroup admin-key.myuser.pem
curl -k --cert ./admin.myuser.pem --key
./admin-key.myuser.pem -X GET
"https://localhost:9200"
- create
master user:
curl -k --cert ./admin.myuser.pem --key
./admin-key.myuser.pem -H "Content-Type:
application/json" -X PUT
"https://localhost:9200/_plugins/_security/api/internalusers/master"
-d '{
"password": "yyyyyy",
"opendistro_security_roles":
["all_access"],
"backend_roles": []
}'
- http://localhost:5601/
|
|
2 nodes with AWS
S3 plugin + dashboards (own certs) |
- build custom image
- Dockerfile
FROM
opensearchproject/opensearch:3.0.0
ENV AWS_ACCESS_KEY_ID=xxxxxx
ENV AWS_SECRET_ACCESS_KEY=yyyyyy
# Optional
#ENV AWS_SESSION_TOKEN
<optional-session-token>
RUN
/usr/share/opensearch/bin/opensearch-plugin
install --batch repository-s3
RUN
/usr/share/opensearch/bin/opensearch-keystore
create
RUN echo $AWS_ACCESS_KEY_ID |
/usr/share/opensearch/bin/opensearch-keystore
add --stdin s3.client.default.access_key
RUN echo $AWS_SECRET_ACCESS_KEY |
/usr/share/opensearch/bin/opensearch-keystore
add --stdin s3.client.default.secret_key
# Optional
#RUN echo $AWS_SESSION_TOKEN |
/usr/share/opensearch/bin/opensearch-keystore
add --stdin s3.client.default.session_token
docker build --tag=opensearch-custom-plugin
.
- generate certificates (for: root, node1, node2,
admin user)
|
custom-opensearch-node1.yml
---
cluster.name: docker-cluster
# Bind to all interfaces because we don't know
what IP address Docker will assign to us.
network.host: 0.0.0.0
# # minimum_master_nodes need to be explicitly
set when bound on a public IP
# # set to 1 to allow single node clusters
# discovery.zen.minimum_master_nodes: 1
# Setting network.host to a non-loopback address
enables the annoying bootstrap checks.
"Single-node" mode disables them again.
# discovery.type: single-node
plugins.security.ssl.transport.pemcert_filepath:
node1.pem
plugins.security.ssl.transport.pemkey_filepath:
node1-key.pem
plugins.security.ssl.transport.pemtrustedcas_filepath:
root-ca.pem
plugins.security.ssl.transport.enforce_hostname_verification:
false
plugins.security.ssl.http.enabled: true
plugins.security.ssl.http.pemcert_filepath: node1.pem
plugins.security.ssl.http.pemkey_filepath: node1-key.pem
plugins.security.ssl.http.pemtrustedcas_filepath:
root-ca.pem
plugins.security.allow_unsafe_democertificates:
true
plugins.security.allow_default_init_securityindex:
true
plugins.security.authcz.admin_dn:
-
CN=A,OU=UNIT,O=ORG,L=TORONTO,ST=ONTARIO,C=CA
plugins.security.nodes_dn:
-
"CN=node1.dns.a-record,OU=UNIT,O=ORG,L=TORONTO,ST=ONTARIO,C=CA"
-
"CN=node2.dns.a-record,OU=UNIT,O=ORG,L=TORONTO,ST=ONTARIO,C=CA"
plugins.security.audit.type: internal_opensearch
plugins.security.enable_snapshot_restore_privilege:
true
plugins.security.check_snapshot_restore_write_privileges:
true
plugins.security.restapi.roles_enabled:
["all_access", "security_rest_api_access"]
#cluster.routing.allocation.disk.threshold_enabled:
false
#opendistro_security.audit.config.disabled_rest_categories:
NONE
#opendistro_security.audit.config.disabled_transport_categories:
NONE
plugins.security.system_indices.enabled: true
plugins.security.system_indices.indices:
[.plugins-ml-agent, .plugins-ml-config,
.plugins-ml-connector, .plugins-ml-controller,
.plugins-ml-model-group, .plugins-ml-model,
.plugins-ml-task, .plugins-ml-conversation-meta,
.plugins-ml-conversation-interactions,
.plugins-ml-memory-meta,
.plugins-ml-memory-message,
.plugins-ml-stop-words,
.opendistro-alerting-config,
.opendistro-alerting-alert*,
.opendistro-anomaly-results*,
.opendistro-anomaly-detector*,
.opendistro-anomaly-checkpoints,
.opendistro-anomaly-detection-state,
.opendistro-reports-*,
.opensearch-notifications-*,
.opensearch-notebooks,
.opensearch-observability, .ql-datasources,
.opendistro-asynchronous-search-response*,
.replication-metadata-store,
.opensearch-knn-models,
.geospatial-ip2geo-data*,
.plugins-flow-framework-config,
.plugins-flow-framework-templates,
.plugins-flow-framework-state]
node.max_local_storage_nodes: 3
# aws s3
s3.client.default.region: eu-west-1
custom-opensearch-node2.yml
---
cluster.name: docker-cluster
# Bind to all interfaces because we don't know
what IP address Docker will assign to us.
network.host: 0.0.0.0
# # minimum_master_nodes need to be explicitly set
when bound on a public IP
# # set to 1 to allow single node clusters
# discovery.zen.minimum_master_nodes: 1
# Setting network.host to a non-loopback address
enables the annoying bootstrap checks.
"Single-node" mode disables them again.
# discovery.type: single-node
plugins.security.ssl.transport.pemcert_filepath: node2.pem
plugins.security.ssl.transport.pemkey_filepath: node2-key.pem
plugins.security.ssl.transport.pemtrustedcas_filepath:
root-ca.pem
plugins.security.ssl.transport.enforce_hostname_verification:
false
plugins.security.ssl.http.enabled: true
plugins.security.ssl.http.pemcert_filepath: node2.pem
plugins.security.ssl.http.pemkey_filepath: node2-key.pem
plugins.security.ssl.http.pemtrustedcas_filepath:
root-ca.pem
plugins.security.allow_unsafe_democertificates:
true
plugins.security.allow_default_init_securityindex:
true
plugins.security.authcz.admin_dn:
-
CN=A,OU=UNIT,O=ORG,L=TORONTO,ST=ONTARIO,C=CA
plugins.security.nodes_dn:
-
"CN=node1.dns.a-record,OU=UNIT,O=ORG,L=TORONTO,ST=ONTARIO,C=CA"
-
"CN=node2.dns.a-record,OU=UNIT,O=ORG,L=TORONTO,ST=ONTARIO,C=CA"
plugins.security.audit.type: internal_opensearch
plugins.security.enable_snapshot_restore_privilege:
true
plugins.security.check_snapshot_restore_write_privileges:
true
plugins.security.restapi.roles_enabled:
["all_access", "security_rest_api_access"]
#cluster.routing.allocation.disk.threshold_enabled:
false
#opendistro_security.audit.config.disabled_rest_categories:
NONE
#opendistro_security.audit.config.disabled_transport_categories:
NONE
plugins.security.system_indices.enabled: true
plugins.security.system_indices.indices:
[.plugins-ml-agent, .plugins-ml-config,
.plugins-ml-connector, .plugins-ml-controller,
.plugins-ml-model-group, .plugins-ml-model,
.plugins-ml-task, .plugins-ml-conversation-meta,
.plugins-ml-conversation-interactions,
.plugins-ml-memory-meta,
.plugins-ml-memory-message,
.plugins-ml-stop-words,
.opendistro-alerting-config,
.opendistro-alerting-alert*,
.opendistro-anomaly-results*,
.opendistro-anomaly-detector*,
.opendistro-anomaly-checkpoints,
.opendistro-anomaly-detection-state,
.opendistro-reports-*,
.opensearch-notifications-*,
.opensearch-notebooks, .opensearch-observability,
.ql-datasources,
.opendistro-asynchronous-search-response*,
.replication-metadata-store,
.opensearch-knn-models, .geospatial-ip2geo-data*,
.plugins-flow-framework-config,
.plugins-flow-framework-templates,
.plugins-flow-framework-state]
node.max_local_storage_nodes: 3
# aws s3
s3.client.default.region: eu-west-1
|
docker-compose.yml
services:
opensearch-node1: # This is also the
hostname of the container within the Docker
network (i.e. https://opensearch-node1/)
image: opensearch-custom-plugin:latest
container_name:
opensearch-node1
environment:
-
cluster.name=opensearch-cluster # Name the cluster
-
node.name=opensearch-node1 # Name the node that
will run in this container
-
discovery.seed_hosts=opensearch-node1,opensearch-node2
# Nodes to look for when discovering the cluster
-
cluster.initial_cluster_manager_nodes=opensearch-node1,opensearch-node2
# Nodes eligible to serve as cluster manager
-
bootstrap.memory_lock=true # Disable JVM heap
memory swapping
-
"OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" # Set min
and max JVM heap sizes to at least 50% of system
RAM
#-
OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_INITIAL_ADMIN_PASSWORD}
# Sets the demo admin user password when using
demo configuration (for OpenSearch 2.12 and later)
- "DISABLE_INSTALL_DEMO_CONFIG=true"
# Prevents execution of bundled demo script which
installs demo certificates and security
configurations to OpenSearch
#-
"DISABLE_SECURITY_PLUGIN=true" # Disables Security
plugin
ulimits:
memlock:
soft:
-1 # Set memlock to unlimited (no soft or hard
limit)
hard:
-1
nofile:
soft:
65536 # Maximum number of open files for the
opensearch user - set to at least 65536
hard:
65536
volumes:
-
opensearch-data1:/usr/share/opensearch/data #
Creates volume called opensearch-data1 and mounts
it to the container
-
./root-ca.pem:/usr/share/opensearch/config/root-ca.pem
-
./admin.pem:/usr/share/opensearch/config/admin.pem
-
./admin-key.pem:/usr/share/opensearch/config/admin-key.pem
- ./node1.pem:/usr/share/opensearch/config/node1.pem
- ./node1-key.pem:/usr/share/opensearch/config/node1-key.pem
- ./custom-opensearch-node1.yml:/usr/share/opensearch/config/opensearch.yml
ports:
- 9200:9200 # REST
API
- 9600:9600 #
Performance Analyzer
networks:
- opensearch-net #
All of the containers will join the same Docker
bridge network
opensearch-node2:
image: opensearch-custom-plugin:latest
# This should be the same image used for
opensearch-node1 to avoid issues
container_name:
opensearch-node2
environment:
-
cluster.name=opensearch-cluster
-
node.name=opensearch-node2
-
discovery.seed_hosts=opensearch-node1,opensearch-node2
-
cluster.initial_cluster_manager_nodes=opensearch-node1,opensearch-node2
-
bootstrap.memory_lock=true
-
"OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m"
#-
OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_INITIAL_ADMIN_PASSWORD}
- "DISABLE_INSTALL_DEMO_CONFIG=true"
# Prevents execution of bundled demo script which
installs demo certificates and security
configurations to OpenSearch
#-
"DISABLE_SECURITY_PLUGIN=true" # Disables Security
plugin
ulimits:
memlock:
soft:
-1
hard:
-1
nofile:
soft:
65536
hard:
65536
volumes:
-
opensearch-data2:/usr/share/opensearch/data
-
./root-ca.pem:/usr/share/opensearch/config/root-ca.pem
-
./admin.pem:/usr/share/opensearch/config/admin.pem
-
./admin-key.pem:/usr/share/opensearch/config/admin-key.pem
- ./node2.pem:/usr/share/opensearch/config/node2.pem
- ./node2-key.pem:/usr/share/opensearch/config/node2-key.pem
- ./custom-opensearch-node2.yml:/usr/share/opensearch/config/opensearch.yml
networks:
- opensearch-net
opensearch-dashboards:
image:
opensearchproject/opensearch-dashboards:latest #
Make sure the version of opensearch-dashboards
matches the version of opensearch installed on
other nodes
container_name:
opensearch-dashboards
ports:
- 5601:5601 # Map
host port 5601 to container port 5601
expose:
- "5601" # Expose
port 5601 for web access to OpenSearch Dashboards
environment:
OPENSEARCH_HOSTS:
'["https://opensearch-node1:9200","https://opensearch-node2:9200"]'
# Define the OpenSearch nodes that OpenSearch
Dashboards will query
networks:
- opensearch-net
volumes:
opensearch-data1:
opensearch-data2:
networks:
opensearch-net:
|
|
|
- Getting
started
- Passos comuns / Common steps:
- system setup (Linux
settings)
sudo -i
# disable memory paging and swapping
on the host to improve performance
swapoff -a
# increase the number of memory maps
available to OpenSearch
# if not set, you will get the error:
# max virtual memory areas vm.max_map_count
[65530] is too low, increase to at least [262144]
echo "vm.max_map_count=262144"
>>/etc/sysctl.conf
# reload the kernel parameters using sysctl
# if not set, you will get the
error:
# max virtual memory areas vm.max_map_count
[65530] is too low, increase to at least
[262144]
sysctl -p
- set demo admin password
cd ~/opensearch
echo "OPENSEARCH_INITIAL_ADMIN_PASSWORD=xxxxxx"
>.env
- if not set, you will get an error:
- Please
define an environment variable
'OPENSEARCH_INITIAL_ADMIN_PASSWORD' with a
strong password string
- Option 1: single node, using Docker, without compose:
- system setup
docker run -d \
-p 9200:9200 -p 9600:9600 \
-e "discovery.type=single-node" \
-e "OPENSEARCH_INITIAL_ADMIN_PASSWORD=<custom-admin-password>"
\
opensearchproject/opensearch:latest
- verify opensearch:
curl https://localhost:9200 -ku
admin:<custom-admin-password>
- docker stop
<containerId>
- Option 2: single node, with custom opensearch.yml, using Docker,
without compose (IMPORTANT: not working, because
certificates are not automatically generated):
- system setup
/path/to/custom-opensearch.yml
- sudo chown
1000:1000
/path/to/custom-opensearch.yml
docker run \
-p 9200:9200 -p 9600:9600 \
-e "discovery.type=single-node" \
-e "OPENSEARCH_INITIAL_ADMIN_PASSWORD=<custom-admin-password>"
\
-v
/path/to/custom-opensearch.yml:/usr/share/opensearch/config/opensearch.yml
\
opensearchproject/opensearch:latest
- Option 3: single node, using Docker compose:
- system
setup
- set admin
password
- docker-compose.yml
services:
opensearch-node1: # This is also the
hostname of the container within the Docker
network (i.e. https://opensearch-node1/)
image:
opensearchproject/opensearch:latest
container_name:
opensearch-node1
environment:
-
cluster.name=opensearch-cluster # Name the cluster
-
node.name=opensearch-node1 # Name the node that
will run in this container
-
discovery.seed_hosts=opensearch-node1 # Nodes to
look for when discovering the cluster
-
cluster.initial_cluster_manager_nodes=opensearch-node1
# Nodes eligible to serve as cluster manager
-
bootstrap.memory_lock=true # Disable JVM heap
memory swapping
-
"OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" # Set min
and max JVM heap sizes to at least 50% of system
RAM
-
OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_INITIAL_ADMIN_PASSWORD}
# Sets the demo admin user password when using
demo configuration (for OpenSearch 2.12 and later)
ulimits:
memlock:
soft:
-1 # Set memlock to unlimited (no soft or hard
limit)
hard:
-1
nofile:
soft:
65536 # Maximum number of open files for the
opensearch user - set to at least 65536
hard:
65536
volumes:
-
opensearch-data1:/usr/share/opensearch/data #
Creates volume called opensearch-data1 and mounts
it to the container
ports:
- 9200:9200 # REST
API
- 9600:9600 #
Performance Analyzer
networks:
- opensearch-net #
All of the containers will join the same Docker
bridge network
volumes:
opensearch-data1:
networks:
opensearch-net:
- start container:
- verify that is running:
- verify opensearch:
curl https://localhost:9200 -ku
admin:<custom-admin-password>
- Option 4: single node, own certificates, with custom
opensearch.yml, using Docker compose:
- system setup
- set demo
admin password
- generate self-signed certificates:
- (optional) add users (other than admin; admin is a
super admin user and it will use admin.pem certificate
to connect to opensearch)
- custom-opensearch.yml (Configuring
basic security settings)
---
cluster.name: docker-cluster
# Bind to all interfaces because we don't know what
IP address Docker will assign to us.
network.host: 0.0.0.0
# # minimum_master_nodes need to be explicitly set
when bound on a public IP
# # set to 1 to allow single node clusters
# discovery.zen.minimum_master_nodes: 1
# Setting network.host to a non-loopback address
enables the annoying bootstrap checks. "Single-node"
mode disables them again.
# discovery.type: single-node
plugins.security.ssl.transport.pemcert_filepath:
node1.pem
plugins.security.ssl.transport.pemkey_filepath:
node1-key.pem
plugins.security.ssl.transport.pemtrustedcas_filepath:
root-ca.pem
plugins.security.ssl.transport.enforce_hostname_verification:
false
plugins.security.ssl.http.enabled: true
plugins.security.ssl.http.pemcert_filepath:
node1.pem
plugins.security.ssl.http.pemkey_filepath:
node1-key.pem
plugins.security.ssl.http.pemtrustedcas_filepath:
root-ca.pem
plugins.security.allow_unsafe_democertificates: true
plugins.security.allow_default_init_securityindex:
true
plugins.security.authcz.admin_dn:
-
CN=A,OU=UNIT,O=ORG,L=TORONTO,ST=ONTARIO,C=CA
plugins.security.nodes_dn:
-
'CN=N,OU=UNIT,O=ORG,L=TORONTO,ST=ONTARIO,C=CA'
plugins.security.audit.type: internal_opensearch
plugins.security.enable_snapshot_restore_privilege:
true
plugins.security.check_snapshot_restore_write_privileges:
true
plugins.security.restapi.roles_enabled:
["all_access", "security_rest_api_access"]
#cluster.routing.allocation.disk.threshold_enabled:
false
#opendistro_security.audit.config.disabled_rest_categories:
NONE
#opendistro_security.audit.config.disabled_transport_categories:
NONE
plugins.security.system_indices.enabled: true
plugins.security.system_indices.indices:
[.plugins-ml-agent, .plugins-ml-config,
.plugins-ml-connector, .plugins-ml-controller,
.plugins-ml-model-group, .plugins-ml-model,
.plugins-ml-task, .plugins-ml-conversation-meta,
.plugins-ml-conversation-interactions,
.plugins-ml-memory-meta, .plugins-ml-memory-message,
.plugins-ml-stop-words, .opendistro-alerting-config,
.opendistro-alerting-alert*,
.opendistro-anomaly-results*,
.opendistro-anomaly-detector*,
.opendistro-anomaly-checkpoints,
.opendistro-anomaly-detection-state,
.opendistro-reports-*, .opensearch-notifications-*,
.opensearch-notebooks, .opensearch-observability,
.ql-datasources,
.opendistro-asynchronous-search-response*,
.replication-metadata-store, .opensearch-knn-models,
.geospatial-ip2geo-data*,
.plugins-flow-framework-config,
.plugins-flow-framework-templates,
.plugins-flow-framework-state]
node.max_local_storage_nodes: 3
sudo chown 1000:1000 custom-opensearch.yml
- docker-compose.yml (Sample
Docker Compose file for development) (Configuring
basic security settings)
services:
opensearch-node1: # This is also the
hostname of the container within the Docker
network (i.e. https://opensearch-node1/)
image:
opensearchproject/opensearch:latest
container_name:
opensearch-node1
environment:
-
cluster.name=opensearch-cluster # Name the cluster
-
node.name=opensearch-node1 # Name the node that
will run in this container
-
discovery.seed_hosts=opensearch-node1 # Nodes to
look for when discovering the cluster
-
cluster.initial_cluster_manager_nodes=opensearch-node1
# Nodes eligible to serve as cluster manager
-
bootstrap.memory_lock=true # Disable JVM heap
memory swapping
-
"OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" # Set min
and max JVM heap sizes to at least 50% of system
RAM
#-
OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_INITIAL_ADMIN_PASSWORD}
# Sets the demo admin user password when using
demo configuration (for OpenSearch 2.12 and later)
- "DISABLE_INSTALL_DEMO_CONFIG=true"
# Prevents execution of bundled demo script which
installs demo certificates and security
configurations to OpenSearch
#-
"DISABLE_SECURITY_PLUGIN=true" # Disables Security
plugin
ulimits:
memlock:
soft:
-1 # Set memlock to unlimited (no soft or hard
limit)
hard:
-1
nofile:
soft:
65536 # Maximum number of open files for the
opensearch user - set to at least 65536
hard:
65536
volumes:
-
opensearch-data1:/usr/share/opensearch/data #
Creates volume called opensearch-data1 and mounts
it to the container
-
./root-ca.pem:/usr/share/opensearch/config/root-ca.pem
-
./admin.pem:/usr/share/opensearch/config/admin.pem
-
./admin-key.pem:/usr/share/opensearch/config/admin-key.pem
-
./node1.pem:/usr/share/opensearch/config/node1.pem
-
./node1-key.pem:/usr/share/opensearch/config/node1-key.pem
-
./custom-opensearch.yml:/usr/share/opensearch/config/opensearch.yml
ports:
- 9200:9200 # REST
API
- 9600:9600 #
Performance Analyzer
networks:
- opensearch-net #
All of the containers will join the same Docker
bridge network
volumes:
opensearch-data1:
networks:
opensearch-net:
- start container:
- verify that is running:
- verify opensearch:
curl
https://localhost:9200 -ku
admin:<custom-admin-password>
sudo cp admin.pem
admin.with_my_user_permissions.pem
sudo cp admin-key.pem
admin-key.with_my_user_permissions.pem
sudo chown my_user:my_group admin.with_my_user_permissions.pem
admin-key.with_my_user_permissions.pem
curl -k --cert
./admin.with_my_user_permissions.pem --key
./admin-key.with_my_user_permissions.pem -X
GET "https://localhost:9200"
- Problemes:
- admin: Unauthorized
- you cannot use:
curl
https://localhost:9200 -ku
admin:<custom-admin-password>
you must use admin certificate: curl
-k --cert ./admin.with_my_user_permissions.pem
--key
./admin-key.with_my_user_permissions.pem
-X GET "https://localhost:9200"
curl: (35) OpenSSL SSL_connect:
SSL_ERROR_SYSCALL in connection to
localhost:9200
- verify:
openssl s_client -connect
localhost:9200 </dev/null
- it should not return the following (which would mean the
server certificate is not available):
- no
peer certificate available
- solution:
- custom-opensearch.yml
# Bind to all
interfaces because we don't know what IP
address Docker will assign to us.
network.host: 0.0.0.0
- Option 5: two nodes and Opensearch Dashboards, using Docker compose:
- system setup
- download compose file
mkdir ~/opensearch
cd ~/opensearch
curl -O
https://raw.githubusercontent.com/opensearch-project/documentation-website/3.0/assets/examples/docker-compose.yml
- (opcional) si es vol tenir accés a AWS S3
com a repositori de snapshots, cal crear una imatge de
docker específica i referenciar-la des de
docker-compose.yml:
- Security
configuration
- creació d'una imatge específica
- Dockerfile:
FROM
opensearchproject/opensearch:3.0.0
ENV AWS_ACCESS_KEY_ID=xxxxxx
ENV AWS_SECRET_ACCESS_KEY=yyyyyy
# Optional
#ENV AWS_SESSION_TOKEN
<optional-session-token>
RUN
/usr/share/opensearch/bin/opensearch-plugin
install --batch repository-s3
RUN
/usr/share/opensearch/bin/opensearch-keystore
create
RUN echo $AWS_ACCESS_KEY_ID |
/usr/share/opensearch/bin/opensearch-keystore
add --stdin s3.client.default.access_key
RUN echo $AWS_SECRET_ACCESS_KEY |
/usr/share/opensearch/bin/opensearch-keystore
add --stdin s3.client.default.secret_key
# Optional
#RUN echo $AWS_SESSION_TOKEN |
/usr/share/opensearch/bin/opensearch-keystore
add --stdin
s3.client.default.session_token
docker build --tag=opensearch-custom-plugin
.
cp docker-compose.yml
docker-compose-custom.yml
docker-compose-custom.yml ha
d'apuntar a la nova imatge local, per als dos nodes:
services:
opensearch-node1:
# image:
opensearchproject/opensearch:latest
image: opensearch-custom-plugin:latest
[...] opensearch-node2:
# image:
opensearchproject/opensearch:latest
image: opensearch-custom-plugin:latest
- set admin
password
- start 3 containers (defined in docker-compose.yml):
two containers running the OpenSearch service and a
single container running OpenSearch Dashboards
- setup
docker compose
# use default docker-compose.yml
docker compose up
# use custom docker-compose-custom.yml
docker compose -f docker-compose-custom.yml up
- verify (3 lines should appear)
# IMPORTANT: you must run this command from
the same directory where you called: docker
compose up
cd ~/opensearch
docker compose ps
- si no us apareixen les tres línies és que us
cal fer les accions del primer pas
- if you want to bash node1:
container_id=$(docker container ls -a
--format '{{.ID}} {{.Names}}' | awk '$2 ~
/^opensearch-node1/ {print $1}')
docker exec -it ${container_id} bash
- dashboards:
- Experiment
with sample data
-
|
generate your own from an existing index |
or download a sample |
apply |
mapping |
elasticdump
--debug
--input=https://master:xxx@<my_cluster_host>/myindex
--output=myindex_mappings.json
--type=mapping
|
ecommerce-field_mappings.json |
curl -H "Content-Type:
application/json" -X PUT
"https://localhost:9200/ecommerce" -ku
admin:<custom-admin-password>
--data-binary
"@ecommerce-field_mappings.json"
curl -H "Content-Type:
application/json" -X PUT
"https://localhost:9200/myindex" -ku
admin:<custom-admin-password>
--data-binary "@myindex_mappings.json"
|
data |
elasticdump
--debug
--input=https://master:xxx@<my_cluster_host>/myindex
--output=myindex.ndjson
|
ecommerce.ndjson |
curl -H "Content-Type:
application/x-ndjson" -X PUT
"https://localhost:9200/ecommerce/_bulk"
-ku
admin:<custom-admin-password>
--data-binary "@ecommerce.ndjson"
curl -H "Content-Type:
application/x-ndjson" -X PUT
"https://localhost:9200/myindex/_bulk"
-ku
admin:<custom-admin-password>
--data-binary "@myindex.ndjson"
|
- Import / Export
- MANAGING
INDEXES
- CRUD
-
table caption
|
|
|
bulk
|
template |
create template index |
|
|
|
create template data stream |
- create data stream template
PUT
_index_template/<datastream_template_name>
{
"index_patterns": "logs-nginx",
"data_stream":
{
"timestamp_field":
{
"name":
"request_time"
}
},
"priority": 200,
"template": {
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0
}
}
}
|
|
index |
create index |
- only needed if parameters are non-default
PUT
<index>
{ "settings": {
"number_of_shards": 6,
"number_of_replicas": 2 } }
|
|
|
rollover index or datastream
(can be automated with ISM) |
- rollover:
POST
<index_or_datastream>/_rollover
|
|
data stream |
create data stream |
- create explicit data stream
(will use matching datastream template, if
any; error if no matching datastream
template):
PUT
_data_stream/<datastream_name>
- create implicit data stream by creating a
document in a new index:
|
|
|
retrieve data stream |
- retrieve info about all datastreams:
- retrieve info about a datastream:
GET
_data_stream/<datastream_name>
- retrieve stats about a datastream:
GET
_data_stream/<datastream_name>/_stats
|
|
|
delete data stream |
- delete a data stream:
DELETE
_data_stream/<name_of_data_stream>
|
|
document |
create documents |
- if index:
- exists:
- a document will be added to
existing index
- (order?) matches an index
template:
- specified index will be created,
with settings from template
- (order?) matches a data stream
template:
- a data stream will be created:
<index>
- an index will be created (
.ds-<index>-000001 )
- does not match a template:
- specified index will be created,
with default settings
- specifying id:
PUT
<index>/_doc/<id>
{ "A JSON": "document" }
- without specifying id:
POST
<index>/_doc
{ "A JSON": "document" }
|
- bulk (using ndjson)
POST
_bulk
{ "index": { "_index":
"<index>", "_id": "<id>" }
}
{ "A JSON": "document" }
|
|
retrieve documents
|
|
- multiple documents with all fields:
GET
_mget
{
"docs": [
{
"_index": "<index>",
"_id":
"<id>"
},
{
"_index": "<index>",
"_id":
"<id>"
}
]
}
- multiple documents with selected fields:
GET
_mget
{
"docs": [
{
"_index": "<index>",
"_id":
"<id>",
"_source": "field1"
},
{
"_index": "<index>",
"_id":
"<id>",
"_source": "field2"
}
]
}
|
|
search documents |
- search documents:
GET
<index>/_search
{
"query": {
"match": {
"message": "login"
}
}
}
|
|
|
check documents
|
- verify whether a document exists:
|
|
|
update documents
|
- total update (replace), specifying id
(same as creating a new document with the
same id):
PUT
<index>/_doc/<id>
{ "A JSON": "document" }
- partial update, specifying id:
POST
<index>/_update/<id> {
"doc": {
"A JSON": "document" }
}
- conditional update (
upsert ),
specifying id
(if it exists: update its info with doc; if
it does not exist: create a document with
upsert):
POST
movies/_update/2
{
"doc": {
"title": "Castle in
the Sky"
},
"upsert": {
"title": "Only
Yesterday",
"genre":
["Animation", "Fantasy"],
"date": 1993
}
}
|
|
|
delete documents
|
- delete a document, specifying id:
|
|
- Templates
- quan es crea un index o bé un data stream
(explícitament; o bé implícitament, quan es crea un
document), opensearch comprova si el nom quadra amb
algun template. Si quadra, crearà l'índex o el data
stream amb la configuració especificada al template
- Tipus
- Index template: va bé per exemple quan AWS
Firehose crea automàticament índexs amb rotació
(diària, setmanal, mensual...)
- Data stream template: configures a set of indexes
as a data stream
- Data
streams
- "A data stream is internally composed of multiple
backing indexes. Search requests are routed to all the
backing indexes, while indexing requests are routed to
the latest write index. ISM policies let you
automatically handle index rollovers or deletions."
- un dels camps ha de ser "
@timestamp "
- Info
- steps
- create a data stream template
- create a data stream
- ingest data into data stream
- search documents
- rollover a data stream
- ...
- Index content
- Reindex
data
- crea un nou índex a partir d'un índex (que pot ser
fins i tot en un cluster remot)
- es pot copiar només un subconjunt (filtrat)
- es poden combinar diversos índexs font cap a un únic
índex destinació
- es poden transformar
a mida que es van transferint
- Index State Management
- Steps
- set up policies
- Cada policy (màquina d'estats) defineix:
- ISM templates: a quins índexs s'aplica la
policy
- States:
estat en el qual està l'índex ("hot",
"warm", "delete")

- Actions:
accions que s'executen quan s'entra en
aquell estat ("set number of replicas to
1", "move index to warm nodes"; "send a
notification email", "delete the index")
- si hi ha definides diverses
accions, només quan una acció acaba
amb èxit, al cap de
plugins.index_state_management.job_interval
(5 minuts) s'executa la següent; es
poden definir timeout
i retry
- accions possibles / ISM
supported operations:
force_merge,
read_only, read_write,
replica_count, shrink, close,
open, delete, rollover,
notification, snapshot,
index_priority, allocation, rollup
close :
Closed indexes remain on disk,
but consume no CPU or memory.
You can’t read from, write to,
or search closed indexes.
Closing an index is a good
option if you need to retain
data for longer than you need to
actively search it and have
sufficient disk space on your
data nodes. If you need to
search the data again, reopening
a closed index is simpler than
restoring an index from a snapshot.
rollover :
Rolls an alias
over to a new index when the
managed index meets one of the
rollover conditions.
rollup :
reduce data granularity
by rolling up old data into
summarized indexes.
notification :
envia una notificació a Slack,
Amazon Chime, webhook URL
snapshot :
(snapshots)
...
- ...
- Transitions:
condicions (
conditions :
"after 7 days"; "after 30 days") que
s'han de complir (hi ha un job
que ho comprova cada 5 minuts) per anar
cap a un altre estat (state_name ).
After all actions in the current state
are completed, the policy starts
checking the conditions for transitions.
- Error
notifications
- ...
- attach policies to indexes
- manage indexes
- ...
- Index transforms
- Index rollups
- ...
- CREATING AND TUNING YOUR CLUSTER
- Availability and recovery
- Snapshots
- Take
and restore snapshots
- Register repository
- shared file system
- Amazon S3
- when using Amazon OpenSearch service:
see Registering
a manual snapshot repository
- prerequisites
- create S3 bucket
- create policy (to be attached
to role in next step)
- JSON
{
"Version":
"2012-10-17",
"Statement": [{
"Action": [
"s3:ListBucket"
],
"Effect": "Allow",
"Resource": [
"arn:aws:s3:::s3-bucket-name"
]
},
{
"Action": [
"s3:GetObject",
"s3:PutObject",
"s3:DeleteObject"
],
"Effect": "Allow",
"Resource": [
"arn:aws:s3:::s3-bucket-name/*"
]
}
]
}
- Name:
bucket-mybucket-read-write
- create IAM role
- Select trusted entity
- Trusted entity type:
Custom trust policy
- Custom trust policy:
{
"Version":
"2012-10-17",
"Statement":
[
{
"Sid": "",
"Effect":
"Allow",
"Principal":
{
"Service":
"es.amazonaws.com"
},
"Action":
"sts:AssumeRole"
}
]
}
- Add permissions
- bucket-mybucket-read-write
(created in previous
step)
- Name, review and create
- Role name:
role-opensearch-snapshots-bucket-mybucket
(in docs: TheSnapshotRole )
- permissions
- Map
the snapshot role in OpenSearch
Dashboards (if using fine-grained
access control)
- Dashboards
- Security > Roles
- manage_snapshots
- Mapped users >
Manage mapping
- Backend roles:
(arn for
role-opensearch-snapshots-bucket-mybucket )
- Directly type user arn in
Management > Security > Roles
> manage_snapshots > Mapped
users > Manage mapping > Users
- NOTE: no need to add a new
internal user; just type arn in
the case
- Register
a repository
- Using PUT or
- Using
the sample Python client:
- register-repo-mybucket.py
- Problems:
-
no
permissions for
[cluster:admin/repository/put]
and User
[name=arn:aws:iam::xxxx:user/my_user,
backend_roles=[],
requestedTenant=null]"
- Solution:
- directly
type
arn:aws:iam::xxxx:user/my_user
in Management
> Security
> Roles
>
manage_snapshots
> Mapped
users >
Manage mapping
> Users
- Hadoop Distributed File System (HDFS)
- Microsoft Azure
- Take snapshots
- Restore snapshots
- Get all snapshots in all repositories
- Get all snapshots in a given repository
GET
/_snapshot/my-repository/_all
- Restore a snapshot
- Snapshot management (SM)
- using Index Management (IM) Plugin
- Dashboards: Management > Snapshot
Management
- Searchable snapshots
- Cluster
- API
GET _cluster/stats/nodes/_all
- Optimal sizes
-
... |
|
... |
real usage |
minimum storage |
Calculating
storage requirements |
minimum_storage = Source_data * (1 +
number_of_replicas) * (1 +
indexing_overhead) / (1 -
Linux_reserved_space) / (1 -
OpenSearch_service_overhead)
minimum_storage = Source_data * (1 +
number_of_replicas) * 1.45
|
_cat/indices?v
_cat/allocation?v
- Dashboards: Index Management > Indexes
- Total size (= Size of primaries *
(1+number_of_replicas) + overhead)
|
number of shards |
Choosing
the number of shards |
- the number of primary shards cannot be
changed for an existing index
- default:
- AWS OpenSearch Service: 5 primary
shards + 1 replica = 10 shards
- open source OpenSearch: 1 primary
shard + 1 replica = 2 shards
- optimal size of a shard:
- where search latency is a key
performance objective: 10-30 GiB / shard
- for write-heavy workloads such as log
analytics: 30-50 GiB / shard
number_of_primary_shards =
(Source_data + room_to_grow) * (1 +
indexing_overhead) / desired_shard_size
- Maximum shards per node
- default: 1000 shards / node
cluster.max_shards_per_node
- ...
|
|
|
Choosing
instance types and testing |
|
|
- Sizing
Amazon OpenSearch Service domains
- Configuring
a multi-AZ domain in Amazon OpenSearch Service
- Shard distribution
- cada AZ ha de tenir, sumant tots els nodes de
dades d'aquella AZ, tots els shards, ja siguin els
primaris o les rèpliques
- standby
- with standby: una de les AZ està en stand-by
- without standby: totes les AZ estan actives,
però l'usuari ha de gestionar bé el nombre de
primaris i de rèpliques (almenys 1 rèplica)
- Availability zone disruptions
- Index
State Management
- ...
- Shards and nodes
- Each shard stores a subset of all documents in an index

- Shards are used for even distribution across nodes in a
cluster. A good rule of thumb is to limit shard size to
10–50 GB. (index 1: split into 2 shards; index 2: split into
4 shards)

- Primary and replica shards (index 1: 2 primary shards + 2
replica shards; index 2: 4 primary shards + 4 replica
shards). Default: 5 primary shards + 1 replica = 10 shards

- ...
|