Skip to content

Commit 65d08ea

Browse files
committed
S3 bucket access via s3fs
1 parent 60f1a60 commit 65d08ea

File tree

5 files changed

+120
-27
lines changed

5 files changed

+120
-27
lines changed

README.md

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ Your source PostgreSQL database can be located anywhere, but DLE with other comp
2020
* Read/Write permissions on Route53
2121
* Read/Write permissions on Cloudwatch
2222

23-
## How to use
23+
## Configuration overview
2424
- :construction: Currently, it is supposed that you run `terraform` commands on a Linux machine. MacOS and Windows support is not yet implemented (but planned).
2525
- It is recommended to clone this Git repository and adjust for your needs. Below we provide the detailed step-by-step instructions for quick start (see "Quick start") for a PoC setup
2626
- To configure parameters used by Terraform (and the Database Lab Engine itself), you will need to modify `terraform.tfvars` and create a file with secrets (`secret.tfvars`)
@@ -31,7 +31,7 @@ Your source PostgreSQL database can be located anywhere, but DLE with other comp
3131
- values passed on the command line
3232
- All variables starting with `postgres_` represent the source database connection information for the data (from that database) to be fetched by the DLE. That database must be accessible from the instance hosting the DLE (that one created by Terraform)
3333

34-
## Quick start
34+
## How-to guide: using this Terraform module to set up DLE and its components
3535
The following steps were tested on Ubuntu 20.04 but are supposed to be valid for other Linux distributions without significant modification.
3636

3737
1. SSH to any machine with internet access, it will be used as deployment machine
@@ -65,12 +65,23 @@ The following steps were tested on Ubuntu 20.04 but supposed to be valid for oth
6565
aws_deploy_allow_ssh_from_cidrs = ["0.0.0.0/0"]
6666
aws_deploy_dns_api_subdomain = "tf-test" # subdomain in aws.postgres.ai, fqdn will be ${dns_api_subdomain}-engine.aws.postgres.ai
6767
68+
# Source – two options. Choose one of two:
69+
# - direct connection to source DB
70+
# - dump stored on AWS S3
71+
72+
# option 1 – direct PG connection
73+
source_type = "postgres" # source is a working demo postgres database
6874
source_postgres_version = "13"
69-
source_postgres_host = "ec2-3-215-57-87.compute-1.amazonaws.com"
75+
source_postgres_host = "ec2-3-215-57-87.compute-1.amazonaws.com" # an example DB at Heroku
7076
source_postgres_port = "5432"
71-
source_postgres_dbname = "d3dljqkrnopdvg" # this is an existing DB (Heroku example DB)
72-
source_postgres_username = "postgres"
73-
77+
source_postgres_dbname = "d3dljqkrnopdvg" # an example DB at Heroku
78+
source_postgres_username = "bfxuriuhcfpftt" # an example DB at Heroku
79+
80+
# option 2 – dump on S3. Important: your AWS user has to be able to create IAM roles to work with S3 buckets in your AWS account
81+
# source_type = "s3" # source is a dump stored in a demo S3 bucket
82+
# source_pgdump_s3_bucket = "tf-demo-dump" # This is an example public bucket
83+
# source_pgdump_path_on_s3_bucket = "heroku.dmp" # This is an example dump from a demo database
84+
7485
dle_debug_mode = "true"
7586
dle_retrieval_refresh_timetable = "0 0 * * 0"
7687
postgres_config_shared_preload_libraries = "pg_stat_statements,logerrors" # DB Migration Checker requires logerrors extension
@@ -79,7 +90,7 @@ The following steps were tested on Ubuntu 20.04 but supposed to be valid for oth
7990
```
8091
1. Create `secret.tfvars` containing `source_postgres_password`, `platform_access_token`, and `vcs_github_secret_token`. An example:
8192
```config
82-
source_postgres_password = "YOUR_DB_PASSWORD" # todo: put pwd for heroku example DB here
93+
source_postgres_password = "dfe01cbd809a71efbaecafec5311a36b439460ace161627e5973e278dfe960b7" # an example DB at Heroku
8394
platform_access_token = "YOUR_ACCESS_TOKEN" # to generate, open https://console.postgres.ai/, choose your organization,
8495
# then "Access tokens" in the left menu
8596
vcs_github_secret_token = "vcs_secret_token" # to generate, open https://github.com/settings/tokens/new
@@ -94,16 +105,16 @@ The following steps were tested on Ubuntu 20.04 but supposed to be valid for oth
94105
export AWS_SECRET_ACCESS_KEY = "accesskey"
95106
```
96107
1. Deploy:
97-
```
108+
```shell
98109
terraform apply -var-file="secret.tfvars" -auto-approve
99110
```
100111
1. If everything goes well, you should get an output like this:
101112
```config
102113
vcs_db_migration_checker_verification_token = "gsio7KmgaxECfJ80kUx2tUeIf4kEXZex"
103114
dle_verification_token = "zXPodd13LyQaKgVXGmSCeB8TUtnGNnIa"
104-
ec2_public_dns = "ec2-18-118-126-25.us-east-2.compute.amazonaws.com"
105-
ec2instance = "i-0b07738148950af25"
106-
ip = "18.118.126.25"
115+
ec2_public_dns = "ec2-11-111-111-11.us-east-2.compute.amazonaws.com"
116+
ec2instance = "i-0000000000000"
117+
ip = "11.111.111.11"
107118
platform_joe_signing_secret = "lG23qZbUh2kq0ULIBfW6TRwKzqGZu1aP"
108119
public_dns_name = "demo-api-engine.aws.postgres.ai" # todo: this should be URL, not hostname – further we'll need URL, with protocol – `https://`
109120
```

dle-logical-init.sh.tpl

Lines changed: 30 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -99,34 +99,48 @@ for i in $${!disks[@]}; do
9999
$${disks[$i]}
100100
done
101101

102-
#configure and start DLE
102+
# Adjust DLE config
103103
mkdir ~/.dblab
104-
#cp /home/ubuntu/.dblab/config.example.logical_generic.yml ~/.dblab/server.yml
105104
curl https://gitlab.com/postgres-ai/database-lab/-/raw/${dle_version_full}/configs/config.example.logical_generic.yml --output ~/.dblab/server.yml
106105
sed -ri "s/^(\s*)(debug:.*$)/\1debug: ${dle_debug_mode}/" ~/.dblab/server.yml
106+
sed -ri "s/^(\s*)(verificationToken:.*$)/\1verificationToken: ${dle_verification_token}/" ~/.dblab/server.yml
107107
sed -ri "s/^(\s*)(timetable:.*$)/\1timetable: \"${dle_retrieval_refresh_timetable}\"/" ~/.dblab/server.yml
108108
sed -ri "s/^(\s*)(forceInit:.*$)/\1forceInit: true/" ~/.dblab/server.yml
109-
sed -ri "s/^(\s*)(verificationToken:.*$)/\1verificationToken: ${dle_verification_token}/" ~/.dblab/server.yml
110109
sed -ri "s/^(\s*)(dbname:.*$)/\1dbname: ${source_postgres_dbname}/" ~/.dblab/server.yml
111-
sed -ri "s/^(\s*)(host: 34.56.78.90$)/\1host: ${source_postgres_host}/" ~/.dblab/server.yml
112-
sed -ri "s/^(\s*)(port: 5432$)/\1port: ${source_postgres_port}/" ~/.dblab/server.yml
113-
sed -ri "s/^(\s*)( username: postgres$)/\1 username: ${source_postgres_username}/" ~/.dblab/server.yml
114-
sed -ri "s/^(\s*)(password:.*$)/\1password: ${source_postgres_password}/" ~/.dblab/server.yml
115-
sed -ri "s/:13/:${source_postgres_version}/g" ~/.dblab/server.yml
116-
#restore pg_dump via pipe - without saving it on the disk
117-
sed -ri "s/^(\s*)(parallelJobs:.*$)/\1parallelJobs: 1/" ~/.dblab/server.yml
118-
sed -ri "s/^(\s*)(# immediateRestore:.*$)/\1immediateRestore: /" ~/.dblab/server.yml
119-
sed -ri "s/^(\s*)(# forceInit: false.*$)/\1 forceInit: true /" ~/.dblab/server.yml
120-
sed -ri "s/^(\s*)( # configs:$)/\1 configs: /" ~/.dblab/server.yml
121-
sed -ri "s/^(\s*)( # shared_preload_libraries: .*$)/\1 shared_preload_libraries: '${postgres_config_shared_preload_libraries}'/" ~/.dblab/server.yml
122-
sed -ri "s/^(\s*)( shared_preload_libraries:.*$)/\1 shared_preload_libraries: '${postgres_config_shared_preload_libraries}'/" ~/.dblab/server.yml
123-
sed -ri "s/^(\s*)(- logicalRestore.*$)/\1#- logicalRestore /" ~/.dblab/server.yml
124110
# Enable Platform
125111
sed -ri "s/^(\s*)(#platform:$)/\1platform: /" ~/.dblab/server.yml
126112
sed -ri "s/^(\s*)(# url: \"https\\:\\/\\/postgres.ai\\/api\\/general\"$)/\1 url: \"https\\:\\/\\/postgres.ai\\/api\\/general\" /" ~/.dblab/server.yml
127113
sed -ri "s/^(\s*)(# accessToken: \"platform_access_token\"$)/\1 accessToken: \"${platform_access_token}\"/" ~/.dblab/server.yml
128114
sed -ri "s/^(\s*)(# enablePersonalTokens: true$)/\1 enablePersonalTokens: true/" ~/.dblab/server.yml
115+
sed -ri "s/:13/:${source_postgres_version}/g" ~/.dblab/server.yml
129116

117+
case "${source_type}" in
118+
119+
postgres)
120+
sed -ri "s/^(\s*)(host: 34.56.78.90$)/\1host: ${source_postgres_host}/" ~/.dblab/server.yml
121+
sed -ri "s/^(\s*)(port: 5432$)/\1port: ${source_postgres_port}/" ~/.dblab/server.yml
122+
sed -ri "s/^(\s*)( username: postgres$)/\1 username: ${source_postgres_username}/" ~/.dblab/server.yml
123+
sed -ri "s/^(\s*)(password:.*$)/\1password: ${source_postgres_password}/" ~/.dblab/server.yml
124+
#restore pg_dump via pipe - without saving it on the disk
125+
sed -ri "s/^(\s*)(parallelJobs:.*$)/\1parallelJobs: 1/" ~/.dblab/server.yml
126+
sed -ri "s/^(\s*)(# immediateRestore:.*$)/\1immediateRestore: /" ~/.dblab/server.yml
127+
sed -ri "s/^(\s*)(# forceInit: false.*$)/\1 forceInit: true /" ~/.dblab/server.yml
128+
sed -ri "s/^(\s*)( # configs:$)/\1 configs: /" ~/.dblab/server.yml
129+
sed -ri "s/^(\s*)( # shared_preload_libraries: .*$)/\1 shared_preload_libraries: '${postgres_config_shared_preload_libraries}'/" ~/.dblab/server.yml
130+
sed -ri "s/^(\s*)( shared_preload_libraries:.*$)/\1 shared_preload_libraries: '${postgres_config_shared_preload_libraries}'/" ~/.dblab/server.yml
131+
sed -ri "s/^(\s*)(- logicalRestore.*$)/\1#- logicalRestore /" ~/.dblab/server.yml
132+
;;
133+
134+
s3)
135+
# Mount S3 bucket if it's defined in Terraform variables
136+
mkdir -p "${source_pgdump_s3_mount_point}"
137+
s3fs ${source_pgdump_s3_bucket} ${source_pgdump_s3_mount_point} -o iam_role -o use_cache=/tmp -o allow_other
138+
139+
sed -ri "s/^(\s*)(- logicalDump.*$)/\1#- logicalDump /" ~/.dblab/server.yml
140+
sed -ri "s|^(\s*)( dumpLocation:.*$)|\1 dumpLocation: ${source_pgdump_s3_mount_point}/${source_pgdump_path_on_s3_bucket}|" ~/.dblab/server.yml
141+
;;
142+
143+
esac
130144

131145
sudo docker run \
132146
--name dblab_server \

instance.tf

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ data "template_file" "init" {
4545
platform_joe_signing_secret = "${random_string.platform_joe_signing_secret.result}"
4646
vcs_db_migration_checker_verification_token = "${random_string.vcs_db_migration_checker_verification_token.result}"
4747
vcs_github_secret_token = "${var.vcs_github_secret_token}"
48+
source_type = "${var.source_type}"
49+
source_pgdump_s3_bucket = "${var.source_pgdump_s3_bucket}"
50+
source_pgdump_s3_mount_point = "${var.source_pgdump_s3_mount_point}"
51+
source_pgdump_path_on_s3_bucket = "${var.source_pgdump_path_on_s3_bucket}"
4852
}
4953
}
5054

@@ -55,5 +59,6 @@ resource "aws_instance" "aws_ec2" {
5559
security_groups = ["${aws_security_group.dle_instance_sg.name}"]
5660
key_name = "${var.aws_keypair}"
5761
tags = "${local.common_tags}"
62+
iam_instance_profile = "${var.source_type == "s3" ? "${aws_iam_instance_profile.instance_profile[0].name}" : null}"
5863
user_data = "${data.template_file.init.rendered}"
5964
}

role.tf

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
resource "aws_iam_role" "db_lab_engine_role" {
2+
count = "${var.source_type == "s3" ? 1 : 0}"
3+
name = "database_lab_engine"
4+
assume_role_policy = jsonencode({
5+
Version = "2012-10-17"
6+
Statement = [
7+
{
8+
Action = "sts:AssumeRole"
9+
Effect = "Allow"
10+
Sid = ""
11+
Principal = {
12+
Service = "ec2.amazonaws.com"
13+
}
14+
},
15+
]
16+
})
17+
18+
inline_policy {
19+
name = "pg_dump_access"
20+
21+
policy = jsonencode({
22+
Version = "2012-10-17"
23+
Statement = [
24+
{
25+
Action = ["s3:ListBucket"]
26+
Effect = "Allow"
27+
Resource = "arn:aws:s3:::${var.source_pgdump_s3_bucket}"
28+
},
29+
{
30+
Action = ["s3:GetObject"]
31+
Effect = "Allow"
32+
Resource = "arn:aws:s3:::${var.source_pgdump_s3_bucket}/*" # Grant read access to entire bucket
33+
}
34+
]
35+
})
36+
}
37+
}
38+
resource "aws_iam_instance_profile" "instance_profile" {
39+
count = "${var.source_type == "s3" ? 1 : 0}"
40+
name = "dle-instance-profile"
41+
role = "${aws_iam_role.db_lab_engine_role[0].name}"
42+
}

variables.tf

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,3 +147,24 @@ variable "vcs_github_secret_token" {
147147
variable "postgres_config_shared_preload_libraries" {
148148
description = "shared_preload_libraries postgresql.conf parameter value for clones"
149149
}
150+
151+
variable "source_type" {
152+
description = "Type of data source used for DLE. For now, it can be either \"postgres\" or \"s3\""
153+
default = ""
154+
}
155+
156+
variable "source_pgdump_s3_bucket" {
157+
description = "S3 bucket name where a dump (created using pg_dump) is stored. This dump will be used as data source. Leave the value empty (default) to use a different source of data."
158+
default = ""
159+
}
160+
161+
variable "source_pgdump_path_on_s3_bucket" {
162+
description = "Relative path (within the S3 bucket) to the pg_dump file or directory"
163+
default = ""
164+
}
165+
166+
variable "source_pgdump_s3_mount_point"{
167+
description = "Mount point on the DLE EC2 instance where the S3 bucket with the source dump file/directory is mounted. If source_pgdump_s3_bucket is empty, source_pgdump_s3_mount_point is ignored"
168+
default = "/s3/pg_dump"
169+
}
170+

0 commit comments

Comments
 (0)