stage_redshift.py
from airflow.providers.postgres.hooks.postgres import PostgresHook
from airflow.models import BaseOperator
from airflow.utils.decorators import apply_defaults
from airflow.secrets.metastore import MetastoreBackend


class StageToRedshiftOperator(BaseOperator):
    """Stage data from S3 into a Redshift table via a COPY statement.

    The COPY statement is built from the target table, the rendered S3 path,
    the AWS credentials stored in the Airflow connection identified by
    ``aws_credentials_id``, and an optional format clause.
    """

    ui_color = '#89DA59'
    # s3_key is templated so the S3 prefix can be parameterised per run.
    template_fields = ("s3_key",)
    copy_sql = """
        COPY {}
        FROM '{}'
        ACCESS_KEY_ID '{}'
        SECRET_ACCESS_KEY '{}'
        {}
    """

    @apply_defaults
    def __init__(self,
                 redshift_conn_id="",
                 aws_credentials_id="",
                 table="",
                 s3_bucket="",
                 s3_key="",
                 s3_format="",
                 append_data=False,
                 *args, **kwargs):
        super(StageToRedshiftOperator, self).__init__(*args, **kwargs)
        self.table = table
        self.redshift_conn_id = redshift_conn_id
        self.s3_bucket = s3_bucket
        self.s3_key = s3_key
        self.s3_format = s3_format
        self.append_data = append_data
        self.aws_credentials_id = aws_credentials_id

    def execute(self, context):
        self.log.info("Starting StageToRedshiftOperator")

        # Look up the AWS credentials stored as an Airflow connection.
        metastore_backend = MetastoreBackend()
        aws_connection = metastore_backend.get_connection(self.aws_credentials_id)
        redshift = PostgresHook(postgres_conn_id=self.redshift_conn_id)

        # Truncate-and-load unless append mode was requested.
        if not self.append_data:
            self.log.info("Clearing data from destination Redshift table")
            redshift.run("DELETE FROM {}".format(self.table))

        self.log.info("Copying data from S3 to Redshift")
        rendered_key = self.s3_key.format(**context)
        s3_path = "s3://{}/{}".format(self.s3_bucket, rendered_key)
        formatted_sql = StageToRedshiftOperator.copy_sql.format(
            self.table,
            s3_path,
            aws_connection.login,
            aws_connection.password,
            self.s3_format
        )
        redshift.run(formatted_sql)
        self.log.info("StageToRedshiftOperator completed successfully")