This is a quick “Cut the bullshit and give me what I Need” blog.
Your biggest problem in AWS Athena – is how to create table 🙂
Create table with separator pipe separator
CREATE EXTERNAL TABLE logs (
id STRING,
query STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '|'
ESCAPED BY '\\'
LINES TERMINATED BY '\n'
LOCATION 's3://myBucket/logs';
create table with CSV SERDE
CREATE EXTERNAL TABLE IF NOT EXISTS logs(
`date` string,
`query` string
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
LOCATION 's3://omidongage/logs'
Create table with partition and parquet
CREATE EXTERNAL TABLE users (
first string,
last string,
username string
)
PARTITIONED BY (id string)
STORED AS parquet
LOCATION 's3://bucket/folder/'
create table on local HDFS using pipe separator with header skip
create table lineitem (
l_orderkey BIGINT ,
l_partkey BIGINT ,
l_suppkey BIGINT ,
l_linenumber BIGINT ,
l_quantity DOUBLE ,
l_extendedprice DOUBLE ,
l_discount DOUBLE ,
l_tax DOUBLE ,
l_returnflag CHAR(1) ,
l_linestatus CHAR(1) ,
l_shipdate DATE ,
l_commitdate DATE ,
l_receiptdate DATE,
l_shipinstruct CHAR(25) ,
l_shipmode CHAR(10) ,
l_comment VARCHAR(44)
)
row format delimited
fields terminated by '|'
lines terminated by '\n'
location '/user/hive/warehouse/tpch/lineitem/'
tblproperties ("skip.header.line.count"="1");
CTAS Examples by official Examples – VERY good read!
https://docs.aws.amazon.com/athena/latest/ug/ctas-examples.html
Hive data types – official documentation
AWS Athena Documentation:
https://docs.aws.amazon.com/athena/latest/ug/create-table.html
https://docs.aws.amazon.com/athena/latest/ug/ctas.html
——————————————————————————————————————————
I put a lot of thoughts into these blogs, so I could share the information in a clear and useful way. If you have any comments, thoughts, questions, or you need someone to consult with, feel free to contact me: