This is an example to understand the power of Dynamic partitioning in Hive
set hive.exec.dynamic.partition.mode=nonstrict;
CREATE EXTERNAL TABLE IF NOT EXISTS sampledb.myTable(
GAID string,
leave_timestamp string,
latitude string,
longitude string,
stay_time string,
country string,
city string,
Street string,
house string,
Home_Country string,
Home_City string,
Home_Neighborhood string,
Home_Zip_Code string,
Office_Country string,
Office_City string,
Office_Neighborhood string,
Office_Zip_Code string,
Zip_of_the_location string,
POI_Name string,
POI_Type string,
POI_Tag_Value string,
Altitude string,
Server_Version string,
Ver_No string)
PARTITIONED BY (dt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ‘,’ LINES TERMINATED BY ‘\n’
LOCATION ‘s3://myBucket/production/rawdata/online/‘ ;
MSCK REPAIR TABLE sampledb.myTable;
CREATE EXTERNAL TABLE IF NOT EXISTS sampledb.myTable_parquet(
GAID string,
leave_timestamp string,
latitude string,
longitude string,
stay_time string,
country string,
city string,
Street string,
house string,
Home_Country string,
Home_City string,
Home_Neighborhood string,
Home_Zip_Code string,
Office_Country string,
Office_City string,
Office_Neighborhood string,
Office_Zip_Code string,
Zip_of_the_location string,
POI_Name string,
POI_Type string,
POI_Tag_Value string,
Altitude string,
Server_Version string,
Ver_No string
)
PARTITIONED BY (dt string)
STORED AS PARQUET
LOCATION ‘s3://myBucket/production/parquetdata/online_new/‘ ;
MSCK REPAIR TABLE sampledb.myTable_parquet;
INSERT OVERWRITE TABLE sampledb.myTable_parquet partition (dt)
SELECT
regexp_replace(gaid, ‘”‘,”),
regexp_replace(leave_timestamp, ‘”‘,”) ,
regexp_replace(latitude, ‘”‘,”),
regexp_replace(longitude, ‘”‘,”) ,
regexp_replace(stay_time, ‘”‘,”) ,
regexp_replace(country, ‘”‘,”),
regexp_replace(regexp_replace(city, ‘”‘,”), “‘”,””),
regexp_replace(street, ‘”‘,”),
regexp_replace(house, ‘”‘,”),
regexp_replace(Home_Country, ‘”‘,”),
regexp_replace(regexp_replace(home_city, ‘”‘,”), “‘”,””),
regexp_replace(Home_Neighborhood, ‘”‘,”),
regexp_replace(Home_Zip_Code, ‘”‘,”),
regexp_replace(Office_Country, ‘”‘,”),
regexp_replace(regexp_replace(office_city, ‘”‘,”), “‘”,””),
regexp_replace(Office_Neighborhood, ‘”‘,”),
regexp_replace(Office_Zip_Code, ‘”‘,”),
regexp_replace(Zip_of_the_location, ‘”‘,”),
regexp_replace(POI_Name, ‘”‘,”),
regexp_replace(POI_Type, ‘”‘,”),
regexp_replace(POI_Tag_Value, ‘”‘,”),
regexp_replace(Altitude, ‘”‘,”),
regexp_replace(server_version, ‘”‘,”),
regexp_replace(ver_no, ‘”‘,”),
dt
FROM sampledb.myTable WHERE dt>=current_date() – interval ‘3’ day;
Other good hive create external table examples:
https://community.hortonworks.com/questions/28856/hive-table-format-and-compression.html
——————————————————————————————————————————
I put a lot of thoughts into these blogs, so I could share the information in a clear and useful way. If you have any comments, thoughts, questions, or you need someone to consult with, feel free to contact me:
1 thought on “AWS EMR Hive Create External table with Dynamic partitioning transformation job example in SQL”