Hive

AWS EMR Hive Create External table with Dynamic partitioning transformation job example in SQL

This is an example to understand the power of Dynamic partitioning in Hive

set hive.exec.dynamic.partition.mode=nonstrict;

CREATE EXTERNAL TABLE IF NOT EXISTS sampledb.myTable(

  GAID string,

  leave_timestamp string,

  latitude  string,

  longitude string,

  stay_time string,

  country string,

  city string,

  Street string,

  house string,

Home_Country string,

Home_City string,

Home_Neighborhood string,

Home_Zip_Code string,

Office_Country string,

Office_City string,

Office_Neighborhood string,

Office_Zip_Code string,

Zip_of_the_location string,

POI_Name string,

POI_Type string,

POI_Tag_Value string,

Altitude string,

Server_Version string,

Ver_No string)

PARTITIONED BY (dt string)

ROW FORMAT DELIMITED FIELDS TERMINATED BY ‘,’ LINES TERMINATED BY ‘\n’

LOCATION ‘s3://myBucket/production/rawdata/online/‘ ;

MSCK REPAIR TABLE sampledb.myTable;

    CREATE EXTERNAL TABLE IF NOT EXISTS sampledb.myTable_parquet(

    GAID string,

    leave_timestamp string,

    latitude  string,

    longitude string,

    stay_time string,

    country string,

    city string,

    Street string,

    house string,

    Home_Country string,

    Home_City string,

    Home_Neighborhood string,

    Home_Zip_Code string,

    Office_Country string,

    Office_City string,

    Office_Neighborhood string,

    Office_Zip_Code string,

    Zip_of_the_location string,

    POI_Name string,

    POI_Type string,

    POI_Tag_Value string,

    Altitude string,

    Server_Version string,

    Ver_No string

    )

    PARTITIONED BY (dt string)

    STORED AS PARQUET

    LOCATION ‘s3://myBucket/production/parquetdata/online_new/‘ ;

MSCK REPAIR TABLE sampledb.myTable_parquet;

INSERT OVERWRITE TABLE sampledb.myTable_parquet  partition (dt)

SELECT

regexp_replace(gaid, ‘”‘,”),

regexp_replace(leave_timestamp, ‘”‘,”) ,

regexp_replace(latitude, ‘”‘,”),

regexp_replace(longitude, ‘”‘,”) ,

regexp_replace(stay_time, ‘”‘,”) ,

regexp_replace(country, ‘”‘,”),

regexp_replace(regexp_replace(city, ‘”‘,”), “‘”,””),

regexp_replace(street, ‘”‘,”),

regexp_replace(house, ‘”‘,”),

regexp_replace(Home_Country, ‘”‘,”),

regexp_replace(regexp_replace(home_city, ‘”‘,”), “‘”,””),

regexp_replace(Home_Neighborhood, ‘”‘,”),

regexp_replace(Home_Zip_Code, ‘”‘,”),

regexp_replace(Office_Country, ‘”‘,”),

regexp_replace(regexp_replace(office_city, ‘”‘,”), “‘”,””),

regexp_replace(Office_Neighborhood, ‘”‘,”),

regexp_replace(Office_Zip_Code, ‘”‘,”),

regexp_replace(Zip_of_the_location, ‘”‘,”),

regexp_replace(POI_Name, ‘”‘,”),

regexp_replace(POI_Type, ‘”‘,”),

regexp_replace(POI_Tag_Value, ‘”‘,”),

regexp_replace(Altitude, ‘”‘,”),

regexp_replace(server_version, ‘”‘,”),

regexp_replace(ver_no, ‘”‘,”),

  dt

FROM sampledb.myTable WHERE dt>=current_date() – interval ‘3’ day;

Other good hive create external table examples:

https://community.hortonworks.com/questions/28856/hive-table-format-and-compression.html

——————————————————————————————————————————

I put a lot of thoughts into these blogs, so I could share the information in a clear and useful way. If you have any comments, thoughts, questions, or you need someone to consult with, feel free to contact me:

https://www.linkedin.com/in/omid-vahdaty/

1 thought on “AWS EMR Hive Create External table with Dynamic partitioning transformation job example in SQL”

Leave a Reply