Create table in Hive
hive> create table employee(
> empno int,
> empname string,
> empcity string,
> empcountry string)
> row format delimited
> fields terminated by ','
> stored as textfile;
OK
Time taken: 0.168 seconds
hive> describe employee;
OK
empno int
empname string
empcity string
empcountry string
Time taken: 0.115 seconds, Fetched: 4 row(s)
Load data from local path
hive> load data local inpath '/home/ram/employee.txt' into table employee;
Loading data to table hivedemo.employee
Table hivedemo.employee stats: [numFiles=1, totalSize=48]
OK
Time taken: 0.352 seconds
hive> select * from employee;
OK
1 ramkumar palani india
2 kavitha ramnad india
Time taken: 0.146 seconds, Fetched: 3 row(s)
hive>
load data from dfs
hadoop fs -copyFromLocal /home/ram/employee1.txt /tmp
hive> load data inpath '/tmp/employee1.txt' into table employee;
hive> select * from employee where empno is not null;
OK
1 ramkumar palani india
2 kavitha ramnad india
1 ramkumar palani india
2 kavitha ramnad india
Time taken: 0.179 seconds, Fetched: 4 row(s)
hive>
files available in employee folder
ram@ram:~$ hadoop fs -ls /user/hive/warehouse/hivedemo.db/employee
Found 2 items
-rwxrwxr-x 3 ram 48 2015-08-31 21:00 /user/hive/warehouse/hivedemo.db/employee/employee.txt
-rwxrwxr-x 3 ram 48 2015-08-31 21:11 /user/hive/warehouse/hivedemo.db/employee/employee1.txt
hive> alter table employee rename to empnew;
OK
Time taken: 0.223 seconds
hive> show tables;
OK
empnew
Time taken: 0.036 seconds, Fetched: 1 row(s)
hive>
ram@ram:~$ hadoop fs -ls /user/hive/warehouse/hivedemo.db/Found 1 items
drwxrwxr-x - ram 0 2015-08-31 21:12 /user/hive/warehouse/hivedemo.db/empnew
ram@ram:~$ hadoop fs -ls /user/hive/warehouse/hivedemo.db/empnewFound 2 items
-rwxrwxr-x 3 ram 48 2015-08-31 21:00 /user/hive/warehouse/hivedemo.db/empnew/employee.txt
-rwxrwxr-x 3 ram 48 2015-08-31 21:11 /user/hive/warehouse/hivedemo.db/empnew/employee1.txt
ram@ram:~$
overwrite - overwrite existing files in employee folder
hive> load data inpath '/tmp/employee1.txt' overwrite into table empnew;
Loading data to table hivedemo.empnew
Table hivedemo.empnew stats: [numFiles=1, numRows=0, totalSize=48, rawDataSize=0]
OK
Time taken: 0.42 seconds
hive> select * from empnew;
OK
1 ramkumar palani india
2 kavitha ramnad india
NULL NULL NULL NULL
Time taken: 0.182 seconds, Fetched: 3 row(s)
hive>
Drop the table
hive> drop table empnew1;
OK
Time taken: 0.185 seconds
hive>
create external table
- multiple tables can point same path
- non hive application (without exclusive lock) can use this
hive> create external table empexternal (col1 int, col2 string, col3 string, col4 string)
> row format delimited
> fields terminated by ','
> stored as textfile
> location '/tmp/emptmp'
> ;
OK
Time taken: 0.129 seconds
hive> select * from empexternal;
OK
1 ramkumar palani india
2 kavitha ramnad india
Time taken: 0.146 seconds, Fetched: 3 row(s)
hive>
copy file to external table path
hive> load data inpath '/tmp/emptmp/emp2.txt' into table emp2;
Partitions
source: http://hadooptutorial.info/partitioning-in-hive/
hive> CREATE TABLE partitioned_user(
> firstname VARCHAR(64),
> lastname VARCHAR(64),
> address STRING,
> city VARCHAR(64),
> post STRING,
> phone1 VARCHAR(64),
> phone2 STRING,
> email STRING,
> web STRING
> )
> PARTITIONED BY (country VARCHAR(64), state VARCHAR(64))
> row format delimited
> fields terminated by ','
> STORED AS TEXTFILE;
OK
Time taken: 0.139 seconds
USCA.txt
Rebbecca,Didio,171 E 24th St,Leith,7315,03-8174-9123,0458-665-290,rebbecca.didio@didio.com.au,http://www.brandtjonathanfesq.com.au
Stevie,Hallo,22222 Acoma St,Proston,4613,07-9997-3366,0497-622-620,stevie.hallo@hotmail.com,http://www.landrumtemporaryservices.com.au
Mariko,Stayer,534 Schoenborn St #51,Hamel,6215,08-5558-9019,0427-885-282,mariko_stayer@hotmail.com,http://www.inabinetmacreesq.com.au
hive> LOAD DATA LOCAL INPATH '/home/ram/USCA.txt'
> INTO TABLE partitioned_user
> PARTITION (country = 'US', state = 'CA');
Loading data to table default.partitioned_user partition (country=US, state=CA)
Partition default.partitioned_user{country=US, state=CA} stats: [numFiles=1, numRows=0, totalSize=400, rawDataSize=0]
OK
Time taken: 0.619 seconds
hive> select firstname, city, state, country, post from partitioned_user;
OK
Rebbecca Leith CA US 7315
Stevie Proston CA US 4613
Mariko Hamel CA US 6215
Time taken: 0.238 seconds, Fetched: 3 row(s)
hive>
ram@ram:~$ hadoop fs -ls /user/hive/warehouse/partitioned_user/country=US/state=CA
Found 1 items
-rwxrwxr-x 3 ram 400 2015-09-01 09:50 /user/hive/warehouse/partitioned_user/country=US/state=CA/USCA.txt
ram@ram:~$
INDCHN.txt
Ramkumar,Gopal,171 E 24th St,Leith,7315,03-8174-9123,0458-665-290,rebbecca.didio@didio.com.au,http://www.brandtjonathanfesq.com.au
Nagarajan,Palani,22222 Acoma St,Proston,4613,07-9997-3366,0497-622-620,stevie.hallo@hotmail.com,http://www.landrumtemporaryservices.com.au
Karthik,Bhaskaran,534 Schoenborn St #51,Hamel,6215,08-5558-9019,0427-885-282,mariko_stayer@hotmail.com,http://www.inabinetmacreesq.com.au
hive> LOAD DATA LOCAL INPATH '/home/ram/INDCHN.txt'
> INTO TABLE partitioned_user
> PARTITION (country = 'IND', state = 'CHN');
Loading data to table default.partitioned_user partition (country=IND, state=CHN)
Partition default.partitioned_user{country=IND, state=CHN} stats: [numFiles=1, numRows=0, totalSize=409, rawDataSize=0]
OK
Time taken: 0.614 seconds
hive> select firstname, city, state, country, post from partitioned_user;
OK
Ramkumar Leith CHN IND 7315
Nagarajan Proston CHN IND 4613
Karthik Hamel CHN IND 6215
Rebbecca Leith CA US 7315
Stevie Proston CA US 4613
Mariko Hamel CA US 6215
Time taken: 0.238 seconds, Fetched: 7 row(s)
hive> select firstname, city, state, country, post from partitioned_user
> where country = 'IND';
OK
Ramkumar Leith CHN IND 7315
Nagarajan Proston CHN IND 4613
Karthik Hamel CHN IND 6215
Time taken: 0.584 seconds, Fetched: 4 row(s)
INSERT FROM ANOTHER TABLE
hive> CREATE TABLE user_demo(
> firstname VARCHAR(64),
> lastname VARCHAR(64),
> address STRING,
> city VARCHAR(64),
> post STRING,
> phone1 VARCHAR(64),
> phone2 STRING,
> email STRING,
> web STRING,
> country varchar(64),
> state varchar(64)
> )
> row format delimited
> fields terminated by ','
> STORED AS TEXTFILE;
OK
Time taken: 0.163 seconds
hive> insert overwrite table user_demo
> select * from partitioned_user
> where firstname = 'Nagarajan' or firstname = 'Mariko';
Query ID = ram_20150901102328_85ea0fcb-86d4-4aa4-8d5a-6a5eb7e997c3
Total jobs = 3
Launching Job 1 out of 3
Number of reduce tasks is set to 0 since there's no reduce operator
Starting Job = job_1441075729906_0001, Tracking URL = http://ram:8088/proxy/application_1441075729906_0001/
Kill Command = /usr/local/hadoop/bin/hadoop job -kill job_1441075729906_0001
Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 0
2015-09-01 10:23:39,660 Stage-1 map = 0%, reduce = 0%
2015-09-01 10:23:48,262 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 2.97 sec
MapReduce Total cumulative CPU time: 2 seconds 970 msec
Ended Job = job_1441075729906_0001
Stage-4 is selected by condition resolver.
Stage-3 is filtered out by condition resolver.
Stage-5 is filtered out by condition resolver.
Moving data to: hdfs://localhost:9000/user/hive/warehouse/user_demo/.hive-staging_hive_2015-09-01_10-23-28_494_5725270058421044549-1/-ext-10000
Loading data to table default.user_demo
Table default.user_demo stats: [numFiles=1, numRows=2, totalSize=287, rawDataSize=285]
MapReduce Jobs Launched:
Stage-Stage-1: Map: 1 Cumulative CPU: 2.97 sec HDFS Read: 7370 HDFS Write: 361 SUCCESS
Total MapReduce CPU Time Spent: 2 seconds 970 msec
OKTime taken: 22.406 seconds
hive>
hive> create table employee(
> empno int,
> empname string,
> empcity string,
> empcountry string)
> row format delimited
> fields terminated by ','
> stored as textfile;
OK
Time taken: 0.168 seconds
hive> describe employee;
OK
empno int
empname string
empcity string
empcountry string
Time taken: 0.115 seconds, Fetched: 4 row(s)
Load data from local path
hive> load data local inpath '/home/ram/employee.txt' into table employee;
Loading data to table hivedemo.employee
Table hivedemo.employee stats: [numFiles=1, totalSize=48]
OK
Time taken: 0.352 seconds
hive> select * from employee;
OK
1 ramkumar palani india
2 kavitha ramnad india
Time taken: 0.146 seconds, Fetched: 3 row(s)
hive>
load data from dfs
hadoop fs -copyFromLocal /home/ram/employee1.txt /tmp
hive> load data inpath '/tmp/employee1.txt' into table employee;
hive> select * from employee where empno is not null;
OK
1 ramkumar palani india
2 kavitha ramnad india
1 ramkumar palani india
2 kavitha ramnad india
Time taken: 0.179 seconds, Fetched: 4 row(s)
hive>
files available in employee folder
ram@ram:~$ hadoop fs -ls /user/hive/warehouse/hivedemo.db/employee
Found 2 items
-rwxrwxr-x 3 ram 48 2015-08-31 21:00 /user/hive/warehouse/hivedemo.db/employee/employee.txt
-rwxrwxr-x 3 ram 48 2015-08-31 21:11 /user/hive/warehouse/hivedemo.db/employee/employee1.txt
hive> alter table employee rename to empnew;
OK
Time taken: 0.223 seconds
hive> show tables;
OK
empnew
Time taken: 0.036 seconds, Fetched: 1 row(s)
hive>
ram@ram:~$ hadoop fs -ls /user/hive/warehouse/hivedemo.db/Found 1 items
drwxrwxr-x - ram 0 2015-08-31 21:12 /user/hive/warehouse/hivedemo.db/empnew
ram@ram:~$ hadoop fs -ls /user/hive/warehouse/hivedemo.db/empnewFound 2 items
-rwxrwxr-x 3 ram 48 2015-08-31 21:00 /user/hive/warehouse/hivedemo.db/empnew/employee.txt
-rwxrwxr-x 3 ram 48 2015-08-31 21:11 /user/hive/warehouse/hivedemo.db/empnew/employee1.txt
ram@ram:~$
overwrite - overwrite existing files in employee folder
hive> load data inpath '/tmp/employee1.txt' overwrite into table empnew;
Loading data to table hivedemo.empnew
Table hivedemo.empnew stats: [numFiles=1, numRows=0, totalSize=48, rawDataSize=0]
OK
Time taken: 0.42 seconds
hive> select * from empnew;
OK
1 ramkumar palani india
2 kavitha ramnad india
NULL NULL NULL NULL
Time taken: 0.182 seconds, Fetched: 3 row(s)
hive>
Drop the table
hive> drop table empnew1;
OK
Time taken: 0.185 seconds
hive>
create external table
- multiple tables can point same path
- non hive application (without exclusive lock) can use this
hive> create external table empexternal (col1 int, col2 string, col3 string, col4 string)
> row format delimited
> fields terminated by ','
> stored as textfile
> location '/tmp/emptmp'
> ;
OK
Time taken: 0.129 seconds
hive> select * from empexternal;
OK
1 ramkumar palani india
2 kavitha ramnad india
Time taken: 0.146 seconds, Fetched: 3 row(s)
hive>
copy file to external table path
hive> load data inpath '/tmp/emptmp/emp2.txt' into table emp2;
Partitions
source: http://hadooptutorial.info/partitioning-in-hive/
hive> CREATE TABLE partitioned_user(
> firstname VARCHAR(64),
> lastname VARCHAR(64),
> address STRING,
> city VARCHAR(64),
> post STRING,
> phone1 VARCHAR(64),
> phone2 STRING,
> email STRING,
> web STRING
> )
> PARTITIONED BY (country VARCHAR(64), state VARCHAR(64))
> row format delimited
> fields terminated by ','
> STORED AS TEXTFILE;
OK
Time taken: 0.139 seconds
USCA.txt
Rebbecca,Didio,171 E 24th St,Leith,7315,03-8174-9123,0458-665-290,rebbecca.didio@didio.com.au,http://www.brandtjonathanfesq.com.au
Stevie,Hallo,22222 Acoma St,Proston,4613,07-9997-3366,0497-622-620,stevie.hallo@hotmail.com,http://www.landrumtemporaryservices.com.au
Mariko,Stayer,534 Schoenborn St #51,Hamel,6215,08-5558-9019,0427-885-282,mariko_stayer@hotmail.com,http://www.inabinetmacreesq.com.au
hive> LOAD DATA LOCAL INPATH '/home/ram/USCA.txt'
> INTO TABLE partitioned_user
> PARTITION (country = 'US', state = 'CA');
Loading data to table default.partitioned_user partition (country=US, state=CA)
Partition default.partitioned_user{country=US, state=CA} stats: [numFiles=1, numRows=0, totalSize=400, rawDataSize=0]
OK
Time taken: 0.619 seconds
hive> select firstname, city, state, country, post from partitioned_user;
OK
Rebbecca Leith CA US 7315
Stevie Proston CA US 4613
Mariko Hamel CA US 6215
Time taken: 0.238 seconds, Fetched: 3 row(s)
hive>
ram@ram:~$ hadoop fs -ls /user/hive/warehouse/partitioned_user/country=US/state=CA
Found 1 items
-rwxrwxr-x 3 ram 400 2015-09-01 09:50 /user/hive/warehouse/partitioned_user/country=US/state=CA/USCA.txt
ram@ram:~$
INDCHN.txt
Ramkumar,Gopal,171 E 24th St,Leith,7315,03-8174-9123,0458-665-290,rebbecca.didio@didio.com.au,http://www.brandtjonathanfesq.com.au
Nagarajan,Palani,22222 Acoma St,Proston,4613,07-9997-3366,0497-622-620,stevie.hallo@hotmail.com,http://www.landrumtemporaryservices.com.au
Karthik,Bhaskaran,534 Schoenborn St #51,Hamel,6215,08-5558-9019,0427-885-282,mariko_stayer@hotmail.com,http://www.inabinetmacreesq.com.au
hive> LOAD DATA LOCAL INPATH '/home/ram/INDCHN.txt'
> INTO TABLE partitioned_user
> PARTITION (country = 'IND', state = 'CHN');
Loading data to table default.partitioned_user partition (country=IND, state=CHN)
Partition default.partitioned_user{country=IND, state=CHN} stats: [numFiles=1, numRows=0, totalSize=409, rawDataSize=0]
OK
Time taken: 0.614 seconds
hive> select firstname, city, state, country, post from partitioned_user;
OK
Ramkumar Leith CHN IND 7315
Nagarajan Proston CHN IND 4613
Karthik Hamel CHN IND 6215
Rebbecca Leith CA US 7315
Stevie Proston CA US 4613
Mariko Hamel CA US 6215
Time taken: 0.238 seconds, Fetched: 7 row(s)
hive> select firstname, city, state, country, post from partitioned_user
> where country = 'IND';
OK
Ramkumar Leith CHN IND 7315
Nagarajan Proston CHN IND 4613
Karthik Hamel CHN IND 6215
Time taken: 0.584 seconds, Fetched: 4 row(s)
INSERT FROM ANOTHER TABLE
hive> CREATE TABLE user_demo(
> firstname VARCHAR(64),
> lastname VARCHAR(64),
> address STRING,
> city VARCHAR(64),
> post STRING,
> phone1 VARCHAR(64),
> phone2 STRING,
> email STRING,
> web STRING,
> country varchar(64),
> state varchar(64)
> )
> row format delimited
> fields terminated by ','
> STORED AS TEXTFILE;
OK
Time taken: 0.163 seconds
hive> insert overwrite table user_demo
> select * from partitioned_user
> where firstname = 'Nagarajan' or firstname = 'Mariko';
Query ID = ram_20150901102328_85ea0fcb-86d4-4aa4-8d5a-6a5eb7e997c3
Total jobs = 3
Launching Job 1 out of 3
Number of reduce tasks is set to 0 since there's no reduce operator
Starting Job = job_1441075729906_0001, Tracking URL = http://ram:8088/proxy/application_1441075729906_0001/
Kill Command = /usr/local/hadoop/bin/hadoop job -kill job_1441075729906_0001
Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 0
2015-09-01 10:23:39,660 Stage-1 map = 0%, reduce = 0%
2015-09-01 10:23:48,262 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 2.97 sec
MapReduce Total cumulative CPU time: 2 seconds 970 msec
Ended Job = job_1441075729906_0001
Stage-4 is selected by condition resolver.
Stage-3 is filtered out by condition resolver.
Stage-5 is filtered out by condition resolver.
Moving data to: hdfs://localhost:9000/user/hive/warehouse/user_demo/.hive-staging_hive_2015-09-01_10-23-28_494_5725270058421044549-1/-ext-10000
Loading data to table default.user_demo
Table default.user_demo stats: [numFiles=1, numRows=2, totalSize=287, rawDataSize=285]
MapReduce Jobs Launched:
Stage-Stage-1: Map: 1 Cumulative CPU: 2.97 sec HDFS Read: 7370 HDFS Write: 361 SUCCESS
Total MapReduce CPU Time Spent: 2 seconds 970 msec
OKTime taken: 22.406 seconds
hive>