Error when bucketing with CLUSTERED BY in Hive - hadoop

DROP TABLE filtered_online_march_customers;
--creating bucketed table with customer id
CREATE TABLE IF NOT EXISTS filtered_online_march_customers(
customer_id string,
order_id string
)
CLUSTERED BY(customer_id) INTO 32 BUCKETS;
--populating the table
set hive.enforce.bucketing = true;
FROM filtered_march_online_transactions
INSERT OVERWRITE TABLE filtered_online_march_customers
SELECT
*
I created this table, which is clustered by customer_id. However, when I actually try to sample from the buckets, it does not work.
CREATE TABLE randomized_filtered_march_customers
AS
SELECT
*
FROM
filtered_online_march_customers
TABLESAMPLE(BUCKET 1 OUT OF 32 ON customer_id)
I got the error:
cannot find dir = maprfs:///hive/v0k0020.db/filtered_online_march_customers/000000_0 in pathToPartitionInfo: [maprfs:/hive/v0k0020.db/filtered_online_march_customers/000000_0]
at org.apache.hadoop.hive.ql.io.HiveFileFormatUtils.getPartitionDescFromPathRecursively(HiveFileFormatUtils.java:344)
at org.apache.hadoop.hive.ql.io.HiveFileFormatUtils.getPartitionDescFromPathRecursively(HiveFileFormatUtils.java:306)
at org.apache.hadoop.hive.ql.io.CombineHiveInputFormat$CombineHiveInputSplit.<init>(CombineHiveInputFormat.java:108)
at org.apache.hadoop.hive.ql.io.CombineHiveInputFormat.getSplits(CombineHiveInputFormat.java:455)
at org.apache.hadoop.mapred.JobClient.writeOldSplits(JobClient.java:1098)
at org.apache.hadoop.mapred.JobClient.writeSplits(JobClient.java:1090)
at org.apache.hadoop.mapred.JobClient.access$500(JobClient.java:176)
at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:931)
at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:882)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1595)
at org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:882)
at org.apache.hadoop.mapred.JobClient.submitJob(JobClient.java:856)
at org.apache.hadoop.hive.ql.exec.mr.ExecDriver.execute(ExecDriver.java:420)
at org.apache.hadoop.hive.ql.exec.mr.MapRedTask.execute(MapRedTask.java:136)
at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:153)
at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:85)
at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1503)
at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1270)
at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1088)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:911)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:901)
at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:268)
at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:220)
at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:423)
at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:359)
at org.apache.hadoop.hive.cli.CliDriver.processReader(CliDriver.java:456)
at org.apache.hadoop.hive.cli.CliDriver.processFile(CliDriver.java:466)
at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:748)
at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:686)
at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:625)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
Job Submission failed with exception 'java.io.IOException(cannot find dir = maprfs:///hive/v0k0020.db/filtered_online_march_customers/000000_0 in pathToPartitionInfo: [maprfs:/hive/v0k0020.db/filtered_online_march_customers/000000_0])'
If I change the query to
CREATE TABLE randomized_filtered_march_customers
AS
SELECT
*
FROM
filtered_online_march_customers
TABLESAMPLE(BUCKET 1 OUT OF 32 ON rand())
It works fine. Any idea how to address it?

Related

Sqoop import fails due to java.io.IOException: SQLException in nextKeyValue

I am joining data from three tables and importing it from Oracle to Hive using a Sqoop import command. The table row counts are below.
select count(*) from table1; -- 40446561
select count(*) from table2; -- 16886690
select count(*) from table3; -- 15142664
Sqoop Query:
sqoop-import \
  -D mapred.child.java.opts="-Djava.security.egd=file:/dev/../dev/urandom" \
  --connect $CONNECTION --username $DB_USER_NAME --password $DB_PASSWORD \
  --hive-import --hive-overwrite --hive-table ${HIVE_TABLE_NAME} \
  --target-dir $HDFS_TARGET_DIR --mapreduce-job-name $JOB_NAME \
  --query " SELECT t.* FROM (SELECT rownum ID, a.column1, a.column2, a.column3, a.column4, b.column5, b.column6, c.column7 FROM table1 a LEFT OUTER JOIN table2 b on (b.column5 = a.column1) LEFT OUTER JOIN table3 c on (c.column7= a.column1)) t WHERE \$CONDITIONS" \
  --split-by t.ID --null-string '\\N' --null-non-string '\\N' \
  --num-mappers 12 --fetch-size 10000 --delete-target-dir --direct --verbose
I am getting the below exception:
Error: java.io.IOException: SQLException in nextKeyValue
at org.apache.sqoop.mapreduce.db.DBRecordReader.nextKeyValue(DBRecordReader.java:277)
at org.apache.hadoop.mapred.MapTask$NewTrackingRecordReader.nextKeyValue(MapTask.java:556)
at org.apache.hadoop.mapreduce.task.MapContextImpl.nextKeyValue(MapContextImpl.java:80)
at org.apache.hadoop.mapreduce.lib.map.WrappedMapper$Context.nextKeyValue(WrappedMapper.java:91)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:145)
at org.apache.sqoop.mapreduce.AutoProgressMapper.run(AutoProgressMapper.java:64)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:787)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1724)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:162)
Caused by: java.sql.SQLRecoverableException: IO Error: Connection reset
at oracle.jdbc.driver.T4CPreparedStatement.fetch(T4CPreparedStatement.java:1080)
at oracle.jdbc.driver.OracleStatement.fetchMoreRows(OracleStatement.java:3716)
at oracle.jdbc.driver.InsensitiveScrollableResultSet.fetchMoreRows(InsensitiveScrollableResultSet.java:1015)
at oracle.jdbc.driver.InsensitiveScrollableResultSet.absoluteInternal(InsensitiveScrollableResultSet.java:979)
at oracle.jdbc.driver.InsensitiveScrollableResultSet.next(InsensitiveScrollableResultSet.java:579)
at org.apache.sqoop.mapreduce.db.DBRecordReader.nextKeyValue(DBRecordReader.java:237)
... 12 more
Caused by: java.net.SocketException: Connection reset
at java.net.SocketInputStream.read(SocketInputStream.java:209)
at java.net.SocketInputStream.read(SocketInputStream.java:141)
at oracle.net.ns.Packet.receive(Packet.java:311)
at oracle.net.ns.DataPacket.receive(DataPacket.java:105)
at oracle.net.ns.NetInputStream.getNextPacket(NetInputStream.java:305)
at oracle.net.ns.NetInputStream.read(NetInputStream.java:249)
at oracle.jdbc.driver.T4CSocketInputStreamWrapper.read(T4CSocketInputStreamWrapper.java:104)
at oracle.jdbc.driver.T4CMAREngineStream.getNBytes(T4CMAREngineStream.java:646)
at oracle.jdbc.driver.T4CMAREngineStream.unmarshalNBytes(T4CMAREngineStream.java:616)
at oracle.jdbc.driver.DynamicByteArray.unmarshalBuffer(DynamicByteArray.java:338)
at oracle.jdbc.driver.DynamicByteArray.unmarshalCLR(DynamicByteArray.java:226)
at oracle.jdbc.driver.T4CMarshaller$BasicMarshaller.unmarshalBytes(T4CMarshaller.java:124)
at oracle.jdbc.driver.T4CMarshaller$BasicMarshaller.unmarshalOneRow(T4CMarshaller.java:101)
at oracle.jdbc.driver.T4CVarcharAccessor.unmarshalOneRow(T4CVarcharAccessor.java:212)
at oracle.jdbc.driver.T4CTTIrxd.unmarshal(T4CTTIrxd.java:1474)
at oracle.jdbc.driver.T4CTTIrxd.unmarshal(T4CTTIrxd.java:1282)
at oracle.jdbc.driver.T4C8Oall.readRXD(T4C8Oall.java:851)
at oracle.jdbc.driver.T4CTTIfun.receive(T4CTTIfun.java:448)
at oracle.jdbc.driver.T4CTTIfun.doRPC(T4CTTIfun.java:257)
at oracle.jdbc.driver.T4C8Oall.doOALL(T4C8Oall.java:587)
at oracle.jdbc.driver.T4CPreparedStatement.doOall8(T4CPreparedStatement.java:225)
at oracle.jdbc.driver.T4CPreparedStatement.fetch(T4CPreparedStatement.java:1066)
... 17 more
Please let me know how to fix it.
This simply means that something in the backend (DBMS) stopped responding, typically because of resource exhaustion. It has nothing to do with your code or the number of inserts. You can read more about similar problems here:
http://kr.forums.oracle.com/forums/thread.jspa?threadID=941911
http://forums.oracle.com/forums/thread.jspa?messageID=3800354
This may not answer your question, but it should give you an idea of why it might be happening. You could discuss it further with your DBA and see whether there is something specific in your case.

jdbc / PreparedStatement escape sequence

I'm trying to use a JDBC escape sequence while executing SQL via a PreparedStatement in Java.
Below is my sample code.
String sql ="select 1 from dual where 'abcd' = ? {escape '|'}";
ps = con.prepareStatement(sql);
ps.setString(1,"abcd");
ps.executeQuery();
While executing this, I get the error below.
java.sql.SQLSyntaxErrorException: ORA-00933: SQL command not properly ended
at oracle.jdbc.driver.T4CTTIoer.processError(T4CTTIoer.java:445)
at oracle.jdbc.driver.T4CTTIoer.processError(T4CTTIoer.java:396)
at oracle.jdbc.driver.T4C8Oall.processError(T4C8Oall.java:879)
at oracle.jdbc.driver.T4CTTIfun.receive(T4CTTIfun.java:450)
at oracle.jdbc.driver.T4CTTIfun.doRPC(T4CTTIfun.java:192)
at oracle.jdbc.driver.T4C8Oall.doOALL(T4C8Oall.java:531)
at oracle.jdbc.driver.T4CPreparedStatement.doOall8(T4CPreparedStatement.java:207)
at oracle.jdbc.driver.T4CPreparedStatement.executeForDescribe(T4CPreparedStatement.java:884)
at oracle.jdbc.driver.OracleStatement.executeMaybeDescribe(OracleStatement.java:1167)
at oracle.jdbc.driver.OracleStatement.doExecuteWithTimeout(OracleStatement.java:1289)
at oracle.jdbc.driver.OraclePreparedStatement.executeInternal(OraclePreparedStatement.java:3584)
at oracle.jdbc.driver.OraclePreparedStatement.executeQuery(OraclePreparedStatement.java:3628)
at oracle.jdbc.driver.OraclePreparedStatementWrapper.executeQuery(OraclePreparedStatementWrapper.java:1493)
at org.apache.commons.dbcp.DelegatingPreparedStatement.executeQuery(DelegatingPreparedStatement.java:96)
at org.apache.commons.dbcp.DelegatingPreparedStatement.executeQuery(DelegatingPreparedStatement.java:96)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.jdbcdslog.PreparedStatementLoggingHandler.invoke(PreparedStatementLoggingHandler.java:35)
Can anyone please help?
The escape sequence you mention is only valid with LIKE clauses.
Try the following instead:
String sql = "select 1 from dual where 'abcd' like ? {escape '|'}";
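If it helps, here is a minimal self-contained sketch of the corrected call; the connection URL, credentials, and class name are placeholders, not from the original post:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;

public class EscapeSequenceExample {
    public static void main(String[] args) throws Exception {
        // Placeholder connection details; substitute your own URL and credentials.
        try (Connection con = DriverManager.getConnection(
                "jdbc:oracle:thin:@//dbhost:1521/ORCL", "user", "password")) {

            // The {escape '|'} clause only applies to the pattern of a LIKE predicate,
            // so the comparison uses LIKE rather than '='.
            String sql = "select 1 from dual where 'abcd' like ? {escape '|'}";
            try (PreparedStatement ps = con.prepareStatement(sql)) {
                ps.setString(1, "abcd"); // plain pattern; '|' would escape wildcards such as _ or %
                try (ResultSet rs = ps.executeQuery()) {
                    while (rs.next()) {
                        System.out.println(rs.getInt(1));
                    }
                }
            }
        }
    }
}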

NamedParameterJdbcTemplate batchUpdate not working for merge

I am running the below merge query using NamedParameterJdbcTemplate batchUpdate.
The batchUpdate(String sql, SqlParameterSource[] batchArgs) call works when batchArgs contains a single item but does not work with multiple items. Below is the query; I have renamed the table and column names.
The NamedParameterJdbcTemplate.batchUpdate works if run for each array item separately, or if run as NamedParameterJdbcTemplate.update().
SQL Query:
MERGE INTO tableName C1 USING (VALUES (:f1)) AS C2(Field1)
ON (C1.CMP = C2.CO_C)
WHEN MATCHED THEN
UPDATE SET Field2 = :date1, Field3 = :time1, Field4 =:userId
WHEN NOT MATCHED THEN
INSERT (Field5, Field1, Field4, Field2, Field3) VALUES (:offCC, :f1,:userId, :date1, :time1)
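For context, the batch call is presumably wired up roughly like the sketch below; the class name, method, and parameter values are illustrative, not taken from the original code:

import org.springframework.jdbc.core.namedparam.MapSqlParameterSource;
import org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate;
import org.springframework.jdbc.core.namedparam.SqlParameterSource;

public class MergeBatchSketch {

    // mergeSql is the MERGE statement shown above, with the same named parameters.
    static int[] runBatch(NamedParameterJdbcTemplate template, String mergeSql) {
        // One SqlParameterSource per row; names match the :placeholders in the SQL.
        SqlParameterSource[] batchArgs = new SqlParameterSource[] {
            new MapSqlParameterSource()
                .addValue("f1", "CO1")
                .addValue("date1", java.sql.Date.valueOf("2020-01-01"))
                .addValue("time1", java.sql.Time.valueOf("10:15:30"))
                .addValue("userId", "user1")
                .addValue("offCC", "OFF1"),
            new MapSqlParameterSource()
                .addValue("f1", "CO2")
                .addValue("date1", java.sql.Date.valueOf("2020-01-02"))
                .addValue("time1", java.sql.Time.valueOf("11:00:00"))
                .addValue("userId", "user2")
                .addValue("offCC", "OFF2")
        };
        // Works when batchArgs holds a single element (or when each element is sent
        // through template.update(mergeSql, arg) on its own); fails when the whole
        // array is submitted as one batch.
        return template.batchUpdate(mergeSql, batchArgs);
    }
}

When the full array is submitted in one batchUpdate call, the driver reports the batch failure below: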
[err] SQL exception:
[err] Message: [jcc][t4][102][10040][4.22.29] Batch failure. The batch was submitted, but at least one exception occurred on an individual member of the batch. Use getNextException() to retrieve the exceptions for specific batched elements. ERRORCODE=-4229, SQLSTATE=null
[err] SQLSTATE: null
[err] Error code: -4229
[err] SQL exception:
[err] Message: VARIABLE IS NOT DEFINED OR NOT USABLE.
SQLCODE=-312, SQLSTATE=42618, DRIVER=4.22.29
[err] SQLSTATE: 42618
[err] Error code: -312
[err] SQL exception:
[err] Message: Error for batch element #1: VARIABLE IS NOT DEFINED OR NOT USABLE
[err] SQLSTATE: 42618
[err] Error code: -312

How to set JDBC prepared statement parameters when querying Amazon Redshift

I'm using JDBC to execute queries on Amazon Redshift. I have a java.sql.PreparedStatement object. When I set the parameters on the PreparedStatement object and then call:
preparedStatement.executeQuery();
with this query:
SELECT start_date from sometable_:year order by start_date desc limit 1
after parameter substitution, the query should look like this:
SELECT start_date from sometable_2010 order by start_date desc limit 1
I get this exception:
org.postgresql.util.PSQLException: ERROR: relation "sometable_$1" does not exist
at org.postgresql.core.v3.QueryExecutorImpl.receiveErrorResponse(QueryExecutorImpl.java:1592)
at org.postgresql.core.v3.QueryExecutorImpl.processResults(QueryExecutorImpl.java:1327)
at org.postgresql.core.v3.QueryExecutorImpl.execute(QueryExecutorImpl.java:192)
at org.postgresql.jdbc2.AbstractJdbc2Statement.execute(AbstractJdbc2Statement.java:451)
at org.postgresql.jdbc2.AbstractJdbc2Statement.executeWithFlags(AbstractJdbc2Statement.java:350)
at org.postgresql.jdbc2.AbstractJdbc2Statement.executeQuery(AbstractJdbc2Statement.java:254)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:601)
at org.postgresql.ds.jdbc23.AbstractJdbc23PooledConnection$StatementHandler.invoke(AbstractJdbc23PooledConnection.java:477)
at com.sun.proxy.$Proxy159.executeQuery(Unknown Source)
at org.jboss.resource.adapter.jdbc.WrappedPreparedStatement.executeQuery(WrappedPreparedStatement.java:342)
at com.example.persistence.dao.impl.Redshift01JdbcDaoImpl.getSomeTableLastArchiveDate(Redshift01JdbcDaoImpl.java:46)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:601)
at org.springframework.aop.support.AopUtils.invokeJoinpointUsingReflection(AopUtils.java:309)
at org.springframework.aop.framework.ReflectiveMethodInvocation.invokeJoinpoint(ReflectiveMethodInvocation.java:183)
at org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:150)
at org.springframework.dao.support.PersistenceExceptionTranslationInterceptor.invoke(PersistenceExceptionTranslationInterceptor.java:155)
at org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:172)
at org.springframework.aop.framework.JdkDynamicAopProxy.invoke(JdkDynamicAopProxy.java:202)
at com.sun.proxy.$Proxy314.getSomeTableLastArchiveDate(Unknown Source)
at com.example.service.impl.someTableArchiveServiceImpl.process(SomeTableArchiveServiceImpl.java:69)
at com.example.service.impl.AbstractBatchServiceImpl.run(AbstractBatchServiceImpl.java:106)
at com.example.service.jms.JmsMessageListener.callBatchService(JmsMessageListener.java:72)
at com.example.service.jms.JmsMessageListener.onMessage(JmsMessageListener.java:136)
at org.springframework.jms.listener.adapter.MessageListenerAdapter.onMessage(MessageListenerAdapter.java:339)
at org.springframework.jms.listener.AbstractMessageListenerContainer.doInvokeListener(AbstractMessageListenerContainer.java:535)
at org.springframework.jms.listener.AbstractMessageListenerContainer.invokeListener(AbstractMessageListenerContainer.java:495)
at org.springframework.jms.listener.AbstractMessageListenerContainer.doExecuteListener(AbstractMessageListenerContainer.java:467)
at org.springframework.jms.listener.AbstractPollingMessageListenerContainer.doReceiveAndExecute(AbstractPollingMessageListenerContainer.java:325)
at org.springframework.jms.listener.AbstractPollingMessageListenerContainer.receiveAndExecute(AbstractPollingMessageListenerContainer.java:263)
at org.springframework.jms.listener.DefaultMessageListenerContainer$AsyncMessageListenerInvoker.invokeListener(DefaultMessageListenerContainer.java:1058)
at org.springframework.jms.listener.DefaultMessageListenerContainer$AsyncMessageListenerInvoker.executeOngoingLoop(DefaultMessageListenerContainer.java:1050)
at org.springframework.jms.listener.DefaultMessageListenerContainer$AsyncMessageListenerInvoker.run(DefaultMessageListenerContainer.java:947)
at java.lang.Thread.run(Thread.java:722)
I would rather not do string concatenation; what am I doing wrong? The PreparedStatement parameter setters work for all other JDBC queries in my code base.
Thank you,
Tom
You can't use a prepared statement to replace arbitrary portions of the query dynamically. Only values that don't affect the query plan can be bound as parameters. You'll have to use string concatenation to do this.
Explanation: a PreparedStatement, as its name indicates, is a statement that is prepared by the database and can then be executed multiple times after with various parameters. Preparing the statement consists in parsing the query to examine which tables and columns are used, and build a plan to execute the query (which index to use, etc.).
The same plan is then used each time the statement is executed with various parameters. If the table from which the query must select data isn't even known when the statement is prepared, no plan can be computed. This is why the following query is fine:
select f.* from foo f where f.name = ?
but these ones aren't fine:
select f.* from ? f where f.name = 'bar' -- unknown table: the plan can't be computed
select f.? from foo f where f.name = 'bar' -- unknown column: the plan can't be computed
select f.* from foo f where f.name ? 'bar' -- unknown operator: the plan can't be computed
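Since the table suffix has to go into the SQL text itself, here is a minimal sketch of the concatenation approach, with the year validated before it is spliced into the identifier; the class name, method, and validation rule are assumptions, not from the post:

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Timestamp;

public class RedshiftTableSuffixExample {

    // The table suffix (a year) cannot be bound as a parameter, so it is validated
    // and concatenated into the SQL text; only real values could be bound.
    static Timestamp getLastStartDate(Connection connection, int year) throws SQLException {
        if (year < 2000 || year > 2100) { // assumed sanity check, not from the post
            throw new IllegalArgumentException("unexpected year: " + year);
        }
        String sql = "SELECT start_date FROM sometable_" + year
                   + " ORDER BY start_date DESC LIMIT 1";
        try (PreparedStatement ps = connection.prepareStatement(sql);
             ResultSet rs = ps.executeQuery()) {
            return rs.next() ? rs.getTimestamp("start_date") : null;
        }
    }
}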

Exception while trying to fetch an un-grouped-by column in jdbc

I have a table called test, defined as follows:
A bigint(20) NOT NULL
B bigint(20) NULL
I'm trying to execute the following prepared statement:
sql = "SELECT * FROM test GROUP BY B";
PreparedStatement preparedStatement = connection.prepareStatement(sql);
ResultSet rs = preparedStatement.executeQuery();
But I get the following exception:
Caused by: com.mysql.jdbc.exceptions.jdbc4.MySQLSyntaxErrorException: 'anya_products.test.A' isn't in GROUP BY
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:39)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:27)
at java.lang.reflect.Constructor.newInstance(Constructor.java:513)
at com.mysql.jdbc.Util.handleNewInstance(Util.java:411)
at com.mysql.jdbc.Util.getInstance(Util.java:386)
at com.mysql.jdbc.SQLError.createSQLException(SQLError.java:1052)
at com.mysql.jdbc.MysqlIO.checkErrorPacket(MysqlIO.java:3609)
at com.mysql.jdbc.MysqlIO.checkErrorPacket(MysqlIO.java:3541)
at com.mysql.jdbc.MysqlIO.sendCommand(MysqlIO.java:2002)
at com.mysql.jdbc.MysqlIO.sqlQueryDirect(MysqlIO.java:2163)
at com.mysql.jdbc.ConnectionImpl.execSQL(ConnectionImpl.java:2624)
at com.mysql.jdbc.PreparedStatement.executeInternal(PreparedStatement.java:2127)
at com.mysql.jdbc.PreparedStatement.executeQuery(PreparedStatement.java:2293)
at com.mchange.v2.c3p0.impl.NewProxyPreparedStatement.executeQuery(NewProxyPreparedStatement.java:76)
The exact same query works in SQLYog. What is the problem? Is it a JDBC limitation?
Thanks
Udi
Here's what I came up with. Not sure it's the most efficient or the only way, but it seems to at least work:
select test.a, test.b from test
inner join (select b, count(b) from test group by b having count(b) = 1) as c
on test.b = c.b
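If it helps, here is a minimal sketch of running the rewritten query through the same PreparedStatement path as the original; the connection handling and class name are assumptions, not from the post:

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;

public class GroupBySample {

    static void printRows(Connection connection) throws SQLException {
        // Same rewrite as above: the outer query no longer groups, so selecting A is
        // legal; only rows whose B value occurs exactly once come back from the join.
        String sql = "SELECT test.a, test.b FROM test "
                   + "INNER JOIN (SELECT b, COUNT(b) FROM test GROUP BY b HAVING COUNT(b) = 1) AS c "
                   + "ON test.b = c.b";
        try (PreparedStatement ps = connection.prepareStatement(sql);
             ResultSet rs = ps.executeQuery()) {
            while (rs.next()) {
                System.out.println(rs.getLong("a") + "\t" + rs.getLong("b"));
            }
        }
    }
}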
