ArrowVectorIterator.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.arrow.adapter.jdbc;
import static org.apache.arrow.adapter.jdbc.JdbcToArrowUtils.isColumnNullable;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.util.Iterator;
import org.apache.arrow.adapter.jdbc.consumer.CompositeJdbcConsumer;
import org.apache.arrow.adapter.jdbc.consumer.JdbcConsumer;
import org.apache.arrow.adapter.jdbc.consumer.exceptions.JdbcConsumerException;
import org.apache.arrow.util.AutoCloseables;
import org.apache.arrow.util.Preconditions;
import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Schema;
import org.apache.arrow.vector.util.ValueVectorUtility;
/** VectorSchemaRoot iterator for partially converting JDBC data. */
public class ArrowVectorIterator implements Iterator<VectorSchemaRoot>, AutoCloseable {
private final ResultSet resultSet;
private final JdbcToArrowConfig config;
private final Schema schema;
private final ResultSetMetaData rsmd;
private final JdbcConsumer[] consumers;
final CompositeJdbcConsumer compositeConsumer;
// this is used only if resuing vector schema root is enabled.
private VectorSchemaRoot nextBatch;
private final int targetBatchSize;
// This is used to track whether the ResultSet has been fully read, and is needed specifically for
// cases where there
// is a ResultSet having zero rows (empty):
private boolean readComplete = false;
/** Construct an instance. */
private ArrowVectorIterator(ResultSet resultSet, JdbcToArrowConfig config) throws SQLException {
this.resultSet = resultSet;
this.config = config;
this.schema = JdbcToArrowUtils.jdbcToArrowSchema(resultSet.getMetaData(), config);
this.targetBatchSize = config.getTargetBatchSize();
rsmd = resultSet.getMetaData();
consumers = new JdbcConsumer[rsmd.getColumnCount()];
this.compositeConsumer = new CompositeJdbcConsumer(consumers);
this.nextBatch = config.isReuseVectorSchemaRoot() ? createVectorSchemaRoot() : null;
}
/** Create a ArrowVectorIterator to partially convert data. */
public static ArrowVectorIterator create(ResultSet resultSet, JdbcToArrowConfig config)
throws SQLException {
ArrowVectorIterator iterator = null;
try {
iterator = new ArrowVectorIterator(resultSet, config);
} catch (Throwable e) {
AutoCloseables.close(e, iterator);
throw new RuntimeException("Error occurred while creating iterator.", e);
}
return iterator;
}
private void consumeData(VectorSchemaRoot root) {
// consume data
try {
int readRowCount = 0;
if (targetBatchSize == JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) {
while (resultSet.next()) {
ValueVectorUtility.ensureCapacity(root, readRowCount + 1);
compositeConsumer.consume(resultSet);
readRowCount++;
}
readComplete = true;
} else {
while ((readRowCount < targetBatchSize) && !readComplete) {
if (resultSet.next()) {
compositeConsumer.consume(resultSet);
readRowCount++;
} else {
readComplete = true;
}
}
}
root.setRowCount(readRowCount);
} catch (Throwable e) {
compositeConsumer.close();
if (e instanceof JdbcConsumerException) {
throw (JdbcConsumerException) e;
} else {
throw new RuntimeException("Error occurred while consuming data.", e);
}
}
}
private VectorSchemaRoot createVectorSchemaRoot() throws SQLException {
VectorSchemaRoot root = null;
try {
root = VectorSchemaRoot.create(schema, config.getAllocator());
if (config.getTargetBatchSize() != JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) {
ValueVectorUtility.preAllocate(root, config.getTargetBatchSize());
}
} catch (Throwable e) {
if (root != null) {
root.close();
}
throw new RuntimeException("Error occurred while creating schema root.", e);
}
initialize(root);
return root;
}
private void initialize(VectorSchemaRoot root) throws SQLException {
for (int i = 1; i <= consumers.length; i++) {
final JdbcFieldInfo columnFieldInfo =
JdbcToArrowUtils.getJdbcFieldInfoForColumn(rsmd, i, config);
ArrowType arrowType = config.getJdbcToArrowTypeConverter().apply(columnFieldInfo);
consumers[i - 1] =
config
.getJdbcConsumerGetter()
.apply(
arrowType,
i,
isColumnNullable(resultSet.getMetaData(), i, columnFieldInfo),
root.getVector(i - 1),
config);
}
}
// Loads the next schema root or null if no more rows are available.
private void load(VectorSchemaRoot root) {
for (int i = 0; i < consumers.length; i++) {
FieldVector vec = root.getVector(i);
if (config.isReuseVectorSchemaRoot()) {
// if we are reusing the vector schema root,
// we must reset the vector before populating it with data.
vec.reset();
}
consumers[i].resetValueVector(vec);
}
consumeData(root);
}
@Override
public boolean hasNext() {
return !readComplete;
}
/**
* Gets the next vector. If {@link JdbcToArrowConfig#isReuseVectorSchemaRoot()} is false, the
* client is responsible for freeing its resources.
*
* @throws JdbcConsumerException on error from VectorConsumer
*/
@Override
public VectorSchemaRoot next() {
Preconditions.checkArgument(hasNext());
try {
VectorSchemaRoot ret =
config.isReuseVectorSchemaRoot() ? nextBatch : createVectorSchemaRoot();
load(ret);
return ret;
} catch (Exception e) {
close();
if (e instanceof JdbcConsumerException) {
throw (JdbcConsumerException) e;
} else {
throw new RuntimeException("Error occurred while getting next schema root.", e);
}
}
}
/**
* Clean up resources ONLY WHEN THE {@link VectorSchemaRoot} HOLDING EACH BATCH IS REUSED. If a
* new VectorSchemaRoot is created for each batch, each root must be closed manually by the client
* code.
*/
@Override
public void close() {
if (config.isReuseVectorSchemaRoot()) {
nextBatch.close();
compositeConsumer.close();
}
}
}