JdbcToArrowConfigBuilder.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.arrow.adapter.jdbc;
import static org.apache.arrow.adapter.jdbc.JdbcToArrowConfig.DEFAULT_TARGET_BATCH_SIZE;
import java.math.RoundingMode;
import java.util.Calendar;
import java.util.Map;
import java.util.function.Function;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.util.Preconditions;
import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.types.pojo.ArrowType;
/**
 * Builder for {@link JdbcToArrowConfig} instances.
 *
 * <p>Every setter returns the builder itself so calls can be chained. The only value this builder
 * validates is the allocator, which must be non-null; every other option is handed to the {@link
 * JdbcToArrowConfig} constructor unchanged when {@link #build()} is called.
 */
public class JdbcToArrowConfigBuilder {
  private Calendar calendar;
  private BufferAllocator allocator;
  private boolean includeMetadata;
  private boolean reuseVectorSchemaRoot;
  private Map<Integer, JdbcFieldInfo> arraySubTypesByColumnIndex;
  private Map<String, JdbcFieldInfo> arraySubTypesByColumnName;
  private Map<Integer, JdbcFieldInfo> explicitTypesByColumnIndex;
  private Map<String, JdbcFieldInfo> explicitTypesByColumnName;
  private Map<String, String> schemaMetadata;
  private Map<Integer, Map<String, String>> columnMetadataByColumnIndex;
  private int targetBatchSize;
  private Function<JdbcFieldInfo, ArrowType> jdbcToArrowTypeConverter;
  private JdbcToArrowConfig.JdbcConsumerFactory jdbcConsumerGetter;
  private RoundingMode bigDecimalRoundingMode;

  /**
   * Default constructor for the <code>JdbcToArrowConfigBuilder</code>. Use the setter methods for
   * the allocator and calendar; the allocator must be set. Otherwise, {@link #build()} will throw a
   * {@link NullPointerException}.
   *
   * <p>The target batch size starts at {@link JdbcToArrowConfig#DEFAULT_TARGET_BATCH_SIZE}, the
   * same value the other constructors use.
   */
  public JdbcToArrowConfigBuilder() {
    this.allocator = null;
    this.calendar = null;
    this.includeMetadata = false;
    this.reuseVectorSchemaRoot = false;
    this.arraySubTypesByColumnIndex = null;
    this.arraySubTypesByColumnName = null;
    this.explicitTypesByColumnIndex = null;
    this.explicitTypesByColumnName = null;
    this.schemaMetadata = null;
    this.columnMetadataByColumnIndex = null;
    this.bigDecimalRoundingMode = null;
    // Previously left at the implicit int default of 0 on this code path, which is neither the
    // documented default nor NO_LIMIT_BATCH_SIZE. Initialise it here so every constructor agrees.
    this.targetBatchSize = DEFAULT_TARGET_BATCH_SIZE;
  }

  /**
   * Constructor for the <code>JdbcToArrowConfigBuilder</code>. The allocator is required, and a
   * {@link NullPointerException} will be thrown if it is <code>null</code>.
   *
   * <p>The allocator is used to construct Arrow vectors from the JDBC ResultSet. The calendar is
   * used to determine the time zone of {@link java.sql.Timestamp} fields and convert {@link
   * java.sql.Date}, {@link java.sql.Time}, and {@link java.sql.Timestamp} fields to a single,
   * common time zone when reading from the result set.
   *
   * @param allocator The Arrow Vector memory allocator.
   * @param calendar The calendar to use when constructing timestamp fields. Not validated by this
   *     builder.
   * @throws NullPointerException if <code>allocator</code> is <code>null</code>.
   */
  public JdbcToArrowConfigBuilder(BufferAllocator allocator, Calendar calendar) {
    // The no-arg constructor establishes all defaults, including the target batch size.
    this();
    Preconditions.checkNotNull(allocator, "Memory allocator cannot be null");
    this.allocator = allocator;
    this.calendar = calendar;
  }

  /**
   * Constructor for the <code>JdbcToArrowConfigBuilder</code>. The allocator is required, and a
   * {@link NullPointerException} will be thrown if it is <code>null</code>. The calendar is not
   * validated by this builder.
   *
   * <p>The allocator is used to construct Arrow vectors from the JDBC ResultSet. The calendar is
   * used to determine the time zone of {@link java.sql.Timestamp} fields and convert {@link
   * java.sql.Date}, {@link java.sql.Time}, and {@link java.sql.Timestamp} fields to a single,
   * common time zone when reading from the result set.
   *
   * <p>The <code>includeMetadata</code> argument, if <code>true</code> will cause various
   * information about each database field to be added to the Vector Schema's field metadata.
   *
   * @param allocator The Arrow Vector memory allocator.
   * @param calendar The calendar to use when constructing timestamp fields.
   * @param includeMetadata Whether to include JDBC field metadata in the Arrow Schema field
   *     metadata.
   * @throws NullPointerException if <code>allocator</code> is <code>null</code>.
   */
  public JdbcToArrowConfigBuilder(
      BufferAllocator allocator, Calendar calendar, boolean includeMetadata) {
    this(allocator, calendar);
    this.includeMetadata = includeMetadata;
  }

  /**
   * Sets the memory allocator to use when constructing the Arrow vectors from the ResultSet.
   *
   * @param allocator the allocator to set.
   * @return This instance of the <code>JdbcToArrowConfigBuilder</code>, for chaining.
   * @throws NullPointerException if <code>allocator</code> is null.
   */
  public JdbcToArrowConfigBuilder setAllocator(BufferAllocator allocator) {
    Preconditions.checkNotNull(allocator, "Memory allocator cannot be null");
    this.allocator = allocator;
    return this;
  }

  /**
   * Sets the {@link Calendar} to use when constructing timestamp fields in the Arrow schema, and
   * reading time-based fields from the JDBC <code>ResultSet</code>.
   *
   * @param calendar the calendar to set. Stored as-is; no null check is performed here.
   * @return This instance of the <code>JdbcToArrowConfigBuilder</code>, for chaining.
   */
  public JdbcToArrowConfigBuilder setCalendar(Calendar calendar) {
    this.calendar = calendar;
    return this;
  }

  /**
   * Sets whether to include JDBC ResultSet field metadata in the Arrow Schema field metadata.
   *
   * @param includeMetadata Whether to include or exclude JDBC metadata in the Arrow Schema field
   *     metadata.
   * @return This instance of the <code>JdbcToArrowConfigBuilder</code>, for chaining.
   */
  public JdbcToArrowConfigBuilder setIncludeMetadata(boolean includeMetadata) {
    this.includeMetadata = includeMetadata;
    return this;
  }

  /**
   * Sets the mapping of column-index-to-{@link JdbcFieldInfo} used for columns of type {@link
   * java.sql.Types#ARRAY}. The column index is 1-based, to match the JDBC column index.
   *
   * @param map The mapping. The reference is stored without copying.
   * @return This instance of the <code>JdbcToArrowConfigBuilder</code>, for chaining.
   */
  public JdbcToArrowConfigBuilder setArraySubTypeByColumnIndexMap(Map<Integer, JdbcFieldInfo> map) {
    this.arraySubTypesByColumnIndex = map;
    return this;
  }

  /**
   * Sets the mapping of column-name-to-{@link JdbcFieldInfo} used for columns of type {@link
   * java.sql.Types#ARRAY}.
   *
   * @param map The mapping. The reference is stored without copying.
   * @return This instance of the <code>JdbcToArrowConfigBuilder</code>, for chaining.
   */
  public JdbcToArrowConfigBuilder setArraySubTypeByColumnNameMap(Map<String, JdbcFieldInfo> map) {
    this.arraySubTypesByColumnName = map;
    return this;
  }

  /**
   * Sets the mapping of column-index-to-{@link JdbcFieldInfo} used for column types.
   *
   * <p>This can be useful to override type information from JDBC drivers that provide incomplete
   * type info, e.g. DECIMAL with precision = scale = 0.
   *
   * <p>The column index is 1-based, to match the JDBC column index.
   *
   * @param map The mapping. The reference is stored without copying.
   * @return This instance of the <code>JdbcToArrowConfigBuilder</code>, for chaining.
   */
  public JdbcToArrowConfigBuilder setExplicitTypesByColumnIndex(Map<Integer, JdbcFieldInfo> map) {
    this.explicitTypesByColumnIndex = map;
    return this;
  }

  /**
   * Sets the mapping of column-name-to-{@link JdbcFieldInfo} used for column types.
   *
   * <p>This can be useful to override type information from JDBC drivers that provide incomplete
   * type info, e.g. DECIMAL with precision = scale = 0.
   *
   * @param map The mapping. The reference is stored without copying.
   * @return This instance of the <code>JdbcToArrowConfigBuilder</code>, for chaining.
   */
  public JdbcToArrowConfigBuilder setExplicitTypesByColumnName(Map<String, JdbcFieldInfo> map) {
    this.explicitTypesByColumnName = map;
    return this;
  }

  /**
   * Set the target number of rows to convert at once.
   *
   * <p>Use {@link JdbcToArrowConfig#NO_LIMIT_BATCH_SIZE} to read all rows at once. If this method
   * is never called, the builder uses {@link JdbcToArrowConfig#DEFAULT_TARGET_BATCH_SIZE}.
   *
   * @param targetBatchSize the target batch size. Stored as-is; no range check is performed here.
   * @return This instance of the <code>JdbcToArrowConfigBuilder</code>, for chaining.
   */
  public JdbcToArrowConfigBuilder setTargetBatchSize(int targetBatchSize) {
    this.targetBatchSize = targetBatchSize;
    return this;
  }

  /**
   * Set the function used to convert JDBC types to Arrow types.
   *
   * <p>Defaults to wrapping {@link JdbcToArrowUtils#getArrowTypeFromJdbcType(JdbcFieldInfo,
   * Calendar)}.
   *
   * @param jdbcToArrowTypeConverter the conversion function.
   * @return This instance of the <code>JdbcToArrowConfigBuilder</code>, for chaining.
   * @see JdbcToArrowUtils#reportUnsupportedTypesAsUnknown(Function)
   */
  public JdbcToArrowConfigBuilder setJdbcToArrowTypeConverter(
      Function<JdbcFieldInfo, ArrowType> jdbcToArrowTypeConverter) {
    this.jdbcToArrowTypeConverter = jdbcToArrowTypeConverter;
    return this;
  }

  /**
   * Set the function used to get a JDBC consumer for a given type.
   *
   * <p>Defaults to wrapping {@link JdbcToArrowUtils#getConsumer(ArrowType, Integer, Boolean,
   * FieldVector, JdbcToArrowConfig)}.
   *
   * @param jdbcConsumerGetter the consumer factory.
   * @return This instance of the <code>JdbcToArrowConfigBuilder</code>, for chaining.
   */
  public JdbcToArrowConfigBuilder setJdbcConsumerGetter(
      JdbcToArrowConfig.JdbcConsumerFactory jdbcConsumerGetter) {
    this.jdbcConsumerGetter = jdbcConsumerGetter;
    return this;
  }

  /**
   * Set whether to use the same {@link org.apache.arrow.vector.VectorSchemaRoot} instance on each
   * iteration, or to allocate a new one.
   *
   * @param reuseVectorSchemaRoot <code>true</code> to reuse the same instance, <code>false</code>
   *     to allocate a new one.
   * @return This instance of the <code>JdbcToArrowConfigBuilder</code>, for chaining.
   */
  public JdbcToArrowConfigBuilder setReuseVectorSchemaRoot(boolean reuseVectorSchemaRoot) {
    this.reuseVectorSchemaRoot = reuseVectorSchemaRoot;
    return this;
  }

  /**
   * Set metadata for schema.
   *
   * @param schemaMetadata the schema-level metadata. The reference is stored without copying.
   * @return This instance of the <code>JdbcToArrowConfigBuilder</code>, for chaining.
   */
  public JdbcToArrowConfigBuilder setSchemaMetadata(Map<String, String> schemaMetadata) {
    this.schemaMetadata = schemaMetadata;
    return this;
  }

  /**
   * Set metadata from columnIndex->meta map on per field basis.
   *
   * @param columnMetadataByColumnIndex the per-column metadata, keyed by column index. The
   *     reference is stored without copying.
   * @return This instance of the <code>JdbcToArrowConfigBuilder</code>, for chaining.
   */
  public JdbcToArrowConfigBuilder setColumnMetadataByColumnIndex(
      Map<Integer, Map<String, String>> columnMetadataByColumnIndex) {
    this.columnMetadataByColumnIndex = columnMetadataByColumnIndex;
    return this;
  }

  /**
   * Set the rounding mode used when the scale of the actual value does not match the declared
   * scale.
   *
   * <p>By default, an error is raised in such cases.
   *
   * @param bigDecimalRoundingMode the rounding mode to apply.
   * @return This instance of the <code>JdbcToArrowConfigBuilder</code>, for chaining.
   */
  public JdbcToArrowConfigBuilder setBigDecimalRoundingMode(RoundingMode bigDecimalRoundingMode) {
    this.bigDecimalRoundingMode = bigDecimalRoundingMode;
    return this;
  }

  /**
   * This builds the {@link JdbcToArrowConfig} from the values currently held by this builder.
   *
   * <p>The builder is not modified, so further setter calls followed by another <code>build()
   * </code> are possible.
   *
   * @return The built {@link JdbcToArrowConfig}
   * @throws NullPointerException if the allocator was not set.
   */
  public JdbcToArrowConfig build() {
    return new JdbcToArrowConfig(
        allocator,
        calendar,
        includeMetadata,
        reuseVectorSchemaRoot,
        arraySubTypesByColumnIndex,
        arraySubTypesByColumnName,
        targetBatchSize,
        jdbcToArrowTypeConverter,
        jdbcConsumerGetter,
        explicitTypesByColumnIndex,
        explicitTypesByColumnName,
        schemaMetadata,
        columnMetadataByColumnIndex,
        bigDecimalRoundingMode);
  }
}