Skip to content
This repository was archived by the owner on Feb 27, 2025. It is now read-only.

Commit cf7b810

Browse files
committed
remove dfAutoColCount() and make columnsToWrite a Set
1 parent 8b7aa57 commit cf7b810

File tree

2 files changed

+10
-39
lines changed

2 files changed

+10
-39
lines changed

src/main/scala/com/microsoft/sqlserver/jdbc/spark/SQLServerBulkJdbcOptions.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ class SQLServerBulkJdbcOptions(val params: CaseInsensitiveMap[String])
7474

7575
// user input column names array to match dataframe
7676
val columnsToWrite =
77-
params.getOrElse("columnsToWrite", Array[String]())
77+
params.getOrElse("columnsToWrite", Set())
7878

7979
// Not a feature
8080
// Only used for internally testing data idempotency

src/main/scala/com/microsoft/sqlserver/jdbc/spark/utils/BulkCopyUtils.scala

+9-38
Original file line numberDiff line numberDiff line change
@@ -205,29 +205,6 @@ object BulkCopyUtils extends Logging {
205205
autoCols.toList
206206
}
207207

208-
/**
209-
* dfAutoColCount
210-
* utility function to get number of auto columns in dataframe.
211-
* Use number of auto columns in dataframe to get number of non auto columns in df,
212-
* and compare with the number of non auto columns in sql table
213-
*/
214-
private[spark] def dfAutoColCount(
215-
dfColNames: List[String],
216-
autoCols: List[String],
217-
dfColCaseMap: Map[String, String],
218-
isCaseSensitive: Boolean): Int ={
219-
var dfAutoColCt = 0
220-
for (j <- 0 to autoCols.length-1){
221-
if (isCaseSensitive && dfColNames.contains(autoCols(j)) ||
222-
!isCaseSensitive && dfColCaseMap.contains(autoCols(j).toLowerCase())
223-
&& dfColCaseMap(autoCols(j).toLowerCase()) == autoCols(j)) {
224-
dfAutoColCt += 1
225-
}
226-
}
227-
dfAutoColCt
228-
}
229-
230-
231208
/**
232209
* getColMetadataMap
233210
* Utility function convert result set meta data to array.
@@ -315,20 +292,13 @@ object BulkCopyUtils extends Logging {
315292
val tableCols = getSchema(rs, JdbcDialects.get(url))
316293
val autoCols = getAutoCols(conn, dbtable)
317294

295+
val columnsToWrite = columnsToWrite.toSet
296+
318297
val prefix = "Spark Dataframe and SQL Server table have differing"
319298

320-
if (autoCols.length == 0) {
321-
assertIfCheckEnabled(dfCols.length == tableCols.length, strictSchemaCheck,
322-
s"${prefix} numbers of columns")
323-
} else if (strictSchemaCheck) {
324-
val dfColNames = df.schema.fieldNames.toList
325-
val dfAutoColCt = dfAutoColCount(dfColNames, autoCols, dfColCaseMap, isCaseSensitive)
326-
// if df has auto column(s), check column length using non auto column in df and table.
327-
// non auto column number in df: dfCols.length - dfAutoColCt
328-
// non auto column number in table: tableCols.length - autoCols.length
329-
assertIfCheckEnabled(dfCols.length-dfAutoColCt == tableCols.length-autoCols.length, strictSchemaCheck,
330-
s"${prefix} numbers of columns")
331-
}
299+
// auto columns should not exist in df
300+
assertIfCheckEnabled(dfCols.length + autoCols.length == tableCols.length, strictSchemaCheck,
301+
s"${prefix} numbers of columns")
332302

333303
if (columnsToWrite.isEmpty()) {
334304
val result = new Array[ColumnMetadata](tableCols.length - autoCols.length)
@@ -341,10 +311,11 @@ object BulkCopyUtils extends Logging {
341311
for (i <- 0 to tableCols.length-1) {
342312
val tableColName = tableCols(i).name
343313
var dfFieldIndex = -1
344-
// set dfFieldIndex = -1 for all auto columns to skip ColumnMetadata
345-
if (!columnsToWrite.isEmpty() && !columnsToWrite.contain(tableColName)) {
346-
logDebug(s"skipping col index $i col name $tableColName, not provided in columnsToWrite list")
314+
if (!columnsToWrite.isEmpty() && !columnsToWrite.contains(tableColName)) {
315+
// if columnsToWrite is provided and this column's name is not in it, skip column mapping and ColumnMetadata
316+
logDebug(s"skipping col index $i col name $tableColName, user not provided in columnsToWrite list")
347317
} else if (autoCols.contains(tableColName)) {
318+
// if this is an auto-generated column, skip column mapping and ColumnMetadata
348319
logDebug(s"skipping auto generated col index $i col name $tableColName dfFieldIndex $dfFieldIndex")
349320
}else{
350321
var dfColName:String = ""

0 commit comments

Comments
 (0)