没有“明确”的方法可以做到这一点。你可以使用与此处类似的技巧：https://stackoverflow.com/questions/33193958/change-nullable-property-of-column-in-spark-dataframe
该答案的相关代码:
/**
 * Returns a copy of `df` in which the column named `cn` has its nullable flag
 * set to `nullable`.
 *
 * All other schema fields are carried over unchanged. If no column is named
 * `cn`, the rebuilt schema contains the same fields as the original.
 *
 * @param df       source DataFrame
 * @param cn       name of the column whose nullability is overridden
 * @param nullable nullable flag to store in that column's [[StructField]]
 * @return a DataFrame created from `df.rdd` with the adjusted schema
 */
def setNullableStateOfColumn(df: DataFrame, cn: String, nullable: Boolean): DataFrame = {
  // Rebuild the field list, changing only the nullable flag of the matching column.
  val adjustedFields = df.schema.map { field =>
    if (field.name.equals(cn)) field.copy(nullable = nullable) else field
  }
  // Only the schema is swapped; this function does not check the rows against the new flag.
  df.sqlContext.createDataFrame(df.rdd, StructType(adjustedFields))
}
它会复制 DataFrame 及其架构，但以编程方式指定列是否可为空。
多列版本：
/**
 * Returns a copy of `df` in which every column listed in `nullValues` has its
 * nullable flag set to the mapped value.
 *
 * Columns that are not keys of `nullValues` keep their existing [[StructField]]
 * unchanged. Keys that match no column name have no effect.
 *
 * @param df         source DataFrame
 * @param nullValues map from column name to the nullable flag to apply
 * @return a DataFrame created from `df.rdd` with the adjusted schema
 */
def setNullableStateOfColumn(df: DataFrame, nullValues: Map[String, Boolean]): DataFrame = {
  // Map.get yields Option[Boolean], while StructField.nullable needs a plain Boolean,
  // so unwrap it with fold: keep the field when there is no entry, otherwise override the flag.
  val adjustedFields = df.schema.map { field =>
    nullValues.get(field.name).fold(field)(flag => field.copy(nullable = flag))
  }
  // Only the schema is swapped; this function does not check the rows against the new flags.
  df.sqlContext.createDataFrame(df.rdd, StructType(adjustedFields))
}
用法:
// Example call: the Map keys are column names, the values are the desired nullable flags.
// The call returns a DataFrame; assign the result to use it.
setNullableStateOfColumn(
  df1,
  Map("col1" -> true, "col2" -> true, "col7" -> false)
)