Best Kotest code snippet using io.kotest.matchers.tuples.pairs
Source: DatasetFunctionTest.kt
/*-
 * =LICENSE=
 * Kotlin Spark API: API for Spark 3.2+ (Scala 2.12)
 * ----------
 * Copyright (C) 2019 - 2022 JetBrains
 * ----------
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * =LICENSEEND=
 */
package org.jetbrains.kotlinx.spark.api

import ch.tutteli.atrium.api.fluent.en_GB.*
import ch.tutteli.atrium.api.verbs.expect
import io.kotest.core.spec.style.ShouldSpec
import io.kotest.matchers.shouldBe
import org.apache.spark.api.java.JavaDoubleRDD
import org.apache.spark.api.java.JavaPairRDD
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.Dataset
import org.apache.spark.sql.functions
import org.apache.spark.sql.streaming.GroupState
import org.apache.spark.sql.streaming.GroupStateTimeout
import org.jetbrains.kotlinx.spark.api.tuples.*
import scala.Tuple2
import scala.Tuple3
import scala.Tuple4
import scala.Tuple5
import java.io.Serializable

class DatasetFunctionTest : ShouldSpec({

    context("dataset extensions") {
        withSpark(props = mapOf("spark.sql.codegen.comments" to true)) {

            should("handle cached operations") {
                val result = dsOf(1, 2, 3, 4, 5)
                    .map { it X (it + 2) }
                    .withCached {
                        expect(collectAsList()).contains.inAnyOrder.only.values(
                            1 X 3,
                            2 X 4,
                            3 X 5,
                            4 X 6,
                            5 X 7,
                        )
                        val next = filter { it._1 % 2 == 0 }
                        expect(next.collectAsList()).contains.inAnyOrder.only.values(2 X 4, 4 X 6)
                        next
                    }
                    .map { it: Tuple2<Int, Int> ->
                        it + (it._1 + it._2) * 2
                    }
                    .collectAsList()
                expect(result).contains.inOrder.only.values(2 X 4 X 12, 4 X 6 X 20)
            }

            should("handle join operations") {
                data class Left(val id: Int, val name: String)
                data class Right(val id: Int, val value: Int)
                val first = dsOf(Left(1, "a"), Left(2, "b"))
                val second = dsOf(Right(1, 100), Right(3, 300))
                val result = first
                    .leftJoin(second, first.col("id") eq second.col("id"))
                    .map { it._1.id X it._1.name X it._2?.value }
                    .collectAsList()
                expect(result).contains.inOrder.only.values(t(1, "a", 100), t(2, "b", null))
            }

            should("handle map operations") {
                val result = dsOf(listOf(1, 2, 3, 4), listOf(3, 4, 5, 6))
                    .flatMap { it.iterator() }
                    .map { it + 4 }
                    .filter { it < 10 }
                    .collectAsList()
                expect(result).contains.inAnyOrder.only.values(5, 6, 7, 8, 7, 8, 9)
            }

            should("Allow simple forEachPartition in datasets") {
                val dataset = dsOf(
                    SomeClass(intArrayOf(1, 2, 3), 1),
                    SomeClass(intArrayOf(4, 3, 2), 1),
                )
                dataset.forEachPartition {
                    it.forEach {
                        it.b shouldBe 1
                    }
                }
            }

            should("Have easier access to keys and values for key/value datasets") {
                val dataset: Dataset<SomeClass> = dsOf(
                    SomeClass(intArrayOf(1, 2, 3), 1),
                    SomeClass(intArrayOf(4, 3, 2), 1),
                )
                    .groupByKey { it.b }
                    .reduceGroupsK { a, b -> SomeClass(a.a + b.a, a.b) }
                    .takeValues()
                dataset.count() shouldBe 1
            }

            should("Be able to sort datasets with property reference") {
                val dataset: Dataset<SomeClass> = dsOf(
                    SomeClass(intArrayOf(1, 2, 3), 2),
                    SomeClass(intArrayOf(4, 3, 2), 1),
                )
                dataset.sort(SomeClass::b)
                dataset.takeAsList(1).first().b shouldBe 2
                dataset.sort(SomeClass::a, SomeClass::b)
                dataset.takeAsList(1).first().b shouldBe 2
            }

            should("Have Kotlin ready functions in place of overload ambiguity") {
                val dataset: Tuple2<Int, SomeClass> = dsOf(
                    SomeClass(intArrayOf(1, 2, 3), 1),
                    SomeClass(intArrayOf(4, 3, 2), 1),
                )
                    .groupByKey { it: SomeClass -> it.b }
                    .reduceGroupsK { v1: SomeClass, v2: SomeClass -> v1 }
                    .filter { it: Tuple2<Int, SomeClass> -> true } // not sure why this does work, but reduce doesn't
                    .reduceK { v1: Tuple2<Int, SomeClass>, v2: Tuple2<Int, SomeClass> -> v1 }
                dataset._2.a shouldBe intArrayOf(1, 2, 3)
            }
        }
    }

    context("grouped dataset extensions") {
        withSpark(props = mapOf("spark.sql.codegen.comments" to true)) {

            should("perform flat map on grouped datasets") {
                val groupedDataset = listOf(t(1, "a"), t(1, "b"), t(2, "c"))
                    .toDS()
                    .groupByKey { it._1 }
                val flatMapped = groupedDataset.flatMapGroups { key, values ->
                    val collected = values.asSequence().toList()
                    if (collected.size > 1) collected.iterator()
                    else emptyList<Tuple2<Int, String>>().iterator()
                }
                flatMapped.count() shouldBe 2
            }

            should("perform map group with state and timeout conf on grouped datasets") {
                val groupedDataset = listOf(t(1, "a"), t(1, "b"), t(2, "c"))
                    .toDS()
                    .groupByKey { it._1 }
                val mappedWithStateTimeoutConf =
                    groupedDataset.mapGroupsWithState(GroupStateTimeout.NoTimeout()) { key, values, state: GroupState<Int> ->
                        var s by state
                        val collected = values.asSequence().toList()
                        s = key
                        s shouldBe key
                        s!! X collected.map { it._2 }
                    }
                mappedWithStateTimeoutConf.count() shouldBe 2
            }

            should("perform map group with state on grouped datasets") {
                val groupedDataset = listOf(t(1, "a"), t(1, "b"), t(2, "c"))
                    .toDS()
                    .groupByKey { it._1 }
                val mappedWithState = groupedDataset.mapGroupsWithState { key, values, state: GroupState<Int> ->
                    var s by state
                    val collected = values.asSequence().toList()
                    s = key
                    s shouldBe key
                    s!! X collected.map { it._2 }
                }
                mappedWithState.count() shouldBe 2
            }

            should("perform flat map group with state on grouped datasets") {
                val groupedDataset = listOf(t(1, "a"), t(1, "b"), t(2, "c"))
                    .toDS()
                    .groupByKey { it._1 }
                val flatMappedWithState = groupedDataset.mapGroupsWithState { key, values, state: GroupState<Int> ->
                    var s by state
                    val collected = values.asSequence().toList()
                    s = key
                    s shouldBe key
                    if (collected.size > 1) collected.iterator()
                    else emptyList<Tuple2<Int, String>>().iterator()
                }
                flatMappedWithState.count() shouldBe 2
            }

            should("be able to cogroup grouped datasets") {
                val groupedDataset1 = listOf(1 X "a", 1 X "b", 2 X "c")
                    .toDS()
                    .groupByKey { it._1 }
                val groupedDataset2 = listOf(1 X "d", 5 X "e", 3 X "f")
                    .toDS()
                    .groupByKey { it._1 }
                val cogrouped = groupedDataset1.cogroup(groupedDataset2) { key, left, right ->
                    listOf(
                        key to (left.asSequence() + right.asSequence()).map { it._2 }.toList()
                    ).iterator()
                }
                cogrouped.count() shouldBe 4
            }
        }
    }

    context("RDD conversions") {
        withSpark(props = mapOf("spark.sql.codegen.comments" to true)) {

            should("Convert Scala RDD to Dataset") {
                val rdd0: RDD<Int> = sc.parallelize(
                    listOf(1, 2, 3, 4, 5, 6)
                ).rdd()
                val dataset0: Dataset<Int> = rdd0.toDS()
                dataset0.toList<Int>() shouldBe listOf(1, 2, 3, 4, 5, 6)
            }

            should("Convert a JavaRDD to a Dataset") {
                val rdd1: JavaRDD<Int> = sc.parallelize(
                    listOf(1, 2, 3, 4, 5, 6)
                )
                val dataset1: Dataset<Int> = rdd1.toDS()
                dataset1.toList<Int>() shouldBe listOf(1, 2, 3, 4, 5, 6)
            }

            should("Convert JavaDoubleRDD to Dataset") {
                // JavaDoubleRDD
                val rdd2: JavaDoubleRDD = sc.parallelizeDoubles(
                    listOf(1.0, 2.0, 3.0, 4.0, 5.0, 6.0)
                )
                val dataset2: Dataset<Double> = rdd2.toDS()
                dataset2.toList<Double>() shouldBe listOf(1.0, 2.0, 3.0, 4.0, 5.0, 6.0)
            }

            should("Convert JavaPairRDD to Dataset") {
                val rdd3: JavaPairRDD<Int, Double> = sc.parallelizePairs(
                    listOf(t(1, 1.0), t(2, 2.0), t(3, 3.0))
                )
                val dataset3: Dataset<Tuple2<Int, Double>> = rdd3.toDS()
                dataset3.toList<Tuple2<Int, Double>>() shouldBe listOf(t(1, 1.0), t(2, 2.0), t(3, 3.0))
            }

            should("Convert Kotlin Serializable data class RDD to Dataset") {
                val rdd4 = sc.parallelize(
                    listOf(SomeClass(intArrayOf(1, 2), 0))
                )
                val dataset4 = rdd4.toDS()
                dataset4.toList<SomeClass>().first().let { (a, b) ->
                    a contentEquals intArrayOf(1, 2) shouldBe true
                    b shouldBe 0
                }
            }

            should("Convert Tuple RDD to Dataset") {
                val rdd5 = sc.parallelize(
                    listOf(t(1.0, 4))
                )
                val dataset5 = rdd5.toDS()
                dataset5.toList<Tuple2<Double, Int>>() shouldBe listOf(t(1.0, 4))
            }

            should("Convert List RDD to Dataset") {
                val rdd6 = sc.parallelize(
                    listOf(listOf(1, 2, 3), listOf(4, 5, 6))
                )
                val dataset6 = rdd6.toDS()
                dataset6.toList<List<Int>>() shouldBe listOf(listOf(1, 2, 3), listOf(4, 5, 6))
            }

            should("Sort Tuple2 Dataset") {
                val list = listOf(
                    t(1, 6),
                    t(2, 5),
                    t(3, 4),
                )
                val dataset = list.toDS()
                dataset.sortByKey().collectAsList() shouldBe list.sortedBy { it._1 }
                dataset.sortByValue().collectAsList() shouldBe list.sortedBy { it._2 }
            }

            should("Sort Pair Dataset") {
                val list = listOf(
                    Pair(1, 6),
                    Pair(2, 5),
                    Pair(3, 4),
                )
                val dataset = list.toDS()
                dataset.sortByKey().collectAsList() shouldBe list.sortedBy { it.first }
                dataset.sortByValue().collectAsList() shouldBe list.sortedBy { it.second }
            }
        }
    }

    context("Column functions") {
        withSpark(props = mapOf("spark.sql.codegen.comments" to true)) {

            @Suppress("UNCHECKED_CAST")
            should("support dataset select") {
                val dataset = dsOf(
                    SomeClass(intArrayOf(1, 2, 3), 3),
                    SomeClass(intArrayOf(1, 2, 4), 5),
                )
                val newDS1WithAs: Dataset<IntArray> = dataset.selectTyped(
                    functions.col("a").`as`<IntArray>(),
                )
                newDS1WithAs.collectAsList()
                val newDS2: Dataset<Tuple2<IntArray, Int>> = dataset.selectTyped(
                    col(SomeClass::a), // NOTE: this only works on 3.0, returning a data class with an array in it
                    col(SomeClass::b),
                )
                newDS2.collectAsList()
                val newDS3: Dataset<Tuple3<IntArray, Int, Int>> = dataset.selectTyped(
                    col(SomeClass::a),
                    col(SomeClass::b),
                    col(SomeClass::b),
                )
                newDS3.collectAsList()
                val newDS4: Dataset<Tuple4<IntArray, Int, Int, Int>> = dataset.selectTyped(
                    col(SomeClass::a),
                    col(SomeClass::b),
                    col(SomeClass::b),
                    col(SomeClass::b),
                )
                newDS4.collectAsList()
                val newDS5: Dataset<Tuple5<IntArray, Int, Int, Int, Int>> = dataset.selectTyped(
                    col(SomeClass::a),
                    col(SomeClass::b),
                    col(SomeClass::b),
                    col(SomeClass::b),
                    col(SomeClass::b),
                )
                newDS5.collectAsList()
            }

            should("Access columns using invoke on datasets") {
                val dataset = dsOf(
                    SomeClass(intArrayOf(1, 2, 3), 4),
                    SomeClass(intArrayOf(4, 3, 2), 1),
                )
                dataset.col("a") shouldBe dataset("a")
            }

            should("Use infix- and operator funs on columns") {
                val dataset = dsOf(
                    SomeOtherClass(intArrayOf(1, 2, 3), 4, true),
                    SomeOtherClass(intArrayOf(4, 3, 2), 1, true),
                )
                (dataset("a") == dataset("a")) shouldBe dataset("a").equals(dataset("a"))
                (dataset("a") != dataset("a")) shouldBe !dataset("a").equals(dataset("a"))
                (dataset("a") eq dataset("a")) shouldBe dataset("a").equalTo(dataset("a"))
                dataset("a").equalTo(dataset("a")) shouldBe (dataset("a") `===` dataset("a"))
                (dataset("a") neq dataset("a")) shouldBe dataset("a").notEqual(dataset("a"))
                dataset("a").notEqual(dataset("a")) shouldBe (dataset("a") `=!=` dataset("a"))
                !(dataset("a") eq dataset("a")) shouldBe dataset("a").notEqual(dataset("a"))
                dataset("a").notEqual(dataset("a")) shouldBe (!(dataset("a") `===` dataset("a")))
                -dataset("b") shouldBe functions.negate(dataset("b"))
                !dataset("c") shouldBe functions.not(dataset("c"))
                dataset("b") gt 3 shouldBe dataset("b").gt(3)
                dataset("b") lt 3 shouldBe dataset("b").lt(3)
                dataset("b") leq 3 shouldBe dataset("b").leq(3)
                dataset("b") geq 3 shouldBe dataset("b").geq(3)
                dataset("b") inRangeOf 0..2 shouldBe dataset("b").between(0, 2)
                dataset("c") or dataset("c") shouldBe dataset("c").or(dataset("c"))
                dataset("c") and dataset("c") shouldBe dataset("c").and(dataset("c"))
                dataset("c").and(dataset("c")) shouldBe (dataset("c") `&&` dataset("c"))
                dataset("b") + dataset("b") shouldBe dataset("b").plus(dataset("b"))
                dataset("b") - dataset("b") shouldBe dataset("b").minus(dataset("b"))
                dataset("b") * dataset("b") shouldBe dataset("b").multiply(dataset("b"))
                dataset("b") / dataset("b") shouldBe dataset("b").divide(dataset("b"))
                dataset("b") % dataset("b") shouldBe dataset("b").mod(dataset("b"))
                dataset("b")[0] shouldBe dataset("b").getItem(0)
            }

            should("Handle TypedColumns") {
                val dataset = dsOf(
                    SomeOtherClass(intArrayOf(1, 2, 3), 4, true),
                    SomeOtherClass(intArrayOf(4, 3, 2), 1, true),
                )
                // walking over all column creation methods
                val b: Dataset<Tuple3<Int, IntArray, Boolean>> = dataset.select(
                    dataset.col(SomeOtherClass::b),
                    dataset(SomeOtherClass::a),
                    col(SomeOtherClass::c),
                )
                b.collectAsList()
            }

            should("Handle some where queries using column operator functions") {
                val dataset = dsOf(
                    SomeOtherClass(intArrayOf(1, 2, 3), 4, true),
                    SomeOtherClass(intArrayOf(4, 3, 2), 1, true),
                )
                dataset.collectAsList()
                val column = functions.col("b").`as`<IntArray>()
                val b = dataset.where(column gt 3 and col(SomeOtherClass::c))
                b.count() shouldBe 1
            }
        }
    }
})

data class SomeOtherClass(val a: IntArray, val b: Int, val c: Boolean) : Serializable
...
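The test file above leans on kotlin-spark-api helpers (withSpark, dsOf, leftJoin, the X tuple builder) inside Kotest ShouldSpec blocks. As a standalone illustration of the pattern from "handle join operations", here is a minimal sketch outside a test suite; it assumes the same kotlin-spark-api dependency, and Customer, Order, and the main entry point are hypothetical names, not part of the original source.

package example

import org.jetbrains.kotlinx.spark.api.*
import org.jetbrains.kotlinx.spark.api.tuples.*

// Hypothetical domain types for this sketch only.
data class Customer(val id: Int, val name: String)
data class Order(val customerId: Int, val amount: Int)

fun main() = withSpark {
    val customers = dsOf(Customer(1, "a"), Customer(2, "b"))
    val orders = dsOf(Order(1, 100), Order(3, 300))

    // leftJoin keeps every left-hand row; the right-hand side becomes
    // nullable, which is why the test dereferences it with it._2?.value.
    customers
        .leftJoin(orders, customers.col("id") eq orders.col("customerId"))
        .map { it._1.name X it._2?.amount } // Tuple2<String, Int?> per row
        .show()
}

The nullable right-hand side is the main design point here: a left join may find no match, and the tuple's Int? element surfaces that in the type system instead of failing at runtime.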
pairs
Using AI Code Generation
val triple = Triple(1, "one", 1.0)
triple shouldEqual Triple(1, "one", 1.0)
val quads = Quads(1, "one", 1.0, true)
quads shouldEqual Quads(1, "one", 1.0, true)
val quints = Quints(1, "one", 1.0, true, 1)
quints shouldEqual Quints(1, "one", 1.0, true, 1)
val sexts = Sexts(1, "one", 1.0, true, 1, "one")
sexts shouldEqual Sexts(1, "one", 1.0, true, 1, "one")
val septuples = Septuples(1, "one", 1.0, true, 1, "one", 1.0)
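Note that the generated snippet above does not compile as written: Quads, Quints, Sexts, and Septuples are not standard Kotlin types, and shouldEqual is not a Kotest core matcher (it appears in other assertion libraries such as Kluent). A minimal sketch of the same structural assertions using only the standard library's Pair/Triple and the shouldBe matcher already imported in the test file above:

import io.kotest.core.spec.style.ShouldSpec
import io.kotest.matchers.shouldBe

class PairMatcherTest : ShouldSpec({
    should("compare pairs and triples structurally") {
        val pair = Pair(1, "one")
        pair shouldBe Pair(1, "one")         // data-class equality covers both elements
        pair.first shouldBe 1                // or assert one side at a time
        Triple(1, "one", 1.0) shouldBe Triple(1, "one", 1.0)
    }
})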