From 1199e4d29a3961efad58c9c3e6657462a125b62b Mon Sep 17 00:00:00 2001 From: Rainer Stuetz Date: Mon, 16 Aug 2021 14:46:21 +0200 Subject: [PATCH] Update data model for AddressTransactions in Spark Remove unused field `timestamp` (not used in graphsense-rest). See #41 --- src/main/scala/info/graphsense/Model.scala | 3 +- .../info/graphsense/Transformation.scala | 8 +--- .../info/graphsense/TransformationJob.scala | 1 - src/test/resources/reference/address_txs.json | 48 +++++++++---------- .../info/graphsense/TransformationTest.scala | 1 - 5 files changed, 27 insertions(+), 34 deletions(-) diff --git a/src/main/scala/info/graphsense/Model.scala b/src/main/scala/info/graphsense/Model.scala index ae3fb4a..a0b629f 100644 --- a/src/main/scala/info/graphsense/Model.scala +++ b/src/main/scala/info/graphsense/Model.scala @@ -121,8 +121,7 @@ case class AddressTransaction( addressId: Int, txId: Long, value: Long, - blockId: Int, - timestamp: Int + blockId: Int ) case class BasicAddress( diff --git a/src/main/scala/info/graphsense/Transformation.scala b/src/main/scala/info/graphsense/Transformation.scala index da836ba..b257f58 100644 --- a/src/main/scala/info/graphsense/Transformation.scala +++ b/src/main/scala/info/graphsense/Transformation.scala @@ -296,7 +296,6 @@ class Transformation( } def computeAddressTransactions( - tx: Dataset[Transaction], regInputs: Dataset[RegularInput], regOutputs: Dataset[RegularOutput], addressIds: Dataset[AddressId] @@ -305,11 +304,8 @@ class Transformation( .withColumn(F.value, -col(F.value)) .union(regOutputs.drop(F.n)) .groupBy(F.txId, F.address) - .agg(sum(F.value).as(F.value)) - .join( - tx.select(F.txId, F.blockId, F.timestamp).distinct(), - F.txId - ) + // blockId is constant in each group, get single value with min + .agg(sum(F.value).as(F.value), min(F.blockId).as(F.blockId)) .join(addressIds, Seq(F.address)) .drop(F.addressPrefix, F.address) .transform(t.withIdGroup(F.addressId, F.addressIdGroup)) diff --git a/src/main/scala/info/graphsense/TransformationJob.scala b/src/main/scala/info/graphsense/TransformationJob.scala index 548af6f..557ab11 100644 --- a/src/main/scala/info/graphsense/TransformationJob.scala +++ b/src/main/scala/info/graphsense/TransformationJob.scala @@ -169,7 +169,6 @@ object TransformationJob { val addressTransactions = transformation .computeAddressTransactions( - transactions, regInputs, regOutputs, addressIds diff --git a/src/test/resources/reference/address_txs.json b/src/test/resources/reference/address_txs.json index 34680e6..a0b9ab8 100644 --- a/src/test/resources/reference/address_txs.json +++ b/src/test/resources/reference/address_txs.json @@ -1,24 +1,24 @@ -{"value": 2500000, "blockId": 1, "txId": 1, "timestamp": 1230948000, "addressIdGroup": 0, "addressId": 0} -{"value": -2500000, "blockId": 2, "txId": 3, "timestamp": 1231466400, "addressIdGroup": 0, "addressId": 0} -{"value": 2500000, "blockId": 2, "txId": 2, "timestamp": 1231466400, "addressIdGroup": 0, "addressId": 1} -{"value": -2500000, "blockId": 3, "txId": 5, "timestamp": 1231552800, "addressIdGroup": 0, "addressId": 1} -{"value": 1480000, "blockId": 2, "txId": 3, "timestamp": 1231466400, "addressIdGroup": 1, "addressId": 2} -{"value": -1480000, "blockId": 3, "txId": 5, "timestamp": 1231552800, "addressIdGroup": 1, "addressId": 2} -{"value": 495000, "blockId": 2, "txId": 3, "timestamp": 1231466400, "addressIdGroup": 1, "addressId": 3} -{"value": -150000, "blockId": 3, "txId": 6, "timestamp": 1231552800, "addressIdGroup": 1, "addressId": 3} -{"value": -345000, "blockId": 4, "txId": 9, "timestamp": 1231639200, "addressIdGroup": 1, "addressId": 3} -{"value": 495000, "blockId": 2, "txId": 3, "timestamp": 1231466400, "addressIdGroup": 2, "addressId": 4} -{"value": -495000, "blockId": 4, "txId": 8, "timestamp": 1231639200, "addressIdGroup": 2, "addressId": 4} -{"value": 2500000, "blockId": 3, "txId": 4, "timestamp": 1231552800, "addressIdGroup": 2, "addressId": 5} -{"value": -2500000, "blockId": 4, "txId": 8, "timestamp": 1231639200, "addressIdGroup": 2, "addressId": 5} -{"value": 1325000, "blockId": 3, "txId": 5, "timestamp": 1231552800, "addressIdGroup": 3, "addressId": 6} -{"value": -1325000, "blockId": 4, "txId": 10, "timestamp": 1231639200, "addressIdGroup": 3, "addressId": 6} -{"value": 100000, "blockId": 3, "txId": 5, "timestamp": 1231552800, "addressIdGroup": 3, "addressId": 7} -{"value": -100000, "blockId": 4, "txId": 10, "timestamp": 1231639200, "addressIdGroup": 3, "addressId": 7} -{"value": 2350000, "blockId": 3, "txId": 5, "timestamp": 1231552800, "addressIdGroup": 4, "addressId": 8} -{"value": 140000, "blockId": 3, "txId": 6, "timestamp": 1231552800, "addressIdGroup": 4, "addressId": 9} -{"value": 340000, "blockId": 4, "txId": 9, "timestamp": 1231639200, "addressIdGroup": 4, "addressId": 9} -{"value": 2500000, "blockId": 4, "txId": 7, "timestamp": 1231639200, "addressIdGroup": 5, "addressId": 10} -{"value": 4300000, "blockId": 4, "txId": 8, "timestamp": 1231639200, "addressIdGroup": 5, "addressId": 11} -{"value": 149000, "blockId": 4, "txId": 8, "timestamp": 1231639200, "addressIdGroup": 6, "addressId": 12} -{"value": 1400000, "blockId": 4, "txId": 10, "timestamp": 1231639200, "addressIdGroup": 6, "addressId": 13} +{"value": 2500000, "blockId": 1, "txId": 1, "addressIdGroup": 0, "addressId": 0} +{"value": -2500000, "blockId": 2, "txId": 3, "addressIdGroup": 0, "addressId": 0} +{"value": 2500000, "blockId": 2, "txId": 2, "addressIdGroup": 0, "addressId": 1} +{"value": -2500000, "blockId": 3, "txId": 5, "addressIdGroup": 0, "addressId": 1} +{"value": 1480000, "blockId": 2, "txId": 3, "addressIdGroup": 1, "addressId": 2} +{"value": -1480000, "blockId": 3, "txId": 5, "addressIdGroup": 1, "addressId": 2} +{"value": 495000, "blockId": 2, "txId": 3, "addressIdGroup": 1, "addressId": 3} +{"value": -150000, "blockId": 3, "txId": 6, "addressIdGroup": 1, "addressId": 3} +{"value": -345000, "blockId": 4, "txId": 9, "addressIdGroup": 1, "addressId": 3} +{"value": 495000, "blockId": 2, "txId": 3, "addressIdGroup": 2, "addressId": 4} +{"value": -495000, "blockId": 4, "txId": 8, "addressIdGroup": 2, "addressId": 4} +{"value": 2500000, "blockId": 3, "txId": 4, "addressIdGroup": 2, "addressId": 5} +{"value": -2500000, "blockId": 4, "txId": 8, "addressIdGroup": 2, "addressId": 5} +{"value": 1325000, "blockId": 3, "txId": 5, "addressIdGroup": 3, "addressId": 6} +{"value": -1325000, "blockId": 4, "txId": 10, "addressIdGroup": 3, "addressId": 6} +{"value": 100000, "blockId": 3, "txId": 5, "addressIdGroup": 3, "addressId": 7} +{"value": -100000, "blockId": 4, "txId": 10, "addressIdGroup": 3, "addressId": 7} +{"value": 2350000, "blockId": 3, "txId": 5, "addressIdGroup": 4, "addressId": 8} +{"value": 140000, "blockId": 3, "txId": 6, "addressIdGroup": 4, "addressId": 9} +{"value": 340000, "blockId": 4, "txId": 9, "addressIdGroup": 4, "addressId": 9} +{"value": 2500000, "blockId": 4, "txId": 7, "addressIdGroup": 5, "addressId": 10} +{"value": 4300000, "blockId": 4, "txId": 8, "addressIdGroup": 5, "addressId": 11} +{"value": 149000, "blockId": 4, "txId": 8, "addressIdGroup": 6, "addressId": 12} +{"value": 1400000, "blockId": 4, "txId": 10, "addressIdGroup": 6, "addressId": 13} diff --git a/src/test/scala/info/graphsense/TransformationTest.scala b/src/test/scala/info/graphsense/TransformationTest.scala index 9c739bd..d18827e 100644 --- a/src/test/scala/info/graphsense/TransformationTest.scala +++ b/src/test/scala/info/graphsense/TransformationTest.scala @@ -106,7 +106,6 @@ class TransformationTest val addressTransactions = t.computeAddressTransactions( - transactions, regInputs, regOutputs, addressIds