Adding dataflow: MappingOrderAggregation Adding dataset: LakeFileOrderHeaderParquet Adding dataset: LakeFileOrderDetailLinesParquet Adding dataset: TableOrderSummary Adding linkedService: traininglake01noneKV Adding linkedService: trainingdb01 Adding linkedService: TrainingKeys01 Adding dataflow: WranglingOrderAggregation Adding dataset: GetSetMetadata Adding dataset: AnyDatabaseTable Adding linkedService: AnyDatabaseConnection Adding dataset: LaptopFolders Adding linkedService: LaptopFiles Adding integrationRuntime: PaulsLaptopIR Adding dataset: LaptopFilePersonCSV Adding dataset: LakeFiles Adding linkedService: traininglake01 Adding dataset: LaptopsFiles Adding dataset: LakeFilePersonCSV Adding dataset: LakeFileParquet Adding integrationRuntime: ForDataFlowDemos Adding integrationRuntime: VNetEnabledIR Adding managedVirtualNetwork: default Adding linkedService: traininglak01 Adding linkedService: TrainingKeys01withUMI Adding linkedService: TrainingStore01 Adding linkedService: BatchForTraining01 Adding pipeline: Scale Out Level 2 Adding pipeline: Scale Out Level 1 Adding pipeline: Lazy Replication Adding pipeline: 03 - Upload - From Discovery Adding pipeline: 01 - Upload - Simple Adding pipeline: 04 - Upload - From Metadata Adding pipeline: Order Summary with Mapping Adding pipeline: 02 - Upload - Copy Params Adding pipeline: 1950 Activities Adding pipeline: Custom Activity Adding managedPrivateEndpoint: AzureKeyVault1 Adding factory: TrainingFactoryDev
42 lines
2.8 KiB
JSON
42 lines
2.8 KiB
JSON
{
|
|
"name": "MappingOrderAggregation",
|
|
"properties": {
|
|
"type": "MappingDataFlow",
|
|
"typeProperties": {
|
|
"sources": [
|
|
{
|
|
"dataset": {
|
|
"referenceName": "LakeFileOrderHeaderParquet",
|
|
"type": "DatasetReference"
|
|
},
|
|
"name": "OrderHeader"
|
|
},
|
|
{
|
|
"dataset": {
|
|
"referenceName": "LakeFileOrderDetailLinesParquet",
|
|
"type": "DatasetReference"
|
|
},
|
|
"name": "OrderLineDetails"
|
|
}
|
|
],
|
|
"sinks": [
|
|
{
|
|
"dataset": {
|
|
"referenceName": "TableOrderSummary",
|
|
"type": "DatasetReference"
|
|
},
|
|
"name": "OrderSummary"
|
|
}
|
|
],
|
|
"transformations": [
|
|
{
|
|
"name": "JoinHeaderToLineDetails"
|
|
},
|
|
{
|
|
"name": "OrderLineCount"
|
|
}
|
|
],
|
|
"script": "source(output(\n\t\tSalesOrderID as integer,\n\t\tRevisionNumber as integer,\n\t\tOrderDate as timestamp,\n\t\tDueDate as timestamp,\n\t\tShipDate as timestamp,\n\t\tStatus as integer,\n\t\tOnlineOrderFlag as boolean,\n\t\tSalesOrderNumber as string,\n\t\tPurchaseOrderNumber as string,\n\t\tAccountNumber as string,\n\t\tCustomerID as integer,\n\t\tShipToAddressID as integer,\n\t\tBillToAddressID as integer,\n\t\tShipMethod as string,\n\t\tCreditCardApprovalCode as string,\n\t\tSubTotal as decimal(19,4),\n\t\tTaxAmt as decimal(19,4),\n\t\tFreight as decimal(19,4),\n\t\tTotalDue as decimal(19,4),\n\t\tComment as string,\n\t\trowguid as string,\n\t\tModifiedDate as timestamp\n\t),\n\tallowSchemaDrift: false,\n\tvalidateSchema: false,\n\tignoreNoFilesFound: false,\n\tformat: 'parquet',\n\tpartitionBy('hash', 1)) ~> OrderHeader\nsource(output(\n\t\tSalesOrderID as integer,\n\t\tSalesOrderDetailID as integer,\n\t\tOrderQty as integer,\n\t\tProductID as integer,\n\t\tUnitPrice as decimal(19,4),\n\t\tUnitPriceDiscount as decimal(19,4),\n\t\tLineTotal as decimal(38,6),\n\t\trowguid as string,\n\t\tModifiedDate as timestamp\n\t),\n\tallowSchemaDrift: false,\n\tvalidateSchema: false,\n\tignoreNoFilesFound: false,\n\tformat: 'parquet',\n\tpartitionBy('hash', 1)) ~> OrderLineDetails\nOrderHeader, OrderLineDetails join(OrderHeader@SalesOrderID == OrderLineDetails@SalesOrderID,\n\tjoinType:'inner',\n\tpartitionBy('hash', 1),\n\tbroadcast: 'both')~> JoinHeaderToLineDetails\nJoinHeaderToLineDetails aggregate(groupBy(SalesOrderNumber),\n\tRecordCount = count(SalesOrderDetailID),\n\tpartitionBy('roundRobin', 4)) ~> OrderLineCount\nOrderLineCount sink(allowSchemaDrift: false,\n\tvalidateSchema: false,\n\tinput(\n\t\tSalesOrderNumber as string,\n\t\tRecordCount as integer\n\t),\n\tdeletable:false,\n\tinsertable:true,\n\tupdateable:false,\n\tupsertable:false,\n\ttruncate:true,\n\tformat: 'table',\n\tskipDuplicateMapInputs: true,\n\tskipDuplicateMapOutputs: true,\n\terrorHandlingOption: 'stopOnFirstError',\n\tmapColumn(\n\t\tSalesOrderNumber,\n\t\tRecordCount\n\t),\n\tpartitionBy('roundRobin', 4)) ~> OrderSummary"
|
|
}
|
|
}
|
|
} |