From 07d95ecd8aa33ead4f8c331226fb8dfe04c1ef10 Mon Sep 17 00:00:00 2001 From: Paul Andrew <60190428+mrpaulandrew@users.noreply.github.com> Date: Mon, 24 Oct 2022 11:46:11 +0100 Subject: [PATCH] ARM template and parameters deployed on 10-24-2022 11:46:8, based on the collaboration branch's commit ID: 5e460f3eb4befe46c3f759c2808f645b9739d594 --- TrainingFactoryDev/ARMTemplateForFactory.json | 209 ++++++++++- .../ARMTemplateParametersForFactory.json | 6 + .../ArmTemplateParameters_master.json | 6 + .../linkedTemplates/ArmTemplate_0.json | 291 +++++---------- .../linkedTemplates/ArmTemplate_1.json | 280 ++++++++++----- .../linkedTemplates/ArmTemplate_2.json | 307 +++++----------- .../linkedTemplates/ArmTemplate_3.json | 333 +++++++++++++++++- .../linkedTemplates/ArmTemplate_master.json | 14 + 8 files changed, 934 insertions(+), 512 deletions(-) diff --git a/TrainingFactoryDev/ARMTemplateForFactory.json b/TrainingFactoryDev/ARMTemplateForFactory.json index d539284..ef3d4c9 100644 --- a/TrainingFactoryDev/ARMTemplateForFactory.json +++ b/TrainingFactoryDev/ARMTemplateForFactory.json @@ -11,6 +11,10 @@ "type": "secureString", "metadata": "Secure string for 'connectionString' of 'AzureSqlDatabase1'" }, + "EmailSenderFunction_functionKey": { + "type": "secureString", + "metadata": "Secure string for 'functionKey' of 'EmailSenderFunction'" + }, "TrainingStore01_connectionString": { "type": "secureString", "metadata": "Secure string for 'connectionString' of 'TrainingStore01'" @@ -51,6 +55,10 @@ "type": "string", "defaultValue": "batchfortraining01" }, + "EmailSenderFunction_properties_typeProperties_functionAppUrl": { + "type": "string", + "defaultValue": "https://frameworksupportfunctions.azurewebsites.net" + }, "GenericKeys_properties_typeProperties_baseUrl": { "type": "string", "defaultValue": "@{linkedService().baseUrl}" @@ -996,7 +1004,7 @@ } }, { - "name": "Set Global Param", + "name": "Set Var From Global Param", "type": "SetVariable", "dependsOn": [ { @@ -1415,7 +1423,7 @@ "cancelAfter": {} }, "folder": { - "name": "Demo Pipelines/Misc" + "name": "Demo Pipelines/Scaling Out" }, "annotations": [], "lastPublishTime": "2022-02-03T14:39:03Z" @@ -1744,6 +1752,67 @@ "[concat(variables('factoryId'), '/linkedServices/BatchForTraining01')]" ] }, + { + "name": "[concat(parameters('factoryName'), '/EmailSender')]", + "type": "Microsoft.DataFactory/factories/pipelines", + "apiVersion": "2018-06-01", + "properties": { + "activities": [ + { + "name": "Send Email", + "type": "AzureFunctionActivity", + "dependsOn": [], + "policy": { + "timeout": "0.12:00:00", + "retry": 0, + "retryIntervalInSeconds": 30, + "secureOutput": false, + "secureInput": false + }, + "userProperties": [], + "typeProperties": { + "functionName": "SendEmail", + "method": "POST", + "headers": {}, + "body": { + "value": "@concat('\n{\n\"emailRecipients\": \"',pipeline().parameters.Recipients,'\",\n\"emailSubject\": \"',pipeline().parameters.Subject,'\",\n\"emailBody\": \"',pipeline().parameters.Body,'\",\n\"emailImportance\": \"',pipeline().parameters.Importance,'\"\n}')", + "type": "Expression" + } + }, + "linkedServiceName": { + "referenceName": "EmailSenderFunction", + "type": "LinkedServiceReference" + } + } + ], + "policy": { + "elapsedTimeMetric": {}, + "cancelAfter": {} + }, + "parameters": { + "Recipients": { + "type": "string" + }, + "Subject": { + "type": "string" + }, + "Body": { + "type": "string" + }, + "Importance": { + "type": "string", + "defaultValue": "High" + } + }, + "folder": { + "name": "Demo Pipelines/Misc" + }, + "annotations": [] + }, + "dependsOn": [ + "[concat(variables('factoryId'), '/linkedServices/EmailSenderFunction')]" + ] + }, { "name": "[concat(parameters('factoryName'), '/Get Key Vault Value')]", "type": "Microsoft.DataFactory/factories/pipelines", @@ -2031,6 +2100,68 @@ "[concat(variables('factoryId'), '/datasets/AnyDatabaseTable')]" ] }, + { + "name": "[concat(parameters('factoryName'), '/Order Summary with Mapping With Param')]", + "type": "Microsoft.DataFactory/factories/pipelines", + "apiVersion": "2018-06-01", + "properties": { + "activities": [ + { + "name": "Mapping Order Aggregation", + "type": "ExecuteDataFlow", + "dependsOn": [], + "policy": { + "timeout": "7.00:00:00", + "retry": 0, + "retryIntervalInSeconds": 30, + "secureOutput": false, + "secureInput": false + }, + "userProperties": [], + "typeProperties": { + "dataflow": { + "referenceName": "MappingOrderAggregationWithParam", + "type": "DataFlowReference", + "parameters": { + "AuditColumn": { + "value": "'@{pipeline().parameters.AuditColumnValue}'", + "type": "Expression" + } + }, + "datasetParameters": { + "OrderHeader": {}, + "OrderLineDetails": {}, + "OrderSummary": {} + } + }, + "staging": {}, + "compute": { + "coreCount": 8, + "computeType": "General" + }, + "traceLevel": "Fine" + } + } + ], + "policy": { + "elapsedTimeMetric": {}, + "cancelAfter": {} + }, + "parameters": { + "AuditColumnValue": { + "type": "string" + } + }, + "folder": { + "name": "Demo Pipelines/Data Flows" + }, + "annotations": [], + "lastPublishTime": "2020-09-02T14:13:15Z" + }, + "dependsOn": [ + "[concat(variables('factoryId'), '/dataflows/MappingOrderAggregationWithParam')]" + ] + }, { "name": "[concat(parameters('factoryName'), '/Order Summary with Mapping')]", "type": "Microsoft.DataFactory/factories/pipelines", @@ -2388,12 +2519,12 @@ "properties": { "activities": [ { - "name": "Wait 15", + "name": "Wait 10", "type": "Wait", "dependsOn": [], "userProperties": [], "typeProperties": { - "waitTimeInSeconds": 15 + "waitTimeInSeconds": 10 } } ], @@ -3432,6 +3563,24 @@ "[concat(variables('factoryId'), '/linkedServices/TrainingStore01')]" ] }, + { + "name": "[concat(parameters('factoryName'), '/EmailSenderFunction')]", + "type": "Microsoft.DataFactory/factories/linkedServices", + "apiVersion": "2018-06-01", + "properties": { + "annotations": [], + "type": "AzureFunction", + "typeProperties": { + "functionAppUrl": "[parameters('EmailSenderFunction_properties_typeProperties_functionAppUrl')]", + "functionKey": { + "type": "SecureString", + "value": "[parameters('EmailSenderFunction_functionKey')]" + }, + "authentication": "Anonymous" + } + }, + "dependsOn": [] + }, { "name": "[concat(parameters('factoryName'), '/GenericKeys')]", "type": "Microsoft.DataFactory/factories/linkedServices", @@ -3773,6 +3922,58 @@ "[concat(variables('factoryId'), '/datasets/TableOrderSummary')]" ] }, + { + "name": "[concat(parameters('factoryName'), '/MappingOrderAggregationWithParam')]", + "type": "Microsoft.DataFactory/factories/dataflows", + "apiVersion": "2018-06-01", + "properties": { + "type": "MappingDataFlow", + "typeProperties": { + "sources": [ + { + "dataset": { + "referenceName": "LakeFileOrderHeaderParquet", + "type": "DatasetReference" + }, + "name": "OrderHeader" + }, + { + "dataset": { + "referenceName": "LakeFileOrderDetailLinesParquet", + "type": "DatasetReference" + }, + "name": "OrderLineDetails" + } + ], + "sinks": [ + { + "dataset": { + "referenceName": "TableOrderSummary", + "type": "DatasetReference" + }, + "name": "OrderSummary" + } + ], + "transformations": [ + { + "name": "JoinHeaderToLineDetails" + }, + { + "name": "OrderLineCount" + }, + { + "name": "AddAuditColum" + } + ], + "script": "parameters{\n\tAuditColumn as string\n}\nsource(output(\n\t\tSalesOrderID as integer,\n\t\tRevisionNumber as integer,\n\t\tOrderDate as timestamp,\n\t\tDueDate as timestamp,\n\t\tShipDate as timestamp,\n\t\tStatus as integer,\n\t\tOnlineOrderFlag as boolean,\n\t\tSalesOrderNumber as string,\n\t\tPurchaseOrderNumber as string,\n\t\tAccountNumber as string,\n\t\tCustomerID as integer,\n\t\tShipToAddressID as integer,\n\t\tBillToAddressID as integer,\n\t\tShipMethod as string,\n\t\tCreditCardApprovalCode as string,\n\t\tSubTotal as decimal(19,4),\n\t\tTaxAmt as decimal(19,4),\n\t\tFreight as decimal(19,4),\n\t\tTotalDue as decimal(19,4),\n\t\tComment as string,\n\t\trowguid as string,\n\t\tModifiedDate as timestamp\n\t),\n\tallowSchemaDrift: false,\n\tvalidateSchema: false,\n\tignoreNoFilesFound: false,\n\tformat: 'parquet',\n\tpartitionBy('hash', 1)) ~> OrderHeader\nsource(output(\n\t\tSalesOrderID as integer,\n\t\tSalesOrderDetailID as integer,\n\t\tOrderQty as integer,\n\t\tProductID as integer,\n\t\tUnitPrice as decimal(19,4),\n\t\tUnitPriceDiscount as decimal(19,4),\n\t\tLineTotal as decimal(38,6),\n\t\trowguid as string,\n\t\tModifiedDate as timestamp\n\t),\n\tallowSchemaDrift: false,\n\tvalidateSchema: false,\n\tignoreNoFilesFound: false,\n\tformat: 'parquet',\n\tpartitionBy('hash', 1)) ~> OrderLineDetails\nOrderHeader, OrderLineDetails join(OrderHeader@SalesOrderID == OrderLineDetails@SalesOrderID,\n\tjoinType:'inner',\n\tmatchType:'exact',\n\tignoreSpaces: false,\n\tpartitionBy('hash', 1),\n\tbroadcast: 'both')~> JoinHeaderToLineDetails\nJoinHeaderToLineDetails aggregate(groupBy(SalesOrderNumber),\n\tRecordCount = count(SalesOrderDetailID),\n\tpartitionBy('roundRobin', 4)) ~> OrderLineCount\nOrderLineCount derive(AuditValue = $AuditColumn) ~> AddAuditColum\nAddAuditColum sink(allowSchemaDrift: false,\n\tvalidateSchema: false,\n\tinput(\n\t\tSalesOrderNumber as string,\n\t\tRecordCount as integer\n\t),\n\tdeletable:false,\n\tinsertable:true,\n\tupdateable:false,\n\tupsertable:false,\n\ttruncate:true,\n\tformat: 'table',\n\tskipDuplicateMapInputs: true,\n\tskipDuplicateMapOutputs: true,\n\terrorHandlingOption: 'stopOnFirstError',\n\tmapColumn(\n\t\tSalesOrderNumber,\n\t\tRecordCount\n\t),\n\tpartitionBy('roundRobin', 4)) ~> OrderSummary" + } + }, + "dependsOn": [ + "[concat(variables('factoryId'), '/datasets/LakeFileOrderHeaderParquet')]", + "[concat(variables('factoryId'), '/datasets/LakeFileOrderDetailLinesParquet')]", + "[concat(variables('factoryId'), '/datasets/TableOrderSummary')]" + ] + }, { "name": "[concat(parameters('factoryName'), '/UpdateProductDimension')]", "type": "Microsoft.DataFactory/factories/dataflows", diff --git a/TrainingFactoryDev/ARMTemplateParametersForFactory.json b/TrainingFactoryDev/ARMTemplateParametersForFactory.json index a384d97..51a917e 100644 --- a/TrainingFactoryDev/ARMTemplateParametersForFactory.json +++ b/TrainingFactoryDev/ARMTemplateParametersForFactory.json @@ -8,6 +8,9 @@ "AzureSqlDatabase1_connectionString": { "value": "" }, + "EmailSenderFunction_functionKey": { + "value": "" + }, "TrainingStore01_connectionString": { "value": "" }, @@ -38,6 +41,9 @@ "BatchForTraining01_properties_typeProperties_accountName": { "value": "batchfortraining01" }, + "EmailSenderFunction_properties_typeProperties_functionAppUrl": { + "value": "https://frameworksupportfunctions.azurewebsites.net" + }, "GenericKeys_properties_typeProperties_baseUrl": { "value": "@{linkedService().baseUrl}" }, diff --git a/TrainingFactoryDev/linkedTemplates/ArmTemplateParameters_master.json b/TrainingFactoryDev/linkedTemplates/ArmTemplateParameters_master.json index bad073b..9f14fb1 100644 --- a/TrainingFactoryDev/linkedTemplates/ArmTemplateParameters_master.json +++ b/TrainingFactoryDev/linkedTemplates/ArmTemplateParameters_master.json @@ -8,6 +8,9 @@ "AzureSqlDatabase1_connectionString": { "value": "" }, + "EmailSenderFunction_functionKey": { + "value": "" + }, "TrainingStore01_connectionString": { "value": "" }, @@ -38,6 +41,9 @@ "BatchForTraining01_properties_typeProperties_accountName": { "value": "batchfortraining01" }, + "EmailSenderFunction_properties_typeProperties_functionAppUrl": { + "value": "https://frameworksupportfunctions.azurewebsites.net" + }, "GenericKeys_properties_typeProperties_baseUrl": { "value": "@{linkedService().baseUrl}" }, diff --git a/TrainingFactoryDev/linkedTemplates/ArmTemplate_0.json b/TrainingFactoryDev/linkedTemplates/ArmTemplate_0.json index de0214b..ced6245 100644 --- a/TrainingFactoryDev/linkedTemplates/ArmTemplate_0.json +++ b/TrainingFactoryDev/linkedTemplates/ArmTemplate_0.json @@ -11,6 +11,10 @@ "type": "secureString", "metadata": "Secure string for 'connectionString' of 'AzureSqlDatabase1'" }, + "EmailSenderFunction_functionKey": { + "type": "secureString", + "metadata": "Secure string for 'functionKey' of 'EmailSenderFunction'" + }, "TrainingStore01_connectionString": { "type": "secureString", "metadata": "Secure string for 'connectionString' of 'TrainingStore01'" @@ -27,6 +31,10 @@ "type": "string", "defaultValue": "https://adayfullofadfsa.dfs.core.windows.net" }, + "EmailSenderFunction_properties_typeProperties_functionAppUrl": { + "type": "string", + "defaultValue": "https://frameworksupportfunctions.azurewebsites.net" + }, "GenericKeys_properties_typeProperties_baseUrl": { "type": "string", "defaultValue": "@{linkedService().baseUrl}" @@ -449,7 +457,7 @@ "cancelAfter": {} }, "folder": { - "name": "Demo Pipelines/Misc" + "name": "Demo Pipelines/Scaling Out" }, "annotations": [], "lastPublishTime": "2022-02-03T14:39:03Z" @@ -463,12 +471,12 @@ "properties": { "activities": [ { - "name": "Wait 15", + "name": "Wait 10", "type": "Wait", "dependsOn": [], "userProperties": [], "typeProperties": { - "waitTimeInSeconds": 15 + "waitTimeInSeconds": 10 } } ], @@ -509,6 +517,24 @@ }, "dependsOn": [] }, + { + "name": "[concat(parameters('factoryName'), '/EmailSenderFunction')]", + "type": "Microsoft.DataFactory/factories/linkedServices", + "apiVersion": "2018-06-01", + "properties": { + "annotations": [], + "type": "AzureFunction", + "typeProperties": { + "functionAppUrl": "[parameters('EmailSenderFunction_properties_typeProperties_functionAppUrl')]", + "functionKey": { + "type": "SecureString", + "value": "[parameters('EmailSenderFunction_functionKey')]" + }, + "authentication": "Anonymous" + } + }, + "dependsOn": [] + }, { "name": "[concat(parameters('factoryName'), '/GenericKeys')]", "type": "Microsoft.DataFactory/factories/linkedServices", @@ -701,6 +727,67 @@ "properties": {}, "dependsOn": [] }, + { + "name": "[concat(parameters('factoryName'), '/EmailSender')]", + "type": "Microsoft.DataFactory/factories/pipelines", + "apiVersion": "2018-06-01", + "properties": { + "activities": [ + { + "name": "Send Email", + "type": "AzureFunctionActivity", + "dependsOn": [], + "policy": { + "timeout": "0.12:00:00", + "retry": 0, + "retryIntervalInSeconds": 30, + "secureOutput": false, + "secureInput": false + }, + "userProperties": [], + "typeProperties": { + "functionName": "SendEmail", + "method": "POST", + "headers": {}, + "body": { + "value": "@concat('\n{\n\"emailRecipients\": \"',pipeline().parameters.Recipients,'\",\n\"emailSubject\": \"',pipeline().parameters.Subject,'\",\n\"emailBody\": \"',pipeline().parameters.Body,'\",\n\"emailImportance\": \"',pipeline().parameters.Importance,'\"\n}')", + "type": "Expression" + } + }, + "linkedServiceName": { + "referenceName": "EmailSenderFunction", + "type": "LinkedServiceReference" + } + } + ], + "policy": { + "elapsedTimeMetric": {}, + "cancelAfter": {} + }, + "parameters": { + "Recipients": { + "type": "string" + }, + "Subject": { + "type": "string" + }, + "Body": { + "type": "string" + }, + "Importance": { + "type": "string", + "defaultValue": "High" + } + }, + "folder": { + "name": "Demo Pipelines/Misc" + }, + "annotations": [] + }, + "dependsOn": [ + "[concat(variables('factoryId'), '/linkedServices/EmailSenderFunction')]" + ] + }, { "name": "[concat(parameters('factoryName'), '/HTTP_BIN_AWProduct')]", "type": "Microsoft.DataFactory/factories/datasets", @@ -757,204 +844,6 @@ "dependsOn": [ "[concat(variables('factoryId'), '/linkedServices/HTTP_AWGitHub')]" ] - }, - { - "name": "[concat(parameters('factoryName'), '/LakeFileOrderDetailLinesParquet')]", - "type": "Microsoft.DataFactory/factories/datasets", - "apiVersion": "2018-06-01", - "properties": { - "linkedServiceName": { - "referenceName": "traininglake01noneKV", - "type": "LinkedServiceReference" - }, - "folder": { - "name": "Lake" - }, - "annotations": [], - "type": "Parquet", - "typeProperties": { - "location": { - "type": "AzureBlobFSLocation", - "fileName": "OrderDetailLines.parquet", - "folderPath": "Raw", - "fileSystem": "datawarehouse" - }, - "compressionCodec": "snappy" - }, - "schema": [ - { - "name": "SalesOrderID", - "type": "INT32" - }, - { - "name": "SalesOrderDetailID", - "type": "INT32" - }, - { - "name": "OrderQty", - "type": "INT32" - }, - { - "name": "ProductID", - "type": "INT32" - }, - { - "name": "UnitPrice", - "type": "DECIMAL", - "precision": 19, - "scale": 4 - }, - { - "name": "UnitPriceDiscount", - "type": "DECIMAL", - "precision": 19, - "scale": 4 - }, - { - "name": "LineTotal", - "type": "DECIMAL", - "precision": 38, - "scale": 6 - }, - { - "name": "rowguid", - "type": "UTF8" - }, - { - "name": "ModifiedDate", - "type": "INT96" - } - ] - }, - "dependsOn": [ - "[concat(variables('factoryId'), '/linkedServices/traininglake01noneKV')]" - ] - }, - { - "name": "[concat(parameters('factoryName'), '/LakeFileOrderHeaderParquet')]", - "type": "Microsoft.DataFactory/factories/datasets", - "apiVersion": "2018-06-01", - "properties": { - "linkedServiceName": { - "referenceName": "traininglake01noneKV", - "type": "LinkedServiceReference" - }, - "folder": { - "name": "Lake" - }, - "annotations": [], - "type": "Parquet", - "typeProperties": { - "location": { - "type": "AzureBlobFSLocation", - "fileName": "OrderHeader.parquet", - "folderPath": "Raw", - "fileSystem": "datawarehouse" - }, - "compressionCodec": "snappy" - }, - "schema": [ - { - "name": "SalesOrderID", - "type": "INT32" - }, - { - "name": "RevisionNumber", - "type": "INT32" - }, - { - "name": "OrderDate", - "type": "INT96" - }, - { - "name": "DueDate", - "type": "INT96" - }, - { - "name": "ShipDate", - "type": "INT96" - }, - { - "name": "Status", - "type": "INT32" - }, - { - "name": "OnlineOrderFlag", - "type": "BOOLEAN" - }, - { - "name": "SalesOrderNumber", - "type": "UTF8" - }, - { - "name": "PurchaseOrderNumber", - "type": "UTF8" - }, - { - "name": "AccountNumber", - "type": "UTF8" - }, - { - "name": "CustomerID", - "type": "INT32" - }, - { - "name": "ShipToAddressID", - "type": "INT32" - }, - { - "name": "BillToAddressID", - "type": "INT32" - }, - { - "name": "ShipMethod", - "type": "UTF8" - }, - { - "name": "CreditCardApprovalCode", - "type": "UTF8" - }, - { - "name": "SubTotal", - "type": "DECIMAL", - "precision": 19, - "scale": 4 - }, - { - "name": "TaxAmt", - "type": "DECIMAL", - "precision": 19, - "scale": 4 - }, - { - "name": "Freight", - "type": "DECIMAL", - "precision": 19, - "scale": 4 - }, - { - "name": "TotalDue", - "type": "DECIMAL", - "precision": 19, - "scale": 4 - }, - { - "name": "Comment", - "type": "UTF8" - }, - { - "name": "rowguid", - "type": "UTF8" - }, - { - "name": "ModifiedDate", - "type": "INT96" - } - ] - }, - "dependsOn": [ - "[concat(variables('factoryId'), '/linkedServices/traininglake01noneKV')]" - ] } ] } \ No newline at end of file diff --git a/TrainingFactoryDev/linkedTemplates/ArmTemplate_1.json b/TrainingFactoryDev/linkedTemplates/ArmTemplate_1.json index e343e91..f2b35b9 100644 --- a/TrainingFactoryDev/linkedTemplates/ArmTemplate_1.json +++ b/TrainingFactoryDev/linkedTemplates/ArmTemplate_1.json @@ -70,6 +70,200 @@ "factoryId": "[concat('Microsoft.DataFactory/factories/', parameters('factoryName'))]" }, "resources": [ + { + "name": "[concat(parameters('factoryName'), '/LakeFileOrderDetailLinesParquet')]", + "type": "Microsoft.DataFactory/factories/datasets", + "apiVersion": "2018-06-01", + "properties": { + "linkedServiceName": { + "referenceName": "traininglake01noneKV", + "type": "LinkedServiceReference" + }, + "folder": { + "name": "Lake" + }, + "annotations": [], + "type": "Parquet", + "typeProperties": { + "location": { + "type": "AzureBlobFSLocation", + "fileName": "OrderDetailLines.parquet", + "folderPath": "Raw", + "fileSystem": "datawarehouse" + }, + "compressionCodec": "snappy" + }, + "schema": [ + { + "name": "SalesOrderID", + "type": "INT32" + }, + { + "name": "SalesOrderDetailID", + "type": "INT32" + }, + { + "name": "OrderQty", + "type": "INT32" + }, + { + "name": "ProductID", + "type": "INT32" + }, + { + "name": "UnitPrice", + "type": "DECIMAL", + "precision": 19, + "scale": 4 + }, + { + "name": "UnitPriceDiscount", + "type": "DECIMAL", + "precision": 19, + "scale": 4 + }, + { + "name": "LineTotal", + "type": "DECIMAL", + "precision": 38, + "scale": 6 + }, + { + "name": "rowguid", + "type": "UTF8" + }, + { + "name": "ModifiedDate", + "type": "INT96" + } + ] + }, + "dependsOn": [] + }, + { + "name": "[concat(parameters('factoryName'), '/LakeFileOrderHeaderParquet')]", + "type": "Microsoft.DataFactory/factories/datasets", + "apiVersion": "2018-06-01", + "properties": { + "linkedServiceName": { + "referenceName": "traininglake01noneKV", + "type": "LinkedServiceReference" + }, + "folder": { + "name": "Lake" + }, + "annotations": [], + "type": "Parquet", + "typeProperties": { + "location": { + "type": "AzureBlobFSLocation", + "fileName": "OrderHeader.parquet", + "folderPath": "Raw", + "fileSystem": "datawarehouse" + }, + "compressionCodec": "snappy" + }, + "schema": [ + { + "name": "SalesOrderID", + "type": "INT32" + }, + { + "name": "RevisionNumber", + "type": "INT32" + }, + { + "name": "OrderDate", + "type": "INT96" + }, + { + "name": "DueDate", + "type": "INT96" + }, + { + "name": "ShipDate", + "type": "INT96" + }, + { + "name": "Status", + "type": "INT32" + }, + { + "name": "OnlineOrderFlag", + "type": "BOOLEAN" + }, + { + "name": "SalesOrderNumber", + "type": "UTF8" + }, + { + "name": "PurchaseOrderNumber", + "type": "UTF8" + }, + { + "name": "AccountNumber", + "type": "UTF8" + }, + { + "name": "CustomerID", + "type": "INT32" + }, + { + "name": "ShipToAddressID", + "type": "INT32" + }, + { + "name": "BillToAddressID", + "type": "INT32" + }, + { + "name": "ShipMethod", + "type": "UTF8" + }, + { + "name": "CreditCardApprovalCode", + "type": "UTF8" + }, + { + "name": "SubTotal", + "type": "DECIMAL", + "precision": 19, + "scale": 4 + }, + { + "name": "TaxAmt", + "type": "DECIMAL", + "precision": 19, + "scale": 4 + }, + { + "name": "Freight", + "type": "DECIMAL", + "precision": 19, + "scale": 4 + }, + { + "name": "TotalDue", + "type": "DECIMAL", + "precision": 19, + "scale": 4 + }, + { + "name": "Comment", + "type": "UTF8" + }, + { + "name": "rowguid", + "type": "UTF8" + }, + { + "name": "ModifiedDate", + "type": "INT96" + } + ] + }, + "dependsOn": [] + }, { "name": "[concat(parameters('factoryName'), '/ADLS_saintegrationpipelines')]", "type": "Microsoft.DataFactory/factories/linkedServices", @@ -609,92 +803,6 @@ "dependsOn": [ "[concat(variables('factoryId'), '/linkedServices/traininglake01')]" ] - }, - { - "name": "[concat(parameters('factoryName'), '/LakeFilePersonCSV')]", - "type": "Microsoft.DataFactory/factories/datasets", - "apiVersion": "2018-06-01", - "properties": { - "linkedServiceName": { - "referenceName": "traininglake01", - "type": "LinkedServiceReference" - }, - "folder": { - "name": "Lake" - }, - "annotations": [], - "type": "DelimitedText", - "typeProperties": { - "location": { - "type": "AzureBlobFSLocation", - "fileName": "Person.csv", - "folderPath": "Landing", - "fileSystem": "datawarehouse" - }, - "columnDelimiter": ",", - "escapeChar": "\\", - "quoteChar": "\"" - }, - "schema": [ - { - "type": "String" - }, - { - "type": "String" - }, - { - "type": "String" - } - ] - }, - "dependsOn": [ - "[concat(variables('factoryId'), '/linkedServices/traininglake01')]" - ] - }, - { - "name": "[concat(parameters('factoryName'), '/LakeFiles')]", - "type": "Microsoft.DataFactory/factories/datasets", - "apiVersion": "2018-06-01", - "properties": { - "linkedServiceName": { - "referenceName": "traininglake01", - "type": "LinkedServiceReference" - }, - "parameters": { - "Directory": { - "type": "string" - }, - "File": { - "type": "string" - } - }, - "folder": { - "name": "Lake" - }, - "annotations": [], - "type": "DelimitedText", - "typeProperties": { - "location": { - "type": "AzureBlobFSLocation", - "fileName": { - "value": "@dataset().File", - "type": "Expression" - }, - "folderPath": { - "value": "@dataset().Directory", - "type": "Expression" - }, - "fileSystem": "datawarehouse" - }, - "columnDelimiter": ",", - "escapeChar": "\\", - "quoteChar": "\"" - }, - "schema": [] - }, - "dependsOn": [ - "[concat(variables('factoryId'), '/linkedServices/traininglake01')]" - ] } ] } \ No newline at end of file diff --git a/TrainingFactoryDev/linkedTemplates/ArmTemplate_2.json b/TrainingFactoryDev/linkedTemplates/ArmTemplate_2.json index 1c7ea99..889e98c 100644 --- a/TrainingFactoryDev/linkedTemplates/ArmTemplate_2.json +++ b/TrainingFactoryDev/linkedTemplates/ArmTemplate_2.json @@ -12,6 +12,88 @@ "factoryId": "[concat('Microsoft.DataFactory/factories/', parameters('factoryName'))]" }, "resources": [ + { + "name": "[concat(parameters('factoryName'), '/LakeFilePersonCSV')]", + "type": "Microsoft.DataFactory/factories/datasets", + "apiVersion": "2018-06-01", + "properties": { + "linkedServiceName": { + "referenceName": "traininglake01", + "type": "LinkedServiceReference" + }, + "folder": { + "name": "Lake" + }, + "annotations": [], + "type": "DelimitedText", + "typeProperties": { + "location": { + "type": "AzureBlobFSLocation", + "fileName": "Person.csv", + "folderPath": "Landing", + "fileSystem": "datawarehouse" + }, + "columnDelimiter": ",", + "escapeChar": "\\", + "quoteChar": "\"" + }, + "schema": [ + { + "type": "String" + }, + { + "type": "String" + }, + { + "type": "String" + } + ] + }, + "dependsOn": [] + }, + { + "name": "[concat(parameters('factoryName'), '/LakeFiles')]", + "type": "Microsoft.DataFactory/factories/datasets", + "apiVersion": "2018-06-01", + "properties": { + "linkedServiceName": { + "referenceName": "traininglake01", + "type": "LinkedServiceReference" + }, + "parameters": { + "Directory": { + "type": "string" + }, + "File": { + "type": "string" + } + }, + "folder": { + "name": "Lake" + }, + "annotations": [], + "type": "DelimitedText", + "typeProperties": { + "location": { + "type": "AzureBlobFSLocation", + "fileName": { + "value": "@dataset().File", + "type": "Expression" + }, + "folderPath": { + "value": "@dataset().Directory", + "type": "Expression" + }, + "fileSystem": "datawarehouse" + }, + "columnDelimiter": ",", + "escapeChar": "\\", + "quoteChar": "\"" + }, + "schema": [] + }, + "dependsOn": [] + }, { "name": "[concat(parameters('factoryName'), '/LakeFilesBinary')]", "type": "Microsoft.DataFactory/factories/datasets", @@ -376,7 +458,8 @@ "lastPublishTime": "2020-09-29T13:50:28Z" }, "dependsOn": [ - "[concat(variables('factoryId'), '/datasets/LaptopFilePersonCSV')]" + "[concat(variables('factoryId'), '/datasets/LaptopFilePersonCSV')]", + "[concat(variables('factoryId'), '/datasets/LakeFilePersonCSV')]" ] }, { @@ -462,7 +545,8 @@ "lastPublishTime": "2020-08-27T09:06:46Z" }, "dependsOn": [ - "[concat(variables('factoryId'), '/datasets/LaptopsFiles')]" + "[concat(variables('factoryId'), '/datasets/LaptopsFiles')]", + "[concat(variables('factoryId'), '/datasets/LakeFiles')]" ] }, { @@ -608,7 +692,8 @@ }, "dependsOn": [ "[concat(variables('factoryId'), '/datasets/LaptopFolders')]", - "[concat(variables('factoryId'), '/datasets/LaptopsFiles')]" + "[concat(variables('factoryId'), '/datasets/LaptopsFiles')]", + "[concat(variables('factoryId'), '/datasets/LakeFiles')]" ] }, { @@ -1051,6 +1136,7 @@ "[concat(variables('factoryId'), '/datasets/LaptopFilesBinary')]", "[concat(variables('factoryId'), '/datasets/LakeFilesBinary')]", "[concat(variables('factoryId'), '/datasets/LaptopsFiles')]", + "[concat(variables('factoryId'), '/datasets/LakeFiles')]", "[concat(variables('factoryId'), '/datasets/LaptopFilesParquet')]" ] }, @@ -1642,221 +1728,6 @@ "lastPublishTime": "2020-08-24T13:41:19Z" }, "dependsOn": [] - }, - { - "name": "[concat(parameters('factoryName'), '/MappingOrderAggregation')]", - "type": "Microsoft.DataFactory/factories/dataflows", - "apiVersion": "2018-06-01", - "properties": { - "type": "MappingDataFlow", - "typeProperties": { - "sources": [ - { - "dataset": { - "referenceName": "LakeFileOrderHeaderParquet", - "type": "DatasetReference" - }, - "name": "OrderHeader" - }, - { - "dataset": { - "referenceName": "LakeFileOrderDetailLinesParquet", - "type": "DatasetReference" - }, - "name": "OrderLineDetails" - } - ], - "sinks": [ - { - "dataset": { - "referenceName": "TableOrderSummary", - "type": "DatasetReference" - }, - "name": "OrderSummary" - } - ], - "transformations": [ - { - "name": "JoinHeaderToLineDetails" - }, - { - "name": "OrderLineCount" - } - ], - "script": "source(output(\n\t\tSalesOrderID as integer,\n\t\tRevisionNumber as integer,\n\t\tOrderDate as timestamp,\n\t\tDueDate as timestamp,\n\t\tShipDate as timestamp,\n\t\tStatus as integer,\n\t\tOnlineOrderFlag as boolean,\n\t\tSalesOrderNumber as string,\n\t\tPurchaseOrderNumber as string,\n\t\tAccountNumber as string,\n\t\tCustomerID as integer,\n\t\tShipToAddressID as integer,\n\t\tBillToAddressID as integer,\n\t\tShipMethod as string,\n\t\tCreditCardApprovalCode as string,\n\t\tSubTotal as decimal(19,4),\n\t\tTaxAmt as decimal(19,4),\n\t\tFreight as decimal(19,4),\n\t\tTotalDue as decimal(19,4),\n\t\tComment as string,\n\t\trowguid as string,\n\t\tModifiedDate as timestamp\n\t),\n\tallowSchemaDrift: false,\n\tvalidateSchema: false,\n\tignoreNoFilesFound: false,\n\tformat: 'parquet',\n\tpartitionBy('hash', 1)) ~> OrderHeader\nsource(output(\n\t\tSalesOrderID as integer,\n\t\tSalesOrderDetailID as integer,\n\t\tOrderQty as integer,\n\t\tProductID as integer,\n\t\tUnitPrice as decimal(19,4),\n\t\tUnitPriceDiscount as decimal(19,4),\n\t\tLineTotal as decimal(38,6),\n\t\trowguid as string,\n\t\tModifiedDate as timestamp\n\t),\n\tallowSchemaDrift: false,\n\tvalidateSchema: false,\n\tignoreNoFilesFound: false,\n\tformat: 'parquet',\n\tpartitionBy('hash', 1)) ~> OrderLineDetails\nOrderHeader, OrderLineDetails join(OrderHeader@SalesOrderID == OrderLineDetails@SalesOrderID,\n\tjoinType:'inner',\n\tmatchType:'exact',\n\tignoreSpaces: false,\n\tpartitionBy('hash', 1),\n\tbroadcast: 'both')~> JoinHeaderToLineDetails\nJoinHeaderToLineDetails aggregate(groupBy(SalesOrderNumber),\n\tRecordCount = count(SalesOrderDetailID),\n\tpartitionBy('roundRobin', 4)) ~> OrderLineCount\nOrderLineCount sink(allowSchemaDrift: false,\n\tvalidateSchema: false,\n\tinput(\n\t\tSalesOrderNumber as string,\n\t\tRecordCount as integer\n\t),\n\tdeletable:false,\n\tinsertable:true,\n\tupdateable:false,\n\tupsertable:false,\n\ttruncate:true,\n\tformat: 'table',\n\tskipDuplicateMapInputs: true,\n\tskipDuplicateMapOutputs: true,\n\terrorHandlingOption: 'stopOnFirstError',\n\tmapColumn(\n\t\tSalesOrderNumber,\n\t\tRecordCount\n\t),\n\tpartitionBy('roundRobin', 4)) ~> OrderSummary" - } - }, - "dependsOn": [ - "[concat(variables('factoryId'), '/datasets/TableOrderSummary')]" - ] - }, - { - "name": "[concat(parameters('factoryName'), '/UpdateProductDimension')]", - "type": "Microsoft.DataFactory/factories/dataflows", - "apiVersion": "2018-06-01", - "properties": { - "folder": { - "name": "Labs" - }, - "type": "MappingDataFlow", - "typeProperties": { - "sources": [ - { - "dataset": { - "referenceName": "ADLS_TSV_AdventureWorks", - "type": "DatasetReference" - }, - "name": "Product" - }, - { - "dataset": { - "referenceName": "ADLS_TSV_AdventureWorks", - "type": "DatasetReference" - }, - "name": "ProductSubcategory" - }, - { - "dataset": { - "referenceName": "ADLS_TSV_AdventureWorks", - "type": "DatasetReference" - }, - "name": "ProductCategory" - } - ], - "sinks": [ - { - "linkedService": { - "referenceName": "ADLS_saintegrationpipelines", - "type": "LinkedServiceReference" - }, - "name": "WriteToDataLake" - } - ], - "transformations": [ - { - "name": "SelectProductColumns" - }, - { - "name": "SelectSubcategoryColumns" - }, - { - "name": "SelectCategoryColumns" - }, - { - "name": "LookupProductCategory" - }, - { - "name": "LookupProductSubcategory" - }, - { - "name": "RemoveDuplicateColumns" - } - ], - "scriptLines": [ - "source(output(", - " ProductId as integer,", - " Product as string,", - " {_col2_} as string,", - " {_col3_} as boolean,", - " {_col4_} as boolean,", - " {_col5_} as string,", - " {_col6_} as short,", - " {_col7_} as short,", - " {_col8_} as double,", - " {_col9_} as double,", - " {_col10_} as string,", - " {_col11_} as string,", - " {_col12_} as string,", - " {_col13_} as double,", - " {_col14_} as short,", - " {_col15_} as string,", - " {_col16_} as string,", - " {_col17_} as string,", - " SubcategoryId as integer,", - " {_col19_} as short,", - " {_col20_} as timestamp,", - " {_col21_} as timestamp,", - " {_col22_} as string,", - " {_col23_} as string,", - " {_col24_} as string", - " ),", - " allowSchemaDrift: true,", - " validateSchema: false,", - " ignoreNoFilesFound: false) ~> Product", - "source(output(", - " SubcategoryId as integer,", - " CategoryId as integer,", - " Subcategory as string,", - " {_col3_} as string,", - " {_col4_} as timestamp", - " ),", - " allowSchemaDrift: true,", - " validateSchema: false,", - " ignoreNoFilesFound: false) ~> ProductSubcategory", - "source(output(", - " CategoryId as integer,", - " Category as string,", - " {_col2_} as string,", - " {_col3_} as timestamp", - " ),", - " allowSchemaDrift: true,", - " validateSchema: false,", - " ignoreNoFilesFound: false) ~> ProductCategory", - "Product select(mapColumn(", - " ProductId,", - " Product,", - " SubcategoryId", - " ),", - " skipDuplicateMapInputs: true,", - " skipDuplicateMapOutputs: true) ~> SelectProductColumns", - "ProductSubcategory select(mapColumn(", - " SubcategoryId,", - " CategoryId,", - " Subcategory", - " ),", - " skipDuplicateMapInputs: true,", - " skipDuplicateMapOutputs: true) ~> SelectSubcategoryColumns", - "ProductCategory select(mapColumn(", - " CategoryId,", - " Category", - " ),", - " skipDuplicateMapInputs: true,", - " skipDuplicateMapOutputs: true) ~> SelectCategoryColumns", - "SelectSubcategoryColumns, SelectCategoryColumns lookup(SelectSubcategoryColumns@CategoryId == SelectCategoryColumns@CategoryId,", - " multiple: false,", - " pickup: 'any',", - " broadcast: 'auto')~> LookupProductCategory", - "SelectProductColumns, LookupProductCategory lookup(SelectProductColumns@SubcategoryId == SelectSubcategoryColumns@SubcategoryId,", - " multiple: false,", - " pickup: 'any',", - " broadcast: 'auto')~> LookupProductSubcategory", - "LookupProductSubcategory select(mapColumn(", - " ProductId,", - " Product,", - " SubcategoryId = SelectProductColumns@SubcategoryId,", - " SubcategoryId = SelectSubcategoryColumns@SubcategoryId,", - " CategoryId = SelectSubcategoryColumns@CategoryId,", - " Subcategory,", - " CategoryId = SelectCategoryColumns@CategoryId,", - " Category", - " ),", - " skipDuplicateMapInputs: true,", - " skipDuplicateMapOutputs: true) ~> RemoveDuplicateColumns", - "RemoveDuplicateColumns sink(allowSchemaDrift: true,", - " validateSchema: false,", - " format: 'parquet',", - " fileSystem: 'lakeroot',", - " folderPath: 'Conformed/DimProduct',", - " truncate: true,", - " umask: 0022,", - " preCommands: [],", - " postCommands: [],", - " skipDuplicateMapInputs: true,", - " skipDuplicateMapOutputs: true) ~> WriteToDataLake" - ] - } - }, - "dependsOn": [] } ] } \ No newline at end of file diff --git a/TrainingFactoryDev/linkedTemplates/ArmTemplate_3.json b/TrainingFactoryDev/linkedTemplates/ArmTemplate_3.json index 8ba9865..9414923 100644 --- a/TrainingFactoryDev/linkedTemplates/ArmTemplate_3.json +++ b/TrainingFactoryDev/linkedTemplates/ArmTemplate_3.json @@ -12,6 +12,267 @@ "factoryId": "[concat('Microsoft.DataFactory/factories/', parameters('factoryName'))]" }, "resources": [ + { + "name": "[concat(parameters('factoryName'), '/MappingOrderAggregation')]", + "type": "Microsoft.DataFactory/factories/dataflows", + "apiVersion": "2018-06-01", + "properties": { + "type": "MappingDataFlow", + "typeProperties": { + "sources": [ + { + "dataset": { + "referenceName": "LakeFileOrderHeaderParquet", + "type": "DatasetReference" + }, + "name": "OrderHeader" + }, + { + "dataset": { + "referenceName": "LakeFileOrderDetailLinesParquet", + "type": "DatasetReference" + }, + "name": "OrderLineDetails" + } + ], + "sinks": [ + { + "dataset": { + "referenceName": "TableOrderSummary", + "type": "DatasetReference" + }, + "name": "OrderSummary" + } + ], + "transformations": [ + { + "name": "JoinHeaderToLineDetails" + }, + { + "name": "OrderLineCount" + } + ], + "script": "source(output(\n\t\tSalesOrderID as integer,\n\t\tRevisionNumber as integer,\n\t\tOrderDate as timestamp,\n\t\tDueDate as timestamp,\n\t\tShipDate as timestamp,\n\t\tStatus as integer,\n\t\tOnlineOrderFlag as boolean,\n\t\tSalesOrderNumber as string,\n\t\tPurchaseOrderNumber as string,\n\t\tAccountNumber as string,\n\t\tCustomerID as integer,\n\t\tShipToAddressID as integer,\n\t\tBillToAddressID as integer,\n\t\tShipMethod as string,\n\t\tCreditCardApprovalCode as string,\n\t\tSubTotal as decimal(19,4),\n\t\tTaxAmt as decimal(19,4),\n\t\tFreight as decimal(19,4),\n\t\tTotalDue as decimal(19,4),\n\t\tComment as string,\n\t\trowguid as string,\n\t\tModifiedDate as timestamp\n\t),\n\tallowSchemaDrift: false,\n\tvalidateSchema: false,\n\tignoreNoFilesFound: false,\n\tformat: 'parquet',\n\tpartitionBy('hash', 1)) ~> OrderHeader\nsource(output(\n\t\tSalesOrderID as integer,\n\t\tSalesOrderDetailID as integer,\n\t\tOrderQty as integer,\n\t\tProductID as integer,\n\t\tUnitPrice as decimal(19,4),\n\t\tUnitPriceDiscount as decimal(19,4),\n\t\tLineTotal as decimal(38,6),\n\t\trowguid as string,\n\t\tModifiedDate as timestamp\n\t),\n\tallowSchemaDrift: false,\n\tvalidateSchema: false,\n\tignoreNoFilesFound: false,\n\tformat: 'parquet',\n\tpartitionBy('hash', 1)) ~> OrderLineDetails\nOrderHeader, OrderLineDetails join(OrderHeader@SalesOrderID == OrderLineDetails@SalesOrderID,\n\tjoinType:'inner',\n\tmatchType:'exact',\n\tignoreSpaces: false,\n\tpartitionBy('hash', 1),\n\tbroadcast: 'both')~> JoinHeaderToLineDetails\nJoinHeaderToLineDetails aggregate(groupBy(SalesOrderNumber),\n\tRecordCount = count(SalesOrderDetailID),\n\tpartitionBy('roundRobin', 4)) ~> OrderLineCount\nOrderLineCount sink(allowSchemaDrift: false,\n\tvalidateSchema: false,\n\tinput(\n\t\tSalesOrderNumber as string,\n\t\tRecordCount as integer\n\t),\n\tdeletable:false,\n\tinsertable:true,\n\tupdateable:false,\n\tupsertable:false,\n\ttruncate:true,\n\tformat: 'table',\n\tskipDuplicateMapInputs: true,\n\tskipDuplicateMapOutputs: true,\n\terrorHandlingOption: 'stopOnFirstError',\n\tmapColumn(\n\t\tSalesOrderNumber,\n\t\tRecordCount\n\t),\n\tpartitionBy('roundRobin', 4)) ~> OrderSummary" + } + }, + "dependsOn": [] + }, + { + "name": "[concat(parameters('factoryName'), '/MappingOrderAggregationWithParam')]", + "type": "Microsoft.DataFactory/factories/dataflows", + "apiVersion": "2018-06-01", + "properties": { + "type": "MappingDataFlow", + "typeProperties": { + "sources": [ + { + "dataset": { + "referenceName": "LakeFileOrderHeaderParquet", + "type": "DatasetReference" + }, + "name": "OrderHeader" + }, + { + "dataset": { + "referenceName": "LakeFileOrderDetailLinesParquet", + "type": "DatasetReference" + }, + "name": "OrderLineDetails" + } + ], + "sinks": [ + { + "dataset": { + "referenceName": "TableOrderSummary", + "type": "DatasetReference" + }, + "name": "OrderSummary" + } + ], + "transformations": [ + { + "name": "JoinHeaderToLineDetails" + }, + { + "name": "OrderLineCount" + }, + { + "name": "AddAuditColum" + } + ], + "script": "parameters{\n\tAuditColumn as string\n}\nsource(output(\n\t\tSalesOrderID as integer,\n\t\tRevisionNumber as integer,\n\t\tOrderDate as timestamp,\n\t\tDueDate as timestamp,\n\t\tShipDate as timestamp,\n\t\tStatus as integer,\n\t\tOnlineOrderFlag as boolean,\n\t\tSalesOrderNumber as string,\n\t\tPurchaseOrderNumber as string,\n\t\tAccountNumber as string,\n\t\tCustomerID as integer,\n\t\tShipToAddressID as integer,\n\t\tBillToAddressID as integer,\n\t\tShipMethod as string,\n\t\tCreditCardApprovalCode as string,\n\t\tSubTotal as decimal(19,4),\n\t\tTaxAmt as decimal(19,4),\n\t\tFreight as decimal(19,4),\n\t\tTotalDue as decimal(19,4),\n\t\tComment as string,\n\t\trowguid as string,\n\t\tModifiedDate as timestamp\n\t),\n\tallowSchemaDrift: false,\n\tvalidateSchema: false,\n\tignoreNoFilesFound: false,\n\tformat: 'parquet',\n\tpartitionBy('hash', 1)) ~> OrderHeader\nsource(output(\n\t\tSalesOrderID as integer,\n\t\tSalesOrderDetailID as integer,\n\t\tOrderQty as integer,\n\t\tProductID as integer,\n\t\tUnitPrice as decimal(19,4),\n\t\tUnitPriceDiscount as decimal(19,4),\n\t\tLineTotal as decimal(38,6),\n\t\trowguid as string,\n\t\tModifiedDate as timestamp\n\t),\n\tallowSchemaDrift: false,\n\tvalidateSchema: false,\n\tignoreNoFilesFound: false,\n\tformat: 'parquet',\n\tpartitionBy('hash', 1)) ~> OrderLineDetails\nOrderHeader, OrderLineDetails join(OrderHeader@SalesOrderID == OrderLineDetails@SalesOrderID,\n\tjoinType:'inner',\n\tmatchType:'exact',\n\tignoreSpaces: false,\n\tpartitionBy('hash', 1),\n\tbroadcast: 'both')~> JoinHeaderToLineDetails\nJoinHeaderToLineDetails aggregate(groupBy(SalesOrderNumber),\n\tRecordCount = count(SalesOrderDetailID),\n\tpartitionBy('roundRobin', 4)) ~> OrderLineCount\nOrderLineCount derive(AuditValue = $AuditColumn) ~> AddAuditColum\nAddAuditColum sink(allowSchemaDrift: false,\n\tvalidateSchema: false,\n\tinput(\n\t\tSalesOrderNumber as string,\n\t\tRecordCount as integer\n\t),\n\tdeletable:false,\n\tinsertable:true,\n\tupdateable:false,\n\tupsertable:false,\n\ttruncate:true,\n\tformat: 'table',\n\tskipDuplicateMapInputs: true,\n\tskipDuplicateMapOutputs: true,\n\terrorHandlingOption: 'stopOnFirstError',\n\tmapColumn(\n\t\tSalesOrderNumber,\n\t\tRecordCount\n\t),\n\tpartitionBy('roundRobin', 4)) ~> OrderSummary" + } + }, + "dependsOn": [] + }, + { + "name": "[concat(parameters('factoryName'), '/UpdateProductDimension')]", + "type": "Microsoft.DataFactory/factories/dataflows", + "apiVersion": "2018-06-01", + "properties": { + "folder": { + "name": "Labs" + }, + "type": "MappingDataFlow", + "typeProperties": { + "sources": [ + { + "dataset": { + "referenceName": "ADLS_TSV_AdventureWorks", + "type": "DatasetReference" + }, + "name": "Product" + }, + { + "dataset": { + "referenceName": "ADLS_TSV_AdventureWorks", + "type": "DatasetReference" + }, + "name": "ProductSubcategory" + }, + { + "dataset": { + "referenceName": "ADLS_TSV_AdventureWorks", + "type": "DatasetReference" + }, + "name": "ProductCategory" + } + ], + "sinks": [ + { + "linkedService": { + "referenceName": "ADLS_saintegrationpipelines", + "type": "LinkedServiceReference" + }, + "name": "WriteToDataLake" + } + ], + "transformations": [ + { + "name": "SelectProductColumns" + }, + { + "name": "SelectSubcategoryColumns" + }, + { + "name": "SelectCategoryColumns" + }, + { + "name": "LookupProductCategory" + }, + { + "name": "LookupProductSubcategory" + }, + { + "name": "RemoveDuplicateColumns" + } + ], + "scriptLines": [ + "source(output(", + " ProductId as integer,", + " Product as string,", + " {_col2_} as string,", + " {_col3_} as boolean,", + " {_col4_} as boolean,", + " {_col5_} as string,", + " {_col6_} as short,", + " {_col7_} as short,", + " {_col8_} as double,", + " {_col9_} as double,", + " {_col10_} as string,", + " {_col11_} as string,", + " {_col12_} as string,", + " {_col13_} as double,", + " {_col14_} as short,", + " {_col15_} as string,", + " {_col16_} as string,", + " {_col17_} as string,", + " SubcategoryId as integer,", + " {_col19_} as short,", + " {_col20_} as timestamp,", + " {_col21_} as timestamp,", + " {_col22_} as string,", + " {_col23_} as string,", + " {_col24_} as string", + " ),", + " allowSchemaDrift: true,", + " validateSchema: false,", + " ignoreNoFilesFound: false) ~> Product", + "source(output(", + " SubcategoryId as integer,", + " CategoryId as integer,", + " Subcategory as string,", + " {_col3_} as string,", + " {_col4_} as timestamp", + " ),", + " allowSchemaDrift: true,", + " validateSchema: false,", + " ignoreNoFilesFound: false) ~> ProductSubcategory", + "source(output(", + " CategoryId as integer,", + " Category as string,", + " {_col2_} as string,", + " {_col3_} as timestamp", + " ),", + " allowSchemaDrift: true,", + " validateSchema: false,", + " ignoreNoFilesFound: false) ~> ProductCategory", + "Product select(mapColumn(", + " ProductId,", + " Product,", + " SubcategoryId", + " ),", + " skipDuplicateMapInputs: true,", + " skipDuplicateMapOutputs: true) ~> SelectProductColumns", + "ProductSubcategory select(mapColumn(", + " SubcategoryId,", + " CategoryId,", + " Subcategory", + " ),", + " skipDuplicateMapInputs: true,", + " skipDuplicateMapOutputs: true) ~> SelectSubcategoryColumns", + "ProductCategory select(mapColumn(", + " CategoryId,", + " Category", + " ),", + " skipDuplicateMapInputs: true,", + " skipDuplicateMapOutputs: true) ~> SelectCategoryColumns", + "SelectSubcategoryColumns, SelectCategoryColumns lookup(SelectSubcategoryColumns@CategoryId == SelectCategoryColumns@CategoryId,", + " multiple: false,", + " pickup: 'any',", + " broadcast: 'auto')~> LookupProductCategory", + "SelectProductColumns, LookupProductCategory lookup(SelectProductColumns@SubcategoryId == SelectSubcategoryColumns@SubcategoryId,", + " multiple: false,", + " pickup: 'any',", + " broadcast: 'auto')~> LookupProductSubcategory", + "LookupProductSubcategory select(mapColumn(", + " ProductId,", + " Product,", + " SubcategoryId = SelectProductColumns@SubcategoryId,", + " SubcategoryId = SelectSubcategoryColumns@SubcategoryId,", + " CategoryId = SelectSubcategoryColumns@CategoryId,", + " Subcategory,", + " CategoryId = SelectCategoryColumns@CategoryId,", + " Category", + " ),", + " skipDuplicateMapInputs: true,", + " skipDuplicateMapOutputs: true) ~> RemoveDuplicateColumns", + "RemoveDuplicateColumns sink(allowSchemaDrift: true,", + " validateSchema: false,", + " format: 'parquet',", + " fileSystem: 'lakeroot',", + " folderPath: 'Conformed/DimProduct',", + " truncate: true,", + " umask: 0022,", + " preCommands: [],", + " postCommands: [],", + " skipDuplicateMapInputs: true,", + " skipDuplicateMapOutputs: true) ~> WriteToDataLake" + ] + } + }, + "dependsOn": [] + }, { "name": "[concat(parameters('factoryName'), '/UpdateProductDimension_Sorted')]", "type": "Microsoft.DataFactory/factories/dataflows", @@ -227,7 +488,7 @@ } }, { - "name": "Set Global Param", + "name": "Set Var From Global Param", "type": "SetVariable", "dependsOn": [ { @@ -320,7 +581,9 @@ }, "annotations": [] }, - "dependsOn": [] + "dependsOn": [ + "[concat(variables('factoryId'), '/dataflows/UpdateProductDimension')]" + ] }, { "name": "[concat(parameters('factoryName'), '/CopyAWEntities')]", @@ -384,6 +647,68 @@ }, "dependsOn": [] }, + { + "name": "[concat(parameters('factoryName'), '/Order Summary with Mapping With Param')]", + "type": "Microsoft.DataFactory/factories/pipelines", + "apiVersion": "2018-06-01", + "properties": { + "activities": [ + { + "name": "Mapping Order Aggregation", + "type": "ExecuteDataFlow", + "dependsOn": [], + "policy": { + "timeout": "7.00:00:00", + "retry": 0, + "retryIntervalInSeconds": 30, + "secureOutput": false, + "secureInput": false + }, + "userProperties": [], + "typeProperties": { + "dataflow": { + "referenceName": "MappingOrderAggregationWithParam", + "type": "DataFlowReference", + "parameters": { + "AuditColumn": { + "value": "'@{pipeline().parameters.AuditColumnValue}'", + "type": "Expression" + } + }, + "datasetParameters": { + "OrderHeader": {}, + "OrderLineDetails": {}, + "OrderSummary": {} + } + }, + "staging": {}, + "compute": { + "coreCount": 8, + "computeType": "General" + }, + "traceLevel": "Fine" + } + } + ], + "policy": { + "elapsedTimeMetric": {}, + "cancelAfter": {} + }, + "parameters": { + "AuditColumnValue": { + "type": "string" + } + }, + "folder": { + "name": "Demo Pipelines/Data Flows" + }, + "annotations": [], + "lastPublishTime": "2020-09-02T14:13:15Z" + }, + "dependsOn": [ + "[concat(variables('factoryId'), '/dataflows/MappingOrderAggregationWithParam')]" + ] + }, { "name": "[concat(parameters('factoryName'), '/Order Summary with Mapping')]", "type": "Microsoft.DataFactory/factories/pipelines", @@ -432,7 +757,9 @@ "annotations": [], "lastPublishTime": "2020-09-02T14:13:15Z" }, - "dependsOn": [] + "dependsOn": [ + "[concat(variables('factoryId'), '/dataflows/MappingOrderAggregation')]" + ] }, { "name": "[concat(parameters('factoryName'), '/Scale Out Level 1')]", diff --git a/TrainingFactoryDev/linkedTemplates/ArmTemplate_master.json b/TrainingFactoryDev/linkedTemplates/ArmTemplate_master.json index 80eae95..95cb04a 100644 --- a/TrainingFactoryDev/linkedTemplates/ArmTemplate_master.json +++ b/TrainingFactoryDev/linkedTemplates/ArmTemplate_master.json @@ -11,6 +11,10 @@ "type": "secureString", "metadata": "Secure string for 'connectionString' of 'AzureSqlDatabase1'" }, + "EmailSenderFunction_functionKey": { + "type": "secureString", + "metadata": "Secure string for 'functionKey' of 'EmailSenderFunction'" + }, "TrainingStore01_connectionString": { "type": "secureString", "metadata": "Secure string for 'connectionString' of 'TrainingStore01'" @@ -51,6 +55,10 @@ "type": "string", "defaultValue": "batchfortraining01" }, + "EmailSenderFunction_properties_typeProperties_functionAppUrl": { + "type": "string", + "defaultValue": "https://frameworksupportfunctions.azurewebsites.net" + }, "GenericKeys_properties_typeProperties_baseUrl": { "type": "string", "defaultValue": "@{linkedService().baseUrl}" @@ -154,6 +162,9 @@ "AzureSqlDatabase1_connectionString": { "value": "[parameters('AzureSqlDatabase1_connectionString')]" }, + "EmailSenderFunction_functionKey": { + "value": "[parameters('EmailSenderFunction_functionKey')]" + }, "TrainingStore01_connectionString": { "value": "[parameters('TrainingStore01_connectionString')]" }, @@ -166,6 +177,9 @@ "AzureDataLakeStorage1_properties_typeProperties_url": { "value": "[parameters('AzureDataLakeStorage1_properties_typeProperties_url')]" }, + "EmailSenderFunction_properties_typeProperties_functionAppUrl": { + "value": "[parameters('EmailSenderFunction_properties_typeProperties_functionAppUrl')]" + }, "GenericKeys_properties_typeProperties_baseUrl": { "value": "[parameters('GenericKeys_properties_typeProperties_baseUrl')]" },