{
  "$schema": "http://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
  "contentVersion": "1.0.0.0",
  "parameters": {
    "factoryName": {
      "type": "string",
      "metadata": {
        "description": "Data Factory name"
      },
      "defaultValue": "TrainingFactoryDev"
    }
  },
  "variables": {
    "factoryId": "[concat('Microsoft.DataFactory/factories/', parameters('factoryName'))]"
  },
  "resources": [
    {
      "name": "[concat(parameters('factoryName'), '/UpdateProductDimension_Sorted')]",
      "type": "Microsoft.DataFactory/factories/dataflows",
      "apiVersion": "2018-06-01",
      "properties": {
        "folder": {
          "name": "Labs"
        },
        "type": "MappingDataFlow",
        "typeProperties": {
          "sources": [
            {
              "dataset": {
                "referenceName": "ADLS_TSV_AdventureWorks",
                "type": "DatasetReference"
              },
              "name": "Product"
            },
            {
              "dataset": {
                "referenceName": "ADLS_TSV_AdventureWorks",
                "type": "DatasetReference"
              },
              "name": "ProductSubcategory"
            },
            {
              "dataset": {
                "referenceName": "ADLS_TSV_AdventureWorks",
                "type": "DatasetReference"
              },
              "name": "ProductCategory"
            }
          ],
          "sinks": [
            {
              "linkedService": {
                "referenceName": "ADLS_saintegrationpipelines",
                "type": "LinkedServiceReference"
              },
              "name": "WriteToDataLake"
            }
          ],
          "transformations": [
            {
              "name": "SelectProductColumns"
            },
            {
              "name": "SelectSubcategoryColumns"
            },
            {
              "name": "SelectCategoryColumns"
            },
            {
              "name": "LookupProductCategory"
            },
            {
              "name": "LookupProductSubcategory"
            },
            {
              "name": "RemoveDuplicateColumns"
            },
            {
              "name": "SortBySubcategory"
            }
          ],
          "scriptLines": [
            "source(output(",
            " ProductId as integer,",
            " Product as string,",
            " {_col2_} as string,",
            " {_col3_} as boolean,",
            " {_col4_} as boolean,",
            " {_col5_} as string,",
            " {_col6_} as short,",
            " {_col7_} as short,",
            " {_col8_} as double,",
            " {_col9_} as double,",
            " {_col10_} as string,",
            " {_col11_} as string,",
            " {_col12_} as string,",
            " {_col13_} as double,",
            " {_col14_} as short,",
            " {_col15_} as string,",
            " {_col16_} as string,",
            " {_col17_} as string,",
            " SubcategoryId as integer,",
            " {_col19_} as short,",
            " {_col20_} as timestamp,",
            " {_col21_} as timestamp,",
            " {_col22_} as string,",
            " {_col23_} as string,",
            " {_col24_} as string",
            " ),",
            " allowSchemaDrift: true,",
            " validateSchema: false,",
            " ignoreNoFilesFound: false) ~> Product",
            "source(output(",
            " SubcategoryId as integer,",
            " CategoryId as integer,",
            " Subcategory as string,",
            " {_col3_} as string,",
            " {_col4_} as timestamp",
            " ),",
            " allowSchemaDrift: true,",
            " validateSchema: false,",
            " ignoreNoFilesFound: false) ~> ProductSubcategory",
            "source(output(",
            " CategoryId as integer,",
            " Category as string,",
            " {_col2_} as string,",
            " {_col3_} as timestamp",
            " ),",
            " allowSchemaDrift: true,",
            " validateSchema: false,",
            " ignoreNoFilesFound: false) ~> ProductCategory",
            "Product select(mapColumn(",
            " ProductId,",
            " Product,",
            " SubcategoryId",
            " ),",
            " skipDuplicateMapInputs: true,",
            " skipDuplicateMapOutputs: true) ~> SelectProductColumns",
            "ProductSubcategory select(mapColumn(",
            " SubcategoryId,",
            " CategoryId,",
            " Subcategory",
            " ),",
            " skipDuplicateMapInputs: true,",
            " skipDuplicateMapOutputs: true) ~> SelectSubcategoryColumns",
            "ProductCategory select(mapColumn(",
            " CategoryId,",
            " Category",
            " ),",
            " skipDuplicateMapInputs: true,",
            " skipDuplicateMapOutputs: true) ~> SelectCategoryColumns",
            "SelectSubcategoryColumns, SelectCategoryColumns lookup(SelectSubcategoryColumns@CategoryId == SelectCategoryColumns@CategoryId,",
            " multiple: false,",
            " pickup: 'any',",
            " broadcast: 'auto')~> LookupProductCategory",
            "SelectProductColumns, LookupProductCategory lookup(SelectProductColumns@SubcategoryId == SelectSubcategoryColumns@SubcategoryId,",
            " multiple: false,",
            " pickup: 'any',",
            " broadcast: 'auto')~> LookupProductSubcategory",
            "LookupProductSubcategory select(mapColumn(",
            " ProductId,",
            " Product,",
            " SubcategoryId = SelectProductColumns@SubcategoryId,",
            " SubcategoryId = SelectSubcategoryColumns@SubcategoryId,",
            " CategoryId = SelectSubcategoryColumns@CategoryId,",
            " Subcategory,",
            " CategoryId = SelectCategoryColumns@CategoryId,",
            " Category",
            " ),",
            " skipDuplicateMapInputs: true,",
            " skipDuplicateMapOutputs: true) ~> RemoveDuplicateColumns",
            "RemoveDuplicateColumns sort(asc(Subcategory, false)) ~> SortBySubcategory",
            "SortBySubcategory sink(allowSchemaDrift: true,",
            " validateSchema: false,",
            " format: 'parquet',",
            " fileSystem: 'lakeroot',",
            " folderPath: 'Conformed/DimProduct',",
            " truncate: true,",
            " umask: 0022,",
            " preCommands: [],",
            " postCommands: [],",
            " skipDuplicateMapInputs: true,",
            " skipDuplicateMapOutputs: true) ~> WriteToDataLake"
          ]
        }
      },
      "dependsOn": []
    },
    {
      "name": "[concat(parameters('factoryName'), '/06 - Example Global Param')]",
      "type": "Microsoft.DataFactory/factories/pipelines",
      "apiVersion": "2018-06-01",
      "properties": {
        "activities": [
          {
            "name": "06 - Get Any File List",
            "type": "ExecutePipeline",
            "dependsOn": [],
            "userProperties": [],
            "typeProperties": {
              "pipeline": {
                "referenceName": "05 - Get File List Utility",
                "type": "PipelineReference"
              },
              "waitOnCompletion": false,
              "parameters": {
                "GetFileQuery": "SELECT * FROM [dbo].[FilesToUpload] WHERE [Enabled] = 1;"
              }
            }
          },
          {
            "name": "Set Pipe Output",
            "type": "SetVariable",
            "dependsOn": [
              {
                "activity": "06 - Get Any File List",
                "dependencyConditions": [
                  "Succeeded"
                ]
              }
            ],
            "userProperties": [],
            "typeProperties": {
              "variableName": "WorkerOutput",
              "value": {
                "value": "@activity('06 - Get Any File List').output.pipelineRunId",
                "type": "Expression"
              }
            }
          },
          {
            "name": "Set Global Param",
            "type": "SetVariable",
            "dependsOn": [
              {
                "activity": "06 - Get Any File List",
                "dependencyConditions": [
                  "Succeeded"
                ]
              }
            ],
            "userProperties": [],
            "typeProperties": {
              "variableName": "GlobalParam",
              "value": {
                "value": "@concat(pipeline().globalParameters.Environment,'Test')",
                "type": "Expression"
              }
            }
          }
        ],
        "policy": {
          "elapsedTimeMetric": {},
          "cancelAfter": {}
        },
        "variables": {
          "WorkerOutput": {
            "type": "String"
          },
          "GlobalParam": {
            "type": "String"
          }
        },
        "folder": {
          "name": "Demo Pipelines/Data Uploads"
        },
        "annotations": []
      },
      "dependsOn": []
    },
    {
      "name": "[concat(parameters('factoryName'), '/BuildDimProduct')]",
      "type": "Microsoft.DataFactory/factories/pipelines",
      "apiVersion": "2018-06-01",
      "properties": {
        "activities": [
          {
            "name": "Run UpdateProductDimension",
            "type": "ExecuteDataFlow",
            "dependsOn": [],
            "policy": {
              "timeout": "0.12:00:00",
              "retry": 0,
              "retryIntervalInSeconds": 30,
              "secureOutput": false,
              "secureInput": false
            },
            "userProperties": [],
            "typeProperties": {
              "dataflow": {
                "referenceName": "UpdateProductDimension",
                "type": "DataFlowReference",
                "parameters": {},
                "datasetParameters": {
                  "Product": {
                    "FileName": "Product.tsv"
                  },
                  "ProductSubcategory": {
                    "FileName": "ProductSubcategory.tsv"
                  },
                  "ProductCategory": {
                    "FileName": "ProductCategory.tsv"
                  },
                  "WriteToDataLake": {}
                }
              },
              "staging": {},
              "compute": {
                "coreCount": 8,
                "computeType": "General"
              },
              "traceLevel": "Fine"
            }
          }
        ],
        "policy": {
          "elapsedTimeMetric": {},
          "cancelAfter": {}
        },
        "folder": {
          "name": "Labs"
        },
        "annotations": []
      },
      "dependsOn": []
    },
    {
      "name": "[concat(parameters('factoryName'), '/CopyAWEntities')]",
      "type": "Microsoft.DataFactory/factories/pipelines",
      "apiVersion": "2018-06-01",
      "properties": {
        "activities": [
          {
            "name": "Copy each AW entity",
            "type": "ForEach",
            "dependsOn": [],
            "userProperties": [],
            "typeProperties": {
              "items": {
                "value": "@variables('Entities')",
                "type": "Expression"
              },
              "isSequential": false,
              "activities": [
                {
                  "name": "Execute CopyAWEntity",
                  "type": "ExecutePipeline",
                  "dependsOn": [],
                  "userProperties": [],
                  "typeProperties": {
                    "pipeline": {
                      "referenceName": "CopyAWEntity",
                      "type": "PipelineReference"
                    },
                    "waitOnCompletion": true,
                    "parameters": {
                      "Entity": {
                        "value": "@item()",
                        "type": "Expression"
                      }
                    }
                  }
                }
              ]
            }
          }
        ],
        "policy": {
          "elapsedTimeMetric": {},
          "cancelAfter": {}
        },
        "variables": {
          "Entities": {
            "type": "Array",
            "defaultValue": [
              "Product",
              "ProductSubcategory",
              "ProductCategory"
            ]
          }
        },
        "folder": {
          "name": "Labs"
        },
        "annotations": []
      },
      "dependsOn": []
    },
    {
      "name": "[concat(parameters('factoryName'), '/Order Summary with Mapping')]",
      "type": "Microsoft.DataFactory/factories/pipelines",
      "apiVersion": "2018-06-01",
      "properties": {
        "activities": [
          {
            "name": "Mapping Order Aggregation",
            "type": "ExecuteDataFlow",
            "dependsOn": [],
            "policy": {
              "timeout": "7.00:00:00",
              "retry": 0,
              "retryIntervalInSeconds": 30,
              "secureOutput": false,
              "secureInput": false
            },
            "userProperties": [],
            "typeProperties": {
              "dataflow": {
                "referenceName": "MappingOrderAggregation",
                "type": "DataFlowReference",
                "parameters": {},
                "datasetParameters": {
                  "OrderHeader": {},
                  "OrderLineDetails": {},
                  "OrderSummary": {}
                }
              },
              "staging": {},
              "compute": {
                "coreCount": 8,
                "computeType": "General"
              },
              "traceLevel": "Fine"
            }
          }
        ],
        "policy": {
          "elapsedTimeMetric": {},
          "cancelAfter": {}
        },
        "folder": {
          "name": "Demo Pipelines/Data Flows"
        },
        "annotations": [],
        "lastPublishTime": "2020-09-02T14:13:15Z"
      },
      "dependsOn": []
    },
    {
      "name": "[concat(parameters('factoryName'), '/Scale Out Level 1')]",
      "type": "Microsoft.DataFactory/factories/pipelines",
      "apiVersion": "2018-06-01",
      "properties": {
        "activities": [
          {
            "name": "Get Bucket Ids",
            "type": "Lookup",
            "dependsOn": [
              {
                "activity": "Reset And Log Start",
                "dependencyConditions": [
                  "Succeeded"
                ]
              },
              {
                "activity": "Reset Bucket Process Map",
                "dependencyConditions": [
                  "Succeeded"
                ]
              }
            ],
            "policy": {
              "timeout": "7.00:00:00",
              "retry": 0,
              "retryIntervalInSeconds": 30,
              "secureOutput": false,
              "secureInput": false
            },
            "userProperties": [],
            "typeProperties": {
              "source": {
                "type": "AzureSqlSource",
                "sqlReaderStoredProcedureName": "[[dbo].[GetBuckets]",
                "partitionOption": "None"
              },
              "dataset": {
                "referenceName": "GetSetMetadata",
                "type": "DatasetReference",
                "parameters": {}
              },
              "firstRowOnly": false
            }
          },
          {
            "name": "Call Buckets",
            "type": "ForEach",
            "dependsOn": [
              {
                "activity": "Get Bucket Ids",
                "dependencyConditions": [
                  "Succeeded"
                ]
              }
            ],
            "userProperties": [],
            "typeProperties": {
              "items": {
                "value": "@activity('Get Bucket Ids').output.value",
                "type": "Expression"
              },
              "isSequential": false,
              "batchCount": 50,
              "activities": [
                {
                  "name": "Call Level 2",
                  "type": "ExecutePipeline",
                  "dependsOn": [],
                  "userProperties": [],
                  "typeProperties": {
                    "pipeline": {
                      "referenceName": "Scale Out Level 2",
                      "type": "PipelineReference"
                    },
                    "waitOnCompletion": true,
                    "parameters": {
                      "BucketId": {
                        "value": "@{item().BucketId}",
                        "type": "Expression"
                      }
                    }
                  }
                }
              ]
            }
          },
          {
            "name": "Log End",
            "type": "SqlServerStoredProcedure",
            "dependsOn": [
              {
                "activity": "Call Buckets",
                "dependencyConditions": [
                  "Succeeded"
                ]
              },
              {
                "activity": "Reset And Log Start",
                "dependencyConditions": [
                  "Completed"
                ]
              }
            ],
            "policy": {
              "timeout": "7.00:00:00",
              "retry": 0,
              "retryIntervalInSeconds": 30,
              "secureOutput": false,
              "secureInput": false
            },
            "userProperties": [],
            "typeProperties": {
              "storedProcedureName": "[[dbo].[SetBucketLogEntry]",
              "storedProcedureParameters": {
                "LogId": {
                  "value": {
                    "value": "@activity('Reset And Log Start').output.firstRow.LogId",
                    "type": "Expression"
                  },
                  "type": "Int32"
                }
              }
            },
            "linkedServiceName": {
              "referenceName": "trainingdb01",
              "type": "LinkedServiceReference"
            }
          },
          {
            "name": "Reset And Log Start",
            "type": "Lookup",
            "dependsOn": [],
            "policy": {
              "timeout": "7.00:00:00",
              "retry": 0,
              "retryIntervalInSeconds": 30,
              "secureOutput": false,
              "secureInput": false
            },
            "userProperties": [],
            "typeProperties": {
              "source": {
                "type": "AzureSqlSource",
                "sqlReaderStoredProcedureName": "[[dbo].[SetBucketLogEntry]",
                "partitionOption": "None"
              },
              "dataset": {
                "referenceName": "GetSetMetadata",
                "type": "DatasetReference",
                "parameters": {}
              }
            }
          },
          {
            "name": "Reset Bucket Process Map",
            "type": "SqlServerStoredProcedure",
            "dependsOn": [],
            "policy": {
              "timeout": "7.00:00:00",
              "retry": 0,
              "retryIntervalInSeconds": 30,
              "secureOutput": false,
              "secureInput": false
            },
            "userProperties": [],
            "typeProperties": {
              "storedProcedureName": "[[dbo].[SetBucketProcesses]"
            },
            "linkedServiceName": {
              "referenceName": "trainingdb01",
              "type": "LinkedServiceReference"
            }
          }
        ],
        "policy": {
          "elapsedTimeMetric": {},
          "cancelAfter": {}
        },
        "folder": {
          "name": "Demo Pipelines/Scaling Out"
        },
        "annotations": [],
        "lastPublishTime": "2020-08-24T13:41:19Z"
      },
      "dependsOn": []
    }
  ]
}