{
"$schema": "http://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
"contentVersion": "1.0.0.0",
"parameters": {
"factoryName": {
"type": "string",
"metadata": {
"description": "Data Factory name"
},
"defaultValue": "TrainingFactoryDev"
}
},
"variables": {
"factoryId": "[concat('Microsoft.DataFactory/factories/', parameters('factoryName'))]"
},
"resources": [
{
"name": "[concat(parameters('factoryName'), '/UpdateProductDimension_Sorted')]",
"type": "Microsoft.DataFactory/factories/dataflows",
"apiVersion": "2018-06-01",
"properties": {
"folder": {
"name": "Labs"
},
"type": "MappingDataFlow",
"typeProperties": {
"sources": [
{
"dataset": {
"referenceName": "ADLS_TSV_AdventureWorks",
"type": "DatasetReference"
},
"name": "Product"
},
{
"dataset": {
"referenceName": "ADLS_TSV_AdventureWorks",
"type": "DatasetReference"
},
"name": "ProductSubcategory"
},
{
"dataset": {
"referenceName": "ADLS_TSV_AdventureWorks",
"type": "DatasetReference"
},
"name": "ProductCategory"
}
],
"sinks": [
{
"linkedService": {
"referenceName": "ADLS_saintegrationpipelines",
"type": "LinkedServiceReference"
},
"name": "WriteToDataLake"
}
],
"transformations": [
{
"name": "SelectProductColumns"
},
{
"name": "SelectSubcategoryColumns"
},
{
"name": "SelectCategoryColumns"
},
{
"name": "LookupProductCategory"
},
{
"name": "LookupProductSubcategory"
},
{
"name": "RemoveDuplicateColumns"
},
{
"name": "SortBySubcategory"
}
],
"scriptLines": [
"source(output(",
" ProductId as integer,",
" Product as string,",
" {_col2_} as string,",
" {_col3_} as boolean,",
" {_col4_} as boolean,",
" {_col5_} as string,",
" {_col6_} as short,",
" {_col7_} as short,",
" {_col8_} as double,",
" {_col9_} as double,",
" {_col10_} as string,",
" {_col11_} as string,",
" {_col12_} as string,",
" {_col13_} as double,",
" {_col14_} as short,",
" {_col15_} as string,",
" {_col16_} as string,",
" {_col17_} as string,",
" SubcategoryId as integer,",
" {_col19_} as short,",
" {_col20_} as timestamp,",
" {_col21_} as timestamp,",
" {_col22_} as string,",
" {_col23_} as string,",
" {_col24_} as string",
" ),",
" allowSchemaDrift: true,",
" validateSchema: false,",
" ignoreNoFilesFound: false) ~> Product",
"source(output(",
" SubcategoryId as integer,",
" CategoryId as integer,",
" Subcategory as string,",
" {_col3_} as string,",
" {_col4_} as timestamp",
" ),",
" allowSchemaDrift: true,",
" validateSchema: false,",
" ignoreNoFilesFound: false) ~> ProductSubcategory",
"source(output(",
" CategoryId as integer,",
" Category as string,",
" {_col2_} as string,",
" {_col3_} as timestamp",
" ),",
" allowSchemaDrift: true,",
" validateSchema: false,",
" ignoreNoFilesFound: false) ~> ProductCategory",
"Product select(mapColumn(",
" ProductId,",
" Product,",
" SubcategoryId",
" ),",
" skipDuplicateMapInputs: true,",
" skipDuplicateMapOutputs: true) ~> SelectProductColumns",
"ProductSubcategory select(mapColumn(",
" SubcategoryId,",
" CategoryId,",
" Subcategory",
" ),",
" skipDuplicateMapInputs: true,",
" skipDuplicateMapOutputs: true) ~> SelectSubcategoryColumns",
"ProductCategory select(mapColumn(",
" CategoryId,",
" Category",
" ),",
" skipDuplicateMapInputs: true,",
" skipDuplicateMapOutputs: true) ~> SelectCategoryColumns",
"SelectSubcategoryColumns, SelectCategoryColumns lookup(SelectSubcategoryColumns@CategoryId == SelectCategoryColumns@CategoryId,",
" multiple: false,",
" pickup: 'any',",
" broadcast: 'auto')~> LookupProductCategory",
"SelectProductColumns, LookupProductCategory lookup(SelectProductColumns@SubcategoryId == SelectSubcategoryColumns@SubcategoryId,",
" multiple: false,",
" pickup: 'any',",
" broadcast: 'auto')~> LookupProductSubcategory",
"LookupProductSubcategory select(mapColumn(",
" ProductId,",
" Product,",
" SubcategoryId = SelectProductColumns@SubcategoryId,",
" SubcategoryId = SelectSubcategoryColumns@SubcategoryId,",
" CategoryId = SelectSubcategoryColumns@CategoryId,",
" Subcategory,",
" CategoryId = SelectCategoryColumns@CategoryId,",
" Category",
" ),",
" skipDuplicateMapInputs: true,",
" skipDuplicateMapOutputs: true) ~> RemoveDuplicateColumns",
"RemoveDuplicateColumns sort(asc(Subcategory, false)) ~> SortBySubcategory",
"SortBySubcategory sink(allowSchemaDrift: true,",
" validateSchema: false,",
" format: 'parquet',",
" fileSystem: 'lakeroot',",
" folderPath: 'Conformed/DimProduct',",
" truncate: true,",
" umask: 0022,",
" preCommands: [],",
" postCommands: [],",
" skipDuplicateMapInputs: true,",
" skipDuplicateMapOutputs: true) ~> WriteToDataLake"
]
}
},
"dependsOn": []
},
{
"name": "[concat(parameters('factoryName'), '/06 - Example Global Param')]",
"type": "Microsoft.DataFactory/factories/pipelines",
"apiVersion": "2018-06-01",
"properties": {
"activities": [
{
"name": "06 - Get Any File List",
"type": "ExecutePipeline",
"dependsOn": [],
"userProperties": [],
"typeProperties": {
"pipeline": {
"referenceName": "05 - Get File List Utility",
"type": "PipelineReference"
},
"waitOnCompletion": false,
"parameters": {
"GetFileQuery": "SELECT * FROM [dbo].[FilesToUpload] WHERE [Enabled] = 1;"
}
}
},
{
"name": "Set Pipe Output",
"type": "SetVariable",
"dependsOn": [
{
"activity": "06 - Get Any File List",
"dependencyConditions": [
"Succeeded"
]
}
],
"userProperties": [],
"typeProperties": {
"variableName": "WorkerOutput",
"value": {
"value": "@activity('06 - Get Any File List').output.pipelineRunId",
"type": "Expression"
}
}
},
{
"name": "Set Global Param",
"type": "SetVariable",
"dependsOn": [
{
"activity": "06 - Get Any File List",
"dependencyConditions": [
"Succeeded"
]
}
],
"userProperties": [],
"typeProperties": {
"variableName": "GlobalParam",
"value": {
"value": "@concat(pipeline().globalParameters.Environment,'Test')",
"type": "Expression"
}
}
}
],
"policy": {
"elapsedTimeMetric": {},
"cancelAfter": {}
},
"variables": {
"WorkerOutput": {
"type": "String"
},
"GlobalParam": {
"type": "String"
}
},
"folder": {
"name": "Demo Pipelines/Data Uploads"
},
"annotations": []
},
"dependsOn": []
},
{
"name": "[concat(parameters('factoryName'), '/BuildDimProduct')]",
"type": "Microsoft.DataFactory/factories/pipelines",
"apiVersion": "2018-06-01",
"properties": {
"activities": [
{
"name": "Run UpdateProductDimension",
"type": "ExecuteDataFlow",
"dependsOn": [],
"policy": {
"timeout": "0.12:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"dataflow": {
"referenceName": "UpdateProductDimension",
"type": "DataFlowReference",
"parameters": {},
"datasetParameters": {
"Product": {
"FileName": "Product.tsv"
},
"ProductSubcategory": {
"FileName": "ProductSubcategory.tsv"
},
"ProductCategory": {
"FileName": "ProductCategory.tsv"
},
"WriteToDataLake": {}
}
},
"staging": {},
"compute": {
"coreCount": 8,
"computeType": "General"
},
"traceLevel": "Fine"
}
}
],
"policy": {
"elapsedTimeMetric": {},
"cancelAfter": {}
},
"folder": {
"name": "Labs"
},
"annotations": []
},
"dependsOn": []
},
{
"name": "[concat(parameters('factoryName'), '/CopyAWEntities')]",
"type": "Microsoft.DataFactory/factories/pipelines",
"apiVersion": "2018-06-01",
"properties": {
"activities": [
{
"name": "Copy each AW entity",
"type": "ForEach",
"dependsOn": [],
"userProperties": [],
"typeProperties": {
"items": {
"value": "@variables('Entities')",
"type": "Expression"
},
"isSequential": false,
"activities": [
{
"name": "Execute CopyAWEntity",
"type": "ExecutePipeline",
"dependsOn": [],
"userProperties": [],
"typeProperties": {
"pipeline": {
"referenceName": "CopyAWEntity",
"type": "PipelineReference"
},
"waitOnCompletion": true,
"parameters": {
"Entity": {
"value": "@item()",
"type": "Expression"
}
}
}
}
]
}
}
],
"policy": {
"elapsedTimeMetric": {},
"cancelAfter": {}
},
"variables": {
"Entities": {
"type": "Array",
"defaultValue": [
"Product",
"ProductSubcategory",
"ProductCategory"
]
}
},
"folder": {
"name": "Labs"
},
"annotations": []
},
"dependsOn": []
},
{
"name": "[concat(parameters('factoryName'), '/Order Summary with Mapping')]",
"type": "Microsoft.DataFactory/factories/pipelines",
"apiVersion": "2018-06-01",
"properties": {
"activities": [
{
"name": "Mapping Order Aggregation",
"type": "ExecuteDataFlow",
"dependsOn": [],
"policy": {
"timeout": "7.00:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"dataflow": {
"referenceName": "MappingOrderAggregation",
"type": "DataFlowReference",
"parameters": {},
"datasetParameters": {
"OrderHeader": {},
"OrderLineDetails": {},
"OrderSummary": {}
}
},
"staging": {},
"compute": {
"coreCount": 8,
"computeType": "General"
},
"traceLevel": "Fine"
}
}
],
"policy": {
"elapsedTimeMetric": {},
"cancelAfter": {}
},
"folder": {
"name": "Demo Pipelines/Data Flows"
},
"annotations": [],
"lastPublishTime": "2020-09-02T14:13:15Z"
},
"dependsOn": []
},
{
"name": "[concat(parameters('factoryName'), '/Scale Out Level 1')]",
"type": "Microsoft.DataFactory/factories/pipelines",
"apiVersion": "2018-06-01",
"properties": {
"activities": [
{
"name": "Get Bucket Ids",
"type": "Lookup",
"dependsOn": [
{
"activity": "Reset And Log Start",
"dependencyConditions": [
"Succeeded"
]
},
{
"activity": "Reset Bucket Process Map",
"dependencyConditions": [
"Succeeded"
]
}
],
"policy": {
"timeout": "7.00:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"source": {
"type": "AzureSqlSource",
"sqlReaderStoredProcedureName": "[[dbo].[GetBuckets]",
"partitionOption": "None"
},
"dataset": {
"referenceName": "GetSetMetadata",
"type": "DatasetReference",
"parameters": {}
},
"firstRowOnly": false
}
},
{
"name": "Call Buckets",
"type": "ForEach",
"dependsOn": [
{
"activity": "Get Bucket Ids",
"dependencyConditions": [
"Succeeded"
]
}
],
"userProperties": [],
"typeProperties": {
"items": {
"value": "@activity('Get Bucket Ids').output.value",
"type": "Expression"
},
"isSequential": false,
"batchCount": 50,
"activities": [
{
"name": "Call Level 2",
"type": "ExecutePipeline",
"dependsOn": [],
"userProperties": [],
"typeProperties": {
"pipeline": {
"referenceName": "Scale Out Level 2",
"type": "PipelineReference"
},
"waitOnCompletion": true,
"parameters": {
"BucketId": {
"value": "@{item().BucketId}",
"type": "Expression"
}
}
}
}
]
}
},
{
"name": "Log End",
"type": "SqlServerStoredProcedure",
"dependsOn": [
{
"activity": "Call Buckets",
"dependencyConditions": [
"Succeeded"
]
},
{
"activity": "Reset And Log Start",
"dependencyConditions": [
"Completed"
]
}
],
"policy": {
"timeout": "7.00:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"storedProcedureName": "[[dbo].[SetBucketLogEntry]",
"storedProcedureParameters": {
"LogId": {
"value": {
"value": "@activity('Reset And Log Start').output.firstRow.LogId",
"type": "Expression"
},
"type": "Int32"
}
}
},
"linkedServiceName": {
"referenceName": "trainingdb01",
"type": "LinkedServiceReference"
}
},
{
"name": "Reset And Log Start",
"type": "Lookup",
"dependsOn": [],
"policy": {
"timeout": "7.00:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"source": {
"type": "AzureSqlSource",
"sqlReaderStoredProcedureName": "[[dbo].[SetBucketLogEntry]",
"partitionOption": "None"
},
"dataset": {
"referenceName": "GetSetMetadata",
"type": "DatasetReference",
"parameters": {}
}
}
},
{
"name": "Reset Bucket Process Map",
"type": "SqlServerStoredProcedure",
"dependsOn": [],
"policy": {
"timeout": "7.00:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"storedProcedureName": "[[dbo].[SetBucketProcesses]"
},
"linkedServiceName": {
"referenceName": "trainingdb01",
"type": "LinkedServiceReference"
}
}
],
"policy": {
"elapsedTimeMetric": {},
"cancelAfter": {}
},
"folder": {
"name": "Demo Pipelines/Scaling Out"
},
"annotations": [],
"lastPublishTime": "2020-08-24T13:41:19Z"
},
"dependsOn": []
}
]
}