{
  "$schema": "http://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
  "contentVersion": "1.0.0.0",
  "parameters": {
    "factoryName": {
      "type": "string",
      "metadata": "Data Factory name",
      "defaultValue": "TrainingFactoryDev"
    }
  },
  "variables": {
    "factoryId": "[concat('Microsoft.DataFactory/factories/', parameters('factoryName'))]"
  },
  "resources": [
    {
      "name": "[concat(parameters('factoryName'), '/UpdateProductDimension_Sorted')]",
      "type": "Microsoft.DataFactory/factories/dataflows",
      "apiVersion": "2018-06-01",
      "properties": {
        "folder": {
          "name": "Labs"
        },
        "type": "MappingDataFlow",
        "typeProperties": {
          "sources": [
            {
              "dataset": {
                "referenceName": "ADLS_TSV_AdventureWorks",
                "type": "DatasetReference"
              },
              "name": "Product"
            },
            {
              "dataset": {
                "referenceName": "ADLS_TSV_AdventureWorks",
                "type": "DatasetReference"
              },
              "name": "ProductSubcategory"
            },
            {
              "dataset": {
                "referenceName": "ADLS_TSV_AdventureWorks",
                "type": "DatasetReference"
              },
              "name": "ProductCategory"
            }
          ],
          "sinks": [
            {
              "linkedService": {
                "referenceName": "ADLS_saintegrationpipelines",
                "type": "LinkedServiceReference"
              },
              "name": "WriteToDataLake"
            }
          ],
          "transformations": [
            {
              "name": "SelectProductColumns"
            },
            {
              "name": "SelectSubcategoryColumns"
            },
            {
              "name": "SelectCategoryColumns"
            },
            {
              "name": "LookupProductCategory"
            },
            {
              "name": "LookupProductSubcategory"
            },
            {
              "name": "RemoveDuplicateColumns"
            },
            {
              "name": "SortBySubcategory"
            }
          ],
          "scriptLines": [
            "source(output(",
            " ProductId as integer,",
            " Product as string,",
            " {_col2_} as string,",
            " {_col3_} as boolean,",
            " {_col4_} as boolean,",
            " {_col5_} as string,",
            " {_col6_} as short,",
            " {_col7_} as short,",
            " {_col8_} as double,",
            " {_col9_} as double,",
            " {_col10_} as string,",
            " {_col11_} as string,",
            " {_col12_} as string,",
            " {_col13_} as double,",
            " {_col14_} as short,",
            " {_col15_} as string,",
            " {_col16_} as string,",
            " {_col17_} as string,",
            " SubcategoryId as integer,",
            " {_col19_} as short,",
            " {_col20_} as timestamp,",
            " {_col21_} as timestamp,",
            " {_col22_} as string,",
            " {_col23_} as string,",
            " {_col24_} as string",
            " ),",
            " allowSchemaDrift: true,",
            " validateSchema: false,",
            " ignoreNoFilesFound: false) ~> Product",
            "source(output(",
            " SubcategoryId as integer,",
            " CategoryId as integer,",
            " Subcategory as string,",
            " {_col3_} as string,",
            " {_col4_} as timestamp",
            " ),",
            " allowSchemaDrift: true,",
            " validateSchema: false,",
            " ignoreNoFilesFound: false) ~> ProductSubcategory",
            "source(output(",
            " CategoryId as integer,",
            " Category as string,",
            " {_col2_} as string,",
            " {_col3_} as timestamp",
            " ),",
            " allowSchemaDrift: true,",
            " validateSchema: false,",
            " ignoreNoFilesFound: false) ~> ProductCategory",
            "Product select(mapColumn(",
            " ProductId,",
            " Product,",
            " SubcategoryId",
            " ),",
            " skipDuplicateMapInputs: true,",
            " skipDuplicateMapOutputs: true) ~> SelectProductColumns",
            "ProductSubcategory select(mapColumn(",
            " SubcategoryId,",
            " CategoryId,",
            " Subcategory",
            " ),",
            " skipDuplicateMapInputs: true,",
            " skipDuplicateMapOutputs: true) ~> SelectSubcategoryColumns",
            "ProductCategory select(mapColumn(",
            " CategoryId,",
            " Category",
            " ),",
            " skipDuplicateMapInputs: true,",
            " skipDuplicateMapOutputs: true) ~> SelectCategoryColumns",
            "SelectSubcategoryColumns, SelectCategoryColumns lookup(SelectSubcategoryColumns@CategoryId == SelectCategoryColumns@CategoryId,",
            " multiple: false,",
            " pickup: 'any',",
            " broadcast: 'auto')~> LookupProductCategory",
            "SelectProductColumns, LookupProductCategory lookup(SelectProductColumns@SubcategoryId == SelectSubcategoryColumns@SubcategoryId,",
            " multiple: false,",
            " pickup: 'any',",
            " broadcast: 'auto')~> LookupProductSubcategory",
            "LookupProductSubcategory select(mapColumn(",
            " ProductId,",
            " Product,",
            " SubcategoryId = SelectProductColumns@SubcategoryId,",
            " SubcategoryId = SelectSubcategoryColumns@SubcategoryId,",
            " CategoryId = SelectSubcategoryColumns@CategoryId,",
            " Subcategory,",
            " CategoryId = SelectCategoryColumns@CategoryId,",
            " Category",
            " ),",
            " skipDuplicateMapInputs: true,",
            " skipDuplicateMapOutputs: true) ~> RemoveDuplicateColumns",
            "RemoveDuplicateColumns sort(asc(Subcategory, false)) ~> SortBySubcategory",
            "SortBySubcategory sink(allowSchemaDrift: true,",
            " validateSchema: false,",
            " format: 'parquet',",
            " fileSystem: 'lakeroot',",
            " folderPath: 'Conformed/DimProduct',",
            " truncate: true,",
            " umask: 0022,",
            " preCommands: [],",
            " postCommands: [],",
            " skipDuplicateMapInputs: true,",
            " skipDuplicateMapOutputs: true) ~> WriteToDataLake"
          ]
        }
      },
      "dependsOn": []
    },
    {
      "name": "[concat(parameters('factoryName'), '/06 - Example Global Param')]",
      "type": "Microsoft.DataFactory/factories/pipelines",
      "apiVersion": "2018-06-01",
      "properties": {
        "activities": [
          {
            "name": "06 - Get Any File List",
            "type": "ExecutePipeline",
            "dependsOn": [],
            "userProperties": [],
            "typeProperties": {
              "pipeline": {
                "referenceName": "05 - Get File List Utility",
                "type": "PipelineReference"
              },
              "waitOnCompletion": false,
              "parameters": {
                "GetFileQuery": "SELECT * FROM [dbo].[FilesToUpload] WHERE [Enabled] = 1;"
              }
            }
          },
          {
            "name": "Set Pipe Output",
            "type": "SetVariable",
            "dependsOn": [
              {
                "activity": "06 - Get Any File List",
                "dependencyConditions": [
                  "Succeeded"
                ]
              }
            ],
            "userProperties": [],
            "typeProperties": {
              "variableName": "WorkerOutput",
              "value": {
                "value": "@activity('06 - Get Any File List').output.pipelineRunId",
                "type": "Expression"
              }
            }
          },
          {
            "name": "Set Global Param",
            "type": "SetVariable",
            "dependsOn": [
              {
                "activity": "06 - Get Any File List",
                "dependencyConditions": [
                  "Succeeded"
                ]
              }
            ],
            "userProperties": [],
            "typeProperties": {
              "variableName": "GlobalParam",
              "value": {
                "value": "@concat(pipeline().globalParameters.Environment,'Test')",
                "type": "Expression"
              }
            }
          }
        ],
        "policy": {
          "elapsedTimeMetric": {},
          "cancelAfter": {}
        },
        "variables": {
          "WorkerOutput": {
            "type": "String"
          },
          "GlobalParam": {
            "type": "String"
          }
        },
        "folder": {
          "name": "Demo Pipelines/Data Uploads"
        },
        "annotations": []
      },
      "dependsOn": []
    },
    {
      "name": "[concat(parameters('factoryName'), '/BuildDimProduct')]",
      "type": "Microsoft.DataFactory/factories/pipelines",
      "apiVersion": "2018-06-01",
      "properties": {
        "activities": [
          {
            "name": "Run UpdateProductDimension",
            "type": "ExecuteDataFlow",
            "dependsOn": [],
            "policy": {
              "timeout": "0.12:00:00",
              "retry": 0,
              "retryIntervalInSeconds": 30,
              "secureOutput": false,
              "secureInput": false
            },
            "userProperties": [],
            "typeProperties": {
              "dataflow": {
                "referenceName": "UpdateProductDimension",
                "type": "DataFlowReference",
                "parameters": {},
                "datasetParameters": {
                  "Product": {
                    "FileName": "Product.tsv"
                  },
                  "ProductSubcategory": {
                    "FileName": "ProductSubcategory.tsv"
                  },
                  "ProductCategory": {
                    "FileName": "ProductCategory.tsv"
                  },
                  "WriteToDataLake": {}
                }
              },
              "staging": {},
              "compute": {
                "coreCount": 8,
                "computeType": "General"
              },
              "traceLevel": "Fine"
            }
          }
        ],
        "policy": {
          "elapsedTimeMetric": {},
          "cancelAfter": {}
        },
        "folder": {
          "name": "Labs"
        },
        "annotations": []
      },
      "dependsOn": []
    },
    {
      "name": "[concat(parameters('factoryName'), '/CopyAWEntities')]",
      "type": "Microsoft.DataFactory/factories/pipelines",
      "apiVersion": "2018-06-01",
      "properties": {
        "activities": [
          {
            "name": "Copy each AW entity",
            "type": "ForEach",
            "dependsOn": [],
            "userProperties": [],
            "typeProperties": {
              "items": {
                "value": "@variables('Entities')",
                "type": "Expression"
              },
              "isSequential": false,
              "activities": [
                {
                  "name": "Execute CopyAWEntity",
                  "type": "ExecutePipeline",
                  "dependsOn": [],
                  "userProperties": [],
                  "typeProperties": {
                    "pipeline": {
                      "referenceName": "CopyAWEntity",
                      "type": "PipelineReference"
                    },
                    "waitOnCompletion": true,
                    "parameters": {
                      "Entity": {
                        "value": "@item()",
                        "type": "Expression"
                      }
                    }
                  }
                }
              ]
            }
          }
        ],
        "policy": {
          "elapsedTimeMetric": {},
          "cancelAfter": {}
        },
        "variables": {
          "Entities": {
            "type": "Array",
            "defaultValue": [
              "Product",
              "ProductSubcategory",
              "ProductCategory"
            ]
          }
        },
        "folder": {
          "name": "Labs"
        },
        "annotations": []
      },
      "dependsOn": []
    },
    {
      "name": "[concat(parameters('factoryName'), '/Order Summary with Mapping')]",
      "type": "Microsoft.DataFactory/factories/pipelines",
      "apiVersion": "2018-06-01",
      "properties": {
        "activities": [
          {
            "name": "Mapping Order Aggregation",
            "type": "ExecuteDataFlow",
            "dependsOn": [],
            "policy": {
              "timeout": "7.00:00:00",
              "retry": 0,
              "retryIntervalInSeconds": 30,
              "secureOutput": false,
              "secureInput": false
            },
            "userProperties": [],
            "typeProperties": {
              "dataflow": {
                "referenceName": "MappingOrderAggregation",
                "type": "DataFlowReference",
                "parameters": {},
                "datasetParameters": {
                  "OrderHeader": {},
                  "OrderLineDetails": {},
                  "OrderSummary": {}
                }
              },
              "staging": {},
              "compute": {
                "coreCount": 8,
                "computeType": "General"
              },
              "traceLevel": "Fine"
            }
          }
        ],
        "policy": {
          "elapsedTimeMetric": {},
          "cancelAfter": {}
        },
        "folder": {
          "name": "Demo Pipelines/Data Flows"
        },
        "annotations": [],
        "lastPublishTime": "2020-09-02T14:13:15Z"
      },
      "dependsOn": []
    },
    {
      "name": "[concat(parameters('factoryName'), '/Scale Out Level 1')]",
      "type": "Microsoft.DataFactory/factories/pipelines",
      "apiVersion": "2018-06-01",
      "properties": {
        "activities": [
          {
            "name": "Get Bucket Ids",
            "type": "Lookup",
            "dependsOn": [
              {
                "activity": "Reset And Log Start",
                "dependencyConditions": [
                  "Succeeded"
                ]
              },
              {
                "activity": "Reset Bucket Process Map",
                "dependencyConditions": [
                  "Succeeded"
                ]
              }
            ],
            "policy": {
              "timeout": "7.00:00:00",
              "retry": 0,
              "retryIntervalInSeconds": 30,
              "secureOutput": false,
              "secureInput": false
            },
            "userProperties": [],
            "typeProperties": {
              "source": {
                "type": "AzureSqlSource",
                "sqlReaderStoredProcedureName": "[[dbo].[GetBuckets]",
                "partitionOption": "None"
              },
              "dataset": {
                "referenceName": "GetSetMetadata",
                "type": "DatasetReference",
                "parameters": {}
              },
              "firstRowOnly": false
            }
          },
          {
            "name": "Call Buckets",
            "type": "ForEach",
            "dependsOn": [
              {
                "activity": "Get Bucket Ids",
                "dependencyConditions": [
                  "Succeeded"
                ]
              }
            ],
            "userProperties": [],
            "typeProperties": {
              "items": {
                "value": "@activity('Get Bucket Ids').output.value",
                "type": "Expression"
              },
              "isSequential": false,
              "batchCount": 50,
              "activities": [
                {
                  "name": "Call Level 2",
                  "type": "ExecutePipeline",
                  "dependsOn": [],
                  "userProperties": [],
                  "typeProperties": {
                    "pipeline": {
                      "referenceName": "Scale Out Level 2",
                      "type": "PipelineReference"
                    },
                    "waitOnCompletion": true,
                    "parameters": {
                      "BucketId": {
                        "value": "@{item().BucketId}",
                        "type": "Expression"
                      }
                    }
                  }
                }
              ]
            }
          },
          {
            "name": "Log End",
            "type": "SqlServerStoredProcedure",
            "dependsOn": [
              {
                "activity": "Call Buckets",
                "dependencyConditions": [
                  "Succeeded"
                ]
              },
              {
                "activity": "Reset And Log Start",
                "dependencyConditions": [
                  "Completed"
                ]
              }
            ],
            "policy": {
              "timeout": "7.00:00:00",
              "retry": 0,
              "retryIntervalInSeconds": 30,
              "secureOutput": false,
              "secureInput": false
            },
            "userProperties": [],
            "typeProperties": {
              "storedProcedureName": "[[dbo].[SetBucketLogEntry]",
              "storedProcedureParameters": {
                "LogId": {
                  "value": {
                    "value": "@activity('Reset And Log Start').output.firstRow.LogId",
                    "type": "Expression"
                  },
                  "type": "Int32"
                }
              }
            },
            "linkedServiceName": {
              "referenceName": "trainingdb01",
              "type": "LinkedServiceReference"
            }
          },
          {
            "name": "Reset And Log Start",
            "type": "Lookup",
            "dependsOn": [],
            "policy": {
              "timeout": "7.00:00:00",
              "retry": 0,
              "retryIntervalInSeconds": 30,
              "secureOutput": false,
              "secureInput": false
            },
            "userProperties": [],
            "typeProperties": {
              "source": {
                "type": "AzureSqlSource",
                "sqlReaderStoredProcedureName": "[[dbo].[SetBucketLogEntry]",
                "partitionOption": "None"
              },
              "dataset": {
                "referenceName": "GetSetMetadata",
                "type": "DatasetReference",
                "parameters": {}
              }
            }
          },
          {
            "name": "Reset Bucket Process Map",
            "type": "SqlServerStoredProcedure",
            "dependsOn": [],
            "policy": {
              "timeout": "7.00:00:00",
              "retry": 0,
              "retryIntervalInSeconds": 30,
              "secureOutput": false,
              "secureInput": false
            },
            "userProperties": [],
            "typeProperties": {
              "storedProcedureName": "[[dbo].[SetBucketProcesses]"
            },
            "linkedServiceName": {
              "referenceName": "trainingdb01",
              "type": "LinkedServiceReference"
            }
          }
        ],
        "policy": {
          "elapsedTimeMetric": {},
          "cancelAfter": {}
        },
        "folder": {
          "name": "Demo Pipelines/Scaling Out"
        },
        "annotations": [],
        "lastPublishTime": "2020-08-24T13:41:19Z"
      },
      "dependsOn": []
    }
  ]
}