1584 lines
46 KiB
JSON

{
"$schema": "http://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
"contentVersion": "1.0.0.0",
"parameters": {
"factoryName": {
"type": "string",
"metadata": "Data Factory name",
"defaultValue": "TrainingFactoryDev"
}
},
"variables": {
"factoryId": "[concat('Microsoft.DataFactory/factories/', parameters('factoryName'))]"
},
"resources": [
{
"name": "[concat(parameters('factoryName'), '/CopyAWProduct')]",
"type": "Microsoft.DataFactory/factories/pipelines",
"apiVersion": "2018-06-01",
"properties": {
"activities": [
{
"name": "Copy AW Products to data lake",
"type": "Copy",
"dependsOn": [],
"policy": {
"timeout": "0.12:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"source": {
"type": "BinarySource",
"storeSettings": {
"type": "HttpReadSettings",
"requestMethod": "GET"
},
"formatSettings": {
"type": "BinaryReadSettings"
}
},
"sink": {
"type": "BinarySink",
"storeSettings": {
"type": "AzureBlobFSWriteSettings"
}
},
"enableStaging": false
},
"inputs": [
{
"referenceName": "HTTP_BIN_AWProduct",
"type": "DatasetReference",
"parameters": {}
}
],
"outputs": [
{
"referenceName": "ADLS_BIN_AWProduct",
"type": "DatasetReference",
"parameters": {}
}
]
}
],
"policy": {
"elapsedTimeMetric": {},
"cancelAfter": {}
},
"folder": {
"name": "Labs1"
},
"annotations": []
},
"dependsOn": []
},
{
"name": "[concat(parameters('factoryName'), '/CopyAWUsingMetadata')]",
"type": "Microsoft.DataFactory/factories/pipelines",
"apiVersion": "2018-06-01",
"properties": {
"activities": [
{
"name": "Get catalog",
"type": "WebActivity",
"dependsOn": [],
"policy": {
"timeout": "0.12:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"url": "https://raw.githubusercontent.com/mrpaulandrewltd/Azure-Data-Integration-Pipeline-Training/main/Labs/TableCatalog.json",
"method": "GET",
"headers": {}
}
},
{
"name": "Cache catalog as JSON array",
"type": "SetVariable",
"dependsOn": [
{
"activity": "Get catalog",
"dependencyConditions": [
"Succeeded"
]
}
],
"userProperties": [],
"typeProperties": {
"variableName": "Catalog",
"value": {
"value": "@json(activity('Get catalog').output.Response)",
"type": "Expression"
}
}
},
{
"name": "ForEach AW entity",
"type": "ForEach",
"dependsOn": [
{
"activity": "Cache catalog as JSON array",
"dependencyConditions": [
"Succeeded"
]
}
],
"userProperties": [],
"typeProperties": {
"items": {
"value": "@variables('Catalog')",
"type": "Expression"
},
"activities": [
{
"name": "Copy AW entity",
"type": "Copy",
"dependsOn": [],
"policy": {
"timeout": "0.12:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"source": {
"type": "DelimitedTextSource",
"storeSettings": {
"type": "HttpReadSettings",
"requestMethod": "GET"
},
"formatSettings": {
"type": "DelimitedTextReadSettings"
}
},
"sink": {
"type": "ParquetSink",
"storeSettings": {
"type": "AzureBlobFSWriteSettings"
},
"formatSettings": {
"type": "ParquetWriteSettings"
}
},
"enableStaging": false,
"translator": {
"value": "@item().translator",
"type": "Expression"
}
},
"inputs": [
{
"referenceName": "HTTP_TSV_AdventureWorks",
"type": "DatasetReference",
"parameters": {
"EntityName": {
"value": "@item().EntityName",
"type": "Expression"
}
}
}
],
"outputs": [
{
"referenceName": "ADLS_PQT_AdventureWorks",
"type": "DatasetReference",
"parameters": {
"EntityName": {
"value": "@item().EntityName",
"type": "Expression"
}
}
}
]
},
{
"name": "Notify error",
"type": "AzureFunctionActivity",
"dependsOn": [
{
"activity": "Copy AW entity",
"dependencyConditions": [
"Failed"
]
}
],
"policy": {
"timeout": "0.12:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"functionName": "SendEmail",
"method": "POST",
"headers": {},
"body": {
"value": "{\n \"emailRecipients\": \"paul@mrpaulandrew.com\",\n \"emailSubject\": \"Error copying @{item().EntityName}\",\n \"emailBody\": \"@{replace(activity('Copy AW entity').output.errors[0].Message,'\"','\\\"')}\"\n}",
"type": "Expression"
}
},
"linkedServiceName": {
"referenceName": "EmailSenderFunction",
"type": "LinkedServiceReference"
}
}
]
}
}
],
"policy": {
"elapsedTimeMetric": {},
"cancelAfter": {}
},
"variables": {
"Catalog": {
"type": "Array",
"defaultValue": []
}
},
"folder": {
"name": "Labs2"
},
"annotations": []
},
"dependsOn": []
},
{
"name": "[concat(parameters('factoryName'), '/Get Key Vault Value')]",
"type": "Microsoft.DataFactory/factories/pipelines",
"apiVersion": "2018-06-01",
"properties": {
"activities": [
{
"name": "Get Secret",
"type": "WebActivity",
"dependsOn": [
{
"activity": "Set Key URL",
"dependencyConditions": [
"Succeeded"
]
}
],
"policy": {
"timeout": "7.00:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"url": {
"value": "@variables('CompleteSecretURL')",
"type": "Expression"
},
"method": "GET",
"headers": {},
"authentication": {
"type": "MSI",
"resource": "https://vault.azure.net"
}
}
},
{
"name": "Set Key URL",
"type": "SetVariable",
"dependsOn": [],
"userProperties": [],
"typeProperties": {
"variableName": "CompleteSecretURL",
"value": {
"value": "@concat(\n'https://',\npipeline().globalParameters.KeyVaultName,\n'.vault.azure.net/secrets/',\npipeline().parameters.SecretName,\n'?api-version=7.0')",
"type": "Expression"
}
}
},
{
"name": "Nested Key Vault URL",
"type": "Lookup",
"dependsOn": [
{
"activity": "Get Secret",
"dependencyConditions": [
"Succeeded"
]
}
],
"policy": {
"timeout": "7.00:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"source": {
"type": "AzureSqlSource",
"queryTimeout": "02:00:00",
"partitionOption": "None"
},
"dataset": {
"referenceName": "AnyDatabaseTableAnyKeyVault",
"type": "DatasetReference",
"parameters": {
"LinkedServiceConnectionSecret": "ConnectionString-trainingdb01",
"SchemaName": "sys",
"TableName": "objects",
"KeyVaultURL": {
"value": "@concat('https://',pipeline().globalParameters.KeyVaultName,'.vault.azure.net/')",
"type": "Expression"
}
}
}
}
}
],
"policy": {
"elapsedTimeMetric": {},
"cancelAfter": {}
},
"parameters": {
"SecretName": {
"type": "string",
"defaultValue": "DemoKeyGetWithWebActivity"
}
},
"variables": {
"CompleteSecretURL": {
"type": "String"
}
},
"folder": {
"name": "Demo Pipelines/Misc"
},
"annotations": []
},
"dependsOn": []
},
{
"name": "[concat(parameters('factoryName'), '/Lazy Replication')]",
"type": "Microsoft.DataFactory/factories/pipelines",
"apiVersion": "2018-06-01",
"properties": {
"activities": [
{
"name": "Get Table List",
"type": "Lookup",
"dependsOn": [],
"policy": {
"timeout": "7.00:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"source": {
"type": "AzureSqlSource",
"sqlReaderQuery": {
"value": "@pipeline().parameters.TableLookupQuery",
"type": "Expression"
},
"queryTimeout": "02:00:00",
"partitionOption": "None"
},
"dataset": {
"referenceName": "AnyDatabaseTable",
"type": "DatasetReference",
"parameters": {
"LinkedServiceConnectionSecret": {
"value": "@pipeline().parameters.SourceConnectionSecret",
"type": "Expression"
},
"SchemaName": "sys",
"TableName": "objects"
}
},
"firstRowOnly": false
}
},
{
"name": "Copy Tables",
"type": "ForEach",
"dependsOn": [
{
"activity": "Get Table List",
"dependencyConditions": [
"Succeeded"
]
}
],
"userProperties": [],
"typeProperties": {
"items": {
"value": "@activity('Get Table List').output.value",
"type": "Expression"
},
"activities": [
{
"name": "Copy Table",
"type": "Copy",
"dependsOn": [],
"policy": {
"timeout": "7.00:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"source": {
"type": "AzureSqlSource",
"queryTimeout": "02:00:00",
"partitionOption": "None"
},
"sink": {
"type": "AzureSqlSink",
"preCopyScript": {
"value": "IF OBJECT_ID('@{item().SchemaName}.@{item().TableName}') IS NOT NULL TRUNCATE TABLE @{item().SchemaName}.@{item().TableName}",
"type": "Expression"
},
"tableOption": "autoCreate",
"disableMetricsCollection": false
},
"enableStaging": false,
"translator": {
"type": "TabularTranslator",
"typeConversion": true,
"typeConversionSettings": {
"allowDataTruncation": true,
"treatBooleanAsNumber": false
}
}
},
"inputs": [
{
"referenceName": "AnyDatabaseTable",
"type": "DatasetReference",
"parameters": {
"LinkedServiceConnectionSecret": {
"value": "@pipeline().parameters.SourceConnectionSecret",
"type": "Expression"
},
"SchemaName": {
"value": "@item().SchemaName",
"type": "Expression"
},
"TableName": {
"value": "@item().TableName",
"type": "Expression"
}
}
}
],
"outputs": [
{
"referenceName": "AnyDatabaseTable",
"type": "DatasetReference",
"parameters": {
"LinkedServiceConnectionSecret": {
"value": "@pipeline().parameters.TargetConnectionSecret",
"type": "Expression"
},
"SchemaName": {
"value": "@item().SchemaName",
"type": "Expression"
},
"TableName": {
"value": "@item().TableName",
"type": "Expression"
}
}
}
]
}
]
}
}
],
"policy": {
"elapsedTimeMetric": {},
"cancelAfter": {}
},
"parameters": {
"SourceConnectionSecret": {
"type": "string",
"defaultValue": "ConnectionString-trainingdb01"
},
"TargetConnectionSecret": {
"type": "string",
"defaultValue": "ConnectionString-trainingdb02"
},
"TableLookupQuery": {
"type": "string",
"defaultValue": "SELECT \ts.name AS SchemaName, \to.name AS TableName FROM \tsys.objects o \tINNER JOIN sys.schemas s \t\tON o.schema_id = s.schema_id WHERE \to.[type] = 'U'"
}
},
"folder": {
"name": "Demo Pipelines/Dynamic Linked Services"
},
"annotations": [],
"lastPublishTime": "2020-08-27T09:43:42Z"
},
"dependsOn": []
},
{
"name": "[concat(parameters('factoryName'), '/Scale Out Level 2')]",
"type": "Microsoft.DataFactory/factories/pipelines",
"apiVersion": "2018-06-01",
"properties": {
"activities": [
{
"name": "Get Bucket Contents",
"type": "Lookup",
"dependsOn": [],
"policy": {
"timeout": "7.00:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"source": {
"type": "AzureSqlSource",
"sqlReaderStoredProcedureName": "[[dbo].[GetBucketContents]",
"storedProcedureParameters": {
"BucketId": {
"type": "Int32",
"value": {
"value": "@pipeline().parameters.BucketId",
"type": "Expression"
}
}
},
"partitionOption": "None"
},
"dataset": {
"referenceName": "GetSetMetadata",
"type": "DatasetReference",
"parameters": {}
},
"firstRowOnly": false
}
},
{
"name": "Execute Processes",
"type": "ForEach",
"dependsOn": [
{
"activity": "Get Bucket Contents",
"dependencyConditions": [
"Succeeded"
]
}
],
"userProperties": [],
"typeProperties": {
"items": {
"value": "@activity('Get Bucket Contents').output.value",
"type": "Expression"
},
"batchCount": 50,
"activities": [
{
"name": "Run Process",
"type": "SqlServerStoredProcedure",
"dependsOn": [],
"policy": {
"timeout": "7.00:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"storedProcedureName": {
"value": "@concat('[workers].[DumpDataAndWait',item().ProcessId,']')",
"type": "Expression"
}
},
"linkedServiceName": {
"referenceName": "trainingdb01",
"type": "LinkedServiceReference"
}
}
]
}
}
],
"policy": {
"elapsedTimeMetric": {},
"cancelAfter": {}
},
"parameters": {
"BucketId": {
"type": "int"
}
},
"folder": {
"name": "Demo Pipelines/Scaling Out"
},
"annotations": [],
"lastPublishTime": "2020-08-24T13:41:19Z"
},
"dependsOn": []
},
{
"name": "[concat(parameters('factoryName'), '/MappingOrderAggregation')]",
"type": "Microsoft.DataFactory/factories/dataflows",
"apiVersion": "2018-06-01",
"properties": {
"type": "MappingDataFlow",
"typeProperties": {
"sources": [
{
"dataset": {
"referenceName": "LakeFileOrderHeaderParquet",
"type": "DatasetReference"
},
"name": "OrderHeader"
},
{
"dataset": {
"referenceName": "LakeFileOrderDetailLinesParquet",
"type": "DatasetReference"
},
"name": "OrderLineDetails"
}
],
"sinks": [
{
"dataset": {
"referenceName": "TableOrderSummary",
"type": "DatasetReference"
},
"name": "OrderSummary"
}
],
"transformations": [
{
"name": "JoinHeaderToLineDetails"
},
{
"name": "OrderLineCount"
}
],
"script": "source(output(\n\t\tSalesOrderID as integer,\n\t\tRevisionNumber as integer,\n\t\tOrderDate as timestamp,\n\t\tDueDate as timestamp,\n\t\tShipDate as timestamp,\n\t\tStatus as integer,\n\t\tOnlineOrderFlag as boolean,\n\t\tSalesOrderNumber as string,\n\t\tPurchaseOrderNumber as string,\n\t\tAccountNumber as string,\n\t\tCustomerID as integer,\n\t\tShipToAddressID as integer,\n\t\tBillToAddressID as integer,\n\t\tShipMethod as string,\n\t\tCreditCardApprovalCode as string,\n\t\tSubTotal as decimal(19,4),\n\t\tTaxAmt as decimal(19,4),\n\t\tFreight as decimal(19,4),\n\t\tTotalDue as decimal(19,4),\n\t\tComment as string,\n\t\trowguid as string,\n\t\tModifiedDate as timestamp\n\t),\n\tallowSchemaDrift: false,\n\tvalidateSchema: false,\n\tignoreNoFilesFound: false,\n\tformat: 'parquet',\n\tpartitionBy('hash', 1)) ~> OrderHeader\nsource(output(\n\t\tSalesOrderID as integer,\n\t\tSalesOrderDetailID as integer,\n\t\tOrderQty as integer,\n\t\tProductID as integer,\n\t\tUnitPrice as decimal(19,4),\n\t\tUnitPriceDiscount as decimal(19,4),\n\t\tLineTotal as decimal(38,6),\n\t\trowguid as string,\n\t\tModifiedDate as timestamp\n\t),\n\tallowSchemaDrift: false,\n\tvalidateSchema: false,\n\tignoreNoFilesFound: false,\n\tformat: 'parquet',\n\tpartitionBy('hash', 1)) ~> OrderLineDetails\nOrderHeader, OrderLineDetails join(OrderHeader@SalesOrderID == OrderLineDetails@SalesOrderID,\n\tjoinType:'inner',\n\tmatchType:'exact',\n\tignoreSpaces: false,\n\tpartitionBy('hash', 1),\n\tbroadcast: 'both')~> JoinHeaderToLineDetails\nJoinHeaderToLineDetails aggregate(groupBy(SalesOrderNumber),\n\tRecordCount = count(SalesOrderDetailID),\n\tpartitionBy('roundRobin', 4)) ~> OrderLineCount\nOrderLineCount sink(allowSchemaDrift: false,\n\tvalidateSchema: false,\n\tinput(\n\t\tSalesOrderNumber as string,\n\t\tRecordCount as integer\n\t),\n\tdeletable:false,\n\tinsertable:true,\n\tupdateable:false,\n\tupsertable:false,\n\ttruncate:true,\n\tformat: 'table',\n\tskipDuplicateMapInputs: true,\n\tskipDuplicateMapOutputs: true,\n\terrorHandlingOption: 'stopOnFirstError',\n\tmapColumn(\n\t\tSalesOrderNumber,\n\t\tRecordCount\n\t),\n\tpartitionBy('roundRobin', 4)) ~> OrderSummary"
}
},
"dependsOn": []
},
{
"name": "[concat(parameters('factoryName'), '/MappingOrderAggregationWithParam')]",
"type": "Microsoft.DataFactory/factories/dataflows",
"apiVersion": "2018-06-01",
"properties": {
"type": "MappingDataFlow",
"typeProperties": {
"sources": [
{
"dataset": {
"referenceName": "LakeFileOrderHeaderParquet",
"type": "DatasetReference"
},
"name": "OrderHeader"
},
{
"dataset": {
"referenceName": "LakeFileOrderDetailLinesParquet",
"type": "DatasetReference"
},
"name": "OrderLineDetails"
}
],
"sinks": [
{
"dataset": {
"referenceName": "TableOrderSummary",
"type": "DatasetReference"
},
"name": "OrderSummary"
}
],
"transformations": [
{
"name": "JoinHeaderToLineDetails"
},
{
"name": "OrderLineCount"
},
{
"name": "AddAuditColum"
}
],
"script": "parameters{\n\tAuditColumn as string\n}\nsource(output(\n\t\tSalesOrderID as integer,\n\t\tRevisionNumber as integer,\n\t\tOrderDate as timestamp,\n\t\tDueDate as timestamp,\n\t\tShipDate as timestamp,\n\t\tStatus as integer,\n\t\tOnlineOrderFlag as boolean,\n\t\tSalesOrderNumber as string,\n\t\tPurchaseOrderNumber as string,\n\t\tAccountNumber as string,\n\t\tCustomerID as integer,\n\t\tShipToAddressID as integer,\n\t\tBillToAddressID as integer,\n\t\tShipMethod as string,\n\t\tCreditCardApprovalCode as string,\n\t\tSubTotal as decimal(19,4),\n\t\tTaxAmt as decimal(19,4),\n\t\tFreight as decimal(19,4),\n\t\tTotalDue as decimal(19,4),\n\t\tComment as string,\n\t\trowguid as string,\n\t\tModifiedDate as timestamp\n\t),\n\tallowSchemaDrift: false,\n\tvalidateSchema: false,\n\tignoreNoFilesFound: false,\n\tformat: 'parquet',\n\tpartitionBy('hash', 1)) ~> OrderHeader\nsource(output(\n\t\tSalesOrderID as integer,\n\t\tSalesOrderDetailID as integer,\n\t\tOrderQty as integer,\n\t\tProductID as integer,\n\t\tUnitPrice as decimal(19,4),\n\t\tUnitPriceDiscount as decimal(19,4),\n\t\tLineTotal as decimal(38,6),\n\t\trowguid as string,\n\t\tModifiedDate as timestamp\n\t),\n\tallowSchemaDrift: false,\n\tvalidateSchema: false,\n\tignoreNoFilesFound: false,\n\tformat: 'parquet',\n\tpartitionBy('hash', 1)) ~> OrderLineDetails\nOrderHeader, OrderLineDetails join(OrderHeader@SalesOrderID == OrderLineDetails@SalesOrderID,\n\tjoinType:'inner',\n\tmatchType:'exact',\n\tignoreSpaces: false,\n\tpartitionBy('hash', 1),\n\tbroadcast: 'both')~> JoinHeaderToLineDetails\nJoinHeaderToLineDetails aggregate(groupBy(SalesOrderNumber),\n\tRecordCount = count(SalesOrderDetailID),\n\tpartitionBy('roundRobin', 4)) ~> OrderLineCount\nOrderLineCount derive(AuditValue = $AuditColumn) ~> AddAuditColum\nAddAuditColum sink(allowSchemaDrift: false,\n\tvalidateSchema: false,\n\tinput(\n\t\tSalesOrderNumber as string,\n\t\tRecordCount as integer\n\t),\n\tdeletable:false,\n\tinsertable:true,\n\tupdateable:false,\n\tupsertable:false,\n\ttruncate:true,\n\tformat: 'table',\n\tskipDuplicateMapInputs: true,\n\tskipDuplicateMapOutputs: true,\n\terrorHandlingOption: 'stopOnFirstError',\n\tmapColumn(\n\t\tSalesOrderNumber,\n\t\tRecordCount\n\t),\n\tpartitionBy('roundRobin', 4)) ~> OrderSummary"
}
},
"dependsOn": []
},
{
"name": "[concat(parameters('factoryName'), '/UpdateProductDimension')]",
"type": "Microsoft.DataFactory/factories/dataflows",
"apiVersion": "2018-06-01",
"properties": {
"folder": {
"name": "Labs"
},
"type": "MappingDataFlow",
"typeProperties": {
"sources": [
{
"dataset": {
"referenceName": "ADLS_TSV_AdventureWorks",
"type": "DatasetReference"
},
"name": "Product"
},
{
"dataset": {
"referenceName": "ADLS_TSV_AdventureWorks",
"type": "DatasetReference"
},
"name": "ProductSubcategory"
},
{
"dataset": {
"referenceName": "ADLS_TSV_AdventureWorks",
"type": "DatasetReference"
},
"name": "ProductCategory"
}
],
"sinks": [
{
"linkedService": {
"referenceName": "ADLS_saintegrationpipelines",
"type": "LinkedServiceReference"
},
"name": "WriteToDataLake"
}
],
"transformations": [
{
"name": "SelectProductColumns"
},
{
"name": "SelectSubcategoryColumns"
},
{
"name": "SelectCategoryColumns"
},
{
"name": "LookupProductCategory"
},
{
"name": "LookupProductSubcategory"
},
{
"name": "RemoveDuplicateColumns"
}
],
"scriptLines": [
"source(output(",
" ProductId as integer,",
" Product as string,",
" {_col2_} as string,",
" {_col3_} as boolean,",
" {_col4_} as boolean,",
" {_col5_} as string,",
" {_col6_} as short,",
" {_col7_} as short,",
" {_col8_} as double,",
" {_col9_} as double,",
" {_col10_} as string,",
" {_col11_} as string,",
" {_col12_} as string,",
" {_col13_} as double,",
" {_col14_} as short,",
" {_col15_} as string,",
" {_col16_} as string,",
" {_col17_} as string,",
" SubcategoryId as integer,",
" {_col19_} as short,",
" {_col20_} as timestamp,",
" {_col21_} as timestamp,",
" {_col22_} as string,",
" {_col23_} as string,",
" {_col24_} as string",
" ),",
" allowSchemaDrift: true,",
" validateSchema: false,",
" ignoreNoFilesFound: false) ~> Product",
"source(output(",
" SubcategoryId as integer,",
" CategoryId as integer,",
" Subcategory as string,",
" {_col3_} as string,",
" {_col4_} as timestamp",
" ),",
" allowSchemaDrift: true,",
" validateSchema: false,",
" ignoreNoFilesFound: false) ~> ProductSubcategory",
"source(output(",
" CategoryId as integer,",
" Category as string,",
" {_col2_} as string,",
" {_col3_} as timestamp",
" ),",
" allowSchemaDrift: true,",
" validateSchema: false,",
" ignoreNoFilesFound: false) ~> ProductCategory",
"Product select(mapColumn(",
" ProductId,",
" Product,",
" SubcategoryId",
" ),",
" skipDuplicateMapInputs: true,",
" skipDuplicateMapOutputs: true) ~> SelectProductColumns",
"ProductSubcategory select(mapColumn(",
" SubcategoryId,",
" CategoryId,",
" Subcategory",
" ),",
" skipDuplicateMapInputs: true,",
" skipDuplicateMapOutputs: true) ~> SelectSubcategoryColumns",
"ProductCategory select(mapColumn(",
" CategoryId,",
" Category",
" ),",
" skipDuplicateMapInputs: true,",
" skipDuplicateMapOutputs: true) ~> SelectCategoryColumns",
"SelectSubcategoryColumns, SelectCategoryColumns lookup(SelectSubcategoryColumns@CategoryId == SelectCategoryColumns@CategoryId,",
" multiple: false,",
" pickup: 'any',",
" broadcast: 'auto')~> LookupProductCategory",
"SelectProductColumns, LookupProductCategory lookup(SelectProductColumns@SubcategoryId == SelectSubcategoryColumns@SubcategoryId,",
" multiple: false,",
" pickup: 'any',",
" broadcast: 'auto')~> LookupProductSubcategory",
"LookupProductSubcategory select(mapColumn(",
" ProductId,",
" Product,",
" SubcategoryId = SelectProductColumns@SubcategoryId,",
" SubcategoryId = SelectSubcategoryColumns@SubcategoryId,",
" CategoryId = SelectSubcategoryColumns@CategoryId,",
" Subcategory,",
" CategoryId = SelectCategoryColumns@CategoryId,",
" Category",
" ),",
" skipDuplicateMapInputs: true,",
" skipDuplicateMapOutputs: true) ~> RemoveDuplicateColumns",
"RemoveDuplicateColumns sink(allowSchemaDrift: true,",
" validateSchema: false,",
" format: 'parquet',",
" fileSystem: 'lakeroot',",
" folderPath: 'Conformed/DimProduct',",
" truncate: true,",
" umask: 0022,",
" preCommands: [],",
" postCommands: [],",
" skipDuplicateMapInputs: true,",
" skipDuplicateMapOutputs: true) ~> WriteToDataLake"
]
}
},
"dependsOn": []
},
{
"name": "[concat(parameters('factoryName'), '/UpdateProductDimension_Sorted')]",
"type": "Microsoft.DataFactory/factories/dataflows",
"apiVersion": "2018-06-01",
"properties": {
"folder": {
"name": "Labs"
},
"type": "MappingDataFlow",
"typeProperties": {
"sources": [
{
"dataset": {
"referenceName": "ADLS_TSV_AdventureWorks",
"type": "DatasetReference"
},
"name": "Product"
},
{
"dataset": {
"referenceName": "ADLS_TSV_AdventureWorks",
"type": "DatasetReference"
},
"name": "ProductSubcategory"
},
{
"dataset": {
"referenceName": "ADLS_TSV_AdventureWorks",
"type": "DatasetReference"
},
"name": "ProductCategory"
}
],
"sinks": [
{
"linkedService": {
"referenceName": "ADLS_saintegrationpipelines",
"type": "LinkedServiceReference"
},
"name": "WriteToDataLake"
}
],
"transformations": [
{
"name": "SelectProductColumns"
},
{
"name": "SelectSubcategoryColumns"
},
{
"name": "SelectCategoryColumns"
},
{
"name": "LookupProductCategory"
},
{
"name": "LookupProductSubcategory"
},
{
"name": "RemoveDuplicateColumns"
},
{
"name": "SortBySubcategory"
}
],
"scriptLines": [
"source(output(",
" ProductId as integer,",
" Product as string,",
" {_col2_} as string,",
" {_col3_} as boolean,",
" {_col4_} as boolean,",
" {_col5_} as string,",
" {_col6_} as short,",
" {_col7_} as short,",
" {_col8_} as double,",
" {_col9_} as double,",
" {_col10_} as string,",
" {_col11_} as string,",
" {_col12_} as string,",
" {_col13_} as double,",
" {_col14_} as short,",
" {_col15_} as string,",
" {_col16_} as string,",
" {_col17_} as string,",
" SubcategoryId as integer,",
" {_col19_} as short,",
" {_col20_} as timestamp,",
" {_col21_} as timestamp,",
" {_col22_} as string,",
" {_col23_} as string,",
" {_col24_} as string",
" ),",
" allowSchemaDrift: true,",
" validateSchema: false,",
" ignoreNoFilesFound: false) ~> Product",
"source(output(",
" SubcategoryId as integer,",
" CategoryId as integer,",
" Subcategory as string,",
" {_col3_} as string,",
" {_col4_} as timestamp",
" ),",
" allowSchemaDrift: true,",
" validateSchema: false,",
" ignoreNoFilesFound: false) ~> ProductSubcategory",
"source(output(",
" CategoryId as integer,",
" Category as string,",
" {_col2_} as string,",
" {_col3_} as timestamp",
" ),",
" allowSchemaDrift: true,",
" validateSchema: false,",
" ignoreNoFilesFound: false) ~> ProductCategory",
"Product select(mapColumn(",
" ProductId,",
" Product,",
" SubcategoryId",
" ),",
" skipDuplicateMapInputs: true,",
" skipDuplicateMapOutputs: true) ~> SelectProductColumns",
"ProductSubcategory select(mapColumn(",
" SubcategoryId,",
" CategoryId,",
" Subcategory",
" ),",
" skipDuplicateMapInputs: true,",
" skipDuplicateMapOutputs: true) ~> SelectSubcategoryColumns",
"ProductCategory select(mapColumn(",
" CategoryId,",
" Category",
" ),",
" skipDuplicateMapInputs: true,",
" skipDuplicateMapOutputs: true) ~> SelectCategoryColumns",
"SelectSubcategoryColumns, SelectCategoryColumns lookup(SelectSubcategoryColumns@CategoryId == SelectCategoryColumns@CategoryId,",
" multiple: false,",
" pickup: 'any',",
" broadcast: 'auto')~> LookupProductCategory",
"SelectProductColumns, LookupProductCategory lookup(SelectProductColumns@SubcategoryId == SelectSubcategoryColumns@SubcategoryId,",
" multiple: false,",
" pickup: 'any',",
" broadcast: 'auto')~> LookupProductSubcategory",
"LookupProductSubcategory select(mapColumn(",
" ProductId,",
" Product,",
" SubcategoryId = SelectProductColumns@SubcategoryId,",
" SubcategoryId = SelectSubcategoryColumns@SubcategoryId,",
" CategoryId = SelectSubcategoryColumns@CategoryId,",
" Subcategory,",
" CategoryId = SelectCategoryColumns@CategoryId,",
" Category",
" ),",
" skipDuplicateMapInputs: true,",
" skipDuplicateMapOutputs: true) ~> RemoveDuplicateColumns",
"RemoveDuplicateColumns sort(asc(Subcategory, false)) ~> SortBySubcategory",
"SortBySubcategory sink(allowSchemaDrift: true,",
" validateSchema: false,",
" format: 'parquet',",
" fileSystem: 'lakeroot',",
" folderPath: 'Conformed/DimProduct',",
" truncate: true,",
" umask: 0022,",
" preCommands: [],",
" postCommands: [],",
" skipDuplicateMapInputs: true,",
" skipDuplicateMapOutputs: true) ~> WriteToDataLake"
]
}
},
"dependsOn": []
},
{
"name": "[concat(parameters('factoryName'), '/06 - Example Global Param')]",
"type": "Microsoft.DataFactory/factories/pipelines",
"apiVersion": "2018-06-01",
"properties": {
"activities": [
{
"name": "06 - Get Any File List",
"type": "ExecutePipeline",
"dependsOn": [],
"userProperties": [],
"typeProperties": {
"pipeline": {
"referenceName": "05 - Get File List Utility",
"type": "PipelineReference"
},
"waitOnCompletion": false,
"parameters": {
"GetFileQuery": "SELECT * FROM [dbo].[FilesToUpload] WHERE [Enabled] = 1;"
}
}
},
{
"name": "Set Pipe Output",
"type": "SetVariable",
"dependsOn": [
{
"activity": "06 - Get Any File List",
"dependencyConditions": [
"Succeeded"
]
}
],
"userProperties": [],
"typeProperties": {
"variableName": "WorkerOutput",
"value": {
"value": "@activity('06 - Get Any File List').output.pipelineRunId",
"type": "Expression"
}
}
},
{
"name": "Set Var From Global Param",
"type": "SetVariable",
"dependsOn": [
{
"activity": "06 - Get Any File List",
"dependencyConditions": [
"Succeeded"
]
}
],
"userProperties": [],
"typeProperties": {
"variableName": "GlobalParam",
"value": {
"value": "@concat(pipeline().globalParameters.Environment,'Test')",
"type": "Expression"
}
}
}
],
"policy": {
"elapsedTimeMetric": {},
"cancelAfter": {}
},
"variables": {
"WorkerOutput": {
"type": "String"
},
"GlobalParam": {
"type": "String"
}
},
"folder": {
"name": "Demo Pipelines/Data Uploads"
},
"annotations": []
},
"dependsOn": []
},
{
"name": "[concat(parameters('factoryName'), '/BuildDimProduct')]",
"type": "Microsoft.DataFactory/factories/pipelines",
"apiVersion": "2018-06-01",
"properties": {
"activities": [
{
"name": "Run UpdateProductDimension",
"type": "ExecuteDataFlow",
"dependsOn": [],
"policy": {
"timeout": "0.12:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"dataflow": {
"referenceName": "UpdateProductDimension",
"type": "DataFlowReference",
"parameters": {},
"datasetParameters": {
"Product": {
"FileName": "Product.tsv"
},
"ProductSubcategory": {
"FileName": "ProductSubcategory.tsv"
},
"ProductCategory": {
"FileName": "ProductCategory.tsv"
},
"WriteToDataLake": {}
}
},
"staging": {},
"compute": {
"coreCount": 8,
"computeType": "General"
},
"traceLevel": "Fine"
}
}
],
"policy": {
"elapsedTimeMetric": {},
"cancelAfter": {}
},
"folder": {
"name": "Labs1"
},
"annotations": []
},
"dependsOn": [
"[concat(variables('factoryId'), '/dataflows/UpdateProductDimension')]"
]
},
{
"name": "[concat(parameters('factoryName'), '/CopyAWEntities')]",
"type": "Microsoft.DataFactory/factories/pipelines",
"apiVersion": "2018-06-01",
"properties": {
"activities": [
{
"name": "Copy each AW entity",
"type": "ForEach",
"dependsOn": [],
"userProperties": [],
"typeProperties": {
"items": {
"value": "@variables('Entities')",
"type": "Expression"
},
"isSequential": false,
"activities": [
{
"name": "Execute CopyAWEntity",
"type": "ExecutePipeline",
"dependsOn": [],
"userProperties": [],
"typeProperties": {
"pipeline": {
"referenceName": "CopyAWEntity",
"type": "PipelineReference"
},
"waitOnCompletion": true,
"parameters": {
"Entity": {
"value": "@item()",
"type": "Expression"
}
}
}
}
]
}
}
],
"policy": {
"elapsedTimeMetric": {},
"cancelAfter": {}
},
"variables": {
"Entities": {
"type": "Array",
"defaultValue": [
"Product",
"ProductSubcategory",
"ProductCategory"
]
}
},
"folder": {
"name": "Labs1"
},
"annotations": []
},
"dependsOn": []
},
{
"name": "[concat(parameters('factoryName'), '/Order Summary with Mapping With Param')]",
"type": "Microsoft.DataFactory/factories/pipelines",
"apiVersion": "2018-06-01",
"properties": {
"activities": [
{
"name": "Mapping Order Aggregation",
"type": "ExecuteDataFlow",
"dependsOn": [],
"policy": {
"timeout": "7.00:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"dataflow": {
"referenceName": "MappingOrderAggregationWithParam",
"type": "DataFlowReference",
"parameters": {
"AuditColumn": {
"value": "'@{pipeline().parameters.AuditColumnValue}'",
"type": "Expression"
}
},
"datasetParameters": {
"OrderHeader": {},
"OrderLineDetails": {},
"OrderSummary": {}
}
},
"staging": {},
"compute": {
"coreCount": 8,
"computeType": "General"
},
"traceLevel": "Fine"
}
}
],
"policy": {
"elapsedTimeMetric": {},
"cancelAfter": {}
},
"parameters": {
"AuditColumnValue": {
"type": "string"
}
},
"folder": {
"name": "Demo Pipelines/Data Flows"
},
"annotations": [],
"lastPublishTime": "2020-09-02T14:13:15Z"
},
"dependsOn": [
"[concat(variables('factoryId'), '/dataflows/MappingOrderAggregationWithParam')]"
]
},
{
"name": "[concat(parameters('factoryName'), '/Order Summary with Mapping')]",
"type": "Microsoft.DataFactory/factories/pipelines",
"apiVersion": "2018-06-01",
"properties": {
"activities": [
{
"name": "Mapping Order Aggregation",
"type": "ExecuteDataFlow",
"dependsOn": [],
"policy": {
"timeout": "7.00:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"dataflow": {
"referenceName": "MappingOrderAggregation",
"type": "DataFlowReference",
"parameters": {},
"datasetParameters": {
"OrderHeader": {},
"OrderLineDetails": {},
"OrderSummary": {}
}
},
"staging": {},
"compute": {
"coreCount": 8,
"computeType": "General"
},
"traceLevel": "Fine"
}
}
],
"policy": {
"elapsedTimeMetric": {},
"cancelAfter": {}
},
"folder": {
"name": "Demo Pipelines/Data Flows"
},
"annotations": [],
"lastPublishTime": "2020-09-02T14:13:15Z"
},
"dependsOn": [
"[concat(variables('factoryId'), '/dataflows/MappingOrderAggregation')]"
]
},
{
"name": "[concat(parameters('factoryName'), '/Scale Out Level 1')]",
"type": "Microsoft.DataFactory/factories/pipelines",
"apiVersion": "2018-06-01",
"properties": {
"activities": [
{
"name": "Get Bucket Ids",
"type": "Lookup",
"dependsOn": [
{
"activity": "Reset And Log Start",
"dependencyConditions": [
"Succeeded"
]
},
{
"activity": "Reset Bucket Process Map",
"dependencyConditions": [
"Succeeded"
]
}
],
"policy": {
"timeout": "7.00:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"source": {
"type": "AzureSqlSource",
"sqlReaderStoredProcedureName": "[[dbo].[GetBuckets]",
"partitionOption": "None"
},
"dataset": {
"referenceName": "GetSetMetadata",
"type": "DatasetReference",
"parameters": {}
},
"firstRowOnly": false
}
},
{
"name": "Call Buckets",
"type": "ForEach",
"dependsOn": [
{
"activity": "Get Bucket Ids",
"dependencyConditions": [
"Succeeded"
]
}
],
"userProperties": [],
"typeProperties": {
"items": {
"value": "@activity('Get Bucket Ids').output.value",
"type": "Expression"
},
"isSequential": false,
"batchCount": 50,
"activities": [
{
"name": "Call Level 2",
"type": "ExecutePipeline",
"dependsOn": [],
"userProperties": [],
"typeProperties": {
"pipeline": {
"referenceName": "Scale Out Level 2",
"type": "PipelineReference"
},
"waitOnCompletion": true,
"parameters": {
"BucketId": {
"value": "@{item().BucketId}",
"type": "Expression"
}
}
}
}
]
}
},
{
"name": "Log End",
"type": "SqlServerStoredProcedure",
"dependsOn": [
{
"activity": "Call Buckets",
"dependencyConditions": [
"Succeeded"
]
},
{
"activity": "Reset And Log Start",
"dependencyConditions": [
"Completed"
]
}
],
"policy": {
"timeout": "7.00:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"storedProcedureName": "[[dbo].[SetBucketLogEntry]",
"storedProcedureParameters": {
"LogId": {
"value": {
"value": "@activity('Reset And Log Start').output.firstRow.LogId",
"type": "Expression"
},
"type": "Int32"
}
}
},
"linkedServiceName": {
"referenceName": "trainingdb01",
"type": "LinkedServiceReference"
}
},
{
"name": "Reset And Log Start",
"type": "Lookup",
"dependsOn": [],
"policy": {
"timeout": "7.00:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"source": {
"type": "AzureSqlSource",
"sqlReaderStoredProcedureName": "[[dbo].[SetBucketLogEntry]",
"partitionOption": "None"
},
"dataset": {
"referenceName": "GetSetMetadata",
"type": "DatasetReference",
"parameters": {}
}
}
},
{
"name": "Reset Bucket Process Map",
"type": "SqlServerStoredProcedure",
"dependsOn": [],
"policy": {
"timeout": "7.00:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"storedProcedureName": "[[dbo].[SetBucketProcesses]"
},
"linkedServiceName": {
"referenceName": "trainingdb01",
"type": "LinkedServiceReference"
}
}
],
"policy": {
"elapsedTimeMetric": {},
"cancelAfter": {}
},
"folder": {
"name": "Demo Pipelines/Scaling Out"
},
"annotations": [],
"lastPublishTime": "2020-08-24T13:41:19Z"
},
"dependsOn": [
"[concat(variables('factoryId'), '/pipelines/Scale Out Level 2')]"
]
}
]
}