{ "$schema": "http://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", "contentVersion": "1.0.0.0", "parameters": { "factoryName": { "type": "string", "metadata": "Data Factory name", "defaultValue": "TrainingFactoryDev" } }, "variables": { "factoryId": "[concat('Microsoft.DataFactory/factories/', parameters('factoryName'))]" }, "resources": [ { "name": "[concat(parameters('factoryName'), '/LakeFilesBinary')]", "type": "Microsoft.DataFactory/factories/datasets", "apiVersion": "2018-06-01", "properties": { "linkedServiceName": { "referenceName": "traininglake01", "type": "LinkedServiceReference" }, "parameters": { "Directory": { "type": "string" }, "File": { "type": "string" } }, "folder": { "name": "Lake" }, "annotations": [], "type": "Binary", "typeProperties": { "location": { "type": "AzureBlobFSLocation", "fileName": { "value": "@dataset().File", "type": "Expression" }, "folderPath": { "value": "@dataset().Directory", "type": "Expression" }, "fileSystem": "datawarehouse" } } }, "dependsOn": [] }, { "name": "[concat(parameters('factoryName'), '/LaptopFilePersonCSV')]", "type": "Microsoft.DataFactory/factories/datasets", "apiVersion": "2018-06-01", "properties": { "linkedServiceName": { "referenceName": "LaptopFiles", "type": "LinkedServiceReference" }, "folder": { "name": "Laptop" }, "annotations": [], "type": "DelimitedText", "typeProperties": { "location": { "type": "FileServerLocation", "fileName": "Person.csv", "folderPath": "ForUpload/People" }, "columnDelimiter": ",", "escapeChar": "\\", "quoteChar": "\"" }, "schema": [ { "type": "String" }, { "type": "String" }, { "type": "String" } ] }, "dependsOn": [] }, { "name": "[concat(parameters('factoryName'), '/LaptopFilesBinary')]", "type": "Microsoft.DataFactory/factories/datasets", "apiVersion": "2018-06-01", "properties": { "linkedServiceName": { "referenceName": "LaptopFiles", "type": "LinkedServiceReference" }, "parameters": { "Directory": { "type": "string" }, "File": { "type": 
"string" } }, "folder": { "name": "Laptop" }, "annotations": [], "type": "Binary", "typeProperties": { "location": { "type": "FileServerLocation", "fileName": { "value": "@dataset().File", "type": "Expression" }, "folderPath": { "value": "@dataset().Directory", "type": "Expression" } } } }, "dependsOn": [] }, { "name": "[concat(parameters('factoryName'), '/LaptopFilesParquet')]", "type": "Microsoft.DataFactory/factories/datasets", "apiVersion": "2018-06-01", "properties": { "linkedServiceName": { "referenceName": "LaptopFiles", "type": "LinkedServiceReference" }, "parameters": { "Directory": { "type": "string" }, "File": { "type": "string" } }, "folder": { "name": "Laptop" }, "annotations": [], "type": "Parquet", "typeProperties": { "location": { "type": "FileServerLocation", "fileName": { "value": "@dataset().File", "type": "Expression" }, "folderPath": { "value": "@dataset().Directory", "type": "Expression" } }, "compressionCodec": "snappy" }, "schema": [] }, "dependsOn": [] }, { "name": "[concat(parameters('factoryName'), '/LaptopFolders')]", "type": "Microsoft.DataFactory/factories/datasets", "apiVersion": "2018-06-01", "properties": { "linkedServiceName": { "referenceName": "LaptopFiles", "type": "LinkedServiceReference" }, "parameters": { "Directory": { "type": "string" } }, "folder": { "name": "Laptop" }, "annotations": [], "type": "DelimitedText", "typeProperties": { "location": { "type": "FileServerLocation", "folderPath": { "value": "@dataset().Directory", "type": "Expression" } }, "columnDelimiter": ",", "escapeChar": "\\", "quoteChar": "\"" }, "schema": [] }, "dependsOn": [] }, { "name": "[concat(parameters('factoryName'), '/LaptopsFiles')]", "type": "Microsoft.DataFactory/factories/datasets", "apiVersion": "2018-06-01", "properties": { "linkedServiceName": { "referenceName": "LaptopFiles", "type": "LinkedServiceReference" }, "parameters": { "Directory": { "type": "string" }, "File": { "type": "string" } }, "folder": { "name": "Laptop" }, "annotations": 
[], "type": "DelimitedText", "typeProperties": { "location": { "type": "FileServerLocation", "fileName": { "value": "@dataset().File", "type": "Expression" }, "folderPath": { "value": "@dataset().Directory", "type": "Expression" } }, "columnDelimiter": ",", "escapeChar": "\\", "quoteChar": "\"" }, "schema": [] }, "dependsOn": [] }, { "name": "[concat(parameters('factoryName'), '/TableOrderSummary')]", "type": "Microsoft.DataFactory/factories/datasets", "apiVersion": "2018-06-01", "properties": { "linkedServiceName": { "referenceName": "trainingdb01", "type": "LinkedServiceReference" }, "folder": { "name": "SQLDB" }, "annotations": [], "type": "AzureSqlTable", "schema": [ { "name": "SalesOrderNumber", "type": "varchar" }, { "name": "RecordCount", "type": "int", "precision": 10 } ], "typeProperties": { "schema": "dbo", "table": "OrderSummary" } }, "dependsOn": [] }, { "name": "[concat(parameters('factoryName'), '/WranglingOrderAggregation')]", "type": "Microsoft.DataFactory/factories/dataflows", "apiVersion": "2018-06-01", "properties": { "type": "WranglingDataFlow", "typeProperties": { "sources": [ { "name": "LakeFileOrderDetailLinesParquet", "script": "source(allowSchemaDrift: true,\n\tvalidateSchema: false,\n\tignoreNoFilesFound: false,\n\tformat: 'parquet') ~> LakeFileOrderDetailLinesParquet", "dataset": { "referenceName": "LakeFileOrderDetailLinesParquet", "type": "DatasetReference" } } ], "script": "section Section1;\r\nshared LakeFileOrderDetailLinesParquet = let\r\n AdfDoc = Web.Contents(\"https://traininglake01.dfs.core.windows.net/datawarehouse/Raw/OrderDetailLines.parquet?sv=2018-03-28&sig=5R%2BzQI0dTqfGUYi8vVuzKhHq6DBYMX%2FYNyfH4c1BalM%3D&spr=https&se=2020-09-02T12%3A16%3A29Z&srt=sco&ss=bf&sp=rwl\"),\r\n Parquet = Parquet.Document(AdfDoc)\r\nin\r\n Parquet;\r\nshared UserQuery = let\r\n Source = LakeFileOrderDetailLinesParquet\r\nin\r\n Source;\r\n" } }, "dependsOn": [] }, { "name": "[concat(parameters('factoryName'), '/01 - Upload - Simple')]", "type": 
"Microsoft.DataFactory/factories/pipelines", "apiVersion": "2018-06-01", "properties": { "activities": [ { "name": "Upload Person", "description": "hfdhgfdhfd", "type": "Copy", "dependsOn": [], "policy": { "timeout": "7.00:00:00", "retry": 0, "retryIntervalInSeconds": 30, "secureOutput": false, "secureInput": false }, "userProperties": [], "typeProperties": { "source": { "type": "DelimitedTextSource", "storeSettings": { "type": "FileServerReadSettings", "recursive": true }, "formatSettings": { "type": "DelimitedTextReadSettings" } }, "sink": { "type": "DelimitedTextSink", "storeSettings": { "type": "AzureBlobFSWriteSettings" }, "formatSettings": { "type": "DelimitedTextWriteSettings", "quoteAllText": true, "fileExtension": ".txt" } }, "enableStaging": false, "translator": { "type": "TabularTranslator", "typeConversion": true, "typeConversionSettings": { "allowDataTruncation": true, "treatBooleanAsNumber": false } } }, "inputs": [ { "referenceName": "LaptopFilePersonCSV", "type": "DatasetReference", "parameters": {} } ], "outputs": [ { "referenceName": "LakeFilePersonCSV", "type": "DatasetReference", "parameters": {} } ] } ], "policy": { "elapsedTimeMetric": {}, "cancelAfter": {} }, "folder": { "name": "Demo Pipelines/Data Uploads" }, "annotations": [], "lastPublishTime": "2020-09-29T13:50:28Z" }, "dependsOn": [ "[concat(variables('factoryId'), '/datasets/LaptopFilePersonCSV')]" ] }, { "name": "[concat(parameters('factoryName'), '/02 - Upload - Copy Params')]", "type": "Microsoft.DataFactory/factories/pipelines", "apiVersion": "2018-06-01", "properties": { "activities": [ { "name": "Upload Any File", "type": "Copy", "dependsOn": [], "policy": { "timeout": "7.00:00:00", "retry": 0, "retryIntervalInSeconds": 30, "secureOutput": false, "secureInput": false }, "userProperties": [], "typeProperties": { "source": { "type": "DelimitedTextSource", "storeSettings": { "type": "FileServerReadSettings", "recursive": true }, "formatSettings": { "type": 
"DelimitedTextReadSettings" } }, "sink": { "type": "DelimitedTextSink", "storeSettings": { "type": "AzureBlobFSWriteSettings" }, "formatSettings": { "type": "DelimitedTextWriteSettings", "quoteAllText": true, "fileExtension": ".txt" } }, "enableStaging": false, "translator": { "type": "TabularTranslator", "typeConversion": true, "typeConversionSettings": { "allowDataTruncation": true, "treatBooleanAsNumber": false } } }, "inputs": [ { "referenceName": "LaptopsFiles", "type": "DatasetReference", "parameters": { "Directory": "ForUpload\\People", "File": "Person.csv" } } ], "outputs": [ { "referenceName": "LakeFiles", "type": "DatasetReference", "parameters": { "Directory": "Landing", "File": "Person.csv" } } ] } ], "policy": { "elapsedTimeMetric": {}, "cancelAfter": {} }, "folder": { "name": "Demo Pipelines/Data Uploads" }, "annotations": [], "lastPublishTime": "2020-08-27T09:06:46Z" }, "dependsOn": [ "[concat(variables('factoryId'), '/datasets/LaptopsFiles')]" ] }, { "name": "[concat(parameters('factoryName'), '/03 - Upload - From Discovery')]", "type": "Microsoft.DataFactory/factories/pipelines", "apiVersion": "2018-06-01", "properties": { "activities": [ { "name": "Get File List", "type": "GetMetadata", "dependsOn": [], "policy": { "timeout": "7.00:00:00", "retry": 0, "retryIntervalInSeconds": 30, "secureOutput": false, "secureInput": false }, "userProperties": [], "typeProperties": { "dataset": { "referenceName": "LaptopFolders", "type": "DatasetReference", "parameters": { "Directory": "ForUpload\\People" } }, "fieldList": [ "childItems" ], "storeSettings": { "type": "FileServerReadSettings", "recursive": true }, "formatSettings": { "type": "DelimitedTextReadSettings" } } }, { "name": "Upload Files", "type": "ForEach", "dependsOn": [ { "activity": "Get File List", "dependencyConditions": [ "Succeeded" ] } ], "userProperties": [], "typeProperties": { "items": { "value": "@activity('Get File List').output.childItems", "type": "Expression" }, "activities": [ { 
"name": "Upload File", "type": "Copy", "dependsOn": [], "policy": { "timeout": "7.00:00:00", "retry": 0, "retryIntervalInSeconds": 30, "secureOutput": false, "secureInput": false }, "userProperties": [], "typeProperties": { "source": { "type": "DelimitedTextSource", "storeSettings": { "type": "FileServerReadSettings", "recursive": true }, "formatSettings": { "type": "DelimitedTextReadSettings" } }, "sink": { "type": "DelimitedTextSink", "storeSettings": { "type": "AzureBlobFSWriteSettings" }, "formatSettings": { "type": "DelimitedTextWriteSettings", "quoteAllText": true, "fileExtension": ".txt" } }, "enableStaging": false, "translator": { "type": "TabularTranslator", "typeConversion": true, "typeConversionSettings": { "allowDataTruncation": true, "treatBooleanAsNumber": false } } }, "inputs": [ { "referenceName": "LaptopsFiles", "type": "DatasetReference", "parameters": { "Directory": "ForUpload\\People", "File": { "value": "@item().name", "type": "Expression" } } } ], "outputs": [ { "referenceName": "LakeFiles", "type": "DatasetReference", "parameters": { "Directory": "Landing", "File": { "value": "@item().name", "type": "Expression" } } } ] } ] } } ], "policy": { "elapsedTimeMetric": {}, "cancelAfter": {} }, "folder": { "name": "Demo Pipelines/Data Uploads" }, "annotations": [], "lastPublishTime": "2020-08-27T09:06:45Z" }, "dependsOn": [ "[concat(variables('factoryId'), '/datasets/LaptopFolders')]", "[concat(variables('factoryId'), '/datasets/LaptopsFiles')]" ] }, { "name": "[concat(parameters('factoryName'), '/04 - Upload - From Metadata')]", "type": "Microsoft.DataFactory/factories/pipelines", "apiVersion": "2018-06-01", "properties": { "activities": [ { "name": "Get File List", "type": "Lookup", "dependsOn": [], "policy": { "timeout": "7.00:00:00", "retry": 0, "retryIntervalInSeconds": 30, "secureOutput": false, "secureInput": false }, "userProperties": [], "typeProperties": { "source": { "type": "AzureSqlSource", "sqlReaderQuery": "SELECT * FROM 
[dbo].[FilesToUpload] WHERE [Enabled] = 1;", "queryTimeout": "02:00:00", "partitionOption": "None" }, "dataset": { "referenceName": "GetSetMetadata", "type": "DatasetReference", "parameters": {} }, "firstRowOnly": false } }, { "name": "UploadFiles", "type": "ForEach", "dependsOn": [ { "activity": "Get File List", "dependencyConditions": [ "Succeeded" ] }, { "activity": "Log Upload Start", "dependencyConditions": [ "Succeeded" ] } ], "userProperties": [], "typeProperties": { "items": { "value": "@activity('Get File List').output.value", "type": "Expression" }, "activities": [ { "name": "Copy By File Type", "type": "Switch", "dependsOn": [], "userProperties": [], "typeProperties": { "on": { "value": "@last(split(item().FileName,'.'))", "type": "Expression" }, "cases": [ { "value": "csv", "activities": [ { "name": "CSV Copy", "type": "Copy", "dependsOn": [], "policy": { "timeout": "7.00:00:00", "retry": 0, "retryIntervalInSeconds": 30, "secureOutput": false, "secureInput": false }, "userProperties": [], "typeProperties": { "source": { "type": "DelimitedTextSource", "additionalColumns": [ { "name": "FilePath", "value": "$$FILEPATH" }, { "name": "Environment", "value": { "value": "@pipeline().globalParameters.Environment", "type": "Expression" } }, { "name": "PipelineName", "value": { "value": "@pipeline().Pipeline", "type": "Expression" } }, { "name": "RunId", "value": { "value": "@pipeline().RunId", "type": "Expression" } }, { "name": "RunDate", "value": { "value": "@utcnow()", "type": "Expression" } } ], "storeSettings": { "type": "FileServerReadSettings", "recursive": false, "enablePartitionDiscovery": false }, "formatSettings": { "type": "DelimitedTextReadSettings" } }, "sink": { "type": "DelimitedTextSink", "storeSettings": { "type": "AzureBlobFSWriteSettings" }, "formatSettings": { "type": "DelimitedTextWriteSettings", "quoteAllText": true, "fileExtension": ".txt" } }, 
"enableStaging": false, "translator": { "type": "TabularTranslator", "typeConversion": true, "typeConversionSettings": { "allowDataTruncation": true, "treatBooleanAsNumber": false } } }, "inputs": [ { "referenceName": "LaptopsFiles", "type": "DatasetReference", "parameters": { "Directory": { "value": "@item().SourceDirectory", "type": "Expression" }, "File": { "value": "@item().FileName", "type": "Expression" } } } ], "outputs": [ { "referenceName": "LakeFiles", "type": "DatasetReference", "parameters": { "Directory": { "value": "@item().TargetDirectory", "type": "Expression" }, "File": { "value": "@item().FileName", "type": "Expression" } } } ] } ] }, { "value": "parquet", "activities": [ { "name": "Parquet Copy", "type": "Copy", "dependsOn": [], "policy": { "timeout": "7.00:00:00", "retry": 0, "retryIntervalInSeconds": 30, "secureOutput": false, "secureInput": false }, "userProperties": [], "typeProperties": { "source": { "type": "ParquetSource", "storeSettings": { "type": "FileServerReadSettings", "recursive": false, "enablePartitionDiscovery": false } }, "sink": { "type": "ParquetSink", "storeSettings": { "type": "AzureBlobFSWriteSettings" }, "formatSettings": { "type": "ParquetWriteSettings" } }, "enableStaging": false, "translator": { "type": "TabularTranslator", "typeConversion": true, "typeConversionSettings": { "allowDataTruncation": true, "treatBooleanAsNumber": false } } }, "inputs": [ { "referenceName": "LaptopFilesParquet", "type": "DatasetReference", "parameters": { "Directory": { "value": "@item().SourceDirectory", "type": "Expression" }, "File": { "value": "@item().FileName", "type": "Expression" } } } ], "outputs": [ { "referenceName": "LakeFileParquet", "type": "DatasetReference", "parameters": { "Directory": { "value": "@item().TargetDirectory", "type": "Expression" }, "File": { "value": "@item().FileName", "type": "Expression" } } } ] } ] } ], "defaultActivities": [ { "name": "Binary Copy", "type": "Copy", "dependsOn": [], "policy": { "timeout": 
"7.00:00:00", "retry": 0, "retryIntervalInSeconds": 30, "secureOutput": false, "secureInput": false }, "userProperties": [], "typeProperties": { "source": { "type": "BinarySource", "storeSettings": { "type": "FileServerReadSettings", "recursive": false }, "formatSettings": { "type": "BinaryReadSettings" } }, "sink": { "type": "BinarySink", "storeSettings": { "type": "AzureBlobFSWriteSettings" } }, "enableStaging": false }, "inputs": [ { "referenceName": "LaptopFilesBinary", "type": "DatasetReference", "parameters": { "Directory": { "value": "@item().SourceDirectory", "type": "Expression" }, "File": { "value": "@item().FileName", "type": "Expression" } } } ], "outputs": [ { "referenceName": "LakeFilesBinary", "type": "DatasetReference", "parameters": { "Directory": { "value": "@item().TargetDirectory", "type": "Expression" }, "File": { "value": "@item().FileName", "type": "Expression" } } } ] } ] } } ] } }, { "name": "Log Upload Start", "type": "Lookup", "dependsOn": [ { "activity": "Get File List", "dependencyConditions": [ "Succeeded" ] } ], "policy": { "timeout": "7.00:00:00", "retry": 0, "retryIntervalInSeconds": 30, "secureOutput": false, "secureInput": false }, "userProperties": [], "typeProperties": { "source": { "type": "AzureSqlSource", "sqlReaderStoredProcedureName": "[[dbo].[LogUploadStart]", "storedProcedureParameters": { "FileCount": { "type": "Int32", "value": { "value": "@activity('Get File List').output.count", "type": "Expression" } }, "TriggerId": { "type": "String", "value": { "value": "@pipeline().TriggerId", "type": "Expression" } } }, "queryTimeout": "02:00:00", "partitionOption": "None" }, "dataset": { "referenceName": "GetSetMetadata", "type": "DatasetReference", "parameters": {} } } }, { "name": "Log Upload Finish", "type": "SqlServerStoredProcedure", "dependsOn": [ { "activity": "UploadFiles", "dependencyConditions": [ "Succeeded" ] } ], "policy": { "timeout": "7.00:00:00", "retry": 0, "retryIntervalInSeconds": 30, "secureOutput": false, 
"secureInput": false }, "userProperties": [], "typeProperties": { "storedProcedureName": "[[dbo].[LogUploadFinish]", "storedProcedureParameters": { "LogId": { "value": { "value": "@activity('Log Upload Start').output.firstRow.LogId", "type": "Expression" }, "type": "Int32" } } }, "linkedServiceName": { "referenceName": "trainingdb01", "type": "LinkedServiceReference" } } ], "policy": { "elapsedTimeMetric": {}, "cancelAfter": {} }, "variables": { "DemoVariable": { "type": "String" } }, "folder": { "name": "Demo Pipelines/Data Uploads" }, "annotations": [], "lastPublishTime": "2020-09-02T14:13:14Z" }, "dependsOn": [ "[concat(variables('factoryId'), '/datasets/LaptopFilesBinary')]", "[concat(variables('factoryId'), '/datasets/LakeFilesBinary')]", "[concat(variables('factoryId'), '/datasets/LaptopsFiles')]", "[concat(variables('factoryId'), '/datasets/LaptopFilesParquet')]" ] }, { "name": "[concat(parameters('factoryName'), '/05 - Get File List Utility')]", "type": "Microsoft.DataFactory/factories/pipelines", "apiVersion": "2018-06-01", "properties": { "activities": [ { "name": "Get File List", "type": "Lookup", "dependsOn": [], "policy": { "timeout": "7.00:00:00", "retry": 0, "retryIntervalInSeconds": 30, "secureOutput": false, "secureInput": false }, "userProperties": [], "typeProperties": { "source": { "type": "AzureSqlSource", "sqlReaderQuery": { "value": "@pipeline().parameters.GetFileQuery", "type": "Expression" }, "queryTimeout": "02:00:00", "partitionOption": "None" }, "dataset": { "referenceName": "GetSetMetadata", "type": "DatasetReference", "parameters": {} }, "firstRowOnly": false } } ], "policy": { "elapsedTimeMetric": {}, "cancelAfter": {} }, "parameters": { "GetFileQuery": { "type": "string" } }, "variables": { "DemoVariable": { "type": "String" } }, "folder": { "name": "Demo Pipelines/Data Uploads" }, "annotations": [], "lastPublishTime": "2020-09-02T14:13:14Z" }, "dependsOn": [] }, { "name": "[concat(parameters('factoryName'), '/CopyAWEntity')]", 
"type": "Microsoft.DataFactory/factories/pipelines", "apiVersion": "2018-06-01", "properties": { "activities": [ { "name": "Copy AW entity", "type": "Copy", "dependsOn": [], "policy": { "timeout": "0.12:00:00", "retry": 0, "retryIntervalInSeconds": 30, "secureOutput": false, "secureInput": false }, "userProperties": [], "typeProperties": { "source": { "type": "BinarySource", "storeSettings": { "type": "HttpReadSettings", "requestMethod": "GET" }, "formatSettings": { "type": "BinaryReadSettings" } }, "sink": { "type": "BinarySink", "storeSettings": { "type": "AzureBlobFSWriteSettings" } }, "enableStaging": false }, "inputs": [ { "referenceName": "HTTP_BIN_AdventureWorks", "type": "DatasetReference", "parameters": { "Entity": { "value": "@pipeline().parameters.Entity", "type": "Expression" } } } ], "outputs": [ { "referenceName": "ADLS_BIN_AdventureWorks", "type": "DatasetReference", "parameters": { "Entity": { "value": "@pipeline().parameters.Entity", "type": "Expression" } } } ] } ], "policy": { "elapsedTimeMetric": {}, "cancelAfter": {} }, "parameters": { "Entity": { "type": "string" } }, "folder": { "name": "Labs" }, "annotations": [] }, "dependsOn": [] }, { "name": "[concat(parameters('factoryName'), '/CopyAWProduct')]", "type": "Microsoft.DataFactory/factories/pipelines", "apiVersion": "2018-06-01", "properties": { "activities": [ { "name": "Copy AW Products to data lake", "type": "Copy", "dependsOn": [], "policy": { "timeout": "0.12:00:00", "retry": 0, "retryIntervalInSeconds": 30, "secureOutput": false, "secureInput": false }, "userProperties": [], "typeProperties": { "source": { "type": "BinarySource", "storeSettings": { "type": "HttpReadSettings", "requestMethod": "GET" }, "formatSettings": { "type": "BinaryReadSettings" } }, "sink": { "type": "BinarySink", "storeSettings": { "type": "AzureBlobFSWriteSettings" } }, "enableStaging": false }, "inputs": [ { "referenceName": "HTTP_BIN_AWProduct", "type": "DatasetReference", "parameters": {} } ], "outputs": [ { 
"referenceName": "ADLS_BIN_AWProduct", "type": "DatasetReference", "parameters": {} } ] } ], "policy": { "elapsedTimeMetric": {}, "cancelAfter": {} }, "folder": { "name": "Labs" }, "annotations": [] }, "dependsOn": [] }, { "name": "[concat(parameters('factoryName'), '/Get Key Vault Value')]", "type": "Microsoft.DataFactory/factories/pipelines", "apiVersion": "2018-06-01", "properties": { "activities": [ { "name": "Get Secret", "type": "WebActivity", "dependsOn": [ { "activity": "Set Key URL", "dependencyConditions": [ "Succeeded" ] } ], "policy": { "timeout": "7.00:00:00", "retry": 0, "retryIntervalInSeconds": 30, "secureOutput": false, "secureInput": false }, "userProperties": [], "typeProperties": { "url": { "value": "@variables('CompleteSecretURL')", "type": "Expression" }, "method": "GET", "headers": {}, "authentication": { "type": "MSI", "resource": "https://vault.azure.net" } } }, { "name": "Set Key URL", "type": "SetVariable", "dependsOn": [], "userProperties": [], "typeProperties": { "variableName": "CompleteSecretURL", "value": { "value": "@concat(\n'https://',\npipeline().globalParameters.KeyVaultName,\n'.vault.azure.net/secrets/',\npipeline().parameters.SecretName,\n'?api-version=7.0')", "type": "Expression" } } }, { "name": "Nested Key Vault URL", "type": "Lookup", "dependsOn": [ { "activity": "Get Secret", "dependencyConditions": [ "Succeeded" ] } ], "policy": { "timeout": "7.00:00:00", "retry": 0, "retryIntervalInSeconds": 30, "secureOutput": false, "secureInput": false }, "userProperties": [], "typeProperties": { "source": { "type": "AzureSqlSource", "queryTimeout": "02:00:00", "partitionOption": "None" }, "dataset": { "referenceName": "AnyDatabaseTableAnyKeyVault", "type": "DatasetReference", "parameters": { "LinkedServiceConnectionSecret": "ConnectionString-trainingdb01", "SchemaName": "sys", "TableName": "objects", "KeyVaultURL": { "value": "@concat('https://',pipeline().globalParameters.KeyVaultName,'.vault.azure.net/')", "type": "Expression" } } 
} } } ], "policy": { "elapsedTimeMetric": {}, "cancelAfter": {} }, "parameters": { "SecretName": { "type": "string", "defaultValue": "DemoKeyGetWithWebActivity" } }, "variables": { "CompleteSecretURL": { "type": "String" } }, "folder": { "name": "Demo Pipelines/Misc" }, "annotations": [] }, "dependsOn": [] }, { "name": "[concat(parameters('factoryName'), '/Lazy Replication')]", "type": "Microsoft.DataFactory/factories/pipelines", "apiVersion": "2018-06-01", "properties": { "activities": [ { "name": "Get Table List", "type": "Lookup", "dependsOn": [], "policy": { "timeout": "7.00:00:00", "retry": 0, "retryIntervalInSeconds": 30, "secureOutput": false, "secureInput": false }, "userProperties": [], "typeProperties": { "source": { "type": "AzureSqlSource", "sqlReaderQuery": { "value": "@pipeline().parameters.TableLookupQuery", "type": "Expression" }, "queryTimeout": "02:00:00", "partitionOption": "None" }, "dataset": { "referenceName": "AnyDatabaseTable", "type": "DatasetReference", "parameters": { "LinkedServiceConnectionSecret": { "value": "@pipeline().parameters.SourceConnectionSecret", "type": "Expression" }, "SchemaName": "sys", "TableName": "objects" } }, "firstRowOnly": false } }, { "name": "Copy Tables", "type": "ForEach", "dependsOn": [ { "activity": "Get Table List", "dependencyConditions": [ "Succeeded" ] } ], "userProperties": [], "typeProperties": { "items": { "value": "@activity('Get Table List').output.value", "type": "Expression" }, "activities": [ { "name": "Copy Table", "type": "Copy", "dependsOn": [], "policy": { "timeout": "7.00:00:00", "retry": 0, "retryIntervalInSeconds": 30, "secureOutput": false, "secureInput": false }, "userProperties": [], "typeProperties": { "source": { "type": "AzureSqlSource", "queryTimeout": "02:00:00", "partitionOption": "None" }, "sink": { "type": "AzureSqlSink", "preCopyScript": { "value": "IF OBJECT_ID('[@{item().SchemaName}].[@{item().TableName}]') IS NOT NULL TRUNCATE TABLE [@{item().SchemaName}].[@{item().TableName}]", 
"type": "Expression" }, "tableOption": "autoCreate", "disableMetricsCollection": false }, "enableStaging": false, "translator": { "type": "TabularTranslator", "typeConversion": true, "typeConversionSettings": { "allowDataTruncation": true, "treatBooleanAsNumber": false } } }, "inputs": [ { "referenceName": "AnyDatabaseTable", "type": "DatasetReference", "parameters": { "LinkedServiceConnectionSecret": { "value": "@pipeline().parameters.SourceConnectionSecret", "type": "Expression" }, "SchemaName": { "value": "@item().SchemaName", "type": "Expression" }, "TableName": { "value": "@item().TableName", "type": "Expression" } } } ], "outputs": [ { "referenceName": "AnyDatabaseTable", "type": "DatasetReference", "parameters": { "LinkedServiceConnectionSecret": { "value": "@pipeline().parameters.TargetConnectionSecret", "type": "Expression" }, "SchemaName": { "value": "@item().SchemaName", "type": "Expression" }, "TableName": { "value": "@item().TableName", "type": "Expression" } } } ] } ] } } ], "policy": { "elapsedTimeMetric": {}, "cancelAfter": {} }, "parameters": { "SourceConnectionSecret": { "type": "string", "defaultValue": "ConnectionString-trainingdb01" }, "TargetConnectionSecret": { "type": "string", "defaultValue": "ConnectionString-trainingdb02" }, "TableLookupQuery": { "type": "string", "defaultValue": "SELECT \ts.name AS SchemaName, \to.name AS TableName FROM \tsys.objects o \tINNER JOIN sys.schemas s \t\tON o.schema_id = s.schema_id WHERE \to.[type] = 'U'" } }, "folder": { "name": "Demo Pipelines/Dynamic Linked Services" }, "annotations": [], "lastPublishTime": "2020-08-27T09:43:42Z" }, "dependsOn": [] }, { "name": "[concat(parameters('factoryName'), '/Scale Out Level 2')]", "type": "Microsoft.DataFactory/factories/pipelines", "apiVersion": "2018-06-01", "properties": { "activities": [ { "name": "Get Bucket Contents", "type": "Lookup", "dependsOn": [], "policy": { "timeout": "7.00:00:00", "retry": 0, "retryIntervalInSeconds": 30, "secureOutput": false, 
"secureInput": false }, "userProperties": [], "typeProperties": { "source": { "type": "AzureSqlSource", "sqlReaderStoredProcedureName": "[[dbo].[GetBucketContents]", "storedProcedureParameters": { "BucketId": { "type": "Int32", "value": { "value": "@pipeline().parameters.BucketId", "type": "Expression" } } }, "partitionOption": "None" }, "dataset": { "referenceName": "GetSetMetadata", "type": "DatasetReference", "parameters": {} }, "firstRowOnly": false } }, { "name": "Execute Processes", "type": "ForEach", "dependsOn": [ { "activity": "Get Bucket Contents", "dependencyConditions": [ "Succeeded" ] } ], "userProperties": [], "typeProperties": { "items": { "value": "@activity('Get Bucket Contents').output.value", "type": "Expression" }, "batchCount": 50, "activities": [ { "name": "Run Process", "type": "SqlServerStoredProcedure", "dependsOn": [], "policy": { "timeout": "7.00:00:00", "retry": 0, "retryIntervalInSeconds": 30, "secureOutput": false, "secureInput": false }, "userProperties": [], "typeProperties": { "storedProcedureName": { "value": "@concat('[workers].[DumpDataAndWait',item().ProcessId,']')", "type": "Expression" } }, "linkedServiceName": { "referenceName": "trainingdb01", "type": "LinkedServiceReference" } } ] } } ], "policy": { "elapsedTimeMetric": {}, "cancelAfter": {} }, "parameters": { "BucketId": { "type": "int" } }, "folder": { "name": "Demo Pipelines/Scaling Out" }, "annotations": [], "lastPublishTime": "2020-08-24T13:41:19Z" }, "dependsOn": [] }, { "name": "[concat(parameters('factoryName'), '/MappingOrderAggregation')]", "type": "Microsoft.DataFactory/factories/dataflows", "apiVersion": "2018-06-01", "properties": { "type": "MappingDataFlow", "typeProperties": { "sources": [ { "dataset": { "referenceName": "LakeFileOrderHeaderParquet", "type": "DatasetReference" }, "name": "OrderHeader" }, { "dataset": { "referenceName": "LakeFileOrderDetailLinesParquet", "type": "DatasetReference" }, "name": "OrderLineDetails" } ], "sinks": [ { "dataset": { 
"referenceName": "TableOrderSummary", "type": "DatasetReference" }, "name": "OrderSummary" } ], "transformations": [ { "name": "JoinHeaderToLineDetails" }, { "name": "OrderLineCount" } ],
"script": "source(output(\n\t\tSalesOrderID as integer,\n\t\tRevisionNumber as integer,\n\t\tOrderDate as timestamp,\n\t\tDueDate as timestamp,\n\t\tShipDate as timestamp,\n\t\tStatus as integer,\n\t\tOnlineOrderFlag as boolean,\n\t\tSalesOrderNumber as string,\n\t\tPurchaseOrderNumber as string,\n\t\tAccountNumber as string,\n\t\tCustomerID as integer,\n\t\tShipToAddressID as integer,\n\t\tBillToAddressID as integer,\n\t\tShipMethod as string,\n\t\tCreditCardApprovalCode as string,\n\t\tSubTotal as decimal(19,4),\n\t\tTaxAmt as decimal(19,4),\n\t\tFreight as decimal(19,4),\n\t\tTotalDue as decimal(19,4),\n\t\tComment as string,\n\t\trowguid as string,\n\t\tModifiedDate as timestamp\n\t),\n\tallowSchemaDrift: false,\n\tvalidateSchema: false,\n\tignoreNoFilesFound: false,\n\tformat: 'parquet',\n\tpartitionBy('hash', 1)) ~> OrderHeader\nsource(output(\n\t\tSalesOrderID as integer,\n\t\tSalesOrderDetailID as integer,\n\t\tOrderQty as integer,\n\t\tProductID as integer,\n\t\tUnitPrice as decimal(19,4),\n\t\tUnitPriceDiscount as decimal(19,4),\n\t\tLineTotal as decimal(38,6),\n\t\trowguid as string,\n\t\tModifiedDate as timestamp\n\t),\n\tallowSchemaDrift: false,\n\tvalidateSchema: false,\n\tignoreNoFilesFound: false,\n\tformat: 'parquet',\n\tpartitionBy('hash', 1)) ~> OrderLineDetails\nOrderHeader, OrderLineDetails join(OrderHeader@SalesOrderID == OrderLineDetails@SalesOrderID,\n\tjoinType:'inner',\n\tmatchType:'exact',\n\tignoreSpaces: false,\n\tpartitionBy('hash', 1),\n\tbroadcast: 'both')~> JoinHeaderToLineDetails\nJoinHeaderToLineDetails aggregate(groupBy(SalesOrderNumber),\n\tRecordCount = count(SalesOrderDetailID),\n\tpartitionBy('roundRobin', 4)) ~> OrderLineCount\nOrderLineCount sink(allowSchemaDrift: false,\n\tvalidateSchema: false,\n\tinput(\n\t\tSalesOrderNumber as string,\n\t\tRecordCount as integer\n\t),\n\tdeletable:false,\n\tinsertable:true,\n\tupdateable:false,\n\tupsertable:false,\n\ttruncate:true,\n\tformat: 'table',\n\tskipDuplicateMapInputs: true,\n\tskipDuplicateMapOutputs: true,\n\terrorHandlingOption: 'stopOnFirstError',\n\tmapColumn(\n\t\tSalesOrderNumber,\n\t\tRecordCount\n\t),\n\tpartitionBy('roundRobin', 4)) ~> OrderSummary" } },
"dependsOn": [ "[concat(variables('factoryId'), '/datasets/TableOrderSummary')]" ] },
    {
      "name": "[concat(parameters('factoryName'), '/UpdateProductDimension')]",
      "type": "Microsoft.DataFactory/factories/dataflows",
      "apiVersion": "2018-06-01",
      "properties": {
        "folder": {
          "name": "Labs"
        },
        "type": "MappingDataFlow",
        "typeProperties": {
          "sources": [
            {
              "dataset": {
                "referenceName": "ADLS_TSV_AdventureWorks",
                "type": "DatasetReference"
              },
              "name": "Product"
            },
            {
              "dataset": {
                "referenceName": "ADLS_TSV_AdventureWorks",
                "type": "DatasetReference"
              },
              "name": "ProductSubcategory"
            },
            {
              "dataset": {
                "referenceName": "ADLS_TSV_AdventureWorks",
                "type": "DatasetReference"
              },
              "name": "ProductCategory"
            }
          ],
          "sinks": [
            {
              "linkedService": {
                "referenceName": "ADLS_saintegrationpipelines",
                "type": "LinkedServiceReference"
              },
              "name": "WriteToDataLake"
            }
          ],
          "transformations": [
            {
              "name": "SelectProductColumns"
            },
            {
              "name": "SelectSubcategoryColumns"
            },
            {
              "name": "SelectCategoryColumns"
            },
            {
              "name": "LookupProductCategory"
            },
            {
              "name": "LookupProductSubcategory"
            },
            {
              "name": "RemoveDuplicateColumns"
            }
          ],
          "scriptLines": [
            "source(output(",
            " ProductId as integer,",
            " Product as string,",
            " {_col2_} as string,",
            " {_col3_} as boolean,",
            " {_col4_} as boolean,",
            " {_col5_} as string,",
            " {_col6_} as short,",
            " {_col7_} as short,",
            " {_col8_} as double,",
            " {_col9_} as double,",
            " {_col10_} as string,",
            " {_col11_} as string,",
            " {_col12_} as string,",
            " {_col13_} as double,",
            " {_col14_} as short,",
            " {_col15_} as string,",
            " {_col16_} as string,",
            " {_col17_} as string,",
            " SubcategoryId as integer,",
            " {_col19_} as short,",
            " {_col20_} as timestamp,",
            " {_col21_} as timestamp,",
            " {_col22_} as string,",
            " {_col23_} as string,",
            " {_col24_} as string",
            " ),",
            " allowSchemaDrift: true,",
            " validateSchema: false,",
            " ignoreNoFilesFound: false) ~> Product",
            "source(output(",
            " SubcategoryId as integer,",
            " CategoryId as integer,",
            " Subcategory as string,",
            " {_col3_} as string,",
            " {_col4_} as timestamp",
            " ),",
            " allowSchemaDrift: true,",
            " validateSchema: false,",
            " ignoreNoFilesFound: false) ~> ProductSubcategory",
            "source(output(",
            " CategoryId as integer,",
            " Category as string,",
            " {_col2_} as string,",
            " {_col3_} as timestamp",
            " ),",
            " allowSchemaDrift: true,",
            " validateSchema: false,",
            " ignoreNoFilesFound: false) ~> ProductCategory",
            "Product select(mapColumn(",
            " ProductId,",
            " Product,",
            " SubcategoryId",
            " ),",
            " skipDuplicateMapInputs: true,",
            " skipDuplicateMapOutputs: true) ~> SelectProductColumns",
            "ProductSubcategory select(mapColumn(",
            " SubcategoryId,",
            " CategoryId,",
            " Subcategory",
            " ),",
            " skipDuplicateMapInputs: true,",
            " skipDuplicateMapOutputs: true) ~> SelectSubcategoryColumns",
            "ProductCategory select(mapColumn(",
            " CategoryId,",
            " Category",
            " ),",
            " skipDuplicateMapInputs: true,",
            " skipDuplicateMapOutputs: true) ~> SelectCategoryColumns",
            "SelectSubcategoryColumns, SelectCategoryColumns lookup(SelectSubcategoryColumns@CategoryId == SelectCategoryColumns@CategoryId,",
            " multiple: false,",
            " pickup: 'any',",
            " broadcast: 'auto')~> LookupProductCategory",
            "SelectProductColumns, LookupProductCategory lookup(SelectProductColumns@SubcategoryId == SelectSubcategoryColumns@SubcategoryId,",
            " multiple: false,",
            " pickup: 'any',",
            " broadcast: 'auto')~> LookupProductSubcategory",
            "LookupProductSubcategory select(mapColumn(",
            " ProductId,",
            " Product,",
            " SubcategoryId = SelectProductColumns@SubcategoryId,",
            " SubcategoryId = SelectSubcategoryColumns@SubcategoryId,",
            " CategoryId = SelectSubcategoryColumns@CategoryId,",
            " Subcategory,",
            " CategoryId = SelectCategoryColumns@CategoryId,",
            " Category",
            " ),",
            " skipDuplicateMapInputs: true,",
            " skipDuplicateMapOutputs: true) ~> RemoveDuplicateColumns",
            "RemoveDuplicateColumns sink(allowSchemaDrift: true,",
            " validateSchema: false,",
            " format: 'parquet',",
            " fileSystem: 'lakeroot',",
            " folderPath: 'Conformed/DimProduct',",
            " truncate: true,",
            " umask: 0022,",
            " preCommands: [],",
            " postCommands: [],",
            " skipDuplicateMapInputs: true,",
            " skipDuplicateMapOutputs: true) ~> WriteToDataLake"
          ]
        }
      },
      "dependsOn": []
    }
  ]
}