Added lab ADF artifacts

This commit is contained in:
Richard Swinbank 2022-08-29 21:37:11 +01:00
parent 213168013b
commit c2b7d903cb
14 changed files with 729 additions and 0 deletions

View File

@ -0,0 +1,165 @@
{
"name": "UpdateProductDimension",
"properties": {
"folder": {
"name": "Labs"
},
"type": "MappingDataFlow",
"typeProperties": {
"sources": [
{
"dataset": {
"referenceName": "ADLS_TSV_AdventureWorks",
"type": "DatasetReference"
},
"name": "Product"
},
{
"dataset": {
"referenceName": "ADLS_TSV_AdventureWorks",
"type": "DatasetReference"
},
"name": "ProductSubcategory"
},
{
"dataset": {
"referenceName": "ADLS_TSV_AdventureWorks",
"type": "DatasetReference"
},
"name": "ProductCategory"
}
],
"sinks": [
{
"linkedService": {
"referenceName": "ADLS_saintegrationpipelines",
"type": "LinkedServiceReference"
},
"name": "WriteToDataLake"
}
],
"transformations": [
{
"name": "SelectProductColumns"
},
{
"name": "SelectSubcategoryColumns"
},
{
"name": "SelectCategoryColumns"
},
{
"name": "LookupProductCategory"
},
{
"name": "LookupProductSubcategory"
},
{
"name": "RemoveDuplicateColumns"
}
],
"scriptLines": [
"source(output(",
" ProductId as integer,",
" Product as string,",
" {_col2_} as string,",
" {_col3_} as boolean,",
" {_col4_} as boolean,",
" {_col5_} as string,",
" {_col6_} as short,",
" {_col7_} as short,",
" {_col8_} as double,",
" {_col9_} as double,",
" {_col10_} as string,",
" {_col11_} as string,",
" {_col12_} as string,",
" {_col13_} as double,",
" {_col14_} as short,",
" {_col15_} as string,",
" {_col16_} as string,",
" {_col17_} as string,",
" SubcategoryId as integer,",
" {_col19_} as short,",
" {_col20_} as timestamp,",
" {_col21_} as timestamp,",
" {_col22_} as string,",
" {_col23_} as string,",
" {_col24_} as string",
" ),",
" allowSchemaDrift: true,",
" validateSchema: false,",
" ignoreNoFilesFound: false) ~> Product",
"source(output(",
" SubcategoryId as integer,",
" CategoryId as integer,",
" Subcategory as string,",
" {_col3_} as string,",
" {_col4_} as timestamp",
" ),",
" allowSchemaDrift: true,",
" validateSchema: false,",
" ignoreNoFilesFound: false) ~> ProductSubcategory",
"source(output(",
" CategoryId as integer,",
" Category as string,",
" {_col2_} as string,",
" {_col3_} as timestamp",
" ),",
" allowSchemaDrift: true,",
" validateSchema: false,",
" ignoreNoFilesFound: false) ~> ProductCategory",
"Product select(mapColumn(",
" ProductId,",
" Product,",
" SubcategoryId",
" ),",
" skipDuplicateMapInputs: true,",
" skipDuplicateMapOutputs: true) ~> SelectProductColumns",
"ProductSubcategory select(mapColumn(",
" SubcategoryId,",
" CategoryId,",
" Subcategory",
" ),",
" skipDuplicateMapInputs: true,",
" skipDuplicateMapOutputs: true) ~> SelectSubcategoryColumns",
"ProductCategory select(mapColumn(",
" CategoryId,",
" Category",
" ),",
" skipDuplicateMapInputs: true,",
" skipDuplicateMapOutputs: true) ~> SelectCategoryColumns",
"SelectSubcategoryColumns, SelectCategoryColumns lookup(SelectSubcategoryColumns@CategoryId == SelectCategoryColumns@CategoryId,",
" multiple: false,",
" pickup: 'any',",
" broadcast: 'auto')~> LookupProductCategory",
"SelectProductColumns, LookupProductCategory lookup(SelectProductColumns@SubcategoryId == SelectSubcategoryColumns@SubcategoryId,",
" multiple: false,",
" pickup: 'any',",
" broadcast: 'auto')~> LookupProductSubcategory",
"LookupProductSubcategory select(mapColumn(",
" ProductId,",
" Product,",
" SubcategoryId = SelectProductColumns@SubcategoryId,",
" SubcategoryId = SelectSubcategoryColumns@SubcategoryId,",
" CategoryId = SelectSubcategoryColumns@CategoryId,",
" Subcategory,",
" CategoryId = SelectCategoryColumns@CategoryId,",
" Category",
" ),",
" skipDuplicateMapInputs: true,",
" skipDuplicateMapOutputs: true) ~> RemoveDuplicateColumns",
"RemoveDuplicateColumns sink(allowSchemaDrift: true,",
" validateSchema: false,",
" format: 'parquet',",
" fileSystem: 'lakeroot',",
" folderPath: 'Conformed/DimProduct',",
" truncate: true,",
" umask: 0022,",
" preCommands: [],",
" postCommands: [],",
" skipDuplicateMapInputs: true,",
" skipDuplicateMapOutputs: true) ~> WriteToDataLake"
]
}
}
}

View File

@ -0,0 +1,169 @@
{
"name": "UpdateProductDimension_Sorted",
"properties": {
"folder": {
"name": "Labs"
},
"type": "MappingDataFlow",
"typeProperties": {
"sources": [
{
"dataset": {
"referenceName": "ADLS_TSV_AdventureWorks",
"type": "DatasetReference"
},
"name": "Product"
},
{
"dataset": {
"referenceName": "ADLS_TSV_AdventureWorks",
"type": "DatasetReference"
},
"name": "ProductSubcategory"
},
{
"dataset": {
"referenceName": "ADLS_TSV_AdventureWorks",
"type": "DatasetReference"
},
"name": "ProductCategory"
}
],
"sinks": [
{
"linkedService": {
"referenceName": "ADLS_saintegrationpipelines",
"type": "LinkedServiceReference"
},
"name": "WriteToDataLake"
}
],
"transformations": [
{
"name": "SelectProductColumns"
},
{
"name": "SelectSubcategoryColumns"
},
{
"name": "SelectCategoryColumns"
},
{
"name": "LookupProductCategory"
},
{
"name": "LookupProductSubcategory"
},
{
"name": "RemoveDuplicateColumns"
},
{
"name": "SortBySubcategory"
}
],
"scriptLines": [
"source(output(",
" ProductId as integer,",
" Product as string,",
" {_col2_} as string,",
" {_col3_} as boolean,",
" {_col4_} as boolean,",
" {_col5_} as string,",
" {_col6_} as short,",
" {_col7_} as short,",
" {_col8_} as double,",
" {_col9_} as double,",
" {_col10_} as string,",
" {_col11_} as string,",
" {_col12_} as string,",
" {_col13_} as double,",
" {_col14_} as short,",
" {_col15_} as string,",
" {_col16_} as string,",
" {_col17_} as string,",
" SubcategoryId as integer,",
" {_col19_} as short,",
" {_col20_} as timestamp,",
" {_col21_} as timestamp,",
" {_col22_} as string,",
" {_col23_} as string,",
" {_col24_} as string",
" ),",
" allowSchemaDrift: true,",
" validateSchema: false,",
" ignoreNoFilesFound: false) ~> Product",
"source(output(",
" SubcategoryId as integer,",
" CategoryId as integer,",
" Subcategory as string,",
" {_col3_} as string,",
" {_col4_} as timestamp",
" ),",
" allowSchemaDrift: true,",
" validateSchema: false,",
" ignoreNoFilesFound: false) ~> ProductSubcategory",
"source(output(",
" CategoryId as integer,",
" Category as string,",
" {_col2_} as string,",
" {_col3_} as timestamp",
" ),",
" allowSchemaDrift: true,",
" validateSchema: false,",
" ignoreNoFilesFound: false) ~> ProductCategory",
"Product select(mapColumn(",
" ProductId,",
" Product,",
" SubcategoryId",
" ),",
" skipDuplicateMapInputs: true,",
" skipDuplicateMapOutputs: true) ~> SelectProductColumns",
"ProductSubcategory select(mapColumn(",
" SubcategoryId,",
" CategoryId,",
" Subcategory",
" ),",
" skipDuplicateMapInputs: true,",
" skipDuplicateMapOutputs: true) ~> SelectSubcategoryColumns",
"ProductCategory select(mapColumn(",
" CategoryId,",
" Category",
" ),",
" skipDuplicateMapInputs: true,",
" skipDuplicateMapOutputs: true) ~> SelectCategoryColumns",
"SelectSubcategoryColumns, SelectCategoryColumns lookup(SelectSubcategoryColumns@CategoryId == SelectCategoryColumns@CategoryId,",
" multiple: false,",
" pickup: 'any',",
" broadcast: 'auto')~> LookupProductCategory",
"SelectProductColumns, LookupProductCategory lookup(SelectProductColumns@SubcategoryId == SelectSubcategoryColumns@SubcategoryId,",
" multiple: false,",
" pickup: 'any',",
" broadcast: 'auto')~> LookupProductSubcategory",
"LookupProductSubcategory select(mapColumn(",
" ProductId,",
" Product,",
" SubcategoryId = SelectProductColumns@SubcategoryId,",
" SubcategoryId = SelectSubcategoryColumns@SubcategoryId,",
" CategoryId = SelectSubcategoryColumns@CategoryId,",
" Subcategory,",
" CategoryId = SelectCategoryColumns@CategoryId,",
" Category",
" ),",
" skipDuplicateMapInputs: true,",
" skipDuplicateMapOutputs: true) ~> RemoveDuplicateColumns",
"RemoveDuplicateColumns sort(asc(Subcategory, false)) ~> SortBySubcategory",
"SortBySubcategory sink(allowSchemaDrift: true,",
" validateSchema: false,",
" format: 'parquet',",
" fileSystem: 'lakeroot',",
" folderPath: 'Conformed/DimProduct',",
" truncate: true,",
" umask: 0022,",
" preCommands: [],",
" postCommands: [],",
" skipDuplicateMapInputs: true,",
" skipDuplicateMapOutputs: true) ~> WriteToDataLake"
]
}
}
}

View File

@ -0,0 +1,22 @@
{
"name": "ADLS_BIN_AWProduct",
"properties": {
"linkedServiceName": {
"referenceName": "ADLS_saintegrationpipelines",
"type": "LinkedServiceReference"
},
"folder": {
"name": "Labs"
},
"annotations": [],
"type": "Binary",
"typeProperties": {
"location": {
"type": "AzureBlobFSLocation",
"fileName": "Product.csv",
"folderPath": "Raw",
"fileSystem": "lakeroot"
}
}
}
}

View File

@ -0,0 +1,30 @@
{
"name": "ADLS_BIN_AdventureWorks",
"properties": {
"linkedServiceName": {
"referenceName": "ADLS_saintegrationpipelines",
"type": "LinkedServiceReference"
},
"parameters": {
"Entity": {
"type": "string"
}
},
"folder": {
"name": "Labs"
},
"annotations": [],
"type": "Binary",
"typeProperties": {
"location": {
"type": "AzureBlobFSLocation",
"fileName": {
"value": "@{dataset().Entity}.tsv",
"type": "Expression"
},
"folderPath": "Raw",
"fileSystem": "lakeroot"
}
}
}
}

View File

@ -0,0 +1,34 @@
{
"name": "ADLS_TSV_AdventureWorks",
"properties": {
"linkedServiceName": {
"referenceName": "ADLS_saintegrationpipelines",
"type": "LinkedServiceReference"
},
"parameters": {
"FileName": {
"type": "string"
}
},
"folder": {
"name": "Labs"
},
"annotations": [],
"type": "DelimitedText",
"typeProperties": {
"location": {
"type": "AzureBlobFSLocation",
"fileName": {
"value": "@dataset().FileName",
"type": "Expression"
},
"folderPath": "Raw",
"fileSystem": "lakeroot"
},
"columnDelimiter": "\t",
"escapeChar": "\\",
"quoteChar": "\""
},
"schema": []
}
}

View File

@ -0,0 +1,19 @@
{
"name": "HTTP_BIN_AWProduct",
"properties": {
"linkedServiceName": {
"referenceName": "HTTP_AWProduct",
"type": "LinkedServiceReference"
},
"folder": {
"name": "Labs"
},
"annotations": [],
"type": "Binary",
"typeProperties": {
"location": {
"type": "HttpServerLocation"
}
}
}
}

View File

@ -0,0 +1,28 @@
{
"name": "HTTP_BIN_AdventureWorks",
"properties": {
"linkedServiceName": {
"referenceName": "HTTP_AWGitHub",
"type": "LinkedServiceReference"
},
"parameters": {
"Entity": {
"type": "string"
}
},
"folder": {
"name": "Labs"
},
"annotations": [],
"type": "Binary",
"typeProperties": {
"location": {
"type": "HttpServerLocation",
"relativeUrl": {
"value": "@{dataset().Entity}.csv",
"type": "Expression"
}
}
}
}
}

View File

@ -0,0 +1,10 @@
{
"name": "ADLS_saintegrationpipelines",
"properties": {
"annotations": [],
"type": "AzureBlobFS",
"typeProperties": {
"url": "https://saintegrationpipelines.dfs.core.windows.net/"
}
}
}

View File

@ -0,0 +1,12 @@
{
"name": "HTTP_AWGitHub",
"properties": {
"annotations": [],
"type": "HttpServer",
"typeProperties": {
"url": "https://raw.githubusercontent.com/microsoft/sql-server-samples/master/samples/databases/adventure-works/oltp-install-script/",
"enableServerCertificateValidation": true,
"authenticationType": "Anonymous"
}
}
}

View File

@ -0,0 +1,12 @@
{
"name": "HTTP_AWProduct",
"properties": {
"annotations": [],
"type": "HttpServer",
"typeProperties": {
"url": "https://raw.githubusercontent.com/microsoft/sql-server-samples/master/samples/databases/adventure-works/oltp-install-script/Product.csv",
"enableServerCertificateValidation": true,
"authenticationType": "Anonymous"
}
}
}

View File

@ -0,0 +1,46 @@
{
"name": "BuildDimProduct",
"properties": {
"activities": [
{
"name": "Run UpdateProductDimension",
"type": "ExecuteDataFlow",
"dependsOn": [],
"policy": {
"timeout": "0.12:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"dataflow": {
"referenceName": "UpdateProductDimension",
"type": "DataFlowReference",
"datasetParameters": {
"Product": {
"FileName": "Product.tsv"
},
"ProductSubcategory": {
"FileName": "ProductSubcategory.tsv"
},
"ProductCategory": {
"FileName": "ProductCategory.tsv"
}
}
},
"compute": {
"coreCount": 8,
"computeType": "General"
},
"traceLevel": "Fine"
}
}
],
"folder": {
"name": "Labs"
},
"annotations": []
}
}

View File

@ -0,0 +1,55 @@
{
"name": "CopyAWEntities",
"properties": {
"activities": [
{
"name": "Copy each AW entity",
"type": "ForEach",
"dependsOn": [],
"userProperties": [],
"typeProperties": {
"items": {
"value": "@variables('Entities')",
"type": "Expression"
},
"isSequential": false,
"activities": [
{
"name": "Execute CopyAWEntity",
"type": "ExecutePipeline",
"dependsOn": [],
"userProperties": [],
"typeProperties": {
"pipeline": {
"referenceName": "CopyAWEntity",
"type": "PipelineReference"
},
"waitOnCompletion": true,
"parameters": {
"Entity": {
"value": "@item()",
"type": "Expression"
}
}
}
}
]
}
}
],
"variables": {
"Entities": {
"type": "Array",
"defaultValue": [
"Product",
"ProductSubcategory",
"ProductCategory"
]
}
},
"folder": {
"name": "Labs"
},
"annotations": []
}
}

View File

@ -0,0 +1,72 @@
{
"name": "CopyAWEntity",
"properties": {
"activities": [
{
"name": "Copy AW entity",
"type": "Copy",
"dependsOn": [],
"policy": {
"timeout": "0.12:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"source": {
"type": "BinarySource",
"storeSettings": {
"type": "HttpReadSettings",
"requestMethod": "GET"
},
"formatSettings": {
"type": "BinaryReadSettings"
}
},
"sink": {
"type": "BinarySink",
"storeSettings": {
"type": "AzureBlobFSWriteSettings"
}
},
"enableStaging": false
},
"inputs": [
{
"referenceName": "HTTP_BIN_AdventureWorks",
"type": "DatasetReference",
"parameters": {
"Entity": {
"value": "@pipeline().parameters.Entity",
"type": "Expression"
}
}
}
],
"outputs": [
{
"referenceName": "ADLS_BIN_AdventureWorks",
"type": "DatasetReference",
"parameters": {
"Entity": {
"value": "@pipeline().parameters.Entity",
"type": "Expression"
}
}
}
]
}
],
"parameters": {
"Entity": {
"type": "string"
}
},
"folder": {
"name": "Labs"
},
"annotations": []
}
}

View File

@ -0,0 +1,55 @@
{
"name": "CopyAWProduct",
"properties": {
"activities": [
{
"name": "Copy AW Products to data lake",
"type": "Copy",
"dependsOn": [],
"policy": {
"timeout": "0.12:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"source": {
"type": "BinarySource",
"storeSettings": {
"type": "HttpReadSettings",
"requestMethod": "GET"
},
"formatSettings": {
"type": "BinaryReadSettings"
}
},
"sink": {
"type": "BinarySink",
"storeSettings": {
"type": "AzureBlobFSWriteSettings"
}
},
"enableStaging": false
},
"inputs": [
{
"referenceName": "HTTP_BIN_AWProduct",
"type": "DatasetReference"
}
],
"outputs": [
{
"referenceName": "ADLS_BIN_AWProduct",
"type": "DatasetReference"
}
]
}
],
"folder": {
"name": "Labs"
},
"annotations": []
}
}