Added lab ADF artifacts
This commit is contained in:
165
Code/DataFactory/dataflow/UpdateProductDimension.json
Normal file
165
Code/DataFactory/dataflow/UpdateProductDimension.json
Normal file
@@ -0,0 +1,165 @@
|
||||
{
|
||||
"name": "UpdateProductDimension",
|
||||
"properties": {
|
||||
"folder": {
|
||||
"name": "Labs"
|
||||
},
|
||||
"type": "MappingDataFlow",
|
||||
"typeProperties": {
|
||||
"sources": [
|
||||
{
|
||||
"dataset": {
|
||||
"referenceName": "ADLS_TSV_AdventureWorks",
|
||||
"type": "DatasetReference"
|
||||
},
|
||||
"name": "Product"
|
||||
},
|
||||
{
|
||||
"dataset": {
|
||||
"referenceName": "ADLS_TSV_AdventureWorks",
|
||||
"type": "DatasetReference"
|
||||
},
|
||||
"name": "ProductSubcategory"
|
||||
},
|
||||
{
|
||||
"dataset": {
|
||||
"referenceName": "ADLS_TSV_AdventureWorks",
|
||||
"type": "DatasetReference"
|
||||
},
|
||||
"name": "ProductCategory"
|
||||
}
|
||||
],
|
||||
"sinks": [
|
||||
{
|
||||
"linkedService": {
|
||||
"referenceName": "ADLS_saintegrationpipelines",
|
||||
"type": "LinkedServiceReference"
|
||||
},
|
||||
"name": "WriteToDataLake"
|
||||
}
|
||||
],
|
||||
"transformations": [
|
||||
{
|
||||
"name": "SelectProductColumns"
|
||||
},
|
||||
{
|
||||
"name": "SelectSubcategoryColumns"
|
||||
},
|
||||
{
|
||||
"name": "SelectCategoryColumns"
|
||||
},
|
||||
{
|
||||
"name": "LookupProductCategory"
|
||||
},
|
||||
{
|
||||
"name": "LookupProductSubcategory"
|
||||
},
|
||||
{
|
||||
"name": "RemoveDuplicateColumns"
|
||||
}
|
||||
],
|
||||
"scriptLines": [
|
||||
"source(output(",
|
||||
" ProductId as integer,",
|
||||
" Product as string,",
|
||||
" {_col2_} as string,",
|
||||
" {_col3_} as boolean,",
|
||||
" {_col4_} as boolean,",
|
||||
" {_col5_} as string,",
|
||||
" {_col6_} as short,",
|
||||
" {_col7_} as short,",
|
||||
" {_col8_} as double,",
|
||||
" {_col9_} as double,",
|
||||
" {_col10_} as string,",
|
||||
" {_col11_} as string,",
|
||||
" {_col12_} as string,",
|
||||
" {_col13_} as double,",
|
||||
" {_col14_} as short,",
|
||||
" {_col15_} as string,",
|
||||
" {_col16_} as string,",
|
||||
" {_col17_} as string,",
|
||||
" SubcategoryId as integer,",
|
||||
" {_col19_} as short,",
|
||||
" {_col20_} as timestamp,",
|
||||
" {_col21_} as timestamp,",
|
||||
" {_col22_} as string,",
|
||||
" {_col23_} as string,",
|
||||
" {_col24_} as string",
|
||||
" ),",
|
||||
" allowSchemaDrift: true,",
|
||||
" validateSchema: false,",
|
||||
" ignoreNoFilesFound: false) ~> Product",
|
||||
"source(output(",
|
||||
" SubcategoryId as integer,",
|
||||
" CategoryId as integer,",
|
||||
" Subcategory as string,",
|
||||
" {_col3_} as string,",
|
||||
" {_col4_} as timestamp",
|
||||
" ),",
|
||||
" allowSchemaDrift: true,",
|
||||
" validateSchema: false,",
|
||||
" ignoreNoFilesFound: false) ~> ProductSubcategory",
|
||||
"source(output(",
|
||||
" CategoryId as integer,",
|
||||
" Category as string,",
|
||||
" {_col2_} as string,",
|
||||
" {_col3_} as timestamp",
|
||||
" ),",
|
||||
" allowSchemaDrift: true,",
|
||||
" validateSchema: false,",
|
||||
" ignoreNoFilesFound: false) ~> ProductCategory",
|
||||
"Product select(mapColumn(",
|
||||
" ProductId,",
|
||||
" Product,",
|
||||
" SubcategoryId",
|
||||
" ),",
|
||||
" skipDuplicateMapInputs: true,",
|
||||
" skipDuplicateMapOutputs: true) ~> SelectProductColumns",
|
||||
"ProductSubcategory select(mapColumn(",
|
||||
" SubcategoryId,",
|
||||
" CategoryId,",
|
||||
" Subcategory",
|
||||
" ),",
|
||||
" skipDuplicateMapInputs: true,",
|
||||
" skipDuplicateMapOutputs: true) ~> SelectSubcategoryColumns",
|
||||
"ProductCategory select(mapColumn(",
|
||||
" CategoryId,",
|
||||
" Category",
|
||||
" ),",
|
||||
" skipDuplicateMapInputs: true,",
|
||||
" skipDuplicateMapOutputs: true) ~> SelectCategoryColumns",
|
||||
"SelectSubcategoryColumns, SelectCategoryColumns lookup(SelectSubcategoryColumns@CategoryId == SelectCategoryColumns@CategoryId,",
|
||||
" multiple: false,",
|
||||
" pickup: 'any',",
|
||||
" broadcast: 'auto')~> LookupProductCategory",
|
||||
"SelectProductColumns, LookupProductCategory lookup(SelectProductColumns@SubcategoryId == SelectSubcategoryColumns@SubcategoryId,",
|
||||
" multiple: false,",
|
||||
" pickup: 'any',",
|
||||
" broadcast: 'auto')~> LookupProductSubcategory",
|
||||
"LookupProductSubcategory select(mapColumn(",
|
||||
" ProductId,",
|
||||
" Product,",
|
||||
" SubcategoryId = SelectProductColumns@SubcategoryId,",
|
||||
" SubcategoryId = SelectSubcategoryColumns@SubcategoryId,",
|
||||
" CategoryId = SelectSubcategoryColumns@CategoryId,",
|
||||
" Subcategory,",
|
||||
" CategoryId = SelectCategoryColumns@CategoryId,",
|
||||
" Category",
|
||||
" ),",
|
||||
" skipDuplicateMapInputs: true,",
|
||||
" skipDuplicateMapOutputs: true) ~> RemoveDuplicateColumns",
|
||||
"RemoveDuplicateColumns sink(allowSchemaDrift: true,",
|
||||
" validateSchema: false,",
|
||||
" format: 'parquet',",
|
||||
" fileSystem: 'lakeroot',",
|
||||
" folderPath: 'Conformed/DimProduct',",
|
||||
" truncate: true,",
|
||||
" umask: 0022,",
|
||||
" preCommands: [],",
|
||||
" postCommands: [],",
|
||||
" skipDuplicateMapInputs: true,",
|
||||
" skipDuplicateMapOutputs: true) ~> WriteToDataLake"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
169
Code/DataFactory/dataflow/UpdateProductDimension_Sorted.json
Normal file
169
Code/DataFactory/dataflow/UpdateProductDimension_Sorted.json
Normal file
@@ -0,0 +1,169 @@
|
||||
{
|
||||
"name": "UpdateProductDimension_Sorted",
|
||||
"properties": {
|
||||
"folder": {
|
||||
"name": "Labs"
|
||||
},
|
||||
"type": "MappingDataFlow",
|
||||
"typeProperties": {
|
||||
"sources": [
|
||||
{
|
||||
"dataset": {
|
||||
"referenceName": "ADLS_TSV_AdventureWorks",
|
||||
"type": "DatasetReference"
|
||||
},
|
||||
"name": "Product"
|
||||
},
|
||||
{
|
||||
"dataset": {
|
||||
"referenceName": "ADLS_TSV_AdventureWorks",
|
||||
"type": "DatasetReference"
|
||||
},
|
||||
"name": "ProductSubcategory"
|
||||
},
|
||||
{
|
||||
"dataset": {
|
||||
"referenceName": "ADLS_TSV_AdventureWorks",
|
||||
"type": "DatasetReference"
|
||||
},
|
||||
"name": "ProductCategory"
|
||||
}
|
||||
],
|
||||
"sinks": [
|
||||
{
|
||||
"linkedService": {
|
||||
"referenceName": "ADLS_saintegrationpipelines",
|
||||
"type": "LinkedServiceReference"
|
||||
},
|
||||
"name": "WriteToDataLake"
|
||||
}
|
||||
],
|
||||
"transformations": [
|
||||
{
|
||||
"name": "SelectProductColumns"
|
||||
},
|
||||
{
|
||||
"name": "SelectSubcategoryColumns"
|
||||
},
|
||||
{
|
||||
"name": "SelectCategoryColumns"
|
||||
},
|
||||
{
|
||||
"name": "LookupProductCategory"
|
||||
},
|
||||
{
|
||||
"name": "LookupProductSubcategory"
|
||||
},
|
||||
{
|
||||
"name": "RemoveDuplicateColumns"
|
||||
},
|
||||
{
|
||||
"name": "SortBySubcategory"
|
||||
}
|
||||
],
|
||||
"scriptLines": [
|
||||
"source(output(",
|
||||
" ProductId as integer,",
|
||||
" Product as string,",
|
||||
" {_col2_} as string,",
|
||||
" {_col3_} as boolean,",
|
||||
" {_col4_} as boolean,",
|
||||
" {_col5_} as string,",
|
||||
" {_col6_} as short,",
|
||||
" {_col7_} as short,",
|
||||
" {_col8_} as double,",
|
||||
" {_col9_} as double,",
|
||||
" {_col10_} as string,",
|
||||
" {_col11_} as string,",
|
||||
" {_col12_} as string,",
|
||||
" {_col13_} as double,",
|
||||
" {_col14_} as short,",
|
||||
" {_col15_} as string,",
|
||||
" {_col16_} as string,",
|
||||
" {_col17_} as string,",
|
||||
" SubcategoryId as integer,",
|
||||
" {_col19_} as short,",
|
||||
" {_col20_} as timestamp,",
|
||||
" {_col21_} as timestamp,",
|
||||
" {_col22_} as string,",
|
||||
" {_col23_} as string,",
|
||||
" {_col24_} as string",
|
||||
" ),",
|
||||
" allowSchemaDrift: true,",
|
||||
" validateSchema: false,",
|
||||
" ignoreNoFilesFound: false) ~> Product",
|
||||
"source(output(",
|
||||
" SubcategoryId as integer,",
|
||||
" CategoryId as integer,",
|
||||
" Subcategory as string,",
|
||||
" {_col3_} as string,",
|
||||
" {_col4_} as timestamp",
|
||||
" ),",
|
||||
" allowSchemaDrift: true,",
|
||||
" validateSchema: false,",
|
||||
" ignoreNoFilesFound: false) ~> ProductSubcategory",
|
||||
"source(output(",
|
||||
" CategoryId as integer,",
|
||||
" Category as string,",
|
||||
" {_col2_} as string,",
|
||||
" {_col3_} as timestamp",
|
||||
" ),",
|
||||
" allowSchemaDrift: true,",
|
||||
" validateSchema: false,",
|
||||
" ignoreNoFilesFound: false) ~> ProductCategory",
|
||||
"Product select(mapColumn(",
|
||||
" ProductId,",
|
||||
" Product,",
|
||||
" SubcategoryId",
|
||||
" ),",
|
||||
" skipDuplicateMapInputs: true,",
|
||||
" skipDuplicateMapOutputs: true) ~> SelectProductColumns",
|
||||
"ProductSubcategory select(mapColumn(",
|
||||
" SubcategoryId,",
|
||||
" CategoryId,",
|
||||
" Subcategory",
|
||||
" ),",
|
||||
" skipDuplicateMapInputs: true,",
|
||||
" skipDuplicateMapOutputs: true) ~> SelectSubcategoryColumns",
|
||||
"ProductCategory select(mapColumn(",
|
||||
" CategoryId,",
|
||||
" Category",
|
||||
" ),",
|
||||
" skipDuplicateMapInputs: true,",
|
||||
" skipDuplicateMapOutputs: true) ~> SelectCategoryColumns",
|
||||
"SelectSubcategoryColumns, SelectCategoryColumns lookup(SelectSubcategoryColumns@CategoryId == SelectCategoryColumns@CategoryId,",
|
||||
" multiple: false,",
|
||||
" pickup: 'any',",
|
||||
" broadcast: 'auto')~> LookupProductCategory",
|
||||
"SelectProductColumns, LookupProductCategory lookup(SelectProductColumns@SubcategoryId == SelectSubcategoryColumns@SubcategoryId,",
|
||||
" multiple: false,",
|
||||
" pickup: 'any',",
|
||||
" broadcast: 'auto')~> LookupProductSubcategory",
|
||||
"LookupProductSubcategory select(mapColumn(",
|
||||
" ProductId,",
|
||||
" Product,",
|
||||
" SubcategoryId = SelectProductColumns@SubcategoryId,",
|
||||
" SubcategoryId = SelectSubcategoryColumns@SubcategoryId,",
|
||||
" CategoryId = SelectSubcategoryColumns@CategoryId,",
|
||||
" Subcategory,",
|
||||
" CategoryId = SelectCategoryColumns@CategoryId,",
|
||||
" Category",
|
||||
" ),",
|
||||
" skipDuplicateMapInputs: true,",
|
||||
" skipDuplicateMapOutputs: true) ~> RemoveDuplicateColumns",
|
||||
"RemoveDuplicateColumns sort(asc(Subcategory, false)) ~> SortBySubcategory",
|
||||
"SortBySubcategory sink(allowSchemaDrift: true,",
|
||||
" validateSchema: false,",
|
||||
" format: 'parquet',",
|
||||
" fileSystem: 'lakeroot',",
|
||||
" folderPath: 'Conformed/DimProduct',",
|
||||
" truncate: true,",
|
||||
" umask: 0022,",
|
||||
" preCommands: [],",
|
||||
" postCommands: [],",
|
||||
" skipDuplicateMapInputs: true,",
|
||||
" skipDuplicateMapOutputs: true) ~> WriteToDataLake"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user