Azure-Data-Integration-Pipe.../Labs/adf/dataflow/UpdateProductDimension.json
Richard Swinbank b8984ccca5 Revised labs
2023-05-31 09:09:28 +01:00

175 lines
5.5 KiB
JSON

{
"name": "UpdateProductDimension",
"properties": {
"folder": {
"name": "Lab4"
},
"type": "MappingDataFlow",
"typeProperties": {
"sources": [
{
"dataset": {
"referenceName": "ADLS_TSV_AdventureWorks",
"type": "DatasetReference"
},
"name": "Product"
},
{
"dataset": {
"referenceName": "ADLS_TSV_AdventureWorks",
"type": "DatasetReference"
},
"name": "ProductSubcategory"
},
{
"dataset": {
"referenceName": "ADLS_TSV_AdventureWorks",
"type": "DatasetReference"
},
"name": "ProductCategory"
}
],
"sinks": [
{
"linkedService": {
"referenceName": "AzureDataLakeStorage1",
"type": "LinkedServiceReference"
},
"name": "WriteToDataLake",
"description": "Write results to data lake"
}
],
"transformations": [
{
"name": "SelectProductColumns"
},
{
"name": "SelectProductSubcategoryColumns"
},
{
"name": "SelectProductCategoryColumns"
},
{
"name": "LookupProductCategory",
"description": "Lookup product category name"
},
{
"name": "LookupProductSubcategory"
},
{
"name": "RemoveDuplicateColumns"
}
],
"scriptLines": [
"source(output(",
" ProductId as integer,",
" Product as string,",
" {_col2_} as string,",
" {_col3_} as boolean,",
" {_col4_} as boolean,",
" {_col5_} as string,",
" {_col6_} as short,",
" {_col7_} as short,",
" {_col8_} as double,",
" {_col9_} as double,",
" {_col10_} as string,",
" {_col11_} as string,",
" {_col12_} as string,",
" {_col13_} as double,",
" {_col14_} as short,",
" {_col15_} as string,",
" {_col16_} as string,",
" {_col17_} as string,",
" SubcategoryId as integer,",
" {_col19_} as short,",
" {_col20_} as timestamp,",
" {_col21_} as timestamp,",
" {_col22_} as string,",
" {_col23_} as string,",
" {_col24_} as string",
" ),",
" allowSchemaDrift: true,",
" validateSchema: false,",
" ignoreNoFilesFound: false) ~> Product",
"source(output(",
" SubcategoryId as integer,",
" CategoryId as short,",
" Subcategory as string,",
" {_col3_} as string,",
" {_col4_} as timestamp",
" ),",
" allowSchemaDrift: true,",
" validateSchema: false,",
" ignoreNoFilesFound: false) ~> ProductSubcategory",
"source(output(",
" CategoryId as integer,",
" Category as string,",
" {_col2_} as string,",
" {_col3_} as timestamp",
" ),",
" allowSchemaDrift: true,",
" validateSchema: false,",
" ignoreNoFilesFound: false) ~> ProductCategory",
"Product select(mapColumn(",
" ProductId,",
" Product,",
" SubcategoryId",
" ),",
" skipDuplicateMapInputs: true,",
" skipDuplicateMapOutputs: true) ~> SelectProductColumns",
"ProductSubcategory select(mapColumn(",
" SubcategoryId,",
" CategoryId,",
" Subcategory",
" ),",
" skipDuplicateMapInputs: true,",
" skipDuplicateMapOutputs: true) ~> SelectProductSubcategoryColumns",
"ProductCategory select(mapColumn(",
" CategoryId,",
" Category",
" ),",
" skipDuplicateMapInputs: true,",
" skipDuplicateMapOutputs: true) ~> SelectProductCategoryColumns",
"SelectProductSubcategoryColumns, SelectProductCategoryColumns lookup(SelectProductSubcategoryColumns@CategoryId == SelectProductCategoryColumns@CategoryId,",
" multiple: false,",
" pickup: 'any',",
" broadcast: 'auto')~> LookupProductCategory",
"SelectProductColumns, LookupProductCategory lookup(SelectProductColumns@SubcategoryId == SelectProductSubcategoryColumns@SubcategoryId,",
" multiple: false,",
" pickup: 'any',",
" broadcast: 'auto')~> LookupProductSubcategory",
"LookupProductSubcategory select(mapColumn(",
" ProductId,",
" Product,",
" SubcategoryId = SelectProductColumns@SubcategoryId,",
" SubcategoryId = SelectProductSubcategoryColumns@SubcategoryId,",
" CategoryId = SelectProductSubcategoryColumns@CategoryId,",
" Subcategory,",
" CategoryId = SelectProductCategoryColumns@CategoryId,",
" Category",
" ),",
" skipDuplicateMapInputs: true,",
" skipDuplicateMapOutputs: true) ~> RemoveDuplicateColumns",
"RemoveDuplicateColumns sink(allowSchemaDrift: true,",
" validateSchema: false,",
" format: 'delta',",
" fileSystem: 'lakeroot',",
" folderPath: 'Prepared/DimProduct',",
" overwrite:true,",
" mergeSchema: false,",
" autoCompact: false,",
" optimizedWrite: false,",
" vacuum: 0,",
" deletable:false,",
" insertable:true,",
" updateable:false,",
" upsertable:false,",
" umask: 0022,",
" preCommands: [],",
" postCommands: [],",
" skipDuplicateMapInputs: true,",
" skipDuplicateMapOutputs: true) ~> WriteToDataLake"
]
}
}
}