{
  "name": "UpdateProductDimension",
  "properties": {
    "folder": {
      "name": "Lab4"
    },
    "type": "MappingDataFlow",
    "typeProperties": {
      "sources": [
        {
          "dataset": {
            "referenceName": "ADLS_TSV_AdventureWorks",
            "type": "DatasetReference"
          },
          "name": "Product"
        },
        {
          "dataset": {
            "referenceName": "ADLS_TSV_AdventureWorks",
            "type": "DatasetReference"
          },
          "name": "ProductSubcategory"
        },
        {
          "dataset": {
            "referenceName": "ADLS_TSV_AdventureWorks",
            "type": "DatasetReference"
          },
          "name": "ProductCategory"
        }
      ],
      "sinks": [
        {
          "linkedService": {
            "referenceName": "AzureDataLakeStorage1",
            "type": "LinkedServiceReference"
          },
          "name": "WriteToDataLake",
          "description": "Write results to data lake"
        }
      ],
      "transformations": [
        {
          "name": "SelectProductColumns"
        },
        {
          "name": "SelectProductSubcategoryColumns"
        },
        {
          "name": "SelectProductCategoryColumns"
        },
        {
          "name": "LookupProductCategory",
          "description": "Lookup product category name"
        },
        {
          "name": "LookupProductSubcategory"
        },
        {
          "name": "RemoveDuplicateColumns"
        }
      ],
      "scriptLines": [
        "source(output(",
        " ProductId as integer,",
        " Product as string,",
        " {_col2_} as string,",
        " {_col3_} as boolean,",
        " {_col4_} as boolean,",
        " {_col5_} as string,",
        " {_col6_} as short,",
        " {_col7_} as short,",
        " {_col8_} as double,",
        " {_col9_} as double,",
        " {_col10_} as string,",
        " {_col11_} as string,",
        " {_col12_} as string,",
        " {_col13_} as double,",
        " {_col14_} as short,",
        " {_col15_} as string,",
        " {_col16_} as string,",
        " {_col17_} as string,",
        " SubcategoryId as integer,",
        " {_col19_} as short,",
        " {_col20_} as timestamp,",
        " {_col21_} as timestamp,",
        " {_col22_} as string,",
        " {_col23_} as string,",
        " {_col24_} as string",
        " ),",
        " allowSchemaDrift: true,",
        " validateSchema: false,",
        " ignoreNoFilesFound: false) ~> Product",
        "source(output(",
        " SubcategoryId as integer,",
        " CategoryId as short,",
        " Subcategory as string,",
        " {_col3_} as string,",
        " {_col4_} as timestamp",
        " ),",
        " allowSchemaDrift: true,",
        " validateSchema: false,",
        " ignoreNoFilesFound: false) ~> ProductSubcategory",
        "source(output(",
        " CategoryId as integer,",
        " Category as string,",
        " {_col2_} as string,",
        " {_col3_} as timestamp",
        " ),",
        " allowSchemaDrift: true,",
        " validateSchema: false,",
        " ignoreNoFilesFound: false) ~> ProductCategory",
        "Product select(mapColumn(",
        " ProductId,",
        " Product,",
        " SubcategoryId",
        " ),",
        " skipDuplicateMapInputs: true,",
        " skipDuplicateMapOutputs: true) ~> SelectProductColumns",
        "ProductSubcategory select(mapColumn(",
        " SubcategoryId,",
        " CategoryId,",
        " Subcategory",
        " ),",
        " skipDuplicateMapInputs: true,",
        " skipDuplicateMapOutputs: true) ~> SelectProductSubcategoryColumns",
        "ProductCategory select(mapColumn(",
        " CategoryId,",
        " Category",
        " ),",
        " skipDuplicateMapInputs: true,",
        " skipDuplicateMapOutputs: true) ~> SelectProductCategoryColumns",
        "SelectProductSubcategoryColumns, SelectProductCategoryColumns lookup(SelectProductSubcategoryColumns@CategoryId == SelectProductCategoryColumns@CategoryId,",
        " multiple: false,",
        " pickup: 'any',",
        " broadcast: 'auto')~> LookupProductCategory",
        "SelectProductColumns, LookupProductCategory lookup(SelectProductColumns@SubcategoryId == SelectProductSubcategoryColumns@SubcategoryId,",
        " multiple: false,",
        " pickup: 'any',",
        " broadcast: 'auto')~> LookupProductSubcategory",
        "LookupProductSubcategory select(mapColumn(",
        " ProductId,",
        " Product,",
        " SubcategoryId = SelectProductColumns@SubcategoryId,",
        " SubcategoryId = SelectProductSubcategoryColumns@SubcategoryId,",
        " CategoryId = SelectProductSubcategoryColumns@CategoryId,",
        " Subcategory,",
        " CategoryId = SelectProductCategoryColumns@CategoryId,",
        " Category",
        " ),",
        " skipDuplicateMapInputs: true,",
        " skipDuplicateMapOutputs: true) ~> RemoveDuplicateColumns",
        "RemoveDuplicateColumns sink(allowSchemaDrift: true,",
        " validateSchema: false,",
        " format: 'delta',",
        " fileSystem: 'lakeroot',",
        " folderPath: 'Prepared/DimProduct',",
        " overwrite:true,",
        " mergeSchema: false,",
        " autoCompact: false,",
        " optimizedWrite: false,",
        " vacuum: 0,",
        " deletable:false,",
        " insertable:true,",
        " updateable:false,",
        " upsertable:false,",
        " umask: 0022,",
        " preCommands: [],",
        " postCommands: [],",
        " skipDuplicateMapInputs: true,",
        " skipDuplicateMapOutputs: true) ~> WriteToDataLake"
      ]
    }
  }
}