{
  "name": "UpdateProductDimension",
  "properties": {
    "folder": {
      "name": "Labs"
    },
    "type": "MappingDataFlow",
    "typeProperties": {
      "sources": [
        {
          "dataset": {
            "referenceName": "ADLS_TSV_AdventureWorks",
            "type": "DatasetReference"
          },
          "name": "Product"
        },
        {
          "dataset": {
            "referenceName": "ADLS_TSV_AdventureWorks",
            "type": "DatasetReference"
          },
          "name": "ProductSubcategory"
        },
        {
          "dataset": {
            "referenceName": "ADLS_TSV_AdventureWorks",
            "type": "DatasetReference"
          },
          "name": "ProductCategory"
        }
      ],
      "sinks": [
        {
          "linkedService": {
            "referenceName": "ADLS_saintegrationpipelines",
            "type": "LinkedServiceReference"
          },
          "name": "WriteToDataLake"
        }
      ],
      "transformations": [
        {
          "name": "SelectProductColumns"
        },
        {
          "name": "SelectSubcategoryColumns"
        },
        {
          "name": "SelectCategoryColumns"
        },
        {
          "name": "LookupProductCategory"
        },
        {
          "name": "LookupProductSubcategory"
        },
        {
          "name": "RemoveDuplicateColumns"
        }
      ],
      "scriptLines": [
        "source(output(",
        " ProductId as integer,",
        " Product as string,",
        " {_col2_} as string,",
        " {_col3_} as boolean,",
        " {_col4_} as boolean,",
        " {_col5_} as string,",
        " {_col6_} as short,",
        " {_col7_} as short,",
        " {_col8_} as double,",
        " {_col9_} as double,",
        " {_col10_} as string,",
        " {_col11_} as string,",
        " {_col12_} as string,",
        " {_col13_} as double,",
        " {_col14_} as short,",
        " {_col15_} as string,",
        " {_col16_} as string,",
        " {_col17_} as string,",
        " SubcategoryId as integer,",
        " {_col19_} as short,",
        " {_col20_} as timestamp,",
        " {_col21_} as timestamp,",
        " {_col22_} as string,",
        " {_col23_} as string,",
        " {_col24_} as string",
        " ),",
        " allowSchemaDrift: true,",
        " validateSchema: false,",
        " ignoreNoFilesFound: false) ~> Product",
        "source(output(",
        " SubcategoryId as integer,",
        " CategoryId as integer,",
        " Subcategory as string,",
        " {_col3_} as string,",
        " {_col4_} as timestamp",
        " ),",
        " allowSchemaDrift: true,",
        " validateSchema: false,",
        " ignoreNoFilesFound: false) ~> ProductSubcategory",
        "source(output(",
        " CategoryId as integer,",
        " Category as string,",
        " {_col2_} as string,",
        " {_col3_} as timestamp",
        " ),",
        " allowSchemaDrift: true,",
        " validateSchema: false,",
        " ignoreNoFilesFound: false) ~> ProductCategory",
        "Product select(mapColumn(",
        " ProductId,",
        " Product,",
        " SubcategoryId",
        " ),",
        " skipDuplicateMapInputs: true,",
        " skipDuplicateMapOutputs: true) ~> SelectProductColumns",
        "ProductSubcategory select(mapColumn(",
        " SubcategoryId,",
        " CategoryId,",
        " Subcategory",
        " ),",
        " skipDuplicateMapInputs: true,",
        " skipDuplicateMapOutputs: true) ~> SelectSubcategoryColumns",
        "ProductCategory select(mapColumn(",
        " CategoryId,",
        " Category",
        " ),",
        " skipDuplicateMapInputs: true,",
        " skipDuplicateMapOutputs: true) ~> SelectCategoryColumns",
        "SelectSubcategoryColumns, SelectCategoryColumns lookup(SelectSubcategoryColumns@CategoryId == SelectCategoryColumns@CategoryId,",
        " multiple: false,",
        " pickup: 'any',",
        " broadcast: 'auto')~> LookupProductCategory",
        "SelectProductColumns, LookupProductCategory lookup(SelectProductColumns@SubcategoryId == SelectSubcategoryColumns@SubcategoryId,",
        " multiple: false,",
        " pickup: 'any',",
        " broadcast: 'auto')~> LookupProductSubcategory",
        "LookupProductSubcategory select(mapColumn(",
        " ProductId,",
        " Product,",
        " SubcategoryId = SelectProductColumns@SubcategoryId,",
        " SubcategoryId = SelectSubcategoryColumns@SubcategoryId,",
        " CategoryId = SelectSubcategoryColumns@CategoryId,",
        " Subcategory,",
        " CategoryId = SelectCategoryColumns@CategoryId,",
        " Category",
        " ),",
        " skipDuplicateMapInputs: true,",
        " skipDuplicateMapOutputs: true) ~> RemoveDuplicateColumns",
        "RemoveDuplicateColumns sink(allowSchemaDrift: true,",
        " validateSchema: false,",
        " format: 'parquet',",
        " fileSystem: 'lakeroot',",
        " folderPath: 'Conformed/DimProduct',",
        " truncate: true,",
        " umask: 0022,",
        " preCommands: [],",
        " postCommands: [],",
        " skipDuplicateMapInputs: true,",
        " skipDuplicateMapOutputs: true) ~> WriteToDataLake"
      ]
    }
  }
}