diff --git a/Code/DataFactory/dataset/ADLS_BIN_AWProduct.json b/Code/DataFactory/dataset/ADLS_BIN_AWProduct.json index 13f0ffd..edccc77 100644 --- a/Code/DataFactory/dataset/ADLS_BIN_AWProduct.json +++ b/Code/DataFactory/dataset/ADLS_BIN_AWProduct.json @@ -6,7 +6,7 @@ "type": "LinkedServiceReference" }, "folder": { - "name": "Labs" + "name": "Labs1" }, "annotations": [], "type": "Binary", diff --git a/Code/DataFactory/dataset/ADLS_BIN_AdventureWorks.json b/Code/DataFactory/dataset/ADLS_BIN_AdventureWorks.json index 934edf6..b0006b8 100644 --- a/Code/DataFactory/dataset/ADLS_BIN_AdventureWorks.json +++ b/Code/DataFactory/dataset/ADLS_BIN_AdventureWorks.json @@ -11,7 +11,7 @@ } }, "folder": { - "name": "Labs" + "name": "Labs1" }, "annotations": [], "type": "Binary", diff --git a/Code/DataFactory/dataset/ADLS_PQT_AdventureWorks.json b/Code/DataFactory/dataset/ADLS_PQT_AdventureWorks.json new file mode 100644 index 0000000..53eef04 --- /dev/null +++ b/Code/DataFactory/dataset/ADLS_PQT_AdventureWorks.json @@ -0,0 +1,32 @@ +{ + "name": "ADLS_PQT_AdventureWorks", + "properties": { + "linkedServiceName": { + "referenceName": "ADLS_saintegrationpipelines", + "type": "LinkedServiceReference" + }, + "parameters": { + "EntityName": { + "type": "string" + } + }, + "folder": { + "name": "Labs2" + }, + "annotations": [], + "type": "Parquet", + "typeProperties": { + "location": { + "type": "AzureBlobFSLocation", + "fileName": { + "value": "@{dataset().EntityName}.parquet", + "type": "Expression" + }, + "folderPath": "Raw", + "fileSystem": "lakeroot" + }, + "compressionCodec": "snappy" + }, + "schema": [] + } +} \ No newline at end of file diff --git a/Code/DataFactory/dataset/ADLS_TSV_AdventureWorks.json b/Code/DataFactory/dataset/ADLS_TSV_AdventureWorks.json index c23301b..ca0f4e3 100644 --- a/Code/DataFactory/dataset/ADLS_TSV_AdventureWorks.json +++ b/Code/DataFactory/dataset/ADLS_TSV_AdventureWorks.json @@ -11,7 +11,7 @@ } }, "folder": { - "name": "Labs" + "name": "Labs1" }, "annotations": [], "type": "DelimitedText", diff --git a/Code/DataFactory/dataset/HTTP_BIN_AWProduct.json b/Code/DataFactory/dataset/HTTP_BIN_AWProduct.json index 139ac39..d71545c 100644 --- a/Code/DataFactory/dataset/HTTP_BIN_AWProduct.json +++ b/Code/DataFactory/dataset/HTTP_BIN_AWProduct.json @@ -6,7 +6,7 @@ "type": "LinkedServiceReference" }, "folder": { - "name": "Labs" + "name": "Labs1" }, "annotations": [], "type": "Binary", diff --git a/Code/DataFactory/dataset/HTTP_BIN_AdventureWorks.json b/Code/DataFactory/dataset/HTTP_BIN_AdventureWorks.json index 039b3cd..ece8886 100644 --- a/Code/DataFactory/dataset/HTTP_BIN_AdventureWorks.json +++ b/Code/DataFactory/dataset/HTTP_BIN_AdventureWorks.json @@ -11,7 +11,7 @@ } }, "folder": { - "name": "Labs" + "name": "Labs1" }, "annotations": [], "type": "Binary", diff --git a/Code/DataFactory/dataset/HTTP_TSV_AdventureWorks.json b/Code/DataFactory/dataset/HTTP_TSV_AdventureWorks.json new file mode 100644 index 0000000..ac8a75a --- /dev/null +++ b/Code/DataFactory/dataset/HTTP_TSV_AdventureWorks.json @@ -0,0 +1,32 @@ +{ + "name": "HTTP_TSV_AdventureWorks", + "properties": { + "linkedServiceName": { + "referenceName": "HTTP_AWGitHub", + "type": "LinkedServiceReference" + }, + "parameters": { + "EntityName": { + "type": "string" + } + }, + "folder": { + "name": "Labs2" + }, + "annotations": [], + "type": "DelimitedText", + "typeProperties": { + "location": { + "type": "HttpServerLocation", + "relativeUrl": { + "value": "@{dataset().EntityName}.csv", + "type": "Expression" + } + }, + "columnDelimiter": "\t", + "escapeChar": "\\", + "quoteChar": "\"" + }, + "schema": [] + } +} \ No newline at end of file diff --git a/Code/DataFactory/pipeline/BuildDimProduct.json b/Code/DataFactory/pipeline/BuildDimProduct.json index 34a2e03..2e8789b 100644 --- a/Code/DataFactory/pipeline/BuildDimProduct.json +++ b/Code/DataFactory/pipeline/BuildDimProduct.json @@ -39,7 +39,7 @@ } ], "folder": { - "name": "Labs" + "name": "Labs1" }, "annotations": [] } diff --git a/Code/DataFactory/pipeline/CopyAWEntities.json b/Code/DataFactory/pipeline/CopyAWEntities.json index ad648d9..ac9a59d 100644 --- a/Code/DataFactory/pipeline/CopyAWEntities.json +++ b/Code/DataFactory/pipeline/CopyAWEntities.json @@ -48,7 +48,7 @@ } }, "folder": { - "name": "Labs" + "name": "Labs1" }, "annotations": [] } diff --git a/Code/DataFactory/pipeline/CopyAWEntity.json b/Code/DataFactory/pipeline/CopyAWEntity.json index 5a0db2d..004f1ff 100644 --- a/Code/DataFactory/pipeline/CopyAWEntity.json +++ b/Code/DataFactory/pipeline/CopyAWEntity.json @@ -65,7 +65,7 @@ } }, "folder": { - "name": "Labs" + "name": "Labs1" }, "annotations": [] } diff --git a/Code/DataFactory/pipeline/CopyAWProduct.json b/Code/DataFactory/pipeline/CopyAWProduct.json index 1d11002..d51cdf7 100644 --- a/Code/DataFactory/pipeline/CopyAWProduct.json +++ b/Code/DataFactory/pipeline/CopyAWProduct.json @@ -48,7 +48,7 @@ } ], "folder": { - "name": "Labs" + "name": "Labs1" }, "annotations": [] } diff --git a/Code/DataFactory/pipeline/CopyAWUsingMetadata.json b/Code/DataFactory/pipeline/CopyAWUsingMetadata.json new file mode 100644 index 0000000..b3d2a03 --- /dev/null +++ b/Code/DataFactory/pipeline/CopyAWUsingMetadata.json @@ -0,0 +1,170 @@ +{ + "name": "CopyAWUsingMetadata", + "properties": { + "activities": [ + { + "name": "Get catalog", + "type": "WebActivity", + "dependsOn": [], + "policy": { + "timeout": "0.12:00:00", + "retry": 0, + "retryIntervalInSeconds": 30, + "secureOutput": false, + "secureInput": false + }, + "userProperties": [], + "typeProperties": { + "url": "https://raw.githubusercontent.com/mrpaulandrewltd/Azure-Data-Integration-Pipeline-Training/main/Labs/TableCatalog.json", + "method": "GET" + } + }, + { + "name": "Cache catalog as JSON array", + "type": "SetVariable", + "dependsOn": [ + { + "activity": "Get catalog", + "dependencyConditions": [ + "Succeeded" + ] + } + ], + "userProperties": [], + "typeProperties": { + "variableName": "Catalog", + "value": { + "value": "@json(activity('Get catalog').output.Response)", + "type": "Expression" + } + } + }, + { + "name": "ForEach AW entity", + "type": "ForEach", + "dependsOn": [ + { + "activity": "Cache catalog as JSON array", + "dependencyConditions": [ + "Succeeded" + ] + } + ], + "userProperties": [], + "typeProperties": { + "items": { + "value": "@variables('Catalog')", + "type": "Expression" + }, + "activities": [ + { + "name": "Copy AW entity", + "type": "Copy", + "dependsOn": [], + "policy": { + "timeout": "0.12:00:00", + "retry": 0, + "retryIntervalInSeconds": 30, + "secureOutput": false, + "secureInput": false + }, + "userProperties": [], + "typeProperties": { + "source": { + "type": "DelimitedTextSource", + "storeSettings": { + "type": "HttpReadSettings", + "requestMethod": "GET" + }, + "formatSettings": { + "type": "DelimitedTextReadSettings" + } + }, + "sink": { + "type": "ParquetSink", + "storeSettings": { + "type": "AzureBlobFSWriteSettings" + }, + "formatSettings": { + "type": "ParquetWriteSettings" + } + }, + "enableStaging": false, + "translator": { + "value": "@item().translator", + "type": "Expression" + } + }, + "inputs": [ + { + "referenceName": "HTTP_TSV_AdventureWorks", + "type": "DatasetReference", + "parameters": { + "EntityName": { + "value": "@item().EntityName", + "type": "Expression" + } + } + } + ], + "outputs": [ + { + "referenceName": "ADLS_PQT_AdventureWorks", + "type": "DatasetReference", + "parameters": { + "EntityName": { + "value": "@item().EntityName", + "type": "Expression" + } + } + } + ] + }, + { + "name": "Notify error", + "type": "AzureFunctionActivity", + "dependsOn": [ + { + "activity": "Copy AW entity", + "dependencyConditions": [ + "Failed" + ] + } + ], + "policy": { + "timeout": "0.12:00:00", + "retry": 0, + "retryIntervalInSeconds": 30, + "secureOutput": false, + "secureInput": false + }, + "userProperties": [], + "typeProperties": { + "functionName": "SendEmail", + "method": "POST", + "body": { + "value": "{\n \"emailRecipients\": \"richard.swinbank@gmail.com\",\n \"emailSubject\": \"Error copying @{item().EntityName}\",\n \"emailBody\": \"@{replace(activity('Copy AW entity').output.errors[0].Message,'\"','\\\"')}\"\n}", + "type": "Expression" + } + }, + "linkedServiceName": { + "referenceName": "EmailSenderFunction", + "type": "LinkedServiceReference" + } + } + ] + } + } + ], + "variables": { + "Catalog": { + "type": "Array", + "defaultValue": [] + } + }, + "folder": { + "name": "Labs2" + }, + "annotations": [] + } +} \ No newline at end of file diff --git a/Labs/Lab 7 - Mini-project.pdf b/Labs/Lab 7 - Mini-project.pdf new file mode 100644 index 0000000..d8190d9 Binary files /dev/null and b/Labs/Lab 7 - Mini-project.pdf differ diff --git a/Labs/TableCatalog.json b/Labs/TableCatalog.json index 50589ef..8a66497 100644 --- a/Labs/TableCatalog.json +++ b/Labs/TableCatalog.json @@ -531,7 +531,7 @@ }, { "source": { - "type": "Guid", + "type": "DateTime", "ordinal": 6 }, "sink": { @@ -620,99 +620,6 @@ } } }, - { - "entityName": "ProductReview", - "translator": { - "type": "TabularTranslator", - "mappings": [ - { - "source": { - "type": "Int32", - "ordinal": 1 - }, - "sink": { - "name": "ProductReviewID", - "physicalType": "UTF8" - } - }, - { - "source": { - "type": "Int32", - "ordinal": 2 - }, - "sink": { - "name": "ProductID", - "physicalType": "UTF8" - } - }, - { - "source": { - "type": "String", - "ordinal": 3 - }, - "sink": { - "name": "ReviewerName", - "physicalType": "UTF8" - } - }, - { - "source": { - "type": "DateTime", - "ordinal": 4 - }, - "sink": { - "name": "ReviewDate", - "physicalType": "UTF8" - } - }, - { - "source": { - "type": "String", - "ordinal": 5 - }, - "sink": { - "name": "EmailAddress", - "physicalType": "UTF8" - } - }, - { - "source": { - "type": "Int32", - "ordinal": 6 - }, - "sink": { - "name": "Rating", - "physicalType": "UTF8" - } - }, - { - "source": { - "type": "String", - "ordinal": 7 - }, - "sink": { - "name": "Comments", - "physicalType": "UTF8" - } - }, - { - "source": { - "type": "DateTime", - "ordinal": 8 - }, - "sink": { - "name": "ModifiedDate", - "physicalType": "UTF8" - } - } - ], - "typeConversion": true, - "typeConversionSettings": { - "allowDataTruncation": true, - "treatBooleanAsNumber": false - } - } - }, { "entityName": "ProductSubcategory", "translator": { @@ -775,5 +682,401 @@ "treatBooleanAsNumber": false } } + }, + { + "entityName": "SalesOrderDetail", + "translator": { + "type": "TabularTranslator", + "mappings": [ + { + "source": { + "type": "Int32", + "ordinal": 1 + }, + "sink": { + "name": "SalesOrderID", + "physicalType": "UTF8" + } + }, + { + "source": { + "type": "Int32", + "ordinal": 2 + }, + "sink": { + "name": "SalesOrderDetailID", + "physicalType": "UTF8" + } + }, + { + "source": { + "type": "String", + "ordinal": 3 + }, + "sink": { + "name": "CarrierTrackingNumber", + "physicalType": "UTF8" + } + }, + { + "source": { + "type": "Int32", + "ordinal": 4 + }, + "sink": { + "name": "OrderQty", + "physicalType": "UTF8" + } + }, + { + "source": { + "type": "Int32", + "ordinal": 5 + }, + "sink": { + "name": "ProductID", + "physicalType": "UTF8" + } + }, + { + "source": { + "type": "Int32", + "ordinal": 6 + }, + "sink": { + "name": "SpecialOfferID", + "physicalType": "UTF8" + } + }, + { + "source": { + "type": "Decimal", + "ordinal": 7 + }, + "sink": { + "name": "UnitPrice", + "physicalType": "UTF8" + } + }, + { + "source": { + "type": "Decimal", + "ordinal": 8 + }, + "sink": { + "name": "UnitPriceDiscount", + "physicalType": "UTF8" + } + }, + { + "source": { + "type": "Decimal", + "ordinal": 9 + }, + "sink": { + "name": "LineTotal", + "physicalType": "UTF8" + } + }, + { + "source": { + "type": "Guid", + "ordinal": 10 + }, + "sink": { + "name": "rowguid", + "physicalType": "UTF8" + } + }, + { + "source": { + "type": "DateTime", + "ordinal": 11 + }, + "sink": { + "name": "ModifiedDate", + "physicalType": "UTF8" + } + } + ], + "typeConversion": true, + "typeConversionSettings": { + "allowDataTruncation": true, + "treatBooleanAsNumber": false + } + } + }, + { + "entityName": "SalesOrderHeader", + "translator": { + "type": "TabularTranslator", + "mappings": [ + { + "source": { + "type": "Int32", + "ordinal": 1 + }, + "sink": { + "name": "SalesOrderID", + "physicalType": "UTF8" + } + }, + { + "source": { + "type": "Int32", + "ordinal": 2 + }, + "sink": { + "name": "RevisionNumber", + "physicalType": "UTF8" + } + }, + { + "source": { + "type": "DateTime", + "ordinal": 3 + }, + "sink": { + "name": "OrderDate", + "physicalType": "UTF8" + } + }, + { + "source": { + "type": "DateTime", + "ordinal": 4 + }, + "sink": { + "name": "DueDate", + "physicalType": "UTF8" + } + }, + { + "source": { + "type": "DateTime", + "ordinal": 5 + }, + "sink": { + "name": "ShipDate", + "physicalType": "UTF8" + } + }, + { + "source": { + "type": "Int32", + "ordinal": 6 + }, + "sink": { + "name": "Status", + "physicalType": "UTF8" + } + }, + { + "source": { + "type": "Int32", + "ordinal": 7 + }, + "sink": { + "name": "OnlineOrderFlag", + "physicalType": "UTF8" + } + }, + { + "source": { + "type": "String", + "ordinal": 8 + }, + "sink": { + "name": "SalesOrderNumber", + "physicalType": "UTF8" + } + }, + { + "source": { + "type": "String", + "ordinal": 9 + }, + "sink": { + "name": "PurchaseOrderNumber", + "physicalType": "UTF8" + } + }, + { + "source": { + "type": "String", + "ordinal": 10 + }, + "sink": { + "name": "AccountNumber", + "physicalType": "UTF8" + } + }, + { + "source": { + "type": "Int32", + "ordinal": 11 + }, + "sink": { + "name": "CustomerID", + "physicalType": "UTF8" + } + }, + { + "source": { + "type": "Int32", + "ordinal": 12 + }, + "sink": { + "name": "SalesPersonID", + "physicalType": "UTF8" + } + }, + { + "source": { + "type": "Int32", + "ordinal": 13 + }, + "sink": { + "name": "TerritoryID", + "physicalType": "UTF8" + } + }, + { + "source": { + "type": "Int32", + "ordinal": 14 + }, + "sink": { + "name": "BillToAddressID", + "physicalType": "UTF8" + } + }, + { + "source": { + "type": "Int32", + "ordinal": 15 + }, + "sink": { + "name": "ShipToAddressID", + "physicalType": "UTF8" + } + }, + { + "source": { + "type": "Int32", + "ordinal": 16 + }, + "sink": { + "name": "ShipMethodID", + "physicalType": "UTF8" + } + }, + { + "source": { + "type": "Int32", + "ordinal": 17 + }, + "sink": { + "name": "CreditCardID", + "physicalType": "UTF8" + } + }, + { + "source": { + "type": "String", + "ordinal": 18 + }, + "sink": { + "name": "CreditCardApprovalCode", + "physicalType": "UTF8" + } + }, + { + "source": { + "type": "Int32", + "ordinal": 19 + }, + "sink": { + "name": "CurrencyRateID", + "physicalType": "UTF8" + } + }, + { + "source": { + "type": "Decimal", + "ordinal": 20 + }, + "sink": { + "name": "SubTotal", + "physicalType": "UTF8" + } + }, + { + "source": { + "type": "Decimal", + "ordinal": 21 + }, + "sink": { + "name": "TaxAmt", + "physicalType": "UTF8" + } + }, + { + "source": { + "type": "Decimal", + "ordinal": 22 + }, + "sink": { + "name": "Freight", + "physicalType": "UTF8" + } + }, + { + "source": { + "type": "Decimal", + "ordinal": 23 + }, + "sink": { + "name": "TotalDue", + "physicalType": "UTF8" + } + }, + { + "source": { + "type": "String", + "ordinal": 24 + }, + "sink": { + "name": "Comment", + "physicalType": "UTF8" + } + }, + { + "source": { + "type": "Guid", + "ordinal": 25 + }, + "sink": { + "name": "rowguid", + "physicalType": "UTF8" + } + }, + { + "source": { + "type": "DateTime", + "ordinal": 26 + }, + "sink": { + "name": "ModifiedDate", + "physicalType": "UTF8" + } + } + ], + "typeConversion": true, + "typeConversionSettings": { + "allowDataTruncation": true, + "treatBooleanAsNumber": false + } + } } -] +] \ No newline at end of file