From 9428208d9f72bfeaf514e2cce5ac01385f2a7109 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luc=C3=A0s?= <86352901+LucasVbr@users.noreply.github.com> Date: Fri, 4 Oct 2024 09:23:21 +0200 Subject: [PATCH] feat!: Save the converted dataset /!\ Need to clear the cache folder --- package-lock.json | 32 -------------------------------- package.json | 1 - src/services/data/Data.ts | 5 ++++- src/services/data/NudgerData.ts | 6 ++++-- src/services/dataset/Dataset.ts | 13 ++++++++++++- 5 files changed, 20 insertions(+), 37 deletions(-) diff --git a/package-lock.json b/package-lock.json index 08938c5..ef24dee 100644 --- a/package-lock.json +++ b/package-lock.json @@ -15,7 +15,6 @@ "dmn-moddle": "^10.0.0", "dotenv": "^16.4.5", "express": "^4.21.0", - "JSONStream": "^1.3.5", "node-stream-zip": "^1.15.0", "tar-stream": "^3.1.7", "unzipper": "^0.12.3" @@ -885,31 +884,6 @@ "graceful-fs": "^4.1.6" } }, - "node_modules/jsonparse": { - "version": "1.3.1", - "resolved": "https://registry.npmjs.org/jsonparse/-/jsonparse-1.3.1.tgz", - "integrity": "sha512-POQXvpdL69+CluYsillJ7SUhKvytYjW9vG/GKpnf+xP8UWgYEM/RaMzHHofbALDiKbbP1W8UEYmgGl39WkPZsg==", - "engines": [ - "node >= 0.2.0" - ], - "license": "MIT" - }, - "node_modules/JSONStream": { - "version": "1.3.5", - "resolved": "https://registry.npmjs.org/JSONStream/-/JSONStream-1.3.5.tgz", - "integrity": "sha512-E+iruNOY8VV9s4JEbe1aNEm6MiszPRr/UfcHMz0TQh1BXSxHK+ASV1R6W4HpjBhSeS+54PIsAMCBmwD06LLsqQ==", - "license": "(MIT OR Apache-2.0)", - "dependencies": { - "jsonparse": "^1.2.0", - "through": ">=2.2.7 <3" - }, - "bin": { - "JSONStream": "bin.js" - }, - "engines": { - "node": "*" - } - }, "node_modules/lodash": { "version": "4.17.21", "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", @@ -1386,12 +1360,6 @@ "b4a": "^1.6.4" } }, - "node_modules/through": { - "version": "2.3.8", - "resolved": "https://registry.npmjs.org/through/-/through-2.3.8.tgz", - "integrity": 
"sha512-w89qg7PI8wAdvX60bMDP+bFoD5Dvhm9oLheFp5O4a2QF0cSBGsBX4qZmadPMvVqlLJBBci+WqGGOAPvcDeNSVg==", - "license": "MIT" - }, "node_modules/toidentifier": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.1.tgz", diff --git a/package.json b/package.json index 596d1d8..05f0751 100644 --- a/package.json +++ b/package.json @@ -21,7 +21,6 @@ "dmn-moddle": "^10.0.0", "dotenv": "^16.4.5", "express": "^4.21.0", - "JSONStream": "^1.3.5", "node-stream-zip": "^1.15.0", "tar-stream": "^3.1.7", "unzipper": "^0.12.3" diff --git a/src/services/data/Data.ts b/src/services/data/Data.ts index 20d4aee..d12be15 100644 --- a/src/services/data/Data.ts +++ b/src/services/data/Data.ts @@ -1,3 +1,6 @@ -interface Data {} +interface Data { + input: string; + output: string; +} export default Data; diff --git a/src/services/data/NudgerData.ts b/src/services/data/NudgerData.ts index c13f0e4..fc7d7c4 100644 --- a/src/services/data/NudgerData.ts +++ b/src/services/data/NudgerData.ts @@ -16,10 +16,12 @@ type RawNudgerData = { }; class NudgerData implements Data { - barcode: string; + input: string; + output: string; constructor(rawData: RawNudgerData) { - this.barcode = rawData.code; + this.input = rawData.code; + this.output = rawData.gs1_country; } } diff --git a/src/services/dataset/Dataset.ts b/src/services/dataset/Dataset.ts index 30e26be..1bad306 100644 --- a/src/services/dataset/Dataset.ts +++ b/src/services/dataset/Dataset.ts @@ -1,4 +1,4 @@ -import { pipeline } from "node:stream"; +import { pipeline, Transform } from "node:stream"; import { promisify } from "node:util"; import CacheService from "../CacheService"; import FileService from "../FileService"; @@ -22,6 +22,7 @@ class Dataset { /** * Create a new dataset instance + * @param dConstructor - The constructor of the data class * @param id - The unique identifier of the dataset * @param url - The URL of the dataset * @param sourceFile - The file name of the dataset in the archive @@ -62,11 
+63,21 @@ class Dataset { const pipelineAsync = promisify(pipeline); + const self = this; + console.log(`Download: ${this.url}`); await pipelineAsync( await FileService.getFileStream(this.url), archive.extract(this.sourceFile), parser.parse(), + new Transform({ + objectMode: true, + transform(chunk: object, _, callback) { + const data: D = new self.dConstructor(JSON.parse(chunk.toString())); + this.push(JSON.stringify(data) + "\n"); + callback(); + }, + }), FileService.createWriteStream(this.cachePath) ); }