fix: Stream does not close on end

This commit is contained in:
Lucàs
2024-10-08 13:32:14 +02:00
parent 5e53d9d914
commit 93ee52ddc2
7 changed files with 137 additions and 56 deletions
+87 -40
View File
@@ -11,10 +11,10 @@
"dependencies": {
"axios": "^1.7.7",
"body-parser": "^1.20.3",
"csvtojson": "^2.0.10",
"dmn-moddle": "^10.0.0",
"dotenv": "^16.4.5",
"express": "^4.21.0",
"fast-csv": "^5.0.1",
"node-stream-zip": "^1.15.0",
"tar-stream": "^3.1.7",
"unzipper": "^0.12.3"
@@ -42,6 +42,33 @@
"node": ">=12"
}
},
"node_modules/@fast-csv/format": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/@fast-csv/format/-/format-5.0.0.tgz",
"integrity": "sha512-IyMpHwYIOGa2f0BJi6Wk55UF0oBA5urdIydoEDYxPo88LFbeb3Yr4rgpu98OAO1glUWheSnNtUgS80LE+/dqmw==",
"license": "MIT",
"dependencies": {
"lodash.escaperegexp": "^4.1.2",
"lodash.isboolean": "^3.0.3",
"lodash.isequal": "^4.5.0",
"lodash.isfunction": "^3.0.9",
"lodash.isnil": "^4.0.0"
}
},
"node_modules/@fast-csv/parse": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/@fast-csv/parse/-/parse-5.0.0.tgz",
"integrity": "sha512-ecF8tCm3jVxeRjEB6VPzmA+1wGaJ5JgaUX2uesOXdXD6qQp0B3EdshOIed4yT1Xlj/F2f8v4zHSo0Oi31L697g==",
"license": "MIT",
"dependencies": {
"lodash.escaperegexp": "^4.1.2",
"lodash.groupby": "^4.6.0",
"lodash.isfunction": "^3.0.9",
"lodash.isnil": "^4.0.0",
"lodash.isundefined": "^3.0.1",
"lodash.uniq": "^4.5.0"
}
},
"node_modules/@jridgewell/resolve-uri": {
"version": "3.1.2",
"resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz",
@@ -427,23 +454,6 @@
"dev": true,
"license": "MIT"
},
"node_modules/csvtojson": {
"version": "2.0.10",
"resolved": "https://registry.npmjs.org/csvtojson/-/csvtojson-2.0.10.tgz",
"integrity": "sha512-lUWFxGKyhraKCW8Qghz6Z0f2l/PqB1W3AO0HKJzGIQ5JRSlR651ekJDiGJbBT4sRNNv5ddnSGVEnsxP9XRCVpQ==",
"license": "MIT",
"dependencies": {
"bluebird": "^3.5.1",
"lodash": "^4.17.3",
"strip-bom": "^2.0.0"
},
"bin": {
"csvtojson": "bin/csvtojson"
},
"engines": {
"node": ">=4.0.0"
}
},
"node_modules/debug": {
"version": "2.6.9",
"resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
@@ -633,6 +643,19 @@
"node": ">= 0.10.0"
}
},
"node_modules/fast-csv": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/fast-csv/-/fast-csv-5.0.1.tgz",
"integrity": "sha512-Q43zC4NdQD5MAWOVQOF8KA+D6ddvTJjX2ib8zqysm74jZhtk6+dc8C75/OqRV6Y9CLc4kgvbC3PLG8YL4YZfgw==",
"license": "MIT",
"dependencies": {
"@fast-csv/format": "5.0.0",
"@fast-csv/parse": "5.0.0"
},
"engines": {
"node": ">=10.0.0"
}
},
"node_modules/fast-fifo": {
"version": "1.3.2",
"resolved": "https://registry.npmjs.org/fast-fifo/-/fast-fifo-1.3.2.tgz",
@@ -860,12 +883,6 @@
"node": ">= 0.10"
}
},
"node_modules/is-utf8": {
"version": "0.2.1",
"resolved": "https://registry.npmjs.org/is-utf8/-/is-utf8-0.2.1.tgz",
"integrity": "sha512-rMYPYvCzsXywIsldgLaSoPlw5PfoB/ssr7hY4pLfcodrA5M/eArza1a9VmTiNIBNMjOGr1Ow9mTyU2o69U6U9Q==",
"license": "MIT"
},
"node_modules/isarray": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz",
@@ -884,10 +901,52 @@
"graceful-fs": "^4.1.6"
}
},
"node_modules/lodash": {
"version": "4.17.21",
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
"integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==",
"node_modules/lodash.escaperegexp": {
"version": "4.1.2",
"resolved": "https://registry.npmjs.org/lodash.escaperegexp/-/lodash.escaperegexp-4.1.2.tgz",
"integrity": "sha512-TM9YBvyC84ZxE3rgfefxUWiQKLilstD6k7PTGt6wfbtXF8ixIJLOL3VYyV/z+ZiPLsVxAsKAFVwWlWeb2Y8Yyw==",
"license": "MIT"
},
"node_modules/lodash.groupby": {
"version": "4.6.0",
"resolved": "https://registry.npmjs.org/lodash.groupby/-/lodash.groupby-4.6.0.tgz",
"integrity": "sha512-5dcWxm23+VAoz+awKmBaiBvzox8+RqMgFhi7UvX9DHZr2HdxHXM/Wrf8cfKpsW37RNrvtPn6hSwNqurSILbmJw==",
"license": "MIT"
},
"node_modules/lodash.isboolean": {
"version": "3.0.3",
"resolved": "https://registry.npmjs.org/lodash.isboolean/-/lodash.isboolean-3.0.3.tgz",
"integrity": "sha512-Bz5mupy2SVbPHURB98VAcw+aHh4vRV5IPNhILUCsOzRmsTmSQ17jIuqopAentWoehktxGd9e/hbIXq980/1QJg==",
"license": "MIT"
},
"node_modules/lodash.isequal": {
"version": "4.5.0",
"resolved": "https://registry.npmjs.org/lodash.isequal/-/lodash.isequal-4.5.0.tgz",
"integrity": "sha512-pDo3lu8Jhfjqls6GkMgpahsF9kCyayhgykjyLMNFTKWrpVdAQtYyB4muAMWozBB4ig/dtWAmsMxLEI8wuz+DYQ==",
"license": "MIT"
},
"node_modules/lodash.isfunction": {
"version": "3.0.9",
"resolved": "https://registry.npmjs.org/lodash.isfunction/-/lodash.isfunction-3.0.9.tgz",
"integrity": "sha512-AirXNj15uRIMMPihnkInB4i3NHeb4iBtNg9WRWuK2o31S+ePwwNmDPaTL3o7dTJ+VXNZim7rFs4rxN4YU1oUJw==",
"license": "MIT"
},
"node_modules/lodash.isnil": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/lodash.isnil/-/lodash.isnil-4.0.0.tgz",
"integrity": "sha512-up2Mzq3545mwVnMhTDMdfoG1OurpA/s5t88JmQX809eH3C8491iu2sfKhTfhQtKY78oPNhiaHJUpT/dUDAAtng==",
"license": "MIT"
},
"node_modules/lodash.isundefined": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/lodash.isundefined/-/lodash.isundefined-3.0.1.tgz",
"integrity": "sha512-MXB1is3s899/cD8jheYYE2V9qTHwKvt+npCwpD+1Sxm3Q3cECXCiYHjeHWXNwr6Q0SOBPrYUDxendrO6goVTEA==",
"license": "MIT"
},
"node_modules/lodash.uniq": {
"version": "4.5.0",
"resolved": "https://registry.npmjs.org/lodash.uniq/-/lodash.uniq-4.5.0.tgz",
"integrity": "sha512-xfBaXQd9ryd9dlSDvnvI0lvxfLJlYAZzXomUYzLKtUeOQvOP5piqAWuGtrhWeqaXK9hhoM/iyJc5AV+XfsX3HQ==",
"license": "MIT"
},
"node_modules/make-error": {
@@ -1328,18 +1387,6 @@
"integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==",
"license": "MIT"
},
"node_modules/strip-bom": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/strip-bom/-/strip-bom-2.0.0.tgz",
"integrity": "sha512-kwrX1y7czp1E69n2ajbG65mIo9dqvJ+8aBQXOGVxqwvNbsXdFM6Lq37dLAY3mknUwru8CfcCbfOLL/gMo+fi3g==",
"license": "MIT",
"dependencies": {
"is-utf8": "^0.2.0"
},
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/tar-stream": {
"version": "3.1.7",
"resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-3.1.7.tgz",
+1 -1
View File
@@ -17,10 +17,10 @@
"dependencies": {
"axios": "^1.7.7",
"body-parser": "^1.20.3",
"csvtojson": "^2.0.10",
"dmn-moddle": "^10.0.0",
"dotenv": "^16.4.5",
"express": "^4.21.0",
"fast-csv": "^5.0.1",
"node-stream-zip": "^1.15.0",
"tar-stream": "^3.1.7",
"unzipper": "^0.12.3"
+3 -5
View File
@@ -10,11 +10,9 @@ class FileService {
* @return Promise<Readable> - The compressed file stream
*/
public static async getFileStream(url: string): Promise<Readable> {
return axios({
method: "GET",
url: url,
responseType: "stream",
}).then((response) => response.data);
return axios({ method: "GET", url, responseType: "stream" }).then(
(response) => response.data
);
}
/**
+23 -5
View File
@@ -11,13 +11,18 @@ import { ParserFactory } from "../parser";
import { DatasetType } from "./";
import { Data, DataConstructor } from "../data";
type DatasetOptions = {
type DatasetParams = {
id: string;
dataType: DataConstructor<Data>;
source: string;
file: string;
archiveType: ArchiveType;
datasetType: DatasetType;
options?: DatasetOptions;
};
type DatasetOptions = {
parser?: any;
};
/**
@@ -31,6 +36,7 @@ class Dataset {
readonly datasetType: DatasetType;
readonly cachePath: string;
private dataType: DataConstructor<Data>;
private options?: DatasetOptions;
/**
* Create a new dataset instance
@@ -40,6 +46,7 @@ class Dataset {
* @param dataType - The constructor of the data class
* @param archiveType - The type of the archive
* @param datasetType - The type of the dataset
* @param options - Additional options for the dataset
*/
constructor({
id,
@@ -48,13 +55,15 @@ class Dataset {
dataType,
archiveType,
datasetType,
}: DatasetOptions) {
options,
}: DatasetParams) {
this.id = id;
this.dataType = dataType;
this.source = source;
this.file = file;
this.archiveType = archiveType;
this.datasetType = datasetType;
this.options = options;
this.cachePath = CacheService.getCachePath(this.source, ".json");
}
@@ -79,18 +88,27 @@ class Dataset {
await pipelineAsync(
await FileService.getFileStream(this.source),
archive.extract(this.file),
parser.parse(),
parser.parse(this.options?.parser),
Dataset.transformToData(this.dataType),
FileService.createWriteStream(this.cachePath)
);
)
.then(() => {
console.log(`Loaded: ${this.source}`);
})
.catch((err) => {
console.error(`Failed to load dataset: ${this.source}`);
FileService.deleteFile(this.cachePath);
throw err;
});
}
private static transformToData(dataType: DataConstructor<Data>): Transform {
return new Transform({
objectMode: true,
transform(chunk: object, _, callback) {
const data: Data = new dataType(JSON.parse(chunk.toString()));
const data: Data = new dataType(chunk);
this.push(JSON.stringify(data) + "\n");
callback(null, JSON.stringify(data) + "\n");
},
});
+11
View File
@@ -12,6 +12,11 @@ class DatasetCollection {
dataType: NudgerData,
archiveType: ArchiveType.ZIP,
datasetType: DatasetType.CSV,
options: {
parser: {
delimiter: ",",
},
},
}),
new Dataset({
id: "openfoodfacts",
@@ -21,6 +26,12 @@ class DatasetCollection {
dataType: OpenFoodFactsData,
archiveType: ArchiveType.GZIP,
datasetType: DatasetType.CSV,
options: {
parser: {
delimiter: "\t",
quote: null,
},
},
}),
];
+11 -4
View File
@@ -1,14 +1,21 @@
import { Parser } from "./";
import { Duplex } from "node:stream";
import csv from "csvtojson";
// import csv from "csvtojson";
import * as csv from "fast-csv";
class CsvParser implements Parser {
public static instance: CsvParser = new CsvParser();
public parse(): Duplex {
return csv({
delimiter: "auto",
public parse(options: any): Duplex {
return csv.parse({
headers: true,
objectMode: true,
trim: true,
...options,
});
// return csv({
// delimiter: "auto",
// });
}
}
+1 -1
View File
@@ -4,7 +4,7 @@ interface Parser {
/**
* Parse the content of the stream into JSON objects
*/
parse(): Duplex;
parse(options: any): Duplex;
}
export default Parser;