mirror of
https://github.com/kmitresse/Compo-Service-Log-Project.git
synced 2026-05-13 17:11:49 +00:00
feat: Add World-Cities Dataset
This commit is contained in:
Generated
+27
@@ -15,12 +15,14 @@
|
|||||||
"dotenv": "^16.4.5",
|
"dotenv": "^16.4.5",
|
||||||
"express": "^4.21.0",
|
"express": "^4.21.0",
|
||||||
"fast-csv": "^5.0.1",
|
"fast-csv": "^5.0.1",
|
||||||
|
"js-yaml": "^4.1.0",
|
||||||
"node-stream-zip": "^1.15.0",
|
"node-stream-zip": "^1.15.0",
|
||||||
"tar-stream": "^3.1.7",
|
"tar-stream": "^3.1.7",
|
||||||
"unzipper": "^0.12.3"
|
"unzipper": "^0.12.3"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@types/express": "^4.17.21",
|
"@types/express": "^4.17.21",
|
||||||
|
"@types/js-yaml": "^4.0.9",
|
||||||
"@types/node": "^22.5.5",
|
"@types/node": "^22.5.5",
|
||||||
"@types/tar-stream": "^3.1.3",
|
"@types/tar-stream": "^3.1.3",
|
||||||
"@types/unzipper": "^0.10.10",
|
"@types/unzipper": "^0.10.10",
|
||||||
@@ -179,6 +181,13 @@
|
|||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
},
|
},
|
||||||
|
"node_modules/@types/js-yaml": {
|
||||||
|
"version": "4.0.9",
|
||||||
|
"resolved": "https://registry.npmjs.org/@types/js-yaml/-/js-yaml-4.0.9.tgz",
|
||||||
|
"integrity": "sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
"node_modules/@types/mime": {
|
"node_modules/@types/mime": {
|
||||||
"version": "1.3.5",
|
"version": "1.3.5",
|
||||||
"resolved": "https://registry.npmjs.org/@types/mime/-/mime-1.3.5.tgz",
|
"resolved": "https://registry.npmjs.org/@types/mime/-/mime-1.3.5.tgz",
|
||||||
@@ -299,6 +308,12 @@
|
|||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
},
|
},
|
||||||
|
"node_modules/argparse": {
|
||||||
|
"version": "2.0.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz",
|
||||||
|
"integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==",
|
||||||
|
"license": "Python-2.0"
|
||||||
|
},
|
||||||
"node_modules/array-flatten": {
|
"node_modules/array-flatten": {
|
||||||
"version": "1.1.1",
|
"version": "1.1.1",
|
||||||
"resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-1.1.1.tgz",
|
"resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-1.1.1.tgz",
|
||||||
@@ -889,6 +904,18 @@
|
|||||||
"integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==",
|
"integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==",
|
||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
},
|
},
|
||||||
|
"node_modules/js-yaml": {
|
||||||
|
"version": "4.1.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz",
|
||||||
|
"integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"argparse": "^2.0.1"
|
||||||
|
},
|
||||||
|
"bin": {
|
||||||
|
"js-yaml": "bin/js-yaml.js"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/jsonfile": {
|
"node_modules/jsonfile": {
|
||||||
"version": "6.1.0",
|
"version": "6.1.0",
|
||||||
"resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-6.1.0.tgz",
|
"resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-6.1.0.tgz",
|
||||||
|
|||||||
@@ -21,12 +21,14 @@
|
|||||||
"dotenv": "^16.4.5",
|
"dotenv": "^16.4.5",
|
||||||
"express": "^4.21.0",
|
"express": "^4.21.0",
|
||||||
"fast-csv": "^5.0.1",
|
"fast-csv": "^5.0.1",
|
||||||
|
"js-yaml": "^4.1.0",
|
||||||
"node-stream-zip": "^1.15.0",
|
"node-stream-zip": "^1.15.0",
|
||||||
"tar-stream": "^3.1.7",
|
"tar-stream": "^3.1.7",
|
||||||
"unzipper": "^0.12.3"
|
"unzipper": "^0.12.3"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@types/express": "^4.17.21",
|
"@types/express": "^4.17.21",
|
||||||
|
"@types/js-yaml": "^4.0.9",
|
||||||
"@types/node": "^22.5.5",
|
"@types/node": "^22.5.5",
|
||||||
"@types/tar-stream": "^3.1.3",
|
"@types/tar-stream": "^3.1.3",
|
||||||
"@types/unzipper": "^0.10.10",
|
"@types/unzipper": "^0.10.10",
|
||||||
|
|||||||
@@ -1,9 +1,10 @@
|
|||||||
import { Archive, ZipArchive, ArchiveType, GzipArchive } from "./";
|
import { Archive, ZipArchive, ArchiveType, GzipArchive, NoneArchive } from "./";
|
||||||
|
|
||||||
class ArchiveFactory {
|
class ArchiveFactory {
|
||||||
static getArchive(archiveType: ArchiveType): Archive {
|
static getArchive(archiveType: ArchiveType): Archive {
|
||||||
if (archiveType === ArchiveType.ZIP) return ZipArchive.instance;
|
if (archiveType === ArchiveType.ZIP) return ZipArchive.instance;
|
||||||
if (archiveType === ArchiveType.GZIP) return GzipArchive.instance;
|
if (archiveType === ArchiveType.GZIP) return GzipArchive.instance;
|
||||||
|
if (archiveType === ArchiveType.NONE) return NoneArchive.instance;
|
||||||
|
|
||||||
throw new Error("Unsupported archive type");
|
throw new Error("Unsupported archive type");
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
enum ArchiveType {
|
enum ArchiveType {
|
||||||
ZIP = ".zip",
|
ZIP = ".zip",
|
||||||
GZIP = ".gzip",
|
GZIP = ".gzip",
|
||||||
|
NONE = "",
|
||||||
}
|
}
|
||||||
|
|
||||||
export default ArchiveType;
|
export default ArchiveType;
|
||||||
|
|||||||
@@ -0,0 +1,16 @@
|
|||||||
|
import { Archive } from "./";
|
||||||
|
import { Duplex, Transform } from "node:stream";
|
||||||
|
|
||||||
|
class NoneArchive implements Archive {
|
||||||
|
public static instance: Archive = new NoneArchive();
|
||||||
|
|
||||||
|
public extract(source: string): Duplex {
|
||||||
|
return new Transform({
|
||||||
|
transform(chunk, _, callback) {
|
||||||
|
callback(null, chunk);
|
||||||
|
},
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export default NoneArchive;
|
||||||
@@ -5,3 +5,4 @@ export { default as ArchiveFactory } from "./ArchiveFactory";
|
|||||||
|
|
||||||
export { default as ZipArchive } from "./ZipArchive";
|
export { default as ZipArchive } from "./ZipArchive";
|
||||||
export { default as GzipArchive } from "./GzipArchive";
|
export { default as GzipArchive } from "./GzipArchive";
|
||||||
|
export { default as NoneArchive } from "./NoneArchive";
|
||||||
|
|||||||
@@ -0,0 +1,20 @@
|
|||||||
|
import { Data } from "./";
|
||||||
|
|
||||||
|
type RawSmolaData = {
|
||||||
|
name: string;
|
||||||
|
country: string;
|
||||||
|
subcountry: string;
|
||||||
|
geonameid: string;
|
||||||
|
};
|
||||||
|
|
||||||
|
class WorldCitiesData implements Data {
|
||||||
|
input: string[];
|
||||||
|
output: string[];
|
||||||
|
|
||||||
|
constructor({ geonameid, country }: RawSmolaData) {
|
||||||
|
this.input = [geonameid];
|
||||||
|
this.output = [country];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export default WorldCitiesData;
|
||||||
@@ -2,3 +2,4 @@ export { default as Data, DataConstructor } from "./Data";
|
|||||||
|
|
||||||
export { default as NudgerData } from "./NudgerData";
|
export { default as NudgerData } from "./NudgerData";
|
||||||
export { default as OpenFoodFactsData } from "./OpenFoodFactsData";
|
export { default as OpenFoodFactsData } from "./OpenFoodFactsData";
|
||||||
|
export { default as WorldCitiesData } from "./WorldCitiesData";
|
||||||
|
|||||||
@@ -7,8 +7,7 @@ import CacheService from "../CacheService";
|
|||||||
import FileService from "../FileService";
|
import FileService from "../FileService";
|
||||||
|
|
||||||
import { ArchiveFactory, ArchiveType } from "../archive";
|
import { ArchiveFactory, ArchiveType } from "../archive";
|
||||||
import { ParserFactory } from "../parser";
|
import { ParserFactory, ParserType } from "../parser";
|
||||||
import { DatasetType } from "./";
|
|
||||||
import { Data, DataConstructor } from "../data";
|
import { Data, DataConstructor } from "../data";
|
||||||
|
|
||||||
type DatasetParams = {
|
type DatasetParams = {
|
||||||
@@ -17,7 +16,7 @@ type DatasetParams = {
|
|||||||
source: string;
|
source: string;
|
||||||
file: string;
|
file: string;
|
||||||
archiveType: ArchiveType;
|
archiveType: ArchiveType;
|
||||||
datasetType: DatasetType;
|
parserType: ParserType;
|
||||||
options?: DatasetOptions;
|
options?: DatasetOptions;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -33,7 +32,7 @@ class Dataset {
|
|||||||
readonly source: string;
|
readonly source: string;
|
||||||
readonly file: string;
|
readonly file: string;
|
||||||
readonly archiveType: ArchiveType;
|
readonly archiveType: ArchiveType;
|
||||||
readonly datasetType: DatasetType;
|
readonly parserType: ParserType;
|
||||||
readonly cachePath: string;
|
readonly cachePath: string;
|
||||||
private dataType: DataConstructor<Data>;
|
private dataType: DataConstructor<Data>;
|
||||||
private options?: DatasetOptions;
|
private options?: DatasetOptions;
|
||||||
@@ -54,7 +53,7 @@ class Dataset {
|
|||||||
file,
|
file,
|
||||||
dataType,
|
dataType,
|
||||||
archiveType,
|
archiveType,
|
||||||
datasetType,
|
parserType,
|
||||||
options,
|
options,
|
||||||
}: DatasetParams) {
|
}: DatasetParams) {
|
||||||
this.id = id;
|
this.id = id;
|
||||||
@@ -62,7 +61,7 @@ class Dataset {
|
|||||||
this.source = source;
|
this.source = source;
|
||||||
this.file = file;
|
this.file = file;
|
||||||
this.archiveType = archiveType;
|
this.archiveType = archiveType;
|
||||||
this.datasetType = datasetType;
|
this.parserType = parserType;
|
||||||
this.options = options;
|
this.options = options;
|
||||||
|
|
||||||
this.cachePath = CacheService.getCachePath(this.source, ".json");
|
this.cachePath = CacheService.getCachePath(this.source, ".json");
|
||||||
@@ -80,7 +79,7 @@ class Dataset {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const archive = ArchiveFactory.getArchive(this.archiveType);
|
const archive = ArchiveFactory.getArchive(this.archiveType);
|
||||||
const parser = ParserFactory.getParser(this.datasetType);
|
const parser = ParserFactory.getParser(this.parserType);
|
||||||
|
|
||||||
const pipelineAsync = promisify(pipeline);
|
const pipelineAsync = promisify(pipeline);
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
import { NudgerData, OpenFoodFactsData } from "../data";
|
import { NudgerData, OpenFoodFactsData, WorldCitiesData } from "../data";
|
||||||
import { ArchiveType } from "../archive";
|
import { ArchiveType } from "../archive";
|
||||||
import { Dataset, DatasetType } from "./";
|
import { Dataset } from "./";
|
||||||
|
import { ParserType } from "../parser";
|
||||||
|
|
||||||
class DatasetCollection {
|
class DatasetCollection {
|
||||||
public static datasets: Dataset[] = [
|
public static datasets: Dataset[] = [
|
||||||
@@ -11,7 +12,7 @@ class DatasetCollection {
|
|||||||
file: "open4goods-full-gtin-dataset.csv",
|
file: "open4goods-full-gtin-dataset.csv",
|
||||||
dataType: NudgerData,
|
dataType: NudgerData,
|
||||||
archiveType: ArchiveType.ZIP,
|
archiveType: ArchiveType.ZIP,
|
||||||
datasetType: DatasetType.CSV,
|
parserType: ParserType.CSV,
|
||||||
options: {
|
options: {
|
||||||
parser: {
|
parser: {
|
||||||
delimiter: ",",
|
delimiter: ",",
|
||||||
@@ -25,7 +26,7 @@ class DatasetCollection {
|
|||||||
file: "en.openfoodfacts.org.products.csv",
|
file: "en.openfoodfacts.org.products.csv",
|
||||||
dataType: OpenFoodFactsData,
|
dataType: OpenFoodFactsData,
|
||||||
archiveType: ArchiveType.GZIP,
|
archiveType: ArchiveType.GZIP,
|
||||||
datasetType: DatasetType.CSV,
|
parserType: ParserType.CSV,
|
||||||
options: {
|
options: {
|
||||||
parser: {
|
parser: {
|
||||||
delimiter: "\t",
|
delimiter: "\t",
|
||||||
@@ -33,6 +34,15 @@ class DatasetCollection {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
}),
|
}),
|
||||||
|
new Dataset({
|
||||||
|
id: "world-cities",
|
||||||
|
source:
|
||||||
|
"https://raw.githubusercontent.com/datasets/world-cities/refs/heads/main/data/world-cities.csv",
|
||||||
|
file: "world-cities.csv",
|
||||||
|
dataType: WorldCitiesData,
|
||||||
|
archiveType: ArchiveType.NONE,
|
||||||
|
parserType: ParserType.CSV,
|
||||||
|
}),
|
||||||
];
|
];
|
||||||
|
|
||||||
public static loadAll(): Promise<void[]> {
|
public static loadAll(): Promise<void[]> {
|
||||||
|
|||||||
@@ -1,5 +0,0 @@
|
|||||||
enum DatasetType {
|
|
||||||
CSV = ".csv",
|
|
||||||
}
|
|
||||||
|
|
||||||
export default DatasetType;
|
|
||||||
@@ -1,4 +1,2 @@
|
|||||||
export { default as DatasetType } from "./DatasetType";
|
|
||||||
|
|
||||||
export { default as Dataset } from "./Dataset";
|
export { default as Dataset } from "./Dataset";
|
||||||
export { default as DatasetCollection } from "./DatasetCollection";
|
export { default as DatasetCollection } from "./DatasetCollection";
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
import { Parser } from "./";
|
import { Parser } from "./";
|
||||||
import { Duplex } from "node:stream";
|
import { Duplex } from "node:stream";
|
||||||
// import csv from "csvtojson";
|
|
||||||
import * as csv from "fast-csv";
|
import * as csv from "fast-csv";
|
||||||
|
|
||||||
class CsvParser implements Parser {
|
class CsvParser implements Parser {
|
||||||
@@ -13,9 +12,6 @@ class CsvParser implements Parser {
|
|||||||
trim: true,
|
trim: true,
|
||||||
...options,
|
...options,
|
||||||
});
|
});
|
||||||
// return csv({
|
|
||||||
// delimiter: "auto",
|
|
||||||
// });
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
import { Parser, CsvParser } from "./";
|
import { Parser, CsvParser } from "./";
|
||||||
import { DatasetType } from "../dataset";
|
import { ParserType } from ".";
|
||||||
|
|
||||||
class ParserFactory {
|
class ParserFactory {
|
||||||
/**
|
/**
|
||||||
@@ -7,8 +7,8 @@ class ParserFactory {
|
|||||||
* @param fileType The type of the dataset
|
* @param fileType The type of the dataset
|
||||||
* @returns The parser corresponding to the dataset type
|
* @returns The parser corresponding to the dataset type
|
||||||
*/
|
*/
|
||||||
static getParser(fileType: DatasetType): Parser {
|
static getParser(fileType: ParserType): Parser {
|
||||||
if (fileType === DatasetType.CSV) return CsvParser.instance;
|
if (fileType === ParserType.CSV) return CsvParser.instance;
|
||||||
throw new Error("Unsupported file type");
|
throw new Error("Unsupported file type");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,5 @@
|
|||||||
|
enum ParserType {
|
||||||
|
CSV = ".csv",
|
||||||
|
}
|
||||||
|
|
||||||
|
export default ParserType;
|
||||||
@@ -1,3 +1,5 @@
|
|||||||
|
export { default as ParserType } from "./ParserType";
|
||||||
|
|
||||||
export { default as ParserFactory } from "./ParserFactory";
|
export { default as ParserFactory } from "./ParserFactory";
|
||||||
export { default as Parser } from "./Parser";
|
export { default as Parser } from "./Parser";
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user