mirror of
https://github.com/kmitresse/Compo-Service-Log-Project.git
synced 2026-05-13 17:11:49 +00:00
feat: Add World-Cities Dataset
This commit is contained in:
Generated
+27
@@ -15,12 +15,14 @@
|
||||
"dotenv": "^16.4.5",
|
||||
"express": "^4.21.0",
|
||||
"fast-csv": "^5.0.1",
|
||||
"js-yaml": "^4.1.0",
|
||||
"node-stream-zip": "^1.15.0",
|
||||
"tar-stream": "^3.1.7",
|
||||
"unzipper": "^0.12.3"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/express": "^4.17.21",
|
||||
"@types/js-yaml": "^4.0.9",
|
||||
"@types/node": "^22.5.5",
|
||||
"@types/tar-stream": "^3.1.3",
|
||||
"@types/unzipper": "^0.10.10",
|
||||
@@ -179,6 +181,13 @@
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@types/js-yaml": {
|
||||
"version": "4.0.9",
|
||||
"resolved": "https://registry.npmjs.org/@types/js-yaml/-/js-yaml-4.0.9.tgz",
|
||||
"integrity": "sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@types/mime": {
|
||||
"version": "1.3.5",
|
||||
"resolved": "https://registry.npmjs.org/@types/mime/-/mime-1.3.5.tgz",
|
||||
@@ -299,6 +308,12 @@
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/argparse": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz",
|
||||
"integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==",
|
||||
"license": "Python-2.0"
|
||||
},
|
||||
"node_modules/array-flatten": {
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-1.1.1.tgz",
|
||||
@@ -889,6 +904,18 @@
|
||||
"integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/js-yaml": {
|
||||
"version": "4.1.0",
|
||||
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz",
|
||||
"integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"argparse": "^2.0.1"
|
||||
},
|
||||
"bin": {
|
||||
"js-yaml": "bin/js-yaml.js"
|
||||
}
|
||||
},
|
||||
"node_modules/jsonfile": {
|
||||
"version": "6.1.0",
|
||||
"resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-6.1.0.tgz",
|
||||
|
||||
@@ -21,12 +21,14 @@
|
||||
"dotenv": "^16.4.5",
|
||||
"express": "^4.21.0",
|
||||
"fast-csv": "^5.0.1",
|
||||
"js-yaml": "^4.1.0",
|
||||
"node-stream-zip": "^1.15.0",
|
||||
"tar-stream": "^3.1.7",
|
||||
"unzipper": "^0.12.3"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/express": "^4.17.21",
|
||||
"@types/js-yaml": "^4.0.9",
|
||||
"@types/node": "^22.5.5",
|
||||
"@types/tar-stream": "^3.1.3",
|
||||
"@types/unzipper": "^0.10.10",
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
import { Archive, ZipArchive, ArchiveType, GzipArchive } from "./";
|
||||
import { Archive, ZipArchive, ArchiveType, GzipArchive, NoneArchive } from "./";
|
||||
|
||||
class ArchiveFactory {
|
||||
/**
 * Resolves the shared archive handler for the given archive type.
 *
 * @param archiveType The kind of archive the dataset source is packaged in
 * @returns The singleton `Archive` implementation registered for that type
 * @throws Error when no implementation exists for `archiveType`
 */
static getArchive(archiveType: ArchiveType): Archive {
  switch (archiveType) {
    case ArchiveType.ZIP:
      return ZipArchive.instance;
    case ArchiveType.GZIP:
      return GzipArchive.instance;
    case ArchiveType.NONE:
      return NoneArchive.instance;
    default:
      throw new Error("Unsupported archive type");
  }
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
/**
 * Archive formats a dataset source can be delivered in.
 *
 * `ArchiveFactory.getArchive` maps each member to its `Archive` implementation.
 */
enum ArchiveType {
  /** Zip archive. */
  ZIP = ".zip",

  /** Gzip archive. */
  GZIP = ".gzip",

  /** The source is not archived; its value is the empty string. */
  NONE = "",
}
|
||||
|
||||
export default ArchiveType;
|
||||
|
||||
@@ -0,0 +1,16 @@
|
||||
import { Archive } from "./";
|
||||
import { Duplex, Transform } from "node:stream";
|
||||
|
||||
/**
 * Archive handler for sources that are not archived.
 *
 * Nothing has to be unpacked, so extraction is a pass-through stream:
 * every chunk written to it comes out unchanged.
 */
class NoneArchive implements Archive {
  /** Shared instance returned by the archive factory for `ArchiveType.NONE`. */
  public static instance: Archive = new NoneArchive();

  /**
   * Creates the pass-through stream used in place of a real extractor.
   *
   * @param source Identifier of the source being read; not used by this implementation
   * @returns A duplex stream that forwards each incoming chunk as-is
   */
  public extract(source: string): Duplex {
    const identity = new Transform({
      // Hand every chunk straight to the readable side, untouched.
      transform: (data, _encoding, done) => {
        done(null, data);
      },
    });

    return identity;
  }
}
|
||||
|
||||
export default NoneArchive;
|
||||
@@ -5,3 +5,4 @@ export { default as ArchiveFactory } from "./ArchiveFactory";
|
||||
|
||||
export { default as ZipArchive } from "./ZipArchive";
|
||||
export { default as GzipArchive } from "./GzipArchive";
|
||||
export { default as NoneArchive } from "./NoneArchive";
|
||||
|
||||
@@ -0,0 +1,20 @@
|
||||
import { Data } from "./";
|
||||
|
||||
/**
 * One raw row of the world-cities CSV, keyed by column name.
 * All values are kept as the strings produced by the parser.
 */
type RawWorldCitiesData = {
  name: string;
  country: string;
  subcountry: string;
  geonameid: string;
};

/**
 * Data entry built from a world-cities row.
 *
 * The row's `geonameid` becomes the single input value and its `country`
 * the single output value; `name` and `subcountry` are not retained.
 */
class WorldCitiesData implements Data {
  /** Single-element list holding the row's `geonameid`. */
  input: string[];

  /** Single-element list holding the row's `country`. */
  output: string[];

  /**
   * @param row Raw row; only `geonameid` and `country` are read
   */
  constructor({ geonameid, country }: RawWorldCitiesData) {
    this.input = [geonameid];
    this.output = [country];
  }
}
|
||||
|
||||
export default WorldCitiesData;
|
||||
@@ -2,3 +2,4 @@ export { default as Data, DataConstructor } from "./Data";
|
||||
|
||||
export { default as NudgerData } from "./NudgerData";
|
||||
export { default as OpenFoodFactsData } from "./OpenFoodFactsData";
|
||||
export { default as WorldCitiesData } from "./WorldCitiesData";
|
||||
|
||||
@@ -7,8 +7,7 @@ import CacheService from "../CacheService";
|
||||
import FileService from "../FileService";
|
||||
|
||||
import { ArchiveFactory, ArchiveType } from "../archive";
|
||||
import { ParserFactory } from "../parser";
|
||||
import { DatasetType } from "./";
|
||||
import { ParserFactory, ParserType } from "../parser";
|
||||
import { Data, DataConstructor } from "../data";
|
||||
|
||||
type DatasetParams = {
|
||||
@@ -17,7 +16,7 @@ type DatasetParams = {
|
||||
source: string;
|
||||
file: string;
|
||||
archiveType: ArchiveType;
|
||||
datasetType: DatasetType;
|
||||
parserType: ParserType;
|
||||
options?: DatasetOptions;
|
||||
};
|
||||
|
||||
@@ -33,7 +32,7 @@ class Dataset {
|
||||
readonly source: string;
|
||||
readonly file: string;
|
||||
readonly archiveType: ArchiveType;
|
||||
readonly datasetType: DatasetType;
|
||||
readonly parserType: ParserType;
|
||||
readonly cachePath: string;
|
||||
private dataType: DataConstructor<Data>;
|
||||
private options?: DatasetOptions;
|
||||
@@ -54,7 +53,7 @@ class Dataset {
|
||||
file,
|
||||
dataType,
|
||||
archiveType,
|
||||
datasetType,
|
||||
parserType,
|
||||
options,
|
||||
}: DatasetParams) {
|
||||
this.id = id;
|
||||
@@ -62,7 +61,7 @@ class Dataset {
|
||||
this.source = source;
|
||||
this.file = file;
|
||||
this.archiveType = archiveType;
|
||||
this.datasetType = datasetType;
|
||||
this.parserType = parserType;
|
||||
this.options = options;
|
||||
|
||||
this.cachePath = CacheService.getCachePath(this.source, ".json");
|
||||
@@ -80,7 +79,7 @@ class Dataset {
|
||||
}
|
||||
|
||||
const archive = ArchiveFactory.getArchive(this.archiveType);
|
||||
const parser = ParserFactory.getParser(this.datasetType);
|
||||
const parser = ParserFactory.getParser(this.parserType);
|
||||
|
||||
const pipelineAsync = promisify(pipeline);
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { NudgerData, OpenFoodFactsData } from "../data";
|
||||
import { NudgerData, OpenFoodFactsData, WorldCitiesData } from "../data";
|
||||
import { ArchiveType } from "../archive";
|
||||
import { Dataset, DatasetType } from "./";
|
||||
import { Dataset } from "./";
|
||||
import { ParserType } from "../parser";
|
||||
|
||||
class DatasetCollection {
|
||||
public static datasets: Dataset[] = [
|
||||
@@ -11,7 +12,7 @@ class DatasetCollection {
|
||||
file: "open4goods-full-gtin-dataset.csv",
|
||||
dataType: NudgerData,
|
||||
archiveType: ArchiveType.ZIP,
|
||||
datasetType: DatasetType.CSV,
|
||||
parserType: ParserType.CSV,
|
||||
options: {
|
||||
parser: {
|
||||
delimiter: ",",
|
||||
@@ -25,7 +26,7 @@ class DatasetCollection {
|
||||
file: "en.openfoodfacts.org.products.csv",
|
||||
dataType: OpenFoodFactsData,
|
||||
archiveType: ArchiveType.GZIP,
|
||||
datasetType: DatasetType.CSV,
|
||||
parserType: ParserType.CSV,
|
||||
options: {
|
||||
parser: {
|
||||
delimiter: "\t",
|
||||
@@ -33,6 +34,15 @@ class DatasetCollection {
|
||||
},
|
||||
},
|
||||
}),
|
||||
new Dataset({
|
||||
id: "world-cities",
|
||||
source:
|
||||
"https://raw.githubusercontent.com/datasets/world-cities/refs/heads/main/data/world-cities.csv",
|
||||
file: "world-cities.csv",
|
||||
dataType: WorldCitiesData,
|
||||
archiveType: ArchiveType.NONE,
|
||||
parserType: ParserType.CSV,
|
||||
}),
|
||||
];
|
||||
|
||||
public static loadAll(): Promise<void[]> {
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
/** File formats a dataset can be stored in. */
enum DatasetType {
  /** Comma/character-separated values file. */
  CSV = ".csv",
}
|
||||
|
||||
export default DatasetType;
|
||||
@@ -1,4 +1,2 @@
|
||||
export { default as DatasetType } from "./DatasetType";
|
||||
|
||||
export { default as Dataset } from "./Dataset";
|
||||
export { default as DatasetCollection } from "./DatasetCollection";
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import { Parser } from "./";
|
||||
import { Duplex } from "node:stream";
|
||||
// import csv from "csvtojson";
|
||||
import * as csv from "fast-csv";
|
||||
|
||||
class CsvParser implements Parser {
|
||||
@@ -13,9 +12,6 @@ class CsvParser implements Parser {
|
||||
trim: true,
|
||||
...options,
|
||||
});
|
||||
// return csv({
|
||||
// delimiter: "auto",
|
||||
// });
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { Parser, CsvParser } from "./";
|
||||
import { DatasetType } from "../dataset";
|
||||
import { ParserType } from ".";
|
||||
|
||||
class ParserFactory {
|
||||
/**
|
||||
@@ -7,8 +7,8 @@ class ParserFactory {
|
||||
* @param fileType The type of the dataset
|
||||
* @returns The parser corresponding to the dataset type
|
||||
*/
|
||||
static getParser(fileType: DatasetType): Parser {
|
||||
if (fileType === DatasetType.CSV) return CsvParser.instance;
|
||||
static getParser(fileType: ParserType): Parser {
  // CSV is the only parser type with an implementation; reject anything else.
  if (fileType !== ParserType.CSV) {
    throw new Error("Unsupported file type");
  }

  return CsvParser.instance;
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,5 @@
|
||||
/**
 * Parser kinds a dataset can request.
 *
 * `ParserFactory.getParser` resolves each member to its `Parser` implementation.
 */
enum ParserType {
  /** Delimited text parsed by `CsvParser`. */
  CSV = ".csv",
}
|
||||
|
||||
export default ParserType;
|
||||
@@ -1,3 +1,5 @@
|
||||
export { default as ParserType } from "./ParserType";
|
||||
|
||||
export { default as ParserFactory } from "./ParserFactory";
|
||||
export { default as Parser } from "./Parser";
|
||||
|
||||
|
||||
Reference in New Issue
Block a user