feat: Add World-Cities Dataset

This commit is contained in:
Lucàs
2024-10-09 17:06:54 +02:00
parent 93ee52ddc2
commit 18927b5255
16 changed files with 100 additions and 26 deletions
+2 -1
View File
@@ -1,9 +1,10 @@
import { Archive, ZipArchive, ArchiveType, GzipArchive } from "./";
import { Archive, ZipArchive, ArchiveType, GzipArchive, NoneArchive } from "./";
class ArchiveFactory {
/**
 * Looks up the archive handler associated with an archive type.
 *
 * @param archiveType The archive type to resolve
 * @returns The `instance` exposed by the archive class matching that type
 * @throws Error if no archive class is associated with the given type
 */
static getArchive(archiveType: ArchiveType): Archive {
  switch (archiveType) {
    case ArchiveType.ZIP:
      return ZipArchive.instance;
    case ArchiveType.GZIP:
      return GzipArchive.instance;
    case ArchiveType.NONE:
      return NoneArchive.instance;
    default:
      // Reached only when a value outside the handled members is passed in.
      throw new Error("Unsupported archive type");
  }
}
+1
View File
@@ -1,6 +1,7 @@
/**
 * Archive formats a dataset source can be delivered in.
 *
 * ArchiveFactory.getArchive maps each member to the archive class that
 * handles it (ZIP -> ZipArchive, GZIP -> GzipArchive, NONE -> NoneArchive).
 */
enum ArchiveType {
// Zip archive.
ZIP = ".zip",
// Gzip archive.
// NOTE(review): gzip files are commonly suffixed ".gz"; confirm ".gzip" is
// intended if this value is ever compared against file names.
GZIP = ".gzip",
// Source is not archived; NoneArchive forwards the data unchanged.
NONE = "",
}
export default ArchiveType;
+16
View File
@@ -0,0 +1,16 @@
import { Archive } from "./";
import { Duplex, Transform } from "node:stream";
/**
 * Archive implementation for sources that are not archived at all:
 * "extracting" simply forwards the incoming bytes untouched.
 */
class NoneArchive implements Archive {
  /** Shared instance handed out to callers. */
  public static instance: Archive = new NoneArchive();

  /**
   * Creates a stream that re-emits every chunk exactly as it was written.
   *
   * @param source Not read by this implementation
   * @returns A new transform stream acting as an identity pipe
   */
  public extract(source: string): Duplex {
    const identity = new Transform({
      // Hand each chunk straight back to the stream without modification.
      transform: (chunk, _encoding, done) => done(null, chunk),
    });
    return identity;
  }
}
export default NoneArchive;
+1
View File
@@ -5,3 +5,4 @@ export { default as ArchiveFactory } from "./ArchiveFactory";
export { default as ZipArchive } from "./ZipArchive";
export { default as GzipArchive } from "./GzipArchive";
export { default as NoneArchive } from "./NoneArchive";
+20
View File
@@ -0,0 +1,20 @@
import { Data } from "./";
/**
 * Shape of the raw record accepted by the WorldCitiesData constructor.
 * Every field is typed as a string.
 *
 * NOTE(review): the "Smola" name does not match the world-cities data class
 * that consumes it — possibly carried over from another data class; confirm
 * and consider renaming.
 */
type RawSmolaData = {
// Not read by the WorldCitiesData constructor.
name: string;
// Stored as the single `output` value.
country: string;
// Not read by the WorldCitiesData constructor.
subcountry: string;
// Stored as the single `input` value.
geonameid: string;
};
/**
 * Data record built from one world-cities row: a single input value paired
 * with a single output value.
 */
class WorldCitiesData implements Data {
  /** One-element list holding the row's `geonameid`. */
  input: string[];
  /** One-element list holding the row's `country`. */
  output: string[];

  /**
   * @param row Raw record; only `geonameid` and `country` are read
   */
  constructor(row: RawSmolaData) {
    this.input = [row.geonameid];
    this.output = [row.country];
  }
}
export default WorldCitiesData;
+1
View File
@@ -2,3 +2,4 @@ export { default as Data, DataConstructor } from "./Data";
export { default as NudgerData } from "./NudgerData";
export { default as OpenFoodFactsData } from "./OpenFoodFactsData";
export { default as WorldCitiesData } from "./WorldCitiesData";
+6 -7
View File
@@ -7,8 +7,7 @@ import CacheService from "../CacheService";
import FileService from "../FileService";
import { ArchiveFactory, ArchiveType } from "../archive";
import { ParserFactory } from "../parser";
import { DatasetType } from "./";
import { ParserFactory, ParserType } from "../parser";
import { Data, DataConstructor } from "../data";
type DatasetParams = {
@@ -17,7 +16,7 @@ type DatasetParams = {
source: string;
file: string;
archiveType: ArchiveType;
datasetType: DatasetType;
parserType: ParserType;
options?: DatasetOptions;
};
@@ -33,7 +32,7 @@ class Dataset {
readonly source: string;
readonly file: string;
readonly archiveType: ArchiveType;
readonly datasetType: DatasetType;
readonly parserType: ParserType;
readonly cachePath: string;
private dataType: DataConstructor<Data>;
private options?: DatasetOptions;
@@ -54,7 +53,7 @@ class Dataset {
file,
dataType,
archiveType,
datasetType,
parserType,
options,
}: DatasetParams) {
this.id = id;
@@ -62,7 +61,7 @@ class Dataset {
this.source = source;
this.file = file;
this.archiveType = archiveType;
this.datasetType = datasetType;
this.parserType = parserType;
this.options = options;
this.cachePath = CacheService.getCachePath(this.source, ".json");
@@ -80,7 +79,7 @@ class Dataset {
}
const archive = ArchiveFactory.getArchive(this.archiveType);
const parser = ParserFactory.getParser(this.datasetType);
const parser = ParserFactory.getParser(this.parserType);
const pipelineAsync = promisify(pipeline);
+14 -4
View File
@@ -1,6 +1,7 @@
import { NudgerData, OpenFoodFactsData } from "../data";
import { NudgerData, OpenFoodFactsData, WorldCitiesData } from "../data";
import { ArchiveType } from "../archive";
import { Dataset, DatasetType } from "./";
import { Dataset } from "./";
import { ParserType } from "../parser";
class DatasetCollection {
public static datasets: Dataset[] = [
@@ -11,7 +12,7 @@ class DatasetCollection {
file: "open4goods-full-gtin-dataset.csv",
dataType: NudgerData,
archiveType: ArchiveType.ZIP,
datasetType: DatasetType.CSV,
parserType: ParserType.CSV,
options: {
parser: {
delimiter: ",",
@@ -25,7 +26,7 @@ class DatasetCollection {
file: "en.openfoodfacts.org.products.csv",
dataType: OpenFoodFactsData,
archiveType: ArchiveType.GZIP,
datasetType: DatasetType.CSV,
parserType: ParserType.CSV,
options: {
parser: {
delimiter: "\t",
@@ -33,6 +34,15 @@ class DatasetCollection {
},
},
}),
new Dataset({
id: "world-cities",
source:
"https://raw.githubusercontent.com/datasets/world-cities/refs/heads/main/data/world-cities.csv",
file: "world-cities.csv",
dataType: WorldCitiesData,
archiveType: ArchiveType.NONE,
parserType: ParserType.CSV,
}),
];
public static loadAll(): Promise<void[]> {
-5
View File
@@ -1,5 +0,0 @@
/**
 * File kinds a dataset can be stored as. CSV is the only member defined.
 */
enum DatasetType {
// Comma/character-separated values file.
CSV = ".csv",
}
export default DatasetType;
-2
View File
@@ -1,4 +1,2 @@
export { default as DatasetType } from "./DatasetType";
export { default as Dataset } from "./Dataset";
export { default as DatasetCollection } from "./DatasetCollection";
-4
View File
@@ -1,6 +1,5 @@
import { Parser } from "./";
import { Duplex } from "node:stream";
// import csv from "csvtojson";
import * as csv from "fast-csv";
class CsvParser implements Parser {
@@ -13,9 +12,6 @@ class CsvParser implements Parser {
trim: true,
...options,
});
// return csv({
// delimiter: "auto",
// });
}
}
+3 -3
View File
@@ -1,5 +1,5 @@
import { Parser, CsvParser } from "./";
import { DatasetType } from "../dataset";
import { ParserType } from ".";
class ParserFactory {
/**
@@ -7,8 +7,8 @@ class ParserFactory {
* @param fileType The parser type to resolve
* @returns The parser corresponding to the given parser type
*/
static getParser(fileType: DatasetType): Parser {
if (fileType === DatasetType.CSV) return CsvParser.instance;
static getParser(fileType: ParserType): Parser {
  switch (fileType) {
    case ParserType.CSV:
      return CsvParser.instance;
    default:
      // No parser is associated with any other value.
      throw new Error("Unsupported file type");
  }
}
}
+5
View File
@@ -0,0 +1,5 @@
/**
 * Parser kinds that ParserFactory.getParser can resolve.
 * CSV is the only member defined; it maps to CsvParser.
 */
enum ParserType {
// Selects the CSV parser.
CSV = ".csv",
}
export default ParserType;
+2
View File
@@ -1,3 +1,5 @@
export { default as ParserType } from "./ParserType";
export { default as ParserFactory } from "./ParserFactory";
export { default as Parser } from "./Parser";