draft: Add OpenFoodFacts Dataset

/!\ Warning: the stream never ends (unresolved in this draft)
This commit is contained in:
Lucàs
2024-10-07 23:22:22 +02:00
parent 3826e3f955
commit 5e53d9d914
10 changed files with 42 additions and 16 deletions
+6 -7
View File
@@ -1,14 +1,13 @@
version: '3.1' version: "3.1"
services: services:
mariadb: mariadb:
image: mariadb:latest image: mariadb:latest
restart: 'always' restart: "always"
volumes: # volumes:
- ./mariaDB/init:/docker-entrypoint-initdb.d # - ./mariaDB/init:/docker-entrypoint-initdb.d
environment: environment:
MYSQL_ROOT_PASSWORD: root MYSQL_ROOT_PASSWORD: root
MYSQL_DATABASE: db_prod MYSQL_DATABASE: db
ports: ports:
- "3306:3306" - "3306:3306"
+2 -2
View File
@@ -1,7 +1,7 @@
import { Router, Request, Response } from "express"; import { Router, Request, Response } from "express";
import { DatasetCollection } from "../services/dataset"; import { DatasetCollection } from "../services/dataset";
import { DMN } from "../services/dmn/DMN"; import { DMN } from "../services/dmn/DMN";
import { DMN_Definitions } from "../services/dmn/interfaces/DMN_Definitions"; import { Definitions } from "../services/dmn/interfaces/";
import { Data } from "../services/data"; import { Data } from "../services/data";
const router = Router(); const router = Router();
@@ -22,7 +22,7 @@ router.post("/randomize/:id", async (req: Request, res: Response) => {
); );
if (!dataset) return res.status(404).json({ status: "NOT_FOUND" }); if (!dataset) return res.status(404).json({ status: "NOT_FOUND" });
const dmn: DMN_Definitions = await DMN.parse(req.body); const dmn: Definitions = await DMN.parse(req.body);
const schema = DMN.getSchema(dmn); const schema = DMN.getSchema(dmn);
const data: Data[] = await dataset.get(size, schema); const data: Data[] = await dataset.get(size, schema);
+1 -2
View File
@@ -3,8 +3,7 @@ import { Archive, ZipArchive, ArchiveType, GzipArchive } from "./";
class ArchiveFactory { class ArchiveFactory {
static getArchive(archiveType: ArchiveType): Archive { static getArchive(archiveType: ArchiveType): Archive {
if (archiveType === ArchiveType.ZIP) return ZipArchive.instance; if (archiveType === ArchiveType.ZIP) return ZipArchive.instance;
// if ([ArchiveType.GZIP, ArchiveType.GZ].includes(archiveType)) if (archiveType === ArchiveType.GZIP) return GzipArchive.instance;
// return GzipArchive.instance;
throw new Error("Unsupported archive type"); throw new Error("Unsupported archive type");
} }
-1
View File
@@ -1,7 +1,6 @@
enum ArchiveType { enum ArchiveType {
ZIP = ".zip", ZIP = ".zip",
GZIP = ".gzip", GZIP = ".gzip",
GZ = ".gz",
} }
export default ArchiveType; export default ArchiveType;
+1
View File
@@ -9,4 +9,5 @@ class GzipArchive implements Archive {
return createGunzip(); return createGunzip();
} }
} }
export default GzipArchive; export default GzipArchive;
+18
View File
@@ -0,0 +1,18 @@
import { Data } from "./";
type RawOpenFoodFactsData = {
code: string;
countries_en: string;
};
class OpenFoodFactsData implements Data {
input: string[] = [];
output: string[] = [];
constructor({ code, countries_en }: RawOpenFoodFactsData) {
this.input = [code];
this.output = [countries_en];
}
}
export default OpenFoodFactsData;
+1
View File
@@ -1,3 +1,4 @@
export { default as Data, DataConstructor } from "./Data"; export { default as Data, DataConstructor } from "./Data";
export { default as NudgerData } from "./NudgerData"; export { default as NudgerData } from "./NudgerData";
export { default as OpenFoodFactsData } from "./OpenFoodFactsData";
-2
View File
@@ -75,8 +75,6 @@ class Dataset {
const pipelineAsync = promisify(pipeline); const pipelineAsync = promisify(pipeline);
const self = this;
console.log(`Download: ${this.source}`); console.log(`Download: ${this.source}`);
await pipelineAsync( await pipelineAsync(
await FileService.getFileStream(this.source), await FileService.getFileStream(this.source),
+10 -1
View File
@@ -1,4 +1,4 @@
import { NudgerData } from "../data"; import { NudgerData, OpenFoodFactsData } from "../data";
import { ArchiveType } from "../archive"; import { ArchiveType } from "../archive";
import { Dataset, DatasetType } from "./"; import { Dataset, DatasetType } from "./";
@@ -13,6 +13,15 @@ class DatasetCollection {
archiveType: ArchiveType.ZIP, archiveType: ArchiveType.ZIP,
datasetType: DatasetType.CSV, datasetType: DatasetType.CSV,
}), }),
new Dataset({
id: "openfoodfacts",
source:
"https://static.openfoodfacts.org/data/en.openfoodfacts.org.products.csv.gz",
file: "en.openfoodfacts.org.products.csv",
dataType: OpenFoodFactsData,
archiveType: ArchiveType.GZIP,
datasetType: DatasetType.CSV,
}),
]; ];
public static loadAll(): Promise<void[]> { public static loadAll(): Promise<void[]> {
+3 -1
View File
@@ -6,7 +6,9 @@ class CsvParser implements Parser {
public static instance: CsvParser = new CsvParser(); public static instance: CsvParser = new CsvParser();
public parse(): Duplex { public parse(): Duplex {
return csv(); return csv({
delimiter: "auto",
});
} }
} }