draft: Add OpenFoodFacts Dataset

/!\ Warning: Stream doesn't end
This commit is contained in:
Lucàs
2024-10-07 23:22:22 +02:00
parent 3826e3f955
commit 5e53d9d914
10 changed files with 42 additions and 16 deletions
+1 -2
View File
@@ -3,8 +3,7 @@ import { Archive, ZipArchive, ArchiveType, GzipArchive } from "./";
class ArchiveFactory {
/**
 * Resolves the shared archive handler for the requested archive type.
 *
 * @param archiveType - archive type to look up.
 * @returns `ZipArchive.instance` for `ArchiveType.ZIP`, `GzipArchive.instance`
 *          for `ArchiveType.GZIP`.
 * @throws Error("Unsupported archive type") for any other value.
 */
static getArchive(archiveType: ArchiveType): Archive {
  // An earlier, commented-out variant of the gzip branch also matched
  // ArchiveType.GZ; only ArchiveType.GZIP is matched now.
  switch (archiveType) {
    case ArchiveType.ZIP:
      return ZipArchive.instance;
    case ArchiveType.GZIP:
      return GzipArchive.instance;
    default:
      throw new Error("Unsupported archive type");
  }
}
-1
View File
@@ -1,7 +1,6 @@
/**
 * Archive types that can be requested from the archive module.
 *
 * NOTE(review): the member values look like file-extension strings
 * (leading dot included) — confirm how callers compare against them.
 */
enum ArchiveType {
ZIP = ".zip",
GZIP = ".gzip",
GZ = ".gz",
}
export default ArchiveType;
+1
View File
@@ -9,4 +9,5 @@ class GzipArchive implements Archive {
return createGunzip();
}
}
export default GzipArchive;
+18
View File
@@ -0,0 +1,18 @@
import { Data } from "./";
/**
 * The two fields read from a raw OpenFoodFacts record.
 * NOTE(review): presumably these are column names of the parsed CSV row —
 * confirm against the dataset header.
 */
type RawOpenFoodFactsData = {
  code: string;
  countries_en: string;
};

/**
 * `Data` implementation built from a raw OpenFoodFacts record:
 * `code` becomes the single input value and `countries_en` the single
 * output value.
 */
class OpenFoodFactsData implements Data {
  input: string[];
  output: string[];

  /**
   * @param raw - record providing `code` and `countries_en`.
   */
  constructor(raw: RawOpenFoodFactsData) {
    // Each side is wrapped in a one-element array.
    this.input = [raw.code];
    this.output = [raw.countries_en];
  }
}
export default OpenFoodFactsData;
+1
View File
@@ -1,3 +1,4 @@
export { default as Data, DataConstructor } from "./Data";
export { default as NudgerData } from "./NudgerData";
export { default as OpenFoodFactsData } from "./OpenFoodFactsData";
-2
View File
@@ -75,8 +75,6 @@ class Dataset {
const pipelineAsync = promisify(pipeline);
const self = this;
console.log(`Download: ${this.source}`);
await pipelineAsync(
await FileService.getFileStream(this.source),
+10 -1
View File
@@ -1,4 +1,4 @@
import { NudgerData } from "../data";
import { NudgerData, OpenFoodFactsData } from "../data";
import { ArchiveType } from "../archive";
import { Dataset, DatasetType } from "./";
@@ -13,6 +13,15 @@ class DatasetCollection {
archiveType: ArchiveType.ZIP,
datasetType: DatasetType.CSV,
}),
new Dataset({
id: "openfoodfacts",
source:
"https://static.openfoodfacts.org/data/en.openfoodfacts.org.products.csv.gz",
file: "en.openfoodfacts.org.products.csv",
dataType: OpenFoodFactsData,
archiveType: ArchiveType.GZIP,
datasetType: DatasetType.CSV,
}),
];
public static loadAll(): Promise<void[]> {
+3 -1
View File
@@ -6,7 +6,9 @@ class CsvParser implements Parser {
public static instance: CsvParser = new CsvParser();
/**
 * Builds the stream returned to callers of this parser.
 *
 * @returns the `Duplex` produced by calling `csv(...)`.
 */
public parse(): Duplex {
// NOTE(review): two `return` statements are visible in this view. As written,
// only the first can execute, so the `delimiter: "auto"` call is unreachable —
// confirm which of the two is the intended one.
return csv();
return csv({
delimiter: "auto",
});
}
}