[Draft] Storing data in MariaDB with TypeORM

Lucàs
2024-10-23 17:07:23 +02:00
parent c95c92e987
commit cde872ca55
14 changed files with 323 additions and 144 deletions
+1 -1
@@ -3,7 +3,7 @@ version: "3.1"
 services:
   mariadb:
     image: mariadb:latest
-    restart: "always"
+    restart: "no"
     environment:
       MYSQL_ROOT_PASSWORD: root
       MYSQL_DATABASE: db
+10
@@ -15,6 +15,7 @@
"dotenv": "^16.4.5", "dotenv": "^16.4.5",
"express": "^4.21.0", "express": "^4.21.0",
"fast-csv": "^5.0.1", "fast-csv": "^5.0.1",
"jsonschema": "^1.4.1",
"mysql": "^2.18.1", "mysql": "^2.18.1",
"reflect-metadata": "^0.2.2", "reflect-metadata": "^0.2.2",
"tar-stream": "^3.1.7", "tar-stream": "^3.1.7",
@@ -1423,6 +1424,15 @@
"graceful-fs": "^4.1.6" "graceful-fs": "^4.1.6"
} }
}, },
"node_modules/jsonschema": {
"version": "1.4.1",
"resolved": "https://registry.npmjs.org/jsonschema/-/jsonschema-1.4.1.tgz",
"integrity": "sha512-S6cATIPVv1z0IlxdN+zUk5EPjkGCdnhN4wVSBlvoUO1tOLJootbo9CquNJmbIh4yikWHiUedhRYrNPn1arpEmQ==",
"license": "MIT",
"engines": {
"node": "*"
}
},
"node_modules/lodash.escaperegexp": { "node_modules/lodash.escaperegexp": {
"version": "4.1.2", "version": "4.1.2",
"resolved": "https://registry.npmjs.org/lodash.escaperegexp/-/lodash.escaperegexp-4.1.2.tgz", "resolved": "https://registry.npmjs.org/lodash.escaperegexp/-/lodash.escaperegexp-4.1.2.tgz",
+1
@@ -21,6 +21,7 @@
"dotenv": "^16.4.5", "dotenv": "^16.4.5",
"express": "^4.21.0", "express": "^4.21.0",
"fast-csv": "^5.0.1", "fast-csv": "^5.0.1",
"jsonschema": "^1.4.1",
"mysql": "^2.18.1", "mysql": "^2.18.1",
"reflect-metadata": "^0.2.2", "reflect-metadata": "^0.2.2",
"tar-stream": "^3.1.7", "tar-stream": "^3.1.7",
+7 -2
@@ -1,6 +1,11 @@
import "reflect-metadata"; import "reflect-metadata";
import { DataSource } from "typeorm"; import { DataSource } from "typeorm";
import { Log } from "./entity/Log"; import { Log } from "./entity/Log";
import {
NudgerData,
OpenFoodFactsData,
WorldCitiesData,
} from "./services/data";
export const AppDataSource = new DataSource({ export const AppDataSource = new DataSource({
type: "mariadb", type: "mariadb",
@@ -10,8 +15,8 @@ export const AppDataSource = new DataSource({
password: "root", password: "root",
database: "db", database: "db",
synchronize: true, synchronize: true,
logging: true, logging: false,
entities: [Log], entities: [Log, NudgerData, OpenFoodFactsData, WorldCitiesData],
subscribers: [], subscribers: [],
migrations: [], migrations: [],
}); });
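For context, a minimal sketch (not part of this commit) of how the data source might be brought up at application startup; the entry-point file name and log messages are assumptions.

// Sketch, assumed entry point (e.g. src/index.ts): open the MariaDB connection
// before any repository work. With synchronize: true, TypeORM creates or updates
// the tables for the registered entities (Log, NudgerData, OpenFoodFactsData,
// WorldCitiesData) on startup.
import { AppDataSource } from "./AppDataSource";

AppDataSource.initialize()
  .then(() => {
    console.log("MariaDB connection ready, entity tables synchronized");
  })
  .catch((err) => {
    console.error("Failed to initialize the data source", err);
  });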
+1
@@ -49,6 +49,7 @@ router.post("/randomize/:id", async (req: Request, res: Response) => {
   const dmn: Definitions = await DMN.parse(req.body);
   const schema = DMN.getSchema(dmn);
+  console.log(JSON.stringify(schema, null, 2));
   const data: Data[] = await dataset.get(size, schema);
+3 -3
@@ -22,8 +22,8 @@ class CacheService {
   }
 }
 
-if (!existsSync(CacheService.CACHE_DIR)) {
-  mkdirSync(CacheService.CACHE_DIR);
-}
+// if (!existsSync(CacheService.CACHE_DIR)) {
+//   mkdirSync(CacheService.CACHE_DIR);
+// }
 
 export default CacheService;
+8 -4
@@ -1,9 +1,13 @@
 interface Data {
-  input: any[];
-  output: any[];
+  id?: number;
 }
 
-type DataConstructor<T extends Data> = new (...args: any[]) => T;
+class InvalidData extends Error {
+  constructor(message: string) {
+    super(message);
+    this.name = "Invalid data";
+  }
+}
 
 export default Data;
-export { DataConstructor };
+export { InvalidData };
+31 -6
@@ -1,4 +1,5 @@
-import { Data } from "./";
+import { Data, InvalidData } from "./";
+import { Column, Entity, PrimaryGeneratedColumn } from "typeorm";
 
 type RawNudgerData = {
   code: string; // "3260014791012",
@@ -15,13 +16,37 @@ type RawNudgerData = {
   url: string; // ""
 };
 
+@Entity()
 class NudgerData implements Data {
-  input: string[];
-  output: string[];
+  @PrimaryGeneratedColumn({
+    type: "integer",
+  })
+  id?: number;
+
+  @Column()
+  barcode_ean_13: string;
+
+  @Column()
+  country: string;
 
-  constructor({ code, gs1_country }: RawNudgerData) {
-    this.input = [code];
-    this.output = [gs1_country];
+  constructor(code: string, gs1_country: string) {
+    this.barcode_ean_13 = code;
+    this.country = gs1_country;
+  }
+
+  fromRaw({ code, gs1_country }: RawNudgerData): NudgerData {
+    if (!code || !gs1_country || code.length !== 13) {
+      throw new InvalidData("Invalid data");
+    }
+    return new NudgerData(code, gs1_country);
+  }
+
+  asData(nudgerData: NudgerData): any {
+    return {
+      "Barcode (EAN 13)": nudgerData.barcode_ean_13,
+      Country: nudgerData.country,
+    };
   }
 }
+34 -6
@@ -1,17 +1,45 @@
-import { Data } from "./";
+import { Data, InvalidData } from "./";
+import { Column, Entity, PrimaryColumn, PrimaryGeneratedColumn } from "typeorm";
 
 type RawOpenFoodFactsData = {
   code: string;
   countries_en: string;
 };
 
+@Entity()
 class OpenFoodFactsData implements Data {
-  input: string[] = [];
-  output: string[] = [];
+  @PrimaryGeneratedColumn({
+    type: "integer",
+  })
+  id?: number;
+
+  @Column()
+  barcode_ean_13: string;
+
+  @Column()
+  country: string;
 
-  constructor({ code, countries_en }: RawOpenFoodFactsData) {
-    this.input = [code];
-    this.output = [countries_en];
+  constructor(code: string, gs1_country: string) {
+    this.barcode_ean_13 = code;
+    this.country = gs1_country;
+  }
+
+  fromRaw({
+    code,
+    countries_en,
+  }: RawOpenFoodFactsData): OpenFoodFactsData {
+    if (!code || !countries_en || code.length !== 13) {
+      throw new InvalidData("Invalid data");
+    }
+    return new OpenFoodFactsData(code, countries_en);
+  }
+
+  asData(openData: OpenFoodFactsData): any {
+    return {
+      "Barcode (EAN 13)": openData.barcode_ean_13,
+      Country: openData.country,
+    };
   }
 }
+32 -7
@@ -1,19 +1,44 @@
-import { Data } from "./";
+import { Data, InvalidData } from "./";
+import { Column, Entity, PrimaryGeneratedColumn } from "typeorm";
 
-type RawSmolaData = {
+type RawWorldCitiesData = {
   name: string;
   country: string;
   subcountry: string;
   geonameid: string;
 };
 
+@Entity()
 class WorldCitiesData implements Data {
-  input: string[];
-  output: string[];
+  @PrimaryGeneratedColumn({
+    type: "integer",
+  })
+  id?: number;
+
+  @Column()
+  geoname_id: string;
+
+  @Column()
+  country: string;
 
-  constructor({ geonameid, country }: RawSmolaData) {
-    this.input = [geonameid];
-    this.output = [country];
+  constructor(geonameId: string, country: string) {
+    this.geoname_id = geonameId;
+    this.country = country;
+  }
+
+  fromRaw({ geonameid, country }: RawWorldCitiesData): WorldCitiesData {
+    if (!geonameid || !country || geonameid.length !== 6) {
+      throw new InvalidData("Invalid data");
+    }
+    return new WorldCitiesData(geonameid, country);
+  }
+
+  asData(worldCitiesData: WorldCitiesData): any {
+    return {
+      "Geoname ID": worldCitiesData.geoname_id,
+      Country: worldCitiesData.country,
+    };
   }
 }
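For reference, a minimal sketch (not part of this commit) of how one of these entities could be persisted and read back once AppDataSource has been initialized; the import paths and sample values are assumptions.

// Sketch, assumed paths and values: save one WorldCitiesData row, then count rows.
import { AppDataSource } from "../../AppDataSource";
import { WorldCitiesData } from "./";

async function demo(): Promise<void> {
  // fromRaw requires a 6-character geonameid, so use one here as well
  const row = new WorldCitiesData("293397", "Israel");
  await AppDataSource.manager.save(row); // INSERT; id is generated by MariaDB

  const repo = AppDataSource.getRepository(WorldCitiesData);
  console.log("stored rows:", await repo.count());
}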
+1 -1
@@ -1,4 +1,4 @@
-export { default as Data, DataConstructor } from "./Data";
+export { default as Data, InvalidData } from "./Data";
 export { default as NudgerData } from "./NudgerData";
 export { default as OpenFoodFactsData } from "./OpenFoodFactsData";
+106 -62
@@ -1,18 +1,19 @@
-import { pipeline, Transform } from "node:stream";
+import { pipeline, Transform, Writable } from "node:stream";
 import { promisify } from "node:util";
-import * as fs from "node:fs";
-import * as readline from "node:readline";
-import CacheService from "../CacheService";
+import { Validator } from "jsonschema";
 import FileService from "../FileService";
 import { ArchiveFactory, ArchiveType } from "../archive";
 import { ParserFactory, ParserType } from "../parser";
-import { Data, DataConstructor } from "../data";
+import { Data, InvalidData } from "../data";
+import { AppDataSource } from "../../AppDataSource";
+import { EntityManager, EntityTarget, Repository } from "typeorm";
 
 type DatasetParams = {
   id: string;
-  dataType: DataConstructor<Data>;
+  dataConstructor: (params: any) => Data;
+  dataType: Data;
   source: string;
   file: string;
   archiveType: ArchiveType;
@@ -27,14 +28,14 @@ type DatasetOptions = {
 /**
  * Represents a dataset that can be loaded and queried
  */
-class Dataset {
+class Dataset<D extends Data> {
   readonly id: string;
   readonly source: string;
   readonly file: string;
   readonly archiveType: ArchiveType;
   readonly parserType: ParserType;
-  readonly cachePath: string;
-  private dataType: DataConstructor<Data>;
+  readonly dataConstructor: (params: any) => Data;
+  readonly dataType: Data;
   private options?: DatasetOptions;
 
   /**
@@ -44,27 +45,28 @@ class Dataset {
    * @param file - The name of the file in the archive
    * @param dataType - The constructor of the data class
    * @param archiveType - The type of the archive
-   * @param datasetType - The type of the dataset
+   * @param dataConstructor - The type of the dataset
+   * @param parserType
    * @param options - Additional options for the dataset
    */
   constructor({
     id,
     source,
     file,
+    dataConstructor,
     dataType,
     archiveType,
     parserType,
     options,
   }: DatasetParams) {
     this.id = id;
-    this.dataType = dataType;
+    this.dataConstructor = dataConstructor;
     this.source = source;
     this.file = file;
+    this.dataType = dataType;
     this.archiveType = archiveType;
     this.parserType = parserType;
     this.options = options;
-    this.cachePath = CacheService.getCachePath(this.source, ".json");
   }
 
   /**
@@ -73,10 +75,12 @@ class Dataset {
    * @throws {Error} - If the dataset cannot be loaded
    */
   public async load(): Promise<void> {
-    if (CacheService.isCached(this.source, ".json")) {
-      console.log(`Already cached: ${this.source}`);
-      return;
-    }
+    // const repository: Repository<T> = AppDataSource.getRepository<T>(Data);
+    // if ((await repository.count()) > 0) {
+    //   console.log(`Already cached: ${this.source}`);
+    //   return;
+    // }
 
     const archive = ArchiveFactory.getArchive(this.archiveType);
     const parser = ParserFactory.getParser(this.parserType);
@@ -84,31 +88,47 @@ class Dataset {
     const pipelineAsync = promisify(pipeline);
     console.log(`Download: ${this.source}`);
 
+    // Start transaction
+    await AppDataSource.manager.transaction(async (manager) => {
       await pipelineAsync(
         await FileService.getFileStream(this.source),
         archive.extract(this.file),
         parser.parse(this.options?.parser),
-        Dataset.transformToData(this.dataType),
-        FileService.createWriteStream(this.cachePath)
+        Dataset.transformToData(this.dataConstructor, manager),
+        new Writable({
+          objectMode: true,
+          write(chunk, _, callback) {
+            callback();
+          },
+        })
       )
         .then(() => {
           console.log(`Loaded: ${this.source}`);
         })
         .catch((err) => {
           console.error(`Failed to load dataset: ${this.source}`);
-          FileService.deleteFile(this.cachePath);
           throw err;
         });
+    });
   }
 
-  private static transformToData(dataType: DataConstructor<Data>): Transform {
+  private static transformToData(
+    dataType: (params: any) => Data,
+    manager: EntityManager
+  ): Transform {
     return new Transform({
       objectMode: true,
-      transform(chunk: object, _, callback) {
-        const data: Data = new dataType(chunk);
-        this.push(JSON.stringify(data) + "\n");
+      async transform(chunk: object, _, callback) {
+        try {
+          const data: Data = dataType(chunk);
+          await manager.save(data);
           callback(null, JSON.stringify(data) + "\n");
+        } catch (err: any) {
+          if (err instanceof InvalidData) {
+            callback(null, "");
+          } else callback(err);
+        }
       },
     });
   }
@@ -118,51 +138,75 @@ class Dataset {
    * @param length - The number of data entries to get (default: 10)
    * @param schema - Schema of the expected data returned
    */
-  public get(
-    length: number = 10,
-    schema: { input: string[] | undefined; output: string[] | undefined }
-  ): Promise<any[]> {
+  public async get(length: number = 10, schema: {}): Promise<any[]> {
+    const dataRepository = AppDataSource.manager.getRepository<T>(
+      this.dataType as EntityTarget<T>
+    );
+    const datas = await dataRepository
+      .createQueryBuilder("data")
+      .orderBy("RAND()") // RAND() randomizes the row order
+      .limit(length) // limit the number of results
+      .getMany();
+
     return new Promise((resolve, reject) => {
       let count: number = 0;
-      const results: any[] = [];
+      const results: Data[] = [];
+      const validator = new Validator();
 
-      const stream = fs.createReadStream(this.cachePath, { encoding: "utf8" });
-      const rl = readline.createInterface({
-        input: stream,
-        crlfDelay: Infinity,
-      });
-
-      rl.on("line", (line) => {
-        if (count < length) {
-          const data: Data = JSON.parse(line) as Data;
-          // Create an object with the input and output values according to the schema
-          const obj: any = {};
-          schema.input?.forEach((input: string, index: number) => {
-            obj[input] = data.input[index];
-          });
-          schema.output?.forEach((output, index) => {
-            obj[output] = data.output[index];
-          });
-          // Add the object to the results
-          results.push(obj);
+      datas.forEach((data) => {
+        let randomizedData = D.fromRaw(data);
+        // this.dataConstructor(data);
+
+        if (validator.validate(randomizedData, schema)) {
+          results.push(randomizedData);
           count++;
-        } else {
-          rl.close(); // Close the stream once we have read the n objects
         }
       });
-
-      // When the stream has finished or has been closed.
-      rl.on("close", () => {
-        resolve(results); // Return the n objects read
-      });
-
-      // Handle errors on the read stream
-      rl.on("error", (err) => {
-        reject(err);
-      });
+      return resolve(results);
     });
+    //
+    // // const stream = fs.createReadStream(this.cachePath, { encoding: "utf8" });
+    // // const rl = readline.createInterface({
+    // //   input: stream,
+    // //   crlfDelay: Infinity,
+    // // });
+    // //
+    // //
+    // // rl.on("line", (line) => {
+    // //   if (count < length) {
+    // //     const data: Data = JSON.parse(line) as Data;
+    // //     if (validator.validate(data, schema)) {
+    // //       results.push(data);
+    // //       count++;
+    // //     }
+    // //
+    // //     // // For each object, retrieve it and check that the schema is valid
+    // //
+    // //     // schema.input?.forEach((input: string, index: number) => {
+    // //     //   obj[input] = data.input[index];
+    // //     // });
+    // //     // schema.output?.forEach((output, index) => {
+    // //     //   obj[output] = data.output[index];
+    // //     // });
+    // //
+    // //     // // Add the object to the results
+    // //     // count++;
+    // //   } else {
+    // //     rl.close(); // Close the stream once we have read the n objects
+    // //   }
+    // // });
+    // //
+    // // // When the stream has finished or has been closed.
+    // // rl.on("close", () => {
+    // //   resolve(results); // Return the n objects read
+    // // });
+    // //
+    // // // Handle errors on the read stream
+    // // rl.on("error", (err) => {
+    // //   reject(err);
+    // // });
+    // // });
   }
 }
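A note on the jsonschema call in get() above: Validator.validate returns a ValidatorResult object, which is always truthy, so conformance is normally read from its .valid flag. A hedged sketch with an illustrative schema and instance:

// Sketch, illustrative schema and instance: reading a jsonschema validation result.
import { Validator } from "jsonschema";

const validator = new Validator();
const schema = {
  type: "object",
  properties: { Country: { type: "string" } },
  required: ["Country"],
};

const result = validator.validate({ Country: "France" }, schema);
console.log(result.valid);  // true when the instance matches the schema
console.log(result.errors); // validation details when it does not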
+35 -32
@@ -1,44 +1,47 @@
-import { NudgerData, OpenFoodFactsData, WorldCitiesData } from "../data";
+import { Data, NudgerData, OpenFoodFactsData, WorldCitiesData } from "../data";
 import { ArchiveType } from "../archive";
 import { Dataset } from "./";
 import { ParserType } from "../parser";
 
 class DatasetCollection {
-  public static datasets: Dataset[] = [
-    new Dataset({
-      id: "nudger",
-      source:
-        "https://files.opendatarchives.fr/data.cquest.org/open4goods/gtin-open-data.zip",
-      file: "open4goods-full-gtin-dataset.csv",
-      dataType: NudgerData,
-      archiveType: ArchiveType.ZIP,
-      parserType: ParserType.CSV,
-      options: {
-        parser: {
-          delimiter: ",",
-        },
-      },
-    }),
-    new Dataset({
-      id: "openfoodfacts",
-      source:
-        "https://static.openfoodfacts.org/data/en.openfoodfacts.org.products.csv.gz",
-      file: "en.openfoodfacts.org.products.csv",
-      dataType: OpenFoodFactsData,
-      archiveType: ArchiveType.GZIP,
-      parserType: ParserType.CSV,
-      options: {
-        parser: {
-          delimiter: "\t",
-          quote: null,
-        },
-      },
-    }),
-    new Dataset({
+  public static datasets: Dataset<Data>[] = [
+    // new Dataset({
+    //   id: "nudger",
+    //   source:
+    //     "https://files.opendatarchives.fr/data.cquest.org/open4goods/gtin-open-data.zip",
+    //   file: "open4goods-full-gtin-dataset.csv",
+    //   dataConstructor: NudgerData.fromRaw,
+    //   dataType: NudgerData,
+    //   archiveType: ArchiveType.ZIP,
+    //   parserType: ParserType.CSV,
+    //   options: {
+    //     parser: {
+    //       delimiter: ",",
+    //     },
+    //   },
+    // }),
+    // new Dataset({
+    //   id: "openfoodfacts",
+    //   source:
+    //     "https://static.openfoodfacts.org/data/en.openfoodfacts.org.products.csv.gz",
+    //   file: "en.openfoodfacts.org.products.csv",
+    //   dataConstructor: OpenFoodFactsData.fromRaw,
+    //   dataType: OpenFoodFactsData,
+    //   archiveType: ArchiveType.GZIP,
+    //   parserType: ParserType.CSV,
+    //   options: {
+    //     parser: {
+    //       delimiter: "\t",
+    //       quote: null,
+    //     },
+    //   },
+    // }),
+    new Dataset<WorldCitiesData>({
       id: "world-cities",
       source:
         "https://raw.githubusercontent.com/datasets/world-cities/refs/heads/main/data/world-cities.csv",
       file: "world-cities.csv",
+      dataConstructor: WorldCitiesData.fromRaw,
       dataType: WorldCitiesData,
       archiveType: ArchiveType.NONE,
       parserType: ParserType.CSV,
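For reference, a minimal sketch (not part of this commit) of how the registered datasets might be loaded once the connection is up; the import style and iteration are assumptions.

// Sketch, assumed wiring: load every registered dataset into MariaDB at startup.
import { AppDataSource } from "../../AppDataSource";
import DatasetCollection from "./DatasetCollection"; // export style assumed

AppDataSource.initialize().then(async () => {
  for (const dataset of DatasetCollection.datasets) {
    await dataset.load(); // download, parse and save rows inside a transaction
  }
  console.log("All datasets loaded");
});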
+39 -6
@@ -6,6 +6,8 @@ import {
   Name_of_DMN_InputClause,
   Name_of_DMN_OutputClause,
   Definitions,
+  InputClause,
+  OutputClause,
 } from "./interfaces";
 
 export class DMN {
@@ -17,18 +19,49 @@ export class DMN {
   }
 
   public static getSchema(dmnDefinitions: Definitions) {
-    const descisions: Decision[] = dmnDefinitions.drgElement.filter((element) =>
+    const { inputs, outputs } = this.getInputOutput(dmnDefinitions);
+    const properties = this.getProperties(inputs || [], outputs || []);
+    return {
+      type: "object",
+      properties,
+      required: Object.keys(properties),
+    };
+  }
+
+  private static getInputOutput(dmnDefinitions: Definitions) {
+    const decisions: Decision[] = dmnDefinitions.drgElement.filter((element) =>
       Is_DMN_Decision(element)
     );
-    const { input, output } = descisions
+    const { input: inputs, output: outputs } = decisions
       .map((decision) => decision.decisionLogic)
       .filter((decisionLogic) => Is_DMN_DecisionTable(decisionLogic))[0];
-    // TODO generate json schema
-
-    return {
-      input: input?.map((input) => Name_of_DMN_InputClause(input)),
-      output: output?.map((output) => Name_of_DMN_OutputClause(output)),
-    };
+    return { inputs, outputs };
+  }
+
+  private static getProperties(inputs: InputClause[], outputs: OutputClause[]) {
+    let properties = {};
+
+    inputs.forEach((input) => {
+      const name = Name_of_DMN_InputClause(input) as string;
+      const type = input.typeRef || "string";
+      // @ts-ignore
+      properties[name] = {
+        type,
+      };
+    });
+
+    outputs.forEach((output) => {
+      const name = Name_of_DMN_OutputClause(output) as string;
+      const type = output.typeRef || "string";
+      // @ts-ignore
+      properties[name] = {
+        type,
+      };
+    });
+
+    return properties;
   }
 }
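To illustrate the schema shape getSchema now returns, a hedged example: for a decision table with one input clause labelled "Geoname ID" and one output clause labelled "Country" (clause names are illustrative, and typeRef is absent so the type defaults to "string"), the result would be:

// Example, illustrative clause names: JSON Schema produced by DMN.getSchema.
const exampleSchema = {
  type: "object",
  properties: {
    "Geoname ID": { type: "string" },
    Country: { type: "string" },
  },
  required: ["Geoname ID", "Country"],
};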