[Draft] Stockage des données sur MariaDB avec TypeORM

This commit is contained in:
Lucàs
2024-10-23 17:07:23 +02:00
parent c95c92e987
commit cde872ca55
14 changed files with 323 additions and 144 deletions
+1 -1
View File
@@ -3,7 +3,7 @@ version: "3.1"
services:
mariadb:
image: mariadb:latest
restart: "always"
restart: "no"
environment:
MYSQL_ROOT_PASSWORD: root
MYSQL_DATABASE: db
+10
View File
@@ -15,6 +15,7 @@
"dotenv": "^16.4.5",
"express": "^4.21.0",
"fast-csv": "^5.0.1",
"jsonschema": "^1.4.1",
"mysql": "^2.18.1",
"reflect-metadata": "^0.2.2",
"tar-stream": "^3.1.7",
@@ -1423,6 +1424,15 @@
"graceful-fs": "^4.1.6"
}
},
"node_modules/jsonschema": {
"version": "1.4.1",
"resolved": "https://registry.npmjs.org/jsonschema/-/jsonschema-1.4.1.tgz",
"integrity": "sha512-S6cATIPVv1z0IlxdN+zUk5EPjkGCdnhN4wVSBlvoUO1tOLJootbo9CquNJmbIh4yikWHiUedhRYrNPn1arpEmQ==",
"license": "MIT",
"engines": {
"node": "*"
}
},
"node_modules/lodash.escaperegexp": {
"version": "4.1.2",
"resolved": "https://registry.npmjs.org/lodash.escaperegexp/-/lodash.escaperegexp-4.1.2.tgz",
+1
View File
@@ -21,6 +21,7 @@
"dotenv": "^16.4.5",
"express": "^4.21.0",
"fast-csv": "^5.0.1",
"jsonschema": "^1.4.1",
"mysql": "^2.18.1",
"reflect-metadata": "^0.2.2",
"tar-stream": "^3.1.7",
+7 -2
View File
@@ -1,6 +1,11 @@
import "reflect-metadata";
import { DataSource } from "typeorm";
import { Log } from "./entity/Log";
import {
NudgerData,
OpenFoodFactsData,
WorldCitiesData,
} from "./services/data";
export const AppDataSource = new DataSource({
type: "mariadb",
@@ -10,8 +15,8 @@ export const AppDataSource = new DataSource({
password: "root",
database: "db",
synchronize: true,
logging: true,
entities: [Log],
logging: false,
entities: [Log, NudgerData, OpenFoodFactsData, WorldCitiesData],
subscribers: [],
migrations: [],
});
+1
View File
@@ -49,6 +49,7 @@ router.post("/randomize/:id", async (req: Request, res: Response) => {
const dmn: Definitions = await DMN.parse(req.body);
const schema = DMN.getSchema(dmn);
console.log(JSON.stringify(schema, null, 2));
const data: Data[] = await dataset.get(size, schema);
+3 -3
View File
@@ -22,8 +22,8 @@ class CacheService {
}
}
if (!existsSync(CacheService.CACHE_DIR)) {
mkdirSync(CacheService.CACHE_DIR);
}
// if (!existsSync(CacheService.CACHE_DIR)) {
// mkdirSync(CacheService.CACHE_DIR);
// }
export default CacheService;
+8 -4
View File
@@ -1,9 +1,13 @@
// Base shape shared by all dataset records: raw input/output value arrays
// plus an optional numeric primary key (unset until the row is persisted).
// NOTE(review): the entity classes in this commit no longer carry
// input/output fields — confirm this interface is still accurate.
interface Data {
input: any[];
output: any[];
id?: number;
}
// Constructor signature used to instantiate Data subclasses generically.
type DataConstructor<T extends Data> = new (...args: any[]) => T;
// Error thrown when a raw record fails validation (missing field, bad length);
// callers may catch it to skip the offending row instead of aborting.
class InvalidData extends Error {
constructor(message: string) {
super(message);
this.name = "Invalid data";
}
}
export default Data;
export { DataConstructor };
export { InvalidData };
+31 -6
View File
@@ -1,4 +1,5 @@
import { Data } from "./";
import { Data, InvalidData } from "./";
import { Column, Entity, PrimaryGeneratedColumn } from "typeorm";
type RawNudgerData = {
code: string; // "3260014791012",
@@ -15,13 +16,37 @@ type RawNudgerData = {
url: string; // ""
};
@Entity()
class NudgerData implements Data {
input: string[];
output: string[];
@PrimaryGeneratedColumn({
type: "integer",
})
id?: number;
constructor({ code, gs1_country }: RawNudgerData) {
this.input = [code];
this.output = [gs1_country];
@Column()
barcode_ean_13: string;
@Column()
country: string;
constructor(code: string, gs1_country: string) {
this.barcode_ean_13 = code;
this.country = gs1_country;
}
fromRaw({ code, gs1_country }: RawNudgerData): NudgerData {
if (!code || !gs1_country || code.length !== 13) {
throw new InvalidData("Invalid data");
}
return new NudgerData(code, gs1_country);
}
asData(nudgerData: NudgerData): any {
return {
"Barcode (EAN 13)": nudgerData.barcode_ean_13,
Country: nudgerData.country,
};
}
}
+34 -6
View File
@@ -1,17 +1,45 @@
import { Data } from "./";
import { Data, InvalidData } from "./";
import { Column, Entity, PrimaryColumn, PrimaryGeneratedColumn } from "typeorm";
type RawOpenFoodFactsData = {
code: string;
countries_en: string;
};
@Entity()
class OpenFoodFactsData implements Data {
input: string[] = [];
output: string[] = [];
@PrimaryGeneratedColumn({
type: "integer",
})
id?: number;
constructor({ code, countries_en }: RawOpenFoodFactsData) {
this.input = [code];
this.output = [countries_en];
@Column()
barcode_ean_13: string;
@Column()
country: string;
constructor(code: string, gs1_country: string) {
this.barcode_ean_13 = code;
this.country = gs1_country;
}
fromRaw({
code,
countries_en,
}: RawOpenFoodFactsData): OpenFoodFactsData {
if (!code || !countries_en || code.length !== 13) {
throw new InvalidData("Invalid data");
}
return new OpenFoodFactsData(code, countries_en);
}
asData(openData: OpenFoodFactsData): any {
return {
"Barcode (EAN 13)": openData.barcode_ean_13,
Country: openData.country,
};
}
}
+32 -7
View File
@@ -1,19 +1,44 @@
import { Data } from "./";
import { Data, InvalidData } from "./";
import { Column, Entity, PrimaryGeneratedColumn } from "typeorm";
type RawSmolaData = {
type RawWorldCitiesData = {
name: string;
country: string;
subcountry: string;
geonameid: string;
};
@Entity()
class WorldCitiesData implements Data {
input: string[];
output: string[];
@PrimaryGeneratedColumn({
type: "integer",
})
id?: number;
constructor({ geonameid, country }: RawSmolaData) {
this.input = [geonameid];
this.output = [country];
@Column()
geoname_id: string;
@Column()
country: string;
constructor(geonameId: string, country: string) {
this.geoname_id = geonameId;
this.country = country;
}
fromRaw({ geonameid, country }: RawWorldCitiesData): WorldCitiesData {
if (!geonameid || !country || geonameid.length !== 6) {
throw new InvalidData("Invalid data");
}
return new WorldCitiesData(geonameid, country);
}
asData(worldCitiesData: WorldCitiesData): any {
return {
"Geoname ID": worldCitiesData.geoname_id,
Country: worldCitiesData.country,
};
}
}
+1 -1
View File
@@ -1,4 +1,4 @@
export { default as Data, DataConstructor } from "./Data";
export { default as Data, InvalidData } from "./Data";
export { default as NudgerData } from "./NudgerData";
export { default as OpenFoodFactsData } from "./OpenFoodFactsData";
+119 -75
View File
@@ -1,18 +1,19 @@
import { pipeline, Transform } from "node:stream";
import { pipeline, Transform, Writable } from "node:stream";
import { promisify } from "node:util";
import * as fs from "node:fs";
import * as readline from "node:readline";
import { Validator } from "jsonschema";
import CacheService from "../CacheService";
import FileService from "../FileService";
import { ArchiveFactory, ArchiveType } from "../archive";
import { ParserFactory, ParserType } from "../parser";
import { Data, DataConstructor } from "../data";
import { Data, InvalidData} from "../data";
import { AppDataSource } from "../../AppDataSource";
import { EntityManager, EntityTarget, Repository } from "typeorm";
type DatasetParams = {
id: string;
dataType: DataConstructor<Data>;
dataConstructor: (params: any) => Data;
dataType: Data;
source: string;
file: string;
archiveType: ArchiveType;
@@ -27,14 +28,14 @@ type DatasetOptions = {
/**
* Represents a dataset that can be loaded and queried
*/
class Dataset {
class Dataset<D extends Data> {
readonly id: string;
readonly source: string;
readonly file: string;
readonly archiveType: ArchiveType;
readonly parserType: ParserType;
readonly cachePath: string;
private dataType: DataConstructor<Data>;
readonly dataConstructor: (params: any) => Data;
readonly dataType: Data;
private options?: DatasetOptions;
/**
@@ -44,27 +45,28 @@ class Dataset {
* @param file - The name of the file in the archive
* @param dataType - The constructor of the data class
* @param archiveType - The type of the archive
* @param datasetType - The type of the dataset
* @param dataConstructor - The type of the dataset
* @param parserType
* @param options - Additional options for the dataset
*/
constructor({
id,
source,
file,
dataConstructor,
dataType,
archiveType,
parserType,
options,
}: DatasetParams) {
this.id = id;
this.dataType = dataType;
this.dataConstructor = dataConstructor;
this.source = source;
this.file = file;
this.dataType = dataType;
this.archiveType = archiveType;
this.parserType = parserType;
this.options = options;
this.cachePath = CacheService.getCachePath(this.source, ".json");
}
/**
@@ -73,10 +75,12 @@ class Dataset {
* @throws {Error} - If the dataset cannot be loaded
*/
public async load(): Promise<void> {
if (CacheService.isCached(this.source, ".json")) {
console.log(`Already cached: ${this.source}`);
return;
}
// const repository: Repository<T> = AppDataSource.getRepository<T>(Data);
// if ((await repository.count()) > 0) {
// console.log(`Already cached: ${this.source}`);
// return;
// }
const archive = ArchiveFactory.getArchive(this.archiveType);
const parser = ParserFactory.getParser(this.parserType);
@@ -84,31 +88,47 @@ class Dataset {
const pipelineAsync = promisify(pipeline);
console.log(`Download: ${this.source}`);
await pipelineAsync(
await FileService.getFileStream(this.source),
archive.extract(this.file),
parser.parse(this.options?.parser),
Dataset.transformToData(this.dataType),
FileService.createWriteStream(this.cachePath)
)
.then(() => {
console.log(`Loaded: ${this.source}`);
})
.catch((err) => {
console.error(`Failed to load dataset: ${this.source}`);
FileService.deleteFile(this.cachePath);
throw err;
});
// Start transaction
await AppDataSource.manager.transaction(async (manager) => {
await pipelineAsync(
await FileService.getFileStream(this.source),
archive.extract(this.file),
parser.parse(this.options?.parser),
Dataset.transformToData(this.dataConstructor, manager),
new Writable({
objectMode: true,
write(chunk, _, callback) {
callback();
},
})
)
.then(() => {
console.log(`Loaded: ${this.source}`);
})
.catch((err) => {
console.error(`Failed to load dataset: ${this.source}`);
throw err;
});
});
}
private static transformToData(dataType: DataConstructor<Data>): Transform {
private static transformToData(
dataType: (params: any) => Data,
manager: EntityManager
): Transform {
return new Transform({
objectMode: true,
transform(chunk: object, _, callback) {
const data: Data = new dataType(chunk);
this.push(JSON.stringify(data) + "\n");
callback(null, JSON.stringify(data) + "\n");
async transform(chunk: object, _, callback) {
try {
const data: Data = dataType(chunk);
await manager.save(data);
callback(null, JSON.stringify(data) + "\n");
} catch (err: any) {
if (err instanceof InvalidData) {
callback(null, "");
} else callback(err);
}
},
});
}
@@ -118,51 +138,75 @@ class Dataset {
* @param length - The number of data entries to get (default: 10)
* @param schema - Schema of the expected data returned
*/
public get(
length: number = 10,
schema: { input: string[] | undefined; output: string[] | undefined }
): Promise<any[]> {
public async get(length: number = 10, schema: {}): Promise<any[]> {
const dataRepository = AppDataSource.manager.getRepository<T>(
this.dataType as EntityTarget<T>
);
const datas = await dataRepository
.createQueryBuilder("data")
.orderBy("RAND()") // RAND() to randomize the row order
.limit(length) // limit the number of results
.getMany();
return new Promise((resolve, reject) => {
let count: number = 0;
const results: any[] = [];
const results: Data[] = [];
const validator = new Validator();
const stream = fs.createReadStream(this.cachePath, { encoding: "utf8" });
const rl = readline.createInterface({
input: stream,
crlfDelay: Infinity,
});
datas.forEach((data) => {
let randomizedData = D.fromRaw(data);
// this.dataConstructor(data);
rl.on("line", (line) => {
if (count < length) {
const data: Data = JSON.parse(line) as Data;
// Create an object with the input and output values according to the schema
const obj: any = {};
schema.input?.forEach((input: string, index: number) => {
obj[input] = data.input[index];
});
schema.output?.forEach((output, index) => {
obj[output] = data.output[index];
});
// Add the object to the results
results.push(obj);
if (validator.validate(randomizedData, schema)) {
results.push(randomizedData);
count++;
} else {
rl.close(); // close the stream once the n objects have been read
}
});
// When the stream has finished or has been closed.
rl.on("close", () => {
resolve(results); // return the n objects that were read
});
// Handle read-stream errors
rl.on("error", (err) => {
reject(err);
});
return resolve(results);
});
// //
// // const stream = fs.createReadStream(this.cachePath, { encoding: "utf8" });
// // const rl = readline.createInterface({
// // input: stream,
// // crlfDelay: Infinity,
// // });
// //
// //
// // rl.on("line", (line) => {
// // if (count < length) {
// // const data: Data = JSON.parse(line) as Data;
// // if (validator.validate(data, schema)) {
// // results.push(data);
// // count++;
// // }
// //
// // // // Pour chaque objet, récupérer l'objet et vérifier que le schéma est valide
// //
// // // schema.input?.forEach((input: string, index: number) => {
// // // obj[input] = data.input[index];
// // // });
// // // schema.output?.forEach((output, index) => {
// // // obj[output] = data.output[index];
// // // });
// //
// // // // Add the object to the results
// // // count++;
// // } else {
// // rl.close(); // Fermer le flux si on a atteint les n objets
// // }
// // });
// //
// // // Quand le flux est terminé ou a été fermé.
// // rl.on("close", () => {
// // resolve(results); // Renvoie les n objets lus
// // });
// //
// // // Gérer les erreurs du flux de lecture
// // rl.on("error", (err) => {
// // reject(err);
// // });
// // });
}
}
+35 -32
View File
@@ -1,44 +1,47 @@
import { NudgerData, OpenFoodFactsData, WorldCitiesData } from "../data";
import { Data, NudgerData, OpenFoodFactsData, WorldCitiesData } from "../data";
import { ArchiveType } from "../archive";
import { Dataset } from "./";
import { ParserType } from "../parser";
class DatasetCollection {
public static datasets: Dataset[] = [
new Dataset({
id: "nudger",
source:
"https://files.opendatarchives.fr/data.cquest.org/open4goods/gtin-open-data.zip",
file: "open4goods-full-gtin-dataset.csv",
dataType: NudgerData,
archiveType: ArchiveType.ZIP,
parserType: ParserType.CSV,
options: {
parser: {
delimiter: ",",
},
},
}),
new Dataset({
id: "openfoodfacts",
source:
"https://static.openfoodfacts.org/data/en.openfoodfacts.org.products.csv.gz",
file: "en.openfoodfacts.org.products.csv",
dataType: OpenFoodFactsData,
archiveType: ArchiveType.GZIP,
parserType: ParserType.CSV,
options: {
parser: {
delimiter: "\t",
quote: null,
},
},
}),
new Dataset({
public static datasets: Dataset<Data>[] = [
// new Dataset({
// id: "nudger",
// source:
// "https://files.opendatarchives.fr/data.cquest.org/open4goods/gtin-open-data.zip",
// file: "open4goods-full-gtin-dataset.csv",
// dataConstructor: NudgerData.fromRaw,
// dataType: NudgerData,
// archiveType: ArchiveType.ZIP,
// parserType: ParserType.CSV,
// options: {
// parser: {
// delimiter: ",",
// },
// },
// }),
// new Dataset({
// id: "openfoodfacts",
// source:
// "https://static.openfoodfacts.org/data/en.openfoodfacts.org.products.csv.gz",
// file: "en.openfoodfacts.org.products.csv",
// dataConstructor: OpenFoodFactsData.fromRaw,
// dataType: OpenFoodFactsData,
// archiveType: ArchiveType.GZIP,
// parserType: ParserType.CSV,
// options: {
// parser: {
// delimiter: "\t",
// quote: null,
// },
// },
// }),
new Dataset<WorldCitiesData>({
id: "world-cities",
source:
"https://raw.githubusercontent.com/datasets/world-cities/refs/heads/main/data/world-cities.csv",
file: "world-cities.csv",
dataConstructor: WorldCitiesData.fromRaw,
dataType: WorldCitiesData,
archiveType: ArchiveType.NONE,
parserType: ParserType.CSV,
+40 -7
View File
@@ -6,6 +6,8 @@ import {
Name_of_DMN_InputClause,
Name_of_DMN_OutputClause,
Definitions,
InputClause,
OutputClause,
} from "./interfaces";
export class DMN {
@@ -17,18 +19,49 @@ export class DMN {
}
/**
 * Derives a JSON schema from DMN definitions: one property per
 * input/output clause, with every property marked as required.
 * (Removed a stale pre-commit line that had been fused into this body.)
 */
public static getSchema(dmnDefinitions: Definitions) {
  const { inputs, outputs } = this.getInputOutput(dmnDefinitions);
  const properties = this.getProperties(inputs || [], outputs || []);
  return {
    type: "object",
    properties,
    required: Object.keys(properties),
  };
}
/**
 * Extracts the input/output clauses of the first decision table found
 * among the DMN decisions.
 * @throws {Error} when no decision carries a decision table — previously an
 * unguarded `[0]` destructure produced a cryptic TypeError instead.
 */
private static getInputOutput(dmnDefinitions: Definitions) {
  const decisions: Decision[] = dmnDefinitions.drgElement.filter((element) =>
    Is_DMN_Decision(element)
  );
  const tables = decisions
    .map((decision) => decision.decisionLogic)
    .filter((decisionLogic) => Is_DMN_DecisionTable(decisionLogic));
  if (tables.length === 0) {
    throw new Error("No decision table found in DMN definitions");
  }
  const { input: inputs, output: outputs } = tables[0];
  return { inputs, outputs };
}
return {
input: input?.map((input) => Name_of_DMN_InputClause(input)),
output: output?.map((output) => Name_of_DMN_OutputClause(output)),
};
/**
 * Builds the JSON-schema `properties` map from DMN input/output clauses.
 * Falls back to "string" when a clause declares no typeRef.
 * NOTE(review): an output clause sharing a name with an input clause
 * overwrites it — confirm that is intended.
 */
private static getProperties(inputs: InputClause[], outputs: OutputClause[]) {
  // Typed record instead of `{}` + @ts-ignore keeps index assignment type-safe.
  const properties: Record<string, { type: string }> = {};
  inputs.forEach((input) => {
    const name = Name_of_DMN_InputClause(input) as string;
    const type = input.typeRef || "string";
    properties[name] = { type };
  });
  outputs.forEach((output) => {
    const name = Name_of_DMN_OutputClause(output) as string;
    const type = output.typeRef || "string";
    properties[name] = { type };
  });
  return properties;
}
}