diff --git a/src/routes/randomize.ts b/src/routes/randomize.ts
index 57e0a36..4e42fe9 100644
--- a/src/routes/randomize.ts
+++ b/src/routes/randomize.ts
@@ -1,7 +1,8 @@
 import { Router, Request, Response } from "express";
 import { DatasetCollection } from "../services/dataset";
-import DmnModdle from "dmn-moddle";
 import { DMN } from "../services/dmn/DMN";
+import { DMN_Definitions } from "../services/dmn/interfaces/DMN_Definitions";
+import { Data } from "../services/data";
 
 const router = Router();
 
@@ -21,12 +22,20 @@ router.post("/randomize/:id", async (req: Request, res: Response) => {
   );
   if (!dataset) return res.status(404).json({ status: "NOT_FOUND" });
 
-  const a: any = await DMN.parse(req.body);
+  const dmn: DMN_Definitions = await DMN.parse(req.body);
+  const schema = DMN.getSchema(dmn);
 
-  const { rootElement } = await new DmnModdle().fromXML(req.body);
-  console.log(rootElement);
+  const data: Data[] = await dataset.get(size, schema);
 
-  const data = await dataset.get(size);
+  // Randomize the data (temporary)
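+  // NOTE: a Math.random() comparator violates the sort contract, so this
+  // shuffle is biased. If it outlives "temporary", a Fisher-Yates pass
+  // would give a uniform shuffle, e.g.:
+  //   for (let i = data.length - 1; i > 0; i--) {
+  //     const j = Math.floor(Math.random() * (i + 1));
+  //     [data[i], data[j]] = [data[j], data[i]];
+  //   }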
+  data.sort(() => Math.random() - 0.5);
 
   return res.status(200).json({ status: "RANDOMIZED", data });
 });
diff --git a/src/services/data/Data.ts b/src/services/data/Data.ts
index d12be15..366f4b2 100644
--- a/src/services/data/Data.ts
+++ b/src/services/data/Data.ts
@@ -1,6 +1,9 @@
 interface Data {
-  input: string;
-  output: string;
+  input: any[];
+  output: any[];
 }
 
+type DataConstructor<T extends Data = Data> = new (...args: any[]) => T;
+
 export default Data;
+export { DataConstructor };
diff --git a/src/services/data/NudgerData.ts b/src/services/data/NudgerData.ts
index fc7d7c4..c6ddd0d 100644
--- a/src/services/data/NudgerData.ts
+++ b/src/services/data/NudgerData.ts
@@ -16,12 +16,12 @@ type RawNudgerData = {
 };
 
 class NudgerData implements Data {
-  input: string;
-  output: string;
+  input: string[];
+  output: string[];
 
-  constructor(rawData: RawNudgerData) {
-    this.input = rawData.code;
-    this.output = rawData.gs1_country;
+  constructor({ code, gs1_country }: RawNudgerData) {
+    this.input = [code];
+    this.output = [gs1_country];
   }
 }
diff --git a/src/services/data/index.ts b/src/services/data/index.ts
index 12c9b86..ace1f9d 100644
--- a/src/services/data/index.ts
+++ b/src/services/data/index.ts
@@ -1,3 +1,3 @@
-export { default as Data } from "./Data";
+export { default as Data, DataConstructor } from "./Data";
 export { default as NudgerData } from "./NudgerData";
diff --git a/src/services/dataset/Dataset.ts b/src/services/dataset/Dataset.ts
index 4527405..836a15e 100644
--- a/src/services/dataset/Dataset.ts
+++ b/src/services/dataset/Dataset.ts
@@ -9,45 +9,54 @@ import FileService from "../FileService";
 import { ArchiveFactory, ArchiveType } from "../archive";
 import { ParserFactory } from "../parser";
 import { DatasetType } from "./";
-import { Data } from "../data";
+import { Data, DataConstructor } from "../data";
+
+type DatasetOptions = {
+  id: string;
+  dataType: DataConstructor;
+  source: string;
+  file: string;
+  archiveType: ArchiveType;
+  datasetType: DatasetType;
+};
 
 /**
  * Represents a dataset that can be loaded and queried
  */
 class Dataset {
   readonly id: string;
-  readonly url: string;
-  readonly sourceFile: string;
+  readonly source: string;
+  readonly file: string;
   readonly archiveType: ArchiveType;
   readonly datasetType: DatasetType;
   readonly cachePath: string;
-  private dataConstructor: { new (rawData: object): Data };
+  private dataType: DataConstructor;
 
   /**
    * Create a new dataset instance
-   * @param dataConstructor - The constructor of the data class
    * @param id - The unique identifier of the dataset
-   * @param url - The URL of the dataset
-   * @param sourceFile - The file name of the dataset in the archive
+   * @param source - The URL of the dataset
+   * @param file - The name of the file in the archive
+   * @param dataType - The constructor of the data class
    * @param archiveType - The type of the archive
    * @param datasetType - The type of the dataset
    */
-  constructor(
-    dataConstructor: new (rawData: any) => Data,
-    id: string,
-    url: string,
-    sourceFile: string,
-    archiveType: ArchiveType,
-    datasetType: DatasetType
-  ) {
-    this.dataConstructor = dataConstructor;
+  constructor({
+    id,
+    source,
+    file,
+    dataType,
+    archiveType,
+    datasetType,
+  }: DatasetOptions) {
     this.id = id;
-    this.url = url;
-    this.sourceFile = sourceFile;
+    this.dataType = dataType;
+    this.source = source;
+    this.file = file;
     this.archiveType = archiveType;
     this.datasetType = datasetType;
-    this.cachePath = CacheService.getCachePath(this.url, ".json");
+    this.cachePath = CacheService.getCachePath(this.source, ".json");
   }
 
   /**
@@ -56,8 +65,8 @@ class Dataset {
    * @throws {Error} - If the dataset cannot be loaded
    */
   public async load(): Promise<void> {
-    if (CacheService.isCached(this.url, ".json")) {
-      console.log(`Already cached: ${this.url}`);
+    if (CacheService.isCached(this.source, ".json")) {
+      console.log(`Already cached: ${this.source}`);
       return;
     }
 
@@ -68,33 +77,42 @@ class Dataset {
-    const self = this;
-    console.log(`Download: ${this.url}`);
+    console.log(`Download: ${this.source}`);
     await pipelineAsync(
-      await FileService.getFileStream(this.url),
-      archive.extract(this.sourceFile),
+      await FileService.getFileStream(this.source),
+      archive.extract(this.file),
       parser.parse(),
-      new Transform({
-        objectMode: true,
-        transform(chunk: object, _, callback) {
-          const data: Data = new self.dataConstructor(
-            JSON.parse(chunk.toString())
-          );
-          this.push(JSON.stringify(data) + "\n");
-          callback(null, JSON.stringify(data) + "\n");
-        },
-      }),
+      Dataset.transformToData(this.dataType),
       FileService.createWriteStream(this.cachePath)
     );
   }
+
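+  /**
+   * Build a Transform stream that wraps each parsed row in the dataset's
+   * data class and emits it as one JSON object per line
+   * @param dataType - The constructor of the data class
+   */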
+  private static transformToData(dataType: DataConstructor): Transform {
+    return new Transform({
+      objectMode: true,
+      transform(chunk: object, _, callback) {
+        const data: Data = new dataType(JSON.parse(chunk.toString()));
+        callback(null, JSON.stringify(data) + "\n");
+      },
+    });
+  }
+
   /**
    * Get a number of data entries from the dataset
    * @param length - The number of data entries to get (default: 10)
+   * @param schema - Schema of the expected data returned
    */
-  public get(length: number = 10): Promise<Data[]> {
+  public get(
+    length: number = 10,
+    schema: { input: string[] | undefined; output: string[] | undefined }
+  ): Promise<Data[]> {
     return new Promise((resolve, reject) => {
       let count: number = 0;
-      const results: Data[] = [];
+      const results: any[] = [];
 
       const stream = fs.createReadStream(this.cachePath, { encoding: "utf8" });
       const rl = readline.createInterface({
@@ -104,18 +122,26 @@
       rl.on("line", (line) => {
         if (count < length) {
-          try {
-            results.push(JSON.parse(line) as Data);
-            count++;
-          } catch (err) {
-            console.error("Erreur lors du parsing de la ligne:", err);
-          }
+          const data: Data = JSON.parse(line) as Data;
+
+          // Create an object with the input and output values according to the schema
+          const obj: any = {};
+          schema.input?.forEach((input: string, index: number) => {
+            obj[input] = data.input[index];
+          });
+          schema.output?.forEach((output, index) => {
+            obj[output] = data.output[index];
+          });
+
+          // Add the object to the results
+          results.push(obj);
+          count++;
         } else {
           rl.close(); // Close the stream once we have read the n objects
         }
       });
 
-      // Quand le flux est terminé ou a été fermé
+      // When the stream has ended or been closed.
       rl.on("close", () => {
         resolve(results); // Return the n objects read
       });
diff --git a/src/services/dataset/DatasetCollection.ts b/src/services/dataset/DatasetCollection.ts
index 4c08836..49da200 100644
--- a/src/services/dataset/DatasetCollection.ts
+++ b/src/services/dataset/DatasetCollection.ts
@@ -4,14 +4,15 @@ import { Dataset, DatasetType } from "./";
 
 class DatasetCollection {
   public static datasets: Dataset[] = [
-    new Dataset(
-      NudgerData,
-      "nudger",
-      "https://files.opendatarchives.fr/data.cquest.org/open4goods/gtin-open-data.zip",
-      "open4goods-full-gtin-dataset.csv",
-      ArchiveType.ZIP,
-      DatasetType.CSV
-    ),
+    new Dataset({
+      id: "nudger",
+      source:
+        "https://files.opendatarchives.fr/data.cquest.org/open4goods/gtin-open-data.zip",
+      file: "open4goods-full-gtin-dataset.csv",
+      dataType: NudgerData,
+      archiveType: ArchiveType.ZIP,
+      datasetType: DatasetType.CSV,
+    }),
   ];
 
   public static loadAll(): Promise<void[]> {
diff --git a/src/services/dmn/DMN.ts b/src/services/dmn/DMN.ts
index 391baba..c21a884 100644
--- a/src/services/dmn/DMN.ts
+++ b/src/services/dmn/DMN.ts
@@ -1,78 +1,36 @@
-import { Dataset } from "../dataset";
-import { Data } from "../data";
 import DmnModdle from "dmn-moddle";
 import { DMN_Decision, Is_DMN_Decision } from "./interfaces/DMN_Decision";
-import {
-  DMN_DecisionTable,
-  Is_DMN_DecisionTable,
-} from "./interfaces/DMN_DecisionTable";
-import {
-  DMN_InputClause,
-  Name_of_DMN_InputClause,
-} from "./interfaces/DMN_InputClause";
+import { Is_DMN_DecisionTable } from "./interfaces/DMN_DecisionTable";
+import { Name_of_DMN_InputClause } from "./interfaces/DMN_InputClause";
 import { Name_of_DMN_OutputClause } from "./interfaces/DMN_OutputClause";
-import { ModdleElement } from "./interfaces/ModdleElement";
-import { DmnError } from "./error/DmnError";
+import { DMN_Definitions } from "./interfaces/DMN_Definitions";
 
 export class DMN {
-  static async parse(xml: string) {
+  static async parse(xml: string): Promise<DMN_Definitions> {
     const { rootElement, warnings } = await new DmnModdle().fromXML(xml);
 
     if (warnings.length !== 0)
      console.warn(warnings.map((warning: any) => warning.message).join(" * "));
 
-    return rootElement;
+    return rootElement as DMN_Definitions;
   }
 
-  // static async getFilter(xml: string) {
-  //   const rootElement = await DMN.parse(xml);
-  //
-  //   const filterFunction = (me: ModdleElement) =>
-  //     Is_DMN_Decision(me) && Is_DMN_DecisionTable(me.decisionLogic);
-  //   const everyFunction = (decision: DMN_Decision) => {
-  //     try {
-  //       const decision_table: DMN_DecisionTable =
-  //         decision.decisionLogic as DMN_DecisionTable;
-  //       let features: string[] = decision_table.input!.map(
-  //         (input_clause: DMN_InputClause) =>
-  //           Name_of_DMN_InputClause(input_clause)
-  //       );
-  //       const index: number = features
-  //         .map((feature: string): string => feature.toUpperCase())
-  //         .indexOf(DMN.URL);
-  //       if (index === -1) return false;
-  //       // Si la zone 'text' est égale à "" alors prendre 'features[0]' :
-  //       const data_source = decision_table.input[
-  //         index
-  //       ].inputExpression.text.replaceAll('"', ""); // ES2021
-  //       features = features.concat(
-  //         decision_table.output!.map((output_clause) =>
-  //           Name_of_DMN_OutputClause(output_clause)
-  //         )
-  //       );
-  //       // A changer, il y a autant de 'Randomizer' objects que de tables de décision :
-  //       DMN._Randomizer = new Randomizer(
-  //         data_source,
-  //         features /* Number of features whose type is "output", default is '1' */
-  //       );
-  //
-  //       // decision_table.rule!.forEach((rule) => {
-  //       //   features.forEach((feature, feature_index) => {
-  //       //     const column = rule.inputEntry[feature_index];
-  //       //     // A priori, nothing here since "rules" are ignored...
-  //       //   });
-  //       // });
-  //     } catch (error: unknown) {
-  //       throw new DmnError(decision, DmnError.Invalid_JSON);
-  //     }
-  //   };
-  //
-  //   const a: boolean = rootElement.drgElement
-  //     .filter(filterFunction)
-  //     .every(everyFunction);
-  //
-  //   try {
-  //     if (a === false) return Promise.resolve(undefined); // DMN processing causes trouble(s)...
-  //   } catch (error: unknown) {
-  //     console.error(error);
-  //   }
-  // }
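+  /**
+   * Extract the input and output column names of the first decision table
+   * found in the DMN definitions
+   * @param dmnDefinitions - The parsed DMN model
+   */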
+  public static getSchema(dmnDefinitions: DMN_Definitions) {
+    const decisions: DMN_Decision[] = dmnDefinitions.drgElement.filter(
+      Is_DMN_Decision
+    );
+    const { input, output } = decisions
+      .map((decision) => decision.decisionLogic)
+      .filter(Is_DMN_DecisionTable)[0];
+
+    // TODO generate json schema
+
+    return {
+      input: input?.map((clause) => Name_of_DMN_InputClause(clause)),
+      output: output?.map((clause) => Name_of_DMN_OutputClause(clause)),
+    };
+  }
 }