feat!: Edit Data structure

Lucàs
2024-10-07 13:15:16 +02:00
parent 960e7b6777
commit 510d25d9af
7 changed files with 113 additions and 131 deletions
+7 -5
@@ -1,7 +1,8 @@
 import { Router, Request, Response } from "express";
 import { DatasetCollection } from "../services/dataset";
-import DmnModdle from "dmn-moddle";
 import { DMN } from "../services/dmn/DMN";
+import { DMN_Definitions } from "../services/dmn/interfaces/DMN_Definitions";
+import { Data } from "../services/data";
 
 const router = Router();
@@ -21,12 +22,13 @@ router.post("/randomize/:id", async (req: Request, res: Response) => {
   );
   if (!dataset) return res.status(404).json({ status: "NOT_FOUND" });
 
-  const a: any = await DMN.parse(req.body);
-  const { rootElement } = await new DmnModdle().fromXML(req.body);
-  console.log(rootElement);
-  const data = await dataset.get(size);
+  const dmn: DMN_Definitions = await DMN.parse(req.body);
+  const schema = DMN.getSchema(dmn);
+  const data: Data[] = await dataset.get(size, schema);
+  // Randomize the data (temporary)
+  data.sort(() => Math.random() - 0.5);
 
   return res.status(200).json({ status: "RANDOMIZED", data });
 });
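Side note on the "(temporary)" shuffle above: a comparator that returns Math.random() - 0.5 is a biased shuffle and depends on the sort implementation. If the placeholder is replaced later, a Fisher-Yates pass is the usual drop-in; a minimal sketch, illustrative only and not part of this commit:

// Unbiased in-place shuffle (Fisher-Yates), O(n).
function shuffle<T>(items: T[]): T[] {
  for (let i = items.length - 1; i > 0; i--) {
    const j = Math.floor(Math.random() * (i + 1));
    [items[i], items[j]] = [items[j], items[i]];
  }
  return items;
}
// Usage: shuffle(data) instead of data.sort(() => Math.random() - 0.5)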
+5 -2
@@ -1,6 +1,9 @@
 interface Data {
-  input: string;
-  output: string;
+  input: any[];
+  output: any[];
 }
 
+type DataConstructor<T extends Data> = new (...args: any[]) => T;
+
 export default Data;
+export { DataConstructor };
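For readers of the diff: the new DataConstructor type lets callers build a Data instance from a raw record without knowing the concrete class, which is how Dataset consumes it further down. A minimal sketch (the fromRaw helper is hypothetical, not part of the commit):

import Data, { DataConstructor } from "./Data";

// Hypothetical helper: instantiate whichever Data subclass a dataset is configured with.
function fromRaw<T extends Data>(ctor: DataConstructor<T>, raw: unknown): T {
  return new ctor(raw);
}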
+5 -5
@@ -16,12 +16,12 @@ type RawNudgerData = {
 };
 
 class NudgerData implements Data {
-  input: string;
-  output: string;
+  input: string[];
+  output: string[];
 
-  constructor(rawData: RawNudgerData) {
-    this.input = rawData.code;
-    this.output = rawData.gs1_country;
+  constructor({ code, gs1_country }: RawNudgerData) {
+    this.input = [code];
+    this.output = [gs1_country];
   }
 }
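Concretely, NudgerData now maps a raw GTIN record onto the array-shaped Data contract; a small illustration with placeholder values (RawNudgerData may carry more fields than the two used here):

const entry = new NudgerData({ code: "1234567890123", gs1_country: "FR" } as RawNudgerData);
// entry.input  -> ["1234567890123"]
// entry.output -> ["FR"]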
+1 -1
@@ -1,3 +1,3 @@
-export { default as Data } from "./Data";
+export { default as Data, DataConstructor } from "./Data";
 export { default as NudgerData } from "./NudgerData";
+66 -43
@@ -9,45 +9,54 @@ import FileService from "../FileService";
 import { ArchiveFactory, ArchiveType } from "../archive";
 import { ParserFactory } from "../parser";
 import { DatasetType } from "./";
-import { Data } from "../data";
+import { Data, DataConstructor } from "../data";
+
+type DatasetOptions = {
+  id: string;
+  dataType: DataConstructor<Data>;
+  source: string;
+  file: string;
+  archiveType: ArchiveType;
+  datasetType: DatasetType;
+};
 
 /**
  * Represents a dataset that can be loaded and queried
  */
 class Dataset {
   readonly id: string;
-  readonly url: string;
-  readonly sourceFile: string;
+  readonly source: string;
+  readonly file: string;
   readonly archiveType: ArchiveType;
   readonly datasetType: DatasetType;
   readonly cachePath: string;
-  private dataConstructor: { new (rawData: object): Data };
+  private dataType: DataConstructor<Data>;
 
   /**
    * Create a new dataset instance
-   * @param dataConstructor - The constructor of the data class
    * @param id - The unique identifier of the dataset
-   * @param url - The URL of the dataset
-   * @param sourceFile - The file name of the dataset in the archive
+   * @param source - The URL of the dataset
+   * @param file - The name of the file in the archive
+   * @param dataType - The constructor of the data class
    * @param archiveType - The type of the archive
    * @param datasetType - The type of the dataset
    */
-  constructor(
-    dataConstructor: new (rawData: any) => Data,
-    id: string,
-    url: string,
-    sourceFile: string,
-    archiveType: ArchiveType,
-    datasetType: DatasetType
-  ) {
-    this.dataConstructor = dataConstructor;
+  constructor({
+    id,
+    source,
+    file,
+    dataType,
+    archiveType,
+    datasetType,
+  }: DatasetOptions) {
     this.id = id;
-    this.url = url;
-    this.sourceFile = sourceFile;
+    this.dataType = dataType;
+    this.source = source;
+    this.file = file;
     this.archiveType = archiveType;
     this.datasetType = datasetType;
-    this.cachePath = CacheService.getCachePath(this.url, ".json");
+    this.cachePath = CacheService.getCachePath(this.source, ".json");
   }
 
   /**
@@ -56,8 +65,8 @@ class Dataset {
    * @throws {Error} - If the dataset cannot be loaded
    */
   public async load(): Promise<void> {
-    if (CacheService.isCached(this.url, ".json")) {
-      console.log(`Already cached: ${this.url}`);
+    if (CacheService.isCached(this.source, ".json")) {
+      console.log(`Already cached: ${this.source}`);
       return;
     }
@@ -68,33 +77,39 @@
     const self = this;
 
-    console.log(`Download: ${this.url}`);
+    console.log(`Download: ${this.source}`);
     await pipelineAsync(
-      await FileService.getFileStream(this.url),
-      archive.extract(this.sourceFile),
+      await FileService.getFileStream(this.source),
+      archive.extract(this.file),
       parser.parse(),
-      new Transform({
-        objectMode: true,
-        transform(chunk: object, _, callback) {
-          const data: Data = new self.dataConstructor(
-            JSON.parse(chunk.toString())
-          );
-          this.push(JSON.stringify(data) + "\n");
-          callback(null, JSON.stringify(data) + "\n");
-        },
-      }),
+      Dataset.transformToData(this.dataType),
       FileService.createWriteStream(this.cachePath)
     );
   }
 
+  private static transformToData(dataType: DataConstructor<Data>): Transform {
+    return new Transform({
+      objectMode: true,
+      transform(chunk: object, _, callback) {
+        const data: Data = new dataType(JSON.parse(chunk.toString()));
+        this.push(JSON.stringify(data) + "\n");
+        callback(null, JSON.stringify(data) + "\n");
+      },
+    });
+  }
+
   /**
    * Get a number of data entries from the dataset
    * @param length - The number of data entries to get (default: 10)
+   * @param schema - Schema of the expected data returned
    */
-  public get(length: number = 10): Promise<Data[]> {
+  public get(
+    length: number = 10,
+    schema: { input: string[] | undefined; output: string[] | undefined }
+  ): Promise<any[]> {
     return new Promise((resolve, reject) => {
       let count: number = 0;
-      const results: Data[] = [];
+      const results: any[] = [];
       const stream = fs.createReadStream(this.cachePath, { encoding: "utf8" });
       const rl = readline.createInterface({
@@ -104,18 +119,26 @@ class Dataset {
       rl.on("line", (line) => {
         if (count < length) {
-          try {
-            results.push(JSON.parse(line) as Data);
-            count++;
-          } catch (err) {
-            console.error("Erreur lors du parsing de la ligne:", err);
-          }
+          const data: Data = JSON.parse(line) as Data;
+
+          // Create an object with the input and output values according to the schema
+          const obj: any = {};
+          schema.input?.forEach((input: string, index: number) => {
+            obj[input] = data.input[index];
+          });
+          schema.output?.forEach((output, index) => {
+            obj[output] = data.output[index];
+          });
+
+          // Add the object to the results
+          results.push(obj);
+          count++;
         } else {
           rl.close(); // Close the stream once the n requested objects have been read
         }
       });
 
-      // When the stream has finished or has been closed
+      // When the stream has finished or has been closed.
       rl.on("close", () => {
        resolve(results); // Return the n objects that were read
       });
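Taken together, the reworked Dataset is used roughly as follows; a sketch assuming the schema names of the nudger mapping above (in the route, the schema comes from DMN.getSchema instead):

// Illustrative usage, not part of this commit.
const dataset = DatasetCollection.datasets[0];               // the "nudger" dataset
await dataset.load();                                        // download, extract, parse, cache as JSON lines

const schema = { input: ["code"], output: ["gs1_country"] }; // assumed clause names
const rows = await dataset.get(5, schema);
// rows -> [{ code: "...", gs1_country: "..." }, ...] (up to 5 entries, keyed by the schema names)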
+9 -8
@@ -4,14 +4,15 @@ import { Dataset, DatasetType } from "./";
 
 class DatasetCollection {
   public static datasets: Dataset[] = [
-    new Dataset(
-      NudgerData,
-      "nudger",
-      "https://files.opendatarchives.fr/data.cquest.org/open4goods/gtin-open-data.zip",
-      "open4goods-full-gtin-dataset.csv",
-      ArchiveType.ZIP,
-      DatasetType.CSV
-    ),
+    new Dataset({
+      id: "nudger",
+      source:
+        "https://files.opendatarchives.fr/data.cquest.org/open4goods/gtin-open-data.zip",
+      file: "open4goods-full-gtin-dataset.csv",
+      dataType: NudgerData,
+      archiveType: ArchiveType.ZIP,
+      datasetType: DatasetType.CSV,
+    }),
   ];
 
   public static loadAll(): Promise<void[]> {
+20 -67
@@ -1,78 +1,31 @@
-import { Dataset } from "../dataset";
-import { Data } from "../data";
 import DmnModdle from "dmn-moddle";
 import { DMN_Decision, Is_DMN_Decision } from "./interfaces/DMN_Decision";
-import {
-  DMN_DecisionTable,
-  Is_DMN_DecisionTable,
-} from "./interfaces/DMN_DecisionTable";
-import {
-  DMN_InputClause,
-  Name_of_DMN_InputClause,
-} from "./interfaces/DMN_InputClause";
+import { Is_DMN_DecisionTable } from "./interfaces/DMN_DecisionTable";
+import { Name_of_DMN_InputClause } from "./interfaces/DMN_InputClause";
 import { Name_of_DMN_OutputClause } from "./interfaces/DMN_OutputClause";
-import { ModdleElement } from "./interfaces/ModdleElement";
-import { DmnError } from "./error/DmnError";
+import { DMN_Definitions } from "./interfaces/DMN_Definitions";
 
 export class DMN {
-  static async parse(xml: string) {
+  static async parse(xml: string): Promise<DMN_Definitions> {
     const { rootElement, warnings } = await new DmnModdle().fromXML(xml);
     if (warnings.length !== 0)
       console.warn(warnings.map((warning: any) => warning.message).join(" * "));
-    return rootElement;
+    return rootElement as DMN_Definitions;
   }
 
-  // static async getFilter(xml: string) {
-  //   const rootElement = await DMN.parse(xml);
-  //
-  //   const filterFunction = (me: ModdleElement) =>
-  //     Is_DMN_Decision(me) && Is_DMN_DecisionTable(me.decisionLogic);
-  //   const everyFunction = (decision: DMN_Decision) => {
-  //     try {
-  //       const decision_table: DMN_DecisionTable =
-  //         decision.decisionLogic as DMN_DecisionTable;
-  //       let features: string[] = decision_table.input!.map(
-  //         (input_clause: DMN_InputClause) =>
-  //           Name_of_DMN_InputClause(input_clause)
-  //       );
-  //       const index: number = features
-  //         .map((feature: string): string => feature.toUpperCase())
-  //         .indexOf(DMN.URL);
-  //       if (index === -1) return false;
-  //       // If the 'text' field is equal to "" then take 'features[0]' instead:
-  //       const data_source = decision_table.input[
-  //         index
-  //       ].inputExpression.text.replaceAll('"', ""); // ES2021
-  //       features = features.concat(
-  //         decision_table.output!.map((output_clause) =>
-  //           Name_of_DMN_OutputClause(output_clause)
-  //         )
-  //       );
-  //       // To be changed: there are as many 'Randomizer' objects as there are decision tables:
-  //       DMN._Randomizer = new Randomizer(
-  //         data_source,
-  //         features /* Number of features whose type is "output", default is '1' */
-  //       );
-  //
-  //       // decision_table.rule!.forEach((rule) => {
-  //       //   features.forEach((feature, feature_index) => {
-  //       //     const column = rule.inputEntry[feature_index];
-  //       //     // A priori, nothing here since "rules" are ignored...
-  //       //   });
-  //       // });
-  //     } catch (error: unknown) {
-  //       throw new DmnError(decision, DmnError.Invalid_JSON);
-  //     }
-  //   };
-  //
-  //   const a: boolean = rootElement.drgElement
-  //     .filter(filterFunction)
-  //     .every(everyFunction);
-  //
-  //   try {
-  //     if (a === false) return Promise.resolve(undefined); // DMN processing causes trouble(s)...
-  //   } catch (error: unknown) {
-  //     console.error(error);
-  //   }
-  // }
+  public static getSchema(dmnDefinitions: DMN_Definitions) {
+    const descisions: DMN_Decision[] = dmnDefinitions.drgElement.filter(
+      (element) => Is_DMN_Decision(element)
+    );
+    const { input, output } = descisions
+      .map((decision) => decision.decisionLogic)
+      .filter((decisionLogic) => Is_DMN_DecisionTable(decisionLogic))[0];
+
+    // TODO generate json schema
+
+    return {
+      input: input?.map((input) => Name_of_DMN_InputClause(input)),
+      output: output?.map((output) => Name_of_DMN_OutputClause(output)),
+    };
+  }
 }
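For reference, getSchema is consumed as in the route above, and it only reads the first decision whose logic is a decision table. A usage sketch with a placeholder model (illustrative only, not part of this commit):

// dmnXml stands in for a full DMN model as posted to the /randomize route.
const dmnXml = "<definitions ...>";
const definitions = await DMN.parse(dmnXml);
const schema = DMN.getSchema(definitions);
// schema -> { input: string[] | undefined, output: string[] | undefined }
// i.e. the input/output clause names of the first decision table,
// ready to be passed on to dataset.get(size, schema).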