feat!: Edit Data structure

This commit is contained in:
Lucàs
2024-10-07 13:15:16 +02:00
parent 960e7b6777
commit 510d25d9af
7 changed files with 113 additions and 131 deletions
+7 -5
View File
@@ -1,7 +1,8 @@
import { Router, Request, Response } from "express";
import { DatasetCollection } from "../services/dataset";
import DmnModdle from "dmn-moddle";
import { DMN } from "../services/dmn/DMN";
import { DMN_Definitions } from "../services/dmn/interfaces/DMN_Definitions";
import { Data } from "../services/data";
const router = Router();
@@ -21,12 +22,13 @@ router.post("/randomize/:id", async (req: Request, res: Response) => {
);
if (!dataset) return res.status(404).json({ status: "NOT_FOUND" });
const a: any = await DMN.parse(req.body);
const dmn: DMN_Definitions = await DMN.parse(req.body);
const schema = DMN.getSchema(dmn);
const { rootElement } = await new DmnModdle().fromXML(req.body);
console.log(rootElement);
const data: Data[] = await dataset.get(size, schema);
const data = await dataset.get(size);
// Randomize the data (temporary)
data.sort(() => Math.random() - 0.5);
return res.status(200).json({ status: "RANDOMIZED", data });
});
+5 -2
View File
@@ -1,6 +1,9 @@
interface Data {
input: string;
output: string;
input: any[];
output: any[];
}
/** Constructor signature for any concrete {@link Data} implementation; lets Dataset instantiate the right data class from raw parsed rows. */
type DataConstructor<T extends Data> = new (...args: any[]) => T;
export default Data;
export { DataConstructor };
+5 -5
View File
@@ -16,12 +16,12 @@ type RawNudgerData = {
};
class NudgerData implements Data {
input: string;
output: string;
input: string[];
output: string[];
constructor(rawData: RawNudgerData) {
this.input = rawData.code;
this.output = rawData.gs1_country;
constructor({ code, gs1_country }: RawNudgerData) {
this.input = [code];
this.output = [gs1_country];
}
}
+1 -1
View File
@@ -1,3 +1,3 @@
export { default as Data } from "./Data";
export { default as Data, DataConstructor } from "./Data";
export { default as NudgerData } from "./NudgerData";
+62 -39
View File
@@ -9,45 +9,54 @@ import FileService from "../FileService";
import { ArchiveFactory, ArchiveType } from "../archive";
import { ParserFactory } from "../parser";
import { DatasetType } from "./";
import { Data } from "../data";
import { Data, DataConstructor } from "../data";
type DatasetOptions = {
id: string;
dataType: DataConstructor<Data>;
source: string;
file: string;
archiveType: ArchiveType;
datasetType: DatasetType;
};
/**
* Represents a dataset that can be loaded and queried
*/
class Dataset {
readonly id: string;
readonly url: string;
readonly sourceFile: string;
readonly source: string;
readonly file: string;
readonly archiveType: ArchiveType;
readonly datasetType: DatasetType;
readonly cachePath: string;
private dataConstructor: { new (rawData: object): Data };
private dataType: DataConstructor<Data>;
/**
* Create a new dataset instance
* @param dataConstructor - The constructor of the data class
* @param id - The unique identifier of the dataset
* @param url - The URL of the dataset
* @param sourceFile - The file name of the dataset in the archive
* @param source - The URL of the dataset
* @param file - The name of the file in the archive
* @param dataType - The constructor of the data class
* @param archiveType - The type of the archive
* @param datasetType - The type of the dataset
*/
constructor(
dataConstructor: new (rawData: any) => Data,
id: string,
url: string,
sourceFile: string,
archiveType: ArchiveType,
datasetType: DatasetType
) {
this.dataConstructor = dataConstructor;
constructor({
id,
source,
file,
dataType,
archiveType,
datasetType,
}: DatasetOptions) {
this.id = id;
this.url = url;
this.sourceFile = sourceFile;
this.dataType = dataType;
this.source = source;
this.file = file;
this.archiveType = archiveType;
this.datasetType = datasetType;
this.cachePath = CacheService.getCachePath(this.url, ".json");
this.cachePath = CacheService.getCachePath(this.source, ".json");
}
/**
@@ -56,8 +65,8 @@ class Dataset {
* @throws {Error} - If the dataset cannot be loaded
*/
public async load(): Promise<void> {
if (CacheService.isCached(this.url, ".json")) {
console.log(`Already cached: ${this.url}`);
if (CacheService.isCached(this.source, ".json")) {
console.log(`Already cached: ${this.source}`);
return;
}
@@ -68,33 +77,39 @@ class Dataset {
const self = this;
console.log(`Download: ${this.url}`);
console.log(`Download: ${this.source}`);
await pipelineAsync(
await FileService.getFileStream(this.url),
archive.extract(this.sourceFile),
await FileService.getFileStream(this.source),
archive.extract(this.file),
parser.parse(),
new Transform({
Dataset.transformToData(this.dataType),
FileService.createWriteStream(this.cachePath)
);
}
private static transformToData(dataType: DataConstructor<Data>): Transform {
return new Transform({
objectMode: true,
transform(chunk: object, _, callback) {
const data: Data = new self.dataConstructor(
JSON.parse(chunk.toString())
);
const data: Data = new dataType(JSON.parse(chunk.toString()));
this.push(JSON.stringify(data) + "\n");
callback(null, JSON.stringify(data) + "\n");
},
}),
FileService.createWriteStream(this.cachePath)
);
});
}
/**
* Get a number of data entries from the dataset
* @param length - The number of data entries to get (default: 10)
* @param schema - Schema of the expected data returned
*/
public get(length: number = 10): Promise<Data[]> {
public get(
length: number = 10,
schema: { input: string[] | undefined; output: string[] | undefined }
): Promise<any[]> {
return new Promise((resolve, reject) => {
let count: number = 0;
const results: Data[] = [];
const results: any[] = [];
const stream = fs.createReadStream(this.cachePath, { encoding: "utf8" });
const rl = readline.createInterface({
@@ -104,18 +119,26 @@ class Dataset {
rl.on("line", (line) => {
if (count < length) {
try {
results.push(JSON.parse(line) as Data);
const data: Data = JSON.parse(line) as Data;
// Create an object with the input and output values according to the schema
const obj: any = {};
schema.input?.forEach((input: string, index: number) => {
obj[input] = data.input[index];
});
schema.output?.forEach((output, index) => {
obj[output] = data.output[index];
});
// Add the object to the results
results.push(obj);
count++;
} catch (err) {
console.error("Erreur lors du parsing de la ligne:", err);
}
} else {
rl.close(); // Fermer le flux si on a atteint les n objets
}
});
// When the stream has finished or has been closed
// When the stream has finished or has been closed.
rl.on("close", () => {
resolve(results); // Renvoie les n objets lus
});
+8 -7
View File
@@ -4,14 +4,15 @@ import { Dataset, DatasetType } from "./";
class DatasetCollection {
public static datasets: Dataset[] = [
new Dataset(
NudgerData,
"nudger",
new Dataset({
id: "nudger",
source:
"https://files.opendatarchives.fr/data.cquest.org/open4goods/gtin-open-data.zip",
"open4goods-full-gtin-dataset.csv",
ArchiveType.ZIP,
DatasetType.CSV
),
file: "open4goods-full-gtin-dataset.csv",
dataType: NudgerData,
archiveType: ArchiveType.ZIP,
datasetType: DatasetType.CSV,
}),
];
public static loadAll(): Promise<void[]> {
+20 -67
View File
@@ -1,78 +1,31 @@
import { Dataset } from "../dataset";
import { Data } from "../data";
import DmnModdle from "dmn-moddle";
import { DMN_Decision, Is_DMN_Decision } from "./interfaces/DMN_Decision";
import {
DMN_DecisionTable,
Is_DMN_DecisionTable,
} from "./interfaces/DMN_DecisionTable";
import {
DMN_InputClause,
Name_of_DMN_InputClause,
} from "./interfaces/DMN_InputClause";
import { Is_DMN_DecisionTable } from "./interfaces/DMN_DecisionTable";
import { Name_of_DMN_InputClause } from "./interfaces/DMN_InputClause";
import { Name_of_DMN_OutputClause } from "./interfaces/DMN_OutputClause";
import { ModdleElement } from "./interfaces/ModdleElement";
import { DmnError } from "./error/DmnError";
import { DMN_Definitions } from "./interfaces/DMN_Definitions";
export class DMN {
static async parse(xml: string) {
static async parse(xml: string): Promise<DMN_Definitions> {
const { rootElement, warnings } = await new DmnModdle().fromXML(xml);
if (warnings.length !== 0)
console.warn(warnings.map((warning: any) => warning.message).join(" * "));
return rootElement;
return rootElement as DMN_Definitions;
}
// static async getFilter(xml: string) {
// const rootElement = await DMN.parse(xml);
//
// const filterFunction = (me: ModdleElement) =>
// Is_DMN_Decision(me) && Is_DMN_DecisionTable(me.decisionLogic);
// const everyFunction = (decision: DMN_Decision) => {
// try {
// const decision_table: DMN_DecisionTable =
// decision.decisionLogic as DMN_DecisionTable;
// let features: string[] = decision_table.input!.map(
// (input_clause: DMN_InputClause) =>
// Name_of_DMN_InputClause(input_clause)
// );
// const index: number = features
// .map((feature: string): string => feature.toUpperCase())
// .indexOf(DMN.URL);
// if (index === -1) return false;
// // If the 'text' field equals "" then take 'features[0]':
// const data_source = decision_table.input[
// index
// ].inputExpression.text.replaceAll('"', ""); // ES2021
// features = features.concat(
// decision_table.output!.map((output_clause) =>
// Name_of_DMN_OutputClause(output_clause)
// )
// );
// // To change: there are as many 'Randomizer' objects as decision tables:
// DMN._Randomizer = new Randomizer(
// data_source,
// features /* Number of features whose type is "output", default is '1' */
// );
//
// // decision_table.rule!.forEach((rule) => {
// // features.forEach((feature, feature_index) => {
// // const column = rule.inputEntry[feature_index];
// // // A priori, nothing here since "rules" are ignored...
// // y});
// // });
// } catch (error: unknown) {
// throw new DmnError(decision, DmnError.Invalid_JSON);
// }
// };
//
// const a: boolean = rootElement.drgElement
// .filter(filterFunction)
// .every(everyFunction);
//
// try {
// if (a === false) return Promise.resolve(undefined); // DMN processing causes trouble(s)...
// } catch (error: unknown) {
// console.error(error);
// }
// }
/**
 * Extract a flat input/output schema from the first decision table found
 * in the parsed DMN definitions.
 * @param dmnDefinitions - Parsed DMN model (root definitions element)
 * @returns Object whose `input`/`output` arrays hold the clause names of the
 *          first decision table; each may be `undefined` when the table
 *          declares no clauses of that kind
 * @throws {Error} If the definitions contain no decision backed by a decision table
 */
public static getSchema(dmnDefinitions: DMN_Definitions) {
  // Keep only DRG elements that are decisions.
  const decisions: DMN_Decision[] = dmnDefinitions.drgElement.filter(
    (element) => Is_DMN_Decision(element)
  );
  // Take the first decision whose logic is a decision table.
  const decisionTable = decisions
    .map((decision) => decision.decisionLogic)
    .filter((decisionLogic) => Is_DMN_DecisionTable(decisionLogic))[0];
  if (!decisionTable) {
    // Destructuring `undefined` would throw a cryptic TypeError; fail loudly instead.
    throw new Error("DMN definitions contain no decision table");
  }
  const { input, output } = decisionTable;
  // TODO generate json schema
  return {
    input: input?.map((clause) => Name_of_DMN_InputClause(clause)),
    output: output?.map((clause) => Name_of_DMN_OutputClause(clause)),
  };
}
}