diff --git a/.gitignore b/.gitignore index eb5ffd1..53c6fed 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,9 @@ cache/ node_modules/ +# Docker +docker/ + # Build files build/ diff --git a/docker-compose.yaml b/docker-compose.yaml index 1a134b4..48f1e0d 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,11 +1,46 @@ +# Use root/example as user/password credentials version: "3.1" services: + mongo: + image: mongo + container_name: data-db + restart: "no" + environment: + MONGO_INITDB_ROOT_USERNAME: root + MONGO_INITDB_ROOT_PASSWORD: root + MONGO_INITDB_DATABASE: db + volumes: + - ./docker/mongodb/db:/data/db + - ./docker/mongodb/logs:/var/log/mongodb + command: + ["mongod", "--logpath", "/var/log/mongodb/mongodb.log", "--logappend"] + deploy: + resources: + limits: + memory: 4g + cpus: "2.0" + reservations: + memory: 2g + ports: + - "27017:27017" + mariadb: - image: mariadb:latest + image: mariadb + container_name: log-db restart: "no" environment: MYSQL_ROOT_PASSWORD: root MYSQL_DATABASE: db + MYSQL_USER: user + MYSQL_PASSWORD: example ports: - "3306:3306" + volumes: + - ./docker/mariadb/db:/var/lib/mysql + +volumes: + mongodb_data: + driver: local + mariadb_data: + driver: local diff --git a/mongod.conf b/mongod.conf new file mode 100644 index 0000000..efec200 --- /dev/null +++ b/mongod.conf @@ -0,0 +1,15 @@ +# mongod.conf +storage: + dbPath: /data/db + wiredTiger: + engineConfig: + cacheSizeGB: 2 # Ajuster selon la mémoire disponible (ex: 2 Go) +systemLog: + destination: file + logAppend: true + path: /data/db/mongodb.log +operationProfiling: + mode: slowOp + slowOpThresholdMs: 1000 # Considère une opération lente si elle dépasse 1 seconde +net: + bindIp: 0.0.0.0 diff --git a/package-lock.json b/package-lock.json index 0aae65d..d2f0b36 100644 --- a/package-lock.json +++ b/package-lock.json @@ -16,6 +16,7 @@ "express": "^4.21.0", "fast-csv": "^5.0.1", "jsonschema": "^1.4.1", + "mongoose": "^8.8.1", "mysql": "^2.18.1", "reflect-metadata": "^0.2.2", "tar-stream": "^3.1.7", @@ -24,6 +25,7 @@ }, "devDependencies": { "@types/express": "^4.17.21", + "@types/mongoose": "^5.11.96", "@types/node": "^22.5.5", "@types/tar-stream": "^3.1.3", "@types/unzipper": "^0.10.10", @@ -117,6 +119,15 @@ "@jridgewell/sourcemap-codec": "^1.4.10" } }, + "node_modules/@mongodb-js/saslprep": { + "version": "1.1.9", + "resolved": "https://registry.npmjs.org/@mongodb-js/saslprep/-/saslprep-1.1.9.tgz", + "integrity": "sha512-tVkljjeEaAhCqTzajSdgbQ6gE6f3oneVwa3iXR6csiEwXXOFsiC6Uh9iAjAhXPtqa/XMDHWjjeNH/77m/Yq2dw==", + "license": "MIT", + "dependencies": { + "sparse-bitfield": "^3.0.3" + } + }, "node_modules/@pkgjs/parseargs": { "version": "0.11.0", "resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz", @@ -222,6 +233,16 @@ "dev": true, "license": "MIT" }, + "node_modules/@types/mongoose": { + "version": "5.11.96", + "resolved": "https://registry.npmjs.org/@types/mongoose/-/mongoose-5.11.96.tgz", + "integrity": "sha512-keiY22ljJtXyM7osgScmZOHV6eL5VFUD5tQumlu+hjS++HND5nM8jNEdj5CSWfKIJpVwQfPuwQ2SfBqUnCAVRw==", + "dev": true, + "license": "MIT", + "dependencies": { + "mongoose": "*" + } + }, "node_modules/@types/node": { "version": "22.5.5", "resolved": "https://registry.npmjs.org/@types/node/-/node-22.5.5.tgz", @@ -289,6 +310,24 @@ "@types/node": "*" } }, + "node_modules/@types/webidl-conversions": { + "version": "7.0.3", + "resolved": "https://registry.npmjs.org/@types/webidl-conversions/-/webidl-conversions-7.0.3.tgz", + "integrity": "sha512-CiJJvcRtIgzadHCYXw7dqEnMNRjhGZlYK05Mj9OyktqV8uVT8fD2BFOB7S1uwBE3Kj2Z+4UyPmFw/Ixgw/LAlA==", + "license": "MIT" + }, + "node_modules/@types/whatwg-url": { + "version": "8.2.2", + "resolved": "https://registry.npmjs.org/@types/whatwg-url/-/whatwg-url-8.2.2.tgz", + "integrity": "sha512-FtQu10RWgn3D9U4aazdwIE2yzphmTJREDqNdODHrbrZmmMqI0vMheC/6NE/J1Yveaj8H+ela+YwWTjq5PGmuhA==", + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "@types/node": "*", + "@types/webidl-conversions": "*" + } + }, "node_modules/accepts": { "version": "1.3.8", "resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.8.tgz", @@ -487,6 +526,15 @@ "balanced-match": "^1.0.0" } }, + "node_modules/bson": { + "version": "6.9.0", + "resolved": "https://registry.npmjs.org/bson/-/bson-6.9.0.tgz", + "integrity": "sha512-X9hJeyeM0//Fus+0pc5dSUMhhrrmWwQUtdavaQeF3Ta6m69matZkGWV/MrBcnwUeLC8W9kwwc2hfkZgUuCX3Ig==", + "license": "Apache-2.0", + "engines": { + "node": ">=16.20.1" + } + }, "node_modules/buffer": { "version": "6.0.3", "resolved": "https://registry.npmjs.org/buffer/-/buffer-6.0.3.tgz", @@ -1367,6 +1415,21 @@ "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", "license": "ISC" }, + "node_modules/ip-address": { + "version": "9.0.5", + "resolved": "https://registry.npmjs.org/ip-address/-/ip-address-9.0.5.tgz", + "integrity": "sha512-zHtQzGojZXTwZTHQqra+ETKd4Sn3vgi7uBmlPoXVWZqYvuKmtI0l/VZTjqGmJY9x88GGOaZ9+G9ES8hC4T4X8g==", + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "jsbn": "1.1.0", + "sprintf-js": "^1.1.3" + }, + "engines": { + "node": ">= 12" + } + }, "node_modules/ipaddr.js": { "version": "1.9.1", "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz", @@ -1412,6 +1475,14 @@ "@pkgjs/parseargs": "^0.11.0" } }, + "node_modules/jsbn": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/jsbn/-/jsbn-1.1.0.tgz", + "integrity": "sha512-4bYVV3aAMtDTTu4+xsDYa6sy9GyJ69/amsu9sYF2zqjiEoZA5xJi3BrfX3uY+/IekIu7MwdObdbDWpoZdBv3/A==", + "license": "MIT", + "optional": true, + "peer": true + }, "node_modules/jsonfile": { "version": "6.1.0", "resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-6.1.0.tgz", @@ -1433,6 +1504,15 @@ "node": "*" } }, + "node_modules/kareem": { + "version": "2.6.3", + "resolved": "https://registry.npmjs.org/kareem/-/kareem-2.6.3.tgz", + "integrity": "sha512-C3iHfuGUXK2u8/ipq9LfjFfXFxAZMQJJq7vLS45r3D9Y2xQ/m4S8zaR4zMLFWh9AsNPXmcFfUDhTEO8UIC/V6Q==", + "license": "Apache-2.0", + "engines": { + "node": ">=12.0.0" + } + }, "node_modules/lodash.escaperegexp": { "version": "4.1.2", "resolved": "https://registry.npmjs.org/lodash.escaperegexp/-/lodash.escaperegexp-4.1.2.tgz", @@ -1503,6 +1583,12 @@ "node": ">= 0.6" } }, + "node_modules/memory-pager": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/memory-pager/-/memory-pager-1.5.0.tgz", + "integrity": "sha512-ZS4Bp4r/Zoeq6+NLJpP+0Zzm0pR8whtGPf1XExKLJBAczGMnSi3It14OiNCStjQjM6NU1okjQGSxgEZN8eBYKg==", + "license": "MIT" + }, "node_modules/merge-descriptors": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.3.tgz", @@ -1619,6 +1705,234 @@ "saxen": "^8.1.2" } }, + "node_modules/mongodb": { + "version": "5.9.2", + "resolved": "https://registry.npmjs.org/mongodb/-/mongodb-5.9.2.tgz", + "integrity": "sha512-H60HecKO4Bc+7dhOv4sJlgvenK4fQNqqUIlXxZYQNbfEWSALGAwGoyJd/0Qwk4TttFXUOHJ2ZJQe/52ScaUwtQ==", + "license": "Apache-2.0", + "optional": true, + "peer": true, + "dependencies": { + "bson": "^5.5.0", + "mongodb-connection-string-url": "^2.6.0", + "socks": "^2.7.1" + }, + "engines": { + "node": ">=14.20.1" + }, + "optionalDependencies": { + "@mongodb-js/saslprep": "^1.1.0" + }, + "peerDependencies": { + "@aws-sdk/credential-providers": "^3.188.0", + "@mongodb-js/zstd": "^1.0.0", + "kerberos": "^1.0.0 || ^2.0.0", + "mongodb-client-encryption": ">=2.3.0 <3", + "snappy": "^7.2.2" + }, + "peerDependenciesMeta": { + "@aws-sdk/credential-providers": { + "optional": true + }, + "@mongodb-js/zstd": { + "optional": true + }, + "kerberos": { + "optional": true + }, + "mongodb-client-encryption": { + "optional": true + }, + "snappy": { + "optional": true + } + } + }, + "node_modules/mongodb-connection-string-url": { + "version": "2.6.0", + "resolved": "https://registry.npmjs.org/mongodb-connection-string-url/-/mongodb-connection-string-url-2.6.0.tgz", + "integrity": "sha512-WvTZlI9ab0QYtTYnuMLgobULWhokRjtC7db9LtcVfJ+Hsnyr5eo6ZtNAt3Ly24XZScGMelOcGtm7lSn0332tPQ==", + "license": "Apache-2.0", + "optional": true, + "peer": true, + "dependencies": { + "@types/whatwg-url": "^8.2.1", + "whatwg-url": "^11.0.0" + } + }, + "node_modules/mongodb/node_modules/bson": { + "version": "5.5.1", + "resolved": "https://registry.npmjs.org/bson/-/bson-5.5.1.tgz", + "integrity": "sha512-ix0EwukN2EpC0SRWIj/7B5+A6uQMQy6KMREI9qQqvgpkV2frH63T0UDVd1SYedL6dNCmDBYB3QtXi4ISk9YT+g==", + "license": "Apache-2.0", + "optional": true, + "peer": true, + "engines": { + "node": ">=14.20.1" + } + }, + "node_modules/mongoose": { + "version": "8.8.1", + "resolved": "https://registry.npmjs.org/mongoose/-/mongoose-8.8.1.tgz", + "integrity": "sha512-l7DgeY1szT98+EKU8GYnga5WnyatAu+kOQ2VlVX1Mxif6A0Umt0YkSiksCiyGxzx8SPhGe9a53ND1GD4yVDrPA==", + "license": "MIT", + "dependencies": { + "bson": "^6.7.0", + "kareem": "2.6.3", + "mongodb": "~6.10.0", + "mpath": "0.9.0", + "mquery": "5.0.0", + "ms": "2.1.3", + "sift": "17.1.3" + }, + "engines": { + "node": ">=16.20.1" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/mongoose" + } + }, + "node_modules/mongoose/node_modules/@types/whatwg-url": { + "version": "11.0.5", + "resolved": "https://registry.npmjs.org/@types/whatwg-url/-/whatwg-url-11.0.5.tgz", + "integrity": "sha512-coYR071JRaHa+xoEvvYqvnIHaVqaYrLPbsufM9BF63HkwI5Lgmy2QR8Q5K/lYDYo5AK82wOvSOS0UsLTpTG7uQ==", + "license": "MIT", + "dependencies": { + "@types/webidl-conversions": "*" + } + }, + "node_modules/mongoose/node_modules/mongodb": { + "version": "6.10.0", + "resolved": "https://registry.npmjs.org/mongodb/-/mongodb-6.10.0.tgz", + "integrity": "sha512-gP9vduuYWb9ZkDM546M+MP2qKVk5ZG2wPF63OvSRuUbqCR+11ZCAE1mOfllhlAG0wcoJY5yDL/rV3OmYEwXIzg==", + "license": "Apache-2.0", + "dependencies": { + "@mongodb-js/saslprep": "^1.1.5", + "bson": "^6.7.0", + "mongodb-connection-string-url": "^3.0.0" + }, + "engines": { + "node": ">=16.20.1" + }, + "peerDependencies": { + "@aws-sdk/credential-providers": "^3.188.0", + "@mongodb-js/zstd": "^1.1.0", + "gcp-metadata": "^5.2.0", + "kerberos": "^2.0.1", + "mongodb-client-encryption": ">=6.0.0 <7", + "snappy": "^7.2.2", + "socks": "^2.7.1" + }, + "peerDependenciesMeta": { + "@aws-sdk/credential-providers": { + "optional": true + }, + "@mongodb-js/zstd": { + "optional": true + }, + "gcp-metadata": { + "optional": true + }, + "kerberos": { + "optional": true + }, + "mongodb-client-encryption": { + "optional": true + }, + "snappy": { + "optional": true + }, + "socks": { + "optional": true + } + } + }, + "node_modules/mongoose/node_modules/mongodb-connection-string-url": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/mongodb-connection-string-url/-/mongodb-connection-string-url-3.0.1.tgz", + "integrity": "sha512-XqMGwRX0Lgn05TDB4PyG2h2kKO/FfWJyCzYQbIhXUxz7ETt0I/FqHjUeqj37irJ+Dl1ZtU82uYyj14u2XsZKfg==", + "license": "Apache-2.0", + "dependencies": { + "@types/whatwg-url": "^11.0.2", + "whatwg-url": "^13.0.0" + } + }, + "node_modules/mongoose/node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "license": "MIT" + }, + "node_modules/mongoose/node_modules/tr46": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-4.1.1.tgz", + "integrity": "sha512-2lv/66T7e5yNyhAAC4NaKe5nVavzuGJQVVtRYLyQ2OI8tsJ61PMLlelehb0wi2Hx6+hT/OJUWZcw8MjlSRnxvw==", + "license": "MIT", + "dependencies": { + "punycode": "^2.3.0" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/mongoose/node_modules/whatwg-url": { + "version": "13.0.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-13.0.0.tgz", + "integrity": "sha512-9WWbymnqj57+XEuqADHrCJ2eSXzn8WXIW/YSGaZtb2WKAInQ6CHfaUUcTyyver0p8BDg5StLQq8h1vtZuwmOig==", + "license": "MIT", + "dependencies": { + "tr46": "^4.1.1", + "webidl-conversions": "^7.0.0" + }, + "engines": { + "node": ">=16" + } + }, + "node_modules/mpath": { + "version": "0.9.0", + "resolved": "https://registry.npmjs.org/mpath/-/mpath-0.9.0.tgz", + "integrity": "sha512-ikJRQTk8hw5DEoFVxHG1Gn9T/xcjtdnOKIU1JTmGjZZlg9LST2mBLmcX3/ICIbgJydT2GOc15RnNy5mHmzfSew==", + "license": "MIT", + "engines": { + "node": ">=4.0.0" + } + }, + "node_modules/mquery": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/mquery/-/mquery-5.0.0.tgz", + "integrity": "sha512-iQMncpmEK8R8ncT8HJGsGc9Dsp8xcgYMVSbs5jgnm1lFHTZqMJTUWTDx1LBO8+mK3tPNZWFLBghQEIOULSTHZg==", + "license": "MIT", + "dependencies": { + "debug": "4.x" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/mquery/node_modules/debug": { + "version": "4.3.7", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.7.tgz", + "integrity": "sha512-Er2nc/H7RrMXZBFCEim6TCmMk02Z8vLC2Rbi1KEBggpo0fS6l0S1nnapwmIi3yW/+GOJap1Krg4w0Hg80oCqgQ==", + "license": "MIT", + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/mquery/node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "license": "MIT" + }, "node_modules/ms": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", @@ -1828,6 +2142,15 @@ "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==", "license": "MIT" }, + "node_modules/punycode": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz", + "integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, "node_modules/qs": { "version": "6.13.0", "resolved": "https://registry.npmjs.org/qs/-/qs-6.13.0.tgz", @@ -2070,6 +2393,12 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/sift": { + "version": "17.1.3", + "resolved": "https://registry.npmjs.org/sift/-/sift-17.1.3.tgz", + "integrity": "sha512-Rtlj66/b0ICeFzYTuNvX/EF1igRbbnGSvEyT79McoZa/DeGhMyC5pWKOEsZKnpkqtSeovd5FL/bjHWC3CIIvCQ==", + "license": "MIT" + }, "node_modules/signal-exit": { "version": "4.1.0", "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-4.1.0.tgz", @@ -2082,6 +2411,51 @@ "url": "https://github.com/sponsors/isaacs" } }, + "node_modules/smart-buffer": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/smart-buffer/-/smart-buffer-4.2.0.tgz", + "integrity": "sha512-94hK0Hh8rPqQl2xXc3HsaBoOXKV20MToPkcXvwbISWLEs+64sBq5kFgn2kJDHb1Pry9yrP0dxrCI9RRci7RXKg==", + "license": "MIT", + "optional": true, + "peer": true, + "engines": { + "node": ">= 6.0.0", + "npm": ">= 3.0.0" + } + }, + "node_modules/socks": { + "version": "2.8.3", + "resolved": "https://registry.npmjs.org/socks/-/socks-2.8.3.tgz", + "integrity": "sha512-l5x7VUUWbjVFbafGLxPWkYsHIhEvmF85tbIeFZWc8ZPtoMyybuEhL7Jye/ooC4/d48FgOjSJXgsF/AJPYCW8Zw==", + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "ip-address": "^9.0.5", + "smart-buffer": "^4.2.0" + }, + "engines": { + "node": ">= 10.0.0", + "npm": ">= 3.0.0" + } + }, + "node_modules/sparse-bitfield": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/sparse-bitfield/-/sparse-bitfield-3.0.3.tgz", + "integrity": "sha512-kvzhi7vqKTfkh0PZU+2D2PIllw2ymqJKujUcyPMd9Y75Nv4nPbGJZXNhxsgdQab2BmlDct1YnfQCguEvHr7VsQ==", + "license": "MIT", + "dependencies": { + "memory-pager": "^1.0.2" + } + }, + "node_modules/sprintf-js": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.1.3.tgz", + "integrity": "sha512-Oo+0REFV59/rz3gfJNKQiBlwfHaSESl1pcGyABQsnnIfWOFt6JNj5gCog2U6MLZ//IGYD+nA8nI+mTShREReaA==", + "license": "BSD-3-Clause", + "optional": true, + "peer": true + }, "node_modules/sqlstring": { "version": "2.3.1", "resolved": "https://registry.npmjs.org/sqlstring/-/sqlstring-2.3.1.tgz", @@ -2287,6 +2661,20 @@ "node": ">=0.6" } }, + "node_modules/tr46": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-3.0.0.tgz", + "integrity": "sha512-l7FvfAHlcmulp8kr+flpQZmVwtu7nfRV7NZujtN0OqES8EL4O4e0qqzL0DC5gAvx/ZC/9lk6rhcUwYvkBnBnYA==", + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "punycode": "^2.1.1" + }, + "engines": { + "node": ">=12" + } + }, "node_modules/ts-node": { "version": "10.9.2", "resolved": "https://registry.npmjs.org/ts-node/-/ts-node-10.9.2.tgz", @@ -2575,6 +2963,30 @@ "node": ">= 0.8" } }, + "node_modules/webidl-conversions": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-7.0.0.tgz", + "integrity": "sha512-VwddBukDzu71offAQR975unBIGqfKZpM+8ZX6ySk8nYhVoo5CYaZyzt3YBvYtRtO+aoGlqxPg/B87NGVZ/fu6g==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=12" + } + }, + "node_modules/whatwg-url": { + "version": "11.0.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-11.0.0.tgz", + "integrity": "sha512-RKT8HExMpoYx4igMiVMY83lN6UeITKJlBQ+vR/8ZJ8OCdSiN3RwCq+9gH0+Xzj0+5IrM6i4j/6LuvzbZIQgEcQ==", + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "tr46": "^3.0.0", + "webidl-conversions": "^7.0.0" + }, + "engines": { + "node": ">=12" + } + }, "node_modules/which": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", diff --git a/package.json b/package.json index d38d5cf..55bee53 100644 --- a/package.json +++ b/package.json @@ -22,6 +22,7 @@ "express": "^4.21.0", "fast-csv": "^5.0.1", "jsonschema": "^1.4.1", + "mongoose": "^8.8.1", "mysql": "^2.18.1", "reflect-metadata": "^0.2.2", "tar-stream": "^3.1.7", @@ -30,6 +31,7 @@ }, "devDependencies": { "@types/express": "^4.17.21", + "@types/mongoose": "^5.11.96", "@types/node": "^22.5.5", "@types/tar-stream": "^3.1.3", "@types/unzipper": "^0.10.10", diff --git a/src/Server.ts b/src/Server.ts index f44f4a5..63c5b08 100644 --- a/src/Server.ts +++ b/src/Server.ts @@ -1,7 +1,7 @@ import express from "express"; import routes from "./routes"; import { createServer } from "node:http"; -import { logger, xmlBodyParser } from "./middlewares"; +import { bodyToSchema, logger, xmlBodyParser } from "./middlewares"; export default class Server { private readonly app: express.Application; @@ -9,7 +9,7 @@ export default class Server { constructor() { this.app = express(); - this.app.use(express.json(), xmlBodyParser, logger, routes); + this.app.use(express.json(), xmlBodyParser, bodyToSchema, logger, routes); } public start() { diff --git a/src/AppDataSource.ts b/src/TypeOrmDataSource.ts similarity index 59% rename from src/AppDataSource.ts rename to src/TypeOrmDataSource.ts index 6d099b3..212c139 100644 --- a/src/AppDataSource.ts +++ b/src/TypeOrmDataSource.ts @@ -1,13 +1,8 @@ import "reflect-metadata"; import { DataSource } from "typeorm"; import { Log } from "./entity/Log"; -import { - NudgerData, - OpenFoodFactsData, - WorldCitiesData, -} from "./services/data"; -export const AppDataSource = new DataSource({ +export const TypeOrmDataSource = new DataSource({ type: "mariadb", host: "localhost", port: 3306, @@ -16,7 +11,7 @@ export const AppDataSource = new DataSource({ database: "db", synchronize: true, logging: false, - entities: [Log, NudgerData, OpenFoodFactsData, WorldCitiesData], + entities: [Log], subscribers: [], migrations: [], }); diff --git a/src/index.ts b/src/index.ts index 06d8807..25f936a 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,11 +1,17 @@ import dotenv from "dotenv"; import Server from "./Server"; import { DatasetCollection } from "./services/dataset"; -import { AppDataSource } from "./AppDataSource"; +import { TypeOrmDataSource } from "./TypeOrmDataSource"; dotenv.config(); -AppDataSource.initialize() +// 1. initialize database connexions + +// 2. Load all datasets + +// 3. Start the server + +TypeOrmDataSource.initialize() .then(() => DatasetCollection.loadAll()) .then(() => console.log("All datasets are loaded")) .then(() => new Server().start()) diff --git a/src/middlewares/bodyToSchema.ts b/src/middlewares/bodyToSchema.ts new file mode 100644 index 0000000..d32ea54 --- /dev/null +++ b/src/middlewares/bodyToSchema.ts @@ -0,0 +1,15 @@ +import { NextFunction, Request, Response } from "express"; +import { Definitions } from "../services/dmn/interfaces"; +import { DMN } from "../services/dmn/DMN"; + +export default async function ( + req: Request, + res: Response, + next: NextFunction +) { + if (req.is("application/xml")) { + const dmn: Definitions = await DMN.parse(req.body); + req.body = DMN.getSchema(dmn); + next(); + } else next(); +} diff --git a/src/middlewares/index.ts b/src/middlewares/index.ts index a4deecf..5eaf9db 100644 --- a/src/middlewares/index.ts +++ b/src/middlewares/index.ts @@ -1,2 +1,3 @@ export { default as logger } from "./logger"; export { default as xmlBodyParser } from "./xmlBodyParser"; +export { default as bodyToSchema } from "./bodyToSchema"; diff --git a/src/middlewares/logger.ts b/src/middlewares/logger.ts index 0f29c10..28ee832 100644 --- a/src/middlewares/logger.ts +++ b/src/middlewares/logger.ts @@ -1,6 +1,6 @@ import { NextFunction, Request, Response } from "express"; import { Log } from "../entity/Log"; -import { AppDataSource } from "../AppDataSource"; +import { TypeOrmDataSource } from "../TypeOrmDataSource"; export default async function logger( req: Request, @@ -8,13 +8,16 @@ export default async function logger( next: NextFunction ) { console.info(`[${req.method}] ${req.url}`); - // Put the log into the database - const log: Log = new Log( - req.url, - req.method as any, - JSON.stringify(req.body) - ); - await AppDataSource.manager.save(log); + + if (req.path === "/randomize") { + // Put the log into the database + const log: Log = new Log( + req.url, + req.method as any, + JSON.stringify(req.body) + ); + await TypeOrmDataSource.manager.save(log); + } next(); } diff --git a/src/middlewares/xmlBodyParser.ts b/src/middlewares/xmlBodyParser.ts index b9f4f6e..434dd22 100644 --- a/src/middlewares/xmlBodyParser.ts +++ b/src/middlewares/xmlBodyParser.ts @@ -4,14 +4,10 @@ export default function (req: Request, res: Response, next: NextFunction) { if (req.is("application/xml")) { let data = ""; req.setEncoding("utf8"); - req.on("data", (chunk: any) => { - data += chunk; - }); + req.on("data", (chunk: any) => (data += chunk)); req.on("end", () => { req.body = data; next(); }); - } else { - next(); - } + } else next(); } diff --git a/src/routes/randomize.ts b/src/routes/randomize.ts index 54e7ca6..3d390fe 100644 --- a/src/routes/randomize.ts +++ b/src/routes/randomize.ts @@ -1,38 +1,35 @@ import { Router, Request, Response } from "express"; import { DatasetCollection } from "../services/dataset"; -import { DMN } from "../services/dmn/DMN"; -import { Definitions } from "../services/dmn/interfaces/"; -import { Data } from "../services/data"; +import axios from "axios"; const router = Router(); router.post("/randomize", (req: Request, res: Response) => { - const size: number = req.query.size - ? parseInt(req.query.size as string) - : 1000; + const size: number = req.query.size ? parseInt(req.query.size as string) : 10; - const datasetID = DatasetCollection.datasets.map((dataset) => dataset.id); + DatasetCollection.getDatasetByMatchingSchema(req.body) + .then((endpoints) => { + // Split evenly the size between the datasets + const sizePerDataset = Math.floor(size / endpoints.length); - Promise.all( - datasetID.map((id) => { - const url: URL = new URL(`http://localhost:4321/randomize/${id}`); - url.searchParams.append("size", size.toString()); - return fetch(url, { - method: "POST", - body: req.body, - headers: { "Content-Type": "application/xml" }, - }) - .then((response) => response.json()) - .then((json: any) => json.data); + return Promise.all( + endpoints.map(async (endpoint) => { + const params = { size: sizePerDataset }; + + return axios + .post(endpoint, req.body, { params }) + .then((res) => res.data.data); + }) + ); }) - ).then((r) => { - const data = r - .flat() - .sort(() => Math.random() - 0.5) - .slice(0, size); + .then((r) => { + const data = r + .flat() + .sort(() => Math.random() - 0.5) + .slice(0, size); - res.status(200).json({ status: "RANDOMIZED", data }); - }); + res.status(200).json({ status: "RANDOMIZED", data }); + }); }); router.post("/randomize/:id", async (req: Request, res: Response) => { @@ -47,15 +44,7 @@ router.post("/randomize/:id", async (req: Request, res: Response) => { ); if (!dataset) return res.status(404).json({ status: "NOT_FOUND" }); - const dmn: Definitions = await DMN.parse(req.body); - const schema = DMN.getSchema(dmn); - console.log(JSON.stringify(schema, null, 2)); - - const data: Data[] = await dataset.get(size, schema); - - // Randomize the data (temporary) - data.sort(() => Math.random() - 0.5); - + const data = await dataset.get(size, req.body); return res.status(200).json({ status: "RANDOMIZED", data }); }); diff --git a/src/services/DataLake.ts b/src/services/DataLake.ts new file mode 100644 index 0000000..10d08a8 --- /dev/null +++ b/src/services/DataLake.ts @@ -0,0 +1,19 @@ +import { MongoClient, Db } from "mongodb"; + +const uri = "mongodb://localhost:27017"; +const dbName = "db"; + +let db: Db; + +export const getDatabaseConnexion = async (): Promise => { + if (!db) { + const client = new MongoClient(uri, { + auth: { username: "root", password: "root" }, + connectTimeoutMS: 60000, + socketTimeoutMS: 60000, + }); + await client.connect(); + db = client.db(dbName); + } + return db; +}; diff --git a/src/services/archive/Archive.ts b/src/services/archive/Archive.ts deleted file mode 100644 index 33b3708..0000000 --- a/src/services/archive/Archive.ts +++ /dev/null @@ -1,7 +0,0 @@ -import { Duplex } from "node:stream"; - -interface Archive { - extract(source: string): Duplex; -} - -export default Archive; diff --git a/src/services/archive/ArchiveFactory.ts b/src/services/archive/ArchiveFactory.ts deleted file mode 100644 index 215d115..0000000 --- a/src/services/archive/ArchiveFactory.ts +++ /dev/null @@ -1,13 +0,0 @@ -import { Archive, ZipArchive, ArchiveType, GzipArchive, NoneArchive } from "./"; - -class ArchiveFactory { - static getArchive(archiveType: ArchiveType): Archive { - if (archiveType === ArchiveType.ZIP) return ZipArchive.instance; - if (archiveType === ArchiveType.GZIP) return GzipArchive.instance; - if (archiveType === ArchiveType.NONE) return NoneArchive.instance; - - throw new Error("Unsupported archive type"); - } -} - -export default ArchiveFactory; diff --git a/src/services/archive/ArchiveType.ts b/src/services/archive/ArchiveType.ts deleted file mode 100644 index 7bacea6..0000000 --- a/src/services/archive/ArchiveType.ts +++ /dev/null @@ -1,7 +0,0 @@ -enum ArchiveType { - ZIP = ".zip", - GZIP = ".gzip", - NONE = "", -} - -export default ArchiveType; diff --git a/src/services/archive/GzipArchive.ts b/src/services/archive/GzipArchive.ts deleted file mode 100644 index cae9caf..0000000 --- a/src/services/archive/GzipArchive.ts +++ /dev/null @@ -1,13 +0,0 @@ -import { createGunzip } from "node:zlib"; -import { Duplex } from "node:stream"; -import { Archive } from "./"; - -class GzipArchive implements Archive { - public static instance: Archive = new GzipArchive(); - - public extract(source: string): Duplex { - return createGunzip(); - } -} - -export default GzipArchive; diff --git a/src/services/archive/NoneArchive.ts b/src/services/archive/NoneArchive.ts deleted file mode 100644 index 0c1465e..0000000 --- a/src/services/archive/NoneArchive.ts +++ /dev/null @@ -1,16 +0,0 @@ -import { Archive } from "./"; -import { Duplex, Transform } from "node:stream"; - -class NoneArchive implements Archive { - public static instance: Archive = new NoneArchive(); - - public extract(source: string): Duplex { - return new Transform({ - transform(chunk, _, callback) { - callback(null, chunk); - }, - }); - } -} - -export default NoneArchive; diff --git a/src/services/archive/ZipArchive.ts b/src/services/archive/ZipArchive.ts deleted file mode 100644 index 05d3f97..0000000 --- a/src/services/archive/ZipArchive.ts +++ /dev/null @@ -1,15 +0,0 @@ -import { Archive } from "./"; -import { Duplex } from "node:stream"; -import { ParseOne } from "unzipper"; - -class ZipArchive implements Archive { - public static instance: Archive = new ZipArchive(); - - public extract(source: string): Duplex { - return ParseOne(new RegExp(source), { - forceStream: true, - }); - } -} - -export default ZipArchive; diff --git a/src/services/archive/index.ts b/src/services/archive/index.ts deleted file mode 100644 index 46c66bf..0000000 --- a/src/services/archive/index.ts +++ /dev/null @@ -1,8 +0,0 @@ -export { default as ArchiveType } from "./ArchiveType"; - -export { default as Archive } from "./Archive"; -export { default as ArchiveFactory } from "./ArchiveFactory"; - -export { default as ZipArchive } from "./ZipArchive"; -export { default as GzipArchive } from "./GzipArchive"; -export { default as NoneArchive } from "./NoneArchive"; diff --git a/src/services/archive_extractor/Extractor.ts b/src/services/archive_extractor/Extractor.ts new file mode 100644 index 0000000..283db24 --- /dev/null +++ b/src/services/archive_extractor/Extractor.ts @@ -0,0 +1,5 @@ +import { Duplex } from "node:stream"; + +export default interface Extractor { + extract(options: { file: string }): Duplex; +} diff --git a/src/services/archive_extractor/ExtractorFactory.ts b/src/services/archive_extractor/ExtractorFactory.ts new file mode 100644 index 0000000..5ea3e1b --- /dev/null +++ b/src/services/archive_extractor/ExtractorFactory.ts @@ -0,0 +1,23 @@ +import Extractor from "./Extractor"; +import ZipExtractor from "./ZipExtractor"; +import GzipExtractor from "./GzipExtractor"; +import NoneExtractor from "./NoneExtractor"; + +export enum ExtractorType { + ZIP, + GZIP, + NONE, +} + +export default class ExtractorFactory { + static getExtractor(extractorType: ExtractorType): Extractor { + switch (extractorType) { + case ExtractorType.ZIP: + return ZipExtractor.instance; + case ExtractorType.GZIP: + return GzipExtractor.instance; + default: + return NoneExtractor.instance; + } + } +} diff --git a/src/services/archive_extractor/GzipExtractor.ts b/src/services/archive_extractor/GzipExtractor.ts new file mode 100644 index 0000000..d779c15 --- /dev/null +++ b/src/services/archive_extractor/GzipExtractor.ts @@ -0,0 +1,11 @@ +import Extractor from "./Extractor"; +import { Duplex } from "node:stream"; +import { createGunzip } from "node:zlib"; + +export default class GzipExtractor implements Extractor { + public static instance = new GzipExtractor(); + + extract(_: any): Duplex { + return createGunzip(); + } +} diff --git a/src/services/archive_extractor/NoneExtractor.ts b/src/services/archive_extractor/NoneExtractor.ts new file mode 100644 index 0000000..a99a163 --- /dev/null +++ b/src/services/archive_extractor/NoneExtractor.ts @@ -0,0 +1,15 @@ +import Extractor from "./Extractor"; +import { Duplex, Transform } from "node:stream"; +import { createGunzip } from "node:zlib"; + +export default class NoneExtractor implements Extractor { + public static instance = new NoneExtractor(); + + extract(_: any): Duplex { + return new Transform({ + transform(chunk, _, callback) { + callback(null, chunk); + }, + }); + } +} diff --git a/src/services/archive_extractor/ZipExtractor.ts b/src/services/archive_extractor/ZipExtractor.ts new file mode 100644 index 0000000..68b887c --- /dev/null +++ b/src/services/archive_extractor/ZipExtractor.ts @@ -0,0 +1,13 @@ +import { Duplex } from "node:stream"; +import Extractor from "./Extractor"; +import { ParseOne } from "unzipper"; + +export default class ZipExtractor implements Extractor { + public static instance: ZipExtractor = new ZipExtractor(); + + public extract(options: { file: string }): Duplex { + return ParseOne(new RegExp(options.file), { + forceStream: true, + }); + } +} diff --git a/src/services/data/Data.ts b/src/services/data/Data.ts deleted file mode 100644 index 87c1a66..0000000 --- a/src/services/data/Data.ts +++ /dev/null @@ -1,13 +0,0 @@ -interface Data { - id?: number; -} - -class InvalidData extends Error { - constructor(message: string) { - super(message); - this.name = "Invalid data"; - } -} - -export default Data; -export { InvalidData }; diff --git a/src/services/data/NudgerData.ts b/src/services/data/NudgerData.ts deleted file mode 100644 index c296365..0000000 --- a/src/services/data/NudgerData.ts +++ /dev/null @@ -1,53 +0,0 @@ -import { Data, InvalidData } from "./"; -import { Column, Entity, PrimaryGeneratedColumn } from "typeorm"; - -type RawNudgerData = { - code: string; // "3260014791012", - brand: string; // "ALSATEK", - model: string; // "TL33171", - name: string; // "alsatek lg g3 coque protection aluminium rouge bumper tl33171", - last_updated: string; // "1562430134146", - gs1_country: string; // "FR", - offers_count: string; // "0", - min_price: string; // "", - min_price_compensation: string; // "", - currency: string; // "", - categories: string; // "ACCESSOIRES>COQUE SMARTPHONE", - url: string; // "" -}; - -@Entity() -class NudgerData implements Data { - @PrimaryGeneratedColumn({ - type: "integer", - }) - id?: number; - - @Column() - barcode_ean_13: string; - - @Column() - country: string; - - constructor(code: string, gs1_country: string) { - this.barcode_ean_13 = code; - this.country = gs1_country; - } - - fromRaw({ code, gs1_country }: RawNudgerData): NudgerData { - if (!code || !gs1_country || code.length !== 13) { - throw new InvalidData("Invalid data"); - } - - return new NudgerData(code, gs1_country); - } - - asData(nudgerData: NudgerData): any { - return { - "Barcode (EAN 13)": nudgerData.barcode_ean_13, - Country: nudgerData.country, - }; - } -} - -export default NudgerData; diff --git a/src/services/data/OpenFoodFactsData.ts b/src/services/data/OpenFoodFactsData.ts deleted file mode 100644 index f23cd16..0000000 --- a/src/services/data/OpenFoodFactsData.ts +++ /dev/null @@ -1,46 +0,0 @@ -import { Data, InvalidData } from "./"; -import { Column, Entity, PrimaryColumn, PrimaryGeneratedColumn } from "typeorm"; - -type RawOpenFoodFactsData = { - code: string; - countries_en: string; -}; - -@Entity() -class OpenFoodFactsData implements Data { - @PrimaryGeneratedColumn({ - type: "integer", - }) - id?: number; - - @Column() - barcode_ean_13: string; - - @Column() - country: string; - - constructor(code: string, gs1_country: string) { - this.barcode_ean_13 = code; - this.country = gs1_country; - } - - fromRaw({ - code, - countries_en, - }: RawOpenFoodFactsData): OpenFoodFactsData { - if (!code || !countries_en || code.length !== 13) { - throw new InvalidData("Invalid data"); - } - - return new OpenFoodFactsData(code, countries_en); - } - - asData(openData: OpenFoodFactsData): any { - return { - "Barcode (EAN 13)": openData.barcode_ean_13, - Country: openData.country, - }; - } -} - -export default OpenFoodFactsData; diff --git a/src/services/data/WorldCitiesData.ts b/src/services/data/WorldCitiesData.ts deleted file mode 100644 index 528fdb6..0000000 --- a/src/services/data/WorldCitiesData.ts +++ /dev/null @@ -1,45 +0,0 @@ -import { Data, InvalidData } from "./"; -import { Column, Entity, PrimaryGeneratedColumn } from "typeorm"; - -type RawWorldCitiesData = { - name: string; - country: string; - subcountry: string; - geonameid: string; -}; - -@Entity() -class WorldCitiesData implements Data { - @PrimaryGeneratedColumn({ - type: "integer", - }) - id?: number; - - @Column() - geoname_id: string; - - @Column() - country: string; - - constructor(geonameId: string, country: string) { - this.geoname_id = geonameId; - this.country = country; - } - - fromRaw({ geonameid, country }: RawWorldCitiesData): WorldCitiesData { - if (!geonameid || !country || geonameid.length !== 6) { - throw new InvalidData("Invalid data"); - } - - return new WorldCitiesData(geonameid, country); - } - - asData(worldCitiesData: WorldCitiesData): any { - return { - "Geoname ID": worldCitiesData.geoname_id, - Country: worldCitiesData.country, - }; - } -} - -export default WorldCitiesData; diff --git a/src/services/data/index.ts b/src/services/data/index.ts deleted file mode 100644 index 07b982e..0000000 --- a/src/services/data/index.ts +++ /dev/null @@ -1,5 +0,0 @@ -export { default as Data, InvalidData } from "./Data"; - -export { default as NudgerData } from "./NudgerData"; -export { default as OpenFoodFactsData } from "./OpenFoodFactsData"; -export { default as WorldCitiesData } from "./WorldCitiesData"; diff --git a/src/services/data_transformer/NudgerDataTransformer.ts b/src/services/data_transformer/NudgerDataTransformer.ts new file mode 100644 index 0000000..6eaaa38 --- /dev/null +++ b/src/services/data_transformer/NudgerDataTransformer.ts @@ -0,0 +1,35 @@ +import { Transform } from "node:stream"; + +type NudgerData = { + code: string; // "3260014791012", + brand: string; // "ALSATEK", + model: string; // "TL33171", + name: string; // "alsatek lg g3 coque protection aluminium rouge bumper tl33171", + last_updated: string; // "1562430134146", + gs1_country: string; // "FR", + offers_count: string; // "0", + min_price: string; // "", + min_price_compensation: string; // "", + currency: string; // "", + categories: string; // "ACCESSOIRES>COQUE SMARTPHONE", + url: string; // "" +}; + +export default class NudgerDataTransformer extends Transform { + constructor() { + super({ objectMode: true }); + } + + _transform(chunk: NudgerData, encoding: string, callback: () => void) { + const { code, gs1_country } = chunk; + + if (code && gs1_country) { + this.push({ + "Barcode (EAN 13)": code, + Country: gs1_country, + }); + } + + callback(); + } +} diff --git a/src/services/data_transformer/OpenfoodfactsDataTransformer.ts b/src/services/data_transformer/OpenfoodfactsDataTransformer.ts new file mode 100644 index 0000000..d976e13 --- /dev/null +++ b/src/services/data_transformer/OpenfoodfactsDataTransformer.ts @@ -0,0 +1,25 @@ +import { Transform } from "node:stream"; + +type OpenfoodfactsData = { + code: string; + countries_en: string; +}; + +export default class OpenfoodfactsDataTransformer extends Transform { + constructor() { + super({ objectMode: true }); + } + + _transform(chunk: OpenfoodfactsData, encoding: string, callback: () => void) { + const { code, countries_en } = chunk; + + if (code && countries_en) { + this.push({ + "Barcode (EAN 13)": code, + Country: countries_en, + }); + } + + callback(); + } +} diff --git a/src/services/data_transformer/WorldCitiesDataTransformer.ts b/src/services/data_transformer/WorldCitiesDataTransformer.ts new file mode 100644 index 0000000..1a2249d --- /dev/null +++ b/src/services/data_transformer/WorldCitiesDataTransformer.ts @@ -0,0 +1,27 @@ +import { Transform } from "node:stream"; + +type WorldCitiesData = { + name: string; + country: string; + subcountry: string; + geonameid: string; +}; + +export default class WorldCitiesDataTransformer extends Transform { + constructor() { + super({ objectMode: true }); + } + + _transform(chunk: WorldCitiesData, encoding: string, callback: () => void) { + const { geonameid, country } = chunk; + + if (geonameid && country) { + this.push({ + "Geoname ID": geonameid, + Country: country, + }); + } + + callback(); + } +} diff --git a/src/services/dataset/Dataset.ts b/src/services/dataset/Dataset.ts index e0f991b..c1ec821 100644 --- a/src/services/dataset/Dataset.ts +++ b/src/services/dataset/Dataset.ts @@ -1,72 +1,67 @@ import { pipeline, Transform, Writable } from "node:stream"; import { promisify } from "node:util"; -import { Validator } from "jsonschema"; import FileService from "../FileService"; -import { ArchiveFactory, ArchiveType } from "../archive"; import { ParserFactory, ParserType } from "../parser"; -import { Data, InvalidData} from "../data"; -import { AppDataSource } from "../../AppDataSource"; -import { EntityManager, EntityTarget, Repository } from "typeorm"; - -type DatasetParams = { - id: string; - dataConstructor: (params: any) => Data; - dataType: Data; - source: string; - file: string; - archiveType: ArchiveType; - parserType: ParserType; - options?: DatasetOptions; -}; - -type DatasetOptions = { - parser?: any; -}; +import ExtractorFactory, { + ExtractorType, +} from "../archive_extractor/ExtractorFactory"; +import Extractor from "../archive_extractor/Extractor"; +import Parser from "../parser/Parser"; +import { getDatabaseConnexion } from "../DataLake"; +import { validate } from "jsonschema"; /** * Represents a dataset that can be loaded and queried */ -class Dataset { +export default class Dataset { readonly id: string; - readonly source: string; - readonly file: string; - readonly archiveType: ArchiveType; - readonly parserType: ParserType; - readonly dataConstructor: (params: any) => Data; - readonly dataType: Data; - private options?: DatasetOptions; + readonly uri: string; + readonly endpoint: string; + + private extractor: Extractor = ExtractorFactory.getExtractor( + ExtractorType.NONE + ); + private parser: Parser = ParserFactory.getParser(ParserType.CSV); + private extractorOptions: any; + private parserOptions: any; + private dataTransformer?: Transform; /** * Create a new dataset instance * @param id - The unique identifier of the dataset * @param source - The URL of the dataset - * @param file - The name of the file in the archive - * @param dataType - The constructor of the data class - * @param archiveType - The type of the archive - * @param dataConstructor - The type of the dataset - * @param parserType - * @param options - Additional options for the dataset */ constructor({ id, - source, - file, - dataConstructor, - dataType, - archiveType, - parserType, - options, - }: DatasetParams) { + uri, + endpoint, + }: { + id: string; + uri: string; + endpoint: string; + }) { this.id = id; - this.dataConstructor = dataConstructor; - this.source = source; - this.file = file; - this.dataType = dataType; - this.archiveType = archiveType; - this.parserType = parserType; - this.options = options; + this.uri = uri; + this.endpoint = endpoint; + } + + setExtractor(type: ExtractorType, options: any): this { + this.extractor = ExtractorFactory.getExtractor(type); + this.extractorOptions = options; + return this; + } + + setParser(type: ParserType, options: any): this { + this.parser = ParserFactory.getParser(type); + this.parserOptions = options; + return this; + } + + setDataTransformer(dataTransformer: Transform): this { + this.dataTransformer = dataTransformer; + return this; } /** @@ -75,139 +70,81 @@ class Dataset { * @throws {Error} - If the dataset cannot be loaded */ public async load(): Promise { - // const repository: Repository = AppDataSource.getRepository(Data); + if (!this.dataTransformer) { + throw new Error("Data transformer is not set"); + } - // if ((await repository.count()) > 0) { - // console.log(`Already cached: ${this.source}`); - // return; - // } - - const archive = ArchiveFactory.getArchive(this.archiveType); - const parser = ParserFactory.getParser(this.parserType); + const db = await getDatabaseConnexion(); + const collection = db.collection(this.id); + const count = await collection.countDocuments(); + if (count > 0) { + console.log(`Dataset ${this.id} already loaded`); + return; + } const pipelineAsync = promisify(pipeline); - console.log(`Download: ${this.source}`); + console.log(`Download: ${this.uri}`); - // Start transaction - await AppDataSource.manager.transaction(async (manager) => { - await pipelineAsync( - await FileService.getFileStream(this.source), - archive.extract(this.file), - parser.parse(this.options?.parser), - Dataset.transformToData(this.dataConstructor, manager), - new Writable({ - objectMode: true, - write(chunk, _, callback) { - callback(); - }, - }) - ) - .then(() => { - console.log(`Loaded: ${this.source}`); - }) - .catch((err) => { - console.error(`Failed to load dataset: ${this.source}`); - throw err; - }); - }); + const datasetId = this.id; + + const batch = 1000; + const buffer: any[] = []; + + await pipelineAsync( + await FileService.getFileStream(this.uri), + this.extractor.extract(this.extractorOptions), + this.parser.parse(this.parserOptions), + this.dataTransformer, + new Writable({ + objectMode: true, + async write(chunk, _, callback) { + buffer.push(chunk); + if (buffer.length < batch) return callback(); + + const db = await getDatabaseConnexion(); + db.collection(datasetId) + .insertMany(buffer.splice(0, batch)) + .then(() => callback()) + .catch((error) => callback(error)); + }, + }) + ) + .then(async () => { + if (buffer.length > 0) { + await db.collection(datasetId).insertMany(buffer); + } + }) + .then(() => console.log(`Dataset ${this.id} loaded`)) + .catch((error) => + console.error(`Error loading dataset ${this.id}: ${error.message}`) + ); } - private static transformToData( - dataType: (params: any) => Data, - manager: EntityManager - ): Transform { - return new Transform({ - objectMode: true, - async transform(chunk: object, _, callback) { - try { - const data: Data = dataType(chunk); - await manager.save(data); - callback(null, JSON.stringify(data) + "\n"); - } catch (err: any) { - if (err instanceof InvalidData) { - callback(null, ""); - } else callback(err); - } - }, - }); - } + async get(length: number = 10, schema: any) { + const db = await getDatabaseConnexion(); - /** - * Get a number of data entries from the dataset - * @param length - The number of data entries to get (default: 10) - * @param schema - Schema of the expected data returned - */ - public async get(length: number = 10, schema: {}): Promise { - const dataRepository = AppDataSource.manager.getRepository( - this.dataType as EntityTarget - ); + // Convert JSON schema to projection + const projection = { _id: 0 }; + if (schema?.properties) { + for (const field in schema.properties) { + // @ts-ignore + projection[field] = 1; + } + } - const datas = await dataRepository - .createQueryBuilder("data") - .orderBy("RAND()") // Fonction RAND() pour randomiser l'ordre - .limit(length) // Limiter le nombre de résultats - .getMany(); + const datas = await db + .collection(this.id) + .aggregate([{ $project: projection }, { $sample: { size: length } }]) + .limit(length) + .toArray(); - return new Promise((resolve, reject) => { - let count: number = 0; - const results: Data[] = []; - const validator = new Validator(); - - datas.forEach((data) => { - let randomizedData = D.fromRaw(data); - // this.dataConstructor(data); - - if (validator.validate(randomizedData, schema)) { - results.push(randomizedData); - count++; - } - }); - return resolve(results); - }); - // // - // // const stream = fs.createReadStream(this.cachePath, { encoding: "utf8" }); - // // const rl = readline.createInterface({ - // // input: stream, - // // crlfDelay: Infinity, - // // }); - // // - // // - // // rl.on("line", (line) => { - // // if (count < length) { - // // const data: Data = JSON.parse(line) as Data; - // // if (validator.validate(data, schema)) { - // // results.push(data); - // // count++; - // // } - // // - // // // // Pour chaque objet, récupérer l'objet et vérifier que le schéma est valide - // // - // // // schema.input?.forEach((input: string, index: number) => { - // // // obj[input] = data.input[index]; - // // // }); - // // // schema.output?.forEach((output, index) => { - // // // obj[output] = data.output[index]; - // // // }); - // // - // // // // Add the object to the results - // // // count++; - // // } else { - // // rl.close(); // Fermer le flux si on a atteint les n objets - // // } - // // }); - // // - // // // Quand le flux est terminé ou a été fermé. - // // rl.on("close", () => { - // // resolve(results); // Renvoie les n objets lus - // // }); - // // - // // // Gérer les erreurs du flux de lecture - // // rl.on("error", (err) => { - // // reject(err); - // // }); - // // }); + return datas + .map((data) => { + const res = validate(data, schema); + if (!res.valid) return null; + return data; + }) + .filter((data) => data !== null); } } - -export default Dataset; diff --git a/src/services/dataset/DatasetCollection.ts b/src/services/dataset/DatasetCollection.ts index 1fc16b6..598e43c 100644 --- a/src/services/dataset/DatasetCollection.ts +++ b/src/services/dataset/DatasetCollection.ts @@ -1,56 +1,60 @@ -import { Data, NudgerData, OpenFoodFactsData, WorldCitiesData } from "../data"; -import { ArchiveType } from "../archive"; import { Dataset } from "./"; import { ParserType } from "../parser"; +import WorldCitiesDataTransformer from "../data_transformer/WorldCitiesDataTransformer"; +import { ExtractorType } from "../archive_extractor/ExtractorFactory"; +import NudgerDataTransformer from "../data_transformer/NudgerDataTransformer"; +import OpenfoodfactsDataTransformer from "../data_transformer/OpenfoodfactsDataTransformer"; +import axios from "axios"; class DatasetCollection { - public static datasets: Dataset[] = [ - // new Dataset({ - // id: "nudger", - // source: - // "https://files.opendatarchives.fr/data.cquest.org/open4goods/gtin-open-data.zip", - // file: "open4goods-full-gtin-dataset.csv", - // dataConstructor: NudgerData.fromRaw, - // dataType: NudgerData, - // archiveType: ArchiveType.ZIP, - // parserType: ParserType.CSV, - // options: { - // parser: { - // delimiter: ",", - // }, - // }, - // }), - // new Dataset({ - // id: "openfoodfacts", - // source: - // "https://static.openfoodfacts.org/data/en.openfoodfacts.org.products.csv.gz", - // file: "en.openfoodfacts.org.products.csv", - // dataConstructor: OpenFoodFactsData.fromRaw, - // dataType: OpenFoodFactsData, - // archiveType: ArchiveType.GZIP, - // parserType: ParserType.CSV, - // options: { - // parser: { - // delimiter: "\t", - // quote: null, - // }, - // }, - // }), - new Dataset({ + public static datasets: Dataset[] = [ + new Dataset({ + id: "nudger", + uri: "https://files.opendatarchives.fr/data.cquest.org/open4goods/gtin-open-data.zip", + endpoint: "http://localhost:4321/randomize/nudger", + }) + .setExtractor(ExtractorType.ZIP, { + file: "open4goods-full-gtin-dataset.csv", + }) + .setParser(ParserType.CSV, {}) + .setDataTransformer(new NudgerDataTransformer()), + new Dataset({ + id: "openfoodfacts", + uri: "https://static.openfoodfacts.org/data/en.openfoodfacts.org.products.csv.gz", + endpoint: "http://localhost:4321/randomize/openfoodfacts", + }) + .setExtractor(ExtractorType.GZIP, { + file: "en.openfoodfacts.org.products.csv", + }) + .setParser(ParserType.CSV, { delimiter: "\t", quote: null }) + .setDataTransformer(new OpenfoodfactsDataTransformer()), + new Dataset({ id: "world-cities", - source: - "https://raw.githubusercontent.com/datasets/world-cities/refs/heads/main/data/world-cities.csv", - file: "world-cities.csv", - dataConstructor: WorldCitiesData.fromRaw, - dataType: WorldCitiesData, - archiveType: ArchiveType.NONE, - parserType: ParserType.CSV, - }), + uri: "https://raw.githubusercontent.com/datasets/world-cities/refs/heads/main/data/world-cities.csv", + endpoint: "http://localhost:4321/randomize/world-cities", + }) + .setExtractor(ExtractorType.NONE, {}) + .setParser(ParserType.CSV, {}) + .setDataTransformer(new WorldCitiesDataTransformer()), ]; public static loadAll(): Promise { return Promise.all(this.datasets.map((dataset) => dataset.load())); } + + public static async getDatasetByMatchingSchema( + schema: any + ): Promise { + return await Promise.all( + this.datasets.map((dataset) => + axios + .post(dataset.endpoint, schema, { + params: { size: 1 }, + }) + .then((res) => (res.data.data.length > 0 ? dataset.endpoint : null)) + ) + ).then((endpoints) => endpoints.filter((endpoint) => endpoint !== null)); + } } export default DatasetCollection;