mirror of
https://github.com/kmitresse/Compo-Service-Log-Project.git
synced 2026-05-13 17:11:49 +00:00
feat!: Download and extract from stream
This commit is contained in:
Generated
+18
-79
@@ -9,17 +9,18 @@
|
|||||||
"version": "1.0.0",
|
"version": "1.0.0",
|
||||||
"license": "ISC",
|
"license": "ISC",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"axios": "^1.7.7",
|
|
||||||
"dmn-js": "^16.7.1",
|
"dmn-js": "^16.7.1",
|
||||||
"dotenv": "^16.4.5",
|
"dotenv": "^16.4.5",
|
||||||
"express": "^4.21.0",
|
"express": "^4.21.0",
|
||||||
"fs-extra": "^11.2.0",
|
"fs-extra": "^11.2.0",
|
||||||
|
"papaparse": "^5.4.1",
|
||||||
"unzipper": "^0.12.3"
|
"unzipper": "^0.12.3"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@types/express": "^4.17.21",
|
"@types/express": "^4.17.21",
|
||||||
"@types/fs-extra": "^11.0.4",
|
"@types/fs-extra": "^11.0.4",
|
||||||
"@types/node": "^22.5.5",
|
"@types/node": "^22.5.5",
|
||||||
|
"@types/papaparse": "^5.3.14",
|
||||||
"@types/unzipper": "^0.10.10",
|
"@types/unzipper": "^0.10.10",
|
||||||
"prettier": "3.3.3",
|
"prettier": "3.3.3",
|
||||||
"ts-node": "^10.9.2",
|
"ts-node": "^10.9.2",
|
||||||
@@ -333,6 +334,16 @@
|
|||||||
"undici-types": "~6.19.2"
|
"undici-types": "~6.19.2"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@types/papaparse": {
|
||||||
|
"version": "5.3.14",
|
||||||
|
"resolved": "https://registry.npmjs.org/@types/papaparse/-/papaparse-5.3.14.tgz",
|
||||||
|
"integrity": "sha512-LxJ4iEFcpqc6METwp9f6BV6VVc43m6MfH0VqFosHvrUgfXiFe6ww7R3itkOQ+TCK6Y+Iv/+RnnvtRZnkc5Kc9g==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"@types/node": "*"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/@types/qs": {
|
"node_modules/@types/qs": {
|
||||||
"version": "6.9.16",
|
"version": "6.9.16",
|
||||||
"resolved": "https://registry.npmjs.org/@types/qs/-/qs-6.9.16.tgz",
|
"resolved": "https://registry.npmjs.org/@types/qs/-/qs-6.9.16.tgz",
|
||||||
@@ -432,23 +443,6 @@
|
|||||||
"integrity": "sha512-PCVAQswWemu6UdxsDFFX/+gVeYqKAod3D3UVm91jHwynguOwAvYPhx8nNlM++NqRcK6CxxpUafjmhIdKiHibqg==",
|
"integrity": "sha512-PCVAQswWemu6UdxsDFFX/+gVeYqKAod3D3UVm91jHwynguOwAvYPhx8nNlM++NqRcK6CxxpUafjmhIdKiHibqg==",
|
||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
},
|
},
|
||||||
"node_modules/asynckit": {
|
|
||||||
"version": "0.4.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
|
|
||||||
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==",
|
|
||||||
"license": "MIT"
|
|
||||||
},
|
|
||||||
"node_modules/axios": {
|
|
||||||
"version": "1.7.7",
|
|
||||||
"resolved": "https://registry.npmjs.org/axios/-/axios-1.7.7.tgz",
|
|
||||||
"integrity": "sha512-S4kL7XrjgBmvdGut0sN3yJxqYzrDOnivkBiN0OFs6hLiUam3UPvswUo0kqGyhqUZGEOytHyumEdXsAkgCOUf3Q==",
|
|
||||||
"license": "MIT",
|
|
||||||
"dependencies": {
|
|
||||||
"follow-redirects": "^1.15.6",
|
|
||||||
"form-data": "^4.0.0",
|
|
||||||
"proxy-from-env": "^1.1.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/bluebird": {
|
"node_modules/bluebird": {
|
||||||
"version": "3.7.2",
|
"version": "3.7.2",
|
||||||
"resolved": "https://registry.npmjs.org/bluebird/-/bluebird-3.7.2.tgz",
|
"resolved": "https://registry.npmjs.org/bluebird/-/bluebird-3.7.2.tgz",
|
||||||
@@ -516,18 +510,6 @@
|
|||||||
"node": ">=6"
|
"node": ">=6"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/combined-stream": {
|
|
||||||
"version": "1.0.8",
|
|
||||||
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
|
|
||||||
"integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
|
|
||||||
"license": "MIT",
|
|
||||||
"dependencies": {
|
|
||||||
"delayed-stream": "~1.0.0"
|
|
||||||
},
|
|
||||||
"engines": {
|
|
||||||
"node": ">= 0.8"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/component-event": {
|
"node_modules/component-event": {
|
||||||
"version": "0.2.1",
|
"version": "0.2.1",
|
||||||
"resolved": "https://registry.npmjs.org/component-event/-/component-event-0.2.1.tgz",
|
"resolved": "https://registry.npmjs.org/component-event/-/component-event-0.2.1.tgz",
|
||||||
@@ -632,15 +614,6 @@
|
|||||||
"url": "https://github.com/sponsors/ljharb"
|
"url": "https://github.com/sponsors/ljharb"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/delayed-stream": {
|
|
||||||
"version": "1.0.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
|
|
||||||
"integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
|
|
||||||
"license": "MIT",
|
|
||||||
"engines": {
|
|
||||||
"node": ">=0.4.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/depd": {
|
"node_modules/depd": {
|
||||||
"version": "2.0.0",
|
"version": "2.0.0",
|
||||||
"resolved": "https://registry.npmjs.org/depd/-/depd-2.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/depd/-/depd-2.0.0.tgz",
|
||||||
@@ -980,40 +953,6 @@
|
|||||||
"node": ">= 0.8"
|
"node": ">= 0.8"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/follow-redirects": {
|
|
||||||
"version": "1.15.9",
|
|
||||||
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.9.tgz",
|
|
||||||
"integrity": "sha512-gew4GsXizNgdoRyqmyfMHyAmXsZDk6mHkSxZFCzW9gwlbtOW44CDtYavM+y+72qD/Vq2l550kMF52DT8fOLJqQ==",
|
|
||||||
"funding": [
|
|
||||||
{
|
|
||||||
"type": "individual",
|
|
||||||
"url": "https://github.com/sponsors/RubenVerborgh"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"license": "MIT",
|
|
||||||
"engines": {
|
|
||||||
"node": ">=4.0"
|
|
||||||
},
|
|
||||||
"peerDependenciesMeta": {
|
|
||||||
"debug": {
|
|
||||||
"optional": true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/form-data": {
|
|
||||||
"version": "4.0.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
|
|
||||||
"integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==",
|
|
||||||
"license": "MIT",
|
|
||||||
"dependencies": {
|
|
||||||
"asynckit": "^0.4.0",
|
|
||||||
"combined-stream": "^1.0.8",
|
|
||||||
"mime-types": "^2.1.12"
|
|
||||||
},
|
|
||||||
"engines": {
|
|
||||||
"node": ">= 6"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/forwarded": {
|
"node_modules/forwarded": {
|
||||||
"version": "0.2.0",
|
"version": "0.2.0",
|
||||||
"resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz",
|
"resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz",
|
||||||
@@ -1452,6 +1391,12 @@
|
|||||||
"opencollective-postinstall": "index.js"
|
"opencollective-postinstall": "index.js"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/papaparse": {
|
||||||
|
"version": "5.4.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/papaparse/-/papaparse-5.4.1.tgz",
|
||||||
|
"integrity": "sha512-HipMsgJkZu8br23pW15uvo6sib6wne/4woLZPlFf3rpDyMe9ywEXUsuD7+6K9PRkJlVT51j/sCOYDKGGS3ZJrw==",
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
"node_modules/parseurl": {
|
"node_modules/parseurl": {
|
||||||
"version": "1.3.3",
|
"version": "1.3.3",
|
||||||
"resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz",
|
"resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz",
|
||||||
@@ -1521,12 +1466,6 @@
|
|||||||
"node": ">= 0.10"
|
"node": ">= 0.10"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/proxy-from-env": {
|
|
||||||
"version": "1.1.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
|
|
||||||
"integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==",
|
|
||||||
"license": "MIT"
|
|
||||||
},
|
|
||||||
"node_modules/qs": {
|
"node_modules/qs": {
|
||||||
"version": "6.13.0",
|
"version": "6.13.0",
|
||||||
"resolved": "https://registry.npmjs.org/qs/-/qs-6.13.0.tgz",
|
"resolved": "https://registry.npmjs.org/qs/-/qs-6.13.0.tgz",
|
||||||
|
|||||||
+3
-2
@@ -9,23 +9,24 @@
|
|||||||
},
|
},
|
||||||
"keywords": [],
|
"keywords": [],
|
||||||
"authors": [
|
"authors": [
|
||||||
"Kévin Mitressé",
|
"Kevin Mitressé",
|
||||||
"Lucàs Vabre"
|
"Lucàs Vabre"
|
||||||
],
|
],
|
||||||
"license": "ISC",
|
"license": "ISC",
|
||||||
"description": "",
|
"description": "",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"axios": "^1.7.7",
|
|
||||||
"dmn-js": "^16.7.1",
|
"dmn-js": "^16.7.1",
|
||||||
"dotenv": "^16.4.5",
|
"dotenv": "^16.4.5",
|
||||||
"express": "^4.21.0",
|
"express": "^4.21.0",
|
||||||
"fs-extra": "^11.2.0",
|
"fs-extra": "^11.2.0",
|
||||||
|
"papaparse": "^5.4.1",
|
||||||
"unzipper": "^0.12.3"
|
"unzipper": "^0.12.3"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@types/express": "^4.17.21",
|
"@types/express": "^4.17.21",
|
||||||
"@types/fs-extra": "^11.0.4",
|
"@types/fs-extra": "^11.0.4",
|
||||||
"@types/node": "^22.5.5",
|
"@types/node": "^22.5.5",
|
||||||
|
"@types/papaparse": "^5.3.14",
|
||||||
"@types/unzipper": "^0.10.10",
|
"@types/unzipper": "^0.10.10",
|
||||||
"prettier": "3.3.3",
|
"prettier": "3.3.3",
|
||||||
"ts-node": "^10.9.2",
|
"ts-node": "^10.9.2",
|
||||||
|
|||||||
@@ -0,0 +1,21 @@
|
|||||||
|
import express from "express";
|
||||||
|
import routes from "./routes";
|
||||||
|
import { createServer } from "node:http";
|
||||||
|
import { logger } from "./middlewares";
|
||||||
|
|
||||||
|
/**
 * Thin wrapper around an Express application.
 *
 * The constructor builds the app and mounts the request logger followed by
 * the application routes; `start()` wraps the app in a Node HTTP server and
 * begins listening on the fixed port.
 */
export default class Server {
  /** TCP port the HTTP server listens on. */
  private static PORT: number = 4321;

  /** Express application with the logger and routes already mounted. */
  private readonly app: express.Application;

  constructor() {
    const application = express();
    application.use(logger, routes);
    this.app = application;
  }

  /**
   * Creates the HTTP server and starts listening.
   * Logs the local URL once the listening callback fires.
   */
  public start() {
    const port = Server.PORT;
    const announce = () => {
      console.info(`Server is running on http://localhost:${port}`);
    };

    createServer(this.app).listen(port, announce);
  }
}
|
||||||
+5
-13
@@ -1,17 +1,9 @@
|
|||||||
import express from "express";
|
|
||||||
import dotenv from "dotenv";
|
import dotenv from "dotenv";
|
||||||
import { createServer } from "node:http";
|
import Server from "./Server";
|
||||||
import { logger } from "./middlewares";
|
import NudgerDatasetService from "./services/dataset/NudgerDatasetService";
|
||||||
import routes from "./routes";
|
|
||||||
|
|
||||||
dotenv.config();
|
dotenv.config();
|
||||||
|
|
||||||
const app = express();
|
Promise.all([NudgerDatasetService.loadDataset()])
|
||||||
app.use(logger, routes);
|
.then(() => new Server().start())
|
||||||
const server = createServer(app);
|
.catch(console.error);
|
||||||
|
|
||||||
server.listen(process.env.PORT || 8080, () => {
|
|
||||||
console.info(
|
|
||||||
`Server is running on http://localhost:${process.env.PORT || 8080}`
|
|
||||||
);
|
|
||||||
});
|
|
||||||
|
|||||||
@@ -1,20 +1,9 @@
|
|||||||
import { Router, Request, Response } from "express";
|
import { Router, Request, Response } from "express";
|
||||||
import fileService from "../../services/fileService";
|
|
||||||
|
|
||||||
const router = Router();
|
const router = Router();
|
||||||
|
|
||||||
router.get("/data/nudger", (req: Request, res: Response) => {
|
router.get("/data/nudger", (req: Request, res: Response) => {
|
||||||
fileService
|
res.status(501).send("Not yet implemented");
|
||||||
.downloadAndExtract("https://nudger.fr/opendata/gtin-open-data.zip")
|
|
||||||
.then(() => {
|
|
||||||
res.status(200).json({
|
|
||||||
status: "SUCCESS",
|
|
||||||
message: "Data nudger downloaded and extracted",
|
|
||||||
});
|
|
||||||
})
|
|
||||||
.catch((error) => {
|
|
||||||
res.status(500).json({ status: "ERROR", message: error.message });
|
|
||||||
});
|
|
||||||
});
|
});
|
||||||
|
|
||||||
export default router;
|
export default router;
|
||||||
|
|||||||
@@ -1,22 +1,9 @@
|
|||||||
import { Router, Request, Response } from "express";
|
import { Router, Request, Response } from "express";
|
||||||
import fileService from "../../services/fileService";
|
|
||||||
|
|
||||||
const router = Router();
|
const router = Router();
|
||||||
|
|
||||||
router.get("/data/openfoodfacts", (req: Request, res: Response) => {
|
router.get("/data/openfoodfacts", (req: Request, res: Response) => {
|
||||||
fileService
|
res.status(501).send("Not yet implemented");
|
||||||
.downloadAndExtract(
|
|
||||||
"https://static.openfoodfacts.org/data/en.openfoodfacts.org.products.csv.gz"
|
|
||||||
)
|
|
||||||
.then(() => {
|
|
||||||
res.status(200).json({
|
|
||||||
status: "SUCCESS",
|
|
||||||
message: "Data openfoodfacts downloaded and extracted",
|
|
||||||
});
|
|
||||||
})
|
|
||||||
.catch((error) => {
|
|
||||||
res.status(500).json({ status: "ERROR", message: error.message });
|
|
||||||
});
|
|
||||||
});
|
});
|
||||||
|
|
||||||
export default router;
|
export default router;
|
||||||
|
|||||||
@@ -0,0 +1,27 @@
|
|||||||
|
import { createHash } from "node:crypto";
|
||||||
|
import { join } from "node:path";
|
||||||
|
import * as fs from "fs-extra";
|
||||||
|
|
||||||
|
/**
 * Maps remote URLs to locations inside the local cache directory.
 *
 * Every URL gets its own entry under `CACHE_DIR`, named after the MD5 hex
 * digest of the URL string. MD5 is used here only to derive a file-system
 * friendly key, not for any security purpose.
 */
class CacheService {
  /** Root directory under which all cache entries live. */
  public static readonly CACHE_DIR: string = "./cache";

  /**
   * Derives the cache key for a URL.
   *
   * @param url URL to hash.
   * @returns The MD5 digest of `url` as a lowercase hex string.
   */
  public static generateCacheKey(url: string): string {
    return createHash("md5").update(url).digest("hex");
  }

  /**
   * Computes where the cache entry for a URL is (or would be) stored.
   *
   * The class is referenced by name rather than through `this`, so the method
   * keeps working when it is detached and passed around as a plain callback.
   *
   * @param url URL whose cache location is wanted.
   * @returns `CACHE_DIR` joined with the URL's cache key.
   */
  public static getCachePath(url: string): string {
    return join(CacheService.CACHE_DIR, CacheService.generateCacheKey(url));
  }

  /**
   * Tells whether a cache entry exists on disk for a URL.
   *
   * Only the existence of the path is checked; its contents are not inspected.
   *
   * @param url URL to look up.
   * @returns `true` when the cache path for `url` exists.
   */
  public static isCached(url: string): boolean {
    return fs.pathExistsSync(CacheService.getCachePath(url));
  }
}
|
||||||
|
|
||||||
|
fs.ensureDirSync(CacheService.CACHE_DIR);
|
||||||
|
|
||||||
|
export default CacheService;
|
||||||
@@ -0,0 +1,44 @@
|
|||||||
|
import { extname } from "node:path";
|
||||||
|
import { ArchiveExtractorFactory, ArchiveType } from "./archive_extractor";
|
||||||
|
|
||||||
|
/**
 * Downloads a remote archive and unpacks it straight from the response
 * stream, without writing the archive itself to disk.
 */
class FileService {
  /**
   * Downloads the archive at `url` and extracts it into `output`.
   *
   * The extractor is chosen from the file extension found in the URL path.
   *
   * @param url Location of the archive to download.
   * @param output Directory the archive is extracted into.
   * @throws Error when no extractor exists for the extension, when the HTTP
   *   response is not successful, or when the response has no body.
   */
  public static async downloadAndExtract(
    url: string,
    output: string
  ): Promise<void> {
    const fileType: string = FileService.getFileExtension(url);

    // Resolved before any network traffic so unsupported formats fail fast.
    const archiveExtractor = ArchiveExtractorFactory.getExtractor(
      fileType as ArchiveType
    );

    console.log(`Downloading ${url}`);
    const stream = await FileService.getFileStream(url);
    return archiveExtractor.extract(stream, output);
  }

  /**
   * Issues a GET request and returns the response body as a stream.
   *
   * @param url Location to fetch.
   * @returns The response body stream.
   * @throws Error when the response status is not OK or the body is missing.
   */
  private static async getFileStream(url: string): Promise<ReadableStream> {
    const response = await fetch(url, {
      method: "GET",
      // NOTE(review): a Content-Type header on a body-less GET describes
      // nothing; an Accept header may have been intended. Left unchanged
      // because switching it could alter how servers negotiate the response.
      headers: {
        "Content-Type": "application/octet-stream",
      },
    });

    if (!response.ok) {
      // statusText may be empty, so the numeric status is always included.
      throw new Error(
        `Failed to download file: ${response.status} ${response.statusText}`.trimEnd()
      );
    }

    if (!response.body) {
      throw new Error("Response body is not a readable stream");
    }

    return response.body;
  }

  /**
   * Extracts the lowercase file extension (leading dot included) from a URL.
   *
   * Only the path component is considered, so a query string or fragment
   * (`archive.zip?token=abc`) does not end up inside the extension. Input that
   * is not an absolute URL is treated as a plain path.
   *
   * @param url URL or path to inspect.
   * @returns The extension such as `.zip`, or an empty string when there is none.
   */
  private static getFileExtension(url: string): string {
    let path: string;
    try {
      path = new URL(url).pathname;
    } catch {
      path = url;
    }
    return extname(path).toLowerCase();
  }
}
|
||||||
|
|
||||||
|
export default FileService;
|
||||||
@@ -0,0 +1,5 @@
|
|||||||
|
/**
 * Contract implemented by every archive format handler.
 */
interface ArchiveExtractor {
  /**
   * Unpacks an archive delivered as a stream.
   *
   * @param stream Stream carrying the raw archive bytes.
   * @param destinationPath Directory the archive content is written to.
   * @returns A promise that settles when extraction has finished.
   */
  extract(stream: ReadableStream, destinationPath: string): Promise<void>;
}
|
||||||
|
|
||||||
|
export default ArchiveExtractor;
|
||||||
@@ -0,0 +1,16 @@
|
|||||||
|
import ArchiveExtractor from "./ArchiveExtractor";
|
||||||
|
import ZipExtractor from "./ZipExtractor";
|
||||||
|
|
||||||
|
/**
 * Archive formats that can be extracted, keyed by their file extension
 * (leading dot included).
 */
enum ArchiveType {
  ZIP = ".zip",
}

/**
 * Selects the extractor matching an archive type.
 */
class ArchiveExtractorFactory {
  /**
   * Returns the shared extractor instance for `archiveType`.
   *
   * Callers may pass an unchecked cast of an arbitrary extension, so the
   * rejected value is included in the error to make failures diagnosable.
   *
   * @param archiveType Extension-based archive type.
   * @returns The extractor handling that type.
   * @throws Error when no extractor is registered for the given type.
   */
  static getExtractor(archiveType: ArchiveType): ArchiveExtractor {
    switch (archiveType) {
      case ArchiveType.ZIP:
        return ZipExtractor.instance;
      default:
        throw new Error(
          `Unsupported archive type: ${JSON.stringify(archiveType)}`
        );
    }
  }
}
|
||||||
|
|
||||||
|
export default ArchiveExtractorFactory;
|
||||||
|
export { ArchiveType };
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
import ArchiveExtractor from "./ArchiveExtractor";
|
||||||
|
import unzipper from "unzipper";
|
||||||
|
import { pipeline } from "node:stream";
|
||||||
|
import { promisify } from "node:util";
|
||||||
|
|
||||||
|
/**
 * Extractor for ZIP archives, backed by the `unzipper` package.
 */
class ZipExtractor implements ArchiveExtractor {
  /** Shared instance handed out to callers. */
  public static instance: ArchiveExtractor = new ZipExtractor();

  /**
   * Pipes the archive stream into an `unzipper` extraction target.
   *
   * @param stream Stream carrying the ZIP bytes.
   * @param destinationPath Directory passed to `unzipper.Extract` as `path`.
   * @returns A promise that settles when the promisified pipeline completes.
   */
  async extract(
    stream: ReadableStream,
    destinationPath: string
  ): Promise<void> {
    const sink = unzipper.Extract({ path: destinationPath });
    const runPipeline = promisify(pipeline);

    await runPipeline(stream, sink);
  }
}
|
||||||
|
|
||||||
|
export default ZipExtractor;
|
||||||
@@ -0,0 +1,6 @@
|
|||||||
|
export { default as ArchiveExtractor } from "./ArchiveExtractor";
|
||||||
|
export {
|
||||||
|
default as ArchiveExtractorFactory,
|
||||||
|
ArchiveType,
|
||||||
|
} from "./ArchiveExtractorFactory";
|
||||||
|
export { default as ZipExtractor } from "./ZipExtractor";
|
||||||
@@ -0,0 +1,40 @@
|
|||||||
|
import FileService from "../FileService";
|
||||||
|
import CacheService from "../CacheService";
|
||||||
|
import { extname, join } from "node:path";
|
||||||
|
import { DatasetParserFactory, DatasetType } from "../dataset_parser";
|
||||||
|
|
||||||
|
/**
 * Access to the Nudger GTIN open-data set: download into the cache and
 * parsing of the extracted source file.
 */
class NudgerDatasetService {
  /** Remote location of the zipped dataset. */
  private static URL: string =
    "https://files.opendatarchives.fr/data.cquest.org/open4goods/gtin-open-data.zip";

  /** Name of the file read from the cache directory when parsing. */
  private static SOURCE_FILE: string = "open4goods-full-gtin-dataset.csv";

  /** Cache directory assigned to this dataset's URL. */
  private static CACHE_PATH: string = CacheService.getCachePath(
    NudgerDatasetService.URL
  );

  /**
   * Makes sure the dataset is available locally.
   *
   * @returns An already-resolved promise when the cache reports the URL as
   *   present; otherwise the promise of the download-and-extract operation.
   */
  public static loadDataset(): Promise<void> {
    const { URL: datasetUrl, CACHE_PATH: targetDirectory } =
      NudgerDatasetService;

    return CacheService.isCached(datasetUrl)
      ? Promise.resolve()
      : FileService.downloadAndExtract(datasetUrl, targetDirectory);
  }

  /**
   * Parses the dataset source file with the parser matching its extension.
   *
   * @returns Whatever the selected parser's `parse` returns.
   */
  public static parse() {
    const datasetType = extname(
      NudgerDatasetService.SOURCE_FILE
    ).toLowerCase() as DatasetType;

    return DatasetParserFactory.getParser(datasetType).parse(
      NudgerDatasetService.getSourcePath()
    );
  }

  /**
   * @returns The path of the source file inside the dataset's cache directory.
   */
  public static getSourcePath(): string {
    const { CACHE_PATH: directory, SOURCE_FILE: fileName } =
      NudgerDatasetService;

    return join(directory, fileName);
  }
}
|
||||||
|
|
||||||
|
export default NudgerDatasetService;
|
||||||
@@ -0,0 +1,17 @@
|
|||||||
|
import DatasetParser from "./DatasetParser";
|
||||||
|
import * as fs from "node:fs";
|
||||||
|
import Papa from "papaparse";
|
||||||
|
|
||||||
|
/**
 * CSV dataset parser backed by PapaParse, reading the file as a stream.
 */
class CsvParser implements DatasetParser {
  /** Shared instance handed out to callers. */
  public static instance: CsvParser = new CsvParser();

  /**
   * Streams the CSV file through PapaParse, logging every parsed row.
   *
   * The returned promise is tied to PapaParse's `complete` and `error`
   * callbacks, so awaiting it waits for the whole file and surfaces
   * failures instead of resolving before any row has been read.
   *
   * @param filePath Path of the CSV file to parse.
   * @returns A promise resolved once parsing completes.
   * @throws Rejects with the error reported through PapaParse's `error` callback.
   */
  async parse(filePath: string) {
    const stream = fs.createReadStream(filePath);

    await new Promise<void>((resolve, reject) => {
      Papa.parse(stream, {
        // NOTE(review): PapaParse worker mode looks browser-only; confirm
        // whether this flag has any effect when running under Node.
        worker: true,
        step: (res) => console.log("Row:", res.data),
        complete: () => resolve(),
        error: (error) => reject(error),
      });
    });
  }
}
|
||||||
|
|
||||||
|
export default CsvParser;
|
||||||
@@ -0,0 +1,5 @@
|
|||||||
|
/**
 * Contract implemented by every dataset file parser.
 */
interface DatasetParser {
  /**
   * Parses the dataset stored at the given location.
   *
   * @param filePath Path of the file to parse.
   * @returns Implementation-defined; the contract leaves the result untyped.
   */
  parse(filePath: string): unknown;
}
|
||||||
|
|
||||||
|
export default DatasetParser;
|
||||||
@@ -0,0 +1,16 @@
|
|||||||
|
import DatasetParser from "./DatasetParser";
|
||||||
|
import CsvParser from "./CsvParser";
|
||||||
|
|
||||||
|
/**
 * Dataset file formats that can be parsed, keyed by their file extension
 * (leading dot included).
 */
enum DatasetType {
  CSV = ".csv",
}

/**
 * Selects the parser matching a dataset file type.
 */
class DatasetParserFactory {
  /**
   * Returns the shared parser instance for `fileType`.
   *
   * Callers may pass an unchecked cast of an arbitrary extension, so the
   * rejected value is included in the error to make failures diagnosable.
   *
   * @param fileType Extension-based dataset type.
   * @returns The parser handling that type.
   * @throws Error when no parser is registered for the given type.
   */
  static getParser(fileType: DatasetType): DatasetParser {
    switch (fileType) {
      case DatasetType.CSV:
        return CsvParser.instance;
      default:
        throw new Error(`Unsupported file type: ${JSON.stringify(fileType)}`);
    }
  }
}
|
||||||
|
|
||||||
|
export default DatasetParserFactory;
|
||||||
|
export { DatasetType };
|
||||||
@@ -0,0 +1,6 @@
|
|||||||
|
export { default as CsvParser } from "./CsvParser";
|
||||||
|
export { default as DatasetParser } from "./DatasetParser";
|
||||||
|
export {
|
||||||
|
default as DatasetParserFactory,
|
||||||
|
DatasetType,
|
||||||
|
} from "./DatasetParserFactory";
|
||||||
@@ -1,131 +0,0 @@
|
|||||||
import axios from "axios";
|
|
||||||
import * as unzipper from "unzipper";
|
|
||||||
import * as fs from "fs-extra";
|
|
||||||
import * as zlib from "zlib";
|
|
||||||
import { extname, join, basename } from "path";
|
|
||||||
import crypto from "crypto"; // Utilisé pour générer des identifiants uniques basés sur l'URL
|
|
||||||
|
|
||||||
type SupportedFormats = "zip" | "gz" | "gzip";
|
|
||||||
|
|
||||||
/**
 * Downloads remote archives and unpacks them into a local on-disk cache.
 *
 * Each URL gets its own directory under `cacheDir`, named after the MD5 hex
 * digest of the URL. The existence of that directory is what marks a URL as
 * already cached.
 */
class FileService {
  private cacheDir: string;

  constructor() {
    this.cacheDir = "./cache";
    fs.ensureDirSync(this.cacheDir);
  }

  /**
   * Download the file at the given URL and extract it into the cache.
   *
   * Failures are logged and then rethrown so callers can react to them. The
   * partially filled cache directory is removed on failure; otherwise the
   * next call would treat the URL as cached.
   *
   * @param url URL of the file to download
   * @throws Error when the format is unsupported, or when the download or the
   *   extraction fails
   */
  async downloadAndExtract(url: string): Promise<void> {
    const fileType = this.getFileExtension(url);
    if (!fileType) throw new Error("Unsupported file format");
    if (this.isInCache(url)) return;

    const cachedPath = join(this.cacheDir, this.generateCacheKey(url));

    try {
      const response = await axios({
        method: "GET",
        url,
        responseType: "stream",
      });
      console.log(`Downloading : ${url}`);

      // Decompress and store in the cache
      fs.ensureDirSync(cachedPath);

      if (fileType === "zip") {
        await this.extractZip(response.data, cachedPath);
      } else {
        // Detection is case-insensitive, so the suffix strip must be as well.
        const fileName = basename(this.getUrlPath(url)).replace(
          /\.(gz|gzip)$/i,
          ""
        );
        await this.extractGzip(response.data, join(cachedPath, fileName));
      }

      console.log(`Downloaded and extracted : ${basename(url)}`);
    } catch (error) {
      console.error(
        "An error occurred while downloading and extracting the file",
        error
      );
      // Best-effort cleanup: a cleanup failure must not mask the real error.
      await fs.remove(cachedPath).catch(() => undefined);
      throw error;
    }
  }

  /**
   * Check whether the file is already in the cache
   * @param url URL of the file
   * @private
   */
  private isInCache(url: string): boolean {
    const cacheKey = this.generateCacheKey(url);
    const cachedPath = join(this.cacheDir, cacheKey);
    return fs.pathExistsSync(cachedPath);
  }

  /**
   * Extract a ZIP stream into the cache
   * @param stream Stream of the downloaded file
   * @param cachePath Directory the archive content is extracted into
   * @private
   */
  private async extractZip(
    stream: NodeJS.ReadableStream,
    cachePath: string
  ): Promise<void> {
    return new Promise((resolve, reject) => {
      const extractor = unzipper.Extract({ path: cachePath });

      // `pipe` does not forward errors, so the source is observed separately.
      stream.on("error", reject);
      extractor.on("close", () => resolve()).on("error", reject);

      stream.pipe(extractor);
    });
  }

  /**
   * Extract a GZ / GZIP stream into the cache
   * @param stream Stream of the downloaded file
   * @param cachePath Path where the decompressed file is stored
   * @private
   */
  private async extractGzip(
    stream: NodeJS.ReadableStream,
    cachePath: string
  ): Promise<void> {
    return new Promise((resolve, reject) => {
      // Drop a trailing '.gz' so that 'file.gz' is written as 'file'
      const decompressedFilePath = cachePath.replace(/\.gz$/, "");

      const gunzip = zlib.createGunzip();
      const writeStream = fs.createWriteStream(decompressedFilePath);

      // `pipe` does not forward errors, so every stage is observed; on
      // failure the remaining stages are torn down to release the file handle.
      const fail = (error: Error) => {
        gunzip.destroy();
        writeStream.destroy();
        reject(error);
      };
      stream.on("error", fail);
      gunzip.on("error", fail);
      writeStream.on("error", fail);
      writeStream.on("finish", () => resolve());

      // Chain download -> decompression -> file on disk
      stream.pipe(gunzip).pipe(writeStream);
    });
  }

  /**
   * Get the supported format matching the file extension found in the URL.
   *
   * Only the path component is inspected, so a query string or fragment does
   * not hide the extension.
   *
   * @param url URL of the file
   * @returns `"zip"`, `"gz"` (also for `.gzip`), or `null` when unsupported
   */
  private getFileExtension(url: string): SupportedFormats | null {
    const extension = extname(this.getUrlPath(url)).toLowerCase();
    if (extension === ".zip") return "zip";
    if (extension === ".gz" || extension === ".gzip") return "gz";
    return null;
  }

  /**
   * Return the path component of a URL, or the input unchanged when it is
   * not an absolute URL.
   * @param url URL or plain path
   */
  private getUrlPath(url: string): string {
    try {
      return new URL(url).pathname;
    } catch {
      return url;
    }
  }

  /**
   * Generate a unique identifier for the file, based on its URL
   * @param url URL of the file
   */
  private generateCacheKey(url: string): string {
    return crypto.createHash("md5").update(url).digest("hex");
  }
}
|
|
||||||
|
|
||||||
export default new FileService();
|
|
||||||
Reference in New Issue
Block a user