diff --git a/README.md b/README.md index 66bcd5445e60dc582911466b8252e4e7ce12e854..e99958b41472dae3ab1bc9e87901cef327e07ba6 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ Ce dépôt contient une petite infrastructure Docker Swarm capable de bruteforce ### Sous-projets -- **backend** : API Node.js/Express pour gérer les jobs de bruteforce, l’état du cluster, et le scaling des workers. +- **backend** : API Node.js/Express pour gérer les jobs de bruteforce, le **cache des hash** (Redis), l’état du cluster et le scaling des workers. - **worker** : service de bruteforce MD5 (conteneurs réplicables dans le Swarm). - **frontend** : application React permettant d’envoyer des hash, choisir les modes *gentil/normal/agressif* et visualiser l’état du cluster. - **infra** : configuration Docker Swarm (stack) et scripts éventuels. @@ -44,8 +44,19 @@ flowchart LR Proxy -->|"HTTP :8080"| Backend ``` -- **Flux utilisateur (UI)** : le frontend envoie des hash au backend (`POST /hash/manual`), qui les met en file dans Redis. Les workers consomment la file, bruteforcent et écrivent les résultats dans Redis ; le backend lit l’état et les résultats (`GET /hash/:id`). Le scaler (dans le backend) ajuste le nombre de réplicas du service `hash_worker` en fonction de la charge. -- **Flux tester** : le conteneur `hash_checker` (image enseignante) se connecte en WebSocket au `hash_proxy`. Il envoie `search MD5_HASH BEGIN END`. Le proxy appelle `POST /tester/search` puis poll `GET /tester/job/:id`, et renvoie `found` ou `notfound` au checker. +**Flux utilisateur (UI)** : +* Le frontend envoie des hash au backend (`POST /hash/manual`). +* Le backend vérifie d’abord le **cache des hash** (`jobs:cache`) ; en cas de hit, il crée un job « synthétique » et renvoie immédiatement `202 { id }` sans mettre en file. Sinon, le job est mis en file Redis. 
+* Les workers consomment la file, bruteforcent, écrivent les résultats et mettent à jour le cache. +* Le frontend récupère la liste des jobs via `GET /cluster/jobs`. +* Le scaler ajuste le nombre de réplicas du service `hash_worker` en fonction de la charge. + +**Flux tester** : +* Le conteneur `hash_checker` se connecte en WebSocket au `hash_proxy`. +* Il envoie `search MD5_HASH BEGIN END`. +* Le proxy appelle `POST /tester/search`. +* Le backend applique le même middleware de cache, mais n’utilise le cache que si le résultat en cache est **trouvé** (`found: true`), afin de garantir la cohérence des plages `begin`/`end`. Sinon, le job est mis en file. +* Le proxy poll `GET /tester/job/:id` et renvoie `found` ou `notfound` au checker. #### Flux de traitement d’un job (utilisateur ou tester) @@ -57,17 +68,23 @@ sequenceDiagram participant Worker as hash_worker Client->>Backend: POST /hash/manual ou POST /tester/search - Backend->>Redis: LPUSH jobs:pending, HSET jobs:status - Backend-->>Client: 202 { id } - Worker->>Redis: BRPOP jobs:pending - Worker->>Redis: SADD jobs:in_progress - Worker->>Worker: bruteforce (optionnellement entre begin/end) - Worker->>Redis: HSET jobs:results, jobs:status - Worker->>Redis: SREM jobs:in_progress + Backend->>Redis: HGET jobs:cache (middleware cache) + alt Cache hit (selon règles frontend/tester) + Backend->>Redis: HSET jobs:results, jobs:status, jobs:meta, LPUSH jobs:recent + Backend-->>Client: 202 { id } + else Pas de cache + Backend->>Redis: LPUSH jobs:pending, HSET jobs:status, jobs:meta, LPUSH jobs:recent + Backend-->>Client: 202 { id } + Worker->>Redis: BRPOP jobs:pending + Worker->>Redis: SADD jobs:in_progress + Worker->>Worker: bruteforce (optionnellement entre begin/end) + Worker->>Redis: HSET jobs:results, jobs:status, jobs:cache + Worker->>Redis: SREM jobs:in_progress + end loop Polling - Client->>Backend: GET /hash/:id ou GET /tester/job/:id - Backend->>Redis: HGET jobs:results ou jobs:status - Backend-->>Client: résultat 
ou statut + Client->>Backend: GET /cluster/jobs ou GET /tester/job/:id + Backend->>Redis: HGET jobs:meta, jobs:status, jobs:results + Backend-->>Client: liste jobs ou résultat end ``` @@ -91,6 +108,18 @@ flowchart LR Proxy -->|"found HASH PLAINTEXT"| Checker ``` +#### Cache des hash + +Pour éviter de re-bruteforcer un même hash déjà traité, un **cache** est géré dans Redis. + +- **Clé Redis** : `jobs:cache` (hash Redis : clé = hash MD5, valeur = JSON `{ found, plaintext, elapsedMs }`). +- **Lecture (backend)** : un middleware Express `cacheHashOrNext` est appliqué à `POST /hash/manual` et `POST /tester/search`. Avant toute mise en file, il vérifie si le hash est présent dans `jobs:cache`. En cas de hit : + - **Frontend** : tout résultat en cache (trouvé ou non) est réutilisé ; le backend crée un job « synthétique » (résultat, statut, métadonnées, liste des jobs récents) et renvoie `202 { id }` sans toucher à la file. + - **Tester** : le cache n’est utilisé que si le résultat en cache a `found === true`, afin de ne pas renvoyer « non trouvé » pour une plage `begin`/`end` différente. +- **Écriture (worker)** : après chaque bruteforce terminé sans erreur (hash trouvé ou non), le worker écrit dans `jobs:cache` le résultat associé au hash, pour les soumissions futures. + +Le worker ne consulte pas le cache : toute décision de cache est prise côté backend à la soumission. 
+ --- ### Prérequis diff --git a/backend/src/config.js b/backend/src/config.js index 70ec2d6d480c7cc06e16f64001edec054e10b1a1..7153699215f20bf799e20d91336db77d9e74099c 100644 --- a/backend/src/config.js +++ b/backend/src/config.js @@ -13,6 +13,7 @@ export const REDIS_KEYS = { JOBS_IN_PROGRESS: "jobs:in_progress", JOBS_META: "jobs:meta", JOBS_RECENT: "jobs:recent", + JOBS_CACHE: "jobs:cache", }; // Docker diff --git a/backend/src/middleware/cacheHash.js b/backend/src/middleware/cacheHash.js new file mode 100644 index 0000000000000000000000000000000000000000..3bd3444d873c6531d38a98f196e678cfabdec18f --- /dev/null +++ b/backend/src/middleware/cacheHash.js @@ -0,0 +1,52 @@ +import { v4 as uuidv4 } from "uuid"; +import { REDIS_KEYS } from "../config.js"; +import { redis } from "../clients.js"; + +/** + * Express middleware: if req.body.hash is present and we have a cached result + * for that hash, respond with 202 and a synthetic job id (no queue). Otherwise + * call next() so the route can validate and enqueue. + * + * @param options.getSource - (req) => string — source label for the synthetic job (e.g. "frontend", "tester") + * @param options.useCacheOnlyWhenFound - if true, use cache only when cached result has found === true (e.g. for tester with range) + */ +export function cacheHashOrNext(options = {}) { + const { + getSource = (req) => req.body?.source ?? "frontend", + useCacheOnlyWhenFound = false, + } = options; + + return async (req, res, next) => { + const hash = req.body?.hash; + if (!hash) return next(); + + const cached = await redis.hget(REDIS_KEYS.JOBS_CACHE, hash); + if (!cached) return next(); + + const parsed = JSON.parse(cached); + if (useCacheOnlyWhenFound && !parsed.found) return next(); + + console.log("Cache : using the cached record: ", {hash, ...parsed}); + const source = typeof getSource === "function" ? 
getSource(req) : getSource; + const jobId = uuidv4(); + const now = Date.now(); + const resultPayload = { + id: jobId, + hash, + found: parsed.found, + plaintext: parsed.plaintext ?? null, + elapsedMs: 0, // this way we know that it was read from cache + completedAt: now, + }; + await redis.hset(REDIS_KEYS.JOBS_RESULTS, jobId, JSON.stringify(resultPayload)); + await redis.hset(REDIS_KEYS.JOBS_STATUS, jobId, JSON.stringify({ status: "done" })); + await redis.hset( + REDIS_KEYS.JOBS_META, + jobId, + JSON.stringify({ hash, source, createdAt: now }) + ); + await redis.lpush(REDIS_KEYS.JOBS_RECENT, jobId); + await redis.ltrim(REDIS_KEYS.JOBS_RECENT, 0, 499); + return res.status(202).json({ id: jobId }); + }; +} diff --git a/backend/src/routes/hash.js b/backend/src/routes/hash.js index 1c334c10227c380db7581ab41843ed652cf8ddf8..04380348d8ff3e88e7214a2a7f6690fd54f1c533 100644 --- a/backend/src/routes/hash.js +++ b/backend/src/routes/hash.js @@ -2,40 +2,39 @@ import express from "express"; import { v4 as uuidv4 } from "uuid"; import { REDIS_KEYS } from "../config.js"; import { redis } from "../clients.js"; +import { cacheHashOrNext } from "../middleware/cacheHash.js"; const router = express.Router(); -// POST /hash/manual - queing the hash to be bruteforced by the worker -router.post("/manual", async (req, res) => { - const { hash, source } = req.body; - if (!hash) { - return res.status(400).json({ error: "hash is required" }); +// POST /hash/manual - queue the hash to be bruteforced by the worker (or serve from cache) +router.post( + "/manual", + cacheHashOrNext({ getSource: (req) => req.body?.source ?? "frontend" }), + async (req, res) => { + const { hash, source } = req.body; + if (!hash) { + return res.status(400).json({ error: "hash is required" }); + } + if (!source) { + console.log("Received job with no source meta data. 
Setting source to unknown..."); + } + + const jobId = uuidv4(); + const job = { id: jobId, hash, createdAt: Date.now() }; + + await redis.lpush(REDIS_KEYS.JOBS_PENDING, JSON.stringify(job)); + await redis.hset(REDIS_KEYS.JOBS_STATUS, jobId, JSON.stringify({ status: "queued" })); + await redis.hset( + REDIS_KEYS.JOBS_META, + jobId, + JSON.stringify({ hash, source: source ?? "unknown", createdAt: job.createdAt }) + ); + await redis.lpush(REDIS_KEYS.JOBS_RECENT, jobId); + await redis.ltrim(REDIS_KEYS.JOBS_RECENT, 0, 499); + + return res.status(202).json({ id: jobId }); } - if (!source){ - console.log("Received job with no source meta data. Setting source to unknown..."); - } - - const jobId = uuidv4(); - const job = { id: jobId, hash, createdAt: Date.now() }; - - /**@note */ - // Consider using Promise.then rather then two awaits, this - // will leave the nodejs runtime schedule them when it is - // appropriate regarding the current process load. Double - // awaits forces the event loop to consider these over other - // stuff, it is okey for critical stuff, but this is not ! - await redis.lpush(REDIS_KEYS.JOBS_PENDING, JSON.stringify(job)); - await redis.hset(REDIS_KEYS.JOBS_STATUS, jobId, JSON.stringify({ status: "queued" })); - await redis.hset( - REDIS_KEYS.JOBS_META, - jobId, - JSON.stringify({ hash, source: source ?? 
"unkown", createdAt: job.createdAt }) - ); - await redis.lpush(REDIS_KEYS.JOBS_RECENT, jobId); - await redis.ltrim(REDIS_KEYS.JOBS_RECENT, 0, 499); - - return res.status(202).json({ id: jobId }); -}); +); // GET /hash/:id - get the status of a specific job router.get("/:id", async (req, res) => { diff --git a/backend/src/routes/tester.js b/backend/src/routes/tester.js index c1eb30ff35c7001440ec039285354393d84b88e9..de7ac1a9857e4f289ccbdcf4d391f63f2cdb12e2 100644 --- a/backend/src/routes/tester.js +++ b/backend/src/routes/tester.js @@ -2,6 +2,7 @@ import express from "express"; import { v4 as uuidv4 } from "uuid"; import { REDIS_KEYS } from "../config.js"; import { redis } from "../clients.js"; +import { cacheHashOrNext } from "../middleware/cacheHash.js"; const router = express.Router(); @@ -9,51 +10,56 @@ function isNonEmptyString(value) { return typeof value === "string" && value.length > 0; } -// POST /tester/search - enqueue a range-aware bruteforce job -router.post("/search", async (req, res) => { - const { hash, begin, end } = req.body ?? {}; +// POST /tester/search - enqueue a range-aware bruteforce job (or serve from cache when found) +router.post( + "/search", + cacheHashOrNext({ getSource: () => "tester", useCacheOnlyWhenFound: true }), + async (req, res) => { + const { hash, begin, end } = req.body ?? 
{}; - if (!isNonEmptyString(hash)) { - return res.status(400).json({ error: "hash is required" }); - } + if (!isNonEmptyString(hash)) { + return res.status(400).json({ error: "hash is required" }); + } - if (!isNonEmptyString(begin) || !isNonEmptyString(end)) { - return res - .status(400) - .json({ error: "both begin and end must be non-empty strings" }); - } + if (!isNonEmptyString(begin) || !isNonEmptyString(end)) { + return res + .status(400) + .json({ error: "both begin and end must be non-empty strings" }); + } - if (begin.length > end.length || (begin.length === end.length && begin > end)) { - return res - .status(400) - .json({ error: "begin must be <= end in lexicographic order" }); - } + if (begin.length > end.length || (begin.length === end.length && begin > end)) { + return res + .status(400) + .json({ error: "begin must be <= end in lexicographic order" }); + } - const jobId = uuidv4(); - const job = { - id: jobId, - hash, - begin, - end, - createdAt: Date.now(), - }; + const jobId = uuidv4(); + const job = { + id: jobId, + source: "tester", + hash, + begin, + end, + createdAt: Date.now(), + }; - await redis.lpush(REDIS_KEYS.JOBS_PENDING, JSON.stringify(job)); - await redis.hset( - REDIS_KEYS.JOBS_STATUS, - jobId, - JSON.stringify({ status: "queued" }) - ); - await redis.hset( - REDIS_KEYS.JOBS_META, - jobId, - JSON.stringify({ hash, source: "tester", createdAt: job.createdAt }) - ); - await redis.lpush(REDIS_KEYS.JOBS_RECENT, jobId); - await redis.ltrim(REDIS_KEYS.JOBS_RECENT, 0, 499); + await redis.lpush(REDIS_KEYS.JOBS_PENDING, JSON.stringify(job)); + await redis.hset( + REDIS_KEYS.JOBS_STATUS, + jobId, + JSON.stringify({ status: "queued" }) + ); + await redis.hset( + REDIS_KEYS.JOBS_META, + jobId, + JSON.stringify({ hash, source: "tester", createdAt: job.createdAt }) + ); + await redis.lpush(REDIS_KEYS.JOBS_RECENT, jobId); + await redis.ltrim(REDIS_KEYS.JOBS_RECENT, 0, 499); - return res.status(202).json({ id: jobId }); -}); + return 
res.status(202).json({ id: jobId }); + } +); // GET /tester/job/:id - normalized view over status/results router.get("/job/:id", async (req, res) => { diff --git a/hash_checker_test_results/hash_checker_tests_result-1770649214.bin b/hash_checker_test_results/hash_checker_tests_result-1770649214.bin deleted file mode 100644 index 6446e064d3941288be13319076240ad40afa02c5..0000000000000000000000000000000000000000 Binary files a/hash_checker_test_results/hash_checker_tests_result-1770649214.bin and /dev/null differ diff --git a/hash_checker_test_results/hash_checker_tests_result-1770649363.bin b/hash_checker_test_results/hash_checker_tests_result-1770649363.bin deleted file mode 100644 index 66308dc0fb75a1e7af695f3ce58f4a21bd2565a1..0000000000000000000000000000000000000000 Binary files a/hash_checker_test_results/hash_checker_tests_result-1770649363.bin and /dev/null differ diff --git a/hash_checker_test_results/hash_checker_tests_result-1771158701.bin b/hash_checker_test_results/hash_checker_tests_result-1771158701.bin new file mode 100644 index 0000000000000000000000000000000000000000..d339409864c823287f3ed00f8de721acc75b67f3 Binary files /dev/null and b/hash_checker_test_results/hash_checker_tests_result-1771158701.bin differ diff --git a/hash_checker_test_results/hash_checker_tests_result-1771158851.bin b/hash_checker_test_results/hash_checker_tests_result-1771158851.bin new file mode 100644 index 0000000000000000000000000000000000000000..3665c0ea18b9a551a92bb7771c409a196abe99b6 Binary files /dev/null and b/hash_checker_test_results/hash_checker_tests_result-1771158851.bin differ diff --git a/proxy/index.js b/proxy/index.js index c7fa8cc39b05eb45191f627328a614b53ba15529..8dfc7ebbc37df2a17cc56a3240c30ecea92e4d0e 100644 --- a/proxy/index.js +++ b/proxy/index.js @@ -31,7 +31,7 @@ async function submitJobTester(hash, begin, end) { const res = await fetch(`${BACKEND_BASE_URL}/tester/search`, { method: "POST", headers: { "Content-Type": "application/json" }, - body: 
JSON.stringify({ hash, begin, end }), + body: JSON.stringify({ hash, begin, end, source:"tester" }), }); if (!res.ok) { diff --git a/run-swarm.sh b/run-swarm.sh index 5b21654a1eecca4b3aa2b0b7bfa558023891fa68..dd6ea58f02ba2aa756e70a2e809b7a0c2fec1167 100755 --- a/run-swarm.sh +++ b/run-swarm.sh @@ -13,7 +13,7 @@ docker build -t md5-swarm-proxy:latest ./proxy echo "[*] Vérification du mode Swarm..." if ! docker info 2>/dev/null | grep -q "Swarm: active"; then echo "[*] Swarm non initialisé, exécution de 'docker swarm init'..." - docker swarm init --advertise-addr 10.67.138.118 + docker swarm init --advertise-addr 10.193.26.118 else echo "[*] Swarm déjà actif." fi diff --git a/todo.txt b/todo.txt index 1f666c32e67d9af6e2cf9fcbefdf3ffda6e4c2d6..05763ef88b8e4607e25307ce839d7ddf1728cca0 100644 --- a/todo.txt +++ b/todo.txt @@ -1,4 +1,3 @@ -* add a cache of the already computed hashes, using redis since we already integrated it in the backend. * review the code, simplify and simplify! diff --git a/worker/src/index.js b/worker/src/index.js index cac1249f74aff1dc1b7ba9e90ad0666bf8b5692d..b32ed1c3b51450f36c77a77311f0cf16d69979d2 100644 --- a/worker/src/index.js +++ b/worker/src/index.js @@ -3,6 +3,7 @@ import crypto from "crypto"; const redisUrl = process.env.REDIS_URL || "redis://redis:6379"; const redis = new Redis(redisUrl); +const JOBS_CACHE = "jobs:cache"; const charset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; @@ -19,17 +20,17 @@ async function bruteforce(hash, begin, end) { // allows us to satisfy our need without pre-computing // all the possible words before hand which requires // much more memory to store them all! - + // If you draw the execution path of this generator // function, you'll see that the recursive calls generate // the words in a tree like fashion. 
function* generateStrings(maxLen, prefix = "") { // base case - if (prefix.length === maxLen){ + if (prefix.length === maxLen) { yield prefix; return; }; - + // yield current state/word if (prefix.length > 0) { yield prefix; @@ -66,13 +67,13 @@ async function bruteforce(hash, begin, end) { // this is our main function of the woker async function processJobs() { let job; - + while (true) { - + job = null; - + try { - + // poll a pending job from the queue const res = await redis.brpop("jobs:pending", 0); if (!res) continue; @@ -108,17 +109,28 @@ async function processJobs() { JSON.stringify({ status: "done" }) ); + // cache result by hash for future lookups + await redis.hset( + JOBS_CACHE, + job.hash, + JSON.stringify({ + found: result.found, + plaintext: result.plaintext ?? null, + elapsedMs: result.elapsedMs, + }) + ); + // unregister the job id from the in_progress // so now worker goes idle, which is important // info for the scaler await redis.srem("jobs:in_progress", job.id); console.log(`Job ${job.id} processed, found=${result.found}`); - - + + } catch (err) { console.error("Worker error:", err); - + // check if the error occured after the job polling // if that is true, then job should not be undefined @@ -148,7 +160,7 @@ async function processJobs() { // do not forget to remove if from the in_progress queue await redis.srem("jobs:in_progress", job.id); } - + console.log(`Job ${job?.id ?? "?"} failed (error)`); } }