first commit

This commit is contained in:
Stefano Rossi 2025-07-12 17:25:18 +02:00
commit 7d4e05de19
Signed by: chadmin
GPG key ID: 9EFA2130646BC893
27 changed files with 7574 additions and 0 deletions

7
.gitignore vendored Normal file
View file

@ -0,0 +1,7 @@
node_modules
.DS_Store
dist
*.local
.vite-inspect
.remote-assets
components.d.ts

42
.gitlab-ci.yml Normal file
View file

@ -0,0 +1,42 @@
image: node:lts
stages:
- build
- deploy
cache:
paths:
- node_modules/
# Build job: installs the system libraries and Playwright's Chromium, then
# runs the `build` npm script and keeps `dist` as the job artifact.
build:
  stage: build
  script:
    # Retry the package index refresh with progressively more tolerant options.
    - apt-get update --allow-releaseinfo-change || apt-get update --fix-missing || (sleep 10 && apt-get update)
    # Shared libraries needed by headless Chromium (each package listed once).
    - apt-get install -y --no-install-recommends --fix-missing xvfb libgtk-3-0 libnss3 libasound2 libatk1.0-0 libatk-bridge2.0-0 libcups2 libdrm2 libxkbcommon0 libxcomposite1 libxdamage1 libxfixes3 libxrandr2 libgbm1
    # NOTE(review): the repository also ships pnpm-lock.yaml and the README uses
    # pnpm; `npm install` does not read that lockfile - confirm which package
    # manager CI is meant to use.
    - npm install
    - export PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1
    - npm install -g playwright-chromium @playwright/test
    # Install Chromium explicitly; fall back to installing only its OS dependencies.
    - npx playwright install --with-deps chromium || npx playwright install-deps chromium
    - npm run build
  artifacts:
    paths:
      - dist
  # Re-run the job up to two more times when a script step fails.
  retry:
    max: 2
    when: script_failure
# GitLab Pages job: copies the `dist` artifact of the build job into `public`,
# the directory this job publishes as its artifact.
pages:
  stage: deploy
  dependencies:
    - build
  script:
    - mkdir -p public
    # Fail the job when there is nothing to copy instead of printing a warning
    # and going on to publish an empty site.
    - cp -r dist/* public/ || { echo "Error, no files to copy or dist directory is empty"; exit 1; }
    - ls -la public/
    - echo "Pages deployment running on branch $CI_COMMIT_REF_NAME"
  artifacts:
    paths:
      - public
  # Only deploy from the default branches.
  only:
    - main
    - master

3
.npmrc Normal file
View file

@ -0,0 +1,3 @@
# for pnpm
shamefully-hoist=true
auto-install-peers=true

38
README.md Normal file
View file

@ -0,0 +1,38 @@
# Welcome to [Slidev](https://github.com/slidevjs/slidev)!
To start the slide show:
- `pnpm install`
- `pnpm dev`
- visit <http://localhost:3030/?time=10>
Edit the [slides.md](./slides.md) to see the changes.
Learn more about Slidev at the [documentation](https://sli.dev/).
## Structure
```bash
your-slidev/
├── components/ # custom components
├── layouts/ # custom layouts
├── public/ # static assets
├── setup/ # custom setup / hooks
├── snippets/ # code snippets
├── styles/ # custom style
├── index.html # injections to index.html
├── slides.md # the main slides entry
└── vite.config.ts # extending vite config
```
### Global layers
Global layers allow you to have custom components that persist across slides. This could be useful for having footers, cross-slide animations, global effects, etc.
[Pattern:](https://sli.dev/features/global-layers)
- global-top.vue
- global-bottom.vue
- custom-nav-controls.vue
- slide-top.vue
- slide-bottom.vue

37
components/Counter.vue Normal file
View file

@ -0,0 +1,37 @@
<script setup lang="ts">
import { ref } from 'vue'
// Props: `count` is the starting value of the counter (defaults to 0).
const props = defineProps({
count: {
default: 0,
},
})
// Local state seeded from `props.count` when the component is set up. The
// -/+ buttons in the template mutate this ref; nothing writes back to the prop,
// and later changes of the prop are not copied into `counter`.
const counter = ref(props.count)
</script>
<template>
<div flex="~" w="min" border="~ main rounded-md">
<button
border="r main"
p="2"
font="mono"
outline="!none"
hover:bg="gray-400 opacity-20"
@click="counter -= 1"
>
-
</button>
<span m="auto" p="2">{{ counter }}</span>
<button
border="l main"
p="2"
font="mono"
outline="!none"
hover:bg="gray-400 opacity-20"
@click="counter += 1"
>
+
</button>
</div>
</template>

2
global-top.vue Normal file
View file

@ -0,0 +1,2 @@
<template>
</template>

16
netlify.toml Normal file
View file

@ -0,0 +1,16 @@
# Netlify build settings: run the npm `build` script and publish its `dist` output.
[build]
publish = "dist"
command = "npm run build"
# Node.js major version requested for the build environment.
[build.environment]
NODE_VERSION = "20"
# Serve /.well-known/* paths unchanged. This rule is listed before the catch-all
# below - presumably so these paths are not rewritten to index.html
# (NOTE(review): relies on Netlify applying the first matching rule; confirm).
[[redirects]]
from = "/.well-known/*"
to = "/.well-known/:splat"
status = 200
# Catch-all rewrite (status 200, so the URL is kept): every other path is
# answered with index.html, i.e. a single-page-app fallback.
[[redirects]]
from = "/*"
to = "/index.html"
status = 200

30
package.json Normal file
View file

@ -0,0 +1,30 @@
{
  "name": "benchmark-redteam",
  "type": "module",
  "private": true,
  "scripts": {
    "build": "slidev build",
    "dev": "slidev --open",
    "export": "slidev export"
  },
  "dependencies": {
    "@slidev/cli": "^51.4.0",
    "@slidev/theme-default": "latest",
    "@slidev/theme-seriph": "latest",
    "slidev-addon-python-runner": "^0.1.3",
    "slidev-addon-rabbit": "^0.4.0",
    "vue": "^3.5.13"
  },
  "pnpm": {
    "ignoredBuiltDependencies": [
      "esbuild"
    ],
    "onlyBuiltDependencies": [
      "playwright-chromium"
    ]
  },
  "devDependencies": {
    "playwright-chromium": "^1.51.0"
  }
}

View file

@ -0,0 +1,68 @@
# Advanced Attack Techniques
<div class="grid-3">
<div class="card">
<h2>Prompt Obfuscation</h2>
<p>Using techniques like Base64 encoding, character transformations (e.g., ROT13), or prompt-level obfuscations to <span class="highlight-word">bypass restrictions</span>.</p>
</div>
<div class="card">
<h2>Model-based Jailbreaking</h2>
<p>Automating the creation of adversarial attacks by evolving simple synthetic inputs into more <span class="highlight-word">complex attacks</span>.</p>
</div>
<div class="card">
<h2>Dialogue-based Jailbreaking</h2>
<p>Employing <span class="highlight-word">reinforcement learning</span> with two models: the target LLM and a red-teamer model trained to exploit vulnerabilities.</p>
</div>
<div class="card">
<h2>Primary Areas of Concern</h2>
<ul>
<li><span class="highlight-word">Organizational reputation</span> damage</li>
<li><span class="highlight-word">Legal compliance</span> violations</li>
<li><span class="highlight-word">Data security</span> breaches</li>
</ul>
</div>
</div>
<style>
.attack-techniques-layout {
display: grid;
grid-template-columns: 1fr;
grid-template-rows: auto auto auto;
gap: 1rem;
}
.primary-card {
grid-row: 1;
background: linear-gradient(135deg, rgba(30, 41, 59, 0.7), rgba(30, 41, 59, 0.9));
border-left: 4px solid var(--accent-color);
}
.secondary-cards {
grid-row: 2;
display: flex;
gap: 1rem;
}
.secondary-cards .card {
flex: 1;
}
.concerns-card {
grid-row: 3;
border-top: 2px solid var(--primary-color);
background: linear-gradient(135deg, rgba(30, 41, 59, 0.6), rgba(30, 41, 59, 0.8));
}
.highlight-word {
color: var(--highlight);
font-weight: 600;
transition: all 0.3s ease;
}
.card:hover .highlight-word {
text-shadow: 0 0 8px rgba(14, 165, 233, 0.6);
}
</style>

40
pages/best-practices.md Normal file
View file

@ -0,0 +1,40 @@
# Best Practices for LLM Security Benchmarking
<ul class="better-list">
<li><span class="highlight-word animated-highlight">Comprehensive vulnerability coverage</span>: Test for all five risk categories, not just obvious harmful content generation.</li>
<li><span class="highlight-word animated-highlight">Systematic approach</span>: Combine automated testing with human red-teaming for maximum effectiveness.</li>
<li><span class="highlight-word animated-highlight">Continuous evaluation</span>: Security benchmarking should be an ongoing process throughout the LLM lifecycle, not a one-time assessment.</li>
<li><span class="highlight-word animated-highlight">Attack diversity</span>: Employ multiple attack techniques and enhancement methods to thoroughly probe the system.</li>
<li><span class="highlight-word animated-highlight">Detailed analysis</span>: Go beyond simple pass/fail metrics to understand vulnerability scores and their breakdown for targeted improvements.</li>
</ul>
<style>
.highlight-word {
color: var(--highlight);
font-weight: 600;
}
.animated-highlight {
background: linear-gradient(90deg, var(--highlight), var(--primary-color));
background-clip: text;
-webkit-background-clip: text;
color: transparent;
background-size: 200% auto;
animation: gentle-shimmer 4s linear infinite;
}
@keyframes gentle-shimmer {
0% { background-position: 0% 50%; }
100% { background-position: 200% 50%; }
}
.better-list li:hover {
transform: translateX(5px);
background: rgba(30, 35, 52, 0.9);
border-left-width: 5px;
}
</style>

97
pages/deepeval-example.md Normal file
View file

@ -0,0 +1,97 @@
# Implementation Tools: DeepEval RedTeamer
<div class="code-container themed-code fullpage-code">
```py
from deepeval.red_teaming import RedTeamer
from deepeval.vulnerabilities import Bias, Misinformation
# The enum types used below must be imported as well, otherwise the example
# fails with NameError before any scan starts.
from deepeval.vulnerabilities.bias import BiasType
from deepeval.vulnerabilities.misinformation import MisinformationType

# Describe the application under test so generated attacks are on-topic.
red_teamer = RedTeamer(
    target_purpose="Provide financial advice and answer user finance queries",
    target_system_prompt="You are a financial assistant for planning and advice"
)

# Vulnerability classes (and the sub-types of each) to probe.
vulnerabilities = [
    Bias(types=[BiasType.GENDER, BiasType.POLITICS]),
    Misinformation(types=[MisinformationType.FACTUAL_ERRORS])
]

# NOTE: `target_model_callback` must be defined before this call - it wraps the
# LLM application under test (attack prompt in, model reply out); see the
# DeepEval docs for the exact sync/async signature your version expects.
results = red_teamer.scan(
    target_model_callback=target_model_callback,
    attacks_per_vulnerability_type=5,
    vulnerabilities=vulnerabilities,
)

print(f"Total attacks: {len(results.attacks)}")
print(f"Successful attacks: {len(results.successful_attacks)}")
print(f"Success rate: {results.attack_success_rate}")
```
</div>
<style>
.code-container {
max-height: 75vh;
height: 75vh;
overflow-y: auto;
margin-bottom: 0;
margin-top: 1rem;
border-radius: 8px;
border: 1px solid var(--primary-color);
box-shadow: 0 4px 12px var(--card-shadow);
transition: all 0.3s ease;
}
.code-container:hover {
transform: scale(1.01);
box-shadow: 0 8px 24px rgba(0, 0, 0, 0.5);
border-color: var(--highlight);
}
.fullpage-code pre {
padding: 1.5rem !important;
}
.fullpage-code code {
font-size: 0.8rem !important;
line-height: 1.5 !important;
}
/* Hover effect for each word in code: every highlighted token becomes an
   inline-block so it can be scaled, and is enlarged and recoloured on hover.
   NOTE(review): these selectors (and the `.themed-code .token.*` colour rules)
   assume Prism-style `.token` spans in the rendered code block - verify that
   the highlighter used by this Slidev version actually emits that class. */
.fullpage-code .token {
transition: all 0.15s ease;
display: inline-block;
}
/* position: relative is what lets z-index lift the hovered token above its neighbours. */
.fullpage-code .token:hover {
transform: scale(1.2);
z-index: 10;
position: relative;
cursor: pointer;
color: var(--highlight);
}
.themed-code pre {
background-color: #0c1525 !important;
}
.themed-code .token.comment {
color: #6272a4 !important;
}
.themed-code .token.string {
color: #a43e3e !important;
}
.themed-code .token.function {
color: #0066CC !important;
}
.themed-code .token.keyword {
color: #800020 !important;
}
.themed-code .token.builtin {
color: #B22222 !important;
}
</style>

178
pages/end.md Normal file
View file

@ -0,0 +1,178 @@
<div class="bouncing-container">
<div class="bg-icon security-icon" style="top: 35%; left: 15%;"><i class="fas fa-shield-alt"></i></div>
<div class="bg-icon bug-icon" style="top: 65%; left: 70%;"><i class="fas fa-bug"></i></div>
<div class="bg-icon ai-icon" style="top: 20%; left: 80%;"><i class="fas fa-robot"></i></div>
<div class="bg-icon lock-icon" style="top: 75%; left: 30%;"><i class="fas fa-lock"></i></div>
<div class="bg-icon warning-icon" style="top: 45%; left: 60%;"><i class="fas fa-exclamation-triangle"></i></div>
<div class="bouncing-box">
<h1 class="multicolor-text">Questions?</h1>
</div>
</div>
<style>
.bouncing-container {
position: relative;
width: 100%;
height: 80vh;
overflow: hidden;
}
.bouncing-box {
position: absolute;
padding: 2rem 3rem;
background: rgba(23, 28, 45, 0.8);
border: 3px solid var(--primary-color);
border-radius: 10px;
box-shadow: 0 6px 24px rgba(0, 0, 0, 0.3);
animation: bounce 20s linear infinite;
z-index: 10;
top: 10%;
left: 20%;
}
.multicolor-text {
font-size: 3rem;
font-weight: bold;
text-align: center;
background: linear-gradient(
to right,
#800020, /* Deep burgundy */
#B22222, /* Firebrick red */
#0066CC, /* Deeper blue */
#104E8B, /* Dark blue */
#800020 /* Back to burgundy */
);
background-size: 400% auto;
color: transparent;
-webkit-background-clip: text;
background-clip: text;
animation: gentle-rainbow 6s linear infinite;
}
/* Floating background icons. Each icon's *-bounce animation moves it via
   top/left and also varies font-size on this element. */
.bg-icon {
  position: absolute;
  font-size: 4rem;
  z-index: 5;
}
/* Take the size from .bg-icon so the font-size keyframes actually resize the
   glyph; a fixed 4rem on the <i> pinned the icon and hid that part of the
   animation. */
.bg-icon i {
  font-size: inherit;
}
.security-icon {
animation: security-bounce 24s linear infinite;
color: rgba(0, 102, 204, 0.35); /* Blue icon */
}
.bug-icon {
animation: bug-bounce 22s linear infinite;
color: rgba(178, 34, 34, 0.35); /* Red icon */
}
.ai-icon {
animation: ai-bounce 26s linear infinite;
color: rgba(103, 92, 246, 0.35); /* Purple icon */
}
.lock-icon {
animation: lock-bounce 28s linear infinite;
color: rgba(15, 116, 147, 0.35); /* Blue-teal icon */
}
.warning-icon {
animation: warning-bounce 25s linear infinite;
color: rgba(176, 27, 27, 0.35); /* Red warning icon */
}
@keyframes bounce {
0% {
top: 10%;
left: 20%;
}
12.5% {
top: 70%;
left: 75%;
}
25% {
top: 30%;
left: 80%;
}
37.5% {
top: 80%;
left: 15%;
}
50% {
top: 40%;
left: 10%;
}
62.5% {
top: 65%;
left: 50%;
}
75% {
top: 25%;
left: 40%;
}
87.5% {
top: 55%;
left: 65%;
}
100% {
top: 10%;
left: 20%;
}
}
@keyframes security-bounce {
0% { top: 35%; left: 15%; font-size: 3.8rem; }
20% { top: 75%; left: 40%; font-size: 4.2rem; }
40% { top: 25%; left: 75%; font-size: 3.5rem; }
60% { top: 65%; left: 25%; font-size: 4rem; }
80% { top: 45%; left: 60%; font-size: 3.7rem; }
100% { top: 35%; left: 15%; font-size: 3.8rem; }
}
@keyframes bug-bounce {
0% { top: 65%; left: 70%; font-size: 4.2rem; }
25% { top: 20%; left: 30%; font-size: 3.6rem; }
50% { top: 80%; left: 20%; font-size: 4rem; }
75% { top: 40%; left: 75%; font-size: 3.8rem; }
100% { top: 65%; left: 70%; font-size: 4.2rem; }
}
@keyframes ai-bounce {
0% { top: 20%; left: 80%; font-size: 3.6rem; }
20% { top: 55%; left: 15%; font-size: 4.1rem; }
40% { top: 70%; left: 60%; font-size: 3.7rem; }
60% { top: 25%; left: 45%; font-size: 4.2rem; }
80% { top: 60%; left: 85%; font-size: 3.8rem; }
100% { top: 20%; left: 80%; font-size: 3.6rem; }
}
@keyframes lock-bounce {
0% { top: 75%; left: 30%; font-size: 4rem; }
25% { top: 30%; left: 65%; font-size: 3.5rem; }
50% { top: 65%; left: 15%; font-size: 4.3rem; }
75% { top: 15%; left: 50%; font-size: 3.7rem; }
100% { top: 75%; left: 30%; font-size: 4rem; }
}
@keyframes warning-bounce {
0% { top: 45%; left: 60%; font-size: 3.9rem; }
20% { top: 15%; left: 25%; font-size: 4.2rem; }
40% { top: 60%; left: 40%; font-size: 3.5rem; }
60% { top: 30%; left: 80%; font-size: 4.1rem; }
80% { top: 70%; left: 10%; font-size: 3.8rem; }
100% { top: 45%; left: 60%; font-size: 3.9rem; }
}
@keyframes gentle-rainbow {
0% { background-position: 0% 50%; }
50% { background-position: 100% 50%; }
100% { background-position: 0% 50%; }
}
</style>
<!-- Add Font Awesome for icons -->
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.4/css/all.min.css">

82
pages/intro.md Normal file
View file

@ -0,0 +1,82 @@
# Introduction
<br><br>
<div class="intro-container container-fade-in">
<div class="intro-point animated-text delay-1">
<div class="intro-icon"><i class="fas fa-microchip"></i></div>
<div>LLMs are increasingly integrated into <span class="highlight-word">critical applications</span></div>
</div>
<div class="intro-point animated-text delay-2">
<div class="intro-icon"><i class="fas fa-exclamation-triangle"></i></div>
<div>Security vulnerabilities present <span class="highlight-word">significant challenges</span></div>
</div>
<div class="intro-point animated-text delay-3">
<div class="intro-icon"><i class="fas fa-clipboard-check"></i></div>
<div>Need for <span class="highlight-word">systematic evaluation</span> approaches</div>
</div>
<div class="intro-point animated-text delay-4">
<div class="intro-icon"><i class="fas fa-user-secret"></i></div>
<div>Focus on <span class="highlight-word">red teaming</span> methodologies</div>
</div>
</div>
<style>
.intro-container {
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
width: 85%;
max-width: 700px;
margin: 0 auto;
padding: 1.25rem;
background: var(--background-card);
border-radius: 8px;
border: 1px solid var(--card-border);
box-shadow: 0 4px 12px var(--card-shadow);
}
.intro-point {
display: flex;
align-items: center;
margin-bottom: 1rem;
padding: 0.5rem;
width: 100%;
transition: transform 0.2s ease;
}
.intro-point:hover {
transform: translateX(5px);
}
.intro-icon {
flex: 0 0 3rem;
font-size: 1.5rem;
color: var(--primary-color);
display: flex;
align-items: center;
justify-content: center;
margin-right: 1rem;
}
.highlight-word {
color: var(--highlight);
font-weight: 600;
position: relative;
background: linear-gradient(90deg, var(--highlight), var(--primary-color));
background-clip: text;
-webkit-background-clip: text;
color: transparent;
background-size: 200% auto;
animation: gentle-shimmer 4s linear infinite;
}
@keyframes gentle-shimmer {
0% { background-position: 0% 50%; }
100% { background-position: 200% 50%; }
}
</style>

23
pages/major-bench-secu.md Normal file
View file

@ -0,0 +1,23 @@
# Major Benchmarks for LLM Security
<div class="grid-3">
<div class="card">
<h2 class="benchmark-title title-blue">Meta's CyberSecEval 2</h2>
<p>Introduced in April 2024, this benchmark suite evaluates both LLM security risks and cybersecurity capabilities.</p>
</div>
<div class="card">
<h2 class="benchmark-title title-purple">SEvenLLM-Bench</h2>
<p>A multiple-choice Q&A benchmark with 1300 test samples for evaluating LLM cybersecurity capabilities.</p>
</div>
<div class="card">
<h2 class="benchmark-title title-pink">SecLLMHolmes</h2>
<p>A generalized, automated framework for evaluating LLM performance in vulnerability detection.</p>
</div>
<div class="card">
<h2 class="benchmark-title title-cyan">SECURE</h2>
<p>The Security Extraction, Understanding & Reasoning Evaluation benchmark designed to assess LLM performance in realistic cybersecurity scenarios.</p>
</div>
</div>

View file

@ -0,0 +1,25 @@
# Red Teaming Methodology
<div class="two-column">
<div class="card">
<h2>Generating Adversarial Attacks</h2>
<ul>
<li>Creating inputs to elicit <span class="highlight-word">unsafe responses</span></li>
<li><span class="key-term">Baseline attack generation</span> strategies</li>
<li><span class="key-term">Attack enhancement</span> techniques</li>
</ul>
</div>
<div class="card">
<h2>Evaluating Target LLM Responses</h2>
<ul>
<li><span class="key-term">Response generation</span> analysis</li>
<li>Vulnerability-specific <span class="highlight-word">metrics</span></li>
<li>Feedback-based <span class="highlight-word">improvement</span></li>
</ul>
</div>
</div>
<div class="card key-insight" style="margin-top: 1.5rem;">
<strong>Key Insight:</strong> Red teaming simulates <span class="highlight-word">real-world adversarial scenarios</span> to find vulnerabilities before deployment, enabling <span class="highlight-word">preemptive security measures</span>.
</div>

View file

@ -0,0 +1,66 @@
# LLM Risks & Vulnerabilities
<div class="risks-table-container container-slide-up">
<table class="llm-risks-table stagger-container">
<thead>
<tr>
<th>LLM Risk</th>
<th>Vulnerabilities</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr>
<td><span class="highlight-word">Responsible AI</span> Risks</td>
<td>Bias, <span class="highlight-word">Toxicity</span></td>
<td>Ensuring ethical model behavior by preventing <span class="highlight-word">discriminatory outputs</span> and offensive content generation that could harm users or specific demographic groups</td>
</tr>
<tr>
<td><span class="highlight-word">Illegal Activities</span> Risks</td>
<td>IllegalActivity, <span class="highlight-word">GraphicContent</span></td>
<td>Preventing content that violates laws, promotes <span class="highlight-word">criminal behavior</span>, or generates instructions for harmful activities that could endanger public safety</td>
</tr>
<tr>
<td><span class="highlight-word">Brand Image</span> Risks</td>
<td>ExcessiveAgency, <span class="highlight-word">Robustness</span></td>
<td>Protecting organizational reputation by avoiding <span class="highlight-word">misinformation</span>, misattribution, and content that contradicts company values</td>
</tr>
<tr>
<td><span class="highlight-word">Data Privacy</span> Risks</td>
<td>PIILeakage, <span class="highlight-word">PromptLeakage</span></td>
<td>Safeguarding <span class="highlight-word">sensitive information</span> by preventing the exposure of personal identifiable information and confidential data</td>
</tr>
<tr>
<td><span class="highlight-word">Unauthorized Access</span> Risks</td>
<td>UnauthorizedAccess</td>
<td>Securing systems by preventing exploitation of LLMs to gain <span class="highlight-word">unauthorized system access</span> or execute malicious commands</td>
</tr>
</tbody>
</table>
</div>
<style>
.risks-table-container {
height: auto;
max-height: 500px;
overflow: visible;
}
.llm-risks-table td {
padding: 0.7rem 1rem;
line-height: 1.4;
}
.highlight-word {
background: linear-gradient(90deg, var(--highlight), var(--primary-color));
background-clip: text;
-webkit-background-clip: text;
color: transparent;
background-size: 200% auto;
animation: gentle-shimmer 4s linear infinite;
font-weight: 600;
}
@keyframes gentle-shimmer {
0% { background-position: 0% 50%; }
100% { background-position: 200% 50%; }
}
</style>

View file

@ -0,0 +1,25 @@
# Understanding LLM Vulnerabilities
<div class="two-column stagger-container">
<div class="card container-rotate-in">
<h2>Risk Categories</h2>
<ul class="enhanced-list">
<li><span class="key-term">Responsible AI</span>: Biases, toxicity, <span class="highlight-word">ethical concerns</span></li>
<li><span class="key-term">Illegal Activities</span>: Violent crimes, <span class="highlight-word">cybercrimes</span></li>
<li><span class="key-term">Brand Image</span>: Misinformation, <span class="highlight-word">competitive references</span></li>
<li><span class="key-term">Data Privacy</span>: PII leakage, <span class="highlight-word">credentials exposure</span></li>
<li><span class="key-term">Unauthorized Access</span>: System access, <span class="highlight-word">command execution</span></li>
</ul>
</div>
<div class="card container-rotate-in" style="animation-delay: 0.3s;">
<h2>Impact Areas</h2>
<ul class="enhanced-list">
<li><span class="highlight-word">User safety</span> and protection</li>
<li><span class="highlight-word">Organizational reputation</span> and trust</li>
<li><span class="highlight-word">Legal compliance</span> and regulations</li>
<li><span class="highlight-word">Data security</span> and privacy</li>
<li><span class="highlight-word">System integrity</span> and reliability</li>
</ul>
</div>
</div>

5992
pnpm-lock.yaml generated Normal file

File diff suppressed because it is too large Load diff

Binary file not shown.

19
setup/code-runners.ts Normal file
View file

@ -0,0 +1,19 @@
import { defineCodeRunnersSetup } from '@slidev/types'
/**
 * Custom code runners for Slidev's runnable code blocks.
 *
 * The object returned here maps a language id to a runner function
 * `(code, ctx) => { text } | { html }`.
 *
 * No custom runner is registered: the earlier placeholder runners called
 * `executePythonCodeRemotely` and `sanitizeHtml`, neither of which is defined
 * or imported in this file, so invoking them could only throw a
 * ReferenceError.
 *
 * NOTE(review): Python execution is presumably meant to come from
 * `slidev-addon-python-runner` (listed under `addons` in slides.md) - confirm
 * before adding a `python` runner here, since it could shadow the addon's.
 */
export default defineCodeRunnersSetup(() => {
  return {
    // Add runners here, keyed by language id, once a real implementation
    // exists, e.g. `html(code, ctx) { return { html: ... } }`.
  }
})

7
setup/mermai.ts Normal file
View file

@ -0,0 +1,7 @@
import { defineMermaidSetup } from '@slidev/types'
// Mermaid options for the deck: render diagrams with the built-in `forest` theme.
// NOTE(review): Slidev's documentation names this setup file `setup/mermaid.ts`;
// the commit lists it as `setup/mermai.ts` - confirm the file is actually loaded.
export default defineMermaidSetup(() => ({ theme: 'forest' }))

174
slides.md Normal file
View file

@ -0,0 +1,174 @@
---
# theme id, package name, or local path
# Learn more: https://sli.dev/guide/theme-addon.html#use-theme
theme: seriph
# title of your slides, will be inferred from the first header if not specified
title: Security Benchmarking for Large Language Models
# titleTemplate for the webpage, `%s` will be replaced by the slides deck's title
titleTemplate: '%s - Security Benchmarking for AI'
# information for your slides, can be a Markdown string
author: Rossi Stefano
info: |
## Methodologies and Applications
# keywords field for exported PDF, comma-delimited
keywords: Security, Benchmarking, Large Language Models, LLM, Red Teaming
mdc: true
hideInToc: false
# addons, can be a list of package names or local paths
# Learn more: https://sli.dev/guide/theme-addon.html#use-addon
addons:
- slidev-addon-rabbit
- slidev-addon-python-runner
python:
installs: ["deepeval"]
prelude: |
GREETING_FROM_PRELUDE = "Hello, Slidev!"
loadPackagesFromImports: true
suppressDeprecationWarnings: true
alwaysReload: false
loadPyodideOptions: {}
# enable presenter mode, can be boolean, 'dev' or 'build'
presenter: true
# enable browser exporter, can be boolean, 'dev' or 'build'
browserExporter: dev
# enable PDF downloading in SPA build, can also be a custom URL
download: true
# filename of the export file
exportFilename: security-benchmarking-for-llms
# enable twoslash, can be boolean, 'dev' or 'build'
twoslash: false
# show line numbers in code blocks
lineNumbers: true
# enable monaco editor, can be boolean, 'dev' or 'build'
monaco: false
# controls whether texts in slides are selectable
selectable: false
# enable slide recording, can be boolean, 'dev' or 'build'
record: dev
# enable Slidev's context menu, can be boolean, 'dev' or 'build'
contextMenu: dev
# enable wake lock, can be boolean, 'dev' or 'build'
wakeLock: true
# take snapshot for each slide in the overview
overviewSnapshots: false
# force color schema for the slides, can be 'auto', 'light', or 'dark'
colorSchema: dark
# router mode for vue-router, can be "history" or "hash"
routerMode: history
# aspect ratio for the slides
aspectRatio: 16/9
# real width of the canvas, unit in px
canvasWidth: 980
# Import your custom CSS
css: unocss
# Enable UnoCSS processing
unocss:
configFile: './uno.config.ts'
# used for theme customization, will inject root styles as `--slidev-theme-x` for attribute `x`
# themeConfig:
# primary: '#5d8392'
# favicon, can be a local file path or URL
# favicon: 'https://cdn.jsdelivr.net/gh/slidevjs/slidev/assets/favicon.png'
# URL of PlantUML server used to render diagrams
# Learn more: https://sli.dev/features/plantuml.html
# plantUmlServer: https://www.plantuml.com/plantuml
# fonts will be auto-imported from Google fonts
# Learn more: https://sli.dev/custom/config-fonts.html
# fonts:
# sans: Roboto
# serif: Roboto Slab
# mono: Fira Code
# default frontmatter applies to all slides
defaults:
layout: default
# ...
# drawing options
# Learn more: https://sli.dev/guide/drawing.html
drawings:
enabled: true
persist: false
presenterOnly: false
syncAll: true
# HTML tag attributes
htmlAttrs:
dir: ltr
lang: en
layout: center
# default slide transition
transition: slide-left
# background of the first slide
background: none
---
<div class="flex flex-col items-center justify-center h-full py-10">
<h1 class="text-center text-5xl font-bold gradient-text mb-10">Security Benchmarking for Large Language Models</h1>
<h2 class="text-center text-4xl mb-6" style="color: var(--accent-color);">Methodologies and Applications</h2>
<h3 class="text-center text-3xl mb-14 animate-pulse highlight-word">Understanding, Evaluating, and Mitigating LLM Security Vulnerabilities</h3>
<div class="flex w-full justify-between mt-auto">
<div class="text-left text-xl">Rossi Stefano</div>
<div class="text-right text-xl">14 March, 2025</div>
</div>
</div>
<style>
.gradient-text {
background: linear-gradient(-45deg, var(--primary-color), var(--secondary-color), var(--accent-color), var(--primary-color));
background-size: 300% 300%;
color: transparent;
-webkit-background-clip: text;
background-clip: text;
animation: gradient-animation 6s ease infinite;
}
@keyframes gradient-animation {
0% { background-position: 0% 50%; }
50% { background-position: 100% 50%; }
100% { background-position: 0% 50%; }
}
</style>
---
src: ./pages/intro.md
transition: fade
---
---
src: ./pages/risks-and-vulnerabilities.md
transition: slide-up
---
---
src: ./pages/understanding-llm-vulns.md
transition: slide-right
---
---
src: ./pages/red-teaming-methodologies.md
transition: zoom-in
---
---
src: ./pages/advanced-att-techniques.md
transition: slide-left
---
---
src: ./pages/major-bench-secu.md
transition: fade-out
---
---
src: ./pages/deepeval-example.md
transition: zoom-out
---
---
src: ./pages/best-practices.md
transition: slide-up
---
---
src: ./pages/end.md
transition: fade
---

12
snippets/external.ts Normal file
View file

@ -0,0 +1,12 @@
/* eslint-disable no-console */
// #region snippet
// Inside ./snippets/external.ts
/**
 * Build an array with `length` slots, each holding `undefined`.
 * The result is dense (every index exists), unlike `new Array(length)`.
 */
export function emptyArray<T>(length: number) {
  const arrayLike = { length }
  return Array.from<T>(arrayLike)
}
// #endregion snippet
/** Log a fixed greeting that identifies this snippet file. */
export function sayHello() {
  const greeting = 'Hello from snippets/external.ts'
  console.log(greeting)
}

284
styles/animations.css Normal file
View file

@ -0,0 +1,284 @@
/* Basic animations */
@keyframes fadeIn {
0% { opacity: 0; }
100% { opacity: 1; }
}
@keyframes slideInRight {
0% { transform: translateX(-20px); opacity: 0; }
100% { transform: translateX(0); opacity: 1; }
}
@keyframes slideInBottom {
0% { transform: translateY(15px); opacity: 0; }
100% { transform: translateY(0); opacity: 1; }
}
@keyframes scaleIn {
0% { transform: scale(0.92); opacity: 0; }
100% { transform: scale(1); opacity: 1; }
}
@keyframes gentle-shimmer {
0% { background-position: 0% 50%; }
100% { background-position: 200% 50%; }
}
@keyframes subtle-pulse {
0% { transform: scale(1); opacity: 0.95; }
50% { transform: scale(1.01); opacity: 1; }
100% { transform: scale(1); opacity: 0.95; }
}
@keyframes borderPulse {
0% { border-color: rgba(99, 102, 241, 0.3); }
50% { border-color: rgba(99, 102, 241, 0.7); }
100% { border-color: rgba(99, 102, 241, 0.3); }
}
/* Slide transitions: exit and entrance keyframes. */

/* Exit: drift 50px to the left while fading out. */
@keyframes slideOutLeft {
  from { opacity: 1; transform: translateX(0); }
  to { opacity: 0; transform: translateX(-50px); }
}

/* Exit: drift 50px to the right while fading out. */
@keyframes slideOutRight {
  from { opacity: 1; transform: translateX(0); }
  to { opacity: 0; transform: translateX(50px); }
}

/* Entrance: start 50px to the right and travel leftwards into place. */
@keyframes slideInLeft {
  from { opacity: 0; transform: translateX(50px); }
  to { opacity: 1; transform: translateX(0); }
}

/* Entrance: grow from 85% to full size while fading in. */
@keyframes zoomIn {
  from { opacity: 0; transform: scale(0.85); }
  to { opacity: 1; transform: scale(1); }
}

/* Exit: shrink to 85% while fading out. */
@keyframes zoomOut {
  from { opacity: 1; transform: scale(1); }
  to { opacity: 0; transform: scale(0.85); }
}

/* Entrance: swing in from a 10deg Y-axis tilt under a 1000px perspective. */
@keyframes rotateIn {
  from { opacity: 0; transform: perspective(1000px) rotateY(10deg); }
  to { opacity: 1; transform: perspective(1000px) rotateY(0); }
}

/* Entrance: grow from 90% to full size while fading in. */
@keyframes fadeInScale {
  from { opacity: 0; transform: scale(0.9); }
  to { opacity: 1; transform: scale(1); }
}

/* Entrance: rise 20px from below while fading in. */
@keyframes slideUpFade {
  from { opacity: 0; transform: translateY(20px); }
  to { opacity: 1; transform: translateY(0); }
}
/* Modern hover effects: every animatable property change on a card eases
   over 0.3s along a custom cubic-bezier curve. */
.card {
  transition: all 0.3s cubic-bezier(0.25, 0.8, 0.25, 1);
}
/* Highlight word: gradient-filled text running from --highlight to
   --primary-color (no orange), with a continuous shimmer. */
.highlight-word {
  font-weight: 600;
  /* The shorthand comes first because it resets the background longhands below. */
  background: linear-gradient(90deg, var(--highlight), var(--primary-color));
  -webkit-background-clip: text;
  background-clip: text;
  /* Twice the element width, giving gentle-shimmer room to sweep. */
  background-size: 200% auto;
  /* Transparent glyphs let the text-clipped gradient show through. */
  color: transparent;
  animation: gentle-shimmer 4s linear infinite;
}
/* Slide titles: fade in and rise into place on every slide. */
.slidev-layout h1 {
  animation:
    fadeIn 0.8s ease-out forwards,
    slideInBottom 0.8s ease-out forwards;
}
/* List item animations.
   `backwards` fill keeps each item on the first keyframe (transparent and
   shifted left) while its delay elapses, then releases the animated
   properties when the slide-in ends. Holding the end state with `forwards`
   would keep `transform` pinned by the animation, which outranks normal
   declarations, so hover transforms on list items (for example
   `.better-list li:hover`) could never apply. No static `opacity: 0` is
   needed with this fill mode, so items stay visible wherever the animation
   does not run. */
.slidev-layout ul li,
.slidev-layout ol li {
  animation: slideInRight 0.5s ease-out backwards;
}

/* Stagger the entrance: 0.15s between items; the sixth and later share one delay. */
.slidev-layout ul li:nth-child(1),
.slidev-layout ol li:nth-child(1) { animation-delay: 0.2s; }
.slidev-layout ul li:nth-child(2),
.slidev-layout ol li:nth-child(2) { animation-delay: 0.35s; }
.slidev-layout ul li:nth-child(3),
.slidev-layout ol li:nth-child(3) { animation-delay: 0.5s; }
.slidev-layout ul li:nth-child(4),
.slidev-layout ol li:nth-child(4) { animation-delay: 0.65s; }
.slidev-layout ul li:nth-child(5),
.slidev-layout ol li:nth-child(5) { animation-delay: 0.8s; }
.slidev-layout ul li:nth-child(n+6),
.slidev-layout ol li:nth-child(n+6) { animation-delay: 0.95s; }
/* Table entrance: the table fades in as a whole, then the header cells,
   then the body rows one after another. */
.slidev-layout table {
  animation: fadeIn 0.8s ease-out forwards;
}

/* Header cells start hidden and fade in after a 0.3s delay. */
.slidev-layout table th {
  opacity: 0;
  animation: fadeIn 0.7s ease-out 0.3s forwards;
}

/* Body cells start hidden; the per-row delay is assigned below. */
.slidev-layout table td {
  opacity: 0;
  animation: fadeIn 0.7s ease-out forwards;
}

/* Row stagger in 0.1s steps; the sixth and later rows share one delay. */
.slidev-layout table tr:nth-child(1) td { animation-delay: 0.4s; }
.slidev-layout table tr:nth-child(2) td { animation-delay: 0.5s; }
.slidev-layout table tr:nth-child(3) td { animation-delay: 0.6s; }
.slidev-layout table tr:nth-child(4) td { animation-delay: 0.7s; }
.slidev-layout table tr:nth-child(5) td { animation-delay: 0.8s; }
.slidev-layout table tr:nth-child(n+6) td { animation-delay: 0.9s; }
/* Code blocks: no entrance animation, always fully opaque. */
.slidev-layout pre {
  animation: none;
  opacity: 1;
}
/* Blockquotes (key insights): stay hidden for 1s, then fade in and rise. */
.slidev-layout blockquote {
  opacity: 0;
  /* In each entry the first time is the duration, the second the delay. */
  animation:
    fadeIn 1s ease-out 1s forwards,
    slideInBottom 1s ease-out 1s forwards;
}
/* Intro slide title: a slow 1.5s fade-in, followed by an endless gentle
   pulse that starts once the fade has finished (1.5s delay). */
.slidev-layout.intro h1 {
  animation:
    fadeIn 1.5s ease-out forwards,
    subtle-pulse 6s ease-in-out infinite 1.5s;
}
/* Section headers: fade in and rise after a 0.2s delay.
   `backwards` fill shows the first keyframe (transparent, shifted down)
   during the delay and releases the animated properties afterwards. A
   `forwards` fill would keep `transform` pinned by the animation, which
   outranks normal declarations and would block any hover transform on the
   heading. No static `opacity: 0` is required with this fill mode, so the
   heading stays visible wherever the animation does not run. */
.slidev-layout h2 {
  animation:
    fadeIn 0.8s ease-out backwards,
    slideInBottom 0.8s ease-out backwards;
  animation-delay: 0.2s;
}
/* Typing effect for code comment lines.
   The `typing` keyframes are defined here because the rule below references
   them; without a matching @keyframes the animation declaration is inert. */
@keyframes typing {
  from { width: 0; }
  to { width: 100%; }
}
.slidev-layout pre .line-comment {
  /* NOTE(review): width and overflow only affect block or inline-block boxes;
     assumes .line-comment is rendered as one of those. TODO confirm. */
  overflow: hidden;
  white-space: nowrap;
  /* `backwards` keeps the text collapsed during the 1.5s delay and returns
     the element to its natural width once the reveal has finished. */
  animation: typing 1s steps(40, end) backwards;
  animation-delay: 1.5s;
}
/* Hover effects for interactive elements.
   The transition is declared on the resting cell so the tint eases out as
   well as in; a transition declared only on the :hover state stops applying
   the moment the pointer leaves, making the tint snap off. */
.slidev-layout table td {
  transition: background-color 0.3s ease;
}
.slidev-layout table tr:hover td {
  background-color: rgba(8, 38, 82, 0.2);
}
/* Pulse the code block border while hovered. Uses the `borderPulse`
   keyframes defined in this stylesheet; the previously referenced
   `borderGlow` has no @keyframes definition here, so it never ran. */
.slidev-layout pre:hover {
  animation: borderPulse 2s infinite;
}
/* Card entrance: fade in while scaling up.
   NOTE(review): a later `.card` rule in this stylesheet sets the `animation`
   shorthand again with equal specificity, so it takes precedence over this
   one; the delays below still apply because their selectors are more
   specific. */
.card {
  animation:
    fadeIn 0.5s ease-out forwards,
    scaleIn 0.5s ease-out forwards;
}

/* Stagger cards inside the two-column layout. */
.two-column .card:nth-child(1) { animation-delay: 0.3s; }
.two-column .card:nth-child(2) { animation-delay: 0.5s; }

/* Stagger cards inside the benchmark grid (0.1s steps). */
.grid-3 .card:nth-child(1) { animation-delay: 0.3s; }
.grid-3 .card:nth-child(2) { animation-delay: 0.4s; }
.grid-3 .card:nth-child(3) { animation-delay: 0.5s; }
.grid-3 .card:nth-child(4) { animation-delay: 0.6s; }
/* Key insight blocks: fade in and rise after a 0.8s delay.
   `backwards` fill applies the first keyframe (transparent, shifted down)
   while the delay elapses. With only a `forwards` fill the block would be
   drawn fully visible for 0.8s, snap to transparent when the animation
   starts, and then fade in. */
.key-insight {
  position: relative;
  overflow: hidden;
  animation:
    fadeIn 1s ease-out backwards,
    slideInBottom 1s ease-out backwards;
  animation-delay: 0.8s;
}
/* Better list: items ease every property change over 0.3s. */
.better-list li {
  transition: all 0.3s ease;
}

/* On hover an item nudges 5px right, darkens, and thickens its left border. */
.better-list li:hover {
  background: rgba(19, 21, 33, 0.95);
  border-left-width: 5px;
  transform: translateX(5px);
}

/* Utility: endless gentle pulse. */
.animate-pulse {
  animation: subtle-pulse 6s ease-in-out infinite;
}
/* Click-step (v-click) states. */

/* Elements revealed by an earlier click ease any property change over 0.5s. */
.slidev-vclick-prior {
  transition: all 0.5s ease;
}

/* Play the entrance when a target becomes visible rather than when it is
   first rendered. The animation is attached only while the hidden marker
   class is absent, so it starts at the moment that class is removed.
   NOTE(review): assumes Slidev marks not-yet-revealed targets with
   `.slidev-vclick-hidden`, as its click-animation docs describe; confirm
   against the Slidev version in use. */
.slidev-vclick-target:not(.slidev-vclick-hidden) {
  animation: fadeInScale 0.6s ease forwards;
}
/* Enhanced container animations: cards fade in while scaling up.
   `backwards` fill hides a card on the first keyframe while any stagger
   delay elapses and releases the animated properties when the entrance
   ends. A `forwards` fill would keep `transform` pinned by the animation,
   which outranks normal declarations and would block hover transforms on
   the card; delayed cards would also be drawn visible until their delay
   expired. */
.card {
  animation: fadeInScale 0.5s ease-out backwards;
  transition: all 0.3s cubic-bezier(0.25, 0.8, 0.25, 1);
}
/* Modern text animations: text starts hidden, then rises 20px while fading in. */
.animated-text {
  opacity: 0;
  animation: slideUpFade 0.7s ease-out forwards;
}

/* Delay utilities in 0.2s steps. */
.delay-1 { animation-delay: 0.2s; }
.delay-2 { animation-delay: 0.4s; }
.delay-3 { animation-delay: 0.6s; }
.delay-4 { animation-delay: 0.8s; }
.delay-5 { animation-delay: 1s; }
/* Container entrance animations.
   Longhand properties are used on purpose: the `animation` shorthand also
   resets `animation-delay`, and because these rules come after the
   `.delay-N` utilities with the same specificity, the shorthand would
   silently cancel a delay class placed on the same element. */
.container-fade-in {
  animation-name: fadeIn;
  animation-duration: 1s;
  animation-timing-function: ease;
  animation-fill-mode: forwards;
  opacity: 0;
}
.container-slide-up {
  animation-name: slideUpFade;
  animation-duration: 0.8s;
  animation-timing-function: ease-out;
  animation-fill-mode: forwards;
  opacity: 0;
}
.container-zoom-in {
  animation-name: zoomIn;
  animation-duration: 0.8s;
  animation-timing-function: ease-out;
  animation-fill-mode: forwards;
  opacity: 0;
}
.container-rotate-in {
  animation-name: rotateIn;
  animation-duration: 0.9s;
  animation-timing-function: ease-out;
  animation-fill-mode: forwards;
  opacity: 0;
}
/* Staggered container content: the first five direct children receive
   increasing animation delays in 0.2s steps. Only a delay is set here; the
   animation itself must come from another rule on the child. */
.stagger-container > *:nth-child(1) { animation-delay: 0.1s; }
.stagger-container > *:nth-child(2) { animation-delay: 0.3s; }
.stagger-container > *:nth-child(3) { animation-delay: 0.5s; }
.stagger-container > *:nth-child(4) { animation-delay: 0.7s; }
.stagger-container > *:nth-child(5) { animation-delay: 0.9s; }
/* Enhanced list: items start hidden and slide in from the left. */
.enhanced-list li {
  opacity: 0;
  animation: slideInRight 0.5s ease-out forwards;
}

/* Per-item stagger, 0.15s apart, for the first five items. */
.enhanced-list li:nth-child(1) { animation-delay: 0.2s; }
.enhanced-list li:nth-child(2) { animation-delay: 0.35s; }
.enhanced-list li:nth-child(3) { animation-delay: 0.5s; }
.enhanced-list li:nth-child(4) { animation-delay: 0.65s; }
.enhanced-list li:nth-child(5) { animation-delay: 0.8s; }

297
styles/base.css Normal file
View file

@ -0,0 +1,297 @@
/* Theme palette shared by every stylesheet through custom properties. */
:root {
  /* Brand reds */
  --primary-color: #800020;   /* Deep burgundy */
  --secondary-color: #B22222; /* Firebrick red */
  --accent-color: #A30000;    /* Darker red replacing orange */

  /* Text */
  --text-color: #f0f2f5;      /* Slightly dimmed light text */
  --text-muted: #d6a5b3;      /* Muted burgundy text */

  /* Surfaces */
  --background-dark: #0a0c14; /* Much darker background */
  --background-card: #161b26; /* Darker card background */
  --background-code: #252d3d; /* Darker code background */

  /* Emphasis and card chrome */
  --highlight: #0066CC;                   /* Deeper blue for better contrast with reds */
  --card-border: rgba(176, 27, 27, 0.3);  /* Red card border */
  --card-shadow: rgba(0, 0, 0, 0.7);      /* Darker shadow for better contrast */
}
/* Page defaults: dark canvas, light text, no outer spacing. */
body {
  margin: 0;
  padding: 0;
  background-color: var(--background-dark);
  color: var(--text-color);
  font-family: 'Inter', 'SF Pro Display', system-ui, sans-serif;
  font-size: 15px;
}
/* Slide frame: a full-height flex column over a slowly drifting dark gradient. */
.slidev-layout {
  /* Box and flow */
  display: flex;
  flex-direction: column;
  height: 100%;
  padding: 1.75rem;
  overflow: hidden;
  border-radius: 0;
  box-shadow: none;

  /* Colours; the gradient is drawn at three times the slide size so that
     shifting its position is visible. The shorthand comes first because it
     resets background-size. */
  color: var(--text-color);
  background: linear-gradient(150deg, var(--background-dark), #1c2333, #0f1623);
  background-size: 300% 300%;

  animation: gradientShift 15s ease infinite;
}

/* Pan the oversized gradient from left to right and back again. */
@keyframes gradientShift {
  0%, 100% { background-position: 0% 50%; }
  50% { background-position: 100% 50%; }
}
/* Per-slide backgrounds, selected by a substring match on the
   data-content-file attribute.
   NOTE(review): each rule uses the `background` shorthand, which also resets
   background-size to its initial value, so the oversized gradient set on
   .slidev-layout does not carry over to these slides; confirm that is
   intended.
   NOTE(review): `*=` matches anywhere in the attribute value, so a short key
   such as "end" also matches any other file name containing it. */
.slidev-layout[data-content-file*="intro"] {
  background: linear-gradient(135deg, #0a0c14, #1e2233, #0f1a2a);
}

.slidev-layout[data-content-file*="risks-and-vulnerabilities"] {
  background: linear-gradient(135deg, #140a0c, #291517, #1e0f12);
}

.slidev-layout[data-content-file*="understanding-llm-vulns"] {
  background: linear-gradient(135deg, #0c0d14, #1a1e33, #12172a);
}

.slidev-layout[data-content-file*="red-teaming-methodologies"] {
  background: linear-gradient(135deg, #0e0a14, #271729, #1e122a);
}

.slidev-layout[data-content-file*="advanced-att-techniques"] {
  background: linear-gradient(135deg, #14090c, #2d1319, #26121e);
}

.slidev-layout[data-content-file*="major-bench-secu"] {
  background: linear-gradient(135deg, #0a1214, #152329, #0f1a2a);
}

.slidev-layout[data-content-file*="deepeval-example"] {
  background: linear-gradient(135deg, #0a0e14, #151c29, #0f162a);
}

.slidev-layout[data-content-file*="best-practices"] {
  background: linear-gradient(135deg, #0c140a, #192915, #122a0f);
}

.slidev-layout[data-content-file*="end"] {
  background: linear-gradient(135deg, #14130a, #292617, #2a250f);
}

/* Slides carrying the `center` class get a radial vignette instead. */
.slidev-layout.center {
  background: radial-gradient(circle at center, #15192b, #0a0c14 70%);
}
/* Common element styling: headings. */

/* Slide title in the highlight colour, with room below for the underline bar. */
.slidev-layout h1 {
  position: relative; /* positioning context for the ::after bar */
  margin-bottom: 1.5rem;
  padding-bottom: 0.5rem;
  color: var(--highlight);
  font-size: 2.2rem;
  font-weight: 700;
  line-height: 1.1;
  letter-spacing: -0.02em;
}

/* Short gradient bar pinned to the bottom-left corner of the title. */
.slidev-layout h1::after {
  content: '';
  position: absolute;
  left: 0;
  bottom: 0;
  width: 5rem;
  height: 0.2rem;
  border-radius: 4px;
  background: linear-gradient(90deg, var(--accent-color), var(--highlight));
}

/* Section heading in the primary colour. */
.slidev-layout h2 {
  margin: 0.8rem 0;
  color: var(--primary-color);
  font-size: 1.6rem;
  font-weight: 600;
}
/* Body copy: slightly translucent and comfortably spaced. */
.slidev-layout p {
  opacity: 0.9;
  font-size: 1.1rem;
  line-height: 1.5;
}

/* Lists: modest indent and a gap above. */
.slidev-layout ul,
.slidev-layout ol {
  margin-top: 0.75rem;
  padding-left: 1.25rem;
}

/* List items: same type size as paragraphs, spaced apart. */
.slidev-layout ul li,
.slidev-layout ol li {
  position: relative;
  margin-bottom: 0.6rem;
  font-size: 1.1rem;
  line-height: 1.4;
}

/* Bullets of unordered lists take the accent colour. */
.slidev-layout ul li::marker {
  color: var(--accent-color);
}
/* Cards and boxes: translucent gradient panel with a red-tinted border and a drop shadow. */
.card {
  margin-bottom: 1rem;
  padding: 1.25rem;
  border: 1px solid var(--card-border);
  border-radius: 8px;
  background: linear-gradient(135deg, rgba(22, 27, 38, 0.9), rgba(37, 34, 45, 0.9));
  box-shadow: 0 4px 12px var(--card-shadow);
  transition: all 0.3s ease;
}

/* Hovered card: lifts 3px, casts a wider shadow, and shows a brighter red border. */
.card:hover {
  border-color: rgba(229, 62, 62, 0.5);
  box-shadow: 0 8px 24px rgba(0, 0, 0, 0.4);
  transform: translateY(-3px);
}
/* Two equal-width columns separated by a 1.5rem gutter.
   Uses the standard `gap` property; `grid-gap` is only a legacy alias for it. */
.two-column {
  display: grid;
  grid-template-columns: 1fr 1fr;
  gap: 1.5rem;
}
/* Highlighted words: solid highlight colour as the baseline look. */
.highlight-word {
  color: var(--highlight);
  font-weight: 600;
}

/* Where text-clipped backgrounds are supported, make the glyphs transparent
   so the animated gradient applied to .highlight-word in animations.css can
   show through. This stylesheet is imported after animations.css, so without
   this override the solid colour above wins the cascade and paints over the
   gradient. */
@supports ((background-clip: text) or (-webkit-background-clip: text)) {
  .highlight-word {
    color: transparent;
  }
}
/* Key terms: accent-coloured, medium weight. */
.key-term {
  font-weight: 500;
  color: var(--accent-color);
}
/* Table styling: full-width, rounded, shadowed table with separated borders. */
.slidev-layout table {
  width: 100%;
  margin: 1rem 0;
  overflow: hidden; /* clips the cells to the rounded corners */
  border-collapse: separate;
  border-spacing: 0;
  border-radius: 8px;
  box-shadow: 0 4px 20px rgba(0, 0, 0, 0.2);
}

/* Header cells: card background, primary-coloured text, accent underline. */
.slidev-layout table th {
  padding: 0.6rem 1rem;
  border-bottom: 2px solid var(--accent-color);
  background-color: var(--background-card);
  color: var(--primary-color);
  font-size: 0.9rem;
  font-weight: 600;
  text-align: left;
}

/* Body cells: dark fill with a faint divider line beneath. */
.slidev-layout table td {
  padding: 0.5rem 1rem;
  border-bottom: 1px solid rgba(255, 255, 255, 0.07);
  background-color: rgba(11, 15, 25, 0.9);
  font-size: 0.9rem;
}

/* The last row needs no divider. */
.slidev-layout table tr:last-child td {
  border-bottom: none;
}

/* Compact table: tighter padding and smaller type inside a .compact-table wrapper. */
.compact-table table td,
.compact-table table th {
  padding: 0.4rem 0.5rem;
  font-size: 0.85rem;
}
/* Code blocks and inline code: monospace stack, rounded corners. */
.slidev-layout pre,
.slidev-layout code {
  font-family: 'Fira Code', 'Cascadia Code', monospace;
  font-size: 0.9rem;
  border-radius: 6px;
}
/* Center layout: stack children vertically and center them on both axes. */
.center-content {
  display: flex;
  flex-direction: column;
  align-items: center;
  justify-content: center;
  height: 100%;
  text-align: center;
}
/* Better practices list: each item is a dark pill with an accent bar on its left edge. */
.better-list li {
  margin: 0.7rem 0;
  padding: 0.6rem 1rem 0.6rem 0.5rem;
  border-left: 3px solid var(--accent-color);
  border-radius: 6px;
  background: rgba(12, 16, 29, 0.9);
  /* Keeps the marker inside the padded box. */
  list-style-position: inside;
}

/* Markers use the highlight colour. */
.better-list li::marker {
  color: var(--highlight);
}
/* Grid layout for benchmarks.
   Uses the standard `gap` property; `grid-gap` is only a legacy alias for it.
   NOTE(review): despite the class name, the grid is defined with two equal
   columns; confirm whether three were intended. */
.grid-3 {
  display: grid;
  grid-template-columns: repeat(2, 1fr);
  gap: 1rem;
}
/* Responsive adjustments for viewports up to 768px wide:
   smaller headings and single-column grids. */
@media (max-width: 768px) {
  .slidev-layout h1 {
    font-size: 1.8rem;
  }

  .slidev-layout h2 {
    font-size: 1.4rem;
  }

  .two-column {
    grid-template-columns: 1fr;
  }

  .grid-3 {
    grid-template-columns: 1fr;
  }
}
/* Hover effect for interactive elements: a 3% scale-up.
   NOTE(review): the transition is declared on the :hover state only, so it
   eases in but stops applying once the pointer leaves; confirm whether an
   eased return was intended.
   NOTE(review): `transform` has no effect on plain inline boxes; confirm the
   display type of the link, .key-term and .highlight-word targets. */
.slidev-layout h2:hover,
.slidev-layout h3:hover,
.slidev-layout a:hover,
.slidev-layout button:hover,
.slidev-layout .key-term:hover,
.slidev-layout .highlight-word:hover,
.intro-point:hover,
.benchmark-title:hover {
  transition: transform 0.2s ease;
  transform: scale(1.03);
}
/* Interactive tables: the hovered row grows by 1% and is raised above its neighbours. */
.slidev-layout table tr:hover {
  position: relative; /* lets z-index take effect */
  z-index: 5;
  transform: scale(1.01);
}
/* Code block theme: deep navy panel with the shared card border. */
.slidev-layout pre {
  border: 1px solid var(--card-border);
  background-color: #0c1525;
  transition: all 0.3s ease;
}

/* Hovered code block: grows by 2%, switches to the highlight border, gains a shadow. */
.slidev-layout pre:hover {
  border-color: var(--highlight);
  box-shadow: 0 5px 15px rgba(0, 0, 0, 0.4);
  transform: scale(1.02);
}

3
styles/index.ts Normal file
View file

@ -0,0 +1,3 @@
import './animations.css';
import './base.css';

7
vercel.json Normal file
View file

@ -0,0 +1,7 @@
{
  "buildCommand": "npm run build",
  "outputDirectory": "dist",
  "rewrites": [
    {
      "source": "/(.*)",
      "destination": "/index.html"
    }
  ]
}