1import { test, expect } from '@playwright/test';
2
3test.describe('Shelley Conversation Tests', () => {
4 test('can send Hello and get greeting response', async ({ page }) => {
5 await page.goto('/');
6 await page.waitForLoadState('domcontentloaded');
7
8 // Wait for the message input using improved selector
9 const messageInput = page.getByTestId('message-input');
10 await expect(messageInput).toBeVisible({ timeout: 30000 });
11
12 // Send "Hello" and expect specific predictable response
13 await messageInput.fill('Hello');
14
15 // Find and click the send button using improved selector
16 const sendButton = page.getByTestId('send-button');
17 await expect(sendButton).toBeVisible();
18 await sendButton.click();
19
20 // Wait for the response from the predictable model
21 // The predictable model responds to "Hello" with "Hello! I'm Shelley, your AI assistant. How can I help you today?"
22 await page.waitForFunction(
23 () => {
24 const text = "Hello! I'm Shelley, your AI assistant. How can I help you today?";
25 return document.body.textContent?.includes(text) ?? false;
26 },
27 undefined,
28 { timeout: 30000 }
29 );
30
31 // Verify both the user message and assistant response are visible
32 await expect(page.locator('text=Hello').first()).toBeVisible();
33 await expect(page.locator('text=Hello! I\'m Shelley, your AI assistant. How can I help you today?').first()).toBeVisible();
34 });
35
36 test('can use echo command', async ({ page }) => {
37 await page.goto('/');
38 await page.waitForLoadState('domcontentloaded');
39
40 const messageInput = page.getByTestId('message-input');
41 const sendButton = page.getByTestId('send-button');
42
43 // Send "echo: test message" and expect echo response
44 await messageInput.fill('echo: test message');
45 await sendButton.click();
46
47 // The predictable model should echo back "test message"
48 await page.waitForFunction(
49 () => document.body.textContent?.includes('test message') ?? false,
50 undefined,
51 { timeout: 30000 }
52 );
53
54 // Verify both input and output messages are visible
55 await expect(page.locator('text=echo: test message')).toBeVisible();
56 });
57
58 test('responds differently to lowercase hello', async ({ page }) => {
59 await page.goto('/');
60 await page.waitForLoadState('domcontentloaded');
61
62 const messageInput = page.getByTestId('message-input');
63 const sendButton = page.getByTestId('send-button');
64
65 // Send "hello" (lowercase) and expect different response
66 await messageInput.fill('hello');
67 await sendButton.click();
68
69 // The predictable model responds to "hello" with "Well, hi there!"
70 await page.waitForFunction(
71 () => document.body.textContent?.includes('Well, hi there!') ?? false,
72 undefined,
73 { timeout: 30000 }
74 );
75
76 // Verify the hello message and response are both visible
77 await expect(page.getByText('Well, hi there!').first()).toBeVisible();
78 });
79
80 test('shows thinking indicator while awaiting response', async ({ page }) => {
81 await page.goto('/');
82 await page.waitForLoadState('domcontentloaded');
83
84 const messageInput = page.getByTestId('message-input');
85 const sendButton = page.getByTestId('send-button');
86
87 await messageInput.fill('hello');
88 await sendButton.click();
89
90 const thinkingIndicator = page.getByTestId('agent-thinking');
91 await expect(thinkingIndicator).toBeVisible({ timeout: 2000 });
92
93 await page.waitForFunction(
94 () => document.body.textContent?.includes('Well, hi there!') ?? false,
95 undefined,
96 { timeout: 30000 }
97 );
98
99 await expect(thinkingIndicator).toBeHidden({ timeout: 10000 });
100 });
101
102 test('shows thinking indicator on follow-up messages', async ({ page }) => {
103 await page.goto('/');
104 await page.waitForLoadState('domcontentloaded');
105
106 const messageInput = page.getByTestId('message-input');
107 const sendButton = page.getByTestId('send-button');
108
109 await messageInput.fill('hello');
110 await sendButton.click();
111
112 await page.waitForFunction(
113 () => document.body.textContent?.includes('Well, hi there!') ?? false,
114 undefined,
115 { timeout: 30000 }
116 );
117
118 await messageInput.fill('echo: follow up');
119 await sendButton.click();
120
121 const thinkingIndicator = page.getByTestId('agent-thinking');
122 await expect(thinkingIndicator).toBeVisible({ timeout: 2000 });
123
124 await page.waitForFunction(
125 () => document.body.textContent?.includes('follow up') ?? false,
126 undefined,
127 { timeout: 30000 }
128 );
129
130 await expect(thinkingIndicator).toBeHidden({ timeout: 10000 });
131 });
132
133 test('can use bash tool', async ({ page }) => {
134 await page.goto('/');
135 await page.waitForLoadState('domcontentloaded');
136
137 const messageInput = page.getByTestId('message-input');
138 const sendButton = page.getByTestId('send-button');
139
140 // Send a message that triggers tool use
141 await messageInput.fill('bash: echo "hello world"');
142 await sendButton.click();
143
144 // The predictable model should use the bash tool and show the response
145 await page.waitForFunction(
146 () => {
147 const text = 'I\'ll run the command: echo "hello world"';
148 return document.body.textContent?.includes(text) ?? false;
149 },
150 undefined,
151 { timeout: 30000 }
152 );
153
154 // Verify tool usage appears in the UI with coalesced tool call
155 await expect(page.locator('[data-testid="tool-call-completed"]').first()).toBeVisible({ timeout: 10000 });
156 // Check that the tool name "bash" is visible
157 await expect(page.locator('text=bash').first()).toBeVisible();
158 });
159
160 test('gives default response for undefined messages', async ({ page }) => {
161 await page.goto('/');
162 await page.waitForLoadState('domcontentloaded');
163
164 const messageInput = page.getByTestId('message-input');
165 const sendButton = page.getByTestId('send-button');
166
167 // Send an undefined message and expect default response
168 await messageInput.fill('this is an undefined message');
169 await sendButton.click();
170
171 // The predictable model responds to undefined inputs with "edit predictable.go to add a response for that one..."
172 await page.waitForFunction(
173 () => {
174 const text = 'edit predictable.go to add a response for that one...';
175 return document.body.textContent?.includes(text) ?? false;
176 },
177 undefined,
178 { timeout: 30000 }
179 );
180
181 // Verify the undefined message and default response are visible
182 await expect(page.locator('text=this is an undefined message')).toBeVisible();
183 });
184
185 test('conversation persists and displays correctly', async ({ page }) => {
186 await page.goto('/');
187 await page.waitForLoadState('domcontentloaded');
188
189 const messageInput = page.getByTestId('message-input');
190 const sendButton = page.getByTestId('send-button');
191
192 // Send first message
193 await messageInput.fill('Hello');
194 await sendButton.click();
195
196 // Wait for first response
197 await page.waitForFunction(
198 () => {
199 const text = "Hello! I'm Shelley, your AI assistant. How can I help you today?";
200 return document.body.textContent?.includes(text) ?? false;
201 },
202 undefined,
203 { timeout: 30000 }
204 );
205
206 // Send second message
207 await messageInput.fill('echo: second message');
208 await sendButton.click();
209
210 // Wait for second response
211 await page.waitForFunction(
212 () => document.body.textContent?.includes('second message') ?? false,
213 undefined,
214 { timeout: 30000 }
215 );
216
217 // Verify both responses are still visible (conversation persists)
218 await expect(page.locator('text=Hello! I\'m Shelley, your AI assistant. How can I help you today?').first()).toBeVisible();
219 await expect(page.locator('text=second message').first()).toBeVisible();
220 });
221
222 test('can send message with Enter key', async ({ page }) => {
223 await page.goto('/');
224 await page.waitForLoadState('domcontentloaded');
225
226 const messageInput = page.getByTestId('message-input');
227 await expect(messageInput).toBeVisible({ timeout: 30000 });
228
229 // Type message and press Enter
230 await messageInput.fill('Hello');
231 await messageInput.press('Enter');
232
233 // Verify response
234 await page.waitForFunction(
235 () => {
236 const text = "Hello! I'm Shelley, your AI assistant. How can I help you today?";
237 return document.body.textContent?.includes(text) ?? false;
238 },
239 undefined,
240 { timeout: 30000 }
241 );
242
243 // Verify the Hello message and response are visible
244 await expect(page.locator('text=Hello! I\'m Shelley, your AI assistant. How can I help you today?').first()).toBeVisible();
245 });
246
247 test('handles think tool correctly', async ({ page }) => {
248 await page.goto('/');
249 await page.waitForLoadState('domcontentloaded');
250
251 const messageInput = page.getByTestId('message-input');
252 const sendButton = page.getByTestId('send-button');
253
254 // Send a message that triggers think tool
255 await messageInput.fill('think: I need to analyze this problem');
256 await sendButton.click();
257
258 // The predictable model should use the think tool
259 await page.waitForFunction(
260 () => document.body.textContent?.includes('Let me think about this.') ?? false,
261 undefined,
262 { timeout: 30000 }
263 );
264
265 // Verify think tool usage appears in the UI
266 await expect(page.locator('[data-testid="tool-call-completed"]').first()).toBeVisible({ timeout: 10000 });
267 await expect(page.locator('text=think').first()).toBeVisible();
268 });
269
270 test('handles patch tool correctly', async ({ page }) => {
271 await page.goto('/');
272 await page.waitForLoadState('domcontentloaded');
273
274 const messageInput = page.getByTestId('message-input');
275 const sendButton = page.getByTestId('send-button');
276
277 // Send a message that triggers patch tool
278 await messageInput.fill('patch: test.txt');
279 await sendButton.click();
280
281 // The predictable model should use the patch tool
282 await page.waitForFunction(
283 () => document.body.textContent?.includes('I\'ll patch the file: test.txt') ?? false,
284 undefined,
285 { timeout: 30000 }
286 );
287
288 // Verify patch tool usage appears in the UI
289 await expect(page.locator('[data-testid="tool-call-completed"]').first()).toBeVisible({ timeout: 10000 });
290 await expect(page.locator('text=patch').first()).toBeVisible();
291 });
292
293 test('displays tool results with collapsible details', async ({ page }) => {
294 await page.goto('/');
295 await page.waitForLoadState('domcontentloaded');
296
297 const messageInput = page.getByTestId('message-input');
298 const sendButton = page.getByTestId('send-button');
299
300 // Send a bash command that will show tool results
301 await messageInput.fill('bash: echo "testing tool results"');
302 await sendButton.click();
303
304 // Wait for the tool call to appear
305 await expect(page.locator('[data-testid="tool-call-completed"]').first()).toBeVisible({ timeout: 30000 });
306
307 // Check for bash tool header (collapsible element)
308 const bashToolHeader = page.locator('.bash-tool-header');
309 await expect(bashToolHeader.first()).toBeVisible({ timeout: 10000 });
310 });
311
312 test('handles multiple consecutive tool calls', async ({ page }) => {
313 await page.goto('/');
314 await page.waitForLoadState('domcontentloaded');
315
316 const messageInput = page.getByTestId('message-input');
317 const sendButton = page.getByTestId('send-button');
318
319 // First tool call: bash
320 await messageInput.fill('bash: echo "first command"');
321 await sendButton.click();
322
323 await expect(page.locator('[data-testid="tool-call-completed"]').first()).toBeVisible({ timeout: 30000 });
324
325 // Second tool call: think
326 await messageInput.fill('think: analyzing the output');
327 await sendButton.click();
328
329 // Wait for at least 2 tool calls
330 await page.waitForFunction(
331 () => document.querySelectorAll('[data-testid="tool-call-completed"]').length >= 2,
332 undefined,
333 { timeout: 30000 }
334 );
335
336 // Third tool call: patch
337 await messageInput.fill('patch: example.txt');
338 await sendButton.click();
339
340 // Wait for at least 3 tool calls
341 await page.waitForFunction(
342 () => document.querySelectorAll('[data-testid="tool-call-completed"]').length >= 3,
343 undefined,
344 { timeout: 30000 }
345 );
346
347 // Verify all the specific messages we sent are visible
348 await expect(page.locator('text=bash: echo "first command"')).toBeVisible();
349 await expect(page.locator('text=think: analyzing the output')).toBeVisible();
350 await expect(page.locator('text=patch: example.txt')).toBeVisible();
351
352 // Verify all tool types are visible
353 await expect(page.locator('text=bash').first()).toBeVisible();
354 await expect(page.locator('text=think').first()).toBeVisible();
355 await expect(page.locator('text=patch').first()).toBeVisible();
356 });
357});
358
359 test('coalesces tool calls - shows tool result with details', async ({ page }) => {
360 await page.goto('/');
361 await page.waitForLoadState('domcontentloaded');
362
363 const messageInput = page.getByTestId('message-input');
364 const sendButton = page.getByTestId('send-button');
365
366 // Send a bash command to trigger tool use
367 await messageInput.fill('bash: echo "hello world"');
368 await sendButton.click();
369
370 // Wait for the tool result to appear
371 await expect(page.locator('[data-testid="tool-call-completed"]').first()).toBeVisible({ timeout: 30000 });
372
373 // Verify the bash tool header is visible
374 await expect(page.locator('.bash-tool-header').first()).toBeVisible();
375
376 // Verify bash tool shows command
377 await expect(page.locator('.bash-tool-command').first()).toBeVisible();
378 });
379
380 test('coalesces tool calls - displays agent text and tool separately', async ({ page }) => {
381 await page.goto('/');
382 await page.waitForLoadState('domcontentloaded');
383
384 const messageInput = page.getByTestId('message-input');
385 const sendButton = page.getByTestId('send-button');
386
387 // Send a bash command
388 await messageInput.fill('bash: pwd');
389 await sendButton.click();
390
391 // Wait for tool result
392 await expect(page.locator('[data-testid="tool-call-completed"]').first()).toBeVisible({ timeout: 30000 });
393
394 // Verify agent message is shown ("I'll run the command: pwd")
395 await expect(page.locator('text=I\'ll run the command: pwd').first()).toBeVisible();
396
397 // Verify tool result is shown separately as coalesced tool call
398 await expect(page.locator('[data-testid="tool-call-completed"]').first()).toBeVisible();
399 await expect(page.locator('text=bash').first()).toBeVisible();
400 });
401
402 test('handles sequential tool calls', async ({ page }) => {
403 await page.goto('/');
404 await page.waitForLoadState('domcontentloaded');
405
406 const messageInput = page.getByTestId('message-input');
407 const sendButton = page.getByTestId('send-button');
408
409 // First tool call
410 await messageInput.fill('bash: echo "first"');
411 await sendButton.click();
412 await expect(page.locator('[data-testid="tool-call-completed"]').first()).toBeVisible({ timeout: 30000 });
413
414 // Second tool call
415 await messageInput.fill('bash: echo "second"');
416 await sendButton.click();
417
418 // Wait for the second tool result
419 await page.waitForFunction(
420 () => document.querySelectorAll('[data-testid="tool-call-completed"]').length >= 2,
421 undefined,
422 { timeout: 30000 }
423 );
424
425 // Verify both tool calls are displayed
426 const toolCalls = page.locator('[data-testid="tool-call-completed"]');
427 expect(await toolCalls.count()).toBeGreaterThanOrEqual(2);
428 });
429
430 test('displays LLM error message in UI', async ({ page }) => {
431 // Clear any existing data by navigating to root (which should show empty state)
432 await page.goto('/');
433 await page.waitForLoadState('domcontentloaded');
434
435 // Wait for the empty state or message input
436 const messageInput = page.getByTestId('message-input');
437 await expect(messageInput).toBeVisible({ timeout: 30000 });
438
439 const sendButton = page.getByTestId('send-button');
440
441 // Send a message that triggers an error in the predictable LLM
442 await messageInput.fill('error: test error message');
443 await sendButton.click();
444
445 // Wait for the error message to appear in the UI
446 await page.waitForFunction(
447 () => {
448 const text = 'LLM request failed: predictable error: test error message';
449 return document.body.textContent?.includes(text) ?? false;
450 },
451 undefined,
452 { timeout: 30000 }
453 );
454
455 // Verify error message is visible with error styling
456 const errorMessage = page.locator('[role="alert"]');
457 await expect(errorMessage).toBeVisible({ timeout: 10000 });
458
459 // Verify the error text is displayed
460 await expect(page.locator('text=LLM request failed: predictable error: test error message')).toBeVisible();
461
462 // Verify error label is shown in the message header
463 await expect(page.locator('[role="alert"]').locator('text=Error')).toBeVisible();
464 });