conversation.spec.ts

  1import { test, expect } from '@playwright/test';
  2
  3test.describe('Shelley Conversation Tests', () => {
  test('can send Hello and get greeting response', async ({ page }) => {
    // Reply this test expects for the exact input "Hello".
    const greeting = "Hello! I'm Shelley, your AI assistant. How can I help you today?";

    await page.goto('/');
    await page.waitForLoadState('domcontentloaded');

    // Allow up to 30s for the message input to show up before typing into it.
    const composer = page.getByTestId('message-input');
    await expect(composer).toBeVisible({ timeout: 30000 });
    await composer.fill('Hello');

    // Submit via the send button once it is on screen.
    const submit = page.getByTestId('send-button');
    await expect(submit).toBeVisible();
    await submit.click();

    // Poll the page text until the complete greeting has been rendered.
    await page.waitForFunction(
      (expected) => document.body.textContent?.includes(expected) ?? false,
      greeting,
      { timeout: 30000 }
    );

    // Both the sent message and the assistant's reply must be visible.
    await expect(page.locator('text=Hello').first()).toBeVisible();
    await expect(page.locator(`text=${greeting}`).first()).toBeVisible();
  });
 35  
 36  test('can use echo command', async ({ page }) => {
 37    await page.goto('/');
 38    await page.waitForLoadState('domcontentloaded');
 39    
 40    const messageInput = page.getByTestId('message-input');
 41    const sendButton = page.getByTestId('send-button');
 42    
 43    // Send "echo: test message" and expect echo response
 44    await messageInput.fill('echo: test message');
 45    await sendButton.click();
 46    
 47    // The predictable model should echo back "test message"
 48    await page.waitForFunction(
 49      () => document.body.textContent?.includes('test message') ?? false,
 50      undefined,
 51      { timeout: 30000 }
 52    );
 53    
 54    // Verify both input and output messages are visible
 55    await expect(page.locator('text=echo: test message')).toBeVisible();
 56  });
 57  
 58  test('responds differently to lowercase hello', async ({ page }) => {
 59    await page.goto('/');
 60    await page.waitForLoadState('domcontentloaded');
 61
 62    const messageInput = page.getByTestId('message-input');
 63    const sendButton = page.getByTestId('send-button');
 64
 65    // Send "hello" (lowercase) and expect different response
 66    await messageInput.fill('hello');
 67    await sendButton.click();
 68
 69    // The predictable model responds to "hello" with "Well, hi there!"
 70    await page.waitForFunction(
 71      () => document.body.textContent?.includes('Well, hi there!') ?? false,
 72      undefined,
 73      { timeout: 30000 }
 74    );
 75
 76    // Verify the hello message and response are both visible
 77    await expect(page.getByText('Well, hi there!').first()).toBeVisible();
 78  });
 79
 80  test('shows thinking indicator while awaiting response', async ({ page }) => {
 81    await page.goto('/');
 82    await page.waitForLoadState('domcontentloaded');
 83
 84    const messageInput = page.getByTestId('message-input');
 85    const sendButton = page.getByTestId('send-button');
 86
 87    await messageInput.fill('hello');
 88    await sendButton.click();
 89
 90    const thinkingIndicator = page.getByTestId('agent-thinking');
 91    await expect(thinkingIndicator).toBeVisible({ timeout: 2000 });
 92
 93    await page.waitForFunction(
 94      () => document.body.textContent?.includes('Well, hi there!') ?? false,
 95      undefined,
 96      { timeout: 30000 }
 97    );
 98
 99    await expect(thinkingIndicator).toBeHidden({ timeout: 10000 });
100  });
101
102  test('shows thinking indicator on follow-up messages', async ({ page }) => {
103    await page.goto('/');
104    await page.waitForLoadState('domcontentloaded');
105
106    const messageInput = page.getByTestId('message-input');
107    const sendButton = page.getByTestId('send-button');
108
109    await messageInput.fill('hello');
110    await sendButton.click();
111
112    await page.waitForFunction(
113      () => document.body.textContent?.includes('Well, hi there!') ?? false,
114      undefined,
115      { timeout: 30000 }
116    );
117
118    await messageInput.fill('echo: follow up');
119    await sendButton.click();
120
121    const thinkingIndicator = page.getByTestId('agent-thinking');
122    await expect(thinkingIndicator).toBeVisible({ timeout: 2000 });
123
124    await page.waitForFunction(
125      () => document.body.textContent?.includes('follow up') ?? false,
126      undefined,
127      { timeout: 30000 }
128    );
129
130    await expect(thinkingIndicator).toBeHidden({ timeout: 10000 });
131  });
132  
133  test('can use bash tool', async ({ page }) => {
134    await page.goto('/');
135    await page.waitForLoadState('domcontentloaded');
136    
137    const messageInput = page.getByTestId('message-input');
138    const sendButton = page.getByTestId('send-button');
139    
140    // Send a message that triggers tool use
141    await messageInput.fill('bash: echo "hello world"');
142    await sendButton.click();
143    
144    // The predictable model should use the bash tool and show the response
145    await page.waitForFunction(
146      () => {
147        const text = 'I\'ll run the command: echo "hello world"';
148        return document.body.textContent?.includes(text) ?? false;
149      },
150      undefined,
151      { timeout: 30000 }
152    );
153    
154    // Verify tool usage appears in the UI with coalesced tool call
155    await expect(page.locator('[data-testid="tool-call-completed"]').first()).toBeVisible({ timeout: 10000 });
156    // Check that the tool name "bash" is visible
157    await expect(page.locator('text=bash').first()).toBeVisible();
158  });
159  
160  test('gives default response for undefined messages', async ({ page }) => {
161    await page.goto('/');
162    await page.waitForLoadState('domcontentloaded');
163    
164    const messageInput = page.getByTestId('message-input');
165    const sendButton = page.getByTestId('send-button');
166    
167    // Send an undefined message and expect default response
168    await messageInput.fill('this is an undefined message');
169    await sendButton.click();
170    
171    // The predictable model responds to undefined inputs with "edit predictable.go to add a response for that one..."
172    await page.waitForFunction(
173      () => {
174        const text = 'edit predictable.go to add a response for that one...';
175        return document.body.textContent?.includes(text) ?? false;
176      },
177      undefined,
178      { timeout: 30000 }
179    );
180    
181    // Verify the undefined message and default response are visible
182    await expect(page.locator('text=this is an undefined message')).toBeVisible();
183  });
184  
185  test('conversation persists and displays correctly', async ({ page }) => {
186    await page.goto('/');
187    await page.waitForLoadState('domcontentloaded');
188    
189    const messageInput = page.getByTestId('message-input');
190    const sendButton = page.getByTestId('send-button');
191    
192    // Send first message
193    await messageInput.fill('Hello');
194    await sendButton.click();
195    
196    // Wait for first response
197    await page.waitForFunction(
198      () => {
199        const text = "Hello! I'm Shelley, your AI assistant. How can I help you today?";
200        return document.body.textContent?.includes(text) ?? false;
201      },
202      undefined,
203      { timeout: 30000 }
204    );
205    
206    // Send second message
207    await messageInput.fill('echo: second message');
208    await sendButton.click();
209    
210    // Wait for second response
211    await page.waitForFunction(
212      () => document.body.textContent?.includes('second message') ?? false,
213      undefined,
214      { timeout: 30000 }
215    );
216    
217    // Verify both responses are still visible (conversation persists)
218    await expect(page.locator('text=Hello! I\'m Shelley, your AI assistant. How can I help you today?').first()).toBeVisible();
219    await expect(page.locator('text=second message').first()).toBeVisible();
220  });
221  
222  test('can send message with Enter key', async ({ page }) => {
223    await page.goto('/');
224    await page.waitForLoadState('domcontentloaded');
225    
226    const messageInput = page.getByTestId('message-input');
227    await expect(messageInput).toBeVisible({ timeout: 30000 });
228    
229    // Type message and press Enter
230    await messageInput.fill('Hello');
231    await messageInput.press('Enter');
232    
233    // Verify response
234    await page.waitForFunction(
235      () => {
236        const text = "Hello! I'm Shelley, your AI assistant. How can I help you today?";
237        return document.body.textContent?.includes(text) ?? false;
238      },
239      undefined,
240      { timeout: 30000 }
241    );
242    
243    // Verify the Hello message and response are visible
244    await expect(page.locator('text=Hello! I\'m Shelley, your AI assistant. How can I help you today?').first()).toBeVisible();
245  });
246  
247  test('handles think tool correctly', async ({ page }) => {
248    await page.goto('/');
249    await page.waitForLoadState('domcontentloaded');
250    
251    const messageInput = page.getByTestId('message-input');
252    const sendButton = page.getByTestId('send-button');
253    
254    // Send a message that triggers think tool
255    await messageInput.fill('think: I need to analyze this problem');
256    await sendButton.click();
257    
258    // The predictable model should use the think tool
259    await page.waitForFunction(
260      () => document.body.textContent?.includes('Let me think about this.') ?? false,
261      undefined,
262      { timeout: 30000 }
263    );
264    
265    // Verify think tool usage appears in the UI
266    await expect(page.locator('[data-testid="tool-call-completed"]').first()).toBeVisible({ timeout: 10000 });
267    await expect(page.locator('text=think').first()).toBeVisible();
268  });
269  
270  test('handles patch tool correctly', async ({ page }) => {
271    await page.goto('/');
272    await page.waitForLoadState('domcontentloaded');
273    
274    const messageInput = page.getByTestId('message-input');
275    const sendButton = page.getByTestId('send-button');
276    
277    // Send a message that triggers patch tool
278    await messageInput.fill('patch: test.txt');
279    await sendButton.click();
280    
281    // The predictable model should use the patch tool
282    await page.waitForFunction(
283      () => document.body.textContent?.includes('I\'ll patch the file: test.txt') ?? false,
284      undefined,
285      { timeout: 30000 }
286    );
287    
288    // Verify patch tool usage appears in the UI
289    await expect(page.locator('[data-testid="tool-call-completed"]').first()).toBeVisible({ timeout: 10000 });
290    await expect(page.locator('text=patch').first()).toBeVisible();
291  });
292  
293  test('displays tool results with collapsible details', async ({ page }) => {
294    await page.goto('/');
295    await page.waitForLoadState('domcontentloaded');
296    
297    const messageInput = page.getByTestId('message-input');
298    const sendButton = page.getByTestId('send-button');
299    
300    // Send a bash command that will show tool results
301    await messageInput.fill('bash: echo "testing tool results"');
302    await sendButton.click();
303    
304    // Wait for the tool call to appear
305    await expect(page.locator('[data-testid="tool-call-completed"]').first()).toBeVisible({ timeout: 30000 });
306
307    // Check for bash tool header (collapsible element)
308    const bashToolHeader = page.locator('.bash-tool-header');
309    await expect(bashToolHeader.first()).toBeVisible({ timeout: 10000 });
310  });
311  
312  test('handles multiple consecutive tool calls', async ({ page }) => {
313    await page.goto('/');
314    await page.waitForLoadState('domcontentloaded');
315    
316    const messageInput = page.getByTestId('message-input');
317    const sendButton = page.getByTestId('send-button');
318    
319    // First tool call: bash
320    await messageInput.fill('bash: echo "first command"');
321    await sendButton.click();
322    
323    await expect(page.locator('[data-testid="tool-call-completed"]').first()).toBeVisible({ timeout: 30000 });
324    
325    // Second tool call: think
326    await messageInput.fill('think: analyzing the output');
327    await sendButton.click();
328    
329    // Wait for at least 2 tool calls
330    await page.waitForFunction(
331      () => document.querySelectorAll('[data-testid="tool-call-completed"]').length >= 2,
332      undefined,
333      { timeout: 30000 }
334    );
335    
336    // Third tool call: patch
337    await messageInput.fill('patch: example.txt');
338    await sendButton.click();
339    
340    // Wait for at least 3 tool calls
341    await page.waitForFunction(
342      () => document.querySelectorAll('[data-testid="tool-call-completed"]').length >= 3,
343      undefined,
344      { timeout: 30000 }
345    );
346    
347    // Verify all the specific messages we sent are visible
348    await expect(page.locator('text=bash: echo "first command"')).toBeVisible();
349    await expect(page.locator('text=think: analyzing the output')).toBeVisible();
350    await expect(page.locator('text=patch: example.txt')).toBeVisible();
351    
352    // Verify all tool types are visible
353    await expect(page.locator('text=bash').first()).toBeVisible();
354    await expect(page.locator('text=think').first()).toBeVisible();
355    await expect(page.locator('text=patch').first()).toBeVisible();
356  });
357});
358
359  test('coalesces tool calls - shows tool result with details', async ({ page }) => {
360    await page.goto('/');
361    await page.waitForLoadState('domcontentloaded');
362    
363    const messageInput = page.getByTestId('message-input');
364    const sendButton = page.getByTestId('send-button');
365    
366    // Send a bash command to trigger tool use
367    await messageInput.fill('bash: echo "hello world"');
368    await sendButton.click();
369    
370    // Wait for the tool result to appear
371    await expect(page.locator('[data-testid="tool-call-completed"]').first()).toBeVisible({ timeout: 30000 });
372    
373    // Verify the bash tool header is visible
374    await expect(page.locator('.bash-tool-header').first()).toBeVisible();
375
376    // Verify bash tool shows command
377    await expect(page.locator('.bash-tool-command').first()).toBeVisible();
378  });
379  
380  test('coalesces tool calls - displays agent text and tool separately', async ({ page }) => {
381    await page.goto('/');
382    await page.waitForLoadState('domcontentloaded');
383    
384    const messageInput = page.getByTestId('message-input');
385    const sendButton = page.getByTestId('send-button');
386    
387    // Send a bash command
388    await messageInput.fill('bash: pwd');
389    await sendButton.click();
390    
391    // Wait for tool result
392    await expect(page.locator('[data-testid="tool-call-completed"]').first()).toBeVisible({ timeout: 30000 });
393    
394    // Verify agent message is shown ("I'll run the command: pwd")
395    await expect(page.locator('text=I\'ll run the command: pwd').first()).toBeVisible();
396    
397    // Verify tool result is shown separately as coalesced tool call
398    await expect(page.locator('[data-testid="tool-call-completed"]').first()).toBeVisible();
399    await expect(page.locator('text=bash').first()).toBeVisible();
400  });
401  
402  test('handles sequential tool calls', async ({ page }) => {
403    await page.goto('/');
404    await page.waitForLoadState('domcontentloaded');
405    
406    const messageInput = page.getByTestId('message-input');
407    const sendButton = page.getByTestId('send-button');
408    
409    // First tool call
410    await messageInput.fill('bash: echo "first"');
411    await sendButton.click();
412    await expect(page.locator('[data-testid="tool-call-completed"]').first()).toBeVisible({ timeout: 30000 });
413    
414    // Second tool call
415    await messageInput.fill('bash: echo "second"');
416    await sendButton.click();
417    
418    // Wait for the second tool result
419    await page.waitForFunction(
420      () => document.querySelectorAll('[data-testid="tool-call-completed"]').length >= 2,
421      undefined,
422      { timeout: 30000 }
423    );
424    
425    // Verify both tool calls are displayed
426    const toolCalls = page.locator('[data-testid="tool-call-completed"]');
427    expect(await toolCalls.count()).toBeGreaterThanOrEqual(2);
428  });
429
430  test('displays LLM error message in UI', async ({ page }) => {
431    // Clear any existing data by navigating to root (which should show empty state)
432    await page.goto('/');
433    await page.waitForLoadState('domcontentloaded');
434    
435    // Wait for the empty state or message input
436    const messageInput = page.getByTestId('message-input');
437    await expect(messageInput).toBeVisible({ timeout: 30000 });
438    
439    const sendButton = page.getByTestId('send-button');
440    
441    // Send a message that triggers an error in the predictable LLM
442    await messageInput.fill('error: test error message');
443    await sendButton.click();
444    
445    // Wait for the error message to appear in the UI
446    await page.waitForFunction(
447      () => {
448        const text = 'LLM request failed: predictable error: test error message';
449        return document.body.textContent?.includes(text) ?? false;
450      },
451      undefined,
452      { timeout: 30000 }
453    );
454    
455    // Verify error message is visible with error styling
456    const errorMessage = page.locator('[role="alert"]');
457    await expect(errorMessage).toBeVisible({ timeout: 10000 });
458    
459    // Verify the error text is displayed
460    await expect(page.locator('text=LLM request failed: predictable error: test error message')).toBeVisible();
461    
462    // Verify error label is shown in the message header
463    await expect(page.locator('[role="alert"]').locator('text=Error')).toBeVisible();
464  });