From 8085dd77149e916d1ef885d4076a508782e2fd45 Mon Sep 17 00:00:00 2001
From: Kujtim Hoxha
Date: Tue, 20 Jan 2026 13:50:44 +0100
Subject: [PATCH] wip: trajectory command

---
 internal/cmd/root.go                   |   1 +
 internal/cmd/trajectory.go             | 154 +++++++
 internal/trajectory/atif.go            | 199 ++++++++++
 internal/trajectory/atif_test.go       | 249 ++++++++++++
 internal/trajectory/html.go            |  36 ++
 internal/trajectory/html_template.html | 508 +++++++++++++++++++++++++
 6 files changed, 1147 insertions(+)
 create mode 100644 internal/cmd/trajectory.go
 create mode 100644 internal/trajectory/atif.go
 create mode 100644 internal/trajectory/atif_test.go
 create mode 100644 internal/trajectory/html.go
 create mode 100644 internal/trajectory/html_template.html

diff --git a/internal/cmd/root.go b/internal/cmd/root.go
index edb2512171348b0c9a1156683ecb398d73657ccf..b9d4caccd3dbf0783c1cceaa8234dca0cb7708fe 100644
--- a/internal/cmd/root.go
+++ b/internal/cmd/root.go
@@ -46,6 +46,7 @@ func init() {
 		logsCmd,
 		schemaCmd,
 		loginCmd,
+		trajectoryCmd,
 	)
 }
 
diff --git a/internal/cmd/trajectory.go b/internal/cmd/trajectory.go
new file mode 100644
index 0000000000000000000000000000000000000000..5d272ce3438490a00d5eb6fd9ca6189565aaae8e
--- /dev/null
+++ b/internal/cmd/trajectory.go
@@ -0,0 +1,154 @@
+package cmd
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+
+	"github.com/charmbracelet/crush/internal/config"
+	"github.com/charmbracelet/crush/internal/db"
+	"github.com/charmbracelet/crush/internal/message"
+	"github.com/charmbracelet/crush/internal/session"
+	"github.com/charmbracelet/crush/internal/trajectory"
+	"github.com/charmbracelet/crush/internal/version"
+	"github.com/spf13/cobra"
+)
+
+// trajectoryCmd groups the trajectory subcommands under "crush trajectory".
+var trajectoryCmd = &cobra.Command{
+	Use:   "trajectory",
+	Short: "Trajectory export utilities",
+	Long:  "Export session trajectories in Harbor ATIF format for analysis and sharing",
+}
+
+// trajectoryExportCmd loads one stored session and its messages, converts them
+// to an ATIF trajectory, and writes it as JSON or HTML to a file or to stdout.
+var trajectoryExportCmd = &cobra.Command{
+	Use:   "export",
+	Short: "Export a session as ATIF trajectory",
+	Long:  "Export a Crush session in Harbor ATIF (Agent Trajectory Interchange Format) v1.4",
+	Example: `
+# Export a session as JSON to stdout
+crush trajectory export --session <session-id>
+
+# Export a session to a JSON file
+crush trajectory export --session <session-id> --output trajectory.json
+
+# Export as HTML for visualization
+crush trajectory export --session <session-id> --format html --output trajectory.html
+
+# Validate with Harbor validator
+crush trajectory export --session <session-id> > out.json
+python -m harbor.utils.trajectory_validator out.json
+ `,
+	RunE: func(cmd *cobra.Command, args []string) error {
+		// GetString errors (undefined or non-string flag) are ignored; the value is then "".
+		sessionID, _ := cmd.Flags().GetString("session")
+		outputFile, _ := cmd.Flags().GetString("output")
+		format, _ := cmd.Flags().GetString("format")
+		// NOTE(review): "data-dir" is not registered in this file; presumably it is a
+		// persistent flag inherited from the root command. Confirm.
+		dataDir, _ := cmd.Flags().GetString("data-dir")
+
+		if sessionID == "" {
+			return fmt.Errorf("--session flag is required")
+		}
+
+		// Reject an unsupported format before loading config or opening the database.
+		if format != "json" && format != "html" {
+			return fmt.Errorf("unknown format: %s (use 'json' or 'html')", format)
+		}
+
+		ctx := cmd.Context()
+
+		cwd, err := ResolveCwd(cmd)
+		if err != nil {
+			return err
+		}
+
+		// Load config (lightweight, no full app init).
+		cfg, err := config.Load(cwd, dataDir, false)
+		if err != nil {
+			return fmt.Errorf("failed to load config: %w", err)
+		}
+
+		// Connect to DB.
+		conn, err := db.Connect(ctx, cfg.Options.DataDirectory)
+		if err != nil {
+			return fmt.Errorf("failed to connect to database: %w", err)
+		}
+		defer conn.Close()
+
+		querier := db.New(conn)
+		sessionSvc := session.NewService(querier)
+		messageSvc := message.NewService(querier)
+
+		// Load session.
+		sess, err := sessionSvc.Get(ctx, sessionID)
+		if err != nil {
+			return fmt.Errorf("failed to get session: %w", err)
+		}
+
+		// Load messages.
+		messages, err := messageSvc.List(ctx, sessionID)
+		if err != nil {
+			return fmt.Errorf("failed to list messages: %w", err)
+		}
+
+		// Determine model name from first assistant message.
+		var modelName string
+		for _, msg := range messages {
+			if msg.Role == message.Assistant && msg.Model != "" {
+				modelName = msg.Model
+				break
+			}
+		}
+
+		// Export to ATIF.
+		traj, err := trajectory.ExportSession(sess, messages, "Crush", version.Version, modelName)
+		if err != nil {
+			return fmt.Errorf("failed to export trajectory: %w", err)
+		}
+
+		var data []byte
+		switch format {
+		case "html":
+			data, err = trajectory.RenderHTML(traj)
+			if err != nil {
+				return fmt.Errorf("failed to render HTML: %w", err)
+			}
+		case "json":
+			data, err = json.MarshalIndent(traj, "", " ")
+			if err != nil {
+				return fmt.Errorf("failed to marshal trajectory: %w", err)
+			}
+		}
+
+		// Write output.
+		if outputFile != "" {
+			if err := os.WriteFile(outputFile, data, 0o644); err != nil {
+				return fmt.Errorf("failed to write output file: %w", err)
+			}
+			cmd.Printf("Exported trajectory to %s\n", outputFile)
+			return nil
+		}
+
+		// cmd.Println falls back to stderr when no output writer is set, which
+		// would leave a shell redirect such as `> out.json` empty; use stdout.
+		if _, err := fmt.Fprintln(cmd.OutOrStdout(), string(data)); err != nil {
+			return fmt.Errorf("failed to write trajectory to stdout: %w", err)
+		}
+
+		return nil
+	},
+}
+
+// init registers the export flags, marks --session required, and attaches export to trajectoryCmd.
+func init() {
+	trajectoryExportCmd.Flags().StringP("session", "s", "", "Session ID to export (required)")
+	trajectoryExportCmd.Flags().StringP("output", "o", "", "Output file path (defaults to stdout)")
+	trajectoryExportCmd.Flags().StringP("format", "f", "json", "Output format: json or html")
+	_ = trajectoryExportCmd.MarkFlagRequired("session")
+
+	trajectoryCmd.AddCommand(trajectoryExportCmd)
+}
diff --git a/internal/trajectory/atif.go b/internal/trajectory/atif.go
new file mode 100644
index 0000000000000000000000000000000000000000..67bade2fe30465b2f84a0f10b336d75a33eaa539
--- /dev/null
+++ b/internal/trajectory/atif.go
@@ -0,0 +1,199 @@
+// Package trajectory provides export functionality for the Harbor ATIF
+// (Agent Trajectory Interchange Format) v1.4 specification.
+package trajectory
+
+import (
+	"encoding/json"
+	"time"
+
+	"github.com/charmbracelet/crush/internal/message"
+	"github.com/charmbracelet/crush/internal/session"
+)
+
+// Trajectory represents the root ATIF document structure.
+type Trajectory struct {
+	SchemaVersion string        `json:"schema_version"`          // ExportSession sets this to "ATIF-v1.4".
+	SessionID     string        `json:"session_id"`              // ID of the exported session.
+	Agent         Agent         `json:"agent"`                   // Name, version and model passed to ExportSession.
+	Steps         []Step        `json:"steps"`                   // User and agent steps in message order.
+	FinalMetrics  *FinalMetrics `json:"final_metrics,omitempty"` // Nil, and omitted, when no session totals are reported.
+	Extra         any           `json:"extra,omitempty"`         // Not populated by ExportSession.
+}
+
+// Agent describes the agent that generated the trajectory. Only Name is always emitted.
+type Agent struct {
+	Name      string `json:"name"`
+	Version   string `json:"version,omitempty"`
+	ModelName string `json:"model_name,omitempty"`
+}
+
+// Step represents a single step in the trajectory: one user or one assistant message.
+type Step struct {
+	StepID           int          `json:"step_id"`   // 1-based position among the emitted steps.
+	Timestamp        string       `json:"timestamp"` // Message creation time, RFC 3339 in UTC.
+	Source           string       `json:"source"`    // "user", "agent", or "system"; ExportSession emits only the first two.
+	Message          string       `json:"message"`   // Text content of the message.
+	ReasoningContent string       `json:"reasoning_content,omitempty"`
+	ToolCalls        []ToolCall   `json:"tool_calls,omitempty"`
+	Observation      *Observation `json:"observation,omitempty"` // Tool results attached to an agent step.
+	Metrics          *StepMetrics `json:"metrics,omitempty"`     // Not populated by ExportSession.
+}
+
+// ToolCall represents a tool invocation by the agent.
+type ToolCall struct {
+	ToolCallID   string `json:"tool_call_id"`
+	FunctionName string `json:"function_name"`
+	Arguments    any    `json:"arguments"` // Decoded JSON when the tool input parses, otherwise the raw input string.
+}
+
+// Observation contains the results of tool executions for one agent step.
+type Observation struct {
+	Results []ObservationResult `json:"results"`
+}
+
+// ObservationResult is a single tool result linked to its call.
+type ObservationResult struct {
+	SourceCallID string `json:"source_call_id,omitempty"` // ToolCallID of the call that produced this result.
+	Content      string `json:"content,omitempty"`
+}
+
+// StepMetrics contains token usage for a single step. Zero-valued fields are omitted from JSON.
+type StepMetrics struct {
+	PromptTokens     int64   `json:"prompt_tokens,omitempty"`
+	CompletionTokens int64   `json:"completion_tokens,omitempty"`
+	CachedTokens     int64   `json:"cached_tokens,omitempty"`
+	CostUSD          float64 `json:"cost_usd,omitempty"`
+}
+
+// FinalMetrics contains aggregate metrics for the entire session.
+type FinalMetrics struct {
+	TotalPromptTokens     int64   `json:"total_prompt_tokens,omitempty"`
+	TotalCompletionTokens int64   `json:"total_completion_tokens,omitempty"`
+	TotalCostUSD          float64 `json:"total_cost_usd,omitempty"`
+	TotalSteps            int     `json:"total_steps,omitempty"`
+}
+
+// ExportSession builds an ATIF trajectory from a Crush session and its
+// messages. User and assistant messages each become one step, numbered from 1
+// in message order; tool messages add observations to the preceding agent
+// step instead of creating steps. The returned error is currently always nil.
+func ExportSession(
+	sess session.Session,
+	messages []message.Message,
+	agentName string,
+	agentVersion string,
+	modelName string,
+) (*Trajectory, error) {
+	agent := Agent{Name: agentName, Version: agentVersion, ModelName: modelName}
+	out := &Trajectory{
+		SchemaVersion: "ATIF-v1.4",
+		SessionID:     sess.ID,
+		Agent:         agent,
+		Steps:         make([]Step, 0, len(messages)),
+	}
+
+	// agentIdx is the position in out.Steps of the agent step that tool
+	// results attach to; -1 means no agent step is currently open.
+	agentIdx := -1
+	for _, m := range messages {
+		// Step IDs are 1-based and count only the steps emitted so far.
+		nextID := len(out.Steps) + 1
+		switch m.Role {
+		case message.User:
+			out.Steps = append(out.Steps, convertUserMessage(m, nextID))
+			agentIdx = -1
+
+		case message.Assistant:
+			out.Steps = append(out.Steps, convertAgentMessage(m, nextID))
+			agentIdx = len(out.Steps) - 1
+
+		case message.Tool:
+			// Tool output is folded into the open agent step; with no open
+			// agent step it is dropped.
+			if agentIdx >= 0 {
+				attachToolResults(&out.Steps[agentIdx], m)
+			}
+		}
+	}
+
+	// Session totals are reported only when at least one of them is positive.
+	hasTotals := sess.PromptTokens > 0 || sess.CompletionTokens > 0 || sess.Cost > 0
+	if hasTotals {
+		out.FinalMetrics = &FinalMetrics{
+			TotalPromptTokens:     sess.PromptTokens,
+			TotalCompletionTokens: sess.CompletionTokens,
+			TotalCostUSD:          sess.Cost,
+			// Tool messages are not steps, so this can be less than len(messages).
+			TotalSteps: len(out.Steps),
+		}
+	}
+
+	return out, nil
+}
+
+// convertUserMessage transforms a user message into an ATIF step.
+func convertUserMessage(msg message.Message, stepID int) Step {
+	var s Step
+	s.StepID = stepID
+	s.Timestamp = time.Unix(msg.CreatedAt, 0).UTC().Format(time.RFC3339)
+	s.Source = "user"
+	s.Message = msg.Content().Text
+	return s
+}
+
+// convertAgentMessage maps an assistant message (text, reasoning, tool calls) to an ATIF step.
+func convertAgentMessage(msg message.Message, stepID int) Step {
+	created := time.Unix(msg.CreatedAt, 0).UTC()
+	out := Step{
+		StepID:    stepID,
+		Timestamp: created.Format(time.RFC3339),
+		Source:    "agent",
+		Message:   msg.Content().Text,
+		// Copied unconditionally; an empty value simply leaves the field unset.
+		ReasoningContent: msg.ReasoningContent().Thinking,
+	}
+
+	calls := msg.ToolCalls()
+	if len(calls) == 0 {
+		return out
+	}
+
+	// Pre-size the slice: exactly one ATIF call is emitted per tool call.
+	out.ToolCalls = make([]ToolCall, 0, len(calls))
+	for i := range calls {
+		// Arguments are emitted as decoded JSON when the raw input parses;
+		// otherwise the raw input string is kept verbatim.
+		var parsed any
+		if json.Unmarshal([]byte(calls[i].Input), &parsed) != nil {
+			parsed = calls[i].Input
+		}
+		out.ToolCalls = append(out.ToolCalls, ToolCall{
+			ToolCallID:   calls[i].ID,
+			FunctionName: calls[i].Name,
+			Arguments:    parsed,
+		})
+	}
+
+	return out
+}
+
+// attachToolResults attaches tool results from a tool message to an agent step.
+func attachToolResults(step *Step, msg message.Message) {
+	results := msg.ToolResults()
+	if len(results) == 0 {
+		return // Nothing to attach; step.Observation is left untouched.
+	}
+
+	if step.Observation == nil { // Created lazily on the first batch of results.
+		step.Observation = &Observation{
+			Results: make([]ObservationResult, 0, len(results)),
+		}
+	}
+
+	for _, tr := range results { // Later tool messages append to the same observation.
+		step.Observation.Results = append(step.Observation.Results, ObservationResult{
+			SourceCallID: tr.ToolCallID, // Links the result back to its tool call.
+			Content:      tr.Content,
+		})
+	}
+}
diff --git a/internal/trajectory/atif_test.go b/internal/trajectory/atif_test.go
new file mode 100644
index 0000000000000000000000000000000000000000..2994a8023927f2cc6f2b0d1754ad5cbe4be06e0e
--- /dev/null
+++ b/internal/trajectory/atif_test.go
@@ -0,0 +1,249 @@
+package trajectory
+
+import (
+	"encoding/json"
+	"testing"
+	"time"
+
+	"github.com/charmbracelet/crush/internal/message"
+	"github.com/charmbracelet/crush/internal/session"
+	"github.com/stretchr/testify/require"
+)
+
+func TestExportSession(t *testing.T) {
+	t.Parallel()
+
+	now := time.Now().Unix()
+
+	sess := session.Session{
+		ID:               "test-session-123",
+		Title:            "Test Session",
+		PromptTokens:     1000,
+		CompletionTokens: 500,
+		Cost:             0.05,
+	}
+
+	messages := []message.Message{
+		{
+			ID:        "msg-1",
+			SessionID: sess.ID,
+			Role:      message.User,
+			Parts:     []message.ContentPart{message.TextContent{Text: "Hello, can you help me?"}},
+			CreatedAt: now,
+		},
+		{
+			ID:        "msg-2",
+			SessionID: sess.ID,
+			Role:      message.Assistant,
+			Parts: []message.ContentPart{
+				message.ReasoningContent{Thinking: "User is asking for help. I should respond helpfully."},
+				message.TextContent{Text: "Of course! How can I assist you today?"},
+			},
+			Model:     "claude-sonnet-4-20250514",
+			CreatedAt: now + 1,
+		},
+		{
+			ID:        "msg-3",
+			SessionID: sess.ID,
+			Role:      message.User,
+			Parts:     []message.ContentPart{message.TextContent{Text: "List files in the current directory"}},
+			CreatedAt: now + 2,
+		},
+		{
+			ID:        "msg-4",
+			SessionID: sess.ID,
+			Role:      message.Assistant,
+			Parts: []message.ContentPart{
+				message.TextContent{Text: "I'll list the files for you."},
+				message.ToolCall{
+					ID:    "call-123",
+					Name:  "ls",
+					Input: `{"path": "."}`,
+				},
+			},
+			Model:     "claude-sonnet-4-20250514",
+			CreatedAt: now + 3,
+		},
+		{
+			ID:        "msg-5",
+			SessionID: sess.ID,
+			Role:      message.Tool,
+			Parts: []message.ContentPart{
+				message.ToolResult{
+					ToolCallID: "call-123",
+					Name:       "ls",
+					Content:    "file1.go\nfile2.go\nREADME.md",
+				},
+			},
+			CreatedAt: now + 4,
+		},
+		{
+			ID:        "msg-6",
+			SessionID: sess.ID,
+			Role:      message.Assistant,
+			Parts: []message.ContentPart{
+				message.TextContent{Text: "Here are the files: file1.go, file2.go, README.md"},
+			},
+			Model:     "claude-sonnet-4-20250514",
+			CreatedAt: now + 5,
+		},
+	}
+
+	traj, err := ExportSession(sess, messages, "Crush", "1.0.0", "claude-sonnet-4-20250514")
+	require.NoError(t, err)
+
+	// Verify root structure.
+	require.Equal(t, "ATIF-v1.4", traj.SchemaVersion)
+	require.Equal(t, "test-session-123", traj.SessionID)
+	require.Equal(t, "Crush", traj.Agent.Name)
+	require.Equal(t, "1.0.0", traj.Agent.Version)
+	require.Equal(t, "claude-sonnet-4-20250514", traj.Agent.ModelName)
+
+	// Six messages yield five steps: the tool message is folded into step 4.
+	require.Len(t, traj.Steps, 5)
+
+	// Step 1: User message.
+	require.Equal(t, 1, traj.Steps[0].StepID)
+	require.Equal(t, "user", traj.Steps[0].Source)
+	require.Equal(t, "Hello, can you help me?", traj.Steps[0].Message)
+	require.Empty(t, traj.Steps[0].ToolCalls)
+	require.Nil(t, traj.Steps[0].Observation)
+
+	// Step 2: Assistant with reasoning.
+	require.Equal(t, 2, traj.Steps[1].StepID)
+	require.Equal(t, "agent", traj.Steps[1].Source)
+	require.Equal(t, "Of course! How can I assist you today?", traj.Steps[1].Message)
+	require.Equal(t, "User is asking for help. I should respond helpfully.", traj.Steps[1].ReasoningContent)
+
+	// Step 3: User message.
+	require.Equal(t, 3, traj.Steps[2].StepID)
+	require.Equal(t, "user", traj.Steps[2].Source)
+
+	// Step 4: Assistant with tool call AND observation (tool result attached).
+	require.Equal(t, 4, traj.Steps[3].StepID)
+	require.Equal(t, "agent", traj.Steps[3].Source)
+	require.Len(t, traj.Steps[3].ToolCalls, 1)
+	require.Equal(t, "call-123", traj.Steps[3].ToolCalls[0].ToolCallID)
+	require.Equal(t, "ls", traj.Steps[3].ToolCalls[0].FunctionName)
+	require.NotNil(t, traj.Steps[3].ToolCalls[0].Arguments)
+	// The observation lives on the same agent step that issued the call.
+	require.NotNil(t, traj.Steps[3].Observation)
+	require.Len(t, traj.Steps[3].Observation.Results, 1)
+	require.Equal(t, "call-123", traj.Steps[3].Observation.Results[0].SourceCallID)
+	require.Contains(t, traj.Steps[3].Observation.Results[0].Content, "file1.go")
+
+	// Step 5: Final assistant response.
+	require.Equal(t, 5, traj.Steps[4].StepID)
+	require.Equal(t, "agent", traj.Steps[4].Source)
+	require.Equal(t, "Here are the files: file1.go, file2.go, README.md", traj.Steps[4].Message)
+
+	// Verify final metrics mirror the session totals and the emitted step count.
+	require.NotNil(t, traj.FinalMetrics)
+	require.Equal(t, int64(1000), traj.FinalMetrics.TotalPromptTokens)
+	require.Equal(t, int64(500), traj.FinalMetrics.TotalCompletionTokens)
+	require.Equal(t, 5, traj.FinalMetrics.TotalSteps)
+	require.InDelta(t, 0.05, traj.FinalMetrics.TotalCostUSD, 0.001)
+
+	// Verify every timestamp parses as RFC 3339.
+	for _, step := range traj.Steps {
+		_, err := time.Parse(time.RFC3339, step.Timestamp)
+		require.NoError(t, err, "step %d has invalid timestamp: %s", step.StepID, step.Timestamp)
+	}
+
+	// Verify JSON marshaling works and uses the snake_case field names.
+	data, err := json.MarshalIndent(traj, "", " ")
+	require.NoError(t, err)
+	require.Contains(t, string(data), `"schema_version": "ATIF-v1.4"`)
+}
+
+func TestExportSession_EmptyMessages(t *testing.T) {
+	t.Parallel()
+
+	sess := session.Session{
+		ID:    "empty-session",
+		Title: "Empty",
+	}
+
+	traj, err := ExportSession(sess, nil, "Crush", "1.0.0", "")
+	require.NoError(t, err)
+	require.Empty(t, traj.Steps)
+	require.Nil(t, traj.FinalMetrics) // Zero session totals produce no final metrics.
+}
+
+func TestExportSession_ToolCallArgumentsParsing(t *testing.T) {
+	t.Parallel()
+
+	sess := session.Session{ID: "tool-args-session"}
+	now := time.Now().Unix()
+
+	messages := []message.Message{
+		{
+			ID:        "msg-1",
+			SessionID: sess.ID,
+			Role:      message.Assistant,
+			Parts: []message.ContentPart{
+				message.ToolCall{
+					ID:    "call-1",
+					Name:  "edit",
+					Input: `{"file_path": "/tmp/test.go", "old_string": "foo", "new_string": "bar"}`,
+				},
+			},
+			CreatedAt: now,
+		},
+	}
+
+	traj, err := ExportSession(sess, messages, "Crush", "1.0.0", "test-model")
+	require.NoError(t, err)
+	require.Len(t, traj.Steps, 1)
+	require.Len(t, traj.Steps[0].ToolCalls, 1)
+
+	// Valid JSON input must be decoded into an object, not kept as a string.
+	args, ok := traj.Steps[0].ToolCalls[0].Arguments.(map[string]any)
+	require.True(t, ok)
+	require.Equal(t, "/tmp/test.go", args["file_path"])
+}
+
+func TestExportSession_ToolError(t *testing.T) {
+	t.Parallel()
+
+	sess := session.Session{ID: "error-session"}
+	now := time.Now().Unix()
+
+	messages := []message.Message{
+		{
+			ID:        "msg-1",
+			SessionID: sess.ID,
+			Role:      message.Assistant,
+			Parts: []message.ContentPart{
+				message.ToolCall{
+					ID:    "call-1",
+					Name:  "bash",
+					Input: `{"command": "foobar"}`,
+				},
+			},
+			CreatedAt: now,
+		},
+		{
+			ID:        "msg-2",
+			SessionID: sess.ID,
+			Role:      message.Tool,
+			Parts: []message.ContentPart{
+				message.ToolResult{
+					ToolCallID: "call-1",
+					Name:       "bash",
+					Content:    "command not found: foobar",
+					IsError:    true, // Not asserted below; only the content is checked.
+				},
+			},
+			CreatedAt: now + 1,
+		},
+	}
+
+	traj, err := ExportSession(sess, messages, "Crush", "1.0.0", "")
+	require.NoError(t, err)
+	require.Len(t, traj.Steps, 1) // The tool message does not create a step of its own.
+	require.Equal(t, "agent", traj.Steps[0].Source)
+	require.NotNil(t, traj.Steps[0].Observation)
+	require.Len(t, traj.Steps[0].Observation.Results, 1)
+	require.Equal(t, "command not found: foobar", traj.Steps[0].Observation.Results[0].Content)
+}
diff --git a/internal/trajectory/html.go b/internal/trajectory/html.go
new file mode 100644
index 0000000000000000000000000000000000000000..726d05be65c60764f729f27265d3f795abf9d749
--- /dev/null
+++ b/internal/trajectory/html.go
@@ -0,0 +1,36 @@
+package trajectory
+
+import (
+	"bytes"
+	_ "embed"
+	"encoding/json"
+	"html/template"
+)
+
+//go:embed html_template.html
+var htmlTemplate string
+
+// RenderHTML renders the trajectory as a standalone HTML document.
+func RenderHTML(traj *Trajectory) ([]byte, error) {
+	page, parseErr := template.New("trajectory").Parse(htmlTemplate)
+	if parseErr != nil {
+		return nil, parseErr
+	}
+
+	// The trajectory is handed to the template as a single JSON document.
+	payload, marshalErr := json.Marshal(traj)
+	if marshalErr != nil {
+		return nil, marshalErr
+	}
+
+	// template.JS tells html/template to treat the JSON as trusted script content.
+	fields := make(map[string]any, 2)
+	fields["Title"] = traj.Agent.Name + " - " + traj.SessionID
+	fields["TrajectoryJSON"] = template.JS(payload)
+
+	out := new(bytes.Buffer)
+	if execErr := page.Execute(out, fields); execErr != nil {
+		return nil, execErr
+	}
+	return out.Bytes(), nil
+}
diff --git a/internal/trajectory/html_template.html b/internal/trajectory/html_template.html
new file mode 100644
index 0000000000000000000000000000000000000000..d49222925aa50c3e5ff34d0441460fd80b2bba18
--- /dev/null
+++ b/internal/trajectory/html_template.html
@@ -0,0 +1,508 @@
+
+
+
+
+
+ {{.Title}}
+
+
+
+
+
+

+
+ + + +
+
+
+
+ + +
+ + + + +
+
+
+
+ Generated by Crush · Harbor ATIF v1.4 +
+
+ + + +