shell_parser.rs

  1use brush_parser::ast;
  2use brush_parser::word::WordPiece;
  3use brush_parser::{Parser, ParserOptions, SourceInfo};
  4use std::io::BufReader;
  5
  6pub fn extract_commands(command: &str) -> Option<Vec<String>> {
  7    let reader = BufReader::new(command.as_bytes());
  8    let options = ParserOptions::default();
  9    let source_info = SourceInfo::default();
 10    let mut parser = Parser::new(reader, &options, &source_info);
 11
 12    let program = parser.parse_program().ok()?;
 13
 14    let mut commands = Vec::new();
 15    extract_commands_from_program(&program, &mut commands)?;
 16
 17    Some(commands)
 18}
 19
 20fn extract_commands_from_program(program: &ast::Program, commands: &mut Vec<String>) -> Option<()> {
 21    for complete_command in &program.complete_commands {
 22        extract_commands_from_compound_list(complete_command, commands)?;
 23    }
 24    Some(())
 25}
 26
 27fn extract_commands_from_compound_list(
 28    compound_list: &ast::CompoundList,
 29    commands: &mut Vec<String>,
 30) -> Option<()> {
 31    for item in &compound_list.0 {
 32        extract_commands_from_and_or_list(&item.0, commands)?;
 33    }
 34    Some(())
 35}
 36
 37fn extract_commands_from_and_or_list(
 38    and_or_list: &ast::AndOrList,
 39    commands: &mut Vec<String>,
 40) -> Option<()> {
 41    extract_commands_from_pipeline(&and_or_list.first, commands)?;
 42
 43    for and_or in &and_or_list.additional {
 44        match and_or {
 45            ast::AndOr::And(pipeline) | ast::AndOr::Or(pipeline) => {
 46                extract_commands_from_pipeline(pipeline, commands)?;
 47            }
 48        }
 49    }
 50    Some(())
 51}
 52
 53fn extract_commands_from_pipeline(
 54    pipeline: &ast::Pipeline,
 55    commands: &mut Vec<String>,
 56) -> Option<()> {
 57    for command in &pipeline.seq {
 58        extract_commands_from_command(command, commands)?;
 59    }
 60    Some(())
 61}
 62
 63fn extract_commands_from_command(command: &ast::Command, commands: &mut Vec<String>) -> Option<()> {
 64    match command {
 65        ast::Command::Simple(simple_command) => {
 66            extract_commands_from_simple_command(simple_command, commands)?;
 67        }
 68        ast::Command::Compound(compound_command, _redirect_list) => {
 69            extract_commands_from_compound_command(compound_command, commands)?;
 70        }
 71        ast::Command::Function(func_def) => {
 72            extract_commands_from_function_body(&func_def.body, commands)?;
 73        }
 74        ast::Command::ExtendedTest(test_expr) => {
 75            extract_commands_from_extended_test_expr(test_expr, commands)?;
 76        }
 77    }
 78    Some(())
 79}
 80
 81fn extract_commands_from_simple_command(
 82    simple_command: &ast::SimpleCommand,
 83    commands: &mut Vec<String>,
 84) -> Option<()> {
 85    // Build a normalized command string from individual words, stripping shell
 86    // quotes so that security patterns match regardless of quoting style.
 87    // For example, both `rm -rf '/'` and `rm -rf /` normalize to "rm -rf /".
 88    //
 89    // If any word fails to normalize, we return None so that `extract_commands`
 90    // returns None — the same as a shell parse failure. The caller then falls
 91    // back to raw-input matching with always_allow disabled.
 92    let mut words = Vec::new();
 93    if let Some(word) = &simple_command.word_or_name {
 94        words.push(normalize_word(word)?);
 95    }
 96    if let Some(suffix) = &simple_command.suffix {
 97        for item in &suffix.0 {
 98            if let ast::CommandPrefixOrSuffixItem::Word(word) = item {
 99                words.push(normalize_word(word)?);
100            }
101        }
102    }
103    let command_str = words.join(" ");
104    if !command_str.is_empty() {
105        commands.push(command_str);
106    }
107
108    // Extract nested commands from command substitutions, process substitutions, etc.
109    if let Some(prefix) = &simple_command.prefix {
110        extract_commands_from_command_prefix(prefix, commands)?;
111    }
112    if let Some(word) = &simple_command.word_or_name {
113        extract_commands_from_word(word, commands);
114    }
115    if let Some(suffix) = &simple_command.suffix {
116        extract_commands_from_command_suffix(suffix, commands)?;
117    }
118    Some(())
119}
120
121/// Normalizes a shell word by stripping quoting syntax and returning the
122/// semantic (unquoted) value. Returns `None` if word parsing fails.
123fn normalize_word(word: &ast::Word) -> Option<String> {
124    let options = ParserOptions::default();
125    let pieces = brush_parser::word::parse(&word.value, &options).ok()?;
126    let mut result = String::new();
127    for piece_with_source in &pieces {
128        normalize_word_piece_into(
129            &piece_with_source.piece,
130            &word.value,
131            piece_with_source.start_index,
132            piece_with_source.end_index,
133            &mut result,
134        );
135    }
136    Some(result)
137}
138
139fn normalize_word_piece_into(
140    piece: &WordPiece,
141    raw_value: &str,
142    start_index: usize,
143    end_index: usize,
144    result: &mut String,
145) {
146    match piece {
147        WordPiece::Text(text) => result.push_str(text),
148        WordPiece::SingleQuotedText(text) => result.push_str(text),
149        WordPiece::AnsiCQuotedText(text) => result.push_str(text),
150        WordPiece::EscapeSequence(text) => {
151            result.push_str(text.strip_prefix('\\').unwrap_or(text));
152        }
153        WordPiece::DoubleQuotedSequence(pieces)
154        | WordPiece::GettextDoubleQuotedSequence(pieces) => {
155            for inner in pieces {
156                normalize_word_piece_into(
157                    &inner.piece,
158                    raw_value,
159                    inner.start_index,
160                    inner.end_index,
161                    result,
162                );
163            }
164        }
165        WordPiece::TildePrefix(prefix) => {
166            result.push('~');
167            result.push_str(prefix);
168        }
169        // For parameter expansions, command substitutions, and arithmetic expressions,
170        // preserve the original source text so that patterns like `\$HOME` continue
171        // to match.
172        WordPiece::ParameterExpansion(_)
173        | WordPiece::CommandSubstitution(_)
174        | WordPiece::BackquotedCommandSubstitution(_)
175        | WordPiece::ArithmeticExpression(_) => {
176            if let Some(source) = raw_value.get(start_index..end_index) {
177                result.push_str(source);
178            }
179        }
180    }
181}
182
183fn extract_commands_from_command_prefix(
184    prefix: &ast::CommandPrefix,
185    commands: &mut Vec<String>,
186) -> Option<()> {
187    for item in &prefix.0 {
188        extract_commands_from_prefix_or_suffix_item(item, commands)?;
189    }
190    Some(())
191}
192
193fn extract_commands_from_command_suffix(
194    suffix: &ast::CommandSuffix,
195    commands: &mut Vec<String>,
196) -> Option<()> {
197    for item in &suffix.0 {
198        extract_commands_from_prefix_or_suffix_item(item, commands)?;
199    }
200    Some(())
201}
202
203fn extract_commands_from_prefix_or_suffix_item(
204    item: &ast::CommandPrefixOrSuffixItem,
205    commands: &mut Vec<String>,
206) -> Option<()> {
207    match item {
208        ast::CommandPrefixOrSuffixItem::IoRedirect(redirect) => {
209            extract_commands_from_io_redirect(redirect, commands)?;
210        }
211        ast::CommandPrefixOrSuffixItem::AssignmentWord(assignment, _word) => {
212            extract_commands_from_assignment(assignment, commands);
213        }
214        ast::CommandPrefixOrSuffixItem::Word(word) => {
215            extract_commands_from_word(word, commands);
216        }
217        ast::CommandPrefixOrSuffixItem::ProcessSubstitution(_kind, subshell) => {
218            extract_commands_from_compound_list(&subshell.list, commands)?;
219        }
220    }
221    Some(())
222}
223
224fn extract_commands_from_io_redirect(
225    redirect: &ast::IoRedirect,
226    commands: &mut Vec<String>,
227) -> Option<()> {
228    match redirect {
229        ast::IoRedirect::File(_fd, _kind, target) => {
230            if let ast::IoFileRedirectTarget::ProcessSubstitution(_kind, subshell) = target {
231                extract_commands_from_compound_list(&subshell.list, commands)?;
232            }
233        }
234        ast::IoRedirect::HereDocument(_fd, _here_doc) => {}
235        ast::IoRedirect::HereString(_fd, word) => {
236            extract_commands_from_word(word, commands);
237        }
238        ast::IoRedirect::OutputAndError(word, _) => {
239            extract_commands_from_word(word, commands);
240        }
241    }
242    Some(())
243}
244
245fn extract_commands_from_assignment(assignment: &ast::Assignment, commands: &mut Vec<String>) {
246    match &assignment.value {
247        ast::AssignmentValue::Scalar(word) => {
248            extract_commands_from_word(word, commands);
249        }
250        ast::AssignmentValue::Array(words) => {
251            for (opt_word, word) in words {
252                if let Some(w) = opt_word {
253                    extract_commands_from_word(w, commands);
254                }
255                extract_commands_from_word(word, commands);
256            }
257        }
258    }
259}
260
261fn extract_commands_from_word(word: &ast::Word, commands: &mut Vec<String>) {
262    let options = ParserOptions::default();
263    if let Ok(pieces) = brush_parser::word::parse(&word.value, &options) {
264        for piece_with_source in pieces {
265            extract_commands_from_word_piece(&piece_with_source.piece, commands);
266        }
267    }
268}
269
270fn extract_commands_from_word_piece(piece: &WordPiece, commands: &mut Vec<String>) {
271    match piece {
272        WordPiece::CommandSubstitution(cmd_str)
273        | WordPiece::BackquotedCommandSubstitution(cmd_str) => {
274            if let Some(nested_commands) = extract_commands(cmd_str) {
275                commands.extend(nested_commands);
276            }
277        }
278        WordPiece::DoubleQuotedSequence(pieces)
279        | WordPiece::GettextDoubleQuotedSequence(pieces) => {
280            for inner_piece_with_source in pieces {
281                extract_commands_from_word_piece(&inner_piece_with_source.piece, commands);
282            }
283        }
284        WordPiece::EscapeSequence(_)
285        | WordPiece::SingleQuotedText(_)
286        | WordPiece::Text(_)
287        | WordPiece::AnsiCQuotedText(_)
288        | WordPiece::TildePrefix(_)
289        | WordPiece::ParameterExpansion(_)
290        | WordPiece::ArithmeticExpression(_) => {}
291    }
292}
293
294fn extract_commands_from_compound_command(
295    compound_command: &ast::CompoundCommand,
296    commands: &mut Vec<String>,
297) -> Option<()> {
298    match compound_command {
299        ast::CompoundCommand::BraceGroup(brace_group) => {
300            extract_commands_from_compound_list(&brace_group.list, commands)?;
301        }
302        ast::CompoundCommand::Subshell(subshell) => {
303            extract_commands_from_compound_list(&subshell.list, commands)?;
304        }
305        ast::CompoundCommand::ForClause(for_clause) => {
306            if let Some(words) = &for_clause.values {
307                for word in words {
308                    extract_commands_from_word(word, commands);
309                }
310            }
311            extract_commands_from_do_group(&for_clause.body, commands)?;
312        }
313        ast::CompoundCommand::CaseClause(case_clause) => {
314            extract_commands_from_word(&case_clause.value, commands);
315            for item in &case_clause.cases {
316                if let Some(body) = &item.cmd {
317                    extract_commands_from_compound_list(body, commands)?;
318                }
319            }
320        }
321        ast::CompoundCommand::IfClause(if_clause) => {
322            extract_commands_from_compound_list(&if_clause.condition, commands)?;
323            extract_commands_from_compound_list(&if_clause.then, commands)?;
324            if let Some(elses) = &if_clause.elses {
325                for else_item in elses {
326                    if let Some(condition) = &else_item.condition {
327                        extract_commands_from_compound_list(condition, commands)?;
328                    }
329                    extract_commands_from_compound_list(&else_item.body, commands)?;
330                }
331            }
332        }
333        ast::CompoundCommand::WhileClause(while_clause)
334        | ast::CompoundCommand::UntilClause(while_clause) => {
335            extract_commands_from_compound_list(&while_clause.0, commands)?;
336            extract_commands_from_do_group(&while_clause.1, commands)?;
337        }
338        ast::CompoundCommand::ArithmeticForClause(arith_for) => {
339            extract_commands_from_do_group(&arith_for.body, commands)?;
340        }
341        ast::CompoundCommand::Arithmetic(_arith_cmd) => {}
342    }
343    Some(())
344}
345
346fn extract_commands_from_do_group(
347    do_group: &ast::DoGroupCommand,
348    commands: &mut Vec<String>,
349) -> Option<()> {
350    extract_commands_from_compound_list(&do_group.list, commands)
351}
352
353fn extract_commands_from_function_body(
354    func_body: &ast::FunctionBody,
355    commands: &mut Vec<String>,
356) -> Option<()> {
357    extract_commands_from_compound_command(&func_body.0, commands)
358}
359
360fn extract_commands_from_extended_test_expr(
361    test_expr: &ast::ExtendedTestExprCommand,
362    commands: &mut Vec<String>,
363) -> Option<()> {
364    extract_commands_from_extended_test_expr_inner(&test_expr.expr, commands)
365}
366
367fn extract_commands_from_extended_test_expr_inner(
368    expr: &ast::ExtendedTestExpr,
369    commands: &mut Vec<String>,
370) -> Option<()> {
371    match expr {
372        ast::ExtendedTestExpr::Not(inner) => {
373            extract_commands_from_extended_test_expr_inner(inner, commands)?;
374        }
375        ast::ExtendedTestExpr::And(left, right) | ast::ExtendedTestExpr::Or(left, right) => {
376            extract_commands_from_extended_test_expr_inner(left, commands)?;
377            extract_commands_from_extended_test_expr_inner(right, commands)?;
378        }
379        ast::ExtendedTestExpr::Parenthesized(inner) => {
380            extract_commands_from_extended_test_expr_inner(inner, commands)?;
381        }
382        ast::ExtendedTestExpr::UnaryTest(_, word) => {
383            extract_commands_from_word(word, commands);
384        }
385        ast::ExtendedTestExpr::BinaryTest(_, word1, word2) => {
386            extract_commands_from_word(word1, commands);
387            extract_commands_from_word(word2, commands);
388        }
389    }
390    Some(())
391}
392
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `extract_commands` on `input`, panicking if it yields `None`.
    #[track_caller]
    fn parsed(input: &str) -> Vec<String> {
        extract_commands(input).expect("parse failed")
    }

    /// Asserts that `input` yields exactly `expected`, in order.
    #[track_caller]
    fn assert_extracts(input: &str, expected: &[&str]) {
        assert_eq!(parsed(input), expected);
    }

    /// True when any extracted command contains `needle` as a substring.
    fn mentions(commands: &[String], needle: &str) -> bool {
        commands.iter().any(|c| c.contains(needle))
    }

    /// True when `wanted` appears as a whole extracted command.
    fn has_exact(commands: &[String], wanted: &str) -> bool {
        commands.iter().any(|c| c == wanted)
    }

    // --- Plain commands -------------------------------------------------

    #[test]
    fn test_simple_command() {
        assert_extracts("ls", &["ls"]);
    }

    #[test]
    fn test_command_with_args() {
        assert_extracts("ls -la /tmp", &["ls -la /tmp"]);
    }

    // --- Quote and escape normalization ---------------------------------

    #[test]
    fn test_single_quoted_argument_is_normalized() {
        assert_extracts("rm -rf '/'", &["rm -rf /"]);
    }

    #[test]
    fn test_single_quoted_command_name_is_normalized() {
        assert_extracts("'rm' -rf /", &["rm -rf /"]);
    }

    #[test]
    fn test_double_quoted_argument_is_normalized() {
        assert_extracts("rm -rf \"/\"", &["rm -rf /"]);
    }

    #[test]
    fn test_double_quoted_command_name_is_normalized() {
        assert_extracts("\"rm\" -rf /", &["rm -rf /"]);
    }

    #[test]
    fn test_escaped_argument_is_normalized() {
        assert_extracts("rm -rf \\/", &["rm -rf /"]);
    }

    #[test]
    fn test_partial_quoting_command_name_is_normalized() {
        assert_extracts("r'm' -rf /", &["rm -rf /"]);
    }

    #[test]
    fn test_partial_quoting_flag_is_normalized() {
        assert_extracts("rm -r'f' /", &["rm -rf /"]);
    }

    #[test]
    fn test_quoted_bypass_in_chained_command() {
        assert_extracts("ls && 'rm' -rf '/'", &["ls", "rm -rf /"]);
    }

    #[test]
    fn test_tilde_preserved_after_normalization() {
        assert_extracts("rm -rf ~", &["rm -rf ~"]);
    }

    #[test]
    fn test_quoted_tilde_normalized() {
        assert_extracts("rm -rf '~'", &["rm -rf ~"]);
    }

    #[test]
    fn test_parameter_expansion_preserved() {
        assert_extracts("rm -rf $HOME", &["rm -rf $HOME"]);
    }

    #[test]
    fn test_braced_parameter_expansion_preserved() {
        assert_extracts("rm -rf ${HOME}", &["rm -rf ${HOME}"]);
    }

    // --- Operators and separators ---------------------------------------

    #[test]
    fn test_and_operator() {
        assert_extracts("ls && rm -rf /", &["ls", "rm -rf /"]);
    }

    #[test]
    fn test_or_operator() {
        assert_extracts("ls || rm -rf /", &["ls", "rm -rf /"]);
    }

    #[test]
    fn test_semicolon() {
        assert_extracts("ls; rm -rf /", &["ls", "rm -rf /"]);
    }

    #[test]
    fn test_pipe() {
        assert_extracts("ls | xargs rm -rf", &["ls", "xargs rm -rf"]);
    }

    #[test]
    fn test_background() {
        assert_extracts("ls & rm -rf /", &["ls", "rm -rf /"]);
    }

    // --- Substitutions --------------------------------------------------

    #[test]
    fn test_command_substitution_dollar() {
        let commands = parsed("echo $(whoami)");
        assert!(mentions(&commands, "echo"));
        assert!(has_exact(&commands, "whoami"));
    }

    #[test]
    fn test_command_substitution_backticks() {
        let commands = parsed("echo `whoami`");
        assert!(mentions(&commands, "echo"));
        assert!(has_exact(&commands, "whoami"));
    }

    #[test]
    fn test_process_substitution_input() {
        let commands = parsed("cat <(ls)");
        assert!(mentions(&commands, "cat"));
        assert!(has_exact(&commands, "ls"));
    }

    #[test]
    fn test_process_substitution_output() {
        let commands = parsed("ls >(cat)");
        assert!(mentions(&commands, "ls"));
        assert!(has_exact(&commands, "cat"));
    }

    // --- Structure ------------------------------------------------------

    #[test]
    fn test_newline_separator() {
        assert_extracts("ls\nrm -rf /", &["ls", "rm -rf /"]);
    }

    #[test]
    fn test_subshell() {
        assert_extracts("(ls && rm -rf /)", &["ls", "rm -rf /"]);
    }

    #[test]
    fn test_mixed_operators() {
        assert_extracts(
            "ls; echo hello && rm -rf /",
            &["ls", "echo hello", "rm -rf /"],
        );
    }

    #[test]
    fn test_no_spaces_around_operators() {
        assert_extracts("ls&&rm", &["ls", "rm"]);
    }

    #[test]
    fn test_nested_command_substitution() {
        let commands = parsed("echo $(cat $(whoami).txt)");
        assert!(mentions(&commands, "echo"));
        assert!(mentions(&commands, "cat"));
        assert!(has_exact(&commands, "whoami"));
    }

    // --- Edge cases -----------------------------------------------------

    #[test]
    fn test_empty_command() {
        assert!(parsed("").is_empty());
    }

    #[test]
    fn test_invalid_syntax_returns_none() {
        assert!(extract_commands("ls &&").is_none());
    }
}