feat(blob): better content-type detection

Amolith and Crush created

Improve file content detection and syntax highlighting in the web UI:
- Better MIME type detection for files without extensions
- Content-based markdown detection for extensionless files
- Enhanced lexer selection: extensionless files prioritize content analysis; files with extensions still try filename matching first
- Added import of the base Chroma package (needed for the chroma.Lexer type used in lexer selection)

Implements: bug-147d490
Co-Authored-By: Crush <crush@charm.land>

Change summary

pkg/web/webui_blob.go | 54 ++++++++++++++++++++++++++++++++++++++------
1 file changed, 46 insertions(+), 8 deletions(-)

Detailed changes

pkg/web/webui_blob.go 🔗

@@ -8,6 +8,7 @@ import (
 	"path/filepath"
 	"strings"
 
+	"github.com/alecthomas/chroma/v2"
 	"github.com/alecthomas/chroma/v2/formatters/html"
 	"github.com/alecthomas/chroma/v2/lexers"
 	"github.com/alecthomas/chroma/v2/styles"
@@ -88,7 +89,7 @@ func repoBlob(w http.ResponseWriter, r *http.Request) {
 
 	defaultBranch := getDefaultBranch(gr)
 
-	isMarkdown := isMarkdownFile(path)
+	isMarkdown := isMarkdownFile(path, content)
 	showSource := r.URL.Query().Get("source") == "1"
 	var renderedHTML template.HTML
 
@@ -170,10 +171,20 @@ func repoBlobRaw(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	contentType := mime.TypeByExtension(filepath.Ext(path))
-	if contentType == "" {
+	ext := filepath.Ext(path)
+	var contentType string
+
+	// For files without extensions, prioritize content detection
+	if ext == "" {
 		contentType = http.DetectContentType(content)
+	} else {
+		// For files with extensions, try MIME type lookup first
+		contentType = mime.TypeByExtension(ext)
+		if contentType == "" {
+			contentType = http.DetectContentType(content)
+		}
 	}
+
 	if strings.HasPrefix(contentType, "text/") && !strings.Contains(contentType, "charset") {
 		contentType += "; charset=utf-8"
 	}
@@ -198,18 +209,45 @@ func isBinaryContent(content []byte) bool {
 	return false
 }
 
-// isMarkdownFile checks if a file has a markdown extension.
-func isMarkdownFile(path string) bool {
+// isMarkdownFile checks if a file has a markdown extension or contains markdown content.
+func isMarkdownFile(path string, content []byte) bool {
 	ext := strings.ToLower(filepath.Ext(path))
-	return ext == ".md" || ext == ".markdown"
+	if ext == ".md" || ext == ".markdown" {
+		return true
+	}
+
+	// For files without extensions, use Chroma's lexer detection
+	if ext == "" && len(content) > 0 {
+		lexer := lexers.Analyse(string(content))
+		if lexer != nil {
+			config := lexer.Config()
+			name := strings.ToLower(config.Name)
+			return name == "markdown" || name == "md"
+		}
+	}
+
+	return false
 }
 
 // highlightCode applies syntax highlighting to code and returns HTML.
 func highlightCode(path string, content []byte) template.HTML {
-	lexer := lexers.Match(path)
-	if lexer == nil {
+	var lexer chroma.Lexer
+	ext := filepath.Ext(path)
+
+	// For files without extensions, prioritize content analysis to detect shebangs
+	if ext == "" {
 		lexer = lexers.Analyse(string(content))
+		if lexer == nil {
+			lexer = lexers.Match(path)
+		}
+	} else {
+		// For files with extensions, try filename matching first
+		lexer = lexers.Match(path)
+		if lexer == nil {
+			lexer = lexers.Analyse(string(content))
+		}
 	}
+
 	if lexer == nil {
 		lexer = lexers.Fallback
 	}