detect-text.mjs

  1import { GENERIC_FONTS } from '../../shared/constants.mjs';
  2import { isFullPage } from '../../shared/page.mjs';
  3import { finding } from '../../findings.mjs';
  4import { profileFindings, profileStep } from '../../profile/profiler.mjs';
  5
  6// ---------------------------------------------------------------------------
  7// Regex fallback (non-HTML files: CSS, JSX, TSX, etc.)
  8// ---------------------------------------------------------------------------
  9
 10const hasRounded = (line) => /\brounded(?:-\w+)?\b/.test(line);
 11const hasBorderRadius = (line) => /border-radius/i.test(line);
 12const isSafeElement = (line) => /<(?:blockquote|nav[\s>]|pre[\s>]|code[\s>]|a\s|input[\s>]|span[\s>])/i.test(line);
 13
 14function isNeutralBorderColor(str) {
 15  const m = str.match(/solid\s+(#[0-9a-f]{3,8}|rgba?\([^)]+\)|\w+)/i);
 16  if (!m) return false;
 17  const c = m[1].toLowerCase();
 18  if (['gray', 'grey', 'silver', 'white', 'black', 'transparent', 'currentcolor'].includes(c)) return true;
 19  const hex = c.match(/^#([0-9a-f]{2})([0-9a-f]{2})([0-9a-f]{2})$/);
 20  if (hex) {
 21    const [r, g, b] = [parseInt(hex[1], 16), parseInt(hex[2], 16), parseInt(hex[3], 16)];
 22    return (Math.max(r, g, b) - Math.min(r, g, b)) < 30;
 23  }
 24  const shex = c.match(/^#([0-9a-f])([0-9a-f])([0-9a-f])$/);
 25  if (shex) {
 26    const [r, g, b] = [parseInt(shex[1] + shex[1], 16), parseInt(shex[2] + shex[2], 16), parseInt(shex[3] + shex[3], 16)];
 27    return (Math.max(r, g, b) - Math.min(r, g, b)) < 30;
 28  }
 29  return false;
 30}
 31
 32const REGEX_MATCHERS = [
 33  // --- Side-tab ---
 34  { id: 'side-tab', regex: /\bborder-[lrse]-(\d+)\b/g,
 35    test: (m, line) => { const n = +m[1]; return hasRounded(line) ? n >= 1 : n >= 4; },
 36    fmt: (m) => m[0] },
 37  { id: 'side-tab', regex: /border-(?:left|right)\s*:\s*(\d+)px\s+solid[^;]*/gi,
 38    test: (m, line) => { if (isSafeElement(line)) return false; if (isNeutralBorderColor(m[0])) return false; const n = +m[1]; return hasBorderRadius(line) ? n >= 1 : n >= 3; },
 39    fmt: (m) => m[0].replace(/\s*;?\s*$/, '') },
 40  { id: 'side-tab', regex: /border-(?:left|right)-width\s*:\s*(\d+)px/gi,
 41    test: (m, line) => !isSafeElement(line) && +m[1] >= 3,
 42    fmt: (m) => m[0] },
 43  { id: 'side-tab', regex: /border-inline-(?:start|end)\s*:\s*(\d+)px\s+solid/gi,
 44    test: (m, line) => !isSafeElement(line) && +m[1] >= 3,
 45    fmt: (m) => m[0] },
 46  { id: 'side-tab', regex: /border-inline-(?:start|end)-width\s*:\s*(\d+)px/gi,
 47    test: (m, line) => !isSafeElement(line) && +m[1] >= 3,
 48    fmt: (m) => m[0] },
 49  { id: 'side-tab', regex: /border(?:Left|Right)\s*[:=]\s*["'`](\d+)px\s+solid/g,
 50    test: (m) => +m[1] >= 3,
 51    fmt: (m) => m[0] },
 52  // --- Border accent on rounded ---
 53  { id: 'border-accent-on-rounded', regex: /\bborder-[tb]-(\d+)\b/g,
 54    test: (m, line) => hasRounded(line) && +m[1] >= 1,
 55    fmt: (m) => m[0] },
 56  { id: 'border-accent-on-rounded', regex: /border-(?:top|bottom)\s*:\s*(\d+)px\s+solid/gi,
 57    test: (m, line) => +m[1] >= 3 && hasBorderRadius(line),
 58    fmt: (m) => m[0] },
 59  // --- Overused font ---
 60  { id: 'overused-font', regex: /font-family\s*:\s*['"]?(Inter|Roboto|Open Sans|Lato|Montserrat|Arial|Helvetica|Fraunces|Geist Sans|Geist Mono|Geist|Mona Sans|Plus Jakarta Sans|Space Grotesk|Recoleta|Instrument Sans)\b/gi,
 61    test: () => true,
 62    fmt: (m) => m[0] },
 63  { id: 'overused-font', regex: /fonts\.googleapis\.com\/css2?\?family=(Inter|Roboto|Open\+Sans|Lato|Montserrat|Fraunces|Plus\+Jakarta\+Sans|Space\+Grotesk|Instrument\+Sans|Mona\+Sans|Geist)\b/gi,
 64    test: () => true,
 65    fmt: (m) => `Google Fonts: ${m[1].replace(/\+/g, ' ')}` },
 66  // --- Pure black background ---
 67  { id: 'pure-black-white', regex: /background(?:-color)?\s*:\s*(#000000|#000|rgb\(0,\s*0,\s*0\))\b/gi,
 68    test: () => true,
 69    fmt: (m) => m[0] },
 70  // --- Gradient text ---
 71  { id: 'gradient-text', regex: /background-clip\s*:\s*text|-webkit-background-clip\s*:\s*text/gi,
 72    test: (m, line) => /gradient/i.test(line),
 73    fmt: () => 'background-clip: text + gradient' },
 74  // --- Gradient text (Tailwind) ---
 75  { id: 'gradient-text', regex: /\bbg-clip-text\b/g,
 76    test: (m, line) => /\bbg-gradient-to-/i.test(line),
 77    fmt: () => 'bg-clip-text + bg-gradient' },
 78  // --- Tailwind pure black background ---
 79  { id: 'pure-black-white', regex: /\bbg-black\b/g,
 80    test: () => true,
 81    fmt: (m) => m[0] },
 82  // --- Tailwind gray on colored bg ---
 83  { id: 'gray-on-color', regex: /\btext-(?:gray|slate|zinc|neutral|stone)-(\d+)\b/g,
 84    test: (m, line) => /\bbg-(?:red|orange|amber|yellow|lime|green|emerald|teal|cyan|sky|blue|indigo|violet|purple|fuchsia|pink|rose)-\d+\b/.test(line),
 85    fmt: (m, line) => { const bg = line.match(/\bbg-(?:red|orange|amber|yellow|lime|green|emerald|teal|cyan|sky|blue|indigo|violet|purple|fuchsia|pink|rose)-\d+\b/); return `${m[0]} on ${bg?.[0] || '?'}`; } },
 86  // --- Tailwind AI palette ---
 87  { id: 'ai-color-palette', regex: /\btext-(?:purple|violet|indigo)-(\d+)\b/g,
 88    test: (m, line) => /\btext-(?:[2-9]xl|[3-9]xl)\b|<h[1-3]/i.test(line),
 89    fmt: (m) => `${m[0]} on heading` },
 90  { id: 'ai-color-palette', regex: /\bfrom-(?:purple|violet|indigo)-(\d+)\b/g,
 91    test: (m, line) => /\bto-(?:purple|violet|indigo|blue|cyan|pink|fuchsia)-\d+\b/.test(line),
 92    fmt: (m) => `${m[0]} gradient` },
 93  // --- Bounce/elastic easing ---
 94  { id: 'bounce-easing', regex: /\banimate-bounce\b/g,
 95    test: () => true,
 96    fmt: () => 'animate-bounce (Tailwind)' },
 97  { id: 'bounce-easing', regex: /animation(?:-name)?\s*:\s*[^;]*\b(bounce|elastic|wobble|jiggle|spring)\b/gi,
 98    test: () => true,
 99    fmt: (m) => m[0] },
100  { id: 'bounce-easing', regex: /cubic-bezier\(\s*([\d.-]+)\s*,\s*([\d.-]+)\s*,\s*([\d.-]+)\s*,\s*([\d.-]+)\s*\)/g,
101    test: (m) => {
102      const y1 = parseFloat(m[2]), y2 = parseFloat(m[4]);
103      return y1 < -0.1 || y1 > 1.1 || y2 < -0.1 || y2 > 1.1;
104    },
105    fmt: (m) => `cubic-bezier(${m[1]}, ${m[2]}, ${m[3]}, ${m[4]})` },
106  // --- Layout property transition ---
107  { id: 'layout-transition', regex: /transition\s*:\s*([^;{}]+)/gi,
108    test: (m) => {
109      const val = m[1].toLowerCase();
110      if (/\ball\b/.test(val)) return false;
111      return /\b(?:(?:max|min)-)?(?:width|height)\b|\bpadding\b|\bmargin\b/.test(val);
112    },
113    fmt: (m) => {
114      const found = m[1].match(/\b(?:(?:max|min)-)?(?:width|height)\b|\bpadding(?:-(?:top|right|bottom|left))?\b|\bmargin(?:-(?:top|right|bottom|left))?\b/gi);
115      return `transition: ${found ? found.join(', ') : m[1].trim()}`;
116    } },
117  { id: 'layout-transition', regex: /transition-property\s*:\s*([^;{}]+)/gi,
118    test: (m) => {
119      const val = m[1].toLowerCase();
120      if (/\ball\b/.test(val)) return false;
121      return /\b(?:(?:max|min)-)?(?:width|height)\b|\bpadding\b|\bmargin\b/.test(val);
122    },
123    fmt: (m) => {
124      const found = m[1].match(/\b(?:(?:max|min)-)?(?:width|height)\b|\bpadding(?:-(?:top|right|bottom|left))?\b|\bmargin(?:-(?:top|right|bottom|left))?\b/gi);
125      return `transition-property: ${found ? found.join(', ') : m[1].trim()}`;
126    } },
127];
128
/**
 * Whole-file analyzers. Each entry is `(content, filePath) => finding[]` and
 * returns an empty array when its heuristic does not trigger.
 */
const REGEX_ANALYZERS = [
  // Single font
  (content, filePath) => {
    const fonts = new Set();
    let m;
    const fontFamilyRe = /font-family\s*:\s*([^;}]+)/gi;
    while ((m = fontFamilyRe.exec(content)) !== null) {
      for (const f of m[1].split(',').map((f) => f.trim().replace(/^['"]|['"]$/g, '').toLowerCase())) {
        if (f && !GENERIC_FONTS.has(f)) fonts.add(f);
      }
    }
    // Google Fonts URLs: read every `family=` query parameter. The css2 endpoint
    // repeats the parameter once per family, the older endpoint separates
    // families with `|`. `&amp;` is accepted because URLs inside HTML attributes
    // are commonly entity-escaped.
    const gfRe = /fonts\.googleapis\.com\/css2?\?([^"'\s)]+)/gi;
    while ((m = gfRe.exec(content)) !== null) {
      for (const param of m[1].split(/&(?:amp;)?/i)) {
        if (!/^family=/i.test(param)) continue;
        for (const entry of param.slice('family='.length).split('|')) {
          // Drop the `:wght@…` axis suffix and turn `+` back into spaces.
          const f = entry.split(':')[0].replace(/\+/g, ' ').toLowerCase();
          if (f) fonts.add(f);
        }
      }
    }
    const lines = content.split('\n');
    if (fonts.size !== 1 || lines.length < 20) return [];
    const name = [...fonts][0];
    // Report the first line mentioning the font; fall back to line 1.
    const idx = lines.findIndex((l) => l.toLowerCase().includes(name));
    const line = idx === -1 ? 1 : idx + 1;
    return [finding('single-font', filePath, `only font used is ${name}`, line)];
  },
  // Flat type hierarchy
  (content, filePath) => {
    const sizes = new Set();
    const REM = 16;
    // rem and em are both converted at 16px; units are compared case-insensitively
    // because the regexes below accept any letter case.
    const toPx = (value, unit) => (unit.toLowerCase() === 'px' ? +value : +value * REM);
    const round1 = (px) => Math.round(px * 10) / 10;
    let m;
    const sizeRe = /font-size\s*:\s*([\d.]+)(px|rem|em)\b/gi;
    while ((m = sizeRe.exec(content)) !== null) {
      const px = toPx(m[1], m[2]);
      if (px > 0 && px < 200) sizes.add(round1(px));
    }
    // clamp(min, preferred, max): record both the min and the max bound.
    const clampRe = /font-size\s*:\s*clamp\(\s*([\d.]+)(px|rem|em)\s*,\s*[^,]+,\s*([\d.]+)(px|rem|em)\s*\)/gi;
    while ((m = clampRe.exec(content)) !== null) {
      sizes.add(round1(toPx(m[1], m[2])));
      sizes.add(round1(toPx(m[3], m[4])));
    }
    const TW = { 'text-xs': 12, 'text-sm': 14, 'text-base': 16, 'text-lg': 18, 'text-xl': 20, 'text-2xl': 24, 'text-3xl': 30, 'text-4xl': 36, 'text-5xl': 48, 'text-6xl': 60, 'text-7xl': 72, 'text-8xl': 96, 'text-9xl': 128 };
    for (const [cls, px] of Object.entries(TW)) {
      if (new RegExp(`\\b${cls}\\b`).test(content)) sizes.add(px);
    }
    if (sizes.size < 3) return [];
    const sorted = [...sizes].sort((a, b) => a - b);
    const ratio = sorted[sorted.length - 1] / sorted[0];
    if (ratio >= 2.0) return [];
    const lines = content.split('\n');
    let line = 1;
    for (let i = 0; i < lines.length; i++) {
      if (/font-size/i.test(lines[i]) || /\btext-(?:xs|sm|base|lg|xl|\d)/i.test(lines[i])) { line = i + 1; break; }
    }
    return [finding('flat-type-hierarchy', filePath, `Sizes: ${sorted.map((s) => s + 'px').join(', ')} (ratio ${ratio.toFixed(1)}:1)`, line)];
  },
  // Monotonous spacing (regex)
  (content, filePath) => {
    const vals = [];
    let m;
    const pxRe = /(?:padding|margin)(?:-(?:top|right|bottom|left))?\s*:\s*(\d+)px/gi;
    while ((m = pxRe.exec(content)) !== null) { const v = +m[1]; if (v > 0 && v < 200) vals.push(v); }
    const remRe = /(?:padding|margin)(?:-(?:top|right|bottom|left))?\s*:\s*([\d.]+)rem/gi;
    while ((m = remRe.exec(content)) !== null) { const v = Math.round(parseFloat(m[1]) * 16); if (v > 0 && v < 200) vals.push(v); }
    const gapRe = /gap\s*:\s*(\d+)px/gi;
    while ((m = gapRe.exec(content)) !== null) vals.push(+m[1]);
    // Tailwind spacing utilities: one step is counted as 4px.
    const twRe = /\b(?:p|px|py|pt|pb|pl|pr|m|mx|my|mt|mb|ml|mr|gap)-(\d+)\b/g;
    while ((m = twRe.exec(content)) !== null) vals.push(+m[1] * 4);
    // Snap to a 4px grid so near-identical values count as the same spacing.
    const rounded = vals.map((v) => Math.round(v / 4) * 4);
    if (rounded.length < 10) return [];
    const counts = {};
    for (const v of rounded) counts[v] = (counts[v] || 0) + 1;
    const maxCount = Math.max(...Object.values(counts));
    const pct = maxCount / rounded.length;
    const unique = [...new Set(rounded)].filter((v) => v > 0);
    if (pct <= 0.6 || unique.length > 3) return [];
    const dominant = Object.entries(counts).sort((a, b) => b[1] - a[1])[0][0];
    return [finding('monotonous-spacing', filePath, `~${dominant}px used ${maxCount}/${rounded.length} times (${Math.round(pct * 100)}%)`)];
  },
  // Everything centered (regex)
  (content, filePath) => {
    const lines = content.split('\n');
    let centered = 0;
    let total = 0;
    for (const line of lines) {
      if (/<(?:h[1-6]|p|div|li|button)\b[^>]*>/i.test(line) && line.trim().length > 20) {
        total++;
        if (/text-align\s*:\s*center/i.test(line) || /\btext-center\b/.test(line)) centered++;
      }
    }
    if (total < 5 || centered / total <= 0.7) return [];
    return [finding('everything-centered', filePath, `${centered}/${total} text elements centered (${Math.round(centered / total * 100)}%)`)];
  },
  // Dark glow (page-level: dark bg + colored box-shadow with blur)
  (content, filePath) => {
    // Check if page has a dark background
    const darkBgRe = /background(?:-color)?\s*:\s*(?:#(?:0[0-9a-f]|1[0-9a-f]|2[0-3])[0-9a-f]{4}\b|#(?:0|1)[0-9a-f]{2}\b|rgb\(\s*(\d{1,2})\s*,\s*(\d{1,2})\s*,\s*(\d{1,2})\s*\))/i;
    const twDarkBg = /\bbg-(?:gray|slate|zinc|neutral|stone)-(?:9\d{2}|800)\b/;
    if (!darkBgRe.test(content) && !twDarkBg.test(content)) return [];

    // Check for colored box-shadow with blur > 4px
    const shadowRe = /box-shadow\s*:\s*([^;{}]+)/gi;
    let m;
    while ((m = shadowRe.exec(content)) !== null) {
      // A declaration may stack several shadows; split on commas that are not
      // inside a color function so each layer is judged with its own color.
      for (const layer of m[1].split(/,(?![^(]*\))/)) {
        const colorMatch = layer.match(/rgba?\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)/);
        if (!colorMatch) continue;
        const [r, g, b] = [+colorMatch[1], +colorMatch[2], +colorMatch[3]];
        if ((Math.max(r, g, b) - Math.min(r, g, b)) < 30) continue; // skip gray
        // Remove the color function before reading lengths, otherwise a bare `0`
        // channel (e.g. rgba(0, 255, 0, …)) would be counted as a shadow offset.
        const lengths = layer.replace(/rgba?\([^)]*\)/gi, ' ');
        // Check blur: look for pattern like "0 0 20px" (third number > 4)
        const pxVals = [...lengths.matchAll(/(\d+)px|(?<![.\d])\b(0)\b(?![.\d])/g)].map((p) => +(p[1] || p[2]));
        if (pxVals.length >= 3 && pxVals[2] > 4) {
          const lineNumber = content.substring(0, m.index).split('\n').length;
          return [finding('dark-glow', filePath, `Colored glow (rgb(${r},${g},${b})) on dark page`, lineNumber)];
        }
      }
    }
    return [];
  },
];
240
241// ---------------------------------------------------------------------------
242// Style block extraction (Vue/Svelte <style> blocks)
243// ---------------------------------------------------------------------------
244
/**
 * Collect the bodies of `<style>` elements from a Vue or Svelte component.
 *
 * @param {string} content - full source of the file
 * @param {string} ext - file extension including the dot; compared case-insensitively
 * @returns {{content: string, startLine: number}[]} one entry per `<style>` element.
 *   `content` is the text between the opening and closing tag. `startLine` is the
 *   1-based file line on which that text begins, so line `i` (0-based) of
 *   `content.split('\n')` sits on file line `startLine + i`. Returns an empty
 *   array for any extension other than `.vue` / `.svelte`.
 */
function extractStyleBlocks(content, ext) {
  const kind = String(ext ?? '').toLowerCase();
  if (kind !== '.vue' && kind !== '.svelte') return [];

  const CLOSING_TAG_LENGTH = '</style>'.length;
  const blocks = [];
  const re = /<style[^>]*>([\s\S]*?)<\/style>/gi;
  let m;
  while ((m = re.exec(content)) !== null) {
    // Offset of the first body character, i.e. just after the `>` of the opening
    // tag. Counting lines up to here (rather than up to `<style`) keeps the result
    // correct when the opening tag itself spans several lines, and avoids
    // reporting one line too far when the body starts with a newline.
    const bodyStart = m.index + m[0].length - CLOSING_TAG_LENGTH - m[1].length;
    const startLine = content.substring(0, bodyStart).split('\n').length;
    blocks.push({ content: m[1], startLine });
  }
  return blocks;
}
258
259// ---------------------------------------------------------------------------
260// CSS-in-JS extraction (styled-components, emotion)
261// ---------------------------------------------------------------------------
262
// File extensions scanned for CSS-in-JS template literals.
const CSS_IN_JS_EXTENSIONS = new Set(['.js', '.ts', '.jsx', '.tsx']);

/**
 * Collect the bodies of `styled.x`, `styled(...)` and `css` tagged template
 * literals.
 *
 * @param {string} content - full source of the file
 * @param {string} ext - file extension including the dot; compared case-insensitively
 * @returns {{content: string, startLine: number}[]} one entry per template literal.
 *   `content` is the text between the backticks. `startLine` is the 1-based file
 *   line on which that text begins, so line `i` (0-based) of
 *   `content.split('\n')` sits on file line `startLine + i`. Returns an empty
 *   array for extensions outside CSS_IN_JS_EXTENSIONS.
 */
function extractCSSinJS(content, ext) {
  const kind = String(ext ?? '').toLowerCase();
  if (!CSS_IN_JS_EXTENSIONS.has(kind)) return [];

  const blocks = [];
  const re = /(?:styled(?:\.\w+|\([^)]+\))|css)\s*`([\s\S]*?)`/g;
  let m;
  while ((m = re.exec(content)) !== null) {
    // Offset of the first character after the opening backtick. The tag
    // expression may sit on earlier lines than the backtick (`styled(\n  X\n)`
    // or whitespace before the literal), so lines are counted up to the body
    // rather than up to the start of the match.
    const bodyStart = m.index + m[0].length - 1 - m[1].length;
    const startLine = content.substring(0, bodyStart).split('\n').length;
    blocks.push({ content: m[1], startLine });
  }
  return blocks;
}
278
// Run one matcher over every line and return the findings it produces.
function scanLinesWithMatcher(matcher, lines, filePath, lineOffset, blockContext) {
  const hits = [];
  const { regex } = matcher;
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    // Matcher regexes are shared module-level objects; restart them for every line.
    regex.lastIndex = 0;
    let m;
    while ((m = regex.exec(line)) !== null) {
      // For extracted blocks, use nearby lines as context for multi-line CSS patterns
      const context = blockContext
        ? lines.slice(Math.max(0, i - 3), Math.min(lines.length, i + 4)).join(' ')
        : line;
      if (matcher.test(m, context)) {
        hits.push(finding(matcher.id, filePath, matcher.fmt(m, context), i + 1 + lineOffset));
      }
      // Guards against an endless loop: a regex without the g/y flag returns the
      // same match on every exec, and a zero-length match does not advance lastIndex.
      if (!regex.global && !regex.sticky) break;
      if (m[0] === '') regex.lastIndex += 1;
    }
  }
  return hits;
}

/**
 * Apply every entry of REGEX_MATCHERS to a list of lines.
 *
 * @param {string[]} lines - lines to scan
 * @param {string} filePath - path recorded on each finding
 * @param {number} [lineOffset=0] - added to the 1-based index within `lines`
 *   to produce the reported line number
 * @param {*} [blockContext=null] - when truthy, each match is tested against a
 *   window of up to three lines before and after it (joined with spaces)
 *   instead of the matching line alone
 * @param {{profile?: *, phase?: string}} [options] - when `profile` is set, each
 *   matcher run is wrapped in `profileFindings` and tagged with `phase`
 *   (default 'regex-matchers')
 * @returns {Array} findings ordered by matcher, then by line
 */
function runRegexMatchers(lines, filePath, lineOffset = 0, blockContext = null, options = {}) {
  const { profile, phase = 'regex-matchers' } = options || {};
  const findings = [];
  for (const matcher of REGEX_MATCHERS) {
    const scan = () => scanLinesWithMatcher(matcher, lines, filePath, lineOffset, blockContext);
    const hits = profile
      ? profileFindings(profile, {
        engine: 'regex',
        phase,
        ruleId: matcher.id,
        target: filePath,
      }, scan)
      : scan();
    findings.push(...hits);
  }
  return findings;
}
330
/**
 * Scan a source file with the regex engine.
 *
 * Steps, in order:
 *   1. run the line matchers over the whole file (phase 'source');
 *   2. run them again over each block from extractStyleBlocks (phase 'style-block');
 *   3. run them again over each block from extractCSSinJS (phase 'css-in-js');
 *   4. drop findings that repeat an earlier one (same antipattern and snippet,
 *      at most two lines apart);
 *   5. if isFullPage(content) holds, append the findings of every page-level analyzer.
 *
 * @param {string} content - full source text
 * @param {string} filePath - path of the file; its extension selects CSS block
 *   context and which extractors apply
 * @param {{profile?: *}} [options] - optional profiler handle forwarded to
 *   profileStep / profileFindings / runRegexMatchers
 * @returns {Array} de-duplicated matcher findings followed by analyzer findings
 */
function detectText(content, filePath, options = {}) {
  const profile = options?.profile;
  const collected = [];
  const sourceLines = content.split('\n');

  let ext = '';
  if (filePath) {
    ext = (filePath.match(/\.\w+$/)?.[0] || '').toLowerCase();
  }

  // Run regex matchers on the full file content (catches Tailwind classes, inline styles)
  // Enable block context for CSS files where related properties span multiple lines
  const cssLike = new Set(['.css', '.scss', '.less']);
  const sourceContext = cssLike.has(ext) || null;
  collected.push(...runRegexMatchers(sourceLines, filePath, 0, sourceContext, {
    profile,
    phase: 'source',
  }));

  // Call an extractor, wrapped in a profiler step only when profiling is active.
  const extractBlocks = (extractor, ruleId) => {
    if (!profile) return extractor(content, ext);
    return profileStep(profile, {
      engine: 'regex',
      phase: 'extract',
      ruleId,
      target: filePath,
    }, () => extractor(content, ext));
  };

  // Scan each extracted block with multi-line context, shifting line numbers by its start.
  const scanBlocks = (blocks, phase) => {
    for (const block of blocks) {
      const blockLines = block.content.split('\n');
      collected.push(...runRegexMatchers(blockLines, filePath, block.startLine - 1, true, {
        profile,
        phase,
      }));
    }
  };

  // <style> blocks from Vue/Svelte SFCs, then CSS-in-JS template literals
  scanBlocks(extractBlocks(extractStyleBlocks, 'style-blocks'), 'style-block');
  scanBlocks(extractBlocks(extractCSSinJS, 'css-in-js'), 'css-in-js');

  // Deduplicate findings (same antipattern + same snippet, within 2 lines)
  const unique = [];
  for (const candidate of collected) {
    const earlier = unique.findIndex((kept) =>
      kept.antipattern === candidate.antipattern &&
      kept.snippet === candidate.snippet &&
      Math.abs(kept.line - candidate.line) <= 2
    );
    if (earlier === -1) unique.push(candidate);
  }

  // Page-level analyzers only run on full pages
  if (!isFullPage(content)) return unique;

  // Rule ids reported to the profiler, index-aligned with REGEX_ANALYZERS.
  const analyzerIds = [
    'single-font',
    'flat-type-hierarchy',
    'monotonous-spacing',
    'everything-centered',
    'dark-glow',
  ];
  REGEX_ANALYZERS.forEach((analyzer, index) => {
    const ruleId = analyzerIds[index] || `analyzer-${index + 1}`;
    const pageFindings = profileFindings(profile, {
      engine: 'regex',
      phase: 'page-analyzer',
      ruleId,
      target: filePath,
    }, () => analyzer(content, filePath));
    unique.push(...pageFindings);
  });

  return unique;
}
412
413export {
414  REGEX_MATCHERS,
415  REGEX_ANALYZERS,
416  extractStyleBlocks,
417  extractCSSinJS,
418  runRegexMatchers,
419  detectText,
420};