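// NOTE: the text "Spaces: Sleeping" that preceded this module was hosting-page residue captured with the file; it is not part of the code.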
import { API_URL, API_TOKEN, API_USER } from '../constants'; | |
import { ApiInput, ApiResponseOutput, ProcessedResult, QaSectionResult, DetailedQaReport } from '../types'; | |
/** Maximum number of attempts made for one workflow run (the first try included). */
const MAX_RETRIES = 3;

/** Base back-off delay in milliseconds; it is doubled after every failed attempt. */
const INITIAL_RETRY_DELAY_MS = 1000;

/** HTTP status codes treated as transient: 429 (Too Many Requests) and 502/503/504 (server/gateway errors). */
const RETRYABLE_STATUS_CODES: readonly number[] = [429, 502, 503, 504];
/**
 * Resolves after `ms` milliseconds plus a random jitter.
 *
 * The jitter spreads out retries from concurrent callers so they do not all
 * hit the service at the same instant (thundering-herd problem).
 *
 * @param ms Base delay in milliseconds.
 * @param maxJitterMs Upper bound (exclusive) of the random extra delay; defaults to 500 ms.
 * @returns A promise that resolves with no value once the timer fires.
 */
const delay = (ms: number, maxJitterMs = 500): Promise<void> =>
  new Promise<void>(resolve => {
    setTimeout(resolve, ms + Math.random() * maxJitterMs);
  });
/** Stage of the request/parse pipeline at which a workflow failure was detected. */
type WorkflowErrorStage = 'network' | 'api' | 'stream' | 'parse' | 'unknown';

/**
 * Structured error for workflow failures.
 *
 * `code` is a machine-readable identifier (codes starting with `RETRYABLE_`
 * are the ones the retry loop in this file treats as transient), `at` names
 * the stage that failed, and `debug` optionally carries raw diagnostic text.
 */
class WorkflowError extends Error {
  readonly code: string;
  readonly at: WorkflowErrorStage;
  readonly debug?: string;

  /**
   * @param code Machine-readable error code.
   * @param message Human-readable description.
   * @param at Stage at which the failure occurred; defaults to `'unknown'`.
   * @param debug Optional raw diagnostic payload (e.g. a response body).
   */
  constructor(code: string, message: string, at: WorkflowErrorStage = 'unknown', debug?: string) {
    super(message);
    // When compiled to ES5, subclasses of Error lose their prototype; restoring it
    // keeps `instanceof WorkflowError` reliable on every compile target.
    Object.setPrototypeOf(this, new.target.prototype);
    this.name = 'WorkflowError';
    this.code = code;
    this.at = at;
    this.debug = debug;
  }
}
/**
 * Removes every `<think>...</think>` block from a string and trims the result.
 *
 * @param text Raw text; non-string values (e.g. a missing output field) are tolerated.
 * @returns The text without think blocks, or `''` when `text` is not a string.
 */
const cleanResponseText = (text: string | null | undefined): string => {
  if (typeof text !== 'string') return '';
  // Lazy match so that several separate think blocks are removed individually.
  return text.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
};
/** Grade field patterns, tried in order from most to least specific. */
const SECTION_GRADE_PATTERNS: readonly RegExp[] = [
  /-\s*\*\*Grade:\*\*\s*(.*)/, // - **Grade:** 100/100
  /•\s*\*\*Grade:\*\*\s*(.*)/, // • **Grade:** 100/100
  /\*\s*\*\*Grade:\*\*\s*(.*)/, // * **Grade:** 100/100
  /-\s*\*\*Grade\*\*:\s*(.*)/, // - **Grade**: 100/100
  /•\s*\*\*Grade\*\*:\s*(.*)/, // • **Grade**: 100/100
  /\*\s*\*\*Grade\*\*:\s*(.*)/, // * **Grade**: 100/100
  /(?:•|-|\*)\s*\*\*Grade\*\*:{0,2}\s*(.*)/, // - **Grade** 100/100 (colon optional after a bold label)
  /(?:•|-|\*)\s*Grade(?::{1,2}\s*|\s+(?=\d))(.*)/, // - Grade: 100/100 or - Grade 100/100
  /^\s*\*\*Grade:?\*\*:{0,2}\s*(.*)/m, // **Grade**: 100/100 (bold label at line start, no bullet)
  /Grade::?\s*(\d+\/\d+|\d+)/m, // Grade: 100/100 (anywhere in text)
  /(\d+\/\d+)\s*(?:grade|Grade)/, // 100/100 grade (reverse order)
];

/** Pass field patterns, tried in order from most to least specific. */
const SECTION_PASS_PATTERNS: readonly RegExp[] = [
  /-\s*\*\*Pass:\*\*\s*(.*)/i, // - **Pass:** true
  /•\s*\*\*Pass:\*\*\s*(.*)/i, // • **Pass:** true
  /\*\s*\*\*Pass:\*\*\s*(.*)/i, // * **Pass:** true
  /-\s*\*\*Pass\*\*:\s*(.*)/i, // - **Pass**: true
  /•\s*\*\*Pass\*\*:\s*(.*)/i, // • **Pass**: true
  /\*\s*\*\*Pass\*\*:\s*(.*)/i, // * **Pass**: true
  /(?:•|-|\*)\s*\*\*Pass\*\*:{0,2}\s*(.*)/i, // - **Pass** true (colon optional after a bold label)
  /(?:•|-|\*)\s*Pass(?::{1,2}\s*|\s+(?=true|false|yes|no|✅|❌))(.*)/i, // • Pass: true or - Pass true
  /^\s*\*\*Pass:?\*\*:{0,2}\s*(.*)/im, // **Pass**: true (bold label at line start, no bullet)
  /Pass::?\s*(true|false|✅|❌|TRUE|FALSE)/im, // Pass: true (anywhere)
  /(true|false|✅|❌|TRUE|FALSE)\s*pass/im, // true pass (reverse)
];

/** Errors field patterns; each captures up to the next bold bullet or the end of the text. */
const SECTION_ERROR_PATTERNS: readonly RegExp[] = [
  /-\s*\*\*Errors:\*\*\s*([\s\S]*?)(?=\n-\s*\*\*|$)/, // - **Errors:** []
  /•\s*\*\*Errors:\*\*\s*([\s\S]*?)(?=\n•\s*\*\*|$)/, // • **Errors:** []
  /\*\s*\*\*Errors:\*\*\s*([\s\S]*?)(?=\n\*\s*\*\*|$)/, // * **Errors:** []
  /(?:•|-|\*)\s*\*\*Errors?\*\*:{0,2}\s*([\s\S]*?)(?=\n(?:•|-|\*)\s*\*\*|$)/, // Generic bullet + bold label
  /(?:•|-|\*)\s*Errors::?\s*([\s\S]*?)(?=\n(?:•|-|\*)\s*\*\*|$)/, // Generic bullet, label not bold
  /Errors::?\s*([\s\S]*?)(?=\n(?:•|-|\*|\*\*)|$)/m, // Errors: anywhere in text
];

/** Analysis / Corrected field patterns; same capture convention as the errors patterns. */
const SECTION_CONTENT_PATTERNS: readonly RegExp[] = [
  /-\s*\*\*Analysis:\*\*\s*([\s\S]*?)(?=\n-\s*\*\*|$)/, // - **Analysis:** text
  /•\s*\*\*Analysis:\*\*\s*([\s\S]*?)(?=\n•\s*\*\*|$)/, // • **Analysis:** text
  /\*\s*\*\*Analysis:\*\*\s*([\s\S]*?)(?=\n\*\s*\*\*|$)/, // * **Analysis:** text
  /-\s*\*\*Corrected:\*\*\s*([\s\S]*?)(?=\n-\s*\*\*|$)/, // - **Corrected:** text
  /•\s*\*\*Corrected:\*\*\s*([\s\S]*?)(?=\n•\s*\*\*|$)/, // • **Corrected:** text
  /\*\s*\*\*Corrected:\*\*\s*([\s\S]*?)(?=\n\*\s*\*\*|$)/, // * **Corrected:** text
  /(?:•|-|\*)\s*\*\*(?:Analysis|Corrected)\*\*:{0,2}\s*([\s\S]*?)(?=\n(?:•|-|\*)\s*\*\*|$)/, // Generic
  /(?:•|-|\*)\s*(?:Analysis|Corrected)::?\s*([\s\S]*?)(?=\n(?:•|-|\*)\s*\*\*|$)/, // No bold
  /Analysis::?\s*([\s\S]*?)(?=\n(?:•|-|\*|\*\*)|$)/m, // Analysis: anywhere
  /Corrected::?\s*([\s\S]*?)(?=\n(?:•|-|\*|\*\*)|$)/m, // Corrected: anywhere
];

/** Returns the match of the first pattern (in list order) that matches `text`, or `null`. */
const firstSectionMatch = (text: string, patterns: readonly RegExp[]): RegExpMatchArray | null => {
  for (const pattern of patterns) {
    const found = text.match(pattern);
    if (found) return found;
  }
  return null;
};

/** Removes whitespace and markdown `*` markers wrapped around a captured field value. */
const stripSectionValueMarkup = (raw: string): string =>
  raw.replace(/^[\s*]+/, '').replace(/[\s*]+$/, '');

/** Interprets a captured pass value: anything containing "true"/"✅", or exactly yes/passed/pass. */
const isAffirmativeSectionValue = (raw: string): boolean => {
  const value = stripSectionValueMarkup(raw).toLowerCase();
  return value.includes('true') || value.includes('✅') || value === 'yes' || value === 'passed' || value === 'pass';
};

/** Converts the raw text captured after an Errors label into a list of error messages. */
const parseSectionErrorsBlock = (block: string): string[] => {
  const text = block.trim();
  if (text === '' || text === '[]' || text.toLowerCase() === 'none') {
    return ['No errors reported.'];
  }
  if (text.startsWith('[') && text.includes(']')) {
    // Array notation such as ["a", "b"].
    try {
      const parsed: unknown = JSON.parse(text);
      if (Array.isArray(parsed) && parsed.length > 0) {
        // Guarantee string items even when the array holds numbers or objects.
        return parsed.map(item => (typeof item === 'string' ? item : JSON.stringify(item)));
      }
      return ['No errors reported.'];
    } catch {
      // Not valid JSON (e.g. single quotes): fall back to the bracket-less text.
      const plain = text.replace(/[\[\]]/g, '').trim();
      return plain ? [plain] : ['No errors reported.'];
    }
  }
  // Multi-line bullet list or plain text: one entry per non-empty line.
  const entries = text
    .split('\n')
    .map(line => line.trim().replace(/^[-•\*]\s*/, ''))
    .filter(Boolean);
  return entries.length > 0 ? entries : ['No errors reported.'];
};

/**
 * Parses a single section of the QA report (e.g., TITLE, H1).
 *
 * Each field (grade, pass, errors, analysis/corrected text) is located with an
 * ordered list of regular expressions so that several bullet styles and label
 * formats are accepted; the first pattern that matches wins.
 *
 * @param sectionText The text content of a single QA section.
 * @returns The section's grade (`'N/A'` if absent), pass flag (`false` if absent),
 *   error list (`['No errors reported.']` if absent or empty) and content text.
 */
const parseSection = (sectionText: string): QaSectionResult => {
  console.log('Parsing section text:', sectionText.substring(0, 200));

  const gradeMatch = firstSectionMatch(sectionText, SECTION_GRADE_PATTERNS);
  const grade = gradeMatch ? stripSectionValueMarkup(gradeMatch[1] ?? '') : 'N/A';
  console.log('Grade match result:', gradeMatch, 'Final grade:', grade);

  const passMatch = firstSectionMatch(sectionText, SECTION_PASS_PATTERNS);
  const pass = passMatch ? isAffirmativeSectionValue(passMatch[1] ?? '') : false;
  console.log('Pass match result:', passMatch, 'Final pass:', pass);

  const errorsMatch = firstSectionMatch(sectionText, SECTION_ERROR_PATTERNS);
  const errors = errorsMatch ? parseSectionErrorsBlock(errorsMatch[1] ?? '') : ['No errors reported.'];

  const contentMatch = firstSectionMatch(sectionText, SECTION_CONTENT_PATTERNS);
  let corrected = contentMatch ? (contentMatch[1] ?? '').trim() : 'Content analysis not available.';

  // Without a usable Analysis/Corrected field, fall back to the first plain line of
  // the section (not a bullet, no bold markup, longer than 10 characters).
  if (!contentMatch || corrected.length < 10) {
    const fallbackLine = sectionText
      .split('\n')
      .map(line => line.trim())
      .filter(Boolean)
      .find(line =>
        !line.startsWith('•') &&
        !line.startsWith('-') &&
        !line.startsWith('*') &&
        !line.includes('**') &&
        line.length > 10);
    if (fallbackLine) {
      corrected = fallbackLine;
    }
  }

  // Remove a leading "#" and any "### **HEADER**" remnant from the content.
  corrected = corrected.replace(/^#\s*/, '').replace(/###\s*\*\*[^*]+\*\*/, '').trim();
  console.log('Content match result:', contentMatch, 'Final corrected:', corrected.substring(0, 50));

  return { grade, pass, errors, corrected };
};
/**
 * Extracts the error list of one structured section.
 *
 * Reads the `- Errors:` line plus every following line up to the next
 * top-level `- ` field, so both inline values and indented bullet lists
 * that start on the next line are picked up.
 */
const extractStructuredErrors = (lines: readonly string[]): string[] => {
  const errorsLineIndex = lines.findIndex(line => line.includes('- Errors:'));
  if (errorsLineIndex === -1) {
    return ['Errors not found.'];
  }

  const inlineContent = (lines[errorsLineIndex] ?? '').replace('- Errors:', '').trim();
  if (inlineContent === '[]') {
    return ['No errors reported.'];
  }

  // Gather continuation lines; a line starting with "- " at column 0 is the next field.
  const parts = [inlineContent];
  for (let i = errorsLineIndex + 1; i < lines.length; i++) {
    const line = lines[i] ?? '';
    if (line.startsWith('- ')) break;
    parts.push(line.trim());
  }
  const errorText = parts.join('\n').trim();
  if (errorText === '') {
    return ['No errors reported.'];
  }

  let errors: string[];
  if (errorText.startsWith('[') && errorText.includes(']')) {
    // Array notation; anything that is not valid JSON is kept as one raw entry.
    try {
      const parsedErrors: unknown = JSON.parse(errorText);
      errors = Array.isArray(parsedErrors)
        ? parsedErrors.map(item => (typeof item === 'string' ? item : JSON.stringify(item)))
        : [errorText];
    } catch {
      errors = [errorText];
    }
  } else {
    // Plain text or bullet list: one entry per non-empty line.
    errors = errorText
      .split('\n')
      .map(entry => entry.trim().replace(/^- /, ''))
      .filter(Boolean);
  }
  return errors.length > 0 ? errors : ['No errors reported.'];
};

/**
 * Parses the structured QA report format that comes as plain text with sections.
 *
 * Sections start at lines matching `## NAME`; inside a section the fields are
 * `- Grade:`, `- Pass:`, `- Errors:`, `- Corrected:` and, for the overall
 * section, `- Primary Issue:`. Sections that are missing get a default entry
 * whose error list names the missing section.
 *
 * @param qaText The raw structured QA text from the API.
 * @returns An object containing the detailed parsed report and top-level pass/grade info.
 */
const parseStructuredQaReport = (qaText: string): { detailedQaReport: DetailedQaReport, overallPass: boolean, overallGrade: string } => {
  const defaultSection: QaSectionResult = { grade: 'N/A', pass: false, errors: ['Parsing failed'], corrected: '' };
  const defaultReport: DetailedQaReport = {
    title: { ...defaultSection },
    meta: { ...defaultSection },
    h1: { ...defaultSection },
    copy: { ...defaultSection },
    overall: { grade: 'N/A', pass: false, primaryIssue: 'Parsing failed' }
  };

  try {
    // Split before every "## NAME" header line; text ahead of the first header becomes
    // a chunk whose "header" matches no known section and is therefore ignored.
    const sections = qaText.split(/(?=^## [A-Z]+)/gm).filter(Boolean);
    const parsedData: Partial<DetailedQaReport> = {};

    for (const sectionText of sections) {
      const lines = sectionText.trim().split('\n');
      const header = (lines[0] ?? '').replace('## ', '').trim().toLowerCase();

      // Grade: "NN/100" yields just the number; any other value is kept verbatim.
      const gradeLine = lines.find(line => line.includes('- Grade:'))?.trim() ?? '';
      const gradeMatch = gradeLine.match(/- Grade:\s*(\d+)\/100/) ?? gradeLine.match(/- Grade:\s*([^\n]+)/);
      const grade = gradeMatch ? (gradeMatch[1] ?? '').trim() : 'N/A';

      // Pass: only a literal true/false (any case) is recognised.
      const passLine = lines.find(line => line.includes('- Pass:'))?.trim() ?? '';
      const passMatch = passLine.match(/- Pass:\s*(true|false)/i);
      const pass = passMatch ? (passMatch[1] ?? '').toLowerCase() === 'true' : false;

      const errors = extractStructuredErrors(lines);

      // Corrected content runs from its label to the end of the section.
      const correctedLineIndex = lines.findIndex(line => line.includes('- Corrected:'));
      const corrected = correctedLineIndex !== -1
        ? lines.slice(correctedLineIndex).join('\n').replace('- Corrected:', '').trim()
        : 'Correction not found.';

      const sectionResult: QaSectionResult = { grade, pass, errors, corrected };

      if (header.includes('title')) {
        parsedData.title = sectionResult;
      } else if (header.includes('meta')) {
        parsedData.meta = sectionResult;
      } else if (header.includes('h1')) {
        parsedData.h1 = sectionResult;
      } else if (header.includes('copy')) {
        parsedData.copy = sectionResult;
      } else if (header.includes('overall')) {
        const primaryIssueLine = lines.find(line => line.includes('- Primary Issue:'))?.trim() ?? '';
        const primaryIssue = primaryIssueLine.replace('- Primary Issue:', '').trim() || 'Not specified.';
        parsedData.overall = { grade, pass, primaryIssue };
      }
    }

    const finalReport: DetailedQaReport = {
      title: parsedData.title || { ...defaultSection, errors: ['Title section not found'] },
      meta: parsedData.meta || { ...defaultSection, errors: ['Meta section not found'] },
      h1: parsedData.h1 || { ...defaultSection, errors: ['H1 section not found'] },
      copy: parsedData.copy || { ...defaultSection, errors: ['Copy section not found'] },
      overall: parsedData.overall || { grade: 'N/A', pass: false, primaryIssue: 'Overall section not found' }
    };

    return {
      detailedQaReport: finalReport,
      overallPass: finalReport.overall.pass,
      overallGrade: finalReport.overall.grade
    };
  } catch (error) {
    console.error('Error parsing structured QA report:', error);
    return { detailedQaReport: defaultReport, overallPass: false, overallGrade: 'N/A' };
  }
};
/**
 * Parses single-section format where all content is in one block.
 *
 * The block is expected to contain embedded bold headers such as `**TITLE:**`,
 * `**META:**`, `**H1:**`, `**COPY:**` and `**OVERALL...**` / `**ASSESSMENT...**`.
 * The body of an embedded section runs until the next known section header
 * (or the end of the text); other bold text such as a `**Grade:**` label
 * stays inside the body so that `parseSection` can read it.
 *
 * @param sectionText The section containing all embedded QA data.
 * @param defaultReport Default report structure used for sections that are not found.
 * @returns Parsed QA report data.
 */
const parseSingleSectionFormat = (sectionText: string, defaultReport: DetailedQaReport): { detailedQaReport: DetailedQaReport, overallPass: boolean, overallGrade: string } => {
  console.log('Parsing single-section format');

  // Only real section headers end an embedded section. A generic "**[A-Z]+" boundary
  // would cut the body at the first bold field label.
  const sectionBoundary = String.raw`\*\*(?:TITLE|META|H1|COPY|CORRECTED\s+COPY|OVERALL|ASSESSMENT)\b`;

  /** Returns the body following the bold header that starts with `label`, if present. */
  const embeddedBody = (label: string): string | undefined => {
    const pattern = new RegExp(String.raw`\*\*${label}[^*]*\*\*([\s\S]*?)(?=${sectionBoundary}|$)`, 'i');
    return sectionText.match(pattern)?.[1];
  };

  const titleBody = embeddedBody('TITLE');
  const metaBody = embeddedBody('META');
  const h1Body = embeddedBody('H1');
  const copyBody = embeddedBody('COPY');
  // The overall/assessment block extends to the end of the text.
  const overallBody = sectionText.match(/\*\*(?:OVERALL|ASSESSMENT)[^*]*\*\*([\s\S]*?)$/i)?.[1];

  const finalReport: DetailedQaReport = {
    title: titleBody !== undefined ? parseSection(titleBody) : { ...defaultReport.title, errors: ['Title section not found'] },
    meta: metaBody !== undefined ? parseSection(metaBody) : { ...defaultReport.meta, errors: ['Meta section not found'] },
    h1: h1Body !== undefined ? parseSection(h1Body) : { ...defaultReport.h1, errors: ['H1 section not found'] },
    copy: copyBody !== undefined ? parseSection(copyBody) : { ...defaultReport.copy, errors: ['Copy section not found'] },
    overall: overallBody !== undefined
      ? {
          grade: extractOverallGrade(overallBody),
          pass: extractOverallPass(overallBody),
          primaryIssue: 'Single-section format parsed'
        }
      : { ...defaultReport.overall }
  };

  return {
    detailedQaReport: finalReport,
    overallPass: finalReport.overall.pass,
    overallGrade: finalReport.overall.grade
  };
};
/**
 * Helper function to extract overall grade from text.
 *
 * Tries, in order: a "Grade ...:" label followed by a number, a "Grade" label
 * followed by a number without a colon, and finally any bare fraction such as
 * `95/100`. Markdown `*` markers between label and number are skipped.
 *
 * @param text Text of the overall/assessment block.
 * @returns The grade as written (e.g. `'95/100'`, `'92.5/100'`, `'87'`), or `'N/A'`.
 */
const extractOverallGrade = (text: string): string => {
  const gradePatterns: readonly RegExp[] = [
    // Label with colon. Tried first so that digits inside the label itself,
    // e.g. "Grade (out of 100): 95", are not mistaken for the grade.
    /Grade[^:]*:[\s*]*(\d+(?:\.\d+)?(?:\/\d+)?)/i,
    // Label without colon; the gap may not contain digits, so the full number is captured.
    /Grade[^:\d\n]*(\d+(?:\.\d+)?(?:\/\d+)?)/i,
    // Bare fraction anywhere in the text.
    /(\d+(?:\.\d+)?\/\d+)/,
  ];
  for (const pattern of gradePatterns) {
    const gradeMatch = text.match(pattern);
    if (gradeMatch) return (gradeMatch[1] ?? '').trim();
  }
  return 'N/A';
};
/**
 * Helper function to extract overall pass from text.
 *
 * Looks for a "Pass" label followed (optionally after a colon and markdown
 * `*` markers) by true/false or a ✅/❌ mark, case-insensitively.
 *
 * @param text Text of the overall/assessment block.
 * @returns `true` only when the value found is "true" or ✅; `false` otherwise,
 *   including when no pass field is found.
 */
const extractOverallPass = (text: string): boolean => {
  // [\s*]* skips bold markers between label and value, as in "**Overall Pass:** true".
  const passMatch = text.match(/Pass[^:]*:?[\s*]*(true|false|✅|❌)/i);
  if (!passMatch) return false;
  const passValue = (passMatch[1] ?? '').toLowerCase().trim();
  return passValue.includes('true') || passValue.includes('✅');
};
/** Placeholder that section parsing uses when a section has no analysis/corrected text. */
const QA_SECTION_CONTENT_PLACEHOLDER = 'Content analysis not available.';

/** Overall-grade patterns, tried in order from most to least specific. */
const OVERALL_GRADE_PATTERNS: readonly RegExp[] = [
  /-\s*\*\*Final Grade:\*\*\s*(.*)/, // - **Final Grade:** 100/100
  /•\s*\*\*Final Grade:\*\*\s*(.*)/, // • **Final Grade:** 100/100
  /\*\s*\*\*Final Grade:\*\*\s*(.*)/, // * **Final Grade:** 100/100
  /-\s*\*\*Total Grade:\*\*\s*(.*)/, // - **Total Grade:** 100/100
  /•\s*\*\*Total Grade:\*\*\s*(.*)/, // • **Total Grade:** 100/100
  /\*\s*\*\*Total Grade:\*\*\s*(.*)/, // * **Total Grade:** 100/100
  /(?:•|-|\*)\s*\*\*(?:Final|Total|Overall)?\s*Grade\*\*:{0,2}\s*(.*)/i, // Bold label, colon optional
  /(?:•|-|\*)\s*(?:Final|Total|Overall)?\s*Grade::?\s*(.*)/i, // Label not bold
  /(?:Final|Total|Overall)\s*Grade::?\s*(\d+\/\d+|\d+)/im, // Anywhere in text
  /(\d+\/\d+)\s*(?:final|total|overall)?/im, // Number first
];

/**
 * Overall-pass patterns, tried in order from most to least specific.
 * The former "Overall Pass:" / "All Sections Pass:" / capitalised-boolean
 * anywhere patterns are omitted: every text they match is already matched by
 * the case-insensitive `Pass:` pattern that precedes them.
 */
const OVERALL_PASS_PATTERNS: readonly RegExp[] = [
  /-\s*\*\*Overall Pass:\*\*\s*(.*)/i, // - **Overall Pass:** true
  /•\s*\*\*Overall Pass:\*\*\s*(.*)/i, // • **Overall Pass:** true
  /\*\s*\*\*Overall Pass:\*\*\s*(.*)/i, // * **Overall Pass:** true
  /•\s*\*\*All Sections Pass:\*\*\s*(.*)/i, // • **All Sections Pass:** true
  /-\s*\*\*All Sections Pass:\*\*\s*(.*)/i, // - **All Sections Pass:** true
  /\*\s*\*\*All Sections Pass:\*\*\s*(.*)/i, // * **All Sections Pass:** true
  /•\s*\*\*Final Pass:\*\*\s*(.*)/i, // • **Final Pass:** true
  /-\s*\*\*Final Pass:\*\*\s*(.*)/i, // - **Final Pass:** true
  /\*\s*\*\*Final Pass:\*\*\s*(.*)/i, // * **Final Pass:** true
  /(?:•|-|\*)\s*\*\*(?:Overall\s+|All\s+Sections\s+|Final\s+)?Pass\*\*:{0,2}\s*(.*)/i, // Bold label, colon optional
  /(?:•|-|\*)\s*(?:Overall\s+|All\s+Sections\s+|Final\s+)?Pass::?\s*(.*)/i, // Label not bold
  /Pass::?\s*(true|false|✅|❌|TRUE|FALSE)/im, // Anywhere in text
  /(true|false|✅|❌|TRUE|FALSE)\s*(?:overall|pass)/im, // Value first
];

/** Returns the match of the first pattern (in list order) that matches `text`, or `null`. */
const firstOverallMatch = (text: string, patterns: readonly RegExp[]): RegExpMatchArray | null => {
  for (const pattern of patterns) {
    const found = text.match(pattern);
    if (found) return found;
  }
  return null;
};

/** Removes whitespace and markdown `*` markers wrapped around a captured value. */
const stripOverallValueMarkup = (raw: string): string =>
  raw.replace(/^[\s*]+/, '').replace(/[\s*]+$/, '');

/** Interprets a captured pass value: anything containing "true"/"✅", or exactly yes/passed/pass. */
const isAffirmativeOverallValue = (raw: string): boolean => {
  const value = stripOverallValueMarkup(raw).toLowerCase();
  return value.includes('true') || value.includes('✅') || value === 'yes' || value === 'passed' || value === 'pass';
};

/**
 * Splits a markdown QA report into section blocks according to the header style it uses.
 *
 * For the lookahead-based splits, chunks are kept only when they begin with a
 * header. This drops any preamble but keeps the first section when the report
 * starts directly with a header (a lookahead split yields no leading empty chunk,
 * so discarding the first chunk unconditionally would lose that section).
 */
const splitQaReportSections = (text: string): string[] => {
  if (text.includes('### **')) {
    console.log('Splitting on ### ** (keeping section headers)');
    return text.split(/(?=### \*\*)/g).filter(chunk => /^### \*\*/.test(chunk));
  }
  if (text.includes('**TITLE**')) {
    console.log('Splitting on **SECTION** headers');
    const boldHeader = /^\*\*(?:TITLE|META|H1|COPY|OVERALL|ASSESSMENT|CORRECTED COPY)[^*]*\*\*/;
    return text
      .split(/(?=\*\*(?:TITLE|META|H1|COPY|OVERALL|ASSESSMENT|CORRECTED COPY)[^*]*\*\*)/g)
      .filter(chunk => boldHeader.test(chunk));
  }
  if (text.includes('### ')) {
    console.log('Splitting on generic ### headers');
    return text.split(/(?=###\s+)/g).filter(chunk => /^###\s+/.test(chunk));
  }
  console.log('Splitting on ## headers');
  // A string separator is consumed, so the first chunk is whatever precedes the first "## ".
  return text.split('## ').slice(1);
};

/** Parses the overall/assessment section block into grade, pass flag and primary issue. */
const parseOverallSectionBlock = (sectionBlock: string): { grade: string; pass: boolean; primaryIssue: string } => {
  console.log('Parsing overall section');
  console.log('Overall section text:', sectionBlock.substring(0, 300));

  const gradeMatch = firstOverallMatch(sectionBlock, OVERALL_GRADE_PATTERNS);
  // Stripping markup matters for "**Overall Grade:** 95/100", where the capture starts at "** 95/100".
  const grade = gradeMatch ? stripOverallValueMarkup(gradeMatch[1] ?? '') : 'N/A';
  console.log('Overall grade match:', gradeMatch, 'Final grade:', grade);

  const passMatch = firstOverallMatch(sectionBlock, OVERALL_PASS_PATTERNS);
  const pass = passMatch ? isAffirmativeOverallValue(passMatch[1] ?? '') : false;
  console.log('Overall pass match:', passMatch, 'Final pass:', pass);

  // Candidate sources for the primary issue, in priority order.
  const explanationMatch = sectionBlock.match(/\*\*Overall Pass\*\*:\s*[^()]*\(([^)]+)\)/);
  const statusMatch = sectionBlock.match(/(?:•|-|\*)\s*\*\*(?:Pipeline\s+)?Status\*\*?:?\s*([\s\S]*?)(?=\n(?:•|-|\*)\s*\*\*|$)/);
  const primaryIssueMatch =
    sectionBlock.match(/(?:•|-|\*)\s*\*\*(?:Primary\s+)?Issue\*\*?:?\s*([\s\S]*?)(?=\n(?:•|-|\*)\s*\*\*|$)/) ||
    sectionBlock.match(/(?:•|-|\*)\s*(?:Primary\s+)?Issue::?\s*([\s\S]*?)(?=\n(?:•|-|\*)\s*\*\*|$)/);
  const errorsMatch = sectionBlock.match(/(?:•|-|\*)\s*\*\*Total\s+Errors?\*\*?:?\s*([\s\S]*?)(?=\n(?:•|-|\*)\s*\*\*|$)/);
  const totalSectionsMatch = sectionBlock.match(/Total\s*Sections\s*Passing::?\s*([^\n]+)/i);

  // The default message follows the pass flag so a failed report never claims success.
  let primaryIssue = pass ? 'All sections passed successfully.' : 'One or more sections failed.';
  if (explanationMatch) {
    primaryIssue = (explanationMatch[1] ?? '').trim();
  } else if (statusMatch) {
    primaryIssue = (statusMatch[1] ?? '').trim();
  } else if (primaryIssueMatch) {
    primaryIssue = (primaryIssueMatch[1] ?? '').trim();
  } else if (errorsMatch) {
    const errorText = (errorsMatch[1] ?? '').trim();
    if (errorText !== '[]' && errorText !== '') {
      primaryIssue = `Errors found: ${errorText}`;
    }
  }
  if (totalSectionsMatch) {
    primaryIssue = `${primaryIssue} | Total Sections Passing: ${(totalSectionsMatch[1] ?? '').trim()}`;
  }

  console.log('Primary issue extraction - explanation:', explanationMatch, 'status:', statusMatch, 'issue:', primaryIssueMatch, 'Final issue:', primaryIssue);
  console.log('Setting overall data - grade:', grade, 'pass:', pass, 'primaryIssue:', primaryIssue);
  return { grade, pass, primaryIssue };
};

/**
 * Parses the new, structured QA report format.
 *
 * After `<think>` blocks are removed, markdown-style reports (text starting
 * with `##`, or containing `**TITLE**`, `### **` or `### TITLE`) are split into
 * sections and each section is parsed by header name; all other text is handed
 * to the structured plain-text parser. When no overall section exists, the
 * overall result is taken from an inline "OVERALL GRADE/PASS" line or, failing
 * that, derived from the four content sections.
 *
 * @param qaText The raw `qa_gaurd` string from the API.
 * @returns An object containing the detailed parsed report and top-level pass/grade info.
 */
const parseNewQaReport = (qaText: string): { detailedQaReport: DetailedQaReport, overallPass: boolean, overallGrade: string } => {
  // Default structure in case of parsing failure.
  const defaultSection: QaSectionResult = { grade: 'N/A', pass: false, errors: ['Parsing failed'], corrected: '' };
  const defaultReport: DetailedQaReport = {
    title: { ...defaultSection },
    meta: { ...defaultSection },
    h1: { ...defaultSection },
    copy: { ...defaultSection },
    overall: { grade: 'N/A', pass: false, primaryIssue: 'Parsing failed' }
  };

  const cleanedQaText = cleanResponseText(qaText);
  if (!cleanedQaText) {
    return { detailedQaReport: defaultReport, overallPass: false, overallGrade: 'N/A' };
  }

  const isMarkdownReport =
    cleanedQaText.startsWith('##') ||
    cleanedQaText.includes('**TITLE**') ||
    cleanedQaText.includes('### **') ||
    cleanedQaText.includes('### TITLE');
  if (!isMarkdownReport) {
    return parseStructuredQaReport(cleanedQaText);
  }

  console.log('Using markdown parser for QA report');
  console.log('QA text starts with:', cleanedQaText.substring(0, 200));

  const sections = splitQaReportSections(cleanedQaText);
  console.log('Found sections:', sections.length);
  sections.forEach((section, index) => {
    console.log(`Section ${index}:`, section.substring(0, 100));
  });

  // Special handling for single-section format like "## GRADE REPORT".
  const onlySection = sections.length === 1 ? sections[0] : undefined;
  if (onlySection !== undefined && (onlySection.includes('GRADE REPORT') || onlySection.includes('QUALITY ASSURANCE'))) {
    console.log('Detected single-section format, parsing embedded sections');
    return parseSingleSectionFormat(onlySection, defaultReport);
  }

  const parsedData: Partial<DetailedQaReport> = {};
  let correctedCopyFromSeparateSection = '';

  for (const sectionBlock of sections) {
    const lines = sectionBlock.trim().split('\n');
    // Normalise the header: drop leading '#', bold markers and trailing ':' / dashes.
    const header = (lines[0] ?? '')
      .trim()
      .toLowerCase()
      .replace(/^#+\s*/, '')
      .replace(/\*\*/g, '')
      .replace(/[:\-–]+$/, '')
      .trim();
    console.log('Processing header:', header);

    if (header.includes('title')) {
      console.log('Parsing title section');
      parsedData.title = parseSection(sectionBlock);
    } else if (header.includes('meta')) {
      console.log('Parsing meta section');
      parsedData.meta = parseSection(sectionBlock);
    } else if (header.includes('h1')) {
      console.log('Parsing h1 section');
      parsedData.h1 = parseSection(sectionBlock);
    } else if (header.includes('copy') && !header.includes('corrected')) {
      console.log('Parsing copy section');
      parsedData.copy = parseSection(sectionBlock);
    } else if (header.includes('corrected') && header.includes('copy')) {
      console.log('Capturing separate CORRECTED COPY section');
      correctedCopyFromSeparateSection = lines.slice(1).join('\n').trim();
    } else if (header.includes('overall') || header.includes('assessment') || header.includes('pipeline')) {
      parsedData.overall = parseOverallSectionBlock(sectionBlock);
    }
  }

  const finalReport: DetailedQaReport = {
    title: parsedData.title || { ...defaultSection, errors: ['Title section not found'] },
    meta: parsedData.meta || { ...defaultSection, errors: ['Meta section not found'] },
    h1: parsedData.h1 || { ...defaultSection, errors: ['H1 section not found'] },
    copy: parsedData.copy || { ...defaultSection, errors: ['Copy section not found'] },
    overall: parsedData.overall || { grade: 'N/A', pass: false, primaryIssue: 'Overall section not found' }
  };

  // Use a separate CORRECTED COPY section when the copy section itself yielded nothing useful
  // (empty, very short, or just the section parser's placeholder text).
  if (correctedCopyFromSeparateSection) {
    const cleanedCorrected = correctedCopyFromSeparateSection.replace(/^###.*$/m, '').trim();
    const existing = finalReport.copy.corrected;
    if (!existing || existing.length < 20 || existing === QA_SECTION_CONTENT_PLACEHOLDER) {
      finalReport.copy.corrected = cleanedCorrected;
    }
  }

  // No explicit overall section: look for inline results, otherwise derive them.
  if (!parsedData.overall) {
    const inlineOverallGradeMatch = cleanedQaText.match(/##?\s*OVERALL\s*(?:PIPELINE\s*)?GRADE::?\s*([^#\n]+)/i);
    const inlineOverallPassMatch = cleanedQaText.match(/##?\s*OVERALL\s*(?:PIPELINE\s*)?PASS::?\s*([^#\n(]+)/i);

    if (inlineOverallGradeMatch || inlineOverallPassMatch) {
      const grade = inlineOverallGradeMatch ? stripOverallValueMarkup(inlineOverallGradeMatch[1] ?? '') : 'N/A';
      const pass = inlineOverallPassMatch ? isAffirmativeOverallValue(inlineOverallPassMatch[1] ?? '') : false;
      finalReport.overall = {
        grade,
        pass,
        primaryIssue: pass ? 'All sections passed successfully.' : 'Overall assessment failed.'
      };
      console.log('Found inline overall results - grade:', grade, 'pass:', pass);
    } else {
      // Overall pass requires every content section to pass.
      const allSectionsPassed =
        finalReport.title.pass &&
        finalReport.meta.pass &&
        finalReport.h1.pass &&
        finalReport.copy.pass;

      // Average the first number of each grade, but only when all four sections have one.
      const numericGrades = [finalReport.title.grade, finalReport.meta.grade, finalReport.h1.grade, finalReport.copy.grade]
        .filter(sectionGrade => sectionGrade !== 'N/A' && sectionGrade !== undefined)
        .map(sectionGrade => {
          const numberMatch = String(sectionGrade).match(/(\d+(?:\.\d+)?)/);
          return numberMatch ? parseFloat(numberMatch[1] ?? '') : null;
        })
        .filter((value): value is number => value !== null);

      let averageGrade = 'N/A';
      if (numericGrades.length === 4) {
        const avg = numericGrades.reduce((sum, value) => sum + value, 0) / numericGrades.length;
        averageGrade = `${avg.toFixed(2)}/100`;
      }

      finalReport.overall = {
        grade: averageGrade,
        pass: allSectionsPassed,
        primaryIssue: allSectionsPassed ? 'All sections passed successfully.' : 'One or more sections failed.'
      };
      console.log('Calculated overall from individual sections - pass:', allSectionsPassed, 'grade:', averageGrade);
    }
  }

  console.log('Final parsed QA data:', finalReport.overall);
  console.log('Setting overallPass:', finalReport.overall.pass, 'overallGrade:', finalReport.overall.grade);
  return {
    detailedQaReport: finalReport,
    overallPass: finalReport.overall.pass,
    overallGrade: finalReport.overall.grade
  };
};
/** The `workflow_finished` stream event, limited to the fields this module reads. */
interface WorkflowFinishedEvent {
  event: 'workflow_finished';
  data?: {
    status?: string;
    error?: unknown;
    outputs?: ApiResponseOutput;
  };
}

/**
 * Finds the `workflow_finished` event in the raw stream text.
 *
 * Each candidate line has its optional `data:` prefix removed and is parsed as
 * JSON; the event name is checked on the parsed object, so the match does not
 * depend on how the JSON is spaced. Lines that are not valid JSON are skipped.
 */
const findWorkflowFinishedEvent = (streamContent: string): WorkflowFinishedEvent | undefined => {
  for (const rawLine of streamContent.split('\n')) {
    if (!rawLine.includes('workflow_finished')) continue;
    const jsonText = rawLine.trim().replace(/^data:\s*/, '');
    try {
      const parsed: unknown = JSON.parse(jsonText);
      if (
        typeof parsed === 'object' &&
        parsed !== null &&
        (parsed as { event?: unknown }).event === 'workflow_finished'
      ) {
        return parsed as WorkflowFinishedEvent;
      }
    } catch {
      // Malformed or partial line: keep scanning the remaining lines.
    }
  }
  return undefined;
};

/**
 * Runs the Dify workflow for a given input row, with retries for transient errors.
 *
 * Up to `MAX_RETRIES` attempts are made. Failures whose code starts with
 * `RETRYABLE_` (network failure of the request, retryable HTTP status, HTML
 * error page, overloaded/gateway-timeout API error) are retried with
 * exponential back-off plus jitter; every other failure aborts immediately.
 *
 * @param inputs - The data from a CSV row.
 * @returns A promise that resolves to the processed and cleaned results.
 * @throws WorkflowError with code `SERVICE_UNAVAILABLE` when the last failure was
 *   a transient one, otherwise the last error that occurred.
 */
export const runWorkflow = async (inputs: ApiInput): Promise<ProcessedResult> => {
  let lastError: Error = new Error('Workflow failed after all retries.');

  for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
    // Raw response text of this attempt, kept for error logging.
    let responseText = '';
    try {
      const payload = {
        inputs,
        response_mode: 'streaming',
        user: API_USER,
      };

      let response: Response;
      try {
        response = await fetch(API_URL, {
          method: 'POST',
          headers: {
            'Authorization': `Bearer ${API_TOKEN}`,
            'Content-Type': 'application/json',
          },
          body: JSON.stringify(payload),
        });
      } catch (networkError) {
        // fetch rejects when the request itself fails (e.g. connection problems); treat as transient.
        const detail = networkError instanceof Error ? networkError.message : String(networkError);
        throw new WorkflowError('RETRYABLE_NETWORK_ERROR', 'Network request failed.', 'network', detail);
      }

      if (!response.ok) {
        responseText = await response.text();
        // Check for retryable HTTP status codes.
        if (RETRYABLE_STATUS_CODES.includes(response.status)) {
          throw new WorkflowError(`RETRYABLE_HTTP_${response.status}`, `Temporary service issue (HTTP ${response.status}).`, 'network', responseText);
        }
        // For other HTTP errors, fail immediately.
        throw new WorkflowError(`HTTP_${response.status}`, `API request failed (HTTP ${response.status}).`, 'api', responseText);
      }

      if (!response.body) {
        throw new WorkflowError('EMPTY_RESPONSE', 'Empty response from API.', 'network');
      }

      // Read the whole stream into one string.
      const reader = response.body.getReader();
      const decoder = new TextDecoder();
      let streamContent = '';
      for (;;) {
        const { done, value } = await reader.read();
        if (done) break;
        streamContent += decoder.decode(value, { stream: true });
      }
      // Flush bytes the decoder may still be holding for an incomplete character.
      streamContent += decoder.decode();

      // The full stream content becomes our responseText for error logging.
      responseText = streamContent;

      const finishedEvent = findWorkflowFinishedEvent(streamContent);
      if (!finishedEvent) {
        // The gateway might have returned an HTML error page instead of a stream.
        const leadingText = streamContent.trim().toLowerCase();
        if (leadingText.startsWith('<html') || leadingText.startsWith('<!doctype html')) {
          throw new WorkflowError('RETRYABLE_HTML_RESPONSE', 'Service returned an HTML error page.', 'stream', streamContent.slice(0, 1000));
        }
        console.error('Full stream content:', streamContent);
        throw new WorkflowError('FINISH_EVENT_MISSING', 'Workflow did not finish successfully.', 'stream', streamContent.slice(0, 1000));
      }

      const finishedData = finishedEvent.data;
      if (finishedData?.status !== 'succeeded') {
        const apiError = finishedData?.error || 'Unknown';
        const apiErrorText = typeof apiError === 'string' ? apiError : JSON.stringify(apiError);
        const lowered = apiErrorText.toLowerCase();
        // If it's a known transient error, mark it as retryable.
        if (lowered.includes('overloaded') || lowered.includes('gateway time-out')) {
          throw new WorkflowError('RETRYABLE_API_ERROR', 'Service overloaded. Retrying...', 'api', apiErrorText);
        }
        // Otherwise, it's a permanent failure for this row.
        throw new WorkflowError('WORKFLOW_FAILED', `Workflow failed: ${apiErrorText}`, 'api', apiErrorText);
      }

      const outputs = finishedData?.outputs;
      if (!outputs || Object.keys(outputs).length === 0) {
        throw new WorkflowError('EMPTY_OUTPUTS', 'Workflow succeeded but returned empty outputs.', 'parse');
      }

      // "qa_gaurd" is the output key as spelled by the API.
      const rawQaReport = outputs.qa_gaurd || 'QA report not available.';
      console.log('QA Report length:', rawQaReport.length);

      const { detailedQaReport, overallPass, overallGrade } = parseNewQaReport(rawQaReport);
      console.log('Final Parsed QA - Pass:', overallPass, 'Grade:', overallGrade);

      // Success, return the result and exit the loop.
      return {
        generatedTitle: cleanResponseText(outputs.title),
        generatedH1: cleanResponseText(outputs.h1),
        generatedCopy: cleanResponseText(outputs.copy),
        generatedMeta: cleanResponseText(outputs.meta),
        qaReport: rawQaReport,
        detailedQaReport,
        overallPass,
        overallGrade,
      };
    } catch (error) {
      lastError = error instanceof Error ? error : new Error(String(error));
      const isRetryable = lastError instanceof WorkflowError && lastError.code.startsWith('RETRYABLE_');

      if (isRetryable && attempt < MAX_RETRIES) {
        // Exponential backoff with jitter: 1s, 2s, 4s, ... + random
        const delayMs = INITIAL_RETRY_DELAY_MS * Math.pow(2, attempt - 1);
        console.warn(`Attempt ${attempt}/${MAX_RETRIES} failed due to a transient error. Retrying in ~${Math.round(delayMs / 1000)}s...`, { error: lastError.message });
        await delay(delayMs);
        continue; // Move to the next attempt
      }

      // For non-retryable errors, or if we've exhausted retries, break the loop to throw the error.
      console.error(`Failed to process row. Attempt ${attempt}/${MAX_RETRIES}. Error: ${lastError.message}`);
      if (responseText) {
        // Log the problematic response that caused the failure
        console.error('Problematic Response:', responseText);
      }
      break;
    }
  }

  // If the loop finished without returning, it means all attempts failed.
  // We re-throw the last captured error, making it more user-friendly if it was a transient one.
  if (lastError instanceof WorkflowError && lastError.code.startsWith('RETRYABLE_')) {
    throw new WorkflowError('SERVICE_UNAVAILABLE', `API service is temporarily unavailable. Tried ${MAX_RETRIES} times. Please try again later.`, 'api', lastError.debug || lastError.stack);
  }
  throw lastError;
};