/*
 * Webpack chunk 974: the leaderboard page.
 *
 * Registers two modules with the webpack runtime:
 *   - 4124: the page module; its default export renders the preview banner,
 *           the benchmark introduction and the sortable leaderboard table.
 *   - 8538: entry shim that asynchronously requires module 4124.
 * The trailing callback asks the runtime to execute module 8538 once the
 * listed dependency chunks are available.
 */
(self.webpackChunk_N_E = self.webpackChunk_N_E || []).push([
  [974],
  {
    4124: (__unused_module, __webpack_exports__, __webpack_require__) => {
      "use strict";
      __webpack_require__.r(__webpack_exports__);
      __webpack_require__.d(__webpack_exports__, {
        default: () => LeaderboardPage,
      });

      // Module 1500 supplies jsx/jsxs/Fragment (presumably react/jsx-runtime).
      const { jsx, jsxs, Fragment } = __webpack_require__(1500);
      // Module 3870 exports the wrapper component `A` that surrounds the page;
      // it is read lazily at render time, exactly as the original did.
      const wrapperModule = __webpack_require__(3870);

      /**
       * Leaderboard rows. `score` is the win rate in percent; `version` and
       * `link` are optional. Treated as read-only by every consumer below.
       */
      const LEADERBOARD_DATA = [
        { model: "Deepseek R1", score: 87.3, link: "https://huggingface.co/deepseek-ai/DeepSeek-R1" },
        { model: "Omni Qi", score: 67.6, link: "https://readomni.com" },
        { model: "o3-mini", score: 62.7, version: "o3-mini-2025-01-31", link: "https://platform.openai.com/docs/models#o3-mini" },
        { model: "GPT-4o", score: 61, version: "gpt-4o-2024-11-20", link: "https://platform.openai.com/docs/models#gpt-4o" },
        { model: "Deepseek V3", score: 57.7, link: "https://huggingface.co/deepseek-ai/DeepSeek-V3" },
        { model: "Claude 3.7 Sonnet", score: 54, version: "claude-3-7-sonnet-20250219", link: "https://docs.anthropic.com/en/docs/about-claude/models/all-models" },
        { model: "Claude 3.5 Sonnet", score: 52, version: "claude-3-5-sonnet-20241022", link: "https://docs.anthropic.com/en/docs/about-claude/models/all-models" },
        { model: "Gemini 1.5 Pro", score: 50.7, version: "gemini-1.5-pro-002", link: "https://ai.google.dev/gemini-api/docs/models/gemini#gemini-1.5-pro" },
        { model: "Qwen Max", score: 49.3, version: "qwen-max-2025-01-25", link: "https://www.alibabacloud.com/help/en/model-studio/getting-started/models#c2d5833ae4jmo" },
        { model: "Qwen Plus", score: 49.3, version: "qwen-plus-2025-01-25", link: "https://www.alibabacloud.com/help/en/model-studio/getting-started/models#6ad3cd90f0c5r" },
        { model: "Gemini 2.0 Flash", score: 43, version: "gemini-2.0-flash-001", link: "https://ai.google.dev/gemini-api/docs/models/gemini#gemini-2.0-flash" },
        { model: "Mistral Large", score: 40, version: "mistral-large-2411", link: "https://docs.mistral.ai/getting-started/models/models_overview/" },
        { model: "Gemini Flash 1.5 8B", score: 38.3, version: "gemini-1.5-flash-8b-001", link: "https://ai.google.dev/gemini-api/docs/models/gemini#gemini-1.5-flash-8b" },
        { model: "GPT-4o-mini", score: 35.3, version: "gpt-4o-mini-2024-07-18", link: "https://platform.openai.com/docs/models#gpt-4o-mini" },
        { model: "Phi-4", score: 33, link: "https://huggingface.co/microsoft/phi-4" },
        { model: "Llama 3.3 70B", score: 32.7, version: "llama-3.3-70b-instruct", link: "https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct" },
        { model: "Gemini 2.0 Flash Lite", score: 31.3, version: "gemini-2.0-flash-lite-001", link: "https://ai.google.dev/gemini-api/docs/models/gemini#gemini-2.0-flash-lite" },
        { model: "Claude 3.5 Haiku", score: 30.7, version: "claude-3-5-haiku-20241022", link: "https://docs.anthropic.com/en/docs/about-claude/models/all-models" },
        { model: "Mistral Small 3", score: 27.7, version: "mistral-small-24b-instruct-2501", link: "https://docs.mistral.ai/getting-started/models/models_overview/" },
        { model: "Qwen Turbo", score: 27.3, version: "qwen-turbo-2024-11-01", link: "https://www.alibabacloud.com/help/en/model-studio/getting-started/models#ede6678dedqbz" },
        { model: "Google Translate (NMT)", score: 6.7, link: "https://cloud.google.com/translate" },
      ];

      // Module 432 supplies the hooks used below (presumably React).
      const { useState, useMemo } = __webpack_require__(432);
      // Module 4184 is used as a link component taking `href` (presumably
      // next/link); `.n` returns a getter for its default export.
      const linkModule = __webpack_require__(4184);
      const getLink = __webpack_require__.n(linkModule);

      /**
       * Direction applied when a column becomes the active sort column:
       * names start A-Z, every other column starts highest-first.
       * @param {string} field - column key ("model" or "score").
       * @returns {"asc"|"desc"}
       */
      function defaultDirectionFor(field) {
        return field === "model" ? "asc" : "desc";
      }

      /**
       * Build a 16x16 stroked chevron icon.
       * @param {string} pathData - SVG path `d` attribute.
       * @returns the `<svg>` element.
       */
      function renderChevron(pathData) {
        return jsx("svg", {
          xmlns: "http://www.w3.org/2000/svg",
          width: "16",
          height: "16",
          viewBox: "0 0 24 24",
          fill: "none",
          stroke: "currentColor",
          strokeWidth: "2",
          strokeLinecap: "round",
          strokeLinejoin: "round",
          children: jsx("path", { d: pathData }),
        });
      }

      /**
       * Build a clickable, keyboard-operable header cell for a sortable column.
       *
       * @param {object} options
       * @param {string} options.field - column key passed to `handleSort`.
       * @param {string} options.label - visible header text.
       * @param {string} options.ariaPrefix - start of the aria-label, e.g. "Sort by score ".
       * @param {string} options.cellClassName - classes for the `<th>`.
       * @param {string} options.innerClassName - classes for the inner flex `<div>`.
       * @param {string} options.sortField - currently active column key.
       * @param {string} options.sortDirection - "asc" or "desc".
       * @param {(field: string) => void} options.handleSort - sort callback.
       * @returns the `<th>` element.
       */
      function renderSortableHeader(options) {
        const {
          field,
          label,
          ariaPrefix,
          cellClassName,
          innerClassName,
          sortField,
          sortDirection,
          handleSort,
        } = options;

        const isActive = sortField === field;
        const isActiveAscending = isActive && sortDirection === "asc";

        // Announce the direction a click will actually produce. An inactive
        // column switches to its default direction (descending for score), so
        // the label must follow the same rule as handleSort rather than
        // always saying "ascending".
        let nextDirection;
        if (isActive) {
          nextDirection = isActiveAscending ? "desc" : "asc";
        } else {
          nextDirection = defaultDirectionFor(field);
        }
        const nextDirectionLabel =
          nextDirection === "asc" ? "ascending" : "descending";

        const indicatorClassName = isActive
          ? "text-primary opacity-100"
          : "opacity-0 group-hover:opacity-50 group-focus:opacity-50";

        return jsx("th", {
          className: cellClassName,
          onClick: () => handleSort(field),
          tabIndex: 0,
          role: "button",
          "aria-label": `${ariaPrefix}${nextDirectionLabel}`,
          onKeyDown: (event) => {
            // Enter and Space activate the header like a native button.
            if (event.key === "Enter" || event.key === " ") {
              event.preventDefault();
              handleSort(field);
            }
          },
          children: jsxs("div", {
            className: innerClassName,
            children: [
              label,
              jsx("span", {
                className: `ml-2 transition-opacity ${indicatorClassName}`,
                children: isActiveAscending
                  ? renderChevron("m18 15-6-6-6 6")
                  : renderChevron("m6 9 6 6 6-6"),
              }),
            ],
          }),
        });
      }

      /** Introductory card describing the LiTERatE benchmark. */
      function BenchmarkIntro() {
        const Link = getLink();
        return jsxs("div", {
          className:
            "bg-card text-card-foreground p-6 rounded-lg mb-8 shadow-md border border-border",
          children: [
            jsx("h2", {
              className: "text-2xl font-bold mb-4",
              children: "Literary Translation Evaluation and Rating Ensemble",
            }),
            jsx("p", {
              className: "text-muted-foreground mb-2",
              children:
                "LiTERatE is a benchmark specifically designed for evaluating machine translation systems on literary text from Chinese, Japanese, and Korean languages. Unlike traditional MT benchmarks, LiTERatE focuses on the unique challenges of literary translation with its creative and nuanced nature.",
            }),
            jsx("p", {
              className: "text-muted-foreground mb-2",
              children:
                "Our evaluation uses chunks of 200-500 CJK characters as the basic unit, providing terminology glossaries and contextual information to all systems. An ensemble of LLMs judges translations through head-to-head comparisons with human translations, achieving 82% accuracy compared to decisive human judgments.",
            }),
            jsx("p", {
              className: "text-muted-foreground mb-2",
              children:
                "The scores below represent each system's win rate against human translators (0-100). A score of 50 indicates parity with human translation quality, while higher scores suggest superior performance.",
            }),
            jsxs("p", {
              className: "text-muted-foreground",
              children: [
                "Learn more about our",
                " ",
                jsx(Link, {
                  href: "/methodology",
                  className: "text-primary hover:underline",
                  children: "evaluation methodology",
                }),
                ".",
              ],
            }),
          ],
        });
      }

      /**
       * Sortable leaderboard table.
       *
       * @param {object} props
       * @param {Array} props.sortedData - rows in display order.
       * @param {string} props.sortField - active sort column ("model" or "score").
       * @param {string} props.sortDirection - "asc" or "desc".
       * @param {(field: string) => void} props.handleSort - sort callback.
       * @param {Array} props.originalData - full data set used to derive the
       *   rank shown in the first column; never mutated here.
       */
      function LeaderboardTable(props) {
        const {
          sortedData,
          sortField,
          sortDirection,
          handleSort,
          originalData,
        } = props;

        // Rank is the 1-based position by descending score and does not depend
        // on the display order. It is computed once from a copy: sorting
        // `originalData` itself would mutate the caller's array during render
        // and repeat the sort for every row.
        const rankByModel = useMemo(() => {
          const ranks = new Map();
          [...originalData]
            .sort((a, b) => b.score - a.score)
            .forEach((entry, index) => {
              // First occurrence wins, matching a findIndex lookup by model.
              if (!ranks.has(entry.model)) {
                ranks.set(entry.model, index + 1);
              }
            });
          return ranks;
        }, [originalData]);

        const headerRow = jsxs("tr", {
          className: "bg-muted",
          children: [
            jsx("th", {
              className: "p-4 text-left font-bold text-muted-foreground",
              children: "Rank",
            }),
            renderSortableHeader({
              field: "model",
              label: "Model",
              ariaPrefix: "Sort by model name ",
              cellClassName:
                "p-4 text-left font-bold cursor-pointer hover:bg-secondary/70 focus:bg-secondary/70 transition-colors group focus-within:bg-secondary/70 rounded-sm",
              innerClassName: "flex items-center",
              sortField,
              sortDirection,
              handleSort,
            }),
            jsx("th", {
              className:
                "p-4 text-left font-bold text-muted-foreground hidden md:table-cell",
              children: "Version",
            }),
            renderSortableHeader({
              field: "score",
              label: "Win Rate",
              ariaPrefix: "Sort by score ",
              cellClassName:
                "p-4 text-right font-bold cursor-pointer hover:bg-secondary/70 focus:bg-secondary/70 transition-colors group focus-within:bg-secondary/70 rounded-sm whitespace-nowrap min-w-[100px]",
              innerClassName: "flex items-center justify-end",
              sortField,
              sortDirection,
              handleSort,
            }),
          ],
        });

        const bodyRows = sortedData.map((entry) => {
          // A model missing from originalData gets rank 0 (findIndex's -1, plus 1).
          const rank = rankByModel.has(entry.model)
            ? rankByModel.get(entry.model)
            : 0;

          return jsxs(
            "tr",
            {
              className:
                "border-b border-border hover:bg-muted/50 transition-colors",
              children: [
                jsx("td", {
                  className: "p-4 text-left text-muted-foreground",
                  children: rank,
                }),
                jsx("td", {
                  className: "p-4 text-left font-medium",
                  children: entry.link
                    ? jsx("a", {
                        href: entry.link,
                        target: "_blank",
                        rel: "noopener noreferrer",
                        className:
                          "text-primary hover:underline hover:opacity-80 transition-opacity",
                        children: entry.model,
                      })
                    : entry.model,
                }),
                jsx("td", {
                  className:
                    "p-4 text-left text-muted-foreground text-sm hidden md:table-cell",
                  children: entry.version || "—",
                }),
                jsxs("td", {
                  className: "p-4 text-right font-semibold min-w-[100px]",
                  children: [entry.score.toFixed(1), "%"],
                }),
              ],
            },
            entry.model
          );
        });

        return jsx("div", {
          className:
            "bg-card rounded-lg shadow-md border border-border overflow-hidden",
          children: jsx("div", {
            className: "overflow-x-auto",
            children: jsxs("table", {
              className: "w-full border-collapse",
              children: [
                jsx("thead", { children: headerRow }),
                jsx("tbody", { children: bodyRows }),
              ],
            }),
          }),
        });
      }

      /** Amber notice stating that the leaderboard is an experimental preview. */
      function PreviewBanner() {
        return jsx("div", {
          className:
            "bg-amber-50 dark:bg-amber-950/30 border border-amber-200 dark:border-amber-800/50 p-4 rounded-lg mb-8 shadow-sm",
          children: jsxs("p", {
            className: "text-amber-800 dark:text-amber-200",
            children: [
              jsx("strong", { children: "Experimental Preview:" }),
              " This leaderboard is currently in preview mode and the results may change as we refine our evaluation methodology.",
            ],
          }),
        });
      }

      /**
       * Sort state for the leaderboard.
       *
       * Starts sorted by score, descending. Selecting the active column flips
       * the direction; selecting another column switches to it using
       * `defaultDirectionFor`.
       *
       * @param {Array} data - rows to sort; copied before sorting.
       * @returns {{sortedData: Array, sortField: string, sortDirection: string,
       *   handleSort: (field: string) => void}}
       */
      function useLeaderboardSort(data) {
        const [sortField, setSortField] = useState("score");
        const [sortDirection, setSortDirection] = useState("desc");

        const sortedData = useMemo(
          () =>
            [...data].sort((a, b) => {
              const ascending = sortDirection === "asc";
              if (sortField === "model") {
                return ascending
                  ? a.model.localeCompare(b.model)
                  : b.model.localeCompare(a.model);
              }
              return ascending ? a.score - b.score : b.score - a.score;
            }),
          [data, sortField, sortDirection]
        );

        const handleSort = (field) => {
          if (sortField === field) {
            setSortDirection(sortDirection === "asc" ? "desc" : "asc");
            return;
          }
          setSortField(field);
          setSortDirection(defaultDirectionFor(field));
        };

        return { sortedData, sortField, sortDirection, handleSort };
      }

      /** Page body: preview banner, benchmark introduction, leaderboard table. */
      function LeaderboardContent() {
        const { sortedData, sortField, sortDirection, handleSort } =
          useLeaderboardSort(LEADERBOARD_DATA);

        return jsxs(Fragment, {
          children: [
            jsx(PreviewBanner, {}),
            jsx(BenchmarkIntro, {}),
            jsx(LeaderboardTable, {
              sortedData,
              sortField,
              sortDirection,
              handleSort,
              originalData: LEADERBOARD_DATA,
            }),
          ],
        });
      }

      /** Default export: the page content inside the wrapper from module 3870. */
      function LeaderboardPage() {
        return jsx(wrapperModule.A, {
          children: jsx(LeaderboardContent, {}),
        });
      }
    },

    // Entry shim: requires the page module on a resolved-promise microtask.
    8538: (__unused_module, __unused_exports, __webpack_require__) => {
      Promise.resolve().then(__webpack_require__.bind(__webpack_require__, 4124));
    },
  },
  (runtime) => {
    const executeModule = (moduleId) => runtime((runtime.s = moduleId));
    // Run the entry module once the listed chunks are available.
    runtime.O(0, [475, 870, 815, 702, 358], () => executeModule(8538));
    // Intentionally assigns the bundle-wide global `_N_E`, as the generated
    // code did.
    _N_E = runtime.O();
  },
]);