diff --git "a/assets/index-7glZF3tM.js" "b/assets/index-7glZF3tM.js" new file mode 100644--- /dev/null +++ "b/assets/index-7glZF3tM.js" @@ -0,0 +1,2892 @@ +var D0=Object.defineProperty;var P0=(e,t,r)=>t in e?D0(e,t,{enumerable:!0,configurable:!0,writable:!0,value:r}):e[t]=r;var Ke=(e,t,r)=>P0(e,typeof t!="symbol"?t+"":t,r);(function(){const t=document.createElement("link").relList;if(t&&t.supports&&t.supports("modulepreload"))return;for(const s of document.querySelectorAll('link[rel="modulepreload"]'))i(s);new MutationObserver(s=>{for(const n of s)if(n.type==="childList")for(const a of n.addedNodes)a.tagName==="LINK"&&a.rel==="modulepreload"&&i(a)}).observe(document,{childList:!0,subtree:!0});function r(s){const n={};return s.integrity&&(n.integrity=s.integrity),s.referrerPolicy&&(n.referrerPolicy=s.referrerPolicy),s.crossOrigin==="use-credentials"?n.credentials="include":s.crossOrigin==="anonymous"?n.credentials="omit":n.credentials="same-origin",n}function i(s){if(s.ep)return;s.ep=!0;const n=r(s);fetch(s.href,n)}})();/** +* @vue/shared v3.5.18 +* (c) 2018-present Yuxi (Evan) You and Vue contributors +* @license MIT +**//*! #__NO_SIDE_EFFECTS__ */function ho(e){const t=Object.create(null);for(const r of e.split(","))t[r]=1;return r=>r in t}const Pe={},ii=[],Xt=()=>{},U0=()=>!1,Qn=e=>e.charCodeAt(0)===111&&e.charCodeAt(1)===110&&(e.charCodeAt(2)>122||e.charCodeAt(2)<97),mo=e=>e.startsWith("onUpdate:"),ot=Object.assign,go=(e,t)=>{const r=e.indexOf(t);r>-1&&e.splice(r,1)},W0=Object.prototype.hasOwnProperty,Me=(e,t)=>W0.call(e,t),Se=Array.isArray,Mi=e=>Jn(e)==="[object Map]",L0=e=>Jn(e)==="[object Set]",xe=e=>typeof e=="function",et=e=>typeof e=="string",ui=e=>typeof e=="symbol",He=e=>e!==null&&typeof e=="object",zf=e=>(He(e)||xe(e))&&xe(e.then)&&xe(e.catch),q0=Object.prototype.toString,Jn=e=>q0.call(e),V0=e=>Jn(e).slice(8,-1),F0=e=>Jn(e)==="[object Object]",_o=e=>et(e)&&e!=="NaN"&&e[0]!=="-"&&""+parseInt(e,10)===e,Ni=ho(",key,ref,ref_for,ref_key,onVnodeBeforeMount,onVnodeMounted,onVnodeBeforeUpdate,onVnodeUpdated,onVnodeBeforeUnmount,onVnodeUnmounted"),es=e=>{const t=Object.create(null);return r=>t[r]||(t[r]=e(r))},H0=/-(\w)/g,xr=es(e=>e.replace(H0,(t,r)=>r?r.toUpperCase():"")),G0=/\B([A-Z])/g,jr=es(e=>e.replace(G0,"-$1").toLowerCase()),Of=es(e=>e.charAt(0).toUpperCase()+e.slice(1)),Ss=es(e=>e?`on${Of(e)}`:""),$r=(e,t)=>!Object.is(e,t),ks=(e,...t)=>{for(let r=0;r{Object.defineProperty(e,t,{configurable:!0,enumerable:!1,writable:i,value:r})},j0=e=>{const t=parseFloat(e);return isNaN(t)?e:t};let bl;const ts=()=>bl||(bl=typeof globalThis<"u"?globalThis:typeof self<"u"?self:typeof window<"u"?window:typeof global<"u"?global:{});function yo(e){if(Se(e)){const t={};for(let r=0;r{if(r){const i=r.split(Z0);i.length>1&&(t[i[0].trim()]=i[1].trim())}}),t}function bo(e){let t="";if(et(e))t=e;else if(Se(e))for(let r=0;r0&&--this._on===0&&(nt=this.prevScope,this.prevScope=void 0)}stop(t){if(this._active){this._active=!1;let r,i;for(r=0,i=this.effects.length;r0)return;if(Pi){let t=Pi;for(Pi=void 0;t;){const r=t.next;t.next=void 0,t.flags&=-9,t=r}}let e;for(;Di;){let t=Di;for(Di=void 0;t;){const r=t.next;if(t.next=void 0,t.flags&=-9,t.flags&1)try{t.trigger()}catch(i){e||(e=i)}t=r}}if(e)throw e}function Pf(e){for(let t=e.deps;t;t=t.nextDep)t.version=-1,t.prevActiveLink=t.dep.activeLink,t.dep.activeLink=t}function Uf(e){let t,r=e.depsTail,i=r;for(;i;){const s=i.prevDep;i.version===-1?(i===r&&(r=s),$o(i),rb(i)):t=i,i.dep.activeLink=i.prevActiveLink,i.prevActiveLink=void 0,i=s}e.deps=t,e.depsTail=r}function Pa(e){for(let t=e.deps;t;t=t.nextDep)if(t.dep.version!==t.version||t.dep.computed&&(Wf(t.dep.computed)||t.dep.version!==t.version))return!0;return!!e._dirty}function Wf(e){if(e.flags&4&&!(e.flags&16)||(e.flags&=-17,e.globalVersion===Gi)||(e.globalVersion=Gi,!e.isSSR&&e.flags&128&&(!e.deps&&!e._dirty||!Pa(e))))return;e.flags|=2;const t=e.dep,r=Ue,i=Mt;Ue=e,Mt=!0;try{Pf(e);const s=e.fn(e._value);(t.version===0||$r(s,e._value))&&(e.flags|=128,e._value=s,t.version++)}catch(s){throw t.version++,s}finally{Ue=r,Mt=i,Uf(e),e.flags&=-3}}function $o(e,t=!1){const{dep:r,prevSub:i,nextSub:s}=e;if(i&&(i.nextSub=s,e.prevSub=void 0),s&&(s.prevSub=i,e.nextSub=void 0),r.subs===e&&(r.subs=i,!i&&r.computed)){r.computed.flags&=-5;for(let n=r.computed.deps;n;n=n.nextDep)$o(n,!0)}!t&&!--r.sc&&r.map&&r.map.delete(r.key)}function rb(e){const{prevDep:t,nextDep:r}=e;t&&(t.nextDep=r,e.prevDep=void 0),r&&(r.prevDep=t,e.nextDep=void 0)}let Mt=!0;const Lf=[];function lr(){Lf.push(Mt),Mt=!1}function dr(){const e=Lf.pop();Mt=e===void 0?!0:e}function wl(e){const{cleanup:t}=e;if(e.cleanup=void 0,t){const r=Ue;Ue=void 0;try{t()}finally{Ue=r}}}let Gi=0;class ib{constructor(t,r){this.sub=t,this.dep=r,this.version=r.version,this.nextDep=this.prevDep=this.nextSub=this.prevSub=this.prevActiveLink=void 0}}class rs{constructor(t){this.computed=t,this.version=0,this.activeLink=void 0,this.subs=void 0,this.map=void 0,this.key=void 0,this.sc=0,this.__v_skip=!0}track(t){if(!Ue||!Mt||Ue===this.computed)return;let r=this.activeLink;if(r===void 0||r.sub!==Ue)r=this.activeLink=new ib(Ue,this),Ue.deps?(r.prevDep=Ue.depsTail,Ue.depsTail.nextDep=r,Ue.depsTail=r):Ue.deps=Ue.depsTail=r,qf(r);else if(r.version===-1&&(r.version=this.version,r.nextDep)){const i=r.nextDep;i.prevDep=r.prevDep,r.prevDep&&(r.prevDep.nextDep=i),r.prevDep=Ue.depsTail,r.nextDep=void 0,Ue.depsTail.nextDep=r,Ue.depsTail=r,Ue.deps===r&&(Ue.deps=i)}return r}trigger(t){this.version++,Gi++,this.notify(t)}notify(t){wo();try{for(let r=this.subs;r;r=r.prevSub)r.sub.notify()&&r.sub.dep.notify()}finally{vo()}}}function qf(e){if(e.dep.sc++,e.sub.flags&4){const t=e.dep.computed;if(t&&!e.dep.subs){t.flags|=20;for(let i=t.deps;i;i=i.nextDep)qf(i)}const r=e.dep.subs;r!==e&&(e.prevSub=r,r&&(r.nextSub=e)),e.dep.subs=e}}const Pn=new WeakMap,Ur=Symbol(""),Ua=Symbol(""),ji=Symbol("");function at(e,t,r){if(Mt&&Ue){let i=Pn.get(e);i||Pn.set(e,i=new Map);let s=i.get(r);s||(i.set(r,s=new rs),s.map=i,s.key=r),s.track()}}function sr(e,t,r,i,s,n){const a=Pn.get(e);if(!a){Gi++;return}const o=u=>{u&&u.trigger()};if(wo(),t==="clear")a.forEach(o);else{const u=Se(e),d=u&&_o(r);if(u&&r==="length"){const c=Number(i);a.forEach((f,h)=>{(h==="length"||h===ji||!ui(h)&&h>=c)&&o(f)})}else switch((r!==void 0||a.has(void 0))&&o(a.get(r)),d&&o(a.get(ji)),t){case"add":u?d&&o(a.get("length")):(o(a.get(Ur)),Mi(e)&&o(a.get(Ua)));break;case"delete":u||(o(a.get(Ur)),Mi(e)&&o(a.get(Ua)));break;case"set":Mi(e)&&o(a.get(Ur));break}}vo()}function nb(e,t){const r=Pn.get(e);return r&&r.get(t)}function Qr(e){const t=Re(e);return t===e?t:(at(t,"iterate",ji),Nt(e)?t:t.map(ct))}function xo(e){return at(e=Re(e),"iterate",ji),e}const sb={__proto__:null,[Symbol.iterator](){return Is(this,Symbol.iterator,ct)},concat(...e){return Qr(this).concat(...e.map(t=>Se(t)?Qr(t):t))},entries(){return Is(this,"entries",e=>(e[1]=ct(e[1]),e))},every(e,t){return tr(this,"every",e,t,void 0,arguments)},filter(e,t){return tr(this,"filter",e,t,r=>r.map(ct),arguments)},find(e,t){return tr(this,"find",e,t,ct,arguments)},findIndex(e,t){return tr(this,"findIndex",e,t,void 0,arguments)},findLast(e,t){return tr(this,"findLast",e,t,ct,arguments)},findLastIndex(e,t){return tr(this,"findLastIndex",e,t,void 0,arguments)},forEach(e,t){return tr(this,"forEach",e,t,void 0,arguments)},includes(...e){return Es(this,"includes",e)},indexOf(...e){return Es(this,"indexOf",e)},join(e){return Qr(this).join(e)},lastIndexOf(...e){return Es(this,"lastIndexOf",e)},map(e,t){return tr(this,"map",e,t,void 0,arguments)},pop(){return gi(this,"pop")},push(...e){return gi(this,"push",e)},reduce(e,...t){return vl(this,"reduce",e,t)},reduceRight(e,...t){return vl(this,"reduceRight",e,t)},shift(){return gi(this,"shift")},some(e,t){return tr(this,"some",e,t,void 0,arguments)},splice(...e){return gi(this,"splice",e)},toReversed(){return Qr(this).toReversed()},toSorted(e){return Qr(this).toSorted(e)},toSpliced(...e){return Qr(this).toSpliced(...e)},unshift(...e){return gi(this,"unshift",e)},values(){return Is(this,"values",ct)}};function Is(e,t,r){const i=xo(e),s=i[t]();return i!==e&&!Nt(e)&&(s._next=s.next,s.next=()=>{const n=s._next();return n.value&&(n.value=r(n.value)),n}),s}const ab=Array.prototype;function tr(e,t,r,i,s,n){const a=xo(e),o=a!==e&&!Nt(e),u=a[t];if(u!==ab[t]){const f=u.apply(e,n);return o?ct(f):f}let d=r;a!==e&&(o?d=function(f,h){return r.call(this,ct(f),h,e)}:r.length>2&&(d=function(f,h){return r.call(this,f,h,e)}));const c=u.call(a,d,i);return o&&s?s(c):c}function vl(e,t,r,i){const s=xo(e);let n=r;return s!==e&&(Nt(e)?r.length>3&&(n=function(a,o,u){return r.call(this,a,o,u,e)}):n=function(a,o,u){return r.call(this,a,ct(o),u,e)}),s[t](n,...i)}function Es(e,t,r){const i=Re(e);at(i,"iterate",ji);const s=i[t](...r);return(s===-1||s===!1)&&Io(r[0])?(r[0]=Re(r[0]),i[t](...r)):s}function gi(e,t,r=[]){lr(),wo();const i=Re(e)[t].apply(e,r);return vo(),dr(),i}const ob=ho("__proto__,__v_isRef,__isVue"),Vf=new Set(Object.getOwnPropertyNames(Symbol).filter(e=>e!=="arguments"&&e!=="caller").map(e=>Symbol[e]).filter(ui));function ub(e){ui(e)||(e=String(e));const t=Re(this);return at(t,"has",e),t.hasOwnProperty(e)}class Ff{constructor(t=!1,r=!1){this._isReadonly=t,this._isShallow=r}get(t,r,i){if(r==="__v_skip")return t.__v_skip;const s=this._isReadonly,n=this._isShallow;if(r==="__v_isReactive")return!s;if(r==="__v_isReadonly")return s;if(r==="__v_isShallow")return n;if(r==="__v_raw")return i===(s?n?yb:Kf:n?jf:Gf).get(t)||Object.getPrototypeOf(t)===Object.getPrototypeOf(i)?t:void 0;const a=Se(t);if(!s){let u;if(a&&(u=sb[r]))return u;if(r==="hasOwnProperty")return ub}const o=Reflect.get(t,r,Ze(t)?t:i);return(ui(r)?Vf.has(r):ob(r))||(s||at(t,"get",r),n)?o:Ze(o)?a&&_o(r)?o:o.value:He(o)?s?en(o):ko(o):o}}class Hf extends Ff{constructor(t=!1){super(!1,t)}set(t,r,i,s){let n=t[r];if(!this._isShallow){const u=Vr(n);if(!Nt(i)&&!Vr(i)&&(n=Re(n),i=Re(i)),!Se(t)&&Ze(n)&&!Ze(i))return u?!1:(n.value=i,!0)}const a=Se(t)&&_o(r)?Number(r)e,mn=e=>Reflect.getPrototypeOf(e);function fb(e,t,r){return function(...i){const s=this.__v_raw,n=Re(s),a=Mi(n),o=e==="entries"||e===Symbol.iterator&&a,u=e==="keys"&&a,d=s[e](...i),c=r?Wa:t?La:ct;return!t&&at(n,"iterate",u?Ua:Ur),{next(){const{value:f,done:h}=d.next();return h?{value:f,done:h}:{value:o?[c(f[0]),c(f[1])]:c(f),done:h}},[Symbol.iterator](){return this}}}}function gn(e){return function(...t){return e==="delete"?!1:e==="clear"?void 0:this}}function hb(e,t){const r={get(s){const n=this.__v_raw,a=Re(n),o=Re(s);e||($r(s,o)&&at(a,"get",s),at(a,"get",o));const{has:u}=mn(a),d=t?Wa:e?La:ct;if(u.call(a,s))return d(n.get(s));if(u.call(a,o))return d(n.get(o));n!==a&&n.get(s)},get size(){const s=this.__v_raw;return!e&&at(Re(s),"iterate",Ur),Reflect.get(s,"size",s)},has(s){const n=this.__v_raw,a=Re(n),o=Re(s);return e||($r(s,o)&&at(a,"has",s),at(a,"has",o)),s===o?n.has(s):n.has(s)||n.has(o)},forEach(s,n){const a=this,o=a.__v_raw,u=Re(o),d=t?Wa:e?La:ct;return!e&&at(u,"iterate",Ur),o.forEach((c,f)=>s.call(n,d(c),d(f),a))}};return ot(r,e?{add:gn("add"),set:gn("set"),delete:gn("delete"),clear:gn("clear")}:{add(s){!t&&!Nt(s)&&!Vr(s)&&(s=Re(s));const n=Re(this);return mn(n).has.call(n,s)||(n.add(s),sr(n,"add",s,s)),this},set(s,n){!t&&!Nt(n)&&!Vr(n)&&(n=Re(n));const a=Re(this),{has:o,get:u}=mn(a);let d=o.call(a,s);d||(s=Re(s),d=o.call(a,s));const c=u.call(a,s);return a.set(s,n),d?$r(n,c)&&sr(a,"set",s,n):sr(a,"add",s,n),this},delete(s){const n=Re(this),{has:a,get:o}=mn(n);let u=a.call(n,s);u||(s=Re(s),u=a.call(n,s)),o&&o.call(n,s);const d=n.delete(s);return u&&sr(n,"delete",s,void 0),d},clear(){const s=Re(this),n=s.size!==0,a=s.clear();return n&&sr(s,"clear",void 0,void 0),a}}),["keys","values","entries",Symbol.iterator].forEach(s=>{r[s]=fb(s,e,t)}),r}function So(e,t){const r=hb(e,t);return(i,s,n)=>s==="__v_isReactive"?!e:s==="__v_isReadonly"?e:s==="__v_raw"?i:Reflect.get(Me(r,s)&&s in i?r:i,s,n)}const mb={get:So(!1,!1)},gb={get:So(!1,!0)},_b={get:So(!0,!1)};const Gf=new WeakMap,jf=new WeakMap,Kf=new WeakMap,yb=new WeakMap;function bb(e){switch(e){case"Object":case"Array":return 1;case"Map":case"Set":case"WeakMap":case"WeakSet":return 2;default:return 0}}function wb(e){return e.__v_skip||!Object.isExtensible(e)?0:bb(V0(e))}function ko(e){return Vr(e)?e:To(e,!1,db,mb,Gf)}function vb(e){return To(e,!1,pb,gb,jf)}function en(e){return To(e,!0,cb,_b,Kf)}function To(e,t,r,i,s){if(!He(e)||e.__v_raw&&!(t&&e.__v_isReactive))return e;const n=wb(e);if(n===0)return e;const a=s.get(e);if(a)return a;const o=new Proxy(e,n===2?i:r);return s.set(e,o),o}function Ui(e){return Vr(e)?Ui(e.__v_raw):!!(e&&e.__v_isReactive)}function Vr(e){return!!(e&&e.__v_isReadonly)}function Nt(e){return!!(e&&e.__v_isShallow)}function Io(e){return e?!!e.__v_raw:!1}function Re(e){const t=e&&e.__v_raw;return t?Re(t):e}function Zf(e){return!Me(e,"__v_skip")&&Object.isExtensible(e)&&Da(e,"__v_skip",!0),e}const ct=e=>He(e)?ko(e):e,La=e=>He(e)?en(e):e;function Ze(e){return e?e.__v_isRef===!0:!1}function or(e){return Xf(e,!1)}function Wr(e){return Xf(e,!0)}function Xf(e,t){return Ze(e)?e:new $b(e,t)}class $b{constructor(t,r){this.dep=new rs,this.__v_isRef=!0,this.__v_isShallow=!1,this._rawValue=r?t:Re(t),this._value=r?t:ct(t),this.__v_isShallow=r}get value(){return this.dep.track(),this._value}set value(t){const r=this._rawValue,i=this.__v_isShallow||Nt(t)||Vr(t);t=i?t:Re(t),$r(t,r)&&(this._rawValue=t,this._value=i?t:ct(t),this.dep.trigger())}}function Pr(e){return Ze(e)?e.value:e}function ft(e){return xe(e)?e():Pr(e)}const xb={get:(e,t,r)=>t==="__v_raw"?e:Pr(Reflect.get(e,t,r)),set:(e,t,r,i)=>{const s=e[t];return Ze(s)&&!Ze(r)?(s.value=r,!0):Reflect.set(e,t,r,i)}};function Yf(e){return Ui(e)?e:new Proxy(e,xb)}class Sb{constructor(t){this.__v_isRef=!0,this._value=void 0;const r=this.dep=new rs,{get:i,set:s}=t(r.track.bind(r),r.trigger.bind(r));this._get=i,this._set=s}get value(){return this._value=this._get()}set value(t){this._set(t)}}function kb(e){return new Sb(e)}class Tb{constructor(t,r,i){this._object=t,this._key=r,this._defaultValue=i,this.__v_isRef=!0,this._value=void 0}get value(){const t=this._object[this._key];return this._value=t===void 0?this._defaultValue:t}set value(t){this._object[this._key]=t}get dep(){return nb(Re(this._object),this._key)}}class Ib{constructor(t){this._getter=t,this.__v_isRef=!0,this.__v_isReadonly=!0,this._value=void 0}get value(){return this._value=this._getter()}}function Eb(e,t,r){return Ze(e)?e:xe(e)?new Ib(e):He(e)&&arguments.length>1?Cb(e,t,r):or(e)}function Cb(e,t,r){const i=e[t];return Ze(i)?i:new Tb(e,t,r)}class zb{constructor(t,r,i){this.fn=t,this.setter=r,this._value=void 0,this.dep=new rs(this),this.__v_isRef=!0,this.deps=void 0,this.depsTail=void 0,this.flags=16,this.globalVersion=Gi-1,this.next=void 0,this.effect=this,this.__v_isReadonly=!r,this.isSSR=i}notify(){if(this.flags|=16,!(this.flags&8)&&Ue!==this)return Df(this,!0),!0}get value(){const t=this.dep.track();return Wf(this),t&&(t.version=this.dep.version),this._value}set value(t){this.setter&&this.setter(t)}}function Ob(e,t,r=!1){let i,s;return xe(e)?i=e:(i=e.get,s=e.set),new zb(i,s,r)}const _n={},Un=new WeakMap;let Rr;function Ab(e,t=!1,r=Rr){if(r){let i=Un.get(r);i||Un.set(r,i=[]),i.push(e)}}function Rb(e,t,r=Pe){const{immediate:i,deep:s,once:n,scheduler:a,augmentJob:o,call:u}=r,d=x=>s?x:Nt(x)||s===!1||s===0?vr(x,1):vr(x);let c,f,h,m,g=!1,y=!1;if(Ze(e)?(f=()=>e.value,g=Nt(e)):Ui(e)?(f=()=>d(e),g=!0):Se(e)?(y=!0,g=e.some(x=>Ui(x)||Nt(x)),f=()=>e.map(x=>{if(Ze(x))return x.value;if(Ui(x))return d(x);if(xe(x))return u?u(x,2):x()})):xe(e)?t?f=u?()=>u(e,2):e:f=()=>{if(h){lr();try{h()}finally{dr()}}const x=Rr;Rr=c;try{return u?u(e,3,[m]):e(m)}finally{Rr=x}}:f=Xt,t&&s){const x=f,I=s===!0?1/0:s;f=()=>vr(x(),I)}const S=Bf(),v=()=>{c.stop(),S&&S.active&&go(S.effects,c)};if(n&&t){const x=t;t=(...I)=>{x(...I),v()}}let b=y?new Array(e.length).fill(_n):_n;const k=x=>{if(!(!(c.flags&1)||!c.dirty&&!x))if(t){const I=c.run();if(s||g||(y?I.some((z,O)=>$r(z,b[O])):$r(I,b))){h&&h();const z=Rr;Rr=c;try{const O=[I,b===_n?void 0:y&&b[0]===_n?[]:b,m];b=I,u?u(t,3,O):t(...O)}finally{Rr=z}}}else c.run()};return o&&o(k),c=new Mf(f),c.scheduler=a?()=>a(k,!1):k,m=x=>Ab(x,!1,c),h=c.onStop=()=>{const x=Un.get(c);if(x){if(u)u(x,4);else for(const I of x)I();Un.delete(c)}},t?i?k(!0):b=c.run():a?a(k.bind(null,!0),!0):c.run(),v.pause=c.pause.bind(c),v.resume=c.resume.bind(c),v.stop=v,v}function vr(e,t=1/0,r){if(t<=0||!He(e)||e.__v_skip||(r=r||new Set,r.has(e)))return e;if(r.add(e),t--,Ze(e))vr(e.value,t,r);else if(Se(e))for(let i=0;i{vr(i,t,r)});else if(F0(e)){for(const i in e)vr(e[i],t,r);for(const i of Object.getOwnPropertySymbols(e))Object.prototype.propertyIsEnumerable.call(e,i)&&vr(e[i],t,r)}return e}/** +* @vue/runtime-core v3.5.18 +* (c) 2018-present Yuxi (Evan) You and Vue contributors +* @license MIT +**/function tn(e,t,r,i){try{return i?e(...i):e()}catch(s){is(s,t,r)}}function Yt(e,t,r,i){if(xe(e)){const s=tn(e,t,r,i);return s&&zf(s)&&s.catch(n=>{is(n,t,r)}),s}if(Se(e)){const s=[];for(let n=0;n>>1,s=pt[i],n=Ki(s);n=Ki(r)?pt.push(e):pt.splice(Mb(t),0,e),e.flags|=1,Jf()}}function Jf(){Wn||(Wn=Qf.then(th))}function Nb(e){Se(e)?ni.push(...e):br&&e.id===-1?br.splice(ei+1,0,e):e.flags&1||(ni.push(e),e.flags|=1),Jf()}function $l(e,t,r=jt+1){for(;rKi(r)-Ki(i));if(ni.length=0,br){br.push(...t);return}for(br=t,ei=0;eie.id==null?e.flags&2?-1:1/0:e.id;function th(e){try{for(jt=0;jt{i._d&&Ol(-1);const n=Ln(t);let a;try{a=e(...s)}finally{Ln(n),i._d&&Ol(1)}return a};return i._n=!0,i._c=!0,i._d=!0,i}function Ir(e,t,r,i){const s=e.dirs,n=t&&t.dirs;for(let a=0;ae.__isTeleport;function zo(e,t){e.shapeFlag&6&&e.component?(e.transition=t,zo(e.component.subTree,t)):e.shapeFlag&128?(e.ssContent.transition=t.clone(e.ssContent),e.ssFallback.transition=t.clone(e.ssFallback)):e.transition=t}/*! #__NO_SIDE_EFFECTS__ */function Wb(e,t){return xe(e)?ot({name:e.name},t,{setup:e}):e}function ih(e){e.ids=[e.ids[0]+e.ids[2]+++"-",0,0]}function Lb(e){const t=li(),r=Wr(null);if(t){const s=t.refs===Pe?t.refs={}:t.refs;Object.defineProperty(s,e,{enumerable:!0,get:()=>r.value,set:n=>r.value=n})}return r}function Wi(e,t,r,i,s=!1){if(Se(e)){e.forEach((g,y)=>Wi(g,t&&(Se(t)?t[y]:t),r,i,s));return}if(Li(i)&&!s){i.shapeFlag&512&&i.type.__asyncResolved&&i.component.subTree.component&&Wi(e,t,r,i.component.subTree);return}const n=i.shapeFlag&4?Do(i.component):i.el,a=s?null:n,{i:o,r:u}=e,d=t&&t.r,c=o.refs===Pe?o.refs={}:o.refs,f=o.setupState,h=Re(f),m=f===Pe?()=>!1:g=>Me(h,g);if(d!=null&&d!==u&&(et(d)?(c[d]=null,m(d)&&(f[d]=null)):Ze(d)&&(d.value=null)),xe(u))tn(u,o,12,[a,c]);else{const g=et(u),y=Ze(u);if(g||y){const S=()=>{if(e.f){const v=g?m(u)?f[u]:c[u]:u.value;s?Se(v)&&go(v,n):Se(v)?v.includes(n)||v.push(n):g?(c[u]=[n],m(u)&&(f[u]=c[u])):(u.value=[n],e.k&&(c[e.k]=u.value))}else g?(c[u]=a,m(u)&&(f[u]=a)):y&&(u.value=a,e.k&&(c[e.k]=a))};a?(S.id=-1,bt(S,r)):S()}}}ts().requestIdleCallback;ts().cancelIdleCallback;const Li=e=>!!e.type.__asyncLoader,nh=e=>e.type.__isKeepAlive;function qb(e,t){sh(e,"a",t)}function Vb(e,t){sh(e,"da",t)}function sh(e,t,r=ht){const i=e.__wdc||(e.__wdc=()=>{let s=r;for(;s;){if(s.isDeactivated)return;s=s.parent}return e()});if(ns(t,i,r),r){let s=r.parent;for(;s&&s.parent;)nh(s.parent.vnode)&&Fb(i,t,r,s),s=s.parent}}function Fb(e,t,r,i){const s=ns(t,e,i,!0);Ao(()=>{go(i[t],s)},r)}function ns(e,t,r=ht,i=!1){if(r){const s=r[e]||(r[e]=[]),n=t.__weh||(t.__weh=(...a)=>{lr();const o=rn(r),u=Yt(t,r,e,a);return o(),dr(),u});return i?s.unshift(n):s.push(n),n}}const cr=e=>(t,r=ht)=>{(!Xi||e==="sp")&&ns(e,(...i)=>t(...i),r)},Hb=cr("bm"),Oo=cr("m"),Gb=cr("bu"),jb=cr("u"),Kb=cr("bum"),Ao=cr("um"),Zb=cr("sp"),Xb=cr("rtg"),Yb=cr("rtc");function Qb(e,t=ht){ns("ec",e,t)}const Jb=Symbol.for("v-ndc"),qa=e=>e?Th(e)?Do(e):qa(e.parent):null,qi=ot(Object.create(null),{$:e=>e,$el:e=>e.vnode.el,$data:e=>e.data,$props:e=>e.props,$attrs:e=>e.attrs,$slots:e=>e.slots,$refs:e=>e.refs,$parent:e=>qa(e.parent),$root:e=>qa(e.root),$host:e=>e.ce,$emit:e=>e.emit,$options:e=>oh(e),$forceUpdate:e=>e.f||(e.f=()=>{Co(e.update)}),$nextTick:e=>e.n||(e.n=Eo.bind(e.proxy)),$watch:e=>vw.bind(e)}),Cs=(e,t)=>e!==Pe&&!e.__isScriptSetup&&Me(e,t),ew={get({_:e},t){if(t==="__v_skip")return!0;const{ctx:r,setupState:i,data:s,props:n,accessCache:a,type:o,appContext:u}=e;let d;if(t[0]!=="$"){const m=a[t];if(m!==void 0)switch(m){case 1:return i[t];case 2:return s[t];case 4:return r[t];case 3:return n[t]}else{if(Cs(i,t))return a[t]=1,i[t];if(s!==Pe&&Me(s,t))return a[t]=2,s[t];if((d=e.propsOptions[0])&&Me(d,t))return a[t]=3,n[t];if(r!==Pe&&Me(r,t))return a[t]=4,r[t];Va&&(a[t]=0)}}const c=qi[t];let f,h;if(c)return t==="$attrs"&&at(e.attrs,"get",""),c(e);if((f=o.__cssModules)&&(f=f[t]))return f;if(r!==Pe&&Me(r,t))return a[t]=4,r[t];if(h=u.config.globalProperties,Me(h,t))return h[t]},set({_:e},t,r){const{data:i,setupState:s,ctx:n}=e;return Cs(s,t)?(s[t]=r,!0):i!==Pe&&Me(i,t)?(i[t]=r,!0):Me(e.props,t)||t[0]==="$"&&t.slice(1)in e?!1:(n[t]=r,!0)},has({_:{data:e,setupState:t,accessCache:r,ctx:i,appContext:s,propsOptions:n}},a){let o;return!!r[a]||e!==Pe&&Me(e,a)||Cs(t,a)||(o=n[0])&&Me(o,a)||Me(i,a)||Me(qi,a)||Me(s.config.globalProperties,a)},defineProperty(e,t,r){return r.get!=null?e._.accessCache[t]=0:Me(r,"value")&&this.set(e,t,r.value,null),Reflect.defineProperty(e,t,r)}};function xl(e){return Se(e)?e.reduce((t,r)=>(t[r]=null,t),{}):e}let Va=!0;function tw(e){const t=oh(e),r=e.proxy,i=e.ctx;Va=!1,t.beforeCreate&&Sl(t.beforeCreate,e,"bc");const{data:s,computed:n,methods:a,watch:o,provide:u,inject:d,created:c,beforeMount:f,mounted:h,beforeUpdate:m,updated:g,activated:y,deactivated:S,beforeDestroy:v,beforeUnmount:b,destroyed:k,unmounted:x,render:I,renderTracked:z,renderTriggered:O,errorCaptured:A,serverPrefetch:R,expose:W,inheritAttrs:ie,components:X,directives:ne,filters:Y}=t;if(d&&rw(d,i,null),a)for(const ae in a){const Z=a[ae];xe(Z)&&(i[ae]=Z.bind(r))}if(s){const ae=s.call(r,r);He(ae)&&(e.data=ko(ae))}if(Va=!0,n)for(const ae in n){const Z=n[ae],le=xe(Z)?Z.bind(r,r):xe(Z.get)?Z.get.bind(r,r):Xt,Be=!xe(Z)&&xe(Z.set)?Z.set.bind(r):Xt,D=Dt({get:le,set:Be});Object.defineProperty(i,ae,{enumerable:!0,configurable:!0,get:()=>D.value,set:q=>D.value=q})}if(o)for(const ae in o)ah(o[ae],i,r,ae);if(u){const ae=xe(u)?u.call(r):u;Reflect.ownKeys(ae).forEach(Z=>{uw(Z,ae[Z])})}c&&Sl(c,e,"c");function V(ae,Z){Se(Z)?Z.forEach(le=>ae(le.bind(r))):Z&&ae(Z.bind(r))}if(V(Hb,f),V(Oo,h),V(Gb,m),V(jb,g),V(qb,y),V(Vb,S),V(Qb,A),V(Yb,z),V(Xb,O),V(Kb,b),V(Ao,x),V(Zb,R),Se(W))if(W.length){const ae=e.exposed||(e.exposed={});W.forEach(Z=>{Object.defineProperty(ae,Z,{get:()=>r[Z],set:le=>r[Z]=le,enumerable:!0})})}else e.exposed||(e.exposed={});I&&e.render===Xt&&(e.render=I),ie!=null&&(e.inheritAttrs=ie),X&&(e.components=X),ne&&(e.directives=ne),R&&ih(e)}function rw(e,t,r=Xt){Se(e)&&(e=Fa(e));for(const i in e){const s=e[i];let n;He(s)?"default"in s?n=Vi(s.from||i,s.default,!0):n=Vi(s.from||i):n=Vi(s),Ze(n)?Object.defineProperty(t,i,{enumerable:!0,configurable:!0,get:()=>n.value,set:a=>n.value=a}):t[i]=n}}function Sl(e,t,r){Yt(Se(e)?e.map(i=>i.bind(t.proxy)):e.bind(t.proxy),t,r)}function ah(e,t,r,i){let s=i.includes(".")?vh(r,i):()=>r[i];if(et(e)){const n=t[e];xe(n)&&qr(s,n)}else if(xe(e))qr(s,e.bind(r));else if(He(e))if(Se(e))e.forEach(n=>ah(n,t,r,i));else{const n=xe(e.handler)?e.handler.bind(r):t[e.handler];xe(n)&&qr(s,n,e)}}function oh(e){const t=e.type,{mixins:r,extends:i}=t,{mixins:s,optionsCache:n,config:{optionMergeStrategies:a}}=e.appContext,o=n.get(t);let u;return o?u=o:!s.length&&!r&&!i?u=t:(u={},s.length&&s.forEach(d=>qn(u,d,a,!0)),qn(u,t,a)),He(t)&&n.set(t,u),u}function qn(e,t,r,i=!1){const{mixins:s,extends:n}=t;n&&qn(e,n,r,!0),s&&s.forEach(a=>qn(e,a,r,!0));for(const a in t)if(!(i&&a==="expose")){const o=iw[a]||r&&r[a];e[a]=o?o(e[a],t[a]):t[a]}return e}const iw={data:kl,props:Tl,emits:Tl,methods:Ci,computed:Ci,beforeCreate:dt,created:dt,beforeMount:dt,mounted:dt,beforeUpdate:dt,updated:dt,beforeDestroy:dt,beforeUnmount:dt,destroyed:dt,unmounted:dt,activated:dt,deactivated:dt,errorCaptured:dt,serverPrefetch:dt,components:Ci,directives:Ci,watch:sw,provide:kl,inject:nw};function kl(e,t){return t?e?function(){return ot(xe(e)?e.call(this,this):e,xe(t)?t.call(this,this):t)}:t:e}function nw(e,t){return Ci(Fa(e),Fa(t))}function Fa(e){if(Se(e)){const t={};for(let r=0;r1)return r&&xe(t)?t.call(i&&i.proxy):t}}function lh(){return!!(li()||Lr)}const dh={},ch=()=>Object.create(dh),ph=e=>Object.getPrototypeOf(e)===dh;function lw(e,t,r,i=!1){const s={},n=ch();e.propsDefaults=Object.create(null),fh(e,t,s,n);for(const a in e.propsOptions[0])a in s||(s[a]=void 0);r?e.props=i?s:vb(s):e.type.props?e.props=s:e.props=n,e.attrs=n}function dw(e,t,r,i){const{props:s,attrs:n,vnode:{patchFlag:a}}=e,o=Re(s),[u]=e.propsOptions;let d=!1;if((i||a>0)&&!(a&16)){if(a&8){const c=e.vnode.dynamicProps;for(let f=0;f{u=!0;const[h,m]=hh(f,t,!0);ot(a,h),m&&o.push(...m)};!r&&t.mixins.length&&t.mixins.forEach(c),e.extends&&c(e.extends),e.mixins&&e.mixins.forEach(c)}if(!n&&!u)return He(e)&&i.set(e,ii),ii;if(Se(n))for(let c=0;ce==="_"||e==="__"||e==="_ctx"||e==="$stable",Bo=e=>Se(e)?e.map(Kt):[Kt(e)],pw=(e,t,r)=>{if(t._n)return t;const i=Db((...s)=>Bo(t(...s)),r);return i._c=!1,i},mh=(e,t,r)=>{const i=e._ctx;for(const s in e){if(Ro(s))continue;const n=e[s];if(xe(n))t[s]=pw(s,n,i);else if(n!=null){const a=Bo(n);t[s]=()=>a}}},gh=(e,t)=>{const r=Bo(t);e.slots.default=()=>r},_h=(e,t,r)=>{for(const i in t)(r||!Ro(i))&&(e[i]=t[i])},fw=(e,t,r)=>{const i=e.slots=ch();if(e.vnode.shapeFlag&32){const s=t.__;s&&Da(i,"__",s,!0);const n=t._;n?(_h(i,t,r),r&&Da(i,"_",n,!0)):mh(t,i)}else t&&gh(e,t)},hw=(e,t,r)=>{const{vnode:i,slots:s}=e;let n=!0,a=Pe;if(i.shapeFlag&32){const o=t._;o?r&&o===1?n=!1:_h(s,t,r):(n=!t.$stable,mh(t,s)),a=t}else t&&(gh(e,t),a={default:1});if(n)for(const o in s)!Ro(o)&&a[o]==null&&delete s[o]},bt=Ew;function mw(e){return gw(e)}function gw(e,t){const r=ts();r.__VUE__=!0;const{insert:i,remove:s,patchProp:n,createElement:a,createText:o,createComment:u,setText:d,setElementText:c,parentNode:f,nextSibling:h,setScopeId:m=Xt,insertStaticContent:g}=e,y=(T,E,M,H=null,L=null,G=null,J=void 0,te=null,Q=!!E.dynamicChildren)=>{if(T===E)return;T&&!_i(T,E)&&(H=Ie(T),q(T,L,G,!0),T=null),E.patchFlag===-2&&(Q=!1,E.dynamicChildren=null);const{type:K,ref:me,shapeFlag:se}=E;switch(K){case as:S(T,E,M,H);break;case Sr:v(T,E,M,H);break;case Os:T==null&&b(E,M,H,J);break;case nr:X(T,E,M,H,L,G,J,te,Q);break;default:se&1?I(T,E,M,H,L,G,J,te,Q):se&6?ne(T,E,M,H,L,G,J,te,Q):(se&64||se&128)&&K.process(T,E,M,H,L,G,J,te,Q,tt)}me!=null&&L?Wi(me,T&&T.ref,G,E||T,!E):me==null&&T&&T.ref!=null&&Wi(T.ref,null,G,T,!0)},S=(T,E,M,H)=>{if(T==null)i(E.el=o(E.children),M,H);else{const L=E.el=T.el;E.children!==T.children&&d(L,E.children)}},v=(T,E,M,H)=>{T==null?i(E.el=u(E.children||""),M,H):E.el=T.el},b=(T,E,M,H)=>{[T.el,T.anchor]=g(T.children,E,M,H,T.el,T.anchor)},k=({el:T,anchor:E},M,H)=>{let L;for(;T&&T!==E;)L=h(T),i(T,M,H),T=L;i(E,M,H)},x=({el:T,anchor:E})=>{let M;for(;T&&T!==E;)M=h(T),s(T),T=M;s(E)},I=(T,E,M,H,L,G,J,te,Q)=>{E.type==="svg"?J="svg":E.type==="math"&&(J="mathml"),T==null?z(E,M,H,L,G,J,te,Q):R(T,E,L,G,J,te,Q)},z=(T,E,M,H,L,G,J,te)=>{let Q,K;const{props:me,shapeFlag:se,transition:he,dirs:be}=T;if(Q=T.el=a(T.type,G,me&&me.is,me),se&8?c(Q,T.children):se&16&&A(T.children,Q,null,H,L,zs(T,G),J,te),be&&Ir(T,null,H,"created"),O(Q,T,T.scopeId,J,H),me){for(const Ee in me)Ee!=="value"&&!Ni(Ee)&&n(Q,Ee,null,me[Ee],G,H);"value"in me&&n(Q,"value",null,me.value,G),(K=me.onVnodeBeforeMount)&&Ht(K,H,T)}be&&Ir(T,null,H,"beforeMount");const ke=_w(L,he);ke&&he.beforeEnter(Q),i(Q,E,M),((K=me&&me.onVnodeMounted)||ke||be)&&bt(()=>{K&&Ht(K,H,T),ke&&he.enter(Q),be&&Ir(T,null,H,"mounted")},L)},O=(T,E,M,H,L)=>{if(M&&m(T,M),H)for(let G=0;G{for(let K=Q;K{const te=E.el=T.el;let{patchFlag:Q,dynamicChildren:K,dirs:me}=E;Q|=T.patchFlag&16;const se=T.props||Pe,he=E.props||Pe;let be;if(M&&Er(M,!1),(be=he.onVnodeBeforeUpdate)&&Ht(be,M,E,T),me&&Ir(E,T,M,"beforeUpdate"),M&&Er(M,!0),(se.innerHTML&&he.innerHTML==null||se.textContent&&he.textContent==null)&&c(te,""),K?W(T.dynamicChildren,K,te,M,H,zs(E,L),G):J||Z(T,E,te,null,M,H,zs(E,L),G,!1),Q>0){if(Q&16)ie(te,se,he,M,L);else if(Q&2&&se.class!==he.class&&n(te,"class",null,he.class,L),Q&4&&n(te,"style",se.style,he.style,L),Q&8){const ke=E.dynamicProps;for(let Ee=0;Ee{be&&Ht(be,M,E,T),me&&Ir(E,T,M,"updated")},H)},W=(T,E,M,H,L,G,J)=>{for(let te=0;te{if(E!==M){if(E!==Pe)for(const G in E)!Ni(G)&&!(G in M)&&n(T,G,E[G],null,L,H);for(const G in M){if(Ni(G))continue;const J=M[G],te=E[G];J!==te&&G!=="value"&&n(T,G,te,J,L,H)}"value"in M&&n(T,"value",E.value,M.value,L)}},X=(T,E,M,H,L,G,J,te,Q)=>{const K=E.el=T?T.el:o(""),me=E.anchor=T?T.anchor:o("");let{patchFlag:se,dynamicChildren:he,slotScopeIds:be}=E;be&&(te=te?te.concat(be):be),T==null?(i(K,M,H),i(me,M,H),A(E.children||[],M,me,L,G,J,te,Q)):se>0&&se&64&&he&&T.dynamicChildren?(W(T.dynamicChildren,he,M,L,G,J,te),(E.key!=null||L&&E===L.subTree)&&yh(T,E,!0)):Z(T,E,M,me,L,G,J,te,Q)},ne=(T,E,M,H,L,G,J,te,Q)=>{E.slotScopeIds=te,T==null?E.shapeFlag&512?L.ctx.activate(E,M,H,J,Q):Y(E,M,H,L,G,J,Q):oe(T,E,Q)},Y=(T,E,M,H,L,G,J)=>{const te=T.component=Dw(T,H,L);if(nh(T)&&(te.ctx.renderer=tt),Pw(te,!1,J),te.asyncDep){if(L&&L.registerDep(te,V,J),!T.el){const Q=te.subTree=ur(Sr);v(null,Q,E,M),T.placeholder=Q.el}}else V(te,T,E,M,L,G,J)},oe=(T,E,M)=>{const H=E.component=T.component;if(Tw(T,E,M))if(H.asyncDep&&!H.asyncResolved){ae(H,E,M);return}else H.next=E,H.update();else E.el=T.el,H.vnode=E},V=(T,E,M,H,L,G,J)=>{const te=()=>{if(T.isMounted){let{next:se,bu:he,u:be,parent:ke,vnode:Ee}=T;{const je=bh(T);if(je){se&&(se.el=Ee.el,ae(T,se,J)),je.asyncDep.then(()=>{T.isUnmounted||te()});return}}let ge=se,Qe;Er(T,!1),se?(se.el=Ee.el,ae(T,se,J)):se=Ee,he&&ks(he),(Qe=se.props&&se.props.onVnodeBeforeUpdate)&&Ht(Qe,ke,se,Ee),Er(T,!0);const rt=Cl(T),mt=T.subTree;T.subTree=rt,y(mt,rt,f(mt.el),Ie(mt),T,L,G),se.el=rt.el,ge===null&&Iw(T,rt.el),be&&bt(be,L),(Qe=se.props&&se.props.onVnodeUpdated)&&bt(()=>Ht(Qe,ke,se,Ee),L)}else{let se;const{el:he,props:be}=E,{bm:ke,m:Ee,parent:ge,root:Qe,type:rt}=T,mt=Li(E);Er(T,!1),ke&&ks(ke),!mt&&(se=be&&be.onVnodeBeforeMount)&&Ht(se,ge,E),Er(T,!0);{Qe.ce&&Qe.ce._def.shadowRoot!==!1&&Qe.ce._injectChildStyle(rt);const je=T.subTree=Cl(T);y(null,je,M,H,T,L,G),E.el=je.el}if(Ee&&bt(Ee,L),!mt&&(se=be&&be.onVnodeMounted)){const je=E;bt(()=>Ht(se,ge,je),L)}(E.shapeFlag&256||ge&&Li(ge.vnode)&&ge.vnode.shapeFlag&256)&&T.a&&bt(T.a,L),T.isMounted=!0,E=M=H=null}};T.scope.on();const Q=T.effect=new Mf(te);T.scope.off();const K=T.update=Q.run.bind(Q),me=T.job=Q.runIfDirty.bind(Q);me.i=T,me.id=T.uid,Q.scheduler=()=>Co(me),Er(T,!0),K()},ae=(T,E,M)=>{E.component=T;const H=T.vnode.props;T.vnode=E,T.next=null,dw(T,E.props,H,M),hw(T,E.children,M),lr(),$l(T),dr()},Z=(T,E,M,H,L,G,J,te,Q=!1)=>{const K=T&&T.children,me=T?T.shapeFlag:0,se=E.children,{patchFlag:he,shapeFlag:be}=E;if(he>0){if(he&128){Be(K,se,M,H,L,G,J,te,Q);return}else if(he&256){le(K,se,M,H,L,G,J,te,Q);return}}be&8?(me&16&&U(K,L,G),se!==K&&c(M,se)):me&16?be&16?Be(K,se,M,H,L,G,J,te,Q):U(K,L,G,!0):(me&8&&c(M,""),be&16&&A(se,M,H,L,G,J,te,Q))},le=(T,E,M,H,L,G,J,te,Q)=>{T=T||ii,E=E||ii;const K=T.length,me=E.length,se=Math.min(K,me);let he;for(he=0;heme?U(T,L,G,!0,!1,se):A(E,M,H,L,G,J,te,Q,se)},Be=(T,E,M,H,L,G,J,te,Q)=>{let K=0;const me=E.length;let se=T.length-1,he=me-1;for(;K<=se&&K<=he;){const be=T[K],ke=E[K]=Q?wr(E[K]):Kt(E[K]);if(_i(be,ke))y(be,ke,M,null,L,G,J,te,Q);else break;K++}for(;K<=se&&K<=he;){const be=T[se],ke=E[he]=Q?wr(E[he]):Kt(E[he]);if(_i(be,ke))y(be,ke,M,null,L,G,J,te,Q);else break;se--,he--}if(K>se){if(K<=he){const be=he+1,ke=behe)for(;K<=se;)q(T[K],L,G,!0),K++;else{const be=K,ke=K,Ee=new Map;for(K=ke;K<=he;K++){const Ve=E[K]=Q?wr(E[K]):Kt(E[K]);Ve.key!=null&&Ee.set(Ve.key,K)}let ge,Qe=0;const rt=he-ke+1;let mt=!1,je=0;const Tt=new Array(rt);for(K=0;K=rt){q(Ve,L,G,!0);continue}let _t;if(Ve.key!=null)_t=Ee.get(Ve.key);else for(ge=ke;ge<=he;ge++)if(Tt[ge-ke]===0&&_i(Ve,E[ge])){_t=ge;break}_t===void 0?q(Ve,L,G,!0):(Tt[_t-ke]=K+1,_t>=je?je=_t:mt=!0,y(Ve,E[_t],M,null,L,G,J,te,Q),Qe++)}const ci=mt?yw(Tt):ii;for(ge=ci.length-1,K=rt-1;K>=0;K--){const Ve=ke+K,_t=E[Ve],pi=E[Ve+1],fi=Ve+1{const{el:G,type:J,transition:te,children:Q,shapeFlag:K}=T;if(K&6){D(T.component.subTree,E,M,H);return}if(K&128){T.suspense.move(E,M,H);return}if(K&64){J.move(T,E,M,tt);return}if(J===nr){i(G,E,M);for(let se=0;sete.enter(G),L);else{const{leave:se,delayLeave:he,afterLeave:be}=te,ke=()=>{T.ctx.isUnmounted?s(G):i(G,E,M)},Ee=()=>{se(G,()=>{ke(),be&&be()})};he?he(G,ke,Ee):Ee()}else i(G,E,M)},q=(T,E,M,H=!1,L=!1)=>{const{type:G,props:J,ref:te,children:Q,dynamicChildren:K,shapeFlag:me,patchFlag:se,dirs:he,cacheIndex:be}=T;if(se===-2&&(L=!1),te!=null&&(lr(),Wi(te,null,M,T,!0),dr()),be!=null&&(E.renderCache[be]=void 0),me&256){E.ctx.deactivate(T);return}const ke=me&1&&he,Ee=!Li(T);let ge;if(Ee&&(ge=J&&J.onVnodeBeforeUnmount)&&Ht(ge,E,T),me&6)Ge(T.component,M,H);else{if(me&128){T.suspense.unmount(M,H);return}ke&&Ir(T,null,E,"beforeUnmount"),me&64?T.type.remove(T,E,M,tt,H):K&&!K.hasOnce&&(G!==nr||se>0&&se&64)?U(K,E,M,!1,!0):(G===nr&&se&384||!L&&me&16)&&U(Q,E,M),H&&ee(T)}(Ee&&(ge=J&&J.onVnodeUnmounted)||ke)&&bt(()=>{ge&&Ht(ge,E,T),ke&&Ir(T,null,E,"unmounted")},M)},ee=T=>{const{type:E,el:M,anchor:H,transition:L}=T;if(E===nr){ye(M,H);return}if(E===Os){x(T);return}const G=()=>{s(M),L&&!L.persisted&&L.afterLeave&&L.afterLeave()};if(T.shapeFlag&1&&L&&!L.persisted){const{leave:J,delayLeave:te}=L,Q=()=>J(M,G);te?te(T.el,G,Q):Q()}else G()},ye=(T,E)=>{let M;for(;T!==E;)M=h(T),s(T),T=M;s(E)},Ge=(T,E,M)=>{const{bum:H,scope:L,job:G,subTree:J,um:te,m:Q,a:K,parent:me,slots:{__:se}}=T;El(Q),El(K),H&&ks(H),me&&Se(se)&&se.forEach(he=>{me.renderCache[he]=void 0}),L.stop(),G&&(G.flags|=8,q(J,T,E,M)),te&&bt(te,E),bt(()=>{T.isUnmounted=!0},E),E&&E.pendingBranch&&!E.isUnmounted&&T.asyncDep&&!T.asyncResolved&&T.suspenseId===E.pendingId&&(E.deps--,E.deps===0&&E.resolve())},U=(T,E,M,H=!1,L=!1,G=0)=>{for(let J=G;J{if(T.shapeFlag&6)return Ie(T.component.subTree);if(T.shapeFlag&128)return T.suspense.next();const E=h(T.anchor||T.el),M=E&&E[Pb];return M?h(M):E};let Wt=!1;const Ye=(T,E,M)=>{T==null?E._vnode&&q(E._vnode,null,null,!0):y(E._vnode||null,T,E,null,null,null,M),E._vnode=T,Wt||(Wt=!0,$l(),eh(),Wt=!1)},tt={p:y,um:q,m:D,r:ee,mt:Y,mc:A,pc:Z,pbc:W,n:Ie,o:e};return{render:Ye,hydrate:void 0,createApp:ow(Ye)}}function zs({type:e,props:t},r){return r==="svg"&&e==="foreignObject"||r==="mathml"&&e==="annotation-xml"&&t&&t.encoding&&t.encoding.includes("html")?void 0:r}function Er({effect:e,job:t},r){r?(e.flags|=32,t.flags|=4):(e.flags&=-33,t.flags&=-5)}function _w(e,t){return(!e||e&&!e.pendingBranch)&&t&&!t.persisted}function yh(e,t,r=!1){const i=e.children,s=t.children;if(Se(i)&&Se(s))for(let n=0;n>1,e[r[o]]0&&(t[i]=r[n-1]),r[n]=i)}}for(n=r.length,a=r[n-1];n-- >0;)r[n]=a,a=t[a];return r}function bh(e){const t=e.subTree.component;if(t)return t.asyncDep&&!t.asyncResolved?t:bh(t)}function El(e){if(e)for(let t=0;tVi(bw);function wh(e,t){return Mo(e,null,t)}function qr(e,t,r){return Mo(e,t,r)}function Mo(e,t,r=Pe){const{immediate:i,deep:s,flush:n,once:a}=r,o=ot({},r),u=t&&i||!t&&n!=="post";let d;if(Xi){if(n==="sync"){const m=ww();d=m.__watcherHandles||(m.__watcherHandles=[])}else if(!u){const m=()=>{};return m.stop=Xt,m.resume=Xt,m.pause=Xt,m}}const c=ht;o.call=(m,g,y)=>Yt(m,c,g,y);let f=!1;n==="post"?o.scheduler=m=>{bt(m,c&&c.suspense)}:n!=="sync"&&(f=!0,o.scheduler=(m,g)=>{g?m():Co(m)}),o.augmentJob=m=>{t&&(m.flags|=4),f&&(m.flags|=2,c&&(m.id=c.uid,m.i=c))};const h=Rb(e,t,o);return Xi&&(d?d.push(h):u&&h()),h}function vw(e,t,r){const i=this.proxy,s=et(e)?e.includes(".")?vh(i,e):()=>i[e]:e.bind(i,i);let n;xe(t)?n=t:(n=t.handler,r=t);const a=rn(this),o=Mo(s,n.bind(i),r);return a(),o}function vh(e,t){const r=t.split(".");return()=>{let i=e;for(let s=0;st==="modelValue"||t==="model-value"?e.modelModifiers:e[`${t}Modifiers`]||e[`${xr(t)}Modifiers`]||e[`${jr(t)}Modifiers`];function xw(e,t,...r){if(e.isUnmounted)return;const i=e.vnode.props||Pe;let s=r;const n=t.startsWith("update:"),a=n&&$w(i,t.slice(7));a&&(a.trim&&(s=r.map(c=>et(c)?c.trim():c)),a.number&&(s=r.map(j0)));let o,u=i[o=Ss(t)]||i[o=Ss(xr(t))];!u&&n&&(u=i[o=Ss(jr(t))]),u&&Yt(u,e,6,s);const d=i[o+"Once"];if(d){if(!e.emitted)e.emitted={};else if(e.emitted[o])return;e.emitted[o]=!0,Yt(d,e,6,s)}}function $h(e,t,r=!1){const i=t.emitsCache,s=i.get(e);if(s!==void 0)return s;const n=e.emits;let a={},o=!1;if(!xe(e)){const u=d=>{const c=$h(d,t,!0);c&&(o=!0,ot(a,c))};!r&&t.mixins.length&&t.mixins.forEach(u),e.extends&&u(e.extends),e.mixins&&e.mixins.forEach(u)}return!n&&!o?(He(e)&&i.set(e,null),null):(Se(n)?n.forEach(u=>a[u]=null):ot(a,n),He(e)&&i.set(e,a),a)}function ss(e,t){return!e||!Qn(t)?!1:(t=t.slice(2).replace(/Once$/,""),Me(e,t[0].toLowerCase()+t.slice(1))||Me(e,jr(t))||Me(e,t))}function Cl(e){const{type:t,vnode:r,proxy:i,withProxy:s,propsOptions:[n],slots:a,attrs:o,emit:u,render:d,renderCache:c,props:f,data:h,setupState:m,ctx:g,inheritAttrs:y}=e,S=Ln(e);let v,b;try{if(r.shapeFlag&4){const x=s||i,I=x;v=Kt(d.call(I,x,c,f,m,h,g)),b=o}else{const x=t;v=Kt(x.length>1?x(f,{attrs:o,slots:a,emit:u}):x(f,null)),b=t.props?o:Sw(o)}}catch(x){Fi.length=0,is(x,e,1),v=ur(Sr)}let k=v;if(b&&y!==!1){const x=Object.keys(b),{shapeFlag:I}=k;x.length&&I&7&&(n&&x.some(mo)&&(b=kw(b,n)),k=si(k,b,!1,!0))}return r.dirs&&(k=si(k,null,!1,!0),k.dirs=k.dirs?k.dirs.concat(r.dirs):r.dirs),r.transition&&zo(k,r.transition),v=k,Ln(S),v}const Sw=e=>{let t;for(const r in e)(r==="class"||r==="style"||Qn(r))&&((t||(t={}))[r]=e[r]);return t},kw=(e,t)=>{const r={};for(const i in e)(!mo(i)||!(i.slice(9)in t))&&(r[i]=e[i]);return r};function Tw(e,t,r){const{props:i,children:s,component:n}=e,{props:a,children:o,patchFlag:u}=t,d=n.emitsOptions;if(t.dirs||t.transition)return!0;if(r&&u>=0){if(u&1024)return!0;if(u&16)return i?zl(i,a,d):!!a;if(u&8){const c=t.dynamicProps;for(let f=0;fe.__isSuspense;function Ew(e,t){t&&t.pendingBranch?Se(e)?t.effects.push(...e):t.effects.push(e):Nb(e)}const nr=Symbol.for("v-fgt"),as=Symbol.for("v-txt"),Sr=Symbol.for("v-cmt"),Os=Symbol.for("v-stc"),Fi=[];let vt=null;function zi(e=!1){Fi.push(vt=e?null:[])}function Cw(){Fi.pop(),vt=Fi[Fi.length-1]||null}let Zi=1;function Ol(e,t=!1){Zi+=e,e<0&&vt&&t&&(vt.hasOnce=!0)}function Sh(e){return e.dynamicChildren=Zi>0?vt||ii:null,Cw(),Zi>0&&vt&&vt.push(e),e}function yn(e,t,r,i,s,n){return Sh(At(e,t,r,i,s,n,!0))}function zw(e,t,r,i,s){return Sh(ur(e,t,r,i,s,!0))}function Br(e){return e?e.__v_isVNode===!0:!1}function _i(e,t){return e.type===t.type&&e.key===t.key}const kh=({key:e})=>e??null,Bn=({ref:e,ref_key:t,ref_for:r})=>(typeof e=="number"&&(e=""+e),e!=null?et(e)||Ze(e)||xe(e)?{i:Zt,r:e,k:t,f:!!r}:e:null);function At(e,t=null,r=null,i=0,s=null,n=e===nr?0:1,a=!1,o=!1){const u={__v_isVNode:!0,__v_skip:!0,type:e,props:t,key:t&&kh(t),ref:t&&Bn(t),scopeId:rh,slotScopeIds:null,children:r,component:null,suspense:null,ssContent:null,ssFallback:null,dirs:null,transition:null,el:null,anchor:null,target:null,targetStart:null,targetAnchor:null,staticCount:0,shapeFlag:n,patchFlag:i,dynamicProps:s,dynamicChildren:null,appContext:null,ctx:Zt};return o?(No(u,r),n&128&&e.normalize(u)):r&&(u.shapeFlag|=et(r)?8:16),Zi>0&&!a&&vt&&(u.patchFlag>0||n&6)&&u.patchFlag!==32&&vt.push(u),u}const ur=Ow;function Ow(e,t=null,r=null,i=0,s=null,n=!1){if((!e||e===Jb)&&(e=Sr),Br(e)){const o=si(e,t,!0);return r&&No(o,r),Zi>0&&!n&&vt&&(o.shapeFlag&6?vt[vt.indexOf(e)]=o:vt.push(o)),o.patchFlag=-2,o}if(qw(e)&&(e=e.__vccOpts),t){t=Aw(t);let{class:o,style:u}=t;o&&!et(o)&&(t.class=bo(o)),He(u)&&(Io(u)&&!Se(u)&&(u=ot({},u)),t.style=yo(u))}const a=et(e)?1:xh(e)?128:Ub(e)?64:He(e)?4:xe(e)?2:0;return At(e,t,r,i,s,a,n,!0)}function Aw(e){return e?Io(e)||ph(e)?ot({},e):e:null}function si(e,t,r=!1,i=!1){const{props:s,ref:n,patchFlag:a,children:o,transition:u}=e,d=t?Bw(s||{},t):s,c={__v_isVNode:!0,__v_skip:!0,type:e.type,props:d,key:d&&kh(d),ref:t&&t.ref?r&&n?Se(n)?n.concat(Bn(t)):[n,Bn(t)]:Bn(t):n,scopeId:e.scopeId,slotScopeIds:e.slotScopeIds,children:o,target:e.target,targetStart:e.targetStart,targetAnchor:e.targetAnchor,staticCount:e.staticCount,shapeFlag:e.shapeFlag,patchFlag:t&&e.type!==nr?a===-1?16:a|16:a,dynamicProps:e.dynamicProps,dynamicChildren:e.dynamicChildren,appContext:e.appContext,dirs:e.dirs,transition:u,component:e.component,suspense:e.suspense,ssContent:e.ssContent&&si(e.ssContent),ssFallback:e.ssFallback&&si(e.ssFallback),placeholder:e.placeholder,el:e.el,anchor:e.anchor,ctx:e.ctx,ce:e.ce};return u&&i&&zo(c,u.clone(c)),c}function Ga(e=" ",t=0){return ur(as,null,e,t)}function Rw(e="",t=!1){return t?(zi(),zw(Sr,null,e)):ur(Sr,null,e)}function Kt(e){return e==null||typeof e=="boolean"?ur(Sr):Se(e)?ur(nr,null,e.slice()):Br(e)?wr(e):ur(as,null,String(e))}function wr(e){return e.el===null&&e.patchFlag!==-1||e.memo?e:si(e)}function No(e,t){let r=0;const{shapeFlag:i}=e;if(t==null)t=null;else if(Se(t))r=16;else if(typeof t=="object")if(i&65){const s=t.default;s&&(s._c&&(s._d=!1),No(e,s()),s._c&&(s._d=!0));return}else{r=32;const s=t._;!s&&!ph(t)?t._ctx=Zt:s===3&&Zt&&(Zt.slots._===1?t._=1:(t._=2,e.patchFlag|=1024))}else xe(t)?(t={default:t,_ctx:Zt},r=32):(t=String(t),i&64?(r=16,t=[Ga(t)]):r=8);e.children=t,e.shapeFlag|=r}function Bw(...e){const t={};for(let r=0;rht||Zt;let Vn,ja;{const e=ts(),t=(r,i)=>{let s;return(s=e[r])||(s=e[r]=[]),s.push(i),n=>{s.length>1?s.forEach(a=>a(n)):s[0](n)}};Vn=t("__VUE_INSTANCE_SETTERS__",r=>ht=r),ja=t("__VUE_SSR_SETTERS__",r=>Xi=r)}const rn=e=>{const t=ht;return Vn(e),e.scope.on(),()=>{e.scope.off(),Vn(t)}},Al=()=>{ht&&ht.scope.off(),Vn(null)};function Th(e){return e.vnode.shapeFlag&4}let Xi=!1;function Pw(e,t=!1,r=!1){t&&ja(t);const{props:i,children:s}=e.vnode,n=Th(e);lw(e,i,n,t),fw(e,s,r||t);const a=n?Uw(e,t):void 0;return t&&ja(!1),a}function Uw(e,t){const r=e.type;e.accessCache=Object.create(null),e.proxy=new Proxy(e.ctx,ew);const{setup:i}=r;if(i){lr();const s=e.setupContext=i.length>1?Lw(e):null,n=rn(e),a=tn(i,e,0,[e.props,s]),o=zf(a);if(dr(),n(),(o||e.sp)&&!Li(e)&&ih(e),o){if(a.then(Al,Al),t)return a.then(u=>{Rl(e,u)}).catch(u=>{is(u,e,0)});e.asyncDep=a}else Rl(e,a)}else Ih(e)}function Rl(e,t,r){xe(t)?e.type.__ssrInlineRender?e.ssrRender=t:e.render=t:He(t)&&(e.setupState=Yf(t)),Ih(e)}function Ih(e,t,r){const i=e.type;e.render||(e.render=i.render||Xt);{const s=rn(e);lr();try{tw(e)}finally{dr(),s()}}}const Ww={get(e,t){return at(e,"get",""),e[t]}};function Lw(e){const t=r=>{e.exposed=r||{}};return{attrs:new Proxy(e.attrs,Ww),slots:e.slots,emit:e.emit,expose:t}}function Do(e){return e.exposed?e.exposeProxy||(e.exposeProxy=new Proxy(Yf(Zf(e.exposed)),{get(t,r){if(r in t)return t[r];if(r in qi)return qi[r](e)},has(t,r){return r in t||r in qi}})):e.proxy}function qw(e){return xe(e)&&"__vccOpts"in e}const Dt=(e,t)=>Ob(e,t,Xi),Vw="3.5.18";/** +* @vue/runtime-dom v3.5.18 +* (c) 2018-present Yuxi (Evan) You and Vue contributors +* @license MIT +**/let Ka;const Bl=typeof window<"u"&&window.trustedTypes;if(Bl)try{Ka=Bl.createPolicy("vue",{createHTML:e=>e})}catch{}const Eh=Ka?e=>Ka.createHTML(e):e=>e,Fw="http://www.w3.org/2000/svg",Hw="http://www.w3.org/1998/Math/MathML",ir=typeof document<"u"?document:null,Ml=ir&&ir.createElement("template"),Gw={insert:(e,t,r)=>{t.insertBefore(e,r||null)},remove:e=>{const t=e.parentNode;t&&t.removeChild(e)},createElement:(e,t,r,i)=>{const s=t==="svg"?ir.createElementNS(Fw,e):t==="mathml"?ir.createElementNS(Hw,e):r?ir.createElement(e,{is:r}):ir.createElement(e);return e==="select"&&i&&i.multiple!=null&&s.setAttribute("multiple",i.multiple),s},createText:e=>ir.createTextNode(e),createComment:e=>ir.createComment(e),setText:(e,t)=>{e.nodeValue=t},setElementText:(e,t)=>{e.textContent=t},parentNode:e=>e.parentNode,nextSibling:e=>e.nextSibling,querySelector:e=>ir.querySelector(e),setScopeId(e,t){e.setAttribute(t,"")},insertStaticContent(e,t,r,i,s,n){const a=r?r.previousSibling:t.lastChild;if(s&&(s===n||s.nextSibling))for(;t.insertBefore(s.cloneNode(!0),r),!(s===n||!(s=s.nextSibling)););else{Ml.innerHTML=Eh(i==="svg"?`${e}`:i==="mathml"?`${e}`:e);const o=Ml.content;if(i==="svg"||i==="mathml"){const u=o.firstChild;for(;u.firstChild;)o.appendChild(u.firstChild);o.removeChild(u)}t.insertBefore(o,r)}return[a?a.nextSibling:t.firstChild,r?r.previousSibling:t.lastChild]}},jw=Symbol("_vtc");function Kw(e,t,r){const i=e[jw];i&&(t=(t?[t,...i]:[...i]).join(" ")),t==null?e.removeAttribute("class"):r?e.setAttribute("class",t):e.className=t}const Nl=Symbol("_vod"),Zw=Symbol("_vsh"),Xw=Symbol(""),Yw=/(^|;)\s*display\s*:/;function Qw(e,t,r){const i=e.style,s=et(r);let n=!1;if(r&&!s){if(t)if(et(t))for(const a of t.split(";")){const o=a.slice(0,a.indexOf(":")).trim();r[o]==null&&Mn(i,o,"")}else for(const a in t)r[a]==null&&Mn(i,a,"");for(const a in r)a==="display"&&(n=!0),Mn(i,a,r[a])}else if(s){if(t!==r){const a=i[Xw];a&&(r+=";"+a),i.cssText=r,n=Yw.test(r)}}else t&&e.removeAttribute("style");Nl in e&&(e[Nl]=n?i.display:"",e[Zw]&&(i.display="none"))}const Dl=/\s*!important$/;function Mn(e,t,r){if(Se(r))r.forEach(i=>Mn(e,t,i));else if(r==null&&(r=""),t.startsWith("--"))e.setProperty(t,r);else{const i=Jw(e,t);Dl.test(r)?e.setProperty(jr(i),r.replace(Dl,""),"important"):e[i]=r}}const Pl=["Webkit","Moz","ms"],As={};function Jw(e,t){const r=As[t];if(r)return r;let i=xr(t);if(i!=="filter"&&i in e)return As[t]=i;i=Of(i);for(let s=0;sRs||(nv.then(()=>Rs=0),Rs=Date.now());function av(e,t){const r=i=>{if(!i._vts)i._vts=Date.now();else if(i._vts<=r.attached)return;Yt(ov(i,r.value),t,5,[i])};return r.value=e,r.attached=sv(),r}function ov(e,t){if(Se(t)){const r=e.stopImmediatePropagation;return e.stopImmediatePropagation=()=>{r.call(e),e._stopped=!0},t.map(i=>s=>!s._stopped&&i&&i(s))}else return t}const Fl=e=>e.charCodeAt(0)===111&&e.charCodeAt(1)===110&&e.charCodeAt(2)>96&&e.charCodeAt(2)<123,uv=(e,t,r,i,s,n)=>{const a=s==="svg";t==="class"?Kw(e,i,a):t==="style"?Qw(e,r,i):Qn(t)?mo(t)||rv(e,t,r,i,n):(t[0]==="."?(t=t.slice(1),!0):t[0]==="^"?(t=t.slice(1),!1):lv(e,t,i,a))?(Ll(e,t,i),!e.tagName.includes("-")&&(t==="value"||t==="checked"||t==="selected")&&Wl(e,t,i,a,n,t!=="value")):e._isVueCE&&(/[A-Z]/.test(t)||!et(i))?Ll(e,xr(t),i,n,t):(t==="true-value"?e._trueValue=i:t==="false-value"&&(e._falseValue=i),Wl(e,t,i,a))};function lv(e,t,r,i){if(i)return!!(t==="innerHTML"||t==="textContent"||t in e&&Fl(t)&&xe(r));if(t==="spellcheck"||t==="draggable"||t==="translate"||t==="autocorrect"||t==="form"||t==="list"&&e.tagName==="INPUT"||t==="type"&&e.tagName==="TEXTAREA")return!1;if(t==="width"||t==="height"){const s=e.tagName;if(s==="IMG"||s==="VIDEO"||s==="CANVAS"||s==="SOURCE")return!1}return Fl(t)&&et(r)?!1:t in e}const dv=ot({patchProp:uv},Gw);let Hl;function cv(){return Hl||(Hl=mw(dv))}const pv=(...e)=>{const t=cv().createApp(...e),{mount:r}=t;return t.mount=i=>{const s=hv(i);if(!s)return;const n=t._component;!xe(n)&&!n.render&&!n.template&&(n.template=s.innerHTML),s.nodeType===1&&(s.textContent="");const a=r(s,!1,fv(s));return s instanceof Element&&(s.removeAttribute("v-cloak"),s.setAttribute("data-v-app","")),a},t};function fv(e){if(e instanceof SVGElement)return"svg";if(typeof MathMLElement=="function"&&e instanceof MathMLElement)return"mathml"}function hv(e){return et(e)?document.querySelector(e):e}/*! + * pinia v3.0.3 + * (c) 2025 Eduardo San Martin Morote + * @license MIT + */const mv=Symbol();var Gl;(function(e){e.direct="direct",e.patchObject="patch object",e.patchFunction="patch function"})(Gl||(Gl={}));function gv(){const e=eb(!0),t=e.run(()=>or({}));let r=[],i=[];const s=Zf({install(n){s._a=n,n.provide(mv,s),n.config.globalProperties.$pinia=s,i.forEach(a=>r.push(a)),i=[]},use(n){return this._a?r.push(n):i.push(n),this},_p:r,_a:null,_e:e,_s:new Map,state:t});return s}const _v=e=>{const t=gv();e.use(t)},yv=Object.freeze(Object.defineProperty({__proto__:null,install:_v},Symbol.toStringTag,{value:"Module"}));function Ch(e){return Bf()?(tb(e),!0):!1}function jl(){const e=new Set,t=n=>{e.delete(n)};return{on:n=>{e.add(n);const a=()=>t(n);return Ch(a),{off:a}},off:t,trigger:(...n)=>Promise.all(Array.from(e).map(a=>a(...n))),clear:()=>{e.clear()}}}const Bs=new WeakMap,bv=(...e)=>{var t;const r=e[0],i=(t=li())==null?void 0:t.proxy;if(i==null&&!lh())throw new Error("injectLocal must be called in setup");return i&&Bs.has(i)&&r in Bs.get(i)?Bs.get(i)[r]:Vi(...e)},zh=typeof window<"u"&&typeof document<"u";typeof WorkerGlobalScope<"u"&&globalThis instanceof WorkerGlobalScope;const wv=Object.prototype.toString,vv=e=>wv.call(e)==="[object Object]",$v=()=>{},xv=(e,t)=>Object.prototype.hasOwnProperty.call(e,t);function Oh(...e){if(e.length!==1)return Eb(...e);const t=e[0];return typeof t=="function"?en(kb(()=>({get:t,set:$v}))):or(t)}function Sv(e,t){function r(...i){return new Promise((s,n)=>{Promise.resolve(e(()=>t.apply(this,i),{fn:t,thisArg:this,args:i})).then(s).catch(n)})}return r}const Ah=e=>e();function kv(e=Ah,t={}){const{initialState:r="active"}=t,i=Oh(r==="active");function s(){i.value=!1}function n(){i.value=!0}const a=(...o)=>{i.value&&e(...o)};return{isActive:en(i),pause:s,resume:n,eventFilter:a}}function Kl(e){return e.endsWith("rem")?Number.parseFloat(e)*16:Number.parseFloat(e)}function Ms(e){return Array.isArray(e)?e:[e]}function Tv(e){return li()}function Iv(e,t,r={}){const{eventFilter:i=Ah,...s}=r;return qr(e,Sv(i,t),s)}function Ev(e,t,r={}){const{eventFilter:i,initialState:s="active",...n}=r,{eventFilter:a,pause:o,resume:u,isActive:d}=kv(i,{initialState:s});return{stop:Iv(e,t,{...n,eventFilter:a}),pause:o,resume:u,isActive:d}}function Rh(e,t=!0,r){Tv()?Oo(e,r):t?e():Eo(e)}function Cv(e=!1,t={}){const{truthyValue:r=!0,falsyValue:i=!1}=t,s=Ze(e),n=Wr(e);function a(o){if(arguments.length)return n.value=o,n.value;{const u=ft(r);return n.value=n.value===u?ft(i):u,n.value}}return s?a:[n,a]}function zv(e,t,r){return qr(e,t,{...r,immediate:!0})}const Yi=zh?window:void 0,Ov=zh?window.document:void 0;function Po(e){var t;const r=ft(e);return(t=r==null?void 0:r.$el)!=null?t:r}function Za(...e){const t=[],r=()=>{t.forEach(o=>o()),t.length=0},i=(o,u,d,c)=>(o.addEventListener(u,d,c),()=>o.removeEventListener(u,d,c)),s=Dt(()=>{const o=Ms(ft(e[0])).filter(u=>u!=null);return o.every(u=>typeof u!="string")?o:void 0}),n=zv(()=>{var o,u;return[(u=(o=s.value)==null?void 0:o.map(d=>Po(d)))!=null?u:[Yi].filter(d=>d!=null),Ms(ft(s.value?e[1]:e[0])),Ms(Pr(s.value?e[2]:e[1])),ft(s.value?e[3]:e[2])]},([o,u,d,c])=>{if(r(),!(o!=null&&o.length)||!(u!=null&&u.length)||!(d!=null&&d.length))return;const f=vv(c)?{...c}:c;t.push(...o.flatMap(h=>u.flatMap(m=>d.map(g=>i(h,m,g,f)))))},{flush:"post"}),a=()=>{n(),r()};return Ch(r),a}function Av(){const e=Wr(!1),t=li();return t&&Oo(()=>{e.value=!0},t),e}function Rv(e){const t=Av();return Dt(()=>(t.value,!!e()))}const Bv=Symbol("vueuse-ssr-width");function Mv(){const e=lh()?bv(Bv,null):null;return typeof e=="number"?e:void 0}function Nv(e,t={}){const{window:r=Yi,ssrWidth:i=Mv()}=t,s=Rv(()=>r&&"matchMedia"in r&&typeof r.matchMedia=="function"),n=Wr(typeof i=="number"),a=Wr(),o=Wr(!1),u=d=>{o.value=d.matches};return wh(()=>{if(n.value){n.value=!s.value;const d=ft(e).split(",");o.value=d.some(c=>{const f=c.includes("not all"),h=c.match(/\(\s*min-width:\s*(-?\d+(?:\.\d*)?[a-z]+\s*)\)/),m=c.match(/\(\s*max-width:\s*(-?\d+(?:\.\d*)?[a-z]+\s*)\)/);let g=!!(h||m);return h&&g&&(g=i>=Kl(h[1])),m&&g&&(g=i<=Kl(m[1])),f?!g:g});return}s.value&&(a.value=r.matchMedia(ft(e)),o.value=a.value.matches)}),Za(a,"change",u,{passive:!0}),Dt(()=>o.value)}const bn=typeof globalThis<"u"?globalThis:typeof window<"u"?window:typeof global<"u"?global:typeof self<"u"?self:{},wn="__vueuse_ssr_handlers__",Dv=Pv();function Pv(){return wn in bn||(bn[wn]=bn[wn]||{}),bn[wn]}function Bh(e,t){return Dv[e]||t}function Uv(e){return Nv("(prefers-color-scheme: dark)",e)}function Wv(e){return e==null?"any":e instanceof Set?"set":e instanceof Map?"map":e instanceof Date?"date":typeof e=="boolean"?"boolean":typeof e=="string"?"string":typeof e=="object"?"object":Number.isNaN(e)?"any":"number"}const Lv={boolean:{read:e=>e==="true",write:e=>String(e)},object:{read:e=>JSON.parse(e),write:e=>JSON.stringify(e)},number:{read:e=>Number.parseFloat(e),write:e=>String(e)},any:{read:e=>e,write:e=>String(e)},string:{read:e=>e,write:e=>String(e)},map:{read:e=>new Map(JSON.parse(e)),write:e=>JSON.stringify(Array.from(e.entries()))},set:{read:e=>new Set(JSON.parse(e)),write:e=>JSON.stringify(Array.from(e))},date:{read:e=>new Date(e),write:e=>e.toISOString()}},Zl="vueuse-storage";function qv(e,t,r,i={}){var s;const{flush:n="pre",deep:a=!0,listenToStorageChanges:o=!0,writeDefaults:u=!0,mergeDefaults:d=!1,shallow:c,window:f=Yi,eventFilter:h,onError:m=Y=>{console.error(Y)},initOnMounted:g}=i,y=(c?Wr:or)(typeof t=="function"?t():t),S=Dt(()=>ft(e));if(!r)try{r=Bh("getDefaultStorage",()=>{var Y;return(Y=Yi)==null?void 0:Y.localStorage})()}catch(Y){m(Y)}if(!r)return y;const v=ft(t),b=Wv(v),k=(s=i.serializer)!=null?s:Lv[b],{pause:x,resume:I}=Ev(y,()=>W(y.value),{flush:n,deep:a,eventFilter:h});qr(S,()=>X(),{flush:n});let z=!1;const O=Y=>{g&&!z||X(Y)},A=Y=>{g&&!z||ne(Y)};f&&o&&(r instanceof Storage?Za(f,"storage",O,{passive:!0}):Za(f,Zl,A)),g?Rh(()=>{z=!0,X()}):X();function R(Y,oe){if(f){const V={key:S.value,oldValue:Y,newValue:oe,storageArea:r};f.dispatchEvent(r instanceof Storage?new StorageEvent("storage",V):new CustomEvent(Zl,{detail:V}))}}function W(Y){try{const oe=r.getItem(S.value);if(Y==null)R(oe,null),r.removeItem(S.value);else{const V=k.write(Y);oe!==V&&(r.setItem(S.value,V),R(oe,V))}}catch(oe){m(oe)}}function ie(Y){const oe=Y?Y.newValue:r.getItem(S.value);if(oe==null)return u&&v!=null&&r.setItem(S.value,k.write(v)),v;if(!Y&&d){const V=k.read(oe);return typeof d=="function"?d(V,v):b==="object"&&!Array.isArray(V)?{...v,...V}:V}else return typeof oe!="string"?oe:k.read(oe)}function X(Y){if(!(Y&&Y.storageArea!==r)){if(Y&&Y.key==null){y.value=v;return}if(!(Y&&Y.key!==S.value)){x();try{(Y==null?void 0:Y.newValue)!==k.write(y.value)&&(y.value=ie(Y))}catch(oe){m(oe)}finally{Y?Eo(I):I()}}}}function ne(Y){X(Y.detail)}return y}const Vv="*,*::before,*::after{-webkit-transition:none!important;-moz-transition:none!important;-o-transition:none!important;-ms-transition:none!important;transition:none!important}";function Fv(e={}){const{selector:t="html",attribute:r="class",initialValue:i="auto",window:s=Yi,storage:n,storageKey:a="vueuse-color-scheme",listenToStorageChanges:o=!0,storageRef:u,emitAuto:d,disableTransition:c=!0}=e,f={auto:"",light:"light",dark:"dark",...e.modes||{}},h=Uv({window:s}),m=Dt(()=>h.value?"dark":"light"),g=u||(a==null?Oh(i):qv(a,i,n,{window:s,listenToStorageChanges:o})),y=Dt(()=>g.value==="auto"?m.value:g.value),S=Bh("updateHTMLAttrs",(x,I,z)=>{const O=typeof x=="string"?s==null?void 0:s.document.querySelector(x):Po(x);if(!O)return;const A=new Set,R=new Set;let W=null;if(I==="class"){const X=z.split(/\s/g);Object.values(f).flatMap(ne=>(ne||"").split(/\s/g)).filter(Boolean).forEach(ne=>{X.includes(ne)?A.add(ne):R.add(ne)})}else W={key:I,value:z};if(A.size===0&&R.size===0&&W===null)return;let ie;c&&(ie=s.document.createElement("style"),ie.appendChild(document.createTextNode(Vv)),s.document.head.appendChild(ie));for(const X of A)O.classList.add(X);for(const X of R)O.classList.remove(X);W&&O.setAttribute(W.key,W.value),c&&(s.getComputedStyle(ie).opacity,document.head.removeChild(ie))});function v(x){var I;S(t,r,(I=f[x])!=null?I:x)}function b(x){e.onChanged?e.onChanged(x,v):v(x)}qr(y,b,{flush:"post",immediate:!0}),Rh(()=>b(y.value));const k=Dt({get(){return d?g.value:y.value},set(x){g.value=x}});return Object.assign(k,{store:g,system:m,state:y})}function Hv(e={}){const{valueDark:t="dark",valueLight:r=""}=e,i=Fv({...e,onChanged:(a,o)=>{var u;e.onChanged?(u=e.onChanged)==null||u.call(e,a==="dark",o,a):o(a)},modes:{dark:t,light:r}}),s=Dt(()=>i.system.value);return Dt({get(){return i.value==="dark"},set(a){const o=a?"dark":"light";s.value===o?i.value="auto":i.value=o}})}const Gv={multiple:!0,accept:"*",reset:!1,directory:!1};function jv(e){if(!e)return null;if(e instanceof FileList)return e;const t=new DataTransfer;for(const r of e)t.items.add(r);return t.files}function Kv(e={}){const{document:t=Ov}=e,r=or(jv(e.initialFiles)),{on:i,trigger:s}=jl(),{on:n,trigger:a}=jl(),o=Dt(()=>{var f;const h=(f=Po(e.input))!=null?f:t?t.createElement("input"):void 0;return h&&(h.type="file",h.onchange=m=>{const g=m.target;r.value=g.files,s(r.value)},h.oncancel=()=>{a()}),h}),u=()=>{r.value=null,o.value&&o.value.value&&(o.value.value="",s(null))},d=f=>{const h=o.value;h&&(h.multiple=ft(f.multiple),h.accept=ft(f.accept),h.webkitdirectory=ft(f.directory),xv(f,"capture")&&(h.capture=ft(f.capture)))},c=f=>{const h=o.value;if(!h)return;const m={...Gv,...e,...f};d(m),ft(m.reset)&&u(),h.click()};return wh(()=>{d(e)}),{files:en(r),open:c,reset:u,onCancel:n,onChange:i}}/*! + * ONNX Runtime Web v1.22.0 + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. + */var Uo=Object.defineProperty,Zv=Object.getOwnPropertyDescriptor,Xv=Object.getOwnPropertyNames,Yv=Object.prototype.hasOwnProperty,Qv=(e=>typeof require<"u"?require:typeof Proxy<"u"?new Proxy(e,{get:(t,r)=>(typeof require<"u"?require:t)[r]}):e)(function(e){if(typeof require<"u")return require.apply(this,arguments);throw Error('Dynamic require of "'+e+'" is not supported')}),j=(e,t)=>()=>(e&&(t=e(e=0)),t),di=(e,t)=>{for(var r in t)Uo(e,r,{get:t[r],enumerable:!0})},Jv=(e,t,r,i)=>{if(t&&typeof t=="object"||typeof t=="function")for(let s of Xv(t))!Yv.call(e,s)&&s!==r&&Uo(e,s,{get:()=>t[s],enumerable:!(i=Zv(t,s))||i.enumerable});return e},Qi=e=>Jv(Uo({},"__esModule",{value:!0}),e),yi,gr,ti,Xl,Mh,Nh=j(()=>{yi=new Map,gr=[],ti=(e,t,r)=>{if(t&&typeof t.init=="function"&&typeof t.createInferenceSessionHandler=="function"){let i=yi.get(e);if(i===void 0)yi.set(e,{backend:t,priority:r});else{if(i.priority>r)return;if(i.priority===r&&i.backend!==t)throw new Error(`cannot register backend "${e}" using priority ${r}`)}if(r>=0){let s=gr.indexOf(e);s!==-1&&gr.splice(s,1);for(let n=0;n{let t=yi.get(e);if(!t)return"backend not found.";if(t.initialized)return t.backend;if(t.aborted)return t.error;{let r=!!t.initPromise;try{return r||(t.initPromise=t.backend.init(e)),await t.initPromise,t.initialized=!0,t.backend}catch(i){return r||(t.error=`${i}`,t.aborted=!0),t.error}finally{delete t.initPromise}}},Mh=async e=>{let t=e.executionProviders||[],r=t.map(u=>typeof u=="string"?u:u.name),i=r.length===0?gr:r,s,n=[],a=new Set;for(let u of i){let d=await Xl(u);typeof d=="string"?n.push({name:u,err:d}):(s||(s=d),s===d&&a.add(u))}if(!s)throw new Error(`no available backend found. ERR: ${n.map(u=>`[${u.name}] ${u.err}`).join(", ")}`);for(let{name:u,err:d}of n)r.includes(u)&&console.warn(`removing requested execution provider "${u}" from session options because it is not available: ${d}`);let o=t.filter(u=>a.has(typeof u=="string"?u:u.name));return[s,new Proxy(e,{get:(u,d)=>d==="executionProviders"?o:Reflect.get(u,d)})]}}),e$=j(()=>{Nh()}),Dh,t$=j(()=>{Dh="1.22.0"}),Ns,St,Ph=j(()=>{t$(),Ns="warning",St={wasm:{},webgl:{},webgpu:{},versions:{common:Dh},set logLevel(e){if(e!==void 0){if(typeof e!="string"||["verbose","info","warning","error","fatal"].indexOf(e)===-1)throw new Error(`Unsupported logging level: ${e}`);Ns=e}},get logLevel(){return Ns}},Object.defineProperty(St,"logLevel",{enumerable:!0})}),We,r$=j(()=>{Ph(),We=St}),Uh,Wh,i$=j(()=>{Uh=(e,t)=>{let r=typeof document<"u"?document.createElement("canvas"):new OffscreenCanvas(1,1);r.width=e.dims[3],r.height=e.dims[2];let i=r.getContext("2d");if(i!=null){let s,n;(t==null?void 0:t.tensorLayout)!==void 0&&t.tensorLayout==="NHWC"?(s=e.dims[2],n=e.dims[3]):(s=e.dims[3],n=e.dims[2]);let a=(t==null?void 0:t.format)!==void 0?t.format:"RGB",o=t==null?void 0:t.norm,u,d;o===void 0||o.mean===void 0?u=[255,255,255,255]:typeof o.mean=="number"?u=[o.mean,o.mean,o.mean,o.mean]:(u=[o.mean[0],o.mean[1],o.mean[2],0],o.mean[3]!==void 0&&(u[3]=o.mean[3])),o===void 0||o.bias===void 0?d=[0,0,0,0]:typeof o.bias=="number"?d=[o.bias,o.bias,o.bias,o.bias]:(d=[o.bias[0],o.bias[1],o.bias[2],0],o.bias[3]!==void 0&&(d[3]=o.bias[3]));let c=n*s,f=0,h=c,m=c*2,g=-1;a==="RGBA"?(f=0,h=c,m=c*2,g=c*3):a==="RGB"?(f=0,h=c,m=c*2):a==="RBG"&&(f=0,m=c,h=c*2);for(let y=0;y{let r=typeof document<"u"?document.createElement("canvas").getContext("2d"):new OffscreenCanvas(1,1).getContext("2d"),i;if(r!=null){let s,n,a;(t==null?void 0:t.tensorLayout)!==void 0&&t.tensorLayout==="NHWC"?(s=e.dims[2],n=e.dims[1],a=e.dims[3]):(s=e.dims[3],n=e.dims[2],a=e.dims[1]);let o=t!==void 0&&t.format!==void 0?t.format:"RGB",u=t==null?void 0:t.norm,d,c;u===void 0||u.mean===void 0?d=[255,255,255,255]:typeof u.mean=="number"?d=[u.mean,u.mean,u.mean,u.mean]:(d=[u.mean[0],u.mean[1],u.mean[2],255],u.mean[3]!==void 0&&(d[3]=u.mean[3])),u===void 0||u.bias===void 0?c=[0,0,0,0]:typeof u.bias=="number"?c=[u.bias,u.bias,u.bias,u.bias]:(c=[u.bias[0],u.bias[1],u.bias[2],0],u.bias[3]!==void 0&&(c[3]=u.bias[3]));let f=n*s;if(t!==void 0&&(t.format!==void 0&&a===4&&t.format!=="RGBA"||a===3&&t.format!=="RGB"&&t.format!=="BGR"))throw new Error("Tensor format doesn't match input tensor dims");let h=4,m=0,g=1,y=2,S=3,v=0,b=f,k=f*2,x=-1;o==="RGBA"?(v=0,b=f,k=f*2,x=f*3):o==="RGB"?(v=0,b=f,k=f*2):o==="RBG"&&(v=0,k=f,b=f*2),i=r.createImageData(s,n);for(let I=0;I{Wo(),vn=(e,t)=>{if(e===void 0)throw new Error("Image buffer must be defined");if(t.height===void 0||t.width===void 0)throw new Error("Image height and width must be defined");if(t.tensorLayout==="NHWC")throw new Error("NHWC Tensor layout is not supported yet");let{height:r,width:i}=t,s=t.norm??{mean:255,bias:0},n,a;typeof s.mean=="number"?n=[s.mean,s.mean,s.mean,s.mean]:n=[s.mean[0],s.mean[1],s.mean[2],s.mean[3]??255],typeof s.bias=="number"?a=[s.bias,s.bias,s.bias,s.bias]:a=[s.bias[0],s.bias[1],s.bias[2],s.bias[3]??0];let o=t.format!==void 0?t.format:"RGBA",u=t.tensorFormat!==void 0&&t.tensorFormat!==void 0?t.tensorFormat:"RGB",d=r*i,c=u==="RGBA"?new Float32Array(d*4):new Float32Array(d*3),f=4,h=0,m=1,g=2,y=3,S=0,v=d,b=d*2,k=-1;o==="RGB"&&(f=3,h=0,m=1,g=2,y=-1),u==="RGBA"?k=d*3:u==="RBG"?(S=0,b=d,v=d*2):u==="BGR"&&(b=0,v=d,S=d*2);for(let x=0;x{let r=typeof HTMLImageElement<"u"&&e instanceof HTMLImageElement,i=typeof ImageData<"u"&&e instanceof ImageData,s=typeof ImageBitmap<"u"&&e instanceof ImageBitmap,n=typeof e=="string",a,o=t??{},u=()=>{if(typeof document<"u")return document.createElement("canvas");if(typeof OffscreenCanvas<"u")return new OffscreenCanvas(1,1);throw new Error("Canvas is not supported")},d=c=>typeof HTMLCanvasElement<"u"&&c instanceof HTMLCanvasElement||c instanceof OffscreenCanvas?c.getContext("2d"):null;if(r){let c=u();c.width=e.width,c.height=e.height;let f=d(c);if(f!=null){let h=e.height,m=e.width;if(t!==void 0&&t.resizedHeight!==void 0&&t.resizedWidth!==void 0&&(h=t.resizedHeight,m=t.resizedWidth),t!==void 0){if(o=t,t.tensorFormat!==void 0)throw new Error("Image input config format must be RGBA for HTMLImageElement");o.tensorFormat="RGBA",o.height=h,o.width=m}else o.tensorFormat="RGBA",o.height=h,o.width=m;f.drawImage(e,0,0),a=f.getImageData(0,0,m,h).data}else throw new Error("Can not access image data")}else if(i){let c,f;if(t!==void 0&&t.resizedWidth!==void 0&&t.resizedHeight!==void 0?(c=t.resizedHeight,f=t.resizedWidth):(c=e.height,f=e.width),t!==void 0&&(o=t),o.format="RGBA",o.height=c,o.width=f,t!==void 0){let h=u();h.width=f,h.height=c;let m=d(h);if(m!=null)m.putImageData(e,0,0),a=m.getImageData(0,0,f,c).data;else throw new Error("Can not access image data")}else a=e.data}else if(s){if(t===void 0)throw new Error("Please provide image config with format for Imagebitmap");let c=u();c.width=e.width,c.height=e.height;let f=d(c);if(f!=null){let h=e.height,m=e.width;return f.drawImage(e,0,0,m,h),a=f.getImageData(0,0,m,h).data,o.height=h,o.width=m,vn(a,o)}else throw new Error("Can not access image data")}else{if(n)return new Promise((c,f)=>{let h=u(),m=d(h);if(!e||!m)return f();let g=new Image;g.crossOrigin="Anonymous",g.src=e,g.onload=()=>{h.width=g.width,h.height=g.height,m.drawImage(g,0,0,h.width,h.height);let y=m.getImageData(0,0,h.width,h.height);o.height=h.height,o.width=h.width,c(vn(y.data,o))}});throw new Error("Input data provided is not supported - aborted tensor creation")}if(a!==void 0)return vn(a,o);throw new Error("Input data provided is not supported - aborted tensor creation")},qh=(e,t)=>{let{width:r,height:i,download:s,dispose:n}=t,a=[1,i,r,4];return new wt({location:"texture",type:"float32",texture:e,dims:a,download:s,dispose:n})},Vh=(e,t)=>{let{dataType:r,dims:i,download:s,dispose:n}=t;return new wt({location:"gpu-buffer",type:r??"float32",gpuBuffer:e,dims:i,download:s,dispose:n})},Fh=(e,t)=>{let{dataType:r,dims:i,download:s,dispose:n}=t;return new wt({location:"ml-tensor",type:r??"float32",mlTensor:e,dims:i,download:s,dispose:n})},Hh=(e,t,r)=>new wt({location:"cpu-pinned",type:e,data:t,dims:r??[t.length]})}),Mr,Oi,Ds,Gh,s$=j(()=>{Mr=new Map([["float32",Float32Array],["uint8",Uint8Array],["int8",Int8Array],["uint16",Uint16Array],["int16",Int16Array],["int32",Int32Array],["bool",Uint8Array],["float64",Float64Array],["uint32",Uint32Array],["int4",Uint8Array],["uint4",Uint8Array]]),Oi=new Map([[Float32Array,"float32"],[Uint8Array,"uint8"],[Int8Array,"int8"],[Uint16Array,"uint16"],[Int16Array,"int16"],[Int32Array,"int32"],[Float64Array,"float64"],[Uint32Array,"uint32"]]),Ds=!1,Gh=()=>{if(!Ds){Ds=!0;let e=typeof BigInt64Array<"u"&&BigInt64Array.from,t=typeof BigUint64Array<"u"&&BigUint64Array.from,r=globalThis.Float16Array,i=typeof r<"u"&&r.from;e&&(Mr.set("int64",BigInt64Array),Oi.set(BigInt64Array,"int64")),t&&(Mr.set("uint64",BigUint64Array),Oi.set(BigUint64Array,"uint64")),i?(Mr.set("float16",r),Oi.set(r,"float16")):Mr.set("float16",Uint16Array)}}}),jh,Kh,a$=j(()=>{Wo(),jh=e=>{let t=1;for(let r=0;r{switch(e.location){case"cpu":return new wt(e.type,e.data,t);case"cpu-pinned":return new wt({location:"cpu-pinned",data:e.data,type:e.type,dims:t});case"texture":return new wt({location:"texture",texture:e.texture,type:e.type,dims:t});case"gpu-buffer":return new wt({location:"gpu-buffer",gpuBuffer:e.gpuBuffer,type:e.type,dims:t});case"ml-tensor":return new wt({location:"ml-tensor",mlTensor:e.mlTensor,type:e.type,dims:t});default:throw new Error(`tensorReshape: tensor location ${e.location} is not supported`)}}}),wt,Wo=j(()=>{i$(),n$(),s$(),a$(),wt=class{constructor(e,t,r){Gh();let i,s;if(typeof e=="object"&&"location"in e)switch(this.dataLocation=e.location,i=e.type,s=e.dims,e.location){case"cpu-pinned":{let a=Mr.get(i);if(!a)throw new TypeError(`unsupported type "${i}" to create tensor from pinned buffer`);if(!(e.data instanceof a))throw new TypeError(`buffer should be of type ${a.name}`);this.cpuData=e.data;break}case"texture":{if(i!=="float32")throw new TypeError(`unsupported type "${i}" to create tensor from texture`);this.gpuTextureData=e.texture,this.downloader=e.download,this.disposer=e.dispose;break}case"gpu-buffer":{if(i!=="float32"&&i!=="float16"&&i!=="int32"&&i!=="int64"&&i!=="uint32"&&i!=="uint8"&&i!=="bool"&&i!=="uint4"&&i!=="int4")throw new TypeError(`unsupported type "${i}" to create tensor from gpu buffer`);this.gpuBufferData=e.gpuBuffer,this.downloader=e.download,this.disposer=e.dispose;break}case"ml-tensor":{if(i!=="float32"&&i!=="float16"&&i!=="int32"&&i!=="int64"&&i!=="uint32"&&i!=="uint64"&&i!=="int8"&&i!=="uint8"&&i!=="bool"&&i!=="uint4"&&i!=="int4")throw new TypeError(`unsupported type "${i}" to create tensor from MLTensor`);this.mlTensorData=e.mlTensor,this.downloader=e.download,this.disposer=e.dispose;break}default:throw new Error(`Tensor constructor: unsupported location '${this.dataLocation}'`)}else{let a,o;if(typeof e=="string")if(i=e,o=r,e==="string"){if(!Array.isArray(t))throw new TypeError("A string tensor's data must be a string array.");a=t}else{let u=Mr.get(e);if(u===void 0)throw new TypeError(`Unsupported tensor type: ${e}.`);if(Array.isArray(t)){if(e==="float16"&&u===Uint16Array||e==="uint4"||e==="int4")throw new TypeError(`Creating a ${e} tensor from number array is not supported. Please use ${u.name} as data.`);e==="uint64"||e==="int64"?a=u.from(t,BigInt):a=u.from(t)}else if(t instanceof u)a=t;else if(t instanceof Uint8ClampedArray)if(e==="uint8")a=Uint8Array.from(t);else throw new TypeError("A Uint8ClampedArray tensor's data must be type of uint8");else if(e==="float16"&&t instanceof Uint16Array&&u!==Uint16Array)a=new globalThis.Float16Array(t.buffer,t.byteOffset,t.length);else throw new TypeError(`A ${i} tensor's data must be type of ${u}`)}else if(o=t,Array.isArray(e)){if(e.length===0)throw new TypeError("Tensor type cannot be inferred from an empty array.");let u=typeof e[0];if(u==="string")i="string",a=e;else if(u==="boolean")i="bool",a=Uint8Array.from(e);else throw new TypeError(`Invalid element type of data array: ${u}.`)}else if(e instanceof Uint8ClampedArray)i="uint8",a=Uint8Array.from(e);else{let u=Oi.get(e.constructor);if(u===void 0)throw new TypeError(`Unsupported type for tensor data: ${e.constructor}.`);i=u,a=e}if(o===void 0)o=[a.length];else if(!Array.isArray(o))throw new TypeError("A tensor's dims must be a number array");s=o,this.cpuData=a,this.dataLocation="cpu"}let n=jh(s);if(this.cpuData&&n!==this.cpuData.length&&!((i==="uint4"||i==="int4")&&Math.ceil(n/2)===this.cpuData.length))throw new Error(`Tensor's size(${n}) does not match data length(${this.cpuData.length}).`);this.type=i,this.dims=s,this.size=n}static async fromImage(e,t){return Lh(e,t)}static fromTexture(e,t){return qh(e,t)}static fromGpuBuffer(e,t){return Vh(e,t)}static fromMLTensor(e,t){return Fh(e,t)}static fromPinnedBuffer(e,t,r){return Hh(e,t,r)}toDataURL(e){return Uh(this,e)}toImageData(e){return Wh(this,e)}get data(){if(this.ensureValid(),!this.cpuData)throw new Error("The data is not on CPU. Use `getData()` to download GPU data to CPU, or use `texture` or `gpuBuffer` property to access the GPU data directly.");return this.cpuData}get location(){return this.dataLocation}get texture(){if(this.ensureValid(),!this.gpuTextureData)throw new Error("The data is not stored as a WebGL texture.");return this.gpuTextureData}get gpuBuffer(){if(this.ensureValid(),!this.gpuBufferData)throw new Error("The data is not stored as a WebGPU buffer.");return this.gpuBufferData}get mlTensor(){if(this.ensureValid(),!this.mlTensorData)throw new Error("The data is not stored as a WebNN MLTensor.");return this.mlTensorData}async getData(e){switch(this.ensureValid(),this.dataLocation){case"cpu":case"cpu-pinned":return this.data;case"texture":case"gpu-buffer":case"ml-tensor":{if(!this.downloader)throw new Error("The current tensor is not created with a specified data downloader.");if(this.isDownloading)throw new Error("The current tensor is being downloaded.");try{this.isDownloading=!0;let t=await this.downloader();return this.downloader=void 0,this.dataLocation="cpu",this.cpuData=t,e&&this.disposer&&(this.disposer(),this.disposer=void 0),t}finally{this.isDownloading=!1}}default:throw new Error(`cannot get data from location: ${this.dataLocation}`)}}dispose(){if(this.isDownloading)throw new Error("The current tensor is being downloaded.");this.disposer&&(this.disposer(),this.disposer=void 0),this.cpuData=void 0,this.gpuTextureData=void 0,this.gpuBufferData=void 0,this.mlTensorData=void 0,this.downloader=void 0,this.isDownloading=void 0,this.dataLocation="none"}ensureValid(){if(this.dataLocation==="none")throw new Error("The tensor is disposed.")}reshape(e){if(this.ensureValid(),this.downloader||this.disposer)throw new Error("Cannot reshape a tensor that owns GPU resource.");return Kh(this,e)}}}),Bt,Zh=j(()=>{Wo(),Bt=wt}),Fn,Ps,Qt,Pt,Xh=j(()=>{Ph(),Fn=(e,t)=>{(typeof St.trace>"u"?!St.wasm.trace:!St.trace)||console.timeStamp(`${e}::ORT::${t}`)},Ps=(e,t)=>{var s;let r=((s=new Error().stack)==null?void 0:s.split(/\r\n|\r|\n/g))||[],i=!1;for(let n=0;n{(typeof St.trace>"u"?!St.wasm.trace:!St.trace)||Ps("BEGIN",e)},Pt=e=>{(typeof St.trace>"u"?!St.wasm.trace:!St.trace)||Ps("END",e)}}),Yh,o$=j(()=>{Nh(),Zh(),Xh(),Yh=class Qh{constructor(t){this.handler=t}async run(t,r,i){Qt();let s={},n={};if(typeof t!="object"||t===null||t instanceof Bt||Array.isArray(t))throw new TypeError("'feeds' must be an object that use input names as keys and OnnxValue as corresponding values.");let a=!0;if(typeof r=="object"){if(r===null)throw new TypeError("Unexpected argument[1]: cannot be null.");if(r instanceof Bt)throw new TypeError("'fetches' cannot be a Tensor");if(Array.isArray(r)){if(r.length===0)throw new TypeError("'fetches' cannot be an empty array.");a=!1;for(let d of r){if(typeof d!="string")throw new TypeError("'fetches' must be a string array or an object.");if(this.outputNames.indexOf(d)===-1)throw new RangeError(`'fetches' contains invalid output name: ${d}.`);s[d]=null}if(typeof i=="object"&&i!==null)n=i;else if(typeof i<"u")throw new TypeError("'options' must be an object.")}else{let d=!1,c=Object.getOwnPropertyNames(r);for(let f of this.outputNames)if(c.indexOf(f)!==-1){let h=r[f];(h===null||h instanceof Bt)&&(d=!0,a=!1,s[f]=h)}if(d){if(typeof i=="object"&&i!==null)n=i;else if(typeof i<"u")throw new TypeError("'options' must be an object.")}else n=r}}else if(typeof r<"u")throw new TypeError("Unexpected argument[1]: must be 'fetches' or 'options'.");for(let d of this.inputNames)if(typeof t[d]>"u")throw new Error(`input '${d}' is missing in 'feeds'.`);if(a)for(let d of this.outputNames)s[d]=null;let o=await this.handler.run(t,s,n),u={};for(let d in o)if(Object.hasOwnProperty.call(o,d)){let c=o[d];c instanceof Bt?u[d]=c:u[d]=new Bt(c.type,c.data,c.dims)}return Pt(),u}async release(){return this.handler.dispose()}static async create(t,r,i,s){Qt();let n,a={};if(typeof t=="string"){if(n=t,typeof r=="object"&&r!==null)a=r;else if(typeof r<"u")throw new TypeError("'options' must be an object.")}else if(t instanceof Uint8Array){if(n=t,typeof r=="object"&&r!==null)a=r;else if(typeof r<"u")throw new TypeError("'options' must be an object.")}else if(t instanceof ArrayBuffer||typeof SharedArrayBuffer<"u"&&t instanceof SharedArrayBuffer){let c=t,f=0,h=t.byteLength;if(typeof r=="object"&&r!==null)a=r;else if(typeof r=="number"){if(f=r,!Number.isSafeInteger(f))throw new RangeError("'byteOffset' must be an integer.");if(f<0||f>=c.byteLength)throw new RangeError(`'byteOffset' is out of range [0, ${c.byteLength}).`);if(h=t.byteLength-f,typeof i=="number"){if(h=i,!Number.isSafeInteger(h))throw new RangeError("'byteLength' must be an integer.");if(h<=0||f+h>c.byteLength)throw new RangeError(`'byteLength' is out of range (0, ${c.byteLength-f}].`);if(typeof s=="object"&&s!==null)a=s;else if(typeof s<"u")throw new TypeError("'options' must be an object.")}else if(typeof i<"u")throw new TypeError("'byteLength' must be a number.")}else if(typeof r<"u")throw new TypeError("'options' must be an object.");n=new Uint8Array(c,f,h)}else throw new TypeError("Unexpected argument[0]: must be 'path' or 'buffer'.");let[o,u]=await Mh(a),d=await o.createInferenceSessionHandler(n,u);return Pt(),new Qh(d)}startProfiling(){this.handler.startProfiling()}endProfiling(){this.handler.endProfiling()}get inputNames(){return this.handler.inputNames}get outputNames(){return this.handler.outputNames}get inputMetadata(){return this.handler.inputMetadata}get outputMetadata(){return this.handler.outputMetadata}}}),Lo,u$=j(()=>{o$(),Lo=Yh}),l$=j(()=>{}),d$=j(()=>{}),c$=j(()=>{}),p$=j(()=>{}),f$={};di(f$,{InferenceSession:()=>Lo,TRACE:()=>Fn,TRACE_FUNC_BEGIN:()=>Qt,TRACE_FUNC_END:()=>Pt,Tensor:()=>Bt,env:()=>We,registerBackend:()=>ti});var Ut=j(()=>{e$(),r$(),u$(),Zh(),l$(),d$(),Xh(),c$(),p$()}),qo=j(()=>{}),Jh={};di(Jh,{default:()=>em});var Us,Ws,em,h$=j(()=>{var e;sy(),Kr(),Vo(),Us="ort-wasm-proxy-worker",Ws=((e=globalThis.self)==null?void 0:e.name)===Us,Ws&&(self.onmessage=t=>{let{type:r,in:i}=t.data;try{switch(r){case"init-wasm":Fo(i.wasm).then(()=>{ou(i).then(()=>{postMessage({type:r})},s=>{postMessage({type:r,err:s})})},s=>{postMessage({type:r,err:s})});break;case"init-ep":{let{epName:s,env:n}=i;uu(n,s).then(()=>{postMessage({type:r})},a=>{postMessage({type:r,err:a})});break}case"copy-from":{let{buffer:s}=i,n=Yn(s);postMessage({type:r,out:n});break}case"create":{let{model:s,options:n}=i;lu(s,n).then(a=>{postMessage({type:r,out:a})},a=>{postMessage({type:r,err:a})});break}case"release":du(i),postMessage({type:r});break;case"run":{let{sessionId:s,inputIndices:n,inputs:a,outputIndices:o,options:u}=i;cu(s,n,a,o,new Array(o.length).fill(null),u).then(d=>{d.some(c=>c[3]!=="cpu")?postMessage({type:r,err:"Proxy does not support non-cpu tensor location."}):postMessage({type:r,out:d},fu([...a,...d]))},d=>{postMessage({type:r,err:d})});break}case"end-profiling":pu(i),postMessage({type:r});break;default:}}catch(s){postMessage({type:r,err:s})}}),em=Ws?null:t=>new Worker(t??yt,{type:"module",name:Us})}),tm={};di(tm,{default:()=>rm});var Ls,qs,rm,Yl,m$=j(()=>{var e,t;qs=(Ls=import.meta.url,async function(r={}){var yl;var i,s,n=r,a=new Promise((l,p)=>{i=l,s=p}),o=typeof window=="object",u=typeof WorkerGlobalScope<"u",d=u&&((yl=self.name)==null?void 0:yl.startsWith("em-pthread"));n.mountExternalData=(l,p)=>{l.startsWith("./")&&(l=l.substring(2)),(n.Fb||(n.Fb=new Map)).set(l,p)},n.unmountExternalData=()=>{delete n.Fb};var c=globalThis.SharedArrayBuffer??new WebAssembly.Memory({initial:0,maximum:0,qc:!0}).buffer.constructor;let f=l=>async(...p)=>{var _;try{if(n.Gb)throw Error("Session already started");let w=n.Gb={ec:p[0],errors:[]},$=await l(...p);if(n.Gb!==w)throw Error("Session mismatch");(_=n.Kb)==null||_.flush();let C=w.errors;if(0F),0{if(l==="webgpu"){[n.Kb,n.Vb,n.Zb,n.Lb,n.Yb,n.kb,n.$b,n.bc,n.Wb,n.Xb,n.ac]=p;let _=n.Kb;n.jsepRegisterBuffer=(w,$,C,N)=>_.registerBuffer(w,$,C,N),n.jsepGetBuffer=w=>_.getBuffer(w),n.jsepCreateDownloader=(w,$,C)=>_.createDownloader(w,$,C),n.jsepOnCreateSession=w=>{_.onCreateSession(w)},n.jsepOnReleaseSession=w=>{_.onReleaseSession(w)},n.jsepOnRunStart=w=>_.onRunStart(w),n.cc=(w,$)=>{_.upload(w,$)}}else if(l==="webnn"){let _=p[0];[n.oc,n.Ob,n.webnnEnsureTensor,n.Pb,n.webnnDownloadTensor]=p.slice(1),n.webnnReleaseTensorId=n.Ob,n.webnnUploadTensor=n.Pb,n.webnnOnRunStart=w=>_.onRunStart(w),n.webnnOnRunEnd=_.onRunEnd.bind(_),n.webnnRegisterMLContext=(w,$)=>{_.registerMLContext(w,$)},n.webnnOnReleaseSession=w=>{_.onReleaseSession(w)},n.webnnCreateMLTensorDownloader=(w,$)=>_.createMLTensorDownloader(w,$),n.webnnRegisterMLTensor=(w,$,C,N)=>_.registerMLTensor(w,$,C,N),n.webnnCreateMLContext=w=>_.createMLContext(w),n.webnnRegisterMLConstant=(w,$,C,N,F,re)=>_.registerMLConstant(w,$,C,N,F,n.Fb,re),n.webnnRegisterGraphInput=_.registerGraphInput.bind(_),n.webnnIsGraphInput=_.isGraphInput.bind(_),n.webnnRegisterGraphOutput=_.registerGraphOutput.bind(_),n.webnnIsGraphOutput=_.isGraphOutput.bind(_),n.webnnCreateTemporaryTensor=_.createTemporaryTensor.bind(_),n.webnnIsGraphInputOutputTypeSupported=_.isGraphInputOutputTypeSupported.bind(_)}};let h=()=>{let l=(p,_,w)=>(...$)=>{let C=Vt,N=_==null?void 0:_();$=p(...$);let F=_==null?void 0:_();return N!==F&&(p=F,w(N),_=w=null),Vt!=C?new Promise((re,ce)=>{gs={resolve:re,reject:ce}}):$};(()=>{for(let p of["_OrtAppendExecutionProvider","_OrtCreateSession","_OrtRun","_OrtRunWithBinding","_OrtBindInput"])n[p]=l(n[p],()=>n[p],_=>n[p]=_)})(),f!==void 0&&(n._OrtRun=f(n._OrtRun),n._OrtRunWithBinding=f(n._OrtRunWithBinding)),h=void 0};n.asyncInit=()=>{h==null||h()};var m,g,y=Object.assign({},n),S=(l,p)=>{throw p},v="";(o||u)&&(u?v=self.location.href:typeof document<"u"&&document.currentScript&&(v=document.currentScript.src),Ls&&(v=Ls),v=v.startsWith("blob:")?"":v.slice(0,v.replace(/[?#].*/,"").lastIndexOf("/")+1),u&&(g=l=>{var p=new XMLHttpRequest;return p.open("GET",l,!1),p.responseType="arraybuffer",p.send(null),new Uint8Array(p.response)}),m=async l=>{if(D(l))return new Promise((_,w)=>{var $=new XMLHttpRequest;$.open("GET",l,!0),$.responseType="arraybuffer",$.onload=()=>{$.status==200||$.status==0&&$.response?_($.response):w($.status)},$.onerror=w,$.send(null)});var p=await fetch(l,{credentials:"same-origin"});if(p.ok)return p.arrayBuffer();throw Error(p.status+" : "+p.url)});var b=console.log.bind(console),k=console.error.bind(console),x=b,I=k;Object.assign(n,y),y=null;var z,O,A,R,W,ie,X,ne,Y,oe,V,ae,Z,le=n.wasmBinary,Be=!1,D=l=>l.startsWith("file://");function q(){return z.buffer!=R.buffer&&T(),R}function ee(){return z.buffer!=R.buffer&&T(),W}function ye(){return z.buffer!=R.buffer&&T(),ie}function Ge(){return z.buffer!=R.buffer&&T(),X}function U(){return z.buffer!=R.buffer&&T(),ne}function Ie(){return z.buffer!=R.buffer&&T(),Y}function Wt(){return z.buffer!=R.buffer&&T(),oe}function Ye(){return z.buffer!=R.buffer&&T(),Z}if(d){let l=function(p){try{var _=p.data,w=_.Cb;if(w==="load"){let $=[];self.onmessage=C=>$.push(C),self.startWorker=()=>{postMessage({Cb:"loaded"});for(let C of $)l(C);self.onmessage=l};for(let C of _.Sb)n[C]&&!n[C].proxy||(n[C]=(...N)=>{postMessage({Cb:"callHandler",Rb:C,args:N})},C=="print"&&(x=n[C]),C=="printErr"&&(I=n[C]));z=_.lc,T(),tt(_.mc)}else if(w==="run"){by(_.Bb),ws(_.Bb,0,0,1,0,0),pi(),hs(_.Bb),Lt||(al(),Lt=!0);try{wy(_.hc,_.Ib)}catch($){if($!="unwind")throw $}}else _.target!=="setimmediate"&&(w==="checkMailbox"?Lt&&nn():w&&(I(`worker: received unknown command ${w}`),I(_)))}catch($){throw ol(),$}};var tt,Lt=!1;I=function(...p){p=p.join(" "),console.error(p)},self.alert=function(...p){postMessage({Cb:"alert",text:p.join(" "),jc:pn()})},self.onunhandledrejection=p=>{throw p.reason||p},self.onmessage=l}function T(){var l=z.buffer;n.HEAP8=R=new Int8Array(l),n.HEAP16=ie=new Int16Array(l),n.HEAPU8=W=new Uint8Array(l),n.HEAPU16=X=new Uint16Array(l),n.HEAP32=ne=new Int32Array(l),n.HEAPU32=Y=new Uint32Array(l),n.HEAPF32=oe=new Float32Array(l),n.HEAPF64=Z=new Float64Array(l),n.HEAP64=V=new BigInt64Array(l),n.HEAPU64=ae=new BigUint64Array(l)}function E(){d?startWorker(n):fe.Da()}d||(z=new WebAssembly.Memory({initial:256,maximum:65536,shared:!0}),T());var M,H=0,L=null;function G(){if(--H==0&&L){var l=L;L=null,l()}}function J(l){throw I(l="Aborted("+l+")"),Be=!0,l=new WebAssembly.RuntimeError(l+". Build with -sASSERTIONS for more info."),s(l),l}function te(){return{a:{L:me,Aa:K,b:$y,$:gu,A:bu,pa:wu,X:$u,Z:xu,qa:Su,na:ku,ga:Tu,ma:Iu,J:Eu,Y:Cu,V:zu,oa:Ou,W:Au,va:xy,E:Sy,Q:ky,O:Iy,D:Cy,v:zy,r:Oy,P:Ay,z:Uy,R:Wy,ja:Ly,T:qy,aa:Vy,M:Fy,F:Hy,ia:hs,sa:Gy,t:jy,Ca:Ky,w:Yy,o:Qy,m:e0,c:cs,Ba:t0,n:r0,j:s0,u:a0,p:o0,f:u0,s:l0,l:d0,e:c0,k:p0,h:f0,g:h0,d:m0,da:g0,ea:_0,fa:y0,ba:Gu,ca:ju,N:Ku,xa:w0,ua:$0,i:x0,C:S0,G:k0,ta:v0,x:T0,ra:I0,U:E0,q:b0,y:C0,K:z0,S:O0,za:A0,ya:R0,ka:Qu,la:Ju,_:mt,B:el,I:tl,ha:rl,H:il,a:z,wa:Qe}}}var Q={840156:(l,p,_,w,$)=>{if(n===void 0||!n.Fb)return 1;if((l=Fe(Number(l>>>0))).startsWith("./")&&(l=l.substring(2)),!(l=n.Fb.get(l)))return 2;if(p=Number(p>>>0),_=Number(_>>>0),w=Number(w>>>0),p+_>l.byteLength)return 3;try{let C=l.subarray(p,p+_);switch($){case 0:ee().set(C,w>>>0);break;case 1:n.nc?n.nc(w,C):n.cc(w,C);break;default:return 4}return 0}catch{return 4}},840980:(l,p,_)=>{n.Pb(l,ee().subarray(p>>>0,p+_>>>0))},841044:()=>n.oc(),841086:l=>{n.Ob(l)},841123:()=>{n.Wb()},841154:()=>{n.Xb()},841183:()=>{n.ac()},841208:l=>n.Vb(l),841241:l=>n.Zb(l),841273:(l,p,_)=>{n.Lb(Number(l),Number(p),Number(_),!0)},841336:(l,p,_)=>{n.Lb(Number(l),Number(p),Number(_))},841393:()=>typeof wasmOffsetConverter<"u",841450:l=>{n.kb("Abs",l,void 0)},841501:l=>{n.kb("Neg",l,void 0)},841552:l=>{n.kb("Floor",l,void 0)},841605:l=>{n.kb("Ceil",l,void 0)},841657:l=>{n.kb("Reciprocal",l,void 0)},841715:l=>{n.kb("Sqrt",l,void 0)},841767:l=>{n.kb("Exp",l,void 0)},841818:l=>{n.kb("Erf",l,void 0)},841869:l=>{n.kb("Sigmoid",l,void 0)},841924:(l,p,_)=>{n.kb("HardSigmoid",l,{alpha:p,beta:_})},842003:l=>{n.kb("Log",l,void 0)},842054:l=>{n.kb("Sin",l,void 0)},842105:l=>{n.kb("Cos",l,void 0)},842156:l=>{n.kb("Tan",l,void 0)},842207:l=>{n.kb("Asin",l,void 0)},842259:l=>{n.kb("Acos",l,void 0)},842311:l=>{n.kb("Atan",l,void 0)},842363:l=>{n.kb("Sinh",l,void 0)},842415:l=>{n.kb("Cosh",l,void 0)},842467:l=>{n.kb("Asinh",l,void 0)},842520:l=>{n.kb("Acosh",l,void 0)},842573:l=>{n.kb("Atanh",l,void 0)},842626:l=>{n.kb("Tanh",l,void 0)},842678:l=>{n.kb("Not",l,void 0)},842729:(l,p,_)=>{n.kb("Clip",l,{min:p,max:_})},842798:l=>{n.kb("Clip",l,void 0)},842850:(l,p)=>{n.kb("Elu",l,{alpha:p})},842908:l=>{n.kb("Gelu",l,void 0)},842960:l=>{n.kb("Relu",l,void 0)},843012:(l,p)=>{n.kb("LeakyRelu",l,{alpha:p})},843076:(l,p)=>{n.kb("ThresholdedRelu",l,{alpha:p})},843146:(l,p)=>{n.kb("Cast",l,{to:p})},843204:l=>{n.kb("Add",l,void 0)},843255:l=>{n.kb("Sub",l,void 0)},843306:l=>{n.kb("Mul",l,void 0)},843357:l=>{n.kb("Div",l,void 0)},843408:l=>{n.kb("Pow",l,void 0)},843459:l=>{n.kb("Equal",l,void 0)},843512:l=>{n.kb("Greater",l,void 0)},843567:l=>{n.kb("GreaterOrEqual",l,void 0)},843629:l=>{n.kb("Less",l,void 0)},843681:l=>{n.kb("LessOrEqual",l,void 0)},843740:(l,p,_,w,$)=>{n.kb("ReduceMean",l,{keepDims:!!p,noopWithEmptyAxes:!!_,axes:w?Array.from(U().subarray(Number(w)>>>0,Number($)>>>0)):[]})},843915:(l,p,_,w,$)=>{n.kb("ReduceMax",l,{keepDims:!!p,noopWithEmptyAxes:!!_,axes:w?Array.from(U().subarray(Number(w)>>>0,Number($)>>>0)):[]})},844089:(l,p,_,w,$)=>{n.kb("ReduceMin",l,{keepDims:!!p,noopWithEmptyAxes:!!_,axes:w?Array.from(U().subarray(Number(w)>>>0,Number($)>>>0)):[]})},844263:(l,p,_,w,$)=>{n.kb("ReduceProd",l,{keepDims:!!p,noopWithEmptyAxes:!!_,axes:w?Array.from(U().subarray(Number(w)>>>0,Number($)>>>0)):[]})},844438:(l,p,_,w,$)=>{n.kb("ReduceSum",l,{keepDims:!!p,noopWithEmptyAxes:!!_,axes:w?Array.from(U().subarray(Number(w)>>>0,Number($)>>>0)):[]})},844612:(l,p,_,w,$)=>{n.kb("ReduceL1",l,{keepDims:!!p,noopWithEmptyAxes:!!_,axes:w?Array.from(U().subarray(Number(w)>>>0,Number($)>>>0)):[]})},844785:(l,p,_,w,$)=>{n.kb("ReduceL2",l,{keepDims:!!p,noopWithEmptyAxes:!!_,axes:w?Array.from(U().subarray(Number(w)>>>0,Number($)>>>0)):[]})},844958:(l,p,_,w,$)=>{n.kb("ReduceLogSum",l,{keepDims:!!p,noopWithEmptyAxes:!!_,axes:w?Array.from(U().subarray(Number(w)>>>0,Number($)>>>0)):[]})},845135:(l,p,_,w,$)=>{n.kb("ReduceSumSquare",l,{keepDims:!!p,noopWithEmptyAxes:!!_,axes:w?Array.from(U().subarray(Number(w)>>>0,Number($)>>>0)):[]})},845315:(l,p,_,w,$)=>{n.kb("ReduceLogSumExp",l,{keepDims:!!p,noopWithEmptyAxes:!!_,axes:w?Array.from(U().subarray(Number(w)>>>0,Number($)>>>0)):[]})},845495:l=>{n.kb("Where",l,void 0)},845548:(l,p,_)=>{n.kb("Transpose",l,{perm:p?Array.from(U().subarray(Number(p)>>>0,Number(_)>>>0)):[]})},845672:(l,p,_,w)=>{n.kb("DepthToSpace",l,{blocksize:p,mode:Fe(_),format:w?"NHWC":"NCHW"})},845805:(l,p,_,w)=>{n.kb("DepthToSpace",l,{blocksize:p,mode:Fe(_),format:w?"NHWC":"NCHW"})},845938:(l,p,_,w,$,C,N,F,re,ce,$e,Ce,Ne,it,Yr)=>{n.kb("ConvTranspose",l,{format:re?"NHWC":"NCHW",autoPad:p,dilations:[_],group:w,kernelShape:[$],pads:[C,N],strides:[F],wIsConst:()=>!!q()[ce>>>0],outputPadding:$e?Array.from(U().subarray(Number($e)>>>0,Number(Ce)>>>0)):[],outputShape:Ne?Array.from(U().subarray(Number(Ne)>>>0,Number(it)>>>0)):[],activation:Fe(Yr)})},846371:(l,p,_,w,$,C,N,F,re,ce,$e,Ce,Ne,it)=>{n.kb("ConvTranspose",l,{format:F?"NHWC":"NCHW",autoPad:p,dilations:Array.from(U().subarray(Number(_)>>>0,2+(Number(_)>>>0)>>>0)),group:w,kernelShape:Array.from(U().subarray(Number($)>>>0,2+(Number($)>>>0)>>>0)),pads:Array.from(U().subarray(Number(C)>>>0,4+(Number(C)>>>0)>>>0)),strides:Array.from(U().subarray(Number(N)>>>0,2+(Number(N)>>>0)>>>0)),wIsConst:()=>!!q()[re>>>0],outputPadding:ce?Array.from(U().subarray(Number(ce)>>>0,Number($e)>>>0)):[],outputShape:Ce?Array.from(U().subarray(Number(Ce)>>>0,Number(Ne)>>>0)):[],activation:Fe(it)})},847032:(l,p,_,w,$,C,N,F,re,ce,$e,Ce,Ne,it,Yr)=>{n.kb("ConvTranspose",l,{format:re?"NHWC":"NCHW",autoPad:p,dilations:[_],group:w,kernelShape:[$],pads:[C,N],strides:[F],wIsConst:()=>!!q()[ce>>>0],outputPadding:$e?Array.from(U().subarray(Number($e)>>>0,Number(Ce)>>>0)):[],outputShape:Ne?Array.from(U().subarray(Number(Ne)>>>0,Number(it)>>>0)):[],activation:Fe(Yr)})},847465:(l,p,_,w,$,C,N,F,re,ce,$e,Ce,Ne,it)=>{n.kb("ConvTranspose",l,{format:F?"NHWC":"NCHW",autoPad:p,dilations:Array.from(U().subarray(Number(_)>>>0,2+(Number(_)>>>0)>>>0)),group:w,kernelShape:Array.from(U().subarray(Number($)>>>0,2+(Number($)>>>0)>>>0)),pads:Array.from(U().subarray(Number(C)>>>0,4+(Number(C)>>>0)>>>0)),strides:Array.from(U().subarray(Number(N)>>>0,2+(Number(N)>>>0)>>>0)),wIsConst:()=>!!q()[re>>>0],outputPadding:ce?Array.from(U().subarray(Number(ce)>>>0,Number($e)>>>0)):[],outputShape:Ce?Array.from(U().subarray(Number(Ce)>>>0,Number(Ne)>>>0)):[],activation:Fe(it)})},848126:(l,p)=>{n.kb("GlobalAveragePool",l,{format:p?"NHWC":"NCHW"})},848217:(l,p,_,w,$,C,N,F,re,ce,$e,Ce,Ne,it)=>{n.kb("AveragePool",l,{format:it?"NHWC":"NCHW",auto_pad:p,ceil_mode:_,count_include_pad:w,storage_order:$,dilations:C?Array.from(U().subarray(Number(C)>>>0,Number(N)>>>0)):[],kernel_shape:F?Array.from(U().subarray(Number(F)>>>0,Number(re)>>>0)):[],pads:ce?Array.from(U().subarray(Number(ce)>>>0,Number($e)>>>0)):[],strides:Ce?Array.from(U().subarray(Number(Ce)>>>0,Number(Ne)>>>0)):[]})},848696:(l,p)=>{n.kb("GlobalAveragePool",l,{format:p?"NHWC":"NCHW"})},848787:(l,p,_,w,$,C,N,F,re,ce,$e,Ce,Ne,it)=>{n.kb("AveragePool",l,{format:it?"NHWC":"NCHW",auto_pad:p,ceil_mode:_,count_include_pad:w,storage_order:$,dilations:C?Array.from(U().subarray(Number(C)>>>0,Number(N)>>>0)):[],kernel_shape:F?Array.from(U().subarray(Number(F)>>>0,Number(re)>>>0)):[],pads:ce?Array.from(U().subarray(Number(ce)>>>0,Number($e)>>>0)):[],strides:Ce?Array.from(U().subarray(Number(Ce)>>>0,Number(Ne)>>>0)):[]})},849266:(l,p)=>{n.kb("GlobalMaxPool",l,{format:p?"NHWC":"NCHW"})},849353:(l,p,_,w,$,C,N,F,re,ce,$e,Ce,Ne,it)=>{n.kb("MaxPool",l,{format:it?"NHWC":"NCHW",auto_pad:p,ceil_mode:_,count_include_pad:w,storage_order:$,dilations:C?Array.from(U().subarray(Number(C)>>>0,Number(N)>>>0)):[],kernel_shape:F?Array.from(U().subarray(Number(F)>>>0,Number(re)>>>0)):[],pads:ce?Array.from(U().subarray(Number(ce)>>>0,Number($e)>>>0)):[],strides:Ce?Array.from(U().subarray(Number(Ce)>>>0,Number(Ne)>>>0)):[]})},849828:(l,p)=>{n.kb("GlobalMaxPool",l,{format:p?"NHWC":"NCHW"})},849915:(l,p,_,w,$,C,N,F,re,ce,$e,Ce,Ne,it)=>{n.kb("MaxPool",l,{format:it?"NHWC":"NCHW",auto_pad:p,ceil_mode:_,count_include_pad:w,storage_order:$,dilations:C?Array.from(U().subarray(Number(C)>>>0,Number(N)>>>0)):[],kernel_shape:F?Array.from(U().subarray(Number(F)>>>0,Number(re)>>>0)):[],pads:ce?Array.from(U().subarray(Number(ce)>>>0,Number($e)>>>0)):[],strides:Ce?Array.from(U().subarray(Number(Ce)>>>0,Number(Ne)>>>0)):[]})},850390:(l,p,_,w,$)=>{n.kb("Gemm",l,{alpha:p,beta:_,transA:w,transB:$})},850494:l=>{n.kb("MatMul",l,void 0)},850548:(l,p,_,w)=>{n.kb("ArgMax",l,{keepDims:!!p,selectLastIndex:!!_,axis:w})},850656:(l,p,_,w)=>{n.kb("ArgMin",l,{keepDims:!!p,selectLastIndex:!!_,axis:w})},850764:(l,p)=>{n.kb("Softmax",l,{axis:p})},850827:(l,p)=>{n.kb("Concat",l,{axis:p})},850887:(l,p,_,w,$)=>{n.kb("Split",l,{axis:p,numOutputs:_,splitSizes:w?Array.from(U().subarray(Number(w)>>>0,Number($)>>>0)):[]})},851043:l=>{n.kb("Expand",l,void 0)},851097:(l,p)=>{n.kb("Gather",l,{axis:Number(p)})},851168:(l,p)=>{n.kb("GatherElements",l,{axis:Number(p)})},851247:(l,p)=>{n.kb("GatherND",l,{batch_dims:Number(p)})},851326:(l,p,_,w,$,C,N,F,re,ce,$e)=>{n.kb("Resize",l,{antialias:p,axes:_?Array.from(U().subarray(Number(_)>>>0,Number(w)>>>0)):[],coordinateTransformMode:Fe($),cubicCoeffA:C,excludeOutside:N,extrapolationValue:F,keepAspectRatioPolicy:Fe(re),mode:Fe(ce),nearestMode:Fe($e)})},851688:(l,p,_,w,$,C,N)=>{n.kb("Slice",l,{starts:p?Array.from(U().subarray(Number(p)>>>0,Number(_)>>>0)):[],ends:w?Array.from(U().subarray(Number(w)>>>0,Number($)>>>0)):[],axes:C?Array.from(U().subarray(Number(C)>>>0,Number(N)>>>0)):[]})},851952:l=>{n.kb("Tile",l,void 0)},852004:(l,p,_)=>{n.kb("InstanceNormalization",l,{epsilon:p,format:_?"NHWC":"NCHW"})},852118:(l,p,_)=>{n.kb("InstanceNormalization",l,{epsilon:p,format:_?"NHWC":"NCHW"})},852232:l=>{n.kb("Range",l,void 0)},852285:(l,p)=>{n.kb("Einsum",l,{equation:Fe(p)})},852366:(l,p,_,w,$)=>{n.kb("Pad",l,{mode:p,value:_,pads:w?Array.from(U().subarray(Number(w)>>>0,Number($)>>>0)):[]})},852509:(l,p,_,w,$,C)=>{n.kb("BatchNormalization",l,{epsilon:p,momentum:_,spatial:!!$,trainingMode:!!w,format:C?"NHWC":"NCHW"})},852678:(l,p,_,w,$,C)=>{n.kb("BatchNormalization",l,{epsilon:p,momentum:_,spatial:!!$,trainingMode:!!w,format:C?"NHWC":"NCHW"})},852847:(l,p,_)=>{n.kb("CumSum",l,{exclusive:Number(p),reverse:Number(_)})},852944:(l,p,_)=>{n.kb("DequantizeLinear",l,{axis:p,blockSize:_})},853034:(l,p,_,w,$)=>{n.kb("GridSample",l,{align_corners:p,mode:Fe(_),padding_mode:Fe(w),format:$?"NHWC":"NCHW"})},853204:(l,p,_,w,$)=>{n.kb("GridSample",l,{align_corners:p,mode:Fe(_),padding_mode:Fe(w),format:$?"NHWC":"NCHW"})},853374:(l,p)=>{n.kb("ScatterND",l,{reduction:Fe(p)})},853459:(l,p,_,w,$,C,N,F,re)=>{n.kb("Attention",l,{numHeads:p,isUnidirectional:_,maskFilterValue:w,scale:$,doRotary:C,qkvHiddenSizes:N?Array.from(U().subarray(Number(F)>>>0,Number(F)+N>>>0)):[],pastPresentShareBuffer:!!re})},853731:l=>{n.kb("BiasAdd",l,void 0)},853786:l=>{n.kb("BiasSplitGelu",l,void 0)},853847:l=>{n.kb("FastGelu",l,void 0)},853903:(l,p,_,w,$,C,N,F,re,ce,$e,Ce,Ne,it,Yr,N0)=>{n.kb("Conv",l,{format:Ce?"NHWC":"NCHW",auto_pad:p,dilations:_?Array.from(U().subarray(Number(_)>>>0,Number(w)>>>0)):[],group:$,kernel_shape:C?Array.from(U().subarray(Number(C)>>>0,Number(N)>>>0)):[],pads:F?Array.from(U().subarray(Number(F)>>>0,Number(re)>>>0)):[],strides:ce?Array.from(U().subarray(Number(ce)>>>0,Number($e)>>>0)):[],w_is_const:()=>!!q()[Number(Ne)>>>0],activation:Fe(it),activation_params:Yr?Array.from(Wt().subarray(Number(Yr)>>>0,Number(N0)>>>0)):[]})},854487:l=>{n.kb("Gelu",l,void 0)},854539:(l,p,_,w,$,C,N,F,re)=>{n.kb("GroupQueryAttention",l,{numHeads:p,kvNumHeads:_,scale:w,softcap:$,doRotary:C,rotaryInterleaved:N,smoothSoftmax:F,localWindowSize:re})},854756:(l,p,_,w)=>{n.kb("LayerNormalization",l,{axis:p,epsilon:_,simplified:!!w})},854867:(l,p,_,w)=>{n.kb("LayerNormalization",l,{axis:p,epsilon:_,simplified:!!w})},854978:(l,p,_,w,$,C)=>{n.kb("MatMulNBits",l,{k:p,n:_,accuracyLevel:w,bits:$,blockSize:C})},855105:(l,p,_,w,$,C)=>{n.kb("MultiHeadAttention",l,{numHeads:p,isUnidirectional:_,maskFilterValue:w,scale:$,doRotary:C})},855264:(l,p)=>{n.kb("QuickGelu",l,{alpha:p})},855328:(l,p,_,w,$)=>{n.kb("RotaryEmbedding",l,{interleaved:!!p,numHeads:_,rotaryEmbeddingDim:w,scale:$})},855467:(l,p,_)=>{n.kb("SkipLayerNormalization",l,{epsilon:p,simplified:!!_})},855569:(l,p,_)=>{n.kb("SkipLayerNormalization",l,{epsilon:p,simplified:!!_})},855671:(l,p,_,w)=>{n.kb("GatherBlockQuantized",l,{gatherAxis:p,quantizeAxis:_,blockSize:w})},855792:l=>{n.$b(l)},855826:(l,p)=>n.bc(Number(l),Number(p),n.Gb.ec,n.Gb.errors)};function K(l,p,_){return Wu(async()=>{await n.Yb(Number(l),Number(p),Number(_))})}function me(){return typeof wasmOffsetConverter<"u"}class se{constructor(p){Ke(this,"name","ExitStatus");this.message=`Program terminated with exit(${p})`,this.status=p}}var he=l=>{l.terminate(),l.onmessage=()=>{}},be=[],ke=l=>{je.length==0&&(hu(),fi(je[0]));var p=je.pop();if(!p)return 6;Tt.push(p),Ve[l.Bb]=p,p.Bb=l.Bb;var _={Cb:"run",hc:l.fc,Ib:l.Ib,Bb:l.Bb};return p.postMessage(_,l.Nb),0},Ee=0,ge=(l,p,..._)=>{for(var w=2*_.length,$=xs(),C=$s(8*w),N=C>>>3,F=0;F<_.length;F++){var re=_[F];typeof re=="bigint"?(V[N+2*F]=1n,V[N+2*F+1]=re):(V[N+2*F]=0n,Ye()[N+2*F+1>>>0]=re)}return l=ul(l,0,w,C,p),hn($),l};function Qe(l){if(d)return ge(0,1,l);if(A=l,!(0{if(A=l,d)throw rt(l),"unwind";Qe(l)},je=[],Tt=[],ci=[],Ve={},_t=l=>{var p=l.Bb;delete Ve[p],je.push(l),Tt.splice(Tt.indexOf(l),1),l.Bb=0,ll(p)};function pi(){ci.forEach(l=>l())}var fi=l=>new Promise(p=>{l.onmessage=$=>{var C=($=$.data).Cb;if($.Hb&&$.Hb!=pn()){var N=Ve[$.Hb];N?N.postMessage($,$.Nb):I(`Internal error! Worker sent a message "${C}" to target pthread ${$.Hb}, but that thread no longer exists!`)}else C==="checkMailbox"?nn():C==="spawnThread"?ke($):C==="cleanupThread"?_t(Ve[$.ic]):C==="loaded"?(l.loaded=!0,p(l)):C==="alert"?alert(`Thread ${$.jc}: ${$.text}`):$.target==="setimmediate"?l.postMessage($):C==="callHandler"?n[$.Rb](...$.args):C&&I(`worker sent an unknown command ${C}`)},l.onerror=$=>{throw I(`worker sent an error! ${$.filename}:${$.lineno}: ${$.message}`),$};var _,w=[];for(_ of[])n.propertyIsEnumerable(_)&&w.push(_);l.postMessage({Cb:"load",Sb:w,lc:z,mc:O})});function hu(){var l=new Worker((()=>{let p=URL;return import.meta.url>"file:"&&import.meta.url<"file;"?new p("ort.bundle.min.mjs",import.meta.url):new URL(import.meta.url)})(),{type:"module",workerData:"em-pthread",name:"em-pthread"});je.push(l)}var by=l=>{T();var p=Ie()[l+52>>>2>>>0];l=Ie()[l+56>>>2>>>0],pl(p,p-l),hn(p)},wy=(l,p)=>{Ee=0,l=fl(l,p),0>>=0);throw p>>>=0,_>>>=0,Ie()[w.Jb+16>>>2>>>0]=0,Ie()[w.Jb+4>>>2>>>0]=p,Ie()[w.Jb+8>>>2>>>0]=_,l}function mu(l,p,_,w){return d?ge(2,1,l,p,_,w):gu(l,p,_,w)}function gu(l,p,_,w){if(l>>>=0,_>>>=0,w>>>=0,c===void 0)return 6;var $=[];return d&&$.length===0?mu(l,p>>>=0,_,w):(l={fc:_,Bb:l,Ib:w,Nb:$},d?(l.Cb="spawnThread",postMessage(l,$),0):ke(l))}var _u=typeof TextDecoder<"u"?new TextDecoder:void 0,yu=(l,p=0,_=NaN)=>{var w=(p>>>=0)+_;for(_=p;l[_]&&!(_>=w);)++_;if(16<_-p&&l.buffer&&_u)return _u.decode(l.buffer instanceof ArrayBuffer?l.subarray(p,_):l.slice(p,_));for(w="";p<_;){var $=l[p++];if(128&$){var C=63&l[p++];if((224&$)==192)w+=String.fromCharCode((31&$)<<6|C);else{var N=63&l[p++];65536>($=(240&$)==224?(15&$)<<12|C<<6|N:(7&$)<<18|C<<12|N<<6|63&l[p++])?w+=String.fromCharCode($):($-=65536,w+=String.fromCharCode(55296|$>>10,56320|1023&$))}}else w+=String.fromCharCode($)}return w},Fe=(l,p)=>(l>>>=0)?yu(ee(),l,p):"";function bu(l,p,_){return d?ge(3,1,l,p,_):0}function wu(l,p){if(d)return ge(4,1,l,p)}var vu=l=>{for(var p=0,_=0;_=w?p++:2047>=w?p+=2:55296<=w&&57343>=w?(p+=4,++_):p+=3}return p},Xr=(l,p,_)=>{var w=ee();if(p>>>=0,0<_){var $=p;_=p+_-1;for(var C=0;C=N&&(N=65536+((1023&N)<<10)|1023&l.charCodeAt(++C)),127>=N){if(p>=_)break;w[p++>>>0]=N}else{if(2047>=N){if(p+1>=_)break;w[p++>>>0]=192|N>>6}else{if(65535>=N){if(p+2>=_)break;w[p++>>>0]=224|N>>12}else{if(p+3>=_)break;w[p++>>>0]=240|N>>18,w[p++>>>0]=128|N>>12&63}w[p++>>>0]=128|N>>6&63}w[p++>>>0]=128|63&N}}w[p>>>0]=0,l=p-$}else l=0;return l};function $u(l,p){if(d)return ge(5,1,l,p)}function xu(l,p,_){if(d)return ge(6,1,l,p,_)}function Su(l,p,_){return d?ge(7,1,l,p,_):0}function ku(l,p){if(d)return ge(8,1,l,p)}function Tu(l,p,_){if(d)return ge(9,1,l,p,_)}function Iu(l,p,_,w){if(d)return ge(10,1,l,p,_,w)}function Eu(l,p,_,w){if(d)return ge(11,1,l,p,_,w)}function Cu(l,p,_,w){if(d)return ge(12,1,l,p,_,w)}function zu(l){if(d)return ge(13,1,l)}function Ou(l,p){if(d)return ge(14,1,l,p)}function Au(l,p,_){if(d)return ge(15,1,l,p,_)}var Ru,fr,xy=()=>J(""),qt=l=>{for(var p="";ee()[l>>>0];)p+=Ru[ee()[l++>>>0]];return p},us={},ls={};function Jt(l,p,_={}){return function(w,$,C={}){var N=$.name;if(!w)throw new fr(`type "${N}" must have a positive integer typeid pointer`);if(ls.hasOwnProperty(w)){if(C.Tb)return;throw new fr(`Cannot register type '${N}' twice`)}ls[w]=$,us.hasOwnProperty(w)&&($=us[w],delete us[w],$.forEach(F=>F()))}(l,p,_)}var Bu=(l,p,_)=>{switch(p){case 1:return _?w=>q()[w>>>0]:w=>ee()[w>>>0];case 2:return _?w=>ye()[w>>>1>>>0]:w=>Ge()[w>>>1>>>0];case 4:return _?w=>U()[w>>>2>>>0]:w=>Ie()[w>>>2>>>0];case 8:return _?w=>V[w>>>3]:w=>ae[w>>>3];default:throw new TypeError(`invalid integer width (${p}): ${l}`)}};function Sy(l,p,_){_>>>=0,Jt(l>>>=0,{name:p=qt(p>>>0),fromWireType:w=>w,toWireType:function(w,$){if(typeof $!="bigint"&&typeof $!="number")throw $=$===null?"null":(w=typeof $)=="object"||w==="array"||w==="function"?$.toString():""+$,new TypeError(`Cannot convert "${$}" to ${this.name}`);return typeof $=="number"&&($=BigInt($)),$},Db:hr,readValueFromPointer:Bu(p,_,p.indexOf("u")==-1),Eb:null})}var hr=8;function ky(l,p,_,w){Jt(l>>>=0,{name:p=qt(p>>>0),fromWireType:function($){return!!$},toWireType:function($,C){return C?_:w},Db:hr,readValueFromPointer:function($){return this.fromWireType(ee()[$>>>0])},Eb:null})}var ds=[],er=[];function cs(l){9<(l>>>=0)&&--er[l+1]==0&&(er[l]=void 0,ds.push(l))}var ut=l=>{if(!l)throw new fr("Cannot use deleted val. handle = "+l);return er[l]},xt=l=>{switch(l){case void 0:return 2;case null:return 4;case!0:return 6;case!1:return 8;default:let p=ds.pop()||er.length;return er[p]=l,er[p+1]=1,p}};function ps(l){return this.fromWireType(Ie()[l>>>2>>>0])}var Ty={name:"emscripten::val",fromWireType:l=>{var p=ut(l);return cs(l),p},toWireType:(l,p)=>xt(p),Db:hr,readValueFromPointer:ps,Eb:null};function Iy(l){return Jt(l>>>0,Ty)}var Ey=(l,p)=>{switch(p){case 4:return function(_){return this.fromWireType(Wt()[_>>>2>>>0])};case 8:return function(_){return this.fromWireType(Ye()[_>>>3>>>0])};default:throw new TypeError(`invalid float width (${p}): ${l}`)}};function Cy(l,p,_){_>>>=0,Jt(l>>>=0,{name:p=qt(p>>>0),fromWireType:w=>w,toWireType:(w,$)=>$,Db:hr,readValueFromPointer:Ey(p,_),Eb:null})}function zy(l,p,_,w,$){if(l>>>=0,_>>>=0,p=qt(p>>>0),$===-1&&($=4294967295),$=F=>F,w===0){var C=32-8*_;$=F=>F<>>C}var N=p.includes("unsigned")?function(F,re){return re>>>0}:function(F,re){return re};Jt(l,{name:p,fromWireType:$,toWireType:N,Db:hr,readValueFromPointer:Bu(p,_,w!==0),Eb:null})}function Oy(l,p,_){function w(C){var N=Ie()[C>>>2>>>0];return C=Ie()[C+4>>>2>>>0],new $(q().buffer,C,N)}var $=[Int8Array,Uint8Array,Int16Array,Uint16Array,Int32Array,Uint32Array,Float32Array,Float64Array,BigInt64Array,BigUint64Array][p];Jt(l>>>=0,{name:_=qt(_>>>0),fromWireType:w,Db:hr,readValueFromPointer:w},{Tb:!0})}function Ay(l,p){Jt(l>>>=0,{name:p=qt(p>>>0),fromWireType:function(_){for(var w,$=Ie()[_>>>2>>>0],C=_+4,N=C,F=0;F<=$;++F){var re=C+F;F!=$&&ee()[re>>>0]!=0||(N=Fe(N,re-N),w===void 0?w=N:(w+="\0",w+=N),N=re+1)}return Ft(_),w},toWireType:function(_,w){w instanceof ArrayBuffer&&(w=new Uint8Array(w));var $=typeof w=="string";if(!($||w instanceof Uint8Array||w instanceof Uint8ClampedArray||w instanceof Int8Array))throw new fr("Cannot pass non-string to std::string");var C=$?vu(w):w.length,N=fn(4+C+1),F=N+4;if(Ie()[N>>>2>>>0]=C,$)Xr(w,F,C+1);else if($)for($=0;$>>0]=re}else for($=0;$>>0]=w[$];return _!==null&&_.push(Ft,N),N},Db:hr,readValueFromPointer:ps,Eb(_){Ft(_)}})}var Mu=typeof TextDecoder<"u"?new TextDecoder("utf-16le"):void 0,Ry=(l,p)=>{for(var _=l>>1,w=_+p/2;!(_>=w)&&Ge()[_>>>0];)++_;if(32<(_<<=1)-l&&Mu)return Mu.decode(ee().slice(l,_));for(_="",w=0;!(w>=p/2);++w){var $=ye()[l+2*w>>>1>>>0];if($==0)break;_+=String.fromCharCode($)}return _},By=(l,p,_)=>{if(_??(_=2147483647),2>_)return 0;var w=p;_=(_-=2)<2*l.length?_/2:l.length;for(var $=0;$<_;++$){var C=l.charCodeAt($);ye()[p>>>1>>>0]=C,p+=2}return ye()[p>>>1>>>0]=0,p-w},My=l=>2*l.length,Ny=(l,p)=>{for(var _=0,w="";!(_>=p/4);){var $=U()[l+4*_>>>2>>>0];if($==0)break;++_,65536<=$?($-=65536,w+=String.fromCharCode(55296|$>>10,56320|1023&$)):w+=String.fromCharCode($)}return w},Dy=(l,p,_)=>{if(p>>>=0,_??(_=2147483647),4>_)return 0;var w=p;_=w+_-4;for(var $=0;$=C&&(C=65536+((1023&C)<<10)|1023&l.charCodeAt(++$)),U()[p>>>2>>>0]=C,(p+=4)+4>_)break}return U()[p>>>2>>>0]=0,p-w},Py=l=>{for(var p=0,_=0;_=w&&++_,p+=4}return p};function Uy(l,p,_){if(l>>>=0,p>>>=0,_=qt(_>>>=0),p===2)var w=Ry,$=By,C=My,N=F=>Ge()[F>>>1>>>0];else p===4&&(w=Ny,$=Dy,C=Py,N=F=>Ie()[F>>>2>>>0]);Jt(l,{name:_,fromWireType:F=>{for(var re,ce=Ie()[F>>>2>>>0],$e=F+4,Ce=0;Ce<=ce;++Ce){var Ne=F+4+Ce*p;Ce!=ce&&N(Ne)!=0||($e=w($e,Ne-$e),re===void 0?re=$e:(re+="\0",re+=$e),$e=Ne+p)}return Ft(F),re},toWireType:(F,re)=>{if(typeof re!="string")throw new fr(`Cannot pass non-string to C++ string type ${_}`);var ce=C(re),$e=fn(4+ce+p);return Ie()[$e>>>2>>>0]=ce/p,$(re,$e+4,ce+p),F!==null&&F.push(Ft,$e),$e},Db:hr,readValueFromPointer:ps,Eb(F){Ft(F)}})}function Wy(l,p){Jt(l>>>=0,{Ub:!0,name:p=qt(p>>>0),Db:0,fromWireType:()=>{},toWireType:()=>{}})}function Ly(l){ws(l>>>0,!u,1,!o,131072,!1),pi()}var fs=l=>{if(!Be)try{if(l(),!(0>>=0,typeof Atomics.kc=="function"&&(Atomics.kc(U(),l>>>2,l).value.then(nn),l+=128,Atomics.store(U(),l>>>2,1))}var nn=()=>{var l=pn();l&&(hs(l),fs(cl))};function qy(l,p){(l>>>=0)==p>>>0?setTimeout(nn):d?postMessage({Hb:l,Cb:"checkMailbox"}):(l=Ve[l])&&l.postMessage({Cb:"checkMailbox"})}var ms=[];function Vy(l,p,_,w,$){for(p>>>=0,w/=2,ms.length=w,_=$>>>0>>>3,$=0;$>>0];return(p?Q[p]:M0[l])(...ms)}var Fy=()=>{Ee=0};function Hy(l){l>>>=0,d?postMessage({Cb:"cleanupThread",ic:l}):_t(Ve[l])}function Gy(l){}var sn=(l,p)=>{var _=ls[l];if(_===void 0)throw l=sl(l),_=qt(l),Ft(l),new fr(`${p} has unknown type ${_}`);return _},Nu=(l,p,_)=>{var w=[];return l=l.toWireType(w,_),w.length&&(Ie()[p>>>2>>>0]=xt(w)),l};function jy(l,p,_){return p>>>=0,_>>>=0,l=ut(l>>>0),p=sn(p,"emval::as"),Nu(p,_,l)}function Ky(l,p){return p>>>=0,l=ut(l>>>0),(p=sn(p,"emval::as")).toWireType(null,l)}var an=l=>{try{l()}catch(p){J(p)}},mr=0,Vt=null,Du=0,on=[],Pu={},Uu={},Zy=0,gs=null,Xy=[];function Wu(l){return function(p){if(!Be){if(mr===0){var _=!1,w=!1;p(($=0)=>{if(!Be&&(Du=$,_=!0,w)){mr=2,an(()=>gl(Vt)),typeof MainLoop<"u"&&MainLoop.Qb&&MainLoop.resume(),$=!1;try{var C=function(){var re=U()[Vt+8>>>2>>>0];return re=fe[Uu[re]],--Ee,re()}()}catch(re){C=re,$=!0}var N=!1;if(!Vt){var F=gs;F&&(gs=null,($?F.reject:F.resolve)(C),N=!0)}if($&&!N)throw C}}),w=!0,_||(mr=1,Vt=function(){var $=fn(65548),C=$+12;Ie()[$>>>2>>>0]=C,Ie()[$+4>>>2>>>0]=C+65536,C=on[0];var N=Pu[C];return N===void 0&&(N=Zy++,Pu[C]=N,Uu[N]=C),C=N,U()[$+8>>>2>>>0]=C,$}(),typeof MainLoop<"u"&&MainLoop.Qb&&MainLoop.pause(),an(()=>hl(Vt)))}else mr===2?(mr=0,an(_l),Ft(Vt),Vt=null,Xy.forEach(fs)):J(`invalid state: ${mr}`);return Du}}(p=>{l().then(p)})}function Yy(l){return l>>>=0,Wu(async()=>{var p=await ut(l);return xt(p)})}var un=[];function Qy(l,p,_,w){return _>>>=0,w>>>=0,(l=un[l>>>0])(null,p=ut(p>>>0),_,w)}var Jy={},ln=l=>{var p=Jy[l];return p===void 0?qt(l):p};function e0(l,p,_,w,$){return _>>>=0,w>>>=0,$>>>=0,(l=un[l>>>0])(p=ut(p>>>0),p[_=ln(_)],w,$)}function t0(l,p){return p>>>=0,(l=ut(l>>>0))==ut(p)}var Lu=()=>typeof globalThis=="object"?globalThis:Function("return this")();function r0(l){return(l>>>=0)==0?xt(Lu()):(l=ln(l),xt(Lu()[l]))}var i0=l=>{var p=un.length;return un.push(l),p},n0=(l,p)=>{for(var _=Array(l),w=0;w>>2>>>0],"parameter "+w);return _},qu=(l,p)=>Object.defineProperty(p,"name",{value:l});function s0(l,p,_){var w=(p=n0(l,p>>>0)).shift();l--;var $=`return function (obj, func, destructorsRef, args) { +`,C=0,N=[];_===0&&N.push("obj");for(var F=["retType"],re=[w],ce=0;ce$e.name).join(", ")}) => ${w.name}>`,i0(qu(_,l))}function a0(l){return l=ln(l>>>0),xt(n[l])}function o0(l,p){return p>>>=0,l=ut(l>>>0),p=ut(p),xt(l[p])}function u0(l){9<(l>>>=0)&&(er[l+1]+=1)}function l0(){return xt([])}function d0(l){l=ut(l>>>0);for(var p=Array(l.length),_=0;_>>0))}function p0(){return xt({})}function f0(l){for(var p=ut(l>>>=0);p.length;){var _=p.pop();p.pop()(_)}cs(l)}function h0(l,p,_){p>>>=0,_>>>=0,l=ut(l>>>0),p=ut(p),_=ut(_),l[p]=_}function m0(l,p){return p>>>=0,l=(l=sn(l>>>0,"_emval_take_value")).readValueFromPointer(p),xt(l)}function g0(l,p){l=-9007199254740992>l||9007199254740992>>=0,l=new Date(1e3*l),U()[p>>>2>>>0]=l.getUTCSeconds(),U()[p+4>>>2>>>0]=l.getUTCMinutes(),U()[p+8>>>2>>>0]=l.getUTCHours(),U()[p+12>>>2>>>0]=l.getUTCDate(),U()[p+16>>>2>>>0]=l.getUTCMonth(),U()[p+20>>>2>>>0]=l.getUTCFullYear()-1900,U()[p+24>>>2>>>0]=l.getUTCDay(),l=(l.getTime()-Date.UTC(l.getUTCFullYear(),0,1,0,0,0,0))/864e5|0,U()[p+28>>>2>>>0]=l}var Vu=l=>l%4==0&&(l%100!=0||l%400==0),Fu=[0,31,60,91,121,152,182,213,244,274,305,335],Hu=[0,31,59,90,120,151,181,212,243,273,304,334];function _0(l,p){l=-9007199254740992>l||9007199254740992>>=0,l=new Date(1e3*l),U()[p>>>2>>>0]=l.getSeconds(),U()[p+4>>>2>>>0]=l.getMinutes(),U()[p+8>>>2>>>0]=l.getHours(),U()[p+12>>>2>>>0]=l.getDate(),U()[p+16>>>2>>>0]=l.getMonth(),U()[p+20>>>2>>>0]=l.getFullYear()-1900,U()[p+24>>>2>>>0]=l.getDay();var _=(Vu(l.getFullYear())?Fu:Hu)[l.getMonth()]+l.getDate()-1|0;U()[p+28>>>2>>>0]=_,U()[p+36>>>2>>>0]=-60*l.getTimezoneOffset(),_=new Date(l.getFullYear(),6,1).getTimezoneOffset();var w=new Date(l.getFullYear(),0,1).getTimezoneOffset();l=0|(_!=w&&l.getTimezoneOffset()==Math.min(w,_)),U()[p+32>>>2>>>0]=l}function y0(l){l>>>=0;var p=new Date(U()[l+20>>>2>>>0]+1900,U()[l+16>>>2>>>0],U()[l+12>>>2>>>0],U()[l+8>>>2>>>0],U()[l+4>>>2>>>0],U()[l>>>2>>>0],0),_=U()[l+32>>>2>>>0],w=p.getTimezoneOffset(),$=new Date(p.getFullYear(),6,1).getTimezoneOffset(),C=new Date(p.getFullYear(),0,1).getTimezoneOffset(),N=Math.min(C,$);return 0>_?U()[l+32>>>2>>>0]=+($!=C&&N==w):0<_!=(N==w)&&($=Math.max(C,$),p.setTime(p.getTime()+6e4*((0<_?N:$)-w))),U()[l+24>>>2>>>0]=p.getDay(),_=(Vu(p.getFullYear())?Fu:Hu)[p.getMonth()]+p.getDate()-1|0,U()[l+28>>>2>>>0]=_,U()[l>>>2>>>0]=p.getSeconds(),U()[l+4>>>2>>>0]=p.getMinutes(),U()[l+8>>>2>>>0]=p.getHours(),U()[l+12>>>2>>>0]=p.getDate(),U()[l+16>>>2>>>0]=p.getMonth(),U()[l+20>>>2>>>0]=p.getYear(),l=p.getTime(),BigInt(isNaN(l)?-1:l/1e3)}function Gu(l,p,_,w,$,C,N){return d?ge(16,1,l,p,_,w,$,C,N):-52}function ju(l,p,_,w,$,C){if(d)return ge(17,1,l,p,_,w,$,C)}var hi={},b0=()=>performance.timeOrigin+performance.now();function Ku(l,p){if(d)return ge(18,1,l,p);if(hi[l]&&(clearTimeout(hi[l].id),delete hi[l]),!p)return 0;var _=setTimeout(()=>{delete hi[l],fs(()=>dl(l,performance.timeOrigin+performance.now()))},p);return hi[l]={id:_,rc:p},0}function w0(l,p,_,w){l>>>=0,p>>>=0,_>>>=0,w>>>=0;var $=new Date().getFullYear(),C=new Date($,0,1).getTimezoneOffset();$=new Date($,6,1).getTimezoneOffset();var N=Math.max(C,$);Ie()[l>>>2>>>0]=60*N,U()[p>>>2>>>0]=+(C!=$),l=(p=F=>{var re=Math.abs(F);return`UTC${0<=F?"-":"+"}${String(Math.floor(re/60)).padStart(2,"0")}${String(re%60).padStart(2,"0")}`})(C),p=p($),$Date.now();function $0(l,p,_){return 0<=l&&3>=l?(l===0?l=Date.now():l=performance.timeOrigin+performance.now(),V[_>>>0>>>3]=BigInt(Math.round(1e6*l)),0):28}var _s=[],Zu=(l,p)=>{_s.length=0;for(var _;_=ee()[l++>>>0];){var w=_!=105;p+=(w&=_!=112)&&p%8?4:0,_s.push(_==112?Ie()[p>>>2>>>0]:_==106?V[p>>>3]:_==105?U()[p>>>2>>>0]:Ye()[p>>>3>>>0]),p+=w?8:4}return _s};function x0(l,p,_){return l>>>=0,p=Zu(p>>>0,_>>>0),Q[l](...p)}function S0(l,p,_){return l>>>=0,p=Zu(p>>>0,_>>>0),Q[l](...p)}var k0=()=>{};function T0(l,p){return I(Fe(l>>>0,p>>>0))}var I0=()=>{throw Ee+=1,"unwind"};function E0(){return 4294901760}var C0=()=>navigator.hardwareConcurrency;function z0(){return J("Cannot use emscripten_pc_get_function without -sUSE_OFFSET_CONVERTER"),0}function O0(l){l>>>=0;var p=ee().length;if(l<=p||4294901760=_;_*=2){var w=p*(1+.2/_);w=Math.min(w,l+100663296);e:{w=(Math.min(4294901760,65536*Math.ceil(Math.max(l,w)/65536))-z.buffer.byteLength+65535)/65536|0;try{z.grow(w),T();var $=1;break e}catch{}$=void 0}if($)return!0}return!1}var dn=()=>(J("Cannot use convertFrameToPC (needed by __builtin_return_address) without -sUSE_OFFSET_CONVERTER"),0),mi={},Xu=l=>{l.forEach(p=>{dn()})};function A0(){var l=Error().stack.toString().split(` +`);return l[0]=="Error"&&l.shift(),Xu(l),mi.Mb=dn(),mi.dc=l,mi.Mb}function R0(l,p,_){if(l>>>=0,p>>>=0,mi.Mb==l)var w=mi.dc;else(w=Error().stack.toString().split(` +`))[0]=="Error"&&w.shift(),Xu(w);for(var $=3;w[$]&&dn()!=l;)++$;for(l=0;l<_&&w[l+$];++l)U()[p+4*l>>>2>>>0]=dn();return l}var ys,bs={},Yu=()=>{if(!ys){var l,p={USER:"web_user",LOGNAME:"web_user",PATH:"/",PWD:"/",HOME:"/home/web_user",LANG:(typeof navigator=="object"&&navigator.languages&&navigator.languages[0]||"C").replace("-","_")+".UTF-8",_:"./this.program"};for(l in bs)bs[l]===void 0?delete p[l]:p[l]=bs[l];var _=[];for(l in p)_.push(`${l}=${p[l]}`);ys=_}return ys};function Qu(l,p){if(d)return ge(19,1,l,p);l>>>=0,p>>>=0;var _=0;return Yu().forEach((w,$)=>{var C=p+_;for($=Ie()[l+4*$>>>2>>>0]=C,C=0;C>>0]=w.charCodeAt(C);q()[$>>>0]=0,_+=w.length+1}),0}function Ju(l,p){if(d)return ge(20,1,l,p);l>>>=0,p>>>=0;var _=Yu();Ie()[l>>>2>>>0]=_.length;var w=0;return _.forEach($=>w+=$.length+1),Ie()[p>>>2>>>0]=w,0}function el(l){return d?ge(21,1,l):52}function tl(l,p,_,w){return d?ge(22,1,l,p,_,w):52}function rl(l,p,_,w){return d?ge(23,1,l,p,_,w):70}var B0=[null,[],[]];function il(l,p,_,w){if(d)return ge(24,1,l,p,_,w);p>>>=0,_>>>=0,w>>>=0;for(var $=0,C=0;C<_;C++){var N=Ie()[p>>>2>>>0],F=Ie()[p+4>>>2>>>0];p+=8;for(var re=0;re>>0],$e=B0[l];ce===0||ce===10?((l===1?x:I)(yu($e)),$e.length=0):$e.push(ce)}$+=F}return Ie()[w>>>2>>>0]=$,0}d||function(){for(var l=n.numThreads-1;l--;)hu();be.unshift(()=>{H++,function(p){d?p():Promise.all(je.map(fi)).then(p)}(()=>G())})}();for(var nl=Array(256),cn=0;256>cn;++cn)nl[cn]=String.fromCharCode(cn);Ru=nl,fr=n.BindingError=class extends Error{constructor(l){super(l),this.name="BindingError"}},n.InternalError=class extends Error{constructor(l){super(l),this.name="InternalError"}},er.push(0,1,void 0,1,null,1,!0,1,!1,1),n.count_emval_handles=()=>er.length/2-5-ds.length;var fe,M0=[Qe,rt,mu,bu,wu,$u,xu,Su,ku,Tu,Iu,Eu,Cu,zu,Ou,Au,Gu,ju,Ku,Qu,Ju,el,tl,rl,il];(async function(){function l(w,$){return fe=w.exports,fe=function(){var C=fe,N={};for(let[F,re]of Object.entries(C))N[F]=typeof re=="function"?(...ce)=>{on.push(F);try{return re(...ce)}finally{Be||(on.pop(),Vt&&mr===1&&on.length===0&&(mr=0,Ee+=1,an(ml),typeof Fibers<"u"&&Fibers.sc()))}}:re;return N}(),fe=function(){var C=fe,N=re=>ce=>re(ce)>>>0,F=re=>()=>re()>>>0;return(C=Object.assign({},C)).Ea=N(C.Ea),C.gb=F(C.gb),C.ib=N(C.ib),C.ub=N(C.ub),C.vb=F(C.vb),C.__cxa_get_exception_ptr=N(C.__cxa_get_exception_ptr),C}(),ci.push(fe.jb),O=$,G(),fe}H++;var p=te();if(n.instantiateWasm)return new Promise(w=>{n.instantiateWasm(p,($,C)=>{l($,C),w($.exports)})});if(d)return new Promise(w=>{tt=$=>{var C=new WebAssembly.Instance($,te());w(l(C,$))}});M??(M=n.locateFile?n.locateFile?n.locateFile("ort-wasm-simd-threaded.jsep.wasm",v):v+"ort-wasm-simd-threaded.jsep.wasm":new URL("/assets/ort-wasm-simd-threaded.jsep-CLPRrI3A.wasm",import.meta.url).href);try{var _=await async function(w){var $=M;if(!le&&typeof WebAssembly.instantiateStreaming=="function"&&!D($))try{var C=fetch($,{credentials:"same-origin"});return await WebAssembly.instantiateStreaming(C,w)}catch(N){I(`wasm streaming compile failed: ${N}`),I("falling back to ArrayBuffer instantiation")}return async function(N,F){try{var re=await async function(ce){if(!le)try{var $e=await m(ce);return new Uint8Array($e)}catch{}if(ce==M&&le)ce=new Uint8Array(le);else{if(!g)throw"both async and sync fetching of the wasm failed";ce=g(ce)}return ce}(N);return await WebAssembly.instantiate(re,F)}catch(ce){I(`failed to asynchronously prepare wasm: ${ce}`),J(ce)}}($,w)}(p);return l(_.instance,_.module)}catch(w){return s(w),Promise.reject(w)}})();var sl=l=>(sl=fe.Ea)(l),al=()=>(al=fe.Fa)();n._OrtInit=(l,p)=>(n._OrtInit=fe.Ga)(l,p),n._OrtGetLastError=(l,p)=>(n._OrtGetLastError=fe.Ha)(l,p),n._OrtCreateSessionOptions=(l,p,_,w,$,C,N,F,re,ce)=>(n._OrtCreateSessionOptions=fe.Ia)(l,p,_,w,$,C,N,F,re,ce),n._OrtAppendExecutionProvider=(l,p,_,w,$)=>(n._OrtAppendExecutionProvider=fe.Ja)(l,p,_,w,$),n._OrtAddFreeDimensionOverride=(l,p,_)=>(n._OrtAddFreeDimensionOverride=fe.Ka)(l,p,_),n._OrtAddSessionConfigEntry=(l,p,_)=>(n._OrtAddSessionConfigEntry=fe.La)(l,p,_),n._OrtReleaseSessionOptions=l=>(n._OrtReleaseSessionOptions=fe.Ma)(l),n._OrtCreateSession=(l,p,_)=>(n._OrtCreateSession=fe.Na)(l,p,_),n._OrtReleaseSession=l=>(n._OrtReleaseSession=fe.Oa)(l),n._OrtGetInputOutputCount=(l,p,_)=>(n._OrtGetInputOutputCount=fe.Pa)(l,p,_),n._OrtGetInputOutputMetadata=(l,p,_,w)=>(n._OrtGetInputOutputMetadata=fe.Qa)(l,p,_,w),n._OrtFree=l=>(n._OrtFree=fe.Ra)(l),n._OrtCreateTensor=(l,p,_,w,$,C)=>(n._OrtCreateTensor=fe.Sa)(l,p,_,w,$,C),n._OrtGetTensorData=(l,p,_,w,$)=>(n._OrtGetTensorData=fe.Ta)(l,p,_,w,$),n._OrtReleaseTensor=l=>(n._OrtReleaseTensor=fe.Ua)(l),n._OrtCreateRunOptions=(l,p,_,w)=>(n._OrtCreateRunOptions=fe.Va)(l,p,_,w),n._OrtAddRunConfigEntry=(l,p,_)=>(n._OrtAddRunConfigEntry=fe.Wa)(l,p,_),n._OrtReleaseRunOptions=l=>(n._OrtReleaseRunOptions=fe.Xa)(l),n._OrtCreateBinding=l=>(n._OrtCreateBinding=fe.Ya)(l),n._OrtBindInput=(l,p,_)=>(n._OrtBindInput=fe.Za)(l,p,_),n._OrtBindOutput=(l,p,_,w)=>(n._OrtBindOutput=fe._a)(l,p,_,w),n._OrtClearBoundOutputs=l=>(n._OrtClearBoundOutputs=fe.$a)(l),n._OrtReleaseBinding=l=>(n._OrtReleaseBinding=fe.ab)(l),n._OrtRunWithBinding=(l,p,_,w,$)=>(n._OrtRunWithBinding=fe.bb)(l,p,_,w,$),n._OrtRun=(l,p,_,w,$,C,N,F)=>(n._OrtRun=fe.cb)(l,p,_,w,$,C,N,F),n._OrtEndProfiling=l=>(n._OrtEndProfiling=fe.db)(l),n._JsepOutput=(l,p,_)=>(n._JsepOutput=fe.eb)(l,p,_),n._JsepGetNodeName=l=>(n._JsepGetNodeName=fe.fb)(l);var pn=()=>(pn=fe.gb)(),Ft=n._free=l=>(Ft=n._free=fe.hb)(l),fn=n._malloc=l=>(fn=n._malloc=fe.ib)(l),ws=(l,p,_,w,$,C)=>(ws=fe.lb)(l,p,_,w,$,C),ol=()=>(ol=fe.mb)(),ul=(l,p,_,w,$)=>(ul=fe.nb)(l,p,_,w,$),ll=l=>(ll=fe.ob)(l),vs=l=>(vs=fe.pb)(l),dl=(l,p)=>(dl=fe.qb)(l,p),cl=()=>(cl=fe.rb)(),pl=(l,p)=>(pl=fe.sb)(l,p),hn=l=>(hn=fe.tb)(l),$s=l=>($s=fe.ub)(l),xs=()=>(xs=fe.vb)(),fl=n.dynCall_ii=(l,p)=>(fl=n.dynCall_ii=fe.wb)(l,p),hl=l=>(hl=fe.xb)(l),ml=()=>(ml=fe.yb)(),gl=l=>(gl=fe.zb)(l),_l=()=>(_l=fe.Ab)();return n.stackSave=()=>xs(),n.stackRestore=l=>hn(l),n.stackAlloc=l=>$s(l),n.setValue=function(l,p,_="i8"){switch(_.endsWith("*")&&(_="*"),_){case"i1":case"i8":q()[l>>>0]=p;break;case"i16":ye()[l>>>1>>>0]=p;break;case"i32":U()[l>>>2>>>0]=p;break;case"i64":V[l>>>3]=BigInt(p);break;case"float":Wt()[l>>>2>>>0]=p;break;case"double":Ye()[l>>>3>>>0]=p;break;case"*":Ie()[l>>>2>>>0]=p;break;default:J(`invalid type for setValue: ${_}`)}},n.getValue=function(l,p="i8"){switch(p.endsWith("*")&&(p="*"),p){case"i1":case"i8":return q()[l>>>0];case"i16":return ye()[l>>>1>>>0];case"i32":return U()[l>>>2>>>0];case"i64":return V[l>>>3];case"float":return Wt()[l>>>2>>>0];case"double":return Ye()[l>>>3>>>0];case"*":return Ie()[l>>>2>>>0];default:J(`invalid type for getValue: ${p}`)}},n.UTF8ToString=Fe,n.stringToUTF8=Xr,n.lengthBytesUTF8=vu,function l(){if(0{qo(),Vs=typeof location>"u"?void 0:location.origin,Xa=import.meta.url>"file:"&&import.meta.url<"file;",Ql=()=>{{if(Xa){let e=URL;return new URL(new e("ort.bundle.min.mjs",import.meta.url).href,Vs).href}return import.meta.url}},yt=Ql(),im=()=>{if(yt&&!yt.startsWith("blob:"))return yt.substring(0,yt.lastIndexOf("/")+1)},$n=(e,t)=>{try{let r=t??yt;return(r?new URL(e,r):new URL(e)).origin===Vs}catch{return!1}},Jl=(e,t)=>{let r=t??yt;try{return(r?new URL(e,r):new URL(e)).href}catch{return}},ed=(e,t)=>`${t??"./"}${e}`,Fs=async e=>{let t=await(await fetch(e,{credentials:"same-origin"})).blob();return URL.createObjectURL(t)},td=async e=>(await import(e)).default,Hs=(h$(),Qi(Jh)).default,nm=async()=>{if(!yt)throw new Error("Failed to load proxy worker: cannot determine the script source URL.");if($n(yt))return[void 0,Hs()];let e=await Fs(yt);return[e,Hs(e)]},Gs=(m$(),Qi(tm)).default,sm=async(e,t,r)=>{if(!e&&!t&&Gs&&yt&&$n(yt))return[void 0,Gs];{let i="ort-wasm-simd-threaded.jsep.mjs",s=e??Jl(i,t),n=r&&s&&!$n(s,t),a=n?await Fs(s):s??ed(i,t);return[n?a:void 0,await td(a)]}}}),js,xn,bi,Ks,rd,id,nd,Fo,De,Kr=j(()=>{Vo(),xn=!1,bi=!1,Ks=!1,rd=()=>{if(typeof SharedArrayBuffer>"u")return!1;try{return typeof MessageChannel<"u"&&new MessageChannel().port1.postMessage(new SharedArrayBuffer(1)),WebAssembly.validate(new Uint8Array([0,97,115,109,1,0,0,0,1,4,1,96,0,0,3,2,1,0,5,4,1,3,1,1,10,11,1,9,0,65,0,254,16,2,0,26,11]))}catch{return!1}},id=()=>{try{return WebAssembly.validate(new Uint8Array([0,97,115,109,1,0,0,0,1,4,1,96,0,0,3,2,1,0,10,30,1,28,0,65,0,253,15,253,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,253,186,1,26,11]))}catch{return!1}},nd=()=>{try{return WebAssembly.validate(new Uint8Array([0,97,115,109,1,0,0,0,1,5,1,96,0,1,123,3,2,1,0,10,19,1,17,0,65,1,253,15,65,2,253,15,65,3,253,15,253,147,2,11]))}catch{return!1}},Fo=async e=>{if(xn)return Promise.resolve();if(bi)throw new Error("multiple calls to 'initializeWebAssembly()' detected.");if(Ks)throw new Error("previous call to 'initializeWebAssembly()' failed.");bi=!0;let t=e.initTimeout,r=e.numThreads;if(e.simd!==!1){if(e.simd==="relaxed"){if(!nd())throw new Error("Relaxed WebAssembly SIMD is not supported in the current environment.")}else if(!id())throw new Error("WebAssembly SIMD is not supported in the current environment.")}let i=rd();r>1&&!i&&(typeof self<"u"&&!self.crossOriginIsolated&&console.warn("env.wasm.numThreads is set to "+r+", but this will not work unless you enable crossOriginIsolated mode. See https://web.dev/cross-origin-isolation-guide/ for more info."),console.warn("WebAssembly multi-threading is not supported in the current environment. Falling back to single-threading."),e.numThreads=r=1);let s=e.wasmPaths,n=typeof s=="string"?s:void 0,a=s==null?void 0:s.mjs,o=(a==null?void 0:a.href)??a,u=s==null?void 0:s.wasm,d=(u==null?void 0:u.href)??u,c=e.wasmBinary,[f,h]=await sm(o,n,r>1),m=!1,g=[];if(t>0&&g.push(new Promise(y=>{setTimeout(()=>{m=!0,y()},t)})),g.push(new Promise((y,S)=>{let v={numThreads:r};if(c)v.wasmBinary=c;else if(d||n)v.locateFile=b=>d??n+b;else if(o&&o.indexOf("blob:")!==0)v.locateFile=b=>new URL(b,o).href;else if(f){let b=im();b&&(v.locateFile=k=>b+k)}h(v).then(b=>{bi=!1,xn=!0,js=b,y(),f&&URL.revokeObjectURL(f)},b=>{bi=!1,Ks=!0,S(b)})})),await Promise.race(g),m)throw new Error(`WebAssembly backend initializing failed due to timeout: ${t}ms`)},De=()=>{if(xn&&js)return js;throw new Error("WebAssembly is not initialized yet.")}}),Rt,Hn,Ae,Ho=j(()=>{Kr(),Rt=(e,t)=>{let r=De(),i=r.lengthBytesUTF8(e)+1,s=r._malloc(i);return r.stringToUTF8(e,s,i),t.push(s),s},Hn=(e,t,r,i)=>{if(typeof e=="object"&&e!==null){if(r.has(e))throw new Error("Circular reference in options");r.add(e)}Object.entries(e).forEach(([s,n])=>{let a=t?t+s:s;if(typeof n=="object")Hn(n,a+".",r,i);else if(typeof n=="string"||typeof n=="number")i(a,n.toString());else if(typeof n=="boolean")i(a,n?"1":"0");else throw new Error(`Can't handle extra config type: ${typeof n}`)})},Ae=e=>{let t=De(),r=t.stackSave();try{let i=t.PTR_SIZE,s=t.stackAlloc(2*i);t._OrtGetLastError(s,s+i);let n=Number(t.getValue(s,i===4?"i32":"i64")),a=t.getValue(s+i,"*"),o=a?t.UTF8ToString(a):"";throw new Error(`${e} ERROR_CODE: ${n}, ERROR_MESSAGE: ${o}`)}finally{t.stackRestore(r)}}}),am,g$=j(()=>{Kr(),Ho(),am=e=>{let t=De(),r=0,i=[],s=e||{};try{if((e==null?void 0:e.logSeverityLevel)===void 0)s.logSeverityLevel=2;else if(typeof e.logSeverityLevel!="number"||!Number.isInteger(e.logSeverityLevel)||e.logSeverityLevel<0||e.logSeverityLevel>4)throw new Error(`log serverity level is not valid: ${e.logSeverityLevel}`);if((e==null?void 0:e.logVerbosityLevel)===void 0)s.logVerbosityLevel=0;else if(typeof e.logVerbosityLevel!="number"||!Number.isInteger(e.logVerbosityLevel))throw new Error(`log verbosity level is not valid: ${e.logVerbosityLevel}`);(e==null?void 0:e.terminate)===void 0&&(s.terminate=!1);let n=0;return(e==null?void 0:e.tag)!==void 0&&(n=Rt(e.tag,i)),r=t._OrtCreateRunOptions(s.logSeverityLevel,s.logVerbosityLevel,!!s.terminate,n),r===0&&Ae("Can't create run options."),(e==null?void 0:e.extra)!==void 0&&Hn(e.extra,"",new WeakSet,(a,o)=>{let u=Rt(a,i),d=Rt(o,i);t._OrtAddRunConfigEntry(r,u,d)!==0&&Ae(`Can't set a run config entry: ${a} - ${o}.`)}),[r,i]}catch(n){throw r!==0&&t._OrtReleaseRunOptions(r),i.forEach(a=>t._free(a)),n}}}),sd,ad,od,wi,ud,om,_$=j(()=>{Kr(),Ho(),sd=e=>{switch(e){case"disabled":return 0;case"basic":return 1;case"extended":return 2;case"all":return 99;default:throw new Error(`unsupported graph optimization level: ${e}`)}},ad=e=>{switch(e){case"sequential":return 0;case"parallel":return 1;default:throw new Error(`unsupported execution mode: ${e}`)}},od=e=>{e.extra||(e.extra={}),e.extra.session||(e.extra.session={});let t=e.extra.session;t.use_ort_model_bytes_directly||(t.use_ort_model_bytes_directly="1"),e.executionProviders&&e.executionProviders.some(r=>(typeof r=="string"?r:r.name)==="webgpu")&&(e.enableMemPattern=!1)},wi=(e,t,r,i)=>{let s=Rt(t,i),n=Rt(r,i);De()._OrtAddSessionConfigEntry(e,s,n)!==0&&Ae(`Can't set a session config entry: ${t} - ${r}.`)},ud=async(e,t,r)=>{for(let i of t){let s=typeof i=="string"?i:i.name,n=[];switch(s){case"webnn":if(s="WEBNN",typeof i!="string"){let c=i==null?void 0:i.deviceType;c&&wi(e,"deviceType",c,r)}break;case"webgpu":if(s="JS",typeof i!="string"){let c=i;if(c!=null&&c.preferredLayout){if(c.preferredLayout!=="NCHW"&&c.preferredLayout!=="NHWC")throw new Error(`preferredLayout must be either 'NCHW' or 'NHWC': ${c.preferredLayout}`);wi(e,"preferredLayout",c.preferredLayout,r)}}break;case"wasm":case"cpu":continue;default:throw new Error(`not supported execution provider: ${s}`)}let a=Rt(s,r),o=n.length,u=0,d=0;if(o>0){u=De()._malloc(o*De().PTR_SIZE),r.push(u),d=De()._malloc(o*De().PTR_SIZE),r.push(d);for(let c=0;c{let t=De(),r=0,i=[],s=e||{};od(s);try{let n=sd(s.graphOptimizationLevel??"all"),a=ad(s.executionMode??"sequential"),o=typeof s.logId=="string"?Rt(s.logId,i):0,u=s.logSeverityLevel??2;if(!Number.isInteger(u)||u<0||u>4)throw new Error(`log serverity level is not valid: ${u}`);let d=s.logVerbosityLevel??0;if(!Number.isInteger(d)||d<0||d>4)throw new Error(`log verbosity level is not valid: ${d}`);let c=typeof s.optimizedModelFilePath=="string"?Rt(s.optimizedModelFilePath,i):0;if(r=t._OrtCreateSessionOptions(n,!!s.enableCpuMemArena,!!s.enableMemPattern,a,!!s.enableProfiling,0,o,u,d,c),r===0&&Ae("Can't create session options."),s.executionProviders&&await ud(r,s.executionProviders,i),s.enableGraphCapture!==void 0){if(typeof s.enableGraphCapture!="boolean")throw new Error(`enableGraphCapture must be a boolean value: ${s.enableGraphCapture}`);wi(r,"enableGraphCapture",s.enableGraphCapture.toString(),i)}if(s.freeDimensionOverrides)for(let[f,h]of Object.entries(s.freeDimensionOverrides)){if(typeof f!="string")throw new Error(`free dimension override name must be a string: ${f}`);if(typeof h!="number"||!Number.isInteger(h)||h<0)throw new Error(`free dimension override value must be a non-negative integer: ${h}`);let m=Rt(f,i);t._OrtAddFreeDimensionOverride(r,m,h)!==0&&Ae(`Can't set a free dimension override: ${f} - ${h}.`)}return s.extra!==void 0&&Hn(s.extra,"",new WeakSet,(f,h)=>{wi(r,f,h,i)}),[r,i]}catch(n){throw r!==0&&t._OrtReleaseSessionOptions(r)!==0&&Ae("Can't release session options."),i.forEach(a=>t._free(a)),n}}}),Nr,ar,Dr,os,Gn,Go,jo,Ya,_e=j(()=>{Nr=e=>{switch(e){case"int8":return 3;case"uint8":return 2;case"bool":return 9;case"int16":return 5;case"uint16":return 4;case"int32":return 6;case"uint32":return 12;case"float16":return 10;case"float32":return 1;case"float64":return 11;case"string":return 8;case"int64":return 7;case"uint64":return 13;case"int4":return 22;case"uint4":return 21;default:throw new Error(`unsupported data type: ${e}`)}},ar=e=>{switch(e){case 3:return"int8";case 2:return"uint8";case 9:return"bool";case 5:return"int16";case 4:return"uint16";case 6:return"int32";case 12:return"uint32";case 10:return"float16";case 1:return"float32";case 11:return"float64";case 8:return"string";case 7:return"int64";case 13:return"uint64";case 22:return"int4";case 21:return"uint4";default:throw new Error(`unsupported data type: ${e}`)}},Dr=(e,t)=>{let r=[-1,4,1,1,2,2,4,8,-1,1,2,8,4,8,-1,-1,-1,-1,-1,-1,-1,.5,.5][e],i=typeof t=="number"?t:t.reduce((s,n)=>s*n,1);return r>0?Math.ceil(i*r):void 0},os=e=>{switch(e){case"float16":return typeof Float16Array<"u"&&Float16Array.from?Float16Array:Uint16Array;case"float32":return Float32Array;case"uint8":return Uint8Array;case"int8":return Int8Array;case"uint16":return Uint16Array;case"int16":return Int16Array;case"int32":return Int32Array;case"bool":return Uint8Array;case"float64":return Float64Array;case"uint32":return Uint32Array;case"int64":return BigInt64Array;case"uint64":return BigUint64Array;default:throw new Error(`unsupported type: ${e}`)}},Gn=e=>{switch(e){case"verbose":return 0;case"info":return 1;case"warning":return 2;case"error":return 3;case"fatal":return 4;default:throw new Error(`unsupported logging level: ${e}`)}},Go=e=>e==="float32"||e==="float16"||e==="int32"||e==="int64"||e==="uint32"||e==="uint8"||e==="bool"||e==="uint4"||e==="int4",jo=e=>e==="float32"||e==="float16"||e==="int32"||e==="int64"||e==="uint32"||e==="uint64"||e==="int8"||e==="uint8"||e==="bool"||e==="uint4"||e==="int4",Ya=e=>{switch(e){case"none":return 0;case"cpu":return 1;case"cpu-pinned":return 2;case"texture":return 3;case"gpu-buffer":return 4;case"ml-tensor":return 5;default:throw new Error(`unsupported data location: ${e}`)}}}),Ko,um=j(()=>{qo(),Ko=async e=>{if(typeof e=="string"){let t=await fetch(e);if(!t.ok)throw new Error(`failed to load external data file: ${e}`);let r=t.headers.get("Content-Length"),i=r?parseInt(r,10):0;if(i<1073741824)return new Uint8Array(await t.arrayBuffer());{if(!t.body)throw new Error(`failed to load external data file: ${e}, no response body.`);let s=t.body.getReader(),n;try{n=new ArrayBuffer(i)}catch(o){if(o instanceof RangeError){let u=Math.ceil(i/65536);n=new WebAssembly.Memory({initial:u,maximum:u}).buffer}else throw o}let a=0;for(;;){let{done:o,value:u}=await s.read();if(o)break;let d=u.byteLength;new Uint8Array(n,a,d).set(u),a+=d}return new Uint8Array(n,0,i)}}else return e instanceof Blob?new Uint8Array(await e.arrayBuffer()):e instanceof Uint8Array?e:new Uint8Array(e)}}),ld,dd,cd,pd,Zo,fd,Te,pr=j(()=>{_e(),ld=["V","I","W","E","F"],dd=(e,t)=>{console.log(`[${ld[e]},${new Date().toISOString()}]${t}`)},Zo=(e,t)=>{cd=e,pd=t},fd=(e,t)=>{let r=Gn(e),i=Gn(cd);r>=i&&dd(r,typeof t=="function"?t():t)},Te=(...e)=>{pd&&fd(...e)}}),hd,ai,B,jn,lm,dm,cm,we=j(()=>{hd=class{static calcMatMulShape(e,t){return e[1]!==t[0]?void 0:[e[0],t[1]]}},ai=class{static calcShape(e,t,r=!1){let i=e.length,s=t.length;if(i===0)return t;if(s===0)return e;let n=Math.max(e.length,t.length),a=new Array(n);if(r){if(i<2||s<2)return;let o=hd.calcMatMulShape([e[i-2],e[i-1]],[t[s-2],t[s-1]]);if(o===void 0)return;[a[n-2],a[n-1]]=o}for(let o=r?3:1;o<=n;o++){let u=i-o<0?1:e[i-o],d=s-o<0?1:t[s-o];if(u!==d&&u>1&&d>1)return;let c=Math.max(u,d);if(u&&d)a[n-o]=Math.max(u,d);else{if(c>1)return;a[n-o]=0}}return a}static isValidBroadcast(e,t){let r=e.length,i=t.length;if(r>i)return!1;for(let s=1;s<=r;s++)if(e[r-s]!==1&&e[r-s]!==t[i-s])return!1;return!0}},B=class Nn{static size(t){return Nn.getSizeFromDimensionRange(t,0,t.length)}static convertShape(t,r=4){let i=t.length;if(i===0)return[];let s=new Array(i),n=i-1;for(;n>=0;){if(t[n]%r===0){s[n]=t[n]/r;break}if(r%t[n]!==0)throw new Error("cannot convert shape");s[n]=1,r/=t[n],n--}for(n--;n>=0;n--)s[n]=t[n];return s}static sizeFromDimension(t,r){if(r<0||r>t.length)throw new Error(`invalid dimension of ${r} for sizeFromDimension as Tensor has ${t.length} dimensions.`);return Nn.getSizeFromDimensionRange(t,r,t.length)}static sizeToDimension(t,r){if(r<0||r>t.length)throw new Error(`invalid dimension of ${r} for sizeToDimension as Tensor has ${t.length} dimensions.`);return Nn.getSizeFromDimensionRange(t,0,r)}static getSizeFromDimensionRange(t,r,i){let s=1;for(let n=r;n=0;--s)i[s]=i[s+1]*t[s+1];return i}static normalizeAxis(t,r){if(t<-r&&t>=r)throw new Error("unsupported axis for this operation.");return t<0?t+r:t}static normalizeAxes(t,r){return t.map(i=>this.normalizeAxis(i,r??t.length))}static sortBasedOnPerm(t,r){return r?r.map(i=>t[i]):t.slice().reverse()}static padShape(t,r){let i=t.length;return t.map((s,n)=>s+r[n]+r[n+i])}static areEqual(t,r){return t.length!==r.length?!1:t.every((i,s)=>i===r[s])}},jn=class Ai{static adjustPoolAttributes(t,r,i,s,n,a){if(!t&&i.length!==r.length-2)throw new Error("length of specified kernel shapes should be 2 less than length of input dimensions");if(t)for(let o=0;o=i.length?i.push(r[o+2]):i[o]=r[o+2];for(let o=0;o=i[o]||a[o+i.length]>=i[o])throw new Error("pads should be smaller than kernel")}}static adjustPadsBasedOnAutoPad(t,r,i,s,n,a,o){if(o){if(n.length!==2*(t.length-2))throw new Error("length of pads should be twice the length of data dimensions");if(r.length!==t.length-2)throw new Error("length of strides should be the length of data dimensions");if(s.length!==t.length-2)throw new Error("length of kernel shapes should be the length of data dimensions");for(let u=0;u{_e(),Xo=(e,t)=>new(os(t))(e)}),Zs,Qa,Xs,md,Ys,gd,Qs,Js,ea,_d,fm,y$=j(()=>{_e(),pr(),Zs=new Map([["float32",32],["float16",16],["int32",32],["uint32",32],["int64",64],["uint64",64],["int8",8],["uint8",8],["int4",4],["uint4",4]]),Qa=(e,t)=>{if(t==="int32")return e;let r=Zs.get(t);if(!r)throw new Error(`WebNN backend does not support data type: ${t}`);let i=r/8;if(e.byteLength%i!==0)throw new Error(`Invalid Uint8Array length - must be a multiple of ${i}.`);let s=e.byteLength/i,n=new(os(t))(e.buffer,e.byteOffset,s);switch(t){case"int64":case"uint64":{let a=new Int32Array(s);for(let o=0;o2147483647n||u<-2147483648n)throw new Error("Can not convert int64 data to int32 - value out of range.");a[o]=Number(u)}return new Uint8Array(a.buffer)}case"int8":case"uint8":case"uint32":{if(t==="uint32"&&n.some(o=>o>2147483647))throw new Error("Can not convert uint32 data to int32 - value out of range.");let a=Int32Array.from(n,Number);return new Uint8Array(a.buffer)}default:throw new Error(`Unsupported data conversion from ${t} to 'int32'`)}},Xs=(e,t)=>{if(t==="int32")return e;if(e.byteLength%4!==0)throw new Error("Invalid Uint8Array length - must be a multiple of 4 (int32).");let r=e.byteLength/4,i=new Int32Array(e.buffer,e.byteOffset,r);switch(t){case"int64":{let s=BigInt64Array.from(i,BigInt);return new Uint8Array(s.buffer)}case"uint64":{if(i.some(n=>n<0))throw new Error("Can not convert int32 data to uin64 - negative value found.");let s=BigUint64Array.from(i,BigInt);return new Uint8Array(s.buffer)}case"int8":{if(i.some(n=>n<-128||n>127))throw new Error("Can not convert int32 data to int8 - value out of range.");let s=Int8Array.from(i,Number);return new Uint8Array(s.buffer)}case"uint8":{if(i.some(s=>s<0||s>255))throw new Error("Can not convert int32 data to uint8 - value out of range.");return Uint8Array.from(i,Number)}case"uint32":{if(i.some(n=>n<0))throw new Error("Can not convert int32 data to uint32 - negative value found.");let s=Uint32Array.from(i,Number);return new Uint8Array(s.buffer)}default:throw new Error(`Unsupported data conversion from 'int32' to ${t}`)}},md=1,Ys=()=>md++,gd=new Map([["int8","int32"],["uint8","int32"],["uint32","int32"],["int64","int32"]]),Qs=(e,t)=>{let r=Zs.get(e);if(!r)throw new Error(`WebNN backend does not support data type: ${e}`);return t.length>0?Math.ceil(t.reduce((i,s)=>i*s)*r/8):0},Js=class{constructor(e){this.isDataConverted=!1;let{sessionId:t,context:r,tensor:i,dataType:s,shape:n,fallbackDataType:a}=e;this.sessionId=t,this.mlContext=r,this.mlTensor=i,this.dataType=s,this.tensorShape=n,this.fallbackDataType=a}get tensor(){return this.mlTensor}get type(){return this.dataType}get fallbackType(){return this.fallbackDataType}get shape(){return this.tensorShape}get byteLength(){return Qs(this.dataType,this.tensorShape)}destroy(){Te("verbose",()=>"[WebNN] TensorWrapper.destroy"),this.mlTensor.destroy()}write(e){this.mlContext.writeTensor(this.mlTensor,e)}async read(e){if(this.fallbackDataType){let t=await this.mlContext.readTensor(this.mlTensor),r=Xs(new Uint8Array(t),this.dataType);if(e){(e instanceof ArrayBuffer?new Uint8Array(e):new Uint8Array(e.buffer,e.byteOffset,e.byteLength)).set(r);return}else return r.buffer}else return e?this.mlContext.readTensor(this.mlTensor,e):this.mlContext.readTensor(this.mlTensor)}canReuseTensor(e,t,r){return this.mlContext===e&&this.dataType===t&&this.tensorShape.length===r.length&&this.tensorShape.every((i,s)=>i===r[s])}setIsDataConverted(e){this.isDataConverted=e}},ea=class{constructor(e,t){this.tensorManager=e,this.wrapper=t}get tensorWrapper(){return this.wrapper}releaseTensor(){this.tensorWrapper&&(this.tensorManager.releaseTensor(this.tensorWrapper),this.wrapper=void 0)}async ensureTensor(e,t,r,i){let s=this.tensorManager.getMLContext(e),n;if(!s.opSupportLimits().input.dataTypes.includes(t)){if(n=gd.get(t),!n||!s.opSupportLimits().input.dataTypes.includes(n))throw new Error(`WebNN backend does not support data type: ${t}`);Te("verbose",()=>`[WebNN] TensorIdTracker.ensureTensor: fallback dataType from ${t} to ${n}`)}if(this.wrapper){if(this.wrapper.canReuseTensor(s,t,r))return this.wrapper.tensor;if(i){if(this.wrapper.byteLength!==Qs(t,r))throw new Error("Unable to copy data to tensor with different size.");this.activeUpload=new Uint8Array(await this.wrapper.read())}this.tensorManager.releaseTensor(this.wrapper)}let a=typeof MLTensorUsage>"u"?void 0:MLTensorUsage.READ|MLTensorUsage.WRITE;return this.wrapper=await this.tensorManager.getCachedTensor(e,t,r,a,!0,!0,n),i&&this.activeUpload&&(this.wrapper.write(this.activeUpload),this.activeUpload=void 0),this.wrapper.tensor}upload(e){let t=e;if(this.wrapper){if(this.wrapper.fallbackType)if(this.wrapper.fallbackType==="int32")t=Qa(e,this.wrapper.type),this.wrapper.setIsDataConverted(!0);else throw new Error(`Unsupported fallback data type: ${this.wrapper.fallbackType}`);if(e.byteLength===this.wrapper.byteLength){this.wrapper.write(t);return}else Te("verbose",()=>"Data size does not match tensor size. Releasing tensor."),this.releaseTensor()}this.activeUpload?this.activeUpload.set(t):this.activeUpload=new Uint8Array(t)}async download(e){var t,r;if(this.activeUpload){let i=(t=this.wrapper)!=null&&t.isDataConverted?Xs(this.activeUpload,(r=this.wrapper)==null?void 0:r.type):this.activeUpload;if(e){e instanceof ArrayBuffer?new Uint8Array(e).set(i):new Uint8Array(e.buffer,e.byteOffset,e.byteLength).set(i);return}else return i.buffer}if(!this.wrapper)throw new Error("Tensor has not been created.");return e?this.wrapper.read(e):this.wrapper.read()}},_d=class{constructor(e){this.backend=e,this.tensorTrackersById=new Map,this.freeTensors=[],this.externalTensors=new Set}getMLContext(e){let t=this.backend.getMLContext(e);if(!t)throw new Error("MLContext not found for session.");return t}reserveTensorId(){let e=Ys();return this.tensorTrackersById.set(e,new ea(this)),e}releaseTensorId(e){let t=this.tensorTrackersById.get(e);t&&(this.tensorTrackersById.delete(e),t.tensorWrapper&&this.releaseTensor(t.tensorWrapper))}async ensureTensor(e,t,r,i,s){Te("verbose",()=>`[WebNN] TensorManager.ensureTensor {tensorId: ${t}, dataType: ${r}, shape: ${i}, copyOld: ${s}}`);let n=this.tensorTrackersById.get(t);if(!n)throw new Error("Tensor not found.");return n.ensureTensor(e,r,i,s)}upload(e,t){let r=this.tensorTrackersById.get(e);if(!r)throw new Error("Tensor not found.");r.upload(t)}async download(e,t){Te("verbose",()=>`[WebNN] TensorManager.download {tensorId: ${e}, dstBuffer: ${t==null?void 0:t.byteLength}}`);let r=this.tensorTrackersById.get(e);if(!r)throw new Error("Tensor not found.");return r.download(t)}releaseTensorsForSession(e){for(let t of this.freeTensors)t.sessionId===e&&t.destroy();this.freeTensors=this.freeTensors.filter(t=>t.sessionId!==e)}registerTensor(e,t,r,i){let s=this.getMLContext(e),n=Ys(),a=new Js({sessionId:e,context:s,tensor:t,dataType:r,shape:i});return this.tensorTrackersById.set(n,new ea(this,a)),this.externalTensors.add(a),n}async getCachedTensor(e,t,r,i,s,n,a){let o=this.getMLContext(e);for(let[d,c]of this.freeTensors.entries())if(c.canReuseTensor(o,t,r)){Te("verbose",()=>`[WebNN] Reusing tensor {dataType: ${t}, ${a?`fallbackDataType: ${a},`:""} shape: ${r}`);let f=this.freeTensors.splice(d,1)[0];return f.sessionId=e,f}Te("verbose",()=>`[WebNN] MLContext.createTensor {dataType: ${t}, ${a?`fallbackDataType: ${a},`:""} shape: ${r}}`);let u=await o.createTensor({dataType:a??t,shape:r,dimensions:r,usage:i,writable:s,readable:n});return new Js({sessionId:e,context:o,tensor:u,dataType:t,shape:r,fallbackDataType:a})}releaseTensor(e){this.externalTensors.has(e)&&this.externalTensors.delete(e),this.freeTensors.push(e)}},fm=(...e)=>new _d(...e)}),vi,yd,hm,b$=j(()=>{_e(),Kr(),pm(),y$(),pr(),vi=new Map([[1,"float32"],[10,"float16"],[6,"int32"],[12,"uint32"],[7,"int64"],[13,"uint64"],[22,"int4"],[21,"uint4"],[3,"int8"],[2,"uint8"],[9,"uint8"]]),yd=(e,t)=>{if(e===t)return!0;if(e===void 0||t===void 0)return!1;let r=Object.keys(e).sort(),i=Object.keys(t).sort();return r.length===i.length&&r.every((s,n)=>s===i[n]&&e[s]===t[s])},hm=class{constructor(e){this.tensorManager=fm(this),this.mlContextBySessionId=new Map,this.sessionIdsByMLContext=new Map,this.mlContextCache=[],this.sessionGraphInputs=new Map,this.sessionGraphOutputs=new Map,this.temporaryGraphInputs=[],this.temporaryGraphOutputs=[],this.temporarySessionTensorIds=new Map,Zo(e.logLevel,!!e.debug)}get currentSessionId(){if(this.activeSessionId===void 0)throw new Error("No active session");return this.activeSessionId}onRunStart(e){Te("verbose",()=>`[WebNN] onRunStart {sessionId: ${e}}`),this.activeSessionId=e}onRunEnd(e){Te("verbose",()=>`[WebNN] onRunEnd {sessionId: ${e}}`);let t=this.temporarySessionTensorIds.get(e);if(t){for(let r of t)Te("verbose",()=>`[WebNN] releasing temporary tensor {tensorId: ${r}}`),this.tensorManager.releaseTensorId(r);this.temporarySessionTensorIds.delete(e),this.activeSessionId=void 0}}async createMLContext(e){if(e instanceof GPUDevice){let r=this.mlContextCache.findIndex(i=>i.gpuDevice===e);if(r!==-1)return this.mlContextCache[r].mlContext;{let i=await navigator.ml.createContext(e);return this.mlContextCache.push({gpuDevice:e,mlContext:i}),i}}else if(e===void 0){let r=this.mlContextCache.findIndex(i=>i.options===void 0&&i.gpuDevice===void 0);if(r!==-1)return this.mlContextCache[r].mlContext;{let i=await navigator.ml.createContext();return this.mlContextCache.push({mlContext:i}),i}}let t=this.mlContextCache.findIndex(r=>yd(r.options,e));if(t!==-1)return this.mlContextCache[t].mlContext;{let r=await navigator.ml.createContext(e);return this.mlContextCache.push({options:e,mlContext:r}),r}}registerMLContext(e,t){this.mlContextBySessionId.set(e,t);let r=this.sessionIdsByMLContext.get(t);r||(r=new Set,this.sessionIdsByMLContext.set(t,r)),r.add(e),this.temporaryGraphInputs.length>0&&(this.sessionGraphInputs.set(e,this.temporaryGraphInputs),this.temporaryGraphInputs=[]),this.temporaryGraphOutputs.length>0&&(this.sessionGraphOutputs.set(e,this.temporaryGraphOutputs),this.temporaryGraphOutputs=[])}onReleaseSession(e){this.sessionGraphInputs.delete(e),this.sessionGraphOutputs.delete(e);let t=this.mlContextBySessionId.get(e);if(!t)return;this.tensorManager.releaseTensorsForSession(e),this.mlContextBySessionId.delete(e);let r=this.sessionIdsByMLContext.get(t);if(r.delete(e),r.size===0){this.sessionIdsByMLContext.delete(t);let i=this.mlContextCache.findIndex(s=>s.mlContext===t);i!==-1&&this.mlContextCache.splice(i,1)}}getMLContext(e){return this.mlContextBySessionId.get(e)}reserveTensorId(){return this.tensorManager.reserveTensorId()}releaseTensorId(e){Te("verbose",()=>`[WebNN] releaseTensorId {tensorId: ${e}}`),this.tensorManager.releaseTensorId(e)}async ensureTensor(e,t,r,i,s){let n=vi.get(r);if(!n)throw new Error(`Unsupported ONNX data type: ${r}`);return this.tensorManager.ensureTensor(e??this.currentSessionId,t,n,i,s)}async createTemporaryTensor(e,t,r){Te("verbose",()=>`[WebNN] createTemporaryTensor {onnxDataType: ${t}, shape: ${r}}`);let i=vi.get(t);if(!i)throw new Error(`Unsupported ONNX data type: ${t}`);let s=this.tensorManager.reserveTensorId();await this.tensorManager.ensureTensor(e,s,i,r,!1);let n=this.temporarySessionTensorIds.get(e);return n?n.push(s):this.temporarySessionTensorIds.set(e,[s]),s}uploadTensor(e,t){if(!De().shouldTransferToMLTensor)throw new Error("Trying to upload to a MLTensor while shouldTransferToMLTensor is false");Te("verbose",()=>`[WebNN] uploadTensor {tensorId: ${e}, data: ${t.byteLength}}`),this.tensorManager.upload(e,t)}async downloadTensor(e,t){return this.tensorManager.download(e,t)}createMLTensorDownloader(e,t){return async()=>{let r=await this.tensorManager.download(e);return Xo(r,t)}}registerMLTensor(e,t,r,i){let s=vi.get(r);if(!s)throw new Error(`Unsupported ONNX data type: ${r}`);let n=this.tensorManager.registerTensor(e,t,s,i);return Te("verbose",()=>`[WebNN] registerMLTensor {tensor: ${t}, dataType: ${s}, dimensions: ${i}} -> {tensorId: ${n}}`),n}registerMLConstant(e,t,r,i,s,n,a=!1){if(!n)throw new Error("External mounted files are not available.");let o=e;e.startsWith("./")&&(o=e.substring(2));let u=n.get(o);if(!u)throw new Error(`File with name ${o} not found in preloaded files.`);if(t+r>u.byteLength)throw new Error("Out of bounds: data offset and length exceed the external file data size.");let d=u.slice(t,t+r).buffer,c;switch(s.dataType){case"float32":c=new Float32Array(d);break;case"float16":c=typeof Float16Array<"u"&&Float16Array.from?new Float16Array(d):new Uint16Array(d);break;case"int32":c=new Int32Array(d);break;case"uint32":c=new Uint32Array(d);break;case"int64":if(a){let f=Qa(new Uint8Array(d),"int64");c=new Int32Array(f.buffer),s.dataType="int32"}else c=new BigInt64Array(d);break;case"uint64":c=new BigUint64Array(d);break;case"int8":c=new Int8Array(d);break;case"int4":case"uint4":case"uint8":c=new Uint8Array(d);break;default:throw new Error(`Unsupported data type: ${s.dataType} in creating WebNN Constant from external data.`)}return Te("verbose",()=>`[WebNN] registerMLConstant {dataType: ${s.dataType}, shape: ${s.shape}}} ${a?"(Note: it was int64 data type and registered to int32 as workaround)":""}`),i.constant(s,c)}registerGraphInput(e){this.temporaryGraphInputs.push(e)}registerGraphOutput(e){this.temporaryGraphOutputs.push(e)}isGraphInput(e,t){let r=this.sessionGraphInputs.get(e);return r?r.includes(t):!1}isGraphOutput(e,t){let r=this.sessionGraphOutputs.get(e);return r?r.includes(t):!1}isGraphInputOutputTypeSupported(e,t,r=!0){let i=this.mlContextBySessionId.get(e),s=vi.get(Nr(t));return typeof s>"u"?!1:r?!!(i!=null&&i.opSupportLimits().input.dataTypes.includes(s)):!!(i!=null&&i.opSupportLimits().output.dataTypes.includes(s))}flush(){}}}),Yo=j(()=>{}),ta,Sn,kn,bd,wd,ra,Ja,vd,mm,w$=j(()=>{pr(),Yo(),ta=new Map([[64,250],[128,200],[256,200],[512,200],[2048,230],[4096,200],[8192,50],[16384,50],[32768,50],[65536,50],[131072,50],[262144,50],[524288,50],[1048576,50],[2097152,30],[4194304,20],[8388608,10],[12582912,10],[16777216,10],[26214400,15],[33554432,22],[44236800,2],[58982400,6],[67108864,6],[134217728,6],[167772160,6]]),Sn=[],kn=e=>Math.ceil(Number(e)/16)*16,bd=e=>{for(let t=0;twd++,Ja=async(e,t,r,i)=>{let s=kn(r),n=e.device.createBuffer({size:s,usage:GPUBufferUsage.COPY_DST|GPUBufferUsage.MAP_READ});try{let a=e.getCommandEncoder();e.endComputePass(),a.copyBufferToBuffer(t,0,n,0,s),e.flush(),await n.mapAsync(GPUMapMode.READ);let o=n.getMappedRange();if(i){let u=i();return u.set(new Uint8Array(o,0,r)),u}else return new Uint8Array(o.slice(0,r))}finally{n.destroy()}},vd=class{constructor(e){this.backend=e,this.storageCache=new Map,this.freeBuffers=new Map,this.freeUniformBuffers=new Map,this.buffersPending=[],this.capturedPendingBuffers=new Map;for(let[t]of ta)Sn.push(t),this.freeBuffers.set(t,[]),this.freeUniformBuffers.set(t,[]);this.sessionCount=0}upload(e,t){let r=t.buffer,i=t.byteOffset,s=t.byteLength,n=kn(s),a=this.storageCache.get(e);if(!a)throw new Error("gpu data for uploading does not exist");if(Number(a.originalSize)!==s)throw new Error(`inconsistent data size. gpu data size=${a.originalSize}, data size=${s}`);let o=this.backend.device.createBuffer({mappedAtCreation:!0,size:n,usage:GPUBufferUsage.MAP_WRITE|GPUBufferUsage.COPY_SRC}),u=o.getMappedRange();new Uint8Array(u).set(new Uint8Array(r,i,s)),o.unmap();let d=this.backend.device.createCommandEncoder();d.copyBufferToBuffer(o,0,a.gpuData.buffer,0,n),this.backend.device.queue.submit([d.finish()]),o.destroy(),Te("verbose",()=>`[WebGPU] GpuDataManager.upload(id=${e})`)}memcpy(e,t){let r=this.storageCache.get(e);if(!r)throw new Error("source gpu data for memcpy does not exist");let i=this.storageCache.get(t);if(!i)throw new Error("destination gpu data for memcpy does not exist");if(r.originalSize!==i.originalSize)throw new Error("inconsistent source and destination gpu data size");let s=kn(r.originalSize),n=this.backend.getCommandEncoder();this.backend.endComputePass(),n.copyBufferToBuffer(r.gpuData.buffer,0,i.gpuData.buffer,0,s)}registerExternalBuffer(e,t,r){let i;if(r){if(i=r[0],e===r[1])return Te("verbose",()=>`[WebGPU] GpuDataManager.registerExternalBuffer(size=${t}) => id=${i}, buffer is the same, skip.`),i;if(this.backend.capturedCommandList.has(this.backend.currentSessionId))throw new Error(`Registering a different external buffer under graph capture mode is not supported yet. + Please use the previous external buffer!`)}else i=ra();return this.storageCache.set(i,{gpuData:{id:i,type:0,buffer:e},originalSize:t}),Te("verbose",()=>`[WebGPU] GpuDataManager.registerExternalBuffer(size=${t}) => id=${i}, registered.`),i}unregisterExternalBuffer(e){e!==void 0&&(this.storageCache.delete(e),Te("verbose",()=>`[WebGPU] GpuDataManager.unregisterExternalBuffer() => id=${e}`))}create(e,t=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC|GPUBufferUsage.COPY_DST){let r=bd(e),i,s=(t&GPUBufferUsage.STORAGE)===GPUBufferUsage.STORAGE,n=(t&GPUBufferUsage.UNIFORM)===GPUBufferUsage.UNIFORM;if(s||n){let o=(s?this.freeBuffers:this.freeUniformBuffers).get(r);o?o.length>0?i=o.pop():i=this.backend.device.createBuffer({size:r,usage:t}):i=this.backend.device.createBuffer({size:r,usage:t})}else i=this.backend.device.createBuffer({size:r,usage:t});let a={id:ra(),type:0,buffer:i};return this.storageCache.set(a.id,{gpuData:a,originalSize:Number(e)}),Te("verbose",()=>`[WebGPU] GpuDataManager.create(size=${e}) => id=${a.id}`),a}get(e){var t;return(t=this.storageCache.get(e))==null?void 0:t.gpuData}release(e){let t=typeof e=="bigint"?Number(e):e,r=this.storageCache.get(t);if(!r){if(this.storageCache.size===0)return 0;throw new Error("releasing data does not exist")}return Te("verbose",()=>`[WebGPU] GpuDataManager.release(id=${t}), gpuDataId=${r.gpuData.id}`),this.storageCache.delete(t),this.buffersPending.push(r.gpuData.buffer),r.originalSize}async download(e,t){let r=this.storageCache.get(Number(e));if(!r)throw new Error("data does not exist");await Ja(this.backend,r.gpuData.buffer,r.originalSize,t)}refreshPendingBuffers(){if(this.buffersPending.length!==0)if(this.backend.sessionStatus==="default"){for(let e of this.buffersPending){let t=ta.get(e.size);if((e.usage&GPUBufferUsage.STORAGE)===GPUBufferUsage.STORAGE){let r=this.freeBuffers.get(e.size)||[];t===void 0||r.length>=t?e.destroy():r.push(e)}else if((e.usage&GPUBufferUsage.UNIFORM)===GPUBufferUsage.UNIFORM){let r=this.freeUniformBuffers.get(e.size)||[];t===void 0||r.length>=t?e.destroy():r.push(e)}else e.destroy()}this.buffersPending=[]}else{let e=this.capturedPendingBuffers.get(this.backend.currentSessionId);e||(e=[],this.capturedPendingBuffers.set(this.backend.currentSessionId,e));for(let t of this.buffersPending)e.push(t);this.buffersPending=[]}}dispose(){this.freeBuffers.forEach(e=>{e.forEach(t=>{t.destroy()})}),this.freeUniformBuffers.forEach(e=>{e.forEach(t=>{t.destroy()})}),this.storageCache.forEach(e=>{e.gpuData.buffer.destroy()}),this.capturedPendingBuffers.forEach(e=>{e.forEach(t=>{t.destroy()})}),this.storageCache=new Map,this.freeBuffers=new Map,this.freeUniformBuffers=new Map,this.capturedPendingBuffers=new Map}onCreateSession(){this.sessionCount+=1}onReleaseSession(e){let t=this.capturedPendingBuffers.get(e);t&&(t.forEach(r=>{r.destroy()}),this.capturedPendingBuffers.delete(e)),this.sessionCount-=1,this.sessionCount===0&&(Te("warning",()=>"[WebGPU] Clearing webgpu buffer cache"),this.storageCache.forEach(r=>{r.gpuData.buffer.destroy()}),this.storageCache=new Map)}},mm=(...e)=>new vd(...e)}),$d,Oe,qe=j(()=>{$d=class{constructor(e){Object.assign(this,e)}get cacheKey(){return this.key||(this.key=Object.getOwnPropertyNames(this).sort().map(e=>`${this[e]}`).join(";")),this.key}},Oe=e=>new $d(e)}),oi,Tn,Xe,st,pe,Le,eo,ri,kr,de,$i,P,ue,gm,Qo,xd,_m,ve=j(()=>{_e(),we(),oi=64,Tn=(e,t)=>{if(t===3)throw new Error("vec3 has same alignment as vec4, use vec4 instead");switch(Number(e)){case 10:return t>1?`vec${t}`:"f16";case 1:return t>1?`vec${t}`:"f32";case 6:return t>1?`vec${t}`:"i32";case 12:return t>1?`vec${t}`:"u32";case 7:if(t>1)throw new Error("currently not supported vecX of uint64 yet");return["vec2","i32"];case 13:if(t>1)throw new Error("currently not supported vecX of uint64 yet");return["vec2","u32"];case 9:if(t!==4)throw new Error("bool must be vec4");return["u32","vec4"];case 22:return"i32";case 21:return"u32";default:throw new Error(`Unknown data type: ${e}`)}},Xe=(e,t=1)=>{let r=Tn(e,t);return typeof r=="string"?r:r[0]},st=(e,t=1)=>{let r=Tn(e,t);return typeof r=="string"?r:r[1]},pe=(...e)=>{let t=[];return e.forEach(r=>{r.length!==0&&t.push({type:12,data:r},{type:12,data:B.computeStrides(r)})}),t},Le=e=>e%4===0?4:e%2===0?2:1,eo=(e="f32",t,r="0")=>!t||t===1?`${e}(${r})`:`vec${t}<${e}>(${r})`,ri=(e,t,r)=>e==="f32"?r:t===1?`f32(${r})`:`vec${t}(${r})`,kr=(e,t)=>t===4?`(${e}.x + ${e}.y + ${e}.z + ${e}.w)`:t===2?`(${e}.x + ${e}.y)`:t===3?`(${e}.x + ${e}.y + ${e}.z)`:e,de=(e,t,r,i)=>e.startsWith("uniforms.")&&r>4?typeof t=="string"?i==="f16"?`${e}[(${t}) / 8][(${t}) % 8 / 4][(${t}) % 8 % 4]`:`${e}[(${t}) / 4][(${t}) % 4]`:i==="f16"?`${e}[${Math.floor(t/8)}][${Math.floor(t%8/4)}][${t%8%4}]`:`${e}[${Math.floor(t/4)}][${t%4}]`:r>1?`${e}[${t}]`:e,$i=(e,t,r,i,s)=>{let n=typeof r=="number",a=n?r:r.length,o=[...new Array(a).keys()],u=a<2?"u32":a<=4?`vec${a}`:`array`,d=Tn(t,s),c=typeof d=="string"?d:d[1],f=typeof d=="string"?d:d[0],h={indices:u,value:c,storage:f,tensor:t},m=D=>typeof D=="string"?D:`${D}u`,g={offsetToIndices:!1,indicesToOffset:!1,broadcastedIndicesToOffset:!1,set:!1,setByIndices:!1,get:!1,getByIndices:!1},y=n?"uniforms.":"",S=`${y}${e}_shape`,v=`${y}${e}_strides`,b="";for(let D=0;D ${h.indices} { + var indices: ${h.indices}; + var current = offset; + ${b} + return indices; + }`,x=D=>(g.offsetToIndices=!0,a<2?D:`o2i_${e}(${D})`),I=[];if(a>=2)for(let D=a-1;D>=0;D--)I.push(`${de(v,D,a)} * (indices[${D}])`);let z=a<2?"":` + fn i2o_${e}(indices: ${h.indices}) -> u32 { + return ${I.join("+")}; + }`,O=D=>(g.indicesToOffset=!0,a<2?D:`i2o_${e}(${D})`),A=(...D)=>a===0?"0u":`${h.indices}(${D.map(m).join(",")})`,R=(D,q)=>a<2?`${D}`:`${de(D,q,a)}`,W=(D,q,ee)=>a<2?`${D}=${ee};`:`${de(D,q,a)}=${ee};`,ie={},X=(D,q)=>{g.broadcastedIndicesToOffset=!0;let ee=`${q.name}broadcastedIndicesTo${e}Offset`;if(ee in ie)return`${ee}(${D})`;let ye=[];for(let Ge=a-1;Ge>=0;Ge--){let U=q.indicesGet("outputIndices",Ge+q.rank-a);ye.push(`${R(v,Ge)} * (${U} % ${R(S,Ge)})`)}return ie[ee]=`fn ${ee}(outputIndices: ${q.type.indices}) -> u32 { + return ${ye.length>0?ye.join("+"):"0u"}; + }`,`${ee}(${D})`},ne=(D,q)=>(()=>{if(h.storage===h.value)return`${e}[${D}]=${q};`;if(h.storage==="vec2"&&h.value==="i32")return`${e}[${D}]=vec2(u32(${q}), select(0u, 0xFFFFFFFFu, ${q} < 0));`;if(h.storage==="vec2"&&h.value==="u32")return`${e}[${D}]=vec2(u32(${q}), 0u);`;if(h.storage==="u32"&&h.value==="vec4")return`${e}[${D}]=dot(vec4(0x1, 0x100, 0x10000, 0x1000000), vec4(${q}));`;throw new Error(`not supported combination of storage type ${h.storage} and value type ${h.value} yet`)})(),Y=D=>(()=>{if(h.storage===h.value)return`${e}[${D}]`;if(h.storage==="vec2"&&h.value==="i32")return`i32(${e}[${D}].x)`;if(h.storage==="vec2"&&h.value==="u32")return`u32(${e}[${D}].x)`;if(h.storage==="u32"&&h.value==="vec4")return`vec4(bool(${e}[${D}] & 0xFFu), bool(${e}[${D}] & 0xFF00u), bool(${e}[${D}] & 0xFF0000u), bool(${e}[${D}] & 0xFF000000u))`;throw new Error(`not supported combination of storage type ${h.storage} and value type ${h.value} yet`)})(),oe=a<2?"":` + fn get_${e}ByIndices(indices: ${h.indices}) -> ${c} { + return ${Y(`i2o_${e}(indices)`)}; + }`,V=a<2?"":(()=>{let D=o.map(ee=>`d${ee}: u32`).join(", "),q=o.map(ee=>`d${ee}`).join(", ");return` + fn get_${e}(${D}) -> ${c} { + return get_${e}ByIndices(${A(q)}); + }`})(),ae=(...D)=>{if(D.length!==a)throw new Error(`indices length must be ${a}`);let q=D.map(m).join(",");return a===0?Y("0u"):a===1?Y(q[0]):(g.get=!0,g.getByIndices=!0,g.indicesToOffset=!0,`get_${e}(${q})`)},Z=D=>a<2?Y(D):(g.getByIndices=!0,g.indicesToOffset=!0,`get_${e}ByIndices(${D})`),le=a<2?"":` + fn set_${e}ByIndices(indices: ${h.indices}, value: ${c}) { + ${ne(`i2o_${e}(indices)`,"value")} + }`,Be=a<2?"":(()=>{let D=o.map(ee=>`d${ee}: u32`).join(", "),q=o.map(ee=>`d${ee}`).join(", ");return` + fn set_${e}(${D}, value: ${c}) { + set_${e}ByIndices(${A(q)}, value); + }`})();return{impl:()=>{let D=[],q=!1;return g.offsetToIndices&&(D.push(k),q=!0),g.indicesToOffset&&(D.push(z),q=!0),g.broadcastedIndicesToOffset&&(Object.values(ie).forEach(ee=>D.push(ee)),q=!0),g.set&&(D.push(Be),q=!0),g.setByIndices&&(D.push(le),q=!0),g.get&&(D.push(V),q=!0),g.getByIndices&&(D.push(oe),q=!0),!n&&q&&D.unshift(`const ${S} = ${h.indices}(${r.join(",")});`,`const ${v} = ${h.indices}(${B.computeStrides(r).join(",")});`),D.join(` +`)},type:h,offsetToIndices:x,indicesToOffset:O,broadcastedIndicesToOffset:X,indices:A,indicesGet:R,indicesSet:W,set:(...D)=>{if(D.length!==a+1)throw new Error(`indices length must be ${a}`);let q=D[a];if(typeof q!="string")throw new Error("value must be string");let ee=D.slice(0,a).map(m).join(",");return a===0?ne("0u",q):a===1?ne(ee[0],q):(g.set=!0,g.setByIndices=!0,g.indicesToOffset=!0,`set_${e}(${ee}, ${q})`)},setByOffset:ne,setByIndices:(D,q)=>a<2?ne(D,q):(g.setByIndices=!0,g.indicesToOffset=!0,`set_${e}ByIndices(${D}, ${q});`),get:ae,getByOffset:Y,getByIndices:Z,usage:i,name:e,strides:v,shape:S,rank:a}},P=(e,t,r,i=1)=>$i(e,t,r,"input",i),ue=(e,t,r,i=1)=>$i(e,t,r,"output",i),gm=(e,t,r)=>$i(e,t,r,"atomicOutput",1),Qo=(e,t,r,i=1)=>$i(e,t,r,"internal",i),xd=class{constructor(e,t){this.normalizedDispatchGroup=e,this.limits=t,this.internalVariables=[],this.variables=[],this.uniforms=[],this.variableIndex=0}guardAgainstOutOfBoundsWorkgroupSizes(e){return`if (global_idx >= ${typeof e=="number"?`${e}u`:e}) { return; }`}mainStart(e=oi){let t=typeof e=="number"?e:e[0],r=typeof e=="number"?1:e[1],i=typeof e=="number"?1:e[2];if(t>this.limits.maxComputeWorkgroupSizeX||r>this.limits.maxComputeWorkgroupSizeY||i>this.limits.maxComputeWorkgroupSizeZ)throw new Error(`workgroup size [${t}, ${r}, ${i}] exceeds the maximum workgroup size [${this.limits.maxComputeWorkgroupSizeX}, ${this.limits.maxComputeWorkgroupSizeY}, ${this.limits.maxComputeWorkgroupSizeZ}].`);if(t*r*i>this.limits.maxComputeInvocationsPerWorkgroup)throw new Error(`workgroup size [${t}, ${r}, ${i}] exceeds the maximum workgroup invocations ${this.limits.maxComputeInvocationsPerWorkgroup}.`);let s=this.normalizedDispatchGroup[1]===1&&this.normalizedDispatchGroup[2]===1,n=s?`@builtin(global_invocation_id) global_id : vec3, + @builtin(workgroup_id) workgroup_id : vec3, + @builtin(local_invocation_index) local_idx : u32, + @builtin(local_invocation_id) local_id : vec3`:`@builtin(global_invocation_id) global_id : vec3, + @builtin(local_invocation_id) local_id : vec3, + @builtin(local_invocation_index) local_idx : u32, + @builtin(workgroup_id) workgroup_id : vec3, + @builtin(num_workgroups) num_workgroups : vec3`,a=s?`let global_idx = global_id.x; + let workgroup_index = workgroup_id.x;`:`let workgroup_index = workgroup_id.z * num_workgroups[0] * num_workgroups[1] + + workgroup_id.y * num_workgroups[0] + workgroup_id.x; + let global_idx = workgroup_index * ${t*r*i}u + local_idx;`;return`@compute @workgroup_size(${t}, ${r}, ${i}) + fn main(${n}) { + ${a} + `}appendVariableUniforms(e){e.rank!==0&&(e.shape.startsWith("uniforms.")&&this.uniforms.push({name:e.shape.replace("uniforms.",""),type:"u32",length:e.rank}),e.strides.startsWith("uniforms.")&&this.uniforms.push({name:e.strides.replace("uniforms.",""),type:"u32",length:e.rank}))}declareVariable(e,t){if(e.usage==="internal")throw new Error("cannot use internal variable with declareVariable(). use registerInternalVariables() instead.");this.variables.push(e),this.appendVariableUniforms(e);let r=e.usage==="input"?"read":"read_write",i=e.usage==="atomicOutput"?"atomic":e.type.storage;return`@group(0) @binding(${t}) var ${e.name}: array<${i}>;`}declareVariables(...e){return e.map(t=>this.declareVariable(t,this.variableIndex++)).join(` +`)}registerInternalVariable(e){if(e.usage!=="internal")throw new Error("cannot use input or output variable with registerInternalVariable(). use declareVariables() instead.");this.internalVariables.push(e),this.appendVariableUniforms(e)}registerInternalVariables(...e){return e.forEach(t=>this.registerInternalVariable(t)),this}registerUniform(e,t,r=1){return this.uniforms.push({name:e,type:t,length:r}),this}registerUniforms(e){return this.uniforms=this.uniforms.concat(e),this}uniformDeclaration(){if(this.uniforms.length===0)return"";let e=[];for(let{name:t,type:r,length:i}of this.uniforms)if(i&&i>4)r==="f16"?e.push(`@align(16) ${t}:array, ${Math.ceil(i/8)}>`):e.push(`${t}:array, ${Math.ceil(i/4)}>`);else{let s=i==null||i===1?r:`vec${i}<${r}>`;e.push(`${t}:${s}`)}return` + struct Uniforms { ${e.join(", ")} }; + @group(0) @binding(${this.variableIndex}) var uniforms: Uniforms;`}get additionalImplementations(){return this.uniformDeclaration()+this.variables.map(e=>e.impl()).join(` +`)+this.internalVariables.map(e=>e.impl()).join(` +`)}get variablesInfo(){if(this.uniforms.length===0)return;let e=t=>[12,10,1,6][["u32","f16","f32","i32"].indexOf(t)];return this.uniforms.map(t=>[e(t.type),t.length??1])}},_m=(e,t)=>new xd(e,t)}),Sd,ia,kd,Td,Id,Ed,$t,ym,bm,Tr=j(()=>{_e(),we(),qe(),ve(),Sd=(e,t)=>{if(!e||e.length!==1)throw new Error("Transpose requires 1 input.");if(t.length!==0&&t.length!==e[0].dims.length)throw new Error(`perm size ${t.length} does not match input rank ${e[0].dims.length}`)},ia=(e,t)=>t.length!==0?t:[...new Array(e).keys()].reverse(),kd=(e,t)=>B.sortBasedOnPerm(e,ia(e.length,t)),Td=(e,t,r,i)=>{let s=`fn perm(i: ${i.type.indices}) -> ${r.type.indices} { + var a: ${r.type.indices};`;for(let n=0;n{let r=[],i=[];for(let s=0;s{let r=0;for(let i=0;i{let r=e.dataType,i=e.dims.length,s=ia(i,t),n=kd(e.dims,s),a=e.dims,o=n,u=i<2||Ed(s,e.dims),d;if(u)return d=g=>{let y=P("input",r,a,4),S=ue("output",r,o,4);return` + ${g.registerUniform("output_size","u32").declareVariables(y,S)} + ${g.mainStart()} + ${g.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + output[global_idx] = input[global_idx]; + }`},{name:"TransposeCopy",shaderCache:{inputDependencies:["type"]},getRunData:()=>{let g=B.size(n);return{outputs:[{dims:n,dataType:e.dataType}],dispatchGroup:{x:Math.ceil(g/64/4)},programUniforms:[{type:12,data:Math.ceil(g/4)}]}},getShaderSource:d};let{newShape:c,newPerm:f}=Id(e.dims,s),h=B.areEqual(f,[2,3,1]),m=B.areEqual(f,[3,1,2]);if(c.length===2||h||m){a=h?[c[0],c[1]*c[2]]:m?[c[0]*c[1],c[2]]:c,o=[a[1],a[0]];let g=16;return d=y=>{let S=P("a",r,a.length),v=ue("output",r,o.length);return` + ${y.registerUniform("output_size","u32").declareVariables(S,v)} + var tile : array, ${g}>; + ${y.mainStart([g,g,1])} + let stride = (uniforms.output_shape[1] - 1) / ${g} + 1; + let workgroup_id_x = workgroup_index % stride; + let workgroup_id_y = workgroup_index / stride; + let input_col = workgroup_id_y * ${g}u + local_id.x; + let input_row = workgroup_id_x * ${g}u + local_id.y; + if (input_row < uniforms.a_shape[0] && input_col < uniforms.a_shape[1]) { + tile[local_id.y][local_id.x] = ${S.getByIndices(`${S.type.indices}(input_row, input_col)`)}; + } + workgroupBarrier(); + + let output_col = workgroup_id_x * ${g}u + local_id.x; + let output_row = workgroup_id_y * ${g}u + local_id.y; + if (output_row < uniforms.output_shape[0] && output_col < uniforms.output_shape[1]) { + ${v.setByIndices(`${v.type.indices}(output_row, output_col)`,"tile[local_id.x][local_id.y]")} + } + }`},{name:"TransposeShared",shaderCache:{inputDependencies:["type"]},getRunData:()=>{let y=B.size(n);return{outputs:[{dims:n,dataType:e.dataType}],dispatchGroup:{x:Math.ceil(o[1]/g),y:Math.ceil(o[0]/g)},programUniforms:[{type:12,data:y},...pe(a,o)]}},getShaderSource:d}}return d=g=>{let y=P("a",r,a.length),S=ue("output",r,o.length);return` + ${g.registerUniform("output_size","u32").declareVariables(y,S)} + + ${Td(s,i,y,S)} + + ${g.mainStart()} + ${g.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + + let indices = ${S.offsetToIndices("global_idx")}; + let aIndices = perm(indices); + + ${S.setByOffset("global_idx",y.getByIndices("aIndices"))} + }`},{name:"Transpose",shaderCache:{hint:`${t}`,inputDependencies:["rank"]},getRunData:()=>{let g=B.size(n);return{outputs:[{dims:n,dataType:e.dataType}],dispatchGroup:{x:Math.ceil(g/64)},programUniforms:[{type:12,data:g},...pe(a,o)]}},getShaderSource:d}},ym=(e,t)=>{Sd(e.inputs,t.perm),e.compute($t(e.inputs[0],t.perm))},bm=e=>Oe({perm:e.perm})}),Cd,zd,Od,Ad,Rd,Bd,Md,Nd,Dd,Pd,It,wm,vm,$m,xm,Sm,km,Tm,Im,Em,Cm,v$=j(()=>{_e(),we(),ve(),Jo(),Tr(),Cd={max:"select(bestValue, candidate, candidate > bestValue)",min:"select(bestValue, candidate, candidate < bestValue)",mean:"bestValue + candidate",sum:"bestValue + candidate",prod:"bestValue * candidate",sumSquare:"bestValue + candidate * candidate",logSumExp:"bestValue + exp(candidate)",l1:"bestValue + abs(candidate)",l2:"bestValue + candidate * candidate",logSum:"bestValue + candidate"},zd={max:"select(bestValue, candidate, candidate > bestValue)",min:"select(bestValue, candidate, candidate < bestValue)",mean:"bestValue + candidate",sum:"bestValue + candidate",prod:"bestValue * candidate",sumSquare:"bestValue + candidate",logSumExp:"bestValue + candidate",l1:"bestValue + candidate",l2:"bestValue + candidate",logSum:"bestValue + candidate"},Od={max:"_A[offset]",min:"_A[offset]",mean:"0",sum:"0",prod:"1",sumSquare:"0",logSumExp:"0",l1:"0",l2:"0",logSum:"0"},Ad={max:"bestValue",min:"bestValue",sum:"bestValue",prod:"bestValue",sumSquare:"bestValue",logSumExp:"log(bestValue)",l1:"bestValue",l2:"sqrt(bestValue)",logSum:"log(bestValue)"},Rd=(e,t)=>{let r=[];for(let i=t-e;i{let r=[],i=e.length;for(let n=0;ne[n]);return[r,s]},Md=(e,t)=>{let r=e.length+t.length,i=[],s=0;for(let n=0;n{for(let r=0;r{let r=[];if(!Nd(e,t)){for(let i=0;ir.push(i))}return r},Pd=(e,t,r,i,s,n,a)=>{let o=r[0].dims,u=B.size(n),d=B.size(a),c=P("_A",r[0].dataType,o),f=ue("output",s,n),h=64;u===1&&(h=256);let m=` + var aBestValues : array; + `,g=y=>` + ${y.registerUniform("reduceSize","u32").declareVariables(c,f)} + ${m} + fn DIV_CEIL(a : u32, b : u32) -> u32 { + return ((a - 1u) / b + 1u); + } + ${y.mainStart(h)} + + let outputIndex = global_idx / ${h}; + let offset = outputIndex * uniforms.reduceSize; + + var bestValue = f32(${Od[i]}); + let Length = uniforms.reduceSize; + for (var k = local_idx; k < Length; k = k + ${h}) { + let candidate = f32(${c.getByOffset("offset + k")}); + bestValue = ${Cd[i]}; + } + aBestValues[local_idx] = bestValue; + workgroupBarrier(); + + var reduceSize = min(Length, ${h}u); + for (var currentSize = reduceSize / 2u; reduceSize > 1u; + currentSize = reduceSize / 2u) { + let interval = DIV_CEIL(reduceSize, 2u); + if (local_idx < currentSize) { + let candidate = aBestValues[local_idx + interval]; + bestValue = ${zd[i]}; + aBestValues[local_idx] = bestValue; + } + reduceSize = interval; + workgroupBarrier(); + } + + if (local_idx == 0u) { + ${f.setByOffset("outputIndex",`${i==="mean"?`${f.type.storage}(bestValue / f32(uniforms.reduceSize))`:`${f.type.storage}(${Ad[i]})`}`)}; + } + }`;return{name:e,shaderCache:{hint:`${t};${h}`,inputDependencies:["type"]},getShaderSource:g,getRunData:()=>({outputs:[{dims:n,dataType:s}],dispatchGroup:{x:u},programUniforms:[{type:12,data:d}]})}},It=(e,t,r,i)=>{let s=e.inputs.length===1?r:to(e.inputs,r),n=s.axes;n.length===0&&!s.noopWithEmptyAxes&&(n=e.inputs[0].dims.map((m,g)=>g));let a=B.normalizeAxes(n,e.inputs[0].dims.length),o=a,u=e.inputs[0],d=Dd(o,e.inputs[0].dims.length);d.length>0&&(u=e.compute($t(e.inputs[0],d),{inputs:[0],outputs:[-1]})[0],o=Rd(o.length,u.dims.length));let[c,f]=Bd(u.dims,o),h=c;s.keepDims&&(h=Md(c,a)),e.compute(Pd(t,s.cacheKey,[u],i,e.inputs[0].dataType,h,f),{inputs:[u]})},wm=(e,t)=>{It(e,"ReduceMeanShared",t,"mean")},vm=(e,t)=>{It(e,"ReduceL1Shared",t,"l1")},$m=(e,t)=>{It(e,"ReduceL2Shared",t,"l2")},xm=(e,t)=>{It(e,"ReduceLogSumExpShared",t,"logSumExp")},Sm=(e,t)=>{It(e,"ReduceMaxShared",t,"max")},km=(e,t)=>{It(e,"ReduceMinShared",t,"min")},Tm=(e,t)=>{It(e,"ReduceProdShared",t,"prod")},Im=(e,t)=>{It(e,"ReduceSumShared",t,"sum")},Em=(e,t)=>{It(e,"ReduceSumSquareShared",t,"sumSquare")},Cm=(e,t)=>{It(e,"ReduceLogSumShared",t,"logSum")}}),Et,Ud,Kn,to,Ct,Wd,Ld,qd,Vd,Fd,Hd,Gd,jd,Kd,Zd,zt,zm,Om,Am,Rm,Bm,Mm,Nm,Dm,Pm,Um,Jo=j(()=>{_e(),we(),qe(),ve(),v$(),Et=e=>{if(!e||e.length===0||e.length>2)throw new Error("Reduce op requires 1 or 2 inputs.");if(e.length===2&&e[1].dims.length!==1)throw new Error("Invalid axes input dims.")},Ud=e=>["","",`var value = ${e.getByIndices("input_indices")};`,""],Kn=(e,t,r,i,s,n,a=!1,o=!1)=>{let u=[],d=r[0].dims,c=d.length,f=B.normalizeAxes(s,c),h=!o&&f.length===0;d.forEach((y,S)=>{h||f.indexOf(S)>=0?a&&u.push(1):u.push(y)});let m=u.length,g=B.size(u);return{name:e,shaderCache:t,getShaderSource:y=>{let S=[],v=P("_A",r[0].dataType,c),b=ue("output",n,m),k=i(v,b,f),x=k[2];for(let I=0,z=0;I=0?(a&&z++,x=`for(var j${I}: u32 = 0; j${I} < ${d[I]}; j${I}++) { + ${k[2].includes("last_index")?`let last_index = j${I};`:""} + ${v.indicesSet("input_indices",I,`j${I}`)} + ${x} + }`):(S.push(`${v.indicesSet("input_indices",I,b.indicesGet("output_indices",z))};`),z++);return` + + ${y.registerUniform("output_size","u32").declareVariables(v,b)} + + ${y.mainStart()} + ${y.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + var input_indices: ${v.type.indices}; + let output_indices = ${b.offsetToIndices("global_idx")}; + + ${S.join(` +`)} + ${k[0]} // init ops for reduce max/min + ${k[1]} + ${x} + ${k[3]} + ${k.length===4?b.setByOffset("global_idx","value"):k.slice(4).join(` +`)} + }`},getRunData:()=>({outputs:[{dims:u,dataType:n}],dispatchGroup:{x:Math.ceil(g/64)},programUniforms:[{type:12,data:g},...pe(d,u)]})}},to=(e,t)=>{let r=[];return e[1].dims[0]>0&&e[1].getBigInt64Array().forEach(i=>r.push(Number(i))),Oe({axes:r,keepDims:t.keepDims,noopWithEmptyAxes:t.noopWithEmptyAxes})},Ct=(e,t,r,i)=>{let s=e.inputs,n=s.length===1?r:to(s,r);e.compute(Kn(t,{hint:n.cacheKey,inputDependencies:["rank"]},[s[0]],n.noopWithEmptyAxes&&n.axes.length===0?Ud:i,n.axes,s[0].dataType,n.keepDims,n.noopWithEmptyAxes),{inputs:[0]})},Wd=(e,t)=>{Et(e.inputs),Ct(e,"ReduceLogSum",t,(r,i)=>[`var value = ${i.type.storage}(0);`,"",`value += ${r.getByIndices("input_indices")};`,"value = log(value);"])},Ld=(e,t)=>{Et(e.inputs),Ct(e,"ReduceL1",t,(r,i)=>[`var value = ${i.type.storage}(0);`,"",`value += abs(${r.getByIndices("input_indices")});`,""])},qd=(e,t)=>{Et(e.inputs),Ct(e,"ReduceL2",t,(r,i)=>[`var t = ${i.type.value}(0); var value = ${i.type.value}(0);`,"",`t = ${r.getByIndices("input_indices")}; value += (t * t);`,"value = sqrt(value);"])},Vd=(e,t)=>{Et(e.inputs),Ct(e,"ReduceLogSumExp",t,(r,i)=>[`var value = ${i.type.storage}(0);`,"",`value += exp(${r.getByIndices("input_indices")});`,"value = log(value);"])},Fd=(e,t)=>{Et(e.inputs),Ct(e,"ReduceMax",t,(r,i,s)=>{let n=[];for(let a=0;a=0||s.length===0)&&n.push(r.indicesSet("input_indices",a,0));return[`${n.join(` +`)}`,`var value = ${r.getByIndices("input_indices")};`,`value = max(value, ${r.getByIndices("input_indices")});`,""]})},Hd=(e,t)=>{Et(e.inputs),Ct(e,"ReduceMean",t,(r,i,s)=>{let n=1;for(let a=0;a=0||s.length===0)&&(n*=e.inputs[0].dims[a]);return["var sum = f32(0);","",`sum += f32(${r.getByIndices("input_indices")});`,`let value = ${i.type.value}(sum / ${n});`]})},Gd=(e,t)=>{Et(e.inputs),Ct(e,"ReduceMin",t,(r,i,s)=>{let n=[];for(let a=0;a=0||s.length===0)&&n.push(`input_indices[${a}] = 0;`);return[`${n.join(` +`)}`,`var value = ${r.getByIndices("input_indices")};`,`value = min(value, ${r.getByIndices("input_indices")});`,""]})},jd=(e,t)=>{Et(e.inputs),Ct(e,"ReduceProd",t,(r,i)=>[`var value = ${i.type.storage}(1);`,"",`value *= ${r.getByIndices("input_indices")};`,""])},Kd=(e,t)=>{Et(e.inputs),Ct(e,"ReduceSum",t,(r,i)=>[`var value = ${i.type.storage}(0);`,"",`value += ${r.getByIndices("input_indices")};`,""])},Zd=(e,t)=>{Et(e.inputs),Ct(e,"ReduceSumSquare",t,(r,i)=>[`var t = ${i.type.value}(0); var value = ${i.type.value}(0);`,"",`t = ${r.getByIndices("input_indices")}; value += t * t;`,""])},zt=(e,t,r)=>{if(t.length===0)return r;let i=1,s=1;for(let n=0;n1024},zm=(e,t)=>{zt(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?Hd(e,t):wm(e,t)},Om=(e,t)=>{zt(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?Ld(e,t):vm(e,t)},Am=(e,t)=>{zt(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?qd(e,t):$m(e,t)},Rm=(e,t)=>{zt(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?Vd(e,t):xm(e,t)},Bm=(e,t)=>{zt(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?Fd(e,t):Sm(e,t)},Mm=(e,t)=>{zt(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?Gd(e,t):km(e,t)},Nm=(e,t)=>{zt(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?jd(e,t):Tm(e,t)},Dm=(e,t)=>{zt(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?Kd(e,t):Im(e,t)},Pm=(e,t)=>{zt(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?Zd(e,t):Em(e,t)},Um=(e,t)=>{zt(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?Wd(e,t):Cm(e,t)}}),na,Wm,Lm,ro,$$=j(()=>{_e(),qe(),Jo(),na=e=>{if(!e||e.length===0||e.length>2)throw new Error("ArgMinMaxOp op requires 1 or 2 inputs.");if(e[0].dataType!==1)throw new Error("Invalid input type.")},Wm=(e,t)=>{na(e.inputs);let r=(i,s,n)=>{let a=[];for(let o=0;o=0||n.length===0)&&a.push(`input_indices[${o}] = 0;`);return[`${a.join(` +`)}`,`var value = ${i.getByIndices("input_indices")}; +var best_index : i32 = 0;`,`if (${i.getByIndices("input_indices")} ${t.selectLastIndex>0?"<=":"<"} value) { + value = ${i.getByIndices("input_indices")}; + best_index = i32(last_index); + }`,"",s.setByOffset("global_idx","best_index")]};e.compute(Kn("ArgMin",{hint:t.cacheKey,inputDependencies:["rank"]},[e.inputs[0]],r,[t.axis],7,t.keepDims),{inputs:[0]})},Lm=(e,t)=>{na(e.inputs);let r=(i,s,n)=>{let a=[];for(let o=0;o=0||n.length===0)&&a.push(`input_indices[${o}] = 0;`);return[`${a.join(` +`)}`,`var value = ${i.getByIndices("input_indices")}; +var best_index : i32 = 0;`,`if (${i.getByIndices("input_indices")} ${t.selectLastIndex>0?">=":">"} value) { + value = ${i.getByIndices("input_indices")}; + best_index = i32(last_index); + }`,"",s.setByOffset("global_idx","best_index")]};e.compute(Kn("argMax",{hint:t.cacheKey,inputDependencies:["rank"]},[e.inputs[0]],r,[t.axis],7,t.keepDims),{inputs:[0]})},ro=e=>Oe(e)}),Xd,In,Yd,Qd,Jd,Ji,ec,qm,eu=j(()=>{_e(),we(),Yo(),ve(),Xd=(e,t)=>{let r=e[0],i=e[1],s=e[2],n=e[3],a=e[4],o=e[5];if(a&&o)throw new Error("Attention cannot have both past and attention_bias");if(r.dims.length!==3)throw new Error('Input "input" must have 3 dimensions');let u=r.dims[0],d=r.dims[1],c=r.dims[2];if(s.dims.length!==1)throw new Error('Input "bias" is expected to have 1 dimensions');if(i.dims.length!==2)throw new Error('Input "weights" is expected to have 2 dimensions');if(i.dims[0]!==c)throw new Error("Input 1 dimension 0 should have same length as dimension 2 of input 0");if(s.dims[0]!==i.dims[1])throw new Error('Input "bias" dimension 0 should have same length as dimension 1 of input "weights"');let f=s.dims[0]/3,h=f,m=h;if(t.qkvHiddenSizes.length>0){if(t.qkvHiddenSizes.length!==3)throw new Error("qkv_hidden_sizes attribute should have 3 elements");for(let k of t.qkvHiddenSizes)if(k%t.numHeads!==0)throw new Error("qkv_hidden_sizes should be divisible by num_heads");f=t.qkvHiddenSizes[0],h=t.qkvHiddenSizes[1],m=t.qkvHiddenSizes[2]}let g=d;if(f!==h)throw new Error("qkv_hidden_sizes first element should be same as the second");if(s.dims[0]!==f+h+m)throw new Error('Input "bias" dimension 0 should have same length as sum of Q/K/V hidden sizes');let y=0;if(a){if(h!==m)throw new Error('Input "past" expect k_hidden_size == v_hidden_size');if(a.dims.length!==5)throw new Error('Input "past" must have 5 dimensions');if(a.dims[0]!==2)throw new Error('Input "past" first dimension must be 2');if(a.dims[1]!==u)throw new Error('Input "past" second dimension must be batch_size');if(a.dims[2]!==t.numHeads)throw new Error('Input "past" third dimension must be num_heads');if(a.dims[4]!==h/t.numHeads)throw new Error('Input "past" fifth dimension must be k_hidden_size / num_heads');t.pastPresentShareBuffer||(y=a.dims[3])}let S=g+y,v=-1,b=0;if(n)throw new Error("Mask not supported");if(a)throw new Error("past is not supported");if(o){if(o.dims.length!==4)throw new Error('Input "attention_bias" must have 4 dimensions');if(o.dims[0]!==u||o.dims[1]!==t.numHeads||o.dims[2]!==d||o.dims[3]!==S)throw new Error('Expect "attention_bias" shape (batch_size, num_heads, sequence_length, total_sequence_length)')}return{batchSize:u,sequenceLength:d,pastSequenceLength:y,kvSequenceLength:g,totalSequenceLength:S,maxSequenceLength:v,inputHiddenSize:c,hiddenSize:f,vHiddenSize:m,headSize:Math.floor(f/t.numHeads),vHeadSize:Math.floor(m/t.numHeads),numHeads:t.numHeads,isUnidirectional:!1,pastPresentShareBuffer:!1,maskFilterValue:t.maskFilterValue,maskType:b,scale:t.scale,broadcastResPosBias:!1,passPastInKv:!1,qkvFormat:1}},In=(e,t,r)=>t&&e?` + let total_sequence_length_input = u32(${t.getByOffset("0")}); + let present_sequence_length = max(total_sequence_length_input, uniforms.past_sequence_length); + let is_subsequent_prompt: bool = sequence_length > 1 && sequence_length != total_sequence_length_input; + let is_first_prompt: bool = is_subsequent_prompt == false && sequence_length == total_sequence_length_input; + total_sequence_length = u32(${e==null?void 0:e.getByOffset("batchIdx")}) + 1; + var past_sequence_length: u32 = 0; + if (is_first_prompt == false) { + past_sequence_length = total_sequence_length - sequence_length; + } + `:` + ${r?"let past_sequence_length = uniforms.past_sequence_length":""}; + let present_sequence_length = total_sequence_length; + `,Yd=(e,t,r,i,s,n,a,o)=>{let u=Le(a?1:n),d=64,c=n/u;c{let b=ue("x",e.dataType,e.dims,u),k=[b],x=a?P("seq_lens",a.dataType,a.dims):void 0;x&&k.push(x);let I=o?P("total_sequence_length_input",o.dataType,o.dims):void 0;I&&k.push(I);let z=st(e.dataType),O=[{name:"batch_size",type:"u32"},{name:"num_heads",type:"u32"},{name:"past_sequence_length",type:"u32"},{name:"sequence_length",type:"u32"},{name:"total_sequence_length",type:"u32"},{name:"elements_per_thread",type:"u32"}];return` + var thread_max: array; + var thread_sum: array; + ${v.registerUniforms(O).declareVariables(...k)} + ${v.mainStart([d,1,1])} + let batchIdx = workgroup_id.z / uniforms.num_heads; + let headIdx = workgroup_id.z % uniforms.num_heads; + let sequence_length = uniforms.sequence_length; + var total_sequence_length = uniforms.total_sequence_length; + ${In(x,I,!1)} + let local_offset = local_idx * uniforms.elements_per_thread; + let offset = (global_idx / ${d}) * uniforms.total_sequence_length + local_offset; + let seq_causal_length = ${a?"u32(past_sequence_length + workgroup_id.y + 1)":"total_sequence_length"}; + var thread_max_vector = ${g}(-3.402823e+38f); + for (var i: u32 = 0; i < uniforms.elements_per_thread && i + local_offset < seq_causal_length; i++) { + thread_max_vector = max(${g}(x[offset + i]), thread_max_vector); + } + thread_max[local_idx] = ${(()=>{switch(u){case 1:return"thread_max_vector";case 2:return"max(thread_max_vector.x, thread_max_vector.y)";case 4:return"max(max(thread_max_vector.x, thread_max_vector.y), max(thread_max_vector.z, thread_max_vector.w))";default:throw new Error(`Unsupported components: ${u}`)}})()}; + workgroupBarrier(); + + var max_value = f32(-3.402823e+38f); + for (var i = 0u; i < ${d}; i++) { + max_value = max(thread_max[i], max_value); + } + + var sum_vector = ${g}(0); + for (var i: u32 = 0; i < uniforms.elements_per_thread && i + local_offset < seq_causal_length; i++) { + sum_vector += exp(${g}(x[offset + i]) - max_value); + } + thread_sum[local_idx] = ${(()=>{switch(u){case 1:return"sum_vector";case 2:return"sum_vector.x + sum_vector.y";case 4:return"sum_vector.x + sum_vector.y + sum_vector.z + sum_vector.w";default:throw new Error(`Unsupported components: ${u}`)}})()}; + workgroupBarrier(); + + var sum: f32 = 0; + for (var i = 0u; i < ${d}; i++) { + sum += thread_sum[i]; + } + + if (sum == 0) { + for (var i: u32 = 0; i < uniforms.elements_per_thread && i + local_offset < seq_causal_length; i++) { + x[offset + i] = ${b.type.value}(${z}(1.0) / ${z}(seq_causal_length)); + } + } else { + for (var i: u32 = 0; i < uniforms.elements_per_thread && i + local_offset < seq_causal_length; i++) { + var f32input = ${g}(x[offset + i]); + x[offset + i] = ${b.type.value}(exp(f32input - max_value) / sum); + } + } + ${a?` + for (var total_seq_id: u32 = seq_causal_length; total_seq_id + local_offset < uniforms.total_sequence_length; total_seq_id++) { + x[offset + total_seq_id] = ${b.type.value}(${z}(0)); + }`:""}; + }`};return{name:"AttentionProbsSoftmax",shaderCache:{hint:`${d};${m};${u}`,inputDependencies:y},getShaderSource:S,getRunData:()=>({outputs:[],dispatchGroup:{x:1,y:s,z:t*r},programUniforms:h})}},Qd=(e,t,r,i,s,n,a,o,u)=>{let d=a+n.kvSequenceLength,c=[n.batchSize,n.numHeads,n.sequenceLength,d],f=e>1&&i,h=n.kvNumHeads?n.kvNumHeads:n.numHeads,m=f?[n.batchSize,h,d,n.headSize]:void 0,g=n.nReps?n.nReps:1,y=n.scale===0?1/Math.sqrt(n.headSize):n.scale,S=Le(n.headSize),v=n.headSize/S,b=12,k={x:Math.ceil(d/b),y:Math.ceil(n.sequenceLength/b),z:n.batchSize*n.numHeads},x=[{type:12,data:n.sequenceLength},{type:12,data:v},{type:12,data:d},{type:12,data:n.numHeads},{type:12,data:n.headSize},{type:1,data:y},{type:12,data:a},{type:12,data:n.kvSequenceLength},{type:12,data:g}],I=f&&i&&B.size(i.dims)>0,z=["type","type"];I&&z.push("type"),s&&z.push("type"),o&&z.push("type"),u&&z.push("type");let O=[{dims:c,dataType:t.dataType,gpuDataType:0}];f&&O.push({dims:m,dataType:t.dataType,gpuDataType:0});let A=R=>{let W=P("q",t.dataType,t.dims,S),ie=P("key",r.dataType,r.dims,S),X=[W,ie];if(I){let le=P("past_key",i.dataType,i.dims,S);X.push(le)}s&&X.push(P("attention_bias",s.dataType,s.dims));let ne=o?P("seq_lens",o.dataType,o.dims):void 0;ne&&X.push(ne);let Y=u?P("total_sequence_length_input",u.dataType,u.dims):void 0;Y&&X.push(Y);let oe=ue("output",t.dataType,c),V=[oe];f&&V.push(ue("present_key",t.dataType,m,S));let ae=st(1,S),Z=[{name:"M",type:"u32"},{name:"K",type:"u32"},{name:"N",type:"u32"},{name:"num_heads",type:"u32"},{name:"head_size",type:"u32"},{name:"alpha",type:"f32"},{name:"past_sequence_length",type:"u32"},{name:"kv_sequence_length",type:"u32"},{name:"n_reps",type:"u32"}];return` + const TILE_SIZE = ${b}u; + + var tileQ: array<${W.type.storage}, ${b*b}>; + var tileK: array<${W.type.storage}, ${b*b}>; + ${R.registerUniforms(Z).declareVariables(...X,...V)} + ${R.mainStart([b,b,1])} + // x holds the N and y holds the M + let headIdx = workgroup_id.z % uniforms.num_heads; + let kvHeadIdx = ${g===1?"headIdx":"headIdx / uniforms.n_reps"}; + let kv_num_heads = ${g===1?"uniforms.num_heads":"uniforms.num_heads / uniforms.n_reps"}; + let batchIdx = workgroup_id.z / uniforms.num_heads; + let m = workgroup_id.y * TILE_SIZE; + let n = workgroup_id.x * TILE_SIZE; + let sequence_length = uniforms.M; + var total_sequence_length = uniforms.N; + ${In(ne,Y,!0)} + let absKvHeadIdx = batchIdx * kv_num_heads + kvHeadIdx; + let qOffset = workgroup_id.z * uniforms.M * uniforms.K + m * uniforms.K; + ${I&&f?"let pastKeyOffset = absKvHeadIdx * uniforms.past_sequence_length * uniforms.K;":""}; + let kOffset = absKvHeadIdx * uniforms.kv_sequence_length * uniforms.K; + ${f?"let presentKeyOffset = absKvHeadIdx * uniforms.N * uniforms.K;":""} + var value = ${ae}(0); + for (var w: u32 = 0u; w < uniforms.K; w += TILE_SIZE) { + if (global_id.y < uniforms.M && w + local_id.x < uniforms.K) { + tileQ[TILE_SIZE * local_id.y + local_id.x] = q[qOffset + local_id.y * uniforms.K + w + local_id.x]; + } + if (n + local_id.y < uniforms.N && w + local_id.x < uniforms.K) { + var idx = TILE_SIZE * local_id.y + local_id.x; + ${I&&f?` + if (n + local_id.y < past_sequence_length) { + tileK[idx] = past_key[pastKeyOffset + (n + local_id.y) * uniforms.K + w + local_id.x]; + } else if (n + local_id.y - past_sequence_length < uniforms.kv_sequence_length) { + tileK[idx] = key[kOffset + (n + local_id.y - past_sequence_length) * uniforms.K + w + local_id.x]; + }`:` + if (n + local_id.y < uniforms.kv_sequence_length) { + tileK[idx] = key[kOffset + (n + local_id.y) * uniforms.K + w + local_id.x]; + }`} + ${f?`if (n + local_id.y < present_sequence_length) { + present_key[presentKeyOffset + (n + local_id.y) * uniforms.K + w + local_id.x] = tileK[idx]; + }`:""} + } + workgroupBarrier(); + + for (var k: u32 = 0u; k < TILE_SIZE && w+k < uniforms.K; k++) { + value += ${ae}(tileQ[TILE_SIZE * local_id.y + k] * tileK[TILE_SIZE * local_id.x + k]); + } + + workgroupBarrier(); + } + + if (global_id.y < uniforms.M && global_id.x < total_sequence_length) { + let headOffset = workgroup_id.z * uniforms.M * uniforms.N; + let outputIdx = headOffset + global_id.y * uniforms.N + global_id.x; + var sum: f32 = ${(()=>{switch(S){case 1:return"value";case 2:return"value.x + value.y";case 4:return"value.x + value.y + value.z + value.w";default:throw new Error(`Unsupported components: ${S}`)}})()}; + output[outputIdx] = ${oe.type.value} (sum * uniforms.alpha) + ${s?"attention_bias[outputIdx]":"0.0"}; + } + }`};return{name:"AttentionProbs",shaderCache:{hint:`${S};${s!==void 0};${i!==void 0};${e}`,inputDependencies:z},getRunData:()=>({outputs:O,dispatchGroup:k,programUniforms:x}),getShaderSource:A}},Jd=(e,t,r,i,s,n,a=void 0,o=void 0)=>{let u=n+s.kvSequenceLength,d=s.nReps?s.nReps:1,c=s.vHiddenSize*d,f=e>1&&i,h=s.kvNumHeads?s.kvNumHeads:s.numHeads,m=f?[s.batchSize,h,u,s.headSize]:void 0,g=[s.batchSize,s.sequenceLength,c],y=12,S={x:Math.ceil(s.vHeadSize/y),y:Math.ceil(s.sequenceLength/y),z:s.batchSize*s.numHeads},v=[{type:12,data:s.sequenceLength},{type:12,data:u},{type:12,data:s.vHeadSize},{type:12,data:s.numHeads},{type:12,data:s.headSize},{type:12,data:c},{type:12,data:n},{type:12,data:s.kvSequenceLength},{type:12,data:d}],b=f&&i&&B.size(i.dims)>0,k=["type","type"];b&&k.push("type"),a&&k.push("type"),o&&k.push("type");let x=[{dims:g,dataType:t.dataType,gpuDataType:0}];f&&x.push({dims:m,dataType:t.dataType,gpuDataType:0});let I=z=>{let O=P("probs",t.dataType,t.dims),A=P("v",r.dataType,r.dims),R=[O,A];b&&R.push(P("past_value",i.dataType,i.dims));let W=a?P("seq_lens",a.dataType,a.dims):void 0;a&&R.push(W);let ie=o?P("total_sequence_length_input",o.dataType,o.dims):void 0;o&&R.push(ie);let X=[ue("output",t.dataType,g)];f&&X.push(ue("present_value",t.dataType,m));let ne=[{name:"M",type:"u32"},{name:"K",type:"u32"},{name:"N",type:"u32"},{name:"num_heads",type:"u32"},{name:"head_size",type:"u32"},{name:"v_hidden_size",type:"u32"},{name:"past_sequence_length",type:"u32"},{name:"kv_sequence_length",type:"u32"},{name:"n_reps",type:"u32"}];return` + const TILE_SIZE = ${y}u; + var tileQ: array<${O.type.value}, ${y*y}>; + var tileV: array<${O.type.value}, ${y*y}>; + ${z.registerUniforms(ne).declareVariables(...R,...X)} + ${z.mainStart([y,y,1])} + let headIdx = workgroup_id.z % uniforms.num_heads; + let batchIdx = workgroup_id.z / uniforms.num_heads; + let kvHeadIdx = ${d===1?"headIdx":"headIdx / uniforms.n_reps"}; + let kv_num_heads = ${d===1?"uniforms.num_heads":"uniforms.num_heads / uniforms.n_reps"}; + let m = global_id.y; + let n = global_id.x; + let sequence_length = uniforms.M; + var total_sequence_length = uniforms.K; + ${In(W,ie,!0)} + let offsetA = workgroup_id.z * uniforms.M * uniforms.K + m * uniforms.K; + let absKvHeadIdx = batchIdx * kv_num_heads + kvHeadIdx; // kvHeadIdx is relative to the batch + ${b&&f?"let pastValueOffset = absKvHeadIdx * uniforms.N * uniforms.past_sequence_length + n;":""}; + let vOffset = absKvHeadIdx * uniforms.N * uniforms.kv_sequence_length + n; + ${f?"let presentValueOffset = absKvHeadIdx * uniforms.N * uniforms.K + n;":""} + var value = ${O.type.storage}(0); + for (var w: u32 = 0u; w < uniforms.K; w += TILE_SIZE) { + if (m < uniforms.M && w + local_id.x < uniforms.K) { + tileQ[TILE_SIZE * local_id.y + local_id.x] = probs[offsetA + w + local_id.x]; + } + if (n < uniforms.N && w + local_id.y < uniforms.K) { + var idx = TILE_SIZE * local_id.y + local_id.x; + ${b&&f?` + if (w + local_id.y < past_sequence_length) { + tileV[idx] = past_value[pastValueOffset + (w + local_id.y) * uniforms.N]; + } else if (w + local_id.y - past_sequence_length < uniforms.kv_sequence_length) { + tileV[idx] = v[vOffset + (w + local_id.y - past_sequence_length) * uniforms.N]; + } + `:` + if (w + local_id.y < uniforms.kv_sequence_length) { + tileV[idx] = v[vOffset + (w + local_id.y) * uniforms.N]; + }`} + ${f?` + if (w + local_id.y < present_sequence_length) { + present_value[presentValueOffset + (w + local_id.y) * uniforms.N] = tileV[idx]; + }`:""} + } + workgroupBarrier(); + for (var k: u32 = 0u; k < TILE_SIZE && w+k < total_sequence_length; k++) { + value += tileQ[TILE_SIZE * local_id.y + k] * tileV[TILE_SIZE * k + local_id.x]; + } + workgroupBarrier(); + } + + // we need to transpose output from BNSH_v to BSND_v + if (m < uniforms.M && n < uniforms.N) { + let outputIdx = batchIdx * uniforms.M * uniforms.v_hidden_size + m * uniforms.v_hidden_size + + headIdx * uniforms.N + n; + output[outputIdx] = value; + } + }`};return{name:"AttentionScore",shaderCache:{hint:`${i!==void 0};${e}`,inputDependencies:k},getRunData:()=>({outputs:x,dispatchGroup:S,programUniforms:v}),getShaderSource:I}},Ji=(e,t,r,i,s,n,a,o,u,d,c=void 0,f=void 0)=>{let h=Math.min(e.outputCount,1+(a?1:0)+(o?1:0)),m=h>1?d.pastSequenceLength:0,g=m+d.kvSequenceLength,y=u&&B.size(u.dims)>0?u:void 0,S=[t,r];h>1&&a&&B.size(a.dims)>0&&S.push(a),y&&S.push(y),c&&S.push(c),f&&S.push(f);let v=e.compute(Qd(h,t,r,a,y,d,m,c,f),{inputs:S,outputs:h>1?[-1,1]:[-1]})[0];e.compute(Yd(v,d.batchSize,d.numHeads,m,d.sequenceLength,g,c,f),{inputs:c&&f?[v,c,f]:[v],outputs:[]});let b=[v,i];h>1&&o&&B.size(o.dims)>0&&b.push(o),c&&b.push(c),f&&b.push(f),e.compute(Jd(h,v,i,o,d,m,c,f),{inputs:b,outputs:h>1?[0,2]:[0]})},ec=(e,t)=>{let r=[t.batchSize,t.numHeads,t.sequenceLength,t.headSize],i=t.sequenceLength,s=t.inputHiddenSize,n=t.headSize,a=12,o={x:Math.ceil(t.headSize/a),y:Math.ceil(t.sequenceLength/a),z:t.batchSize*t.numHeads},u=[e.inputs[0],e.inputs[1],e.inputs[2]],d=[{type:12,data:i},{type:12,data:s},{type:12,data:n},{type:12,data:t.numHeads},{type:12,data:t.headSize},{type:12,data:t.hiddenSize},{type:12,data:t.hiddenSize+t.hiddenSize+t.vHiddenSize}],c=f=>{let h=ue("output_q",u[0].dataType,r),m=ue("output_k",u[0].dataType,r),g=ue("output_v",u[0].dataType,r),y=P("input",u[0].dataType,u[0].dims),S=P("weight",u[1].dataType,u[1].dims),v=P("bias",u[2].dataType,u[2].dims),b=y.type.storage,k=[{name:"M",type:"u32"},{name:"K",type:"u32"},{name:"N",type:"u32"},{name:"num_heads",type:"u32"},{name:"head_size",type:"u32"},{name:"hidden_size",type:"u32"},{name:"ldb",type:"u32"}];return` + const TILE_SIZE = ${a}u; + var tileInput: array<${b}, ${a*a}>; + var tileWeightQ: array<${b}, ${a*a}>; + var tileWeightK: array<${b}, ${a*a}>; + var tileWeightV: array<${b}, ${a*a}>; + ${f.registerUniforms(k).declareVariables(y,S,v,h,m,g)} + ${f.mainStart([a,a,1])} + let batchIndex = workgroup_id.z / uniforms.num_heads; + let headNumber = workgroup_id.z % uniforms.num_heads; + let m = global_id.y; + let n = global_id.x; + + let inputOffset = batchIndex * (uniforms.M * uniforms.K) + m * uniforms.K; + let biasOffsetQ = headNumber * uniforms.head_size; + let biasOffsetK = uniforms.hidden_size + biasOffsetQ; + let biasOffsetV = uniforms.hidden_size + biasOffsetK; + + var valueQ = ${b}(0); + var valueK = ${b}(0); + var valueV = ${b}(0); + for (var w: u32 = 0u; w < uniforms.K; w += TILE_SIZE) { + if (m < uniforms.M && w + local_id.x < uniforms.K) { + tileInput[TILE_SIZE * local_id.y + local_id.x] = input[inputOffset + w + local_id.x]; + } + if (n < uniforms.N && w + local_id.y < uniforms.K) { + let offset = n + (w + local_id.y) * uniforms.ldb; + tileWeightQ[TILE_SIZE * local_id.y + local_id.x] = weight[biasOffsetQ + offset]; + tileWeightK[TILE_SIZE * local_id.y + local_id.x] = weight[biasOffsetK + offset]; + tileWeightV[TILE_SIZE * local_id.y + local_id.x] = weight[biasOffsetV + offset]; + } + workgroupBarrier(); + for (var k: u32 = 0u; k({outputs:[{dims:r,dataType:e.inputs[0].dataType,gpuDataType:0},{dims:r,dataType:e.inputs[0].dataType,gpuDataType:0},{dims:r,dataType:e.inputs[0].dataType,gpuDataType:0}],dispatchGroup:o,programUniforms:d}),getShaderSource:c},{inputs:u,outputs:[-1,-1,-1]})},qm=(e,t)=>{let r=Xd(e.inputs,t),[i,s,n]=ec(e,r);return Ji(e,i,s,n,e.inputs[4],void 0,void 0,void 0,e.inputs[5],r)}}),tc,rc,ic,Vm,x$=j(()=>{Ut(),_e(),we(),qe(),ve(),tc=(e,t)=>{if(!e||e.length!==5)throw new Error("BatchNormalization requires 5 inputs");let r=(i,s,n)=>{let a=s.length;if(a!==i.length)throw new Error(`${n}: num dimensions != ${a}`);s.forEach((o,u)=>{if(o!==i[u])throw new Error(`${n}: dim[${u}] do not match`)})};if(e[0].dims.length>1){let i=t.format==="NHWC"?t.spatial?e[0].dims.slice(-1):e[0].dims.slice(-1).concat(e[0].dims.slice(1,e[0].dims.length-1)):e[0].dims.slice(1,t.spatial?2:void 0);r(e[1].dims,i,"Invalid input scale"),r(e[2].dims,i,"Invalid input B"),r(e[3].dims,i,"Invalid input mean"),r(e[4].dims,i,"Invalid input var")}else r(e[1].dims,[1],"Invalid input scale"),r(e[2].dims,[1],"Invalid input B"),r(e[3].dims,[1],"Invalid input mean"),r(e[4].dims,[1],"Invalid input var")},rc=(e,t)=>{let{epsilon:r,spatial:i,format:s}=t,n=e[0].dims,a=i?Le(n[n.length-1]):1,o=s==="NHWC"&&n.length>1?a:1,u=B.size(n)/a,d=i,c=d?n.length:n,f=P("x",e[0].dataType,e[0].dims,a),h=P("scale",e[1].dataType,e[1].dims,o),m=P("bias",e[2].dataType,e[2].dims,o),g=P("inputMean",e[3].dataType,e[3].dims,o),y=P("inputVar",e[4].dataType,e[4].dims,o),S=ue("y",e[0].dataType,c,a),v=()=>{let k="";if(i)k=`let cOffset = ${n.length===1?"0u":s==="NHWC"?`outputIndices[${n.length-1}] / ${a}`:"outputIndices[1]"};`;else if(s==="NCHW")k=` + ${S.indicesSet("outputIndices","0","0")} + let cOffset = ${S.indicesToOffset("outputIndices")};`;else{k=`var cIndices = ${h.type.indices}(0); + cIndices[0] = outputIndices[${n.length-1}];`;for(let x=1;x` + const epsilon = ${r}; + ${k.registerUniform("outputSize","u32").declareVariables(f,h,m,g,y,S)} + ${k.mainStart()} + ${k.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")} + var outputIndices = ${S.offsetToIndices(`global_idx * ${a}`)}; + ${v()} + let scale = ${h.getByOffset("cOffset")}; + let bias = ${m.getByOffset("cOffset")}; + let inputMean = ${g.getByOffset("cOffset")}; + let inputVar = ${y.getByOffset("cOffset")}; + let x = ${f.getByOffset("global_idx")}; + let value = (x - inputMean) * inverseSqrt(inputVar + epsilon) * scale + bias; + ${S.setByOffset("global_idx","value")} + }`;return{name:"BatchNormalization",shaderCache:{hint:`${t.epsilon}_${t.format}_${i}_${a}`,inputDependencies:d?["rank","type","type","type","type"]:void 0},getShaderSource:b,getRunData:()=>({outputs:[{dims:e[0].dims,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(u/64)},programUniforms:d?[{type:12,data:u},...pe(n)]:[{type:12,data:u}]})}},ic=e=>Oe(e),Vm=(e,t)=>{let{inputs:r,outputCount:i}=e,s=ic({...t,outputCount:i});if(We.webgpu.validateInputContent&&tc(r,s),t.trainingMode)throw new Error("BatchNormalization trainingMode is not supported yet.");e.compute(rc(r,s))}}),nc,sc,Fm,S$=j(()=>{we(),ve(),nc=e=>{if(e[0].dims.length!==3)throw new Error("input should have 3 dimensions");if(![320,640,1280].includes(e[0].dims[2]))throw new Error("number of channels should be 320, 640 or 1280");if(e[1].dims.length!==1)throw new Error("bias is expected to have 1 dimensions");if(e[0].dims[2]!==e[1].dims[0])throw new Error("last dimension of input and bias are not the same")},sc=e=>{let t=e[0].dims,r=e[0].dims[2],i=B.size(t)/4,s=e[0].dataType,n=P("input",s,t,4),a=P("bias",s,[r],4),o=P("residual",s,t,4),u=ue("output",s,t,4);return{name:"BiasAdd",getRunData:()=>({outputs:[{dims:t,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(i/64)}}),getShaderSource:d=>` + const channels = ${r}u / 4; + ${d.declareVariables(n,a,o,u)} + + ${d.mainStart()} + ${d.guardAgainstOutOfBoundsWorkgroupSizes(i)} + let value = ${n.getByOffset("global_idx")} + + ${a.getByOffset("global_idx % channels")} + ${o.getByOffset("global_idx")}; + ${u.setByOffset("global_idx","value")} + }`}},Fm=e=>{nc(e.inputs),e.compute(sc(e.inputs))}}),ac,ze,Hm,Gm,jm,Km,Zm,Xm,Ym,Qm,Jm,oc,eg,tg,rg,ig,Ri,ng,Dn,sg,ag,og,ug,lg,dg,cg,pg,fg,hg,mg,gg,_g,yg,bg,wg,sa,vg,io,no,$g,xg,Sg,uc,lc,kg,tu=j(()=>{_e(),we(),qe(),ve(),ac=(e,t,r,i,s,n,a)=>{let o=Math.ceil(t/4),u="";typeof s=="string"?u=`${s}(a)`:u=s("a");let d=P("inputData",r,[o],4),c=ue("outputData",i,[o],4),f=[{name:"vec_size",type:"u32"}];return a&&f.push(...a),` + ${e.registerUniforms(f).declareVariables(d,c)} + + ${n??""} + + ${e.mainStart()} + ${e.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.vec_size")} + + let a = ${d.getByOffset("global_idx")}; + ${c.setByOffset("global_idx",u)} + }`},ze=(e,t,r,i,s,n=e.dataType,a,o)=>{let u=[{type:12,data:Math.ceil(B.size(e.dims)/4)}];return a&&u.push(...a),{name:t,shaderCache:{hint:s,inputDependencies:["type"]},getShaderSource:d=>ac(d,B.size(e.dims),e.dataType,n,r,i,o),getRunData:d=>({outputs:[{dims:e.dims,dataType:n}],dispatchGroup:{x:Math.ceil(B.size(d[0].dims)/64/4)},programUniforms:u})}},Hm=e=>{e.compute(ze(e.inputs[0],"Abs","abs"))},Gm=e=>{e.compute(ze(e.inputs[0],"Acos","acos"))},jm=e=>{e.compute(ze(e.inputs[0],"Acosh","acosh"))},Km=e=>{e.compute(ze(e.inputs[0],"Asin","asin"))},Zm=e=>{e.compute(ze(e.inputs[0],"Asinh","asinh"))},Xm=e=>{e.compute(ze(e.inputs[0],"Atan","atan"))},Ym=e=>{e.compute(ze(e.inputs[0],"Atanh","atanh"))},Qm=e=>Oe(e),Jm=(e,t)=>{let r;switch(t.to){case 10:r="vec4";break;case 1:r="vec4";break;case 12:r="vec4";break;case 6:r="vec4";break;case 9:r="vec4";break;default:throw new RangeError(`not supported type (specified in attribute 'to' from 'Cast' operator): ${t.to}`)}e.compute(ze(e.inputs[0],"Cast",r,void 0,t.cacheKey,t.to))},oc=e=>{let t,r,i=e.length>=2&&e[1].data!==0,s=e.length>=3&&e[2].data!==0;switch(e[0].dataType){case 1:t=i?e[1].getFloat32Array()[0]:-34028234663852886e22,r=s?e[2].getFloat32Array()[0]:34028234663852886e22;break;case 10:t=i?e[1].getUint16Array()[0]:64511,r=s?e[2].getUint16Array()[0]:31743;break;default:throw new Error("Unsupport data type")}return Oe({min:t,max:r})},eg=(e,t)=>{let r=t||oc(e.inputs),i=st(e.inputs[0].dataType);e.compute(ze(e.inputs[0],"Clip",s=>`clamp(${s}, vec4<${i}>(uniforms.min), vec4<${i}>(uniforms.max))`,void 0,r.cacheKey,void 0,[{type:e.inputs[0].dataType,data:r.min},{type:e.inputs[0].dataType,data:r.max}],[{name:"min",type:i},{name:"max",type:i}]),{inputs:[0]})},tg=e=>{e.compute(ze(e.inputs[0],"Ceil","ceil"))},rg=e=>{e.compute(ze(e.inputs[0],"Cos","cos"))},ig=e=>{e.compute(ze(e.inputs[0],"Cosh","cosh"))},Ri=e=>Oe(e),ng=(e,t)=>{let r=st(e.inputs[0].dataType);e.compute(ze(e.inputs[0],"Elu",i=>`elu_vf32(${i})`,` + const elu_alpha_ = ${r}(${t.alpha}); + + fn elu_f32(a: ${r}) -> ${r} { + return select((exp(a) - 1.0) * elu_alpha_, a, a >= 0.0); + } + + fn elu_vf32(v: vec4<${r}>) -> vec4<${r}> { + return vec4(elu_f32(v.x), elu_f32(v.y), elu_f32(v.z), elu_f32(v.w)); + }`,t.cacheKey))},Dn=(e="f32")=>` +const r0: ${e} = 0.3275911; +const r1: ${e} = 0.254829592; +const r2: ${e} = -0.284496736; +const r3: ${e} = 1.421413741; +const r4: ${e} = -1.453152027; +const r5: ${e} = 1.061405429; + +fn erf_vf32(v: vec4<${e}>) -> vec4<${e}> { + let absv = abs(v); + let x = 1.0 / (1.0 + r0 * absv); + return sign(v) * (1.0 - ((((r5 * x + r4) * x + r3) * x + r2) * x + r1) * x * exp(-absv * absv)); +}`,sg=e=>{let t=st(e.inputs[0].dataType);e.compute(ze(e.inputs[0],"Erf",r=>`erf_vf32(${r})`,Dn(t)))},ag=e=>{e.compute(ze(e.inputs[0],"Exp","exp"))},og=e=>{e.compute(ze(e.inputs[0],"Floor","floor"))},ug=e=>{let t=st(e.inputs[0].dataType);e.compute(ze(e.inputs[0],"Gelu",r=>`0.5 * ${r} * (1.0 + erf_vf32(${r} * 0.7071067811865475))`,Dn(t)))},lg=(e,t)=>{let r=st(e.inputs[0].dataType);e.compute(ze(e.inputs[0],"LeakyRelu",i=>`select(leaky_relu_alpha_ * ${i}, ${i}, ${i} >= vec4<${r}>(0.0))`,`const leaky_relu_alpha_ = ${r}(${t.alpha});`,t.cacheKey))},dg=e=>{e.compute(ze(e.inputs[0],"Not",t=>`!${t}`))},cg=e=>{e.compute(ze(e.inputs[0],"Neg",t=>`-${t}`))},pg=e=>{e.compute(ze(e.inputs[0],"Reciprocal",t=>`1.0/${t}`))},fg=e=>{let t=st(e.inputs[0].dataType);e.compute(ze(e.inputs[0],"Relu",r=>`select(vec4<${t}>(0.0), ${r}, ${r} > vec4<${t}>(0.0))`))},hg=e=>{e.compute(ze(e.inputs[0],"Sigmoid",t=>`(1.0 / (1.0 + exp(-${t})))`))},mg=e=>Oe(e),gg=(e,t)=>{let r=st(e.inputs[0].dataType);e.compute(ze(e.inputs[0],"HardSigmoid",i=>`max(vec4<${r}>(0.0), min(vec4<${r}>(1.0), ${t.alpha} * ${i} + vec4<${r}>(${t.beta})))`,void 0,t.cacheKey))},_g=e=>{e.compute(ze(e.inputs[0],"Sin","sin"))},yg=e=>{e.compute(ze(e.inputs[0],"Sinh","sinh"))},bg=e=>{e.compute(ze(e.inputs[0],"Sqrt","sqrt"))},wg=e=>{e.compute(ze(e.inputs[0],"Tan","tan"))},sa=e=>`sign(${e}) * (1 - exp(-2 * abs(${e}))) / (1 + exp(-2 * abs(${e})))`,vg=e=>{e.compute(ze(e.inputs[0],"Tanh",sa))},io=(e="f32")=>` +const fast_gelu_a: ${e} = 0.5; +const fast_gelu_b: ${e} = 0.7978845608028654; +const fast_gelu_c: ${e} = 0.035677408136300125; + +fn tanh_v(v: vec4<${e}>) -> vec4<${e}> { + return ${sa("v")}; +} +`,no=e=>`(fast_gelu_a + fast_gelu_a * tanh_v(${e} * (fast_gelu_c * ${e} * ${e} + fast_gelu_b))) * ${e}`,$g=e=>{let t=st(e.inputs[0].dataType);e.compute(ze(e.inputs[0],"FastGelu",no,io(t),void 0,e.inputs[0].dataType))},xg=(e,t)=>{let r=st(e.inputs[0].dataType);return e.compute(ze(e.inputs[0],"ThresholdedRelu",i=>`select(vec4<${r}>(0.0), ${i}, ${i} > thresholded_relu_alpha_)`,`const thresholded_relu_alpha_ = vec4<${r}>(${t.alpha});`,t.cacheKey)),0},Sg=e=>{e.compute(ze(e.inputs[0],"Log","log"))},uc=(e,t)=>` +const alpha = vec4<${e}>(${t}); +const one = ${e}(1.0); +const zero = ${e}(0.0); + +fn quick_gelu_impl(x: vec4<${e}>) -> vec4<${e}> { + let v = x *alpha; + var x1 : vec4<${e}>; + for (var i = 0; i < 4; i = i + 1) { + if (v[i] >= zero) { + x1[i] = one / (one + exp(-v[i])); + } else { + x1[i] = one - one / (one + exp(v[i])); + } + } + return x * x1; +} +`,lc=e=>`quick_gelu_impl(${e})`,kg=(e,t)=>{let r=st(e.inputs[0].dataType);e.compute(ze(e.inputs[0],"QuickGelu",lc,uc(r,t.alpha),t.cacheKey,e.inputs[0].dataType))}}),dc,cc,Tg,k$=j(()=>{we(),ve(),tu(),dc=e=>{if(e[0].dims.length!==3)throw new Error("input should have 3 dimensions");if(![2560,5120,10240].includes(e[0].dims[2]))throw new Error("hidden state should be 2560, 5120 or 10240");if(e[1].dims.length!==1)throw new Error("bias is expected to have 1 dimensions");if(e[0].dims[2]!==e[1].dims[0])throw new Error("last dimension of input and bias are not the same")},cc=e=>{let t=e[0].dims.slice();t[2]=t[2]/2;let r=P("input",e[0].dataType,e[0].dims,4),i=P("bias",e[0].dataType,[e[0].dims[2]],4),s=ue("output",e[0].dataType,t,4),n=B.size(t)/4,a=Xe(e[0].dataType);return{name:"BiasSplitGelu",getRunData:()=>({outputs:[{dims:t,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(n/64)}}),getShaderSource:o=>` + const M_SQRT2 = sqrt(2.0); + const halfChannels = ${e[0].dims[2]/4/2}u; + + ${o.declareVariables(r,i,s)} + + ${Dn(a)} + + ${o.mainStart()} + ${o.guardAgainstOutOfBoundsWorkgroupSizes(n)} + let biasIdx = global_idx % halfChannels; + let batchIndex = global_idx / halfChannels; + let inputOffset = biasIdx + batchIndex * halfChannels * 2; + let valueLeft = input[inputOffset] + bias[biasIdx]; + let valueRight = input[inputOffset + halfChannels] + bias[biasIdx + halfChannels]; + let geluRight = valueRight * 0.5 * (erf_vf32(valueRight / M_SQRT2) + 1); + + ${s.setByOffset("global_idx","valueLeft * geluRight")} + }`}},Tg=e=>{dc(e.inputs),e.compute(cc(e.inputs))}}),pc,fc,Ot,Ig,Eg,Cg,zg,Og,Ag,Rg,Bg,Mg,Ng,T$=j(()=>{_e(),we(),ve(),pc=(e,t,r,i,s,n,a,o,u,d,c,f)=>{let h,m;typeof o=="string"?h=m=(b,k)=>`${o}((${b}),(${k}))`:typeof o=="function"?h=m=o:(h=o.scalar,m=o.vector);let g=ue("outputData",c,i.length,4),y=P("aData",u,t.length,4),S=P("bData",d,r.length,4),v;if(s)if(n){let b=B.size(t)===1,k=B.size(r)===1,x=t.length>0&&t[t.length-1]%4===0,I=r.length>0&&r[r.length-1]%4===0;b||k?v=g.setByOffset("global_idx",m(b?`${y.type.value}(${y.getByOffset("0")}.x)`:y.getByOffset("global_idx"),k?`${S.type.value}(${S.getByOffset("0")}.x)`:S.getByOffset("global_idx"))):v=` + let outputIndices = ${g.offsetToIndices("global_idx * 4u")}; + let offsetA = ${y.broadcastedIndicesToOffset("outputIndices",g)}; + let offsetB = ${S.broadcastedIndicesToOffset("outputIndices",g)}; + ${g.setByOffset("global_idx",m(a||x?y.getByOffset("offsetA / 4u"):`${y.type.value}(${y.getByOffset("offsetA / 4u")}[offsetA % 4u])`,a||I?S.getByOffset("offsetB / 4u"):`${S.type.value}(${S.getByOffset("offsetB / 4u")}[offsetB % 4u])`))} + `}else v=g.setByOffset("global_idx",m(y.getByOffset("global_idx"),S.getByOffset("global_idx")));else{if(!n)throw new Error("no necessary to use scalar implementation for element-wise binary op implementation.");let b=(k,x,I="")=>{let z=`aData[indexA${x}][componentA${x}]`,O=`bData[indexB${x}][componentB${x}]`;return` + let outputIndices${x} = ${g.offsetToIndices(`global_idx * 4u + ${x}u`)}; + let offsetA${x} = ${y.broadcastedIndicesToOffset(`outputIndices${x}`,g)}; + let offsetB${x} = ${S.broadcastedIndicesToOffset(`outputIndices${x}`,g)}; + let indexA${x} = offsetA${x} / 4u; + let indexB${x} = offsetB${x} / 4u; + let componentA${x} = offsetA${x} % 4u; + let componentB${x} = offsetB${x} % 4u; + ${k}[${x}] = ${I}(${h(z,O)}); + `};c===9?v=` + var data = vec4(0); + ${b("data",0,"u32")} + ${b("data",1,"u32")} + ${b("data",2,"u32")} + ${b("data",3,"u32")} + outputData[global_idx] = dot(vec4(0x1, 0x100, 0x10000, 0x1000000), vec4(data));`:v=` + ${b("outputData[global_idx]",0)} + ${b("outputData[global_idx]",1)} + ${b("outputData[global_idx]",2)} + ${b("outputData[global_idx]",3)} + `}return` + ${e.registerUniform("vec_size","u32").declareVariables(y,S,g)} + + ${f??""} + + ${e.mainStart()} + ${e.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.vec_size")} + ${v} + }`},fc=(e,t,r,i,s,n,a=r.dataType)=>{let o=r.dims.map(y=>Number(y)??1),u=i.dims.map(y=>Number(y)??1),d=!B.areEqual(o,u),c=o,f=B.size(o),h=!1,m=!1,g=[d];if(d){let y=ai.calcShape(o,u,!1);if(!y)throw new Error("Can't perform binary op on the given tensors");c=y.slice(),f=B.size(c);let S=B.size(o)===1,v=B.size(u)===1,b=o.length>0&&o[o.length-1]%4===0,k=u.length>0&&u[u.length-1]%4===0;g.push(S),g.push(v),g.push(b),g.push(k);let x=1;for(let I=1;Iy.toString()).join("_"),inputDependencies:["rank","rank"]},getShaderSource:y=>pc(y,o,u,c,h,d,m,s,r.dataType,i.dataType,a,n),getRunData:()=>({outputs:[{dims:c,dataType:a}],dispatchGroup:{x:Math.ceil(f/64/4)},programUniforms:[{type:12,data:Math.ceil(B.size(c)/4)},...pe(o,u,c)]})}},Ot=(e,t,r,i,s,n)=>{e.compute(fc(t,s??"",e.inputs[0],e.inputs[1],r,i,n))},Ig=e=>{Ot(e,"Add",(t,r)=>`${t}+${r}`)},Eg=e=>{Ot(e,"Div",(t,r)=>`${t}/${r}`)},Cg=e=>{Ot(e,"Equal",{scalar:(t,r)=>`u32(${t}==${r})`,vector:(t,r)=>`vec4(${t}==${r})`},void 0,void 0,9)},zg=e=>{Ot(e,"Mul",(t,r)=>`${t}*${r}`)},Og=e=>{let t=P("input",e.inputs[0].dataType,e.inputs[0].dims).type.value;Ot(e,"Pow",{scalar:(r,i)=>`pow_custom(${r},${i})`,vector:(r,i)=>`pow_vector_custom(${r},${i})`},` + fn pow_custom(a : ${t}, b : ${t}) -> ${t} { + if (b == ${t}(0.0)) { + return ${t}(1.0); + } else if (a < ${t}(0.0) && f32(b) != floor(f32(b))) { + return ${t}(pow(f32(a), f32(b))); // NaN + } + return select(sign(a), ${t}(1.0), round(f32(abs(b) % ${t}(2.0))) != 1.0) * ${t}(${t==="i32"?"round":""}(pow(f32(abs(a)), f32(b)))); + } + fn pow_vector_custom(a : vec4<${t}>, b : vec4<${t}>) -> vec4<${t}> { + // TODO: implement vectorized pow + return vec4<${t}>(pow_custom(a.x, b.x), pow_custom(a.y, b.y), pow_custom(a.z, b.z), pow_custom(a.w, b.w)); + } + `)},Ag=e=>{Ot(e,"Sub",(t,r)=>`${t}-${r}`)},Rg=e=>{Ot(e,"Greater",{scalar:(t,r)=>`u32(${t}>${r})`,vector:(t,r)=>`vec4(${t}>${r})`},void 0,void 0,9)},Bg=e=>{Ot(e,"Less",{scalar:(t,r)=>`u32(${t}<${r})`,vector:(t,r)=>`vec4(${t}<${r})`},void 0,void 0,9)},Mg=e=>{Ot(e,"GreaterOrEqual",{scalar:(t,r)=>`u32(${t}>=${r})`,vector:(t,r)=>`vec4(${t}>=${r})`},void 0,void 0,9)},Ng=e=>{Ot(e,"LessOrEqual",{scalar:(t,r)=>`u32(${t}<=${r})`,vector:(t,r)=>`vec4(${t}<=${r})`},void 0,void 0,9)}}),hc,mc,gc,_c,Dg,Pg,I$=j(()=>{_e(),we(),qe(),ve(),hc=(e,t)=>{if(!e||e.length<1)throw new Error("too few inputs");let r=0,i=e[r],s=i.dataType,n=i.dims.length;e.forEach((a,o)=>{if(o!==r){if(a.dataType!==s)throw new Error("input tensors should be one type");if(a.dims.length!==n)throw new Error("input tensors should have the same shape");a.dims.forEach((u,d)=>{if(d!==t&&u!==i.dims[d])throw new Error("non concat dimensions must match")})}})},mc=(e,t)=>` + fn calculateInputIndex(index: u32) -> u32 { + let sizeInConcatAxis = array(${t}); + for (var i: u32 = 0u; i < ${e}; i += 1u ) { + if (index < sizeInConcatAxis[i]) { + return i; + } + } + return ${e}u; + }`,gc=(e,t)=>{let r=e.length,i=[];for(let s=0;s{let s=B.size(r),n=new Array(e.length),a=new Array(e.length),o=0,u=[],d=[],c=[{type:12,data:s}];for(let y=0;y`uniforms.sizeInConcatAxis${y}`).join(","),g=y=>` + + ${(()=>{y.registerUniform("outputSize","u32");for(let S=0;S(${m}); + ${h} -= sizeInConcatAxis[inputIndex - 1u]; + } + + ${gc(a,f)} + }`;return{name:"Concat",shaderCache:{hint:`${t}`,inputDependencies:u},getRunData:()=>({outputs:[{dims:r,dataType:i}],dispatchGroup:{x:Math.ceil(s/64)},programUniforms:c}),getShaderSource:g}},Dg=(e,t)=>{let r=e.inputs,i=r[0].dims,s=B.normalizeAxis(t.axis,i.length);hc(r,s);let n=i.slice();n[s]=r.reduce((o,u)=>o+(u.dims.length>s?u.dims[s]:0),0);let a=r.filter(o=>B.size(o.dims)>0);e.compute(_c(a,s,n,r[0].dataType),{inputs:a})},Pg=e=>Oe({axis:e.axis})}),Fr,Hr,Gr,ru,Zr=j(()=>{_e(),we(),Fr=(e,t,r="f32")=>{switch(e.activation){case"Relu":return`value = max(value, ${t}(0.0));`;case"Sigmoid":return`value = (${t}(1.0) / (${t}(1.0) + exp(-value)));`;case"Clip":return`value = clamp(value, ${t}(${r}(uniforms.clip_min)), ${t}(${r}(uniforms.clip_max)));`;case"HardSigmoid":return`value = max(${t}(0.0), min(${t}(1.0), ${r}(uniforms.alpha) * value + ${r}(uniforms.beta)));`;case"LeakyRelu":return`value = select(${r}(uniforms.alpha) * value, value, value >= ${t}(0.0));`;case"Tanh":return`let e2x = exp(-2.0 * abs(value)); + value = sign(value) * (1.0 - e2x) / (1.0 + e2x); + `;case"":return"";default:throw new Error(`Unsupported activation ${e.activation}`)}},Hr=(e,t)=>{e.activation==="Clip"?t.push({type:1,data:e.clipMax},{type:1,data:e.clipMin}):e.activation==="HardSigmoid"?t.push({type:1,data:e.alpha},{type:1,data:e.beta}):e.activation==="LeakyRelu"&&t.push({type:1,data:e.alpha})},Gr=(e,t)=>{e.activation==="Clip"?t.push({name:"clip_max",type:"f32"},{name:"clip_min",type:"f32"}):e.activation==="HardSigmoid"?t.push({name:"alpha",type:"f32"},{name:"beta",type:"f32"}):e.activation==="LeakyRelu"&&t.push({name:"alpha",type:"f32"})},ru=e=>{let t=(e==null?void 0:e.activation)||"";if(t==="HardSigmoid"){let[r,i]=(e==null?void 0:e.activation_params)||[.2,.5];return{activation:t,alpha:r,beta:i}}else if(t==="Clip"){let[r,i]=(e==null?void 0:e.activation_params)||[dm,cm];return{activation:t,clipMax:i,clipMin:r}}else if(t==="LeakyRelu"){let[r]=(e==null?void 0:e.activation_params)||[.01];return{activation:t,alpha:r}}return{activation:t}}}),Je,Ug,iu=j(()=>{Je=(e,t)=>{switch(e){case 1:return t;case 2:return`vec2<${t}>`;case 3:return`vec3<${t}>`;case 4:return`vec4<${t}>`;default:throw new Error(`${e}-component is not supported.`)}},Ug=e=>` + ${e?"value = value + getBiasByOutputCoords(coords);":""} + `}),Wg,E$=j(()=>{Wg=e=>` +fn getIndexFromCoords4D(coords : vec4, shape : vec4) -> i32 { + return dot(coords, vec4( + shape.y * shape.z * shape.w, shape.z * shape.w, shape.w, 1)); +} +fn getOutputIndexFromCoords(coords : vec4) -> i32 { + return dot(coords, vec4( + i32(${e}.x), i32(${e}.y), i32(${e}.z), 1)); +} +`}),Hi,nu,su=j(()=>{_e(),we(),ve(),Zr(),Hi=(e,t,r,i,s)=>{let n=i-r;return` + ${Array.from({length:r}).map((a,o)=>` + if (${de(t.shape,o,t.rank)} != 1) { + ${t.indicesSet(e,o,de(s,o+n,i))} + } else { + ${t.indicesSet(e,o,0)} + }`).join("")} +`},nu=(e,t,r,i,s=!1,n)=>{let a=e[0].dims,o=e[1].dims,u=a[a.length-2],d=o[o.length-1],c=a[a.length-1],f=Le(d),h=Le(c),m=Le(u),g=B.size(r)/f/m,y=e.length>2,S=i?i.slice(0,-2):r.slice(0,-2),v=[B.size(S),u,d],b=[{type:12,data:g},{type:12,data:u},{type:12,data:d},{type:12,data:c}];Hr(t,b),b.push(...pe(S,a,o)),y&&b.push(...pe(e[2].dims)),b.push(...pe(v));let k=x=>{let I=Qo("batch_dims",e[0].dataType,S.length),z=P("a",e[0].dataType,a.length,h),O=P("b",e[1].dataType,o.length,f),A=ue("output",e[0].dataType,v.length,f),R=Xe(A.type.tensor),W=Fr(t,A.type.value,R),ie=[z,O],X="";if(y){let oe=s?f:1;ie.push(P("bias",e[2].dataType,e[2].dims.length,oe)),X=`${s?`value += bias[col / ${oe}];`:`value += ${A.type.value}(bias[row + i]);`}`}let ne=[{name:"output_size",type:"u32"},{name:"M",type:"u32"},{name:"N",type:"u32"},{name:"K",type:"u32"}];Gr(t,ne);let Y=()=>{let oe=`var a_data: ${z.type.value};`;for(let V=0;V; + for (var k: u32 = 0u; k < uniforms.K; k = k + ${h}) { + ${Y()} + } + for (var i = 0u; i < ${m}u; i++) { + var value = values[i]; + ${X} + ${W} + let cur_indices = ${A.type.indices}(batch, row + i, col); + let offset = ${A.indicesToOffset("cur_indices")}; + ${A.setByOffset(`offset / ${f}`,"value")}; + } + } + `};return{name:"MatMulNaive",shaderCache:{hint:`${t.activation};${f};${h};${m};${s}`,inputDependencies:y?["rank","rank","rank"]:["rank","rank"]},getRunData:()=>({outputs:[{dims:n?n(r):r,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(g/64)},programUniforms:b}),getShaderSource:k}}}),yc,bc,so,aa,wc,ao,vc,Zn,au=j(()=>{_e(),we(),ve(),Zr(),su(),iu(),yc=(e,t)=>e?` + mm_Asub[inputRow][inputCol] = mm_readA(batch, + kStart + inputRow, + globalRowStart / innerElementSize + inputCol${t?", batchIndices":""}); + `:` + mm_Asub[inputRow][inputCol] = mm_readA(batch, + globalRow + innerRow, + kStart / innerElementSize + inputCol${t?", batchIndices":""}); + `,bc=(e,t)=>e?` + let ACached0 = mm_Asub[k * innerElementSize][localRow]; + let ACached1 = mm_Asub[k * innerElementSize + 1][localRow]; + let ACached2 = mm_Asub[k * innerElementSize + 2][localRow]; + ${t===3?"":"let ACached3 = mm_Asub[k * innerElementSize + 3][localRow];"} + for (var i = 0; i < rowPerThread; i = i + 1) { + acc[i] = BCached0 * ACached0[i] + acc[i]; + acc[i] = BCached1 * ACached1[i] + acc[i]; + acc[i] = BCached2 * ACached2[i] + acc[i]; + ${t===3?"":"acc[i] = BCached3 * ACached3[i] + acc[i];"} + }`:` + for (var i = 0; i < rowPerThread; i = i + 1) { + let ACached = mm_Asub[tileRow + i][k]; + acc[i] = BCached0 * ACached.x + acc[i]; + acc[i] = BCached1 * ACached.y + acc[i]; + acc[i] = BCached2 * ACached.z + acc[i]; + ${t===3?"":"acc[i] = BCached3 * ACached.w + acc[i];"} + }`,so=(e,t,r="f32",i,s=!1,n=32,a=!1,o=32)=>{let u=t[1]*e[1],d=t[0]*e[0],c=s?u:n,f=s?n:u,h=c/t[0],m=n/t[1];if(!((s&&h===4&&e[1]===4||!s&&(h===3||h===4))&&c%t[0]===0&&n%t[1]===0&&e[0]===4))throw new Error(`If transposeA ${s} is true, innerElementSize ${h} and workPerThread[1] ${e[1]} must be 4. + Otherwise, innerElementSize ${h} must be 3 or 4. + tileAWidth ${c} must be divisible by workgroupSize[0]${t[0]}. tileInner ${n} must be divisible by workgroupSize[1] ${t[1]}. colPerThread ${e[0]} must be 4.`);return` +var mm_Asub: array, ${c/h}>, ${f}>; +var mm_Bsub: array, ${d/e[0]}>, ${n}>; + +const rowPerThread = ${e[1]}; +const colPerThread = ${e[0]}; +const innerElementSize = ${h}; +const tileInner = ${n}; + +@compute @workgroup_size(${t[0]}, ${t[1]}, ${t[2]}) +fn main(@builtin(local_invocation_id) localId : vec3, + @builtin(global_invocation_id) globalId : vec3, + @builtin(workgroup_id) workgroupId : vec3) { + let localRow = i32(localId.y); + let tileRow = localRow * rowPerThread; + let tileCol = i32(localId.x); + + let globalRow =i32(globalId.y) * rowPerThread; + let globalCol = i32(globalId.x); + let batch = ${a?"0":"i32(globalId.z)"}; + ${i?`let batchIndices = ${i.offsetToIndices("u32(batch)")};`:""} + let globalRowStart = i32(workgroupId.y) * ${u}; + + let num_tiles = ${a?`${Math.ceil(o/n)}`:"(uniforms.dim_inner - 1) / tileInner + 1"}; + var kStart = ${a?`i32(globalId.z) * ${o}`:"0"}; + + var acc: array, rowPerThread>; + + // Loop over shared dimension. + let tileRowB = localRow * ${m}; + for (var t = 0; t < num_tiles; t = t + 1) { + // Load one tile of A into local memory. + for (var innerRow = 0; innerRow < rowPerThread; innerRow = innerRow + 1) { + let inputRow = tileRow + innerRow; + let inputCol = tileCol; + ${yc(s,i)} + } + + // Load one tile of B into local memory. + for (var innerRow = 0; innerRow < ${m}; innerRow = innerRow + 1) { + let inputRow = tileRowB + innerRow; + let inputCol = tileCol; + mm_Bsub[inputRow][inputCol] = mm_readB(batch, kStart + inputRow, globalCol${i?", batchIndices":""}); + } + kStart = kStart + tileInner; + workgroupBarrier(); + + // Compute acc values for a single thread. + for (var k = 0; k < tileInner / innerElementSize; k = k + 1) { + let BCached0 = mm_Bsub[k * innerElementSize][tileCol]; + let BCached1 = mm_Bsub[k * innerElementSize + 1][tileCol]; + let BCached2 = mm_Bsub[k * innerElementSize + 2][tileCol]; + ${h===3?"":"let BCached3 = mm_Bsub[k * innerElementSize + 3][tileCol];"} + + ${bc(s,h)} + } + + workgroupBarrier(); + } + + for (var innerRow = 0; innerRow < rowPerThread; innerRow = innerRow + 1) { + mm_write(batch, globalRow + innerRow, globalCol, acc[innerRow]); + } +}`},aa=(e,t)=>e?` + mm_Asub[inputRow][inputCol] = mm_readA(batch, + kStart + inputRow, + globalRowStart + inputCol${t?", batchIndices":""}); + `:` + mm_Asub[inputRow][inputCol] = mm_readA(batch, + globalRowStart + inputRow, + kStart + inputCol${t?", batchIndices":""}); + `,wc=e=>e?"let ACached = mm_Asub[k][tileRow + innerRow];":"let ACached = mm_Asub[tileRow + innerRow][k];",ao=(e,t,r="f32",i,s=!1,n=32,a=!1,o=32,u=!1)=>{let d=e[1]*t[1],c=e[0]*t[0],f=s?d:n,h=s?n:d;if(!(h%t[1]===0&&f%t[0]===0&&n%t[1]===0))throw new Error(`tileAHight ${h} must be divisible by workgroupSize[1]${t[1]}, tileAWidth ${f} must be divisible by workgroupSize[0]${t[0]}, tileInner ${n} must be divisible by workgroupSize[1]${t[1]}`);let m=h/t[1],g=f/t[0],y=n/t[1],S=u?` + let localRow = i32(localId.y); + let localCol = i32(localId.x); + let globalRowStart = i32(workgroupId.y) * ${d}; + let globalColStart = i32(workgroupId.x) * ${c}; + + // Loop over shared dimension. + for (var t = 0; t < num_tiles; t = t + 1) { + // Load one tile of A into local memory. + for (var inputRow = localRow; inputRow < ${h}; inputRow = inputRow + ${t[1]}) { + for (var inputCol = localCol; inputCol < ${f}; inputCol = inputCol + ${t[0]}) { + ${aa(s,i)} + } + } + // Load one tile of B into local memory. + for (var inputRow = localRow; inputRow < ${n}; inputRow = inputRow + ${t[1]}) { + for (var inputCol = localCol; inputCol < ${c}; inputCol = inputCol + ${t[0]}) { + mm_Bsub[inputRow][inputCol] = mm_readB(batch, + kStart + inputRow, + globalColStart + inputCol${i?", batchIndices":""}); + } + } + kStart = kStart + tileInner; + workgroupBarrier(); + + // Compute acc values for a single thread. + var BCached : array<${r}, colPerThread>; + for (var k = 0; k < tileInner; k = k + 1) { + for (var inner = 0; inner < colPerThread; inner = inner + 1) { + BCached[inner] = mm_Bsub[k][localCol + inner * ${t[0]}]; + } + for (var innerRow = 0; innerRow < rowPerThread; innerRow = innerRow + 1) { + let ACached = ${s?`mm_Asub[k][localRow + innerRow * ${t[1]}];`:`mm_Asub[localRow + innerRow * ${t[1]}][k];`} + for (var innerCol = 0; innerCol < colPerThread; innerCol = innerCol + 1) { + acc[innerRow][innerCol] = acc[innerRow][innerCol] + + ACached * BCached[innerCol]; + } + } + } + workgroupBarrier(); + } + for (var innerRow = 0; innerRow < rowPerThread; innerRow = innerRow + 1) { + let gRow = globalRowStart + localRow + innerRow * ${t[1]}; + for (var innerCol = 0; innerCol < colPerThread; innerCol = innerCol + 1) { + let gCol = globalColStart + localCol + innerCol * ${t[0]}; + mm_write(batch, gRow, gCol, acc[innerRow][innerCol]); + } + } + `:` +let tileRow = i32(localId.y) * rowPerThread; +let tileCol = i32(localId.x) * colPerThread; + +let globalRow = i32(globalId.y) * rowPerThread; +let globalCol = i32(globalId.x) * colPerThread; +let globalRowStart = i32(workgroupId.y) * ${d}; + +let tileRowA = i32(localId.y) * ${m}; +let tileColA = i32(localId.x) * ${g}; +let tileRowB = i32(localId.y) * ${y}; +// Loop over shared dimension. +for (var t = 0; t < num_tiles; t = t + 1) { + // Load one tile of A into local memory. + for (var innerRow = 0; innerRow < ${m}; innerRow = innerRow + 1) { + for (var innerCol = 0; innerCol < ${g}; innerCol = innerCol + 1) { + let inputRow = tileRowA + innerRow; + let inputCol = tileColA + innerCol; + ${aa(s,i)} + } + } + + // Load one tile of B into local memory. + for (var innerRow = 0; innerRow < ${y}; innerRow = innerRow + 1) { + for (var innerCol = 0; innerCol < colPerThread; innerCol = innerCol + 1) { + let inputRow = tileRowB + innerRow; + let inputCol = tileCol + innerCol; + mm_Bsub[inputRow][inputCol] = mm_readB(batch, + kStart + inputRow, + globalCol + innerCol${i?", batchIndices":""}); + } + } + kStart = kStart + tileInner; + workgroupBarrier(); + + // Compute acc values for a single thread. + var BCached : array<${r}, colPerThread>; + for (var k = 0; k < tileInner; k = k + 1) { + for (var inner = 0; inner < colPerThread; inner = inner + 1) { + BCached[inner] = mm_Bsub[k][tileCol + inner]; + } + + for (var innerRow = 0; innerRow < rowPerThread; innerRow = innerRow + 1) { + ${wc(s)} + for (var innerCol = 0; innerCol < colPerThread; innerCol = innerCol + 1) { + acc[innerRow][innerCol] = acc[innerRow][innerCol] + ACached * BCached[innerCol]; + } + } + } + + workgroupBarrier(); +} + +for (var innerRow = 0; innerRow < rowPerThread; innerRow = innerRow + 1) { + for (var innerCol = 0; innerCol < colPerThread; innerCol = innerCol + 1) { + mm_write(batch, globalRow + innerRow, globalCol + innerCol, + acc[innerRow][innerCol]); + } +} +`;return` + var mm_Asub : array, ${h}>; + var mm_Bsub : array, ${n}>; + const rowPerThread = ${e[1]}; + const colPerThread = ${e[0]}; + const tileInner = ${n}; + +@compute @workgroup_size(${t[0]}, ${t[1]}, ${t[2]}) +fn main(@builtin(local_invocation_id) localId : vec3, + @builtin(global_invocation_id) globalId : vec3, + @builtin(workgroup_id) workgroupId : vec3) { + let batch = ${a?"0":"i32(globalId.z)"}; + ${i?`let batchIndices = ${i.offsetToIndices("u32(batch)")};`:""} + let num_tiles = ${a?`${Math.ceil(o/n)}`:"(uniforms.dim_inner - 1) / tileInner + 1"}; + var kStart = ${a?`i32(globalId.z) * ${o}`:"0"}; + + var acc : array, rowPerThread>; + ${S} + } +`},vc=(e,t,r,i,s=!1)=>{let[n,a,o,u]=i,d=Xe(i[0].type.tensor);return` + fn mm_readA(batch: i32, row: i32, colIn: i32, batchIndices: ${n.type.indices}) -> ${Je(e,d)} { + var value = ${Je(e,d)}(0.0); + let col = colIn * ${e}; + if(row < uniforms.dim_a_outer && col < uniforms.dim_inner) + { + var aIndices: ${a.type.indices}; + ${Hi("aIndices",a,a.rank-2,n.rank,"batchIndices")} + ${a.indicesSet("aIndices",a.rank-2,"u32(row)")} + ${a.indicesSet("aIndices",a.rank-1,"u32(colIn)")} + value = ${a.getByIndices("aIndices")}; + } + return value; + } + + fn mm_readB(batch: i32, row: i32, colIn: i32, batchIndices: ${n.type.indices}) -> ${Je(e,d)} { + var value = ${Je(e,d)}(0.0); + let col = colIn * ${e}; + if(row < uniforms.dim_inner && col < uniforms.dim_b_outer) + { + var bIndices: ${o.type.indices}; + ${Hi("bIndices",o,o.rank-2,n.rank,"batchIndices")} + ${o.indicesSet("bIndices",o.rank-2,"u32(row)")} + ${o.indicesSet("bIndices",o.rank-1,"u32(colIn)")} + value = ${o.getByIndices("bIndices")}; + } + return value; + } + + fn mm_write(batch: i32, row: i32, colIn: i32, valueIn: ${Je(e,d)}) { + let col = colIn * ${e}; + if (row < uniforms.dim_a_outer && col < uniforms.dim_b_outer) { + var value = valueIn; + let coords = vec3(batch, row, colIn); + ${t?`value = value + ${s?"bias[colIn]":`${Je(e,d)}(bias[row])`};`:""} + ${r} + ${u.setByIndices("vec3(coords)","value")} + } + } + `},Zn=(e,t,r,i,s=!1,n)=>{let a=e[0].dims,o=e[1].dims,u=a.slice(0,-2),d=o.slice(0,-2),c=i?i.slice(0,-2):r.slice(0,-2),f=B.size(c),h=a[a.length-2],m=a[a.length-1],g=o[o.length-1],y=m%4===0&&g%4===0,S=h<=8?[4,1,1]:[4,4,1],v=[8,8,1],b=[Math.ceil(g/v[0]/S[0]),Math.ceil(h/v[1]/S[1]),Math.ceil(f/v[2]/S[2])],k=y?4:1,x=[...u,h,m/k],I=x.length,z=[...d,m,g/k],O=z.length,A=[f,h,g/k],R=[{type:6,data:h},{type:6,data:g},{type:6,data:m}];Hr(t,R),R.push(...pe(c,x,z));let W=["rank","rank"],ie=e.length>2;ie&&(R.push(...pe(e[2].dims)),W.push("rank")),R.push(...pe(A));let X=ne=>{let Y=c.length,oe=Qo("batchDims",e[0].dataType,Y,1),V=Xe(e[0].dataType),ae=P("a",e[0].dataType,I,k),Z=P("b",e[1].dataType,O,k),le=ue("result",e[0].dataType,A.length,k),Be=[ae,Z];if(ie){let Ge=s?k:1;Be.push(P("bias",e[2].dataType,e[2].dims.length,Ge))}let D=[{name:"dim_a_outer",type:"i32"},{name:"dim_b_outer",type:"i32"},{name:"dim_inner",type:"i32"}];Gr(t,D);let q=Xe(le.type.tensor),ee=Fr(t,le.type.value,q),ye=vc(k,ie,ee,[oe,ae,Z,le],s);return` + ${ne.registerUniforms(D).registerInternalVariables(oe).declareVariables(...Be,le)} + ${ye} + ${y?so(S,v,V,oe):ao(S,v,V,oe)} + `};return{name:"MatMul",shaderCache:{hint:`${S};${t.activation};${y};${s}`,inputDependencies:W},getRunData:()=>({outputs:[{dims:n?n(r):r,dataType:e[0].dataType}],dispatchGroup:{x:b[0],y:b[1],z:b[2]},programUniforms:R}),getShaderSource:X}}}),$c,Lg,C$=j(()=>{_e(),pr(),ve(),Zr(),iu(),E$(),au(),$c=(e,t,r,i,s=!1,n,a=4,o=4,u=4,d="f32")=>{let c=R=>{switch(R){case 1:return"resData = x[xIndex];";case 3:return`resData = vec3<${d}>(x[xIndex], x[xIndex + 1], x[xIndex + 2]);`;case 4:return"resData = x[xIndex / 4];";default:throw new Error(`innerElementSize ${R} is not supported.`)}},f=R=>{switch(R){case 1:return"return w[row * i32(uniforms.w_shape[3]) + colIn];";case 4:return"return w[row * i32(uniforms.w_shape[3]) / 4 + colIn];";default:throw new Error(`innerElementSize ${R} is not supported.`)}},h=e?` + let coord = vec4(batch, xRow, xCol, xCh); + `:` + let coord = vec4(batch, xCh, xRow, xCol); + `,m=e?` + let coords = vec4( + batch, + row / outWidth, + row % outWidth, + col); + `:` + let coords = vec4( + batch, + row, + col / outWidth, + col % outWidth); + `,g=e?"i32(uniforms.x_shape[1])":"i32(uniforms.x_shape[2])",y=e?"i32(uniforms.x_shape[2])":"i32(uniforms.x_shape[3])",S=e?"row":"col",v=e?"col":"row",b=` + let inChannels = i32(uniforms.w_shape[2]); + let outWidth = ${e?"i32(uniforms.result_shape[2])":"i32(uniforms.result_shape[3])"}; + let outRow = ${S} / outWidth; + let outCol = ${S} % outWidth; + + let WRow = ${v} / (i32(uniforms.w_shape[1]) * inChannels); + let WCol = ${v} / inChannels % i32(uniforms.w_shape[1]); + let xRow = outRow * uniforms.stride[0] + uniforms.dilation[0] * WRow - uniforms.pad[0]; + let xCol = outCol * uniforms.stride[1] + uniforms.dilation[1] * WCol - uniforms.pad[1]; + let xCh = ${v} % inChannels; + var resData = ${Je(a,d)}(0.0); + // The bounds checking is always needed since we use it to pad zero for + // the 'same' padding type. + if (xRow >= 0 && xRow < ${g} && xCol >= 0 && xCol < ${y}) { + ${h} + let xIndex = getIndexFromCoords4D(coord, vec4(uniforms.x_shape)); + ${c(a)} + } + return resData;`,k=e?t&&i?` + let col = colIn * ${a}; + ${b}`:` + let col = colIn * ${a}; + if (row < uniforms.dim_a_outer && col < uniforms.dim_inner) { + ${b} + } + return ${Je(a,d)}(0.0);`:i&&r?` + let col = colIn * ${a}; + ${b}`:` + let col = colIn * ${a}; + if (row < uniforms.dim_inner && col < uniforms.dim_b_outer) { + ${b} + } + return ${Je(a,d)}(0.0);`,x=e?i&&r?f(o):` + let col = colIn * ${o}; + if (row < uniforms.dim_inner && col < uniforms.dim_b_outer) { + ${f(o)} + } + return ${Je(o,d)}(0.0);`:` + let col = colIn * ${o}; + if (row < uniforms.dim_inner && col < uniforms.dim_a_outer) { + ${f(o)} + } + return ${Je(o,d)}(0.0);`,I=Je(u,d),z=Je(e?a:o,d),O=Je(e?o:a,d),A=Fr(n,I,d);return` + fn mm_readA(batch: i32, row : i32, colIn : i32) -> ${z} { + ${e?k:x} + } + + fn mm_readB(batch: i32, row : i32, colIn : i32) -> ${O} { + ${e?x:k} + } + + fn mm_write(batch: i32, row : i32, colIn : i32, valueIn : ${I}) { + let col = colIn * ${u}; + if (row < uniforms.dim_a_outer && col < uniforms.dim_b_outer) + { + var value = valueIn; + let outWidth = ${e?"i32(uniforms.result_shape[2])":"i32(uniforms.result_shape[3])"}; + ${m} + ${Ug(s)} + ${A} + setOutputAtCoords(coords[0], coords[1], coords[2], coords[3], value); + } + }`},Lg=(e,t,r,i,s,n,a,o,u)=>{let d=t.format==="NHWC",c=d?e[0].dims[3]:e[0].dims[1],f=r[0],h=d?r[2]:r[3],m=d?r[1]:r[2],g=d?r[3]:r[1],y=d&&(c%4===0||c%3===0)&&g%4===0,S=d?g:h*m,v=d?h*m:g,b=[8,8,1],k=i<=8?[4,1,1]:[4,4,1],x=[Math.ceil(S/b[0]/k[0]),Math.ceil(v/b[1]/k[1]),Math.ceil(f/b[2]/k[2])];Te("verbose",()=>`[conv2d_mm_webgpu] dispatch = ${x}`);let I=y?d&&c%4!==0?3:4:1,z=b[1]*k[1],O=b[0]*k[0],A=Math.max(b[0]*I,b[1]),R=i%z===0,W=s%O===0,ie=n%A===0,X=y?[I,4,4]:[1,1,1],ne=[{type:6,data:i},{type:6,data:s},{type:6,data:n},{type:6,data:[t.pads[0],t.pads[1]]},{type:6,data:t.strides},{type:6,data:t.dilations}];Hr(t,ne),ne.push(...pe(e[0].dims,e[1].dims));let Y=["rank","rank"];a&&(ne.push(...pe(e[2].dims)),Y.push("rank")),ne.push(...pe(r));let oe=V=>{let ae=[{name:"dim_a_outer",type:"i32"},{name:"dim_b_outer",type:"i32"},{name:"dim_inner",type:"i32"},{name:"pad",type:"i32",length:2},{name:"stride",type:"i32",length:2},{name:"dilation",type:"i32",length:2}];Gr(t,ae);let Z=y?4:1,le=Xe(e[0].dataType),Be=` + fn setOutputAtIndex(flatIndex : i32, value : ${y?`vec4<${le}>`:le}) { + result[flatIndex] = ${y?`vec4<${le}>`:le}(value); + } + fn setOutputAtCoords(d0 : i32, d1 : i32, d2 : i32, d3 : i32, value : ${y?`vec4<${le}>`:le}) { + let flatIndex = getOutputIndexFromCoords(vec4(d0, d1, d2, d3)); + setOutputAtIndex(flatIndex ${y?"/ 4":""}, value); + }`,D=P("x",e[0].dataType,e[0].dims.length,I===3?1:I),q=P("w",e[1].dataType,e[1].dims.length,Z),ee=[D,q],ye=ue("result",e[0].dataType,r.length,Z);if(a){let Ge=P("bias",e[2].dataType,e[2].dims.length,Z);ee.push(Ge),Be+=` + fn getBiasByOutputCoords(coords : vec4) -> ${y?`vec4<${le}>`:le} { + return bias[coords.${d?"w":"y"}${y?"/ 4":""}]; + }`}return` + ${Wg("uniforms.result_strides")} + //struct Uniforms { xShape : vec4, wShape : vec4, outShape : vec4, + // outShapeStrides: vec3, filterDims : vec2, pad : vec2, stride : vec2, + // dilation : vec2, dimAOuter : i32, dimBOuter : i32, dimInner : i32 }; + ${V.registerUniforms(ae).declareVariables(...ee,ye)} + ${Be} + ${$c(d,R,W,ie,a,t,X[0],X[1],X[2],le)} + ${y?so(k,b,le,void 0,!d,A):ao(k,b,le,void 0,!d,A,!1,void 0,o)}`};return{name:"Conv2DMatMul",shaderCache:{hint:`${t.cacheKey};${I};${y};${R};${W};${ie};${z};${O};${A}`,inputDependencies:Y},getRunData:()=>({outputs:[{dims:u?u(r):r,dataType:e[0].dataType}],dispatchGroup:{x:x[0],y:x[1],z:x[2]},programUniforms:ne}),getShaderSource:oe}}}),xc,oa,xi,Sc,ua,kc,qg,Vg,z$=j(()=>{_e(),pr(),we(),ve(),Zr(),iu(),xc=e=>{let t=1;for(let r=0;rtypeof e=="number"?[e,e,e]:e,xi=(e,t)=>t<=1?e:e+(e-1)*(t-1),Sc=(e,t,r,i=1)=>{let s=xi(t,i);return Math.floor((e[0]*(r-1)-r+s)/2)},ua=(e,t,r,i,s)=>{s==null&&(s=Sc(e,t[0],i[0]));let n=[0,0,0,r];for(let a=0;a<3;a++)e[a]+2*s>=t[a]&&(n[a]=Math.trunc((e[a]-t[a]+2*s)/i[a]+1));return n},kc=(e,t,r,i,s,n,a,o,u,d)=>{let c,f,h,m;if(e==="VALID"&&(e=0),typeof e=="number"){c={top:e,bottom:e,left:e,right:e,front:e,back:e};let g=ua([t,r,i,1],[o,u,d],1,[s,n,a],e);f=g[0],h=g[1],m=g[2]}else if(Array.isArray(e)){if(!e.every((y,S,v)=>y===v[0]))throw Error(`Unsupported padding parameter: ${e}`);c={top:e[0],bottom:e[1],left:e[2],right:e[3],front:e[4],back:e[5]};let g=ua([t,r,i,1],[o,u,d],1,[s,n,a],e[0]);f=g[0],h=g[1],m=g[2]}else if(e==="SAME_UPPER"){f=Math.ceil(t/s),h=Math.ceil(r/n),m=Math.ceil(i/a);let g=(f-1)*s+o-t,y=(h-1)*n+u-r,S=(m-1)*a+d-i,v=Math.floor(g/2),b=g-v,k=Math.floor(y/2),x=y-k,I=Math.floor(S/2),z=S-I;c={top:k,bottom:x,left:I,right:z,front:v,back:b}}else throw Error(`Unknown padding parameter: ${e}`);return{padInfo:c,outDepth:f,outHeight:h,outWidth:m}},qg=(e,t,r,i,s,n=!1,a="channelsLast")=>{let o,u,d,c,f;if(a==="channelsLast")[o,u,d,c,f]=e;else if(a==="channelsFirst")[o,f,u,d,c]=e;else throw new Error(`Unknown dataFormat ${a}`);let[h,,m,g,y]=t,[S,v,b]=oa(r),[k,x,I]=oa(i),z=xi(m,k),O=xi(g,x),A=xi(y,I),{padInfo:R,outDepth:W,outHeight:ie,outWidth:X}=kc(s,u,d,c,S,v,b,z,O,A),ne=n?h*f:h,Y=[0,0,0,0,0];return a==="channelsFirst"?Y=[o,ne,W,ie,X]:a==="channelsLast"&&(Y=[o,W,ie,X,ne]),{batchSize:o,dataFormat:a,inDepth:u,inHeight:d,inWidth:c,inChannels:f,outDepth:W,outHeight:ie,outWidth:X,outChannels:ne,padInfo:R,strideDepth:S,strideHeight:v,strideWidth:b,filterDepth:m,filterHeight:g,filterWidth:y,effectiveFilterDepth:z,effectiveFilterHeight:O,effectiveFilterWidth:A,dilationDepth:k,dilationHeight:x,dilationWidth:I,inShape:e,outShape:Y,filterShape:t}},Vg=(e,t,r,i,s,n)=>{let a=n==="channelsLast";a?e[0].dims[3]:e[0].dims[1];let o=[64,1,1],u={x:r.map((S,v)=>v)},d=[Math.ceil(xc(u.x.map(S=>r[S]))/o[0]),1,1];Te("verbose",()=>`[conv3d_naive_webgpu] dispatch = ${d}`);let c=1,f=B.size(r),h=[{type:12,data:f},{type:12,data:i},{type:12,data:s},{type:12,data:t.strides},{type:12,data:t.dilations}];Hr(t,h),h.push(...pe(e[0].dims,e[1].dims));let m=["rank","rank"],g=e.length===3;g&&(h.push(...pe(e[2].dims)),m.push("rank")),h.push(...pe(r));let y=S=>{let v=[{name:"output_size",type:"u32"},{name:"filter_dims",type:"u32",length:i.length},{name:"pads",type:"u32",length:s.length},{name:"strides",type:"u32",length:t.strides.length},{name:"dilations",type:"u32",length:t.dilations.length}];Gr(t,v);let b=1,k=Xe(e[0].dataType),x=P("x",e[0].dataType,e[0].dims.length,c),I=P("W",e[1].dataType,e[1].dims.length,b),z=[x,I],O=ue("result",e[0].dataType,r.length,b),A="";if(g){let ie=P("bias",e[2].dataType,e[2].dims.length,b);z.push(ie),A+=` + fn getBiasByOutputCoords(coords : array) -> ${k} { + return bias[${a?de("coords",4,5):de("coords",1,5)}]; + }`}let R=Je(c,k),W=Fr(t,R,k);return` + ${A} + fn getX(d0 : u32, d1 : u32, d2 : u32, d3 : u32, d4 : u32) -> f32 { + let aIndices = array(d0, d1, d2, d3, d4); + return ${x.getByIndices("aIndices")}; + } + fn getW(d0 : u32, d1 : u32, d2 : u32, d3 : u32, d4 : u32) -> f32 { + let aIndices = array(d0, d1, d2, d3, d4); + return ${I.getByIndices("aIndices")}; + } + ${S.registerUniforms(v).declareVariables(...z,O)} + ${S.mainStart()} + ${S.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + let coords = ${O.offsetToIndices("global_idx")}; + let batch = ${de("coords",0,x.rank)}; + let d2 = ${a?de("coords",x.rank-1,x.rank):de("coords",1,x.rank)}; + let xFRCCorner = vec3(${a?de("coords",1,x.rank):de("coords",2,x.rank)}, + ${a?de("coords",2,x.rank):de("coords",3,x.rank)}, + ${a?de("coords",3,x.rank):de("coords",4,x.rank)}) * uniforms.strides - uniforms.pads; + let xFCorner = xFRCCorner.x; + let xRCorner = xFRCCorner.y; + let xCCorner = xFRCCorner.z; + let xShapeY = ${a?de("uniforms.x_shape",1,x.rank):de("uniforms.x_shape",2,x.rank)}; + let xShapeZ = ${a?de("uniforms.x_shape",2,x.rank):de("uniforms.x_shape",3,x.rank)}; + let xShapeW = ${a?de("uniforms.x_shape",3,x.rank):de("uniforms.x_shape",4,x.rank)}; + let xShapeU = ${a?de("uniforms.x_shape",4,x.rank):de("uniforms.x_shape",1,x.rank)}; + let inputDepthNearestVec4 = (xShapeU / 4) * 4; + let inputDepthVec4Remainder = xShapeU % 4; + + var value = 0.0; + for (var wF = 0u; wF < uniforms.filter_dims[0]; wF++) { + let xF = xFCorner + wF * uniforms.dilations[0]; + if (xF < 0 || xF >= xShapeY) { + continue; + } + + for (var wR = 0u; wR < uniforms.filter_dims[1]; wR++) { + let xR = xRCorner + wR * uniforms.dilations[1]; + if (xR < 0 || xR >= xShapeZ) { + continue; + } + + for (var wC = 0u; wC < uniforms.filter_dims[2]; wC++) { + let xC = xCCorner + wC * uniforms.dilations[2]; + if (xC < 0 || xC >= xShapeW) { + continue; + } + + for (var d1 = 0u; d1 < inputDepthNearestVec4; d1 += 4) { + ${a?`let xValues = vec4( + getX(batch, xF, xR, xC, d1), + getX(batch, xF, xR, xC, d1 + 1), + getX(batch, xF, xR, xC, d1 + 2), + getX(batch, xF, xR, xC, d1 + 3)); + `:`let xValues = vec4( + getX(batch, d1, xF, xR, xC), + getX(batch, d1 + 1, xF, xR, xC), + getX(batch, d1 + 2, xF, xR, xC), + getX(batch, d1 + 3, xF, xR, xC)); + `} + let wValues = vec4( + getW(d2, d1, wF, wR, wC), + getW(d2, d1 + 1, wF, wR, wC), + getW(d2, d1 + 2, wF, wR, wC), + getW(d2, d1 + 3, wF, wR, wC)); + value += dot(xValues, wValues); + } + if (inputDepthVec4Remainder == 1) { + ${a?`value += getX(batch, xF, xR, xC, inputDepthNearestVec4) + * getW(d2, inputDepthNearestVec4, wF, wR, wC);`:`value += getX(batch, inputDepthNearestVec4, xF, xR, xC) + * getW(d2, inputDepthNearestVec4, wF, wR, wC);`} + } else if (inputDepthVec4Remainder == 2) { + ${a?`let xValues = vec2( + getX(batch, xF, xR, xC, inputDepthNearestVec4), + getX(batch, xF, xR, xC, inputDepthNearestVec4 + 1)); + `:`let xValues = vec2( + getX(batch, inputDepthNearestVec4, xF, xR, xC), + getX(batch, inputDepthNearestVec4 + 1, xF, xR, xC)); + `} + let wValues = vec2( + getW(d2, inputDepthNearestVec4, wF, wR, wC), + getW(d2, inputDepthNearestVec4 + 1, wF, wR, wC)); + value += dot(xValues, wValues); + } else if (inputDepthVec4Remainder == 3) { + ${a?`let xValues = vec3( + getX(batch, xF, xR, xC, inputDepthNearestVec4), + getX(batch, xF, xR, xC, inputDepthNearestVec4 + 1), + getX(batch, xF, xR, xC, inputDepthNearestVec4 + 2)); + `:`let xValues = vec3( + getX(batch, inputDepthNearestVec4, xF, xR, xC), + getX(batch, inputDepthNearestVec4 + 1, xF, xR, xC), + getX(batch, inputDepthNearestVec4 + 2, xF, xR, xC)); + `} + let wValues = vec3( + getW(d2, inputDepthNearestVec4, wF, wR, wC), + getW(d2, inputDepthNearestVec4 + 1, wF, wR, wC), + getW(d2, inputDepthNearestVec4 + 2, wF, wR, wC)); + value += dot(xValues, wValues); + } + } + } + } + ${g?"value = value + getBiasByOutputCoords(coords)":""}; + ${W} + result[global_idx] = f32(value); + }`};return{name:"Conv3DNaive",shaderCache:{hint:`${t.cacheKey};${a};${c};${g}`,inputDependencies:m},getRunData:()=>({outputs:[{dims:r,dataType:e[0].dataType}],dispatchGroup:{x:d[0],y:d[1],z:d[2]},programUniforms:h}),getShaderSource:y}}}),Fg,Hg,O$=j(()=>{_e(),we(),ve(),Zr(),Fg=(e,t,r,i)=>{let s=e.length>2,n=s?"value += b[output_channel];":"",a=e[0].dims,o=e[1].dims,u=t.format==="NHWC",d=u?r[3]:r[1],c=d/t.group,f=u&&c>=4?Le(d):1,h=B.size(r)/f,m=[{type:12,data:h},{type:12,data:t.dilations},{type:12,data:[t.strides[0],t.strides[1]]},{type:12,data:[t.pads[0],t.pads[1]]},{type:12,data:c}];Hr(t,m),m.push(...pe(a,[o[0],o[1],o[2],o[3]/f]));let g=s?["rank","rank","rank"]:["rank","rank"];m.push(...pe([r[0],r[1],r[2],r[3]/f]));let y=S=>{let v=ue("output",e[0].dataType,r.length,f),b=Xe(v.type.tensor),k=Fr(t,v.type.value,b),x=P("x",e[0].dataType,a.length),I=P("w",e[1].dataType,o.length,f),z=[x,I];s&&z.push(P("b",e[2].dataType,e[2].dims,f));let O=[{name:"output_size",type:"u32"},{name:"dilations",type:"u32",length:t.dilations.length},{name:"strides",type:"u32",length:2},{name:"pads",type:"u32",length:2},{name:"output_channels_per_group",type:"u32"}];Gr(t,O);let A=u?` + for (var wHeight: u32 = 0u; wHeight < uniforms.w_shape[0]; wHeight++) { + let xHeight = xRCCorner.x + wHeight * uniforms.dilations[0]; + + if (xHeight < 0u || xHeight >= uniforms.x_shape[1]) { + continue; + } + + for (var wWidth: u32 = 0u; wWidth < uniforms.w_shape[1]; wWidth++) { + let xWidth = xRCCorner.y + wWidth * uniforms.dilations[1]; + if (xWidth < 0u || xWidth >= uniforms.x_shape[2]) { + continue; + } + + for (var wInChannel: u32 = 0u; wInChannel < uniforms.w_shape[2]; wInChannel++) { + let input_channel = in_channel_offset + wInChannel; + let xVal = ${x.get("batch","xHeight","xWidth","input_channel")}; + let wVal = ${I.get("wHeight","wWidth","wInChannel","output_channel")}; + value += xVal * wVal; + } + } + } + `:` + for (var wInChannel: u32 = 0u; wInChannel < uniforms.w_shape[1]; wInChannel++) { + let input_channel = in_channel_offset + wInChannel; + for (var wHeight: u32 = 0u; wHeight < uniforms.w_shape[2]; wHeight++) { + let xHeight = xRCCorner.x + wHeight * uniforms.dilations[0]; + + if (xHeight < 0u || xHeight >= uniforms.x_shape[2]) { + continue; + } + + for (var wWidth: u32 = 0u; wWidth < uniforms.w_shape[3]; wWidth++) { + let xWidth = xRCCorner.y + wWidth * uniforms.dilations[1]; + if (xWidth < 0u || xWidth >= uniforms.x_shape[3]) { + continue; + } + + let xVal = ${x.get("batch","input_channel","xHeight","xWidth")}; + let wVal = ${I.get("output_channel","wInChannel","wHeight","wWidth")}; + value += xVal * wVal; + } + } + } + `;return` + ${S.registerUniforms(O).declareVariables(...z,v)} + + ${S.mainStart()} + ${S.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + + let outputIndices = ${v.offsetToIndices("global_idx")}; + let batch: u32 = outputIndices[0]; + let output_channel: u32 = outputIndices[${u?3:1}]; + let xRCCorner: vec2 = vec2(outputIndices[${u?1:2}], outputIndices[${u?2:3}]) * uniforms.strides - uniforms.pads; + let group_id: u32 = output_channel * ${f} / uniforms.output_channels_per_group; + var in_channel_offset = group_id * uniforms.w_shape[${u?2:1}]; + + var value: ${v.type.value} = ${v.type.value}(0); + ${A} + ${n} + ${k} + ${v.setByOffset("global_idx","value")} + }`};return{name:"GroupedConv",shaderCache:{hint:`${t.cacheKey}_${f}`,inputDependencies:g},getRunData:()=>({outputs:[{dims:i?i(r):r,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(h/64)},programUniforms:m}),getShaderSource:y}},Hg=(e,t,r,i)=>{let s=e.length>2,n=Le(r[3]),a=Le(r[2]),o=B.size(r)/n/a,u=[e[0].dims[0],e[0].dims[1],e[0].dims[2],e[0].dims[3]/n],d=[e[1].dims[0],e[1].dims[1],e[1].dims[2],e[1].dims[3]/n],c=[r[0],r[1],r[2],r[3]/n],f=[{type:12,data:o},{type:6,data:[t.strides[0],t.strides[1]]},{type:6,data:[t.pads[0],t.pads[1]]}];Hr(t,f),f.push(...pe(u,d,c));let h=(a-1)*t.strides[1]+d[1],m=g=>{let y=ue("output",e[0].dataType,c.length,n),S=Xe(y.type.tensor),v=Fr(t,y.type.value,S),b=P("x",e[0].dataType,u.length,n),k=P("w",e[1].dataType,d.length,n),x=[b,k];s&&x.push(P("b",e[2].dataType,e[2].dims,n));let I=s?"value += b[output_channel];":"",z=[{name:"output_size",type:"u32"},{name:"strides",type:"i32",length:2},{name:"pads",type:"i32",length:2}];return Gr(t,z),` + ${g.registerUniforms(z).declareVariables(...x,y)} + ${g.mainStart()} + ${g.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + let width0 = uniforms.output_shape[3]; + let output_channel = global_idx % width0; + var index1 = global_idx / width0; + let width1 = uniforms.output_shape[2] / ${a}u; + let col = (index1 % width1) * ${a}u; + index1 = index1 / width1; + let row = index1 % uniforms.output_shape[1]; + let batch = index1 / uniforms.output_shape[1]; + + let x_corner = vec2(i32(row), i32(col)) * uniforms.strides - uniforms.pads; + + var x_vals: array<${b.type.value}, ${h}>; + var values: array<${y.type.value}, ${a}>; + let input_channel = output_channel; + // Use constant instead of uniform can give better performance for w's height/width. + for (var w_height: u32 = 0u; w_height < ${d[0]}; w_height++) { + let x_height = x_corner.x + i32(w_height); + if (x_height >= 0 && u32(x_height) < uniforms.x_shape[1]) { + for (var i = 0; i < ${h}; i++) { + let x_width = x_corner.y + i; + if (x_width >= 0 && u32(x_width) < uniforms.x_shape[2]) { + x_vals[i] = ${b.get("batch","u32(x_height)","u32(x_width)","input_channel")}; + } else { + x_vals[i] = ${b.type.value}(0); + } + } + for (var w_width: u32 = 0u; w_width < ${d[1]}; w_width++) { + let w_val = ${k.get("w_height","w_width","0","output_channel")}; + for (var i = 0u; i < ${a}u; i++) { + values[i] = fma(x_vals[i * u32(uniforms.strides[1]) + w_width], w_val, values[i]); + } + } + } + } + + for (var i = 0u; i < ${a}u; i++) { + var value = values[i]; + ${I} + ${v} + ${y.set("batch","row","col + i","output_channel","value")}; + } + }`};return{name:"GroupedConv-Vectorize",shaderCache:{hint:`${t.cacheKey};${n};${a};${h};${d[0]};${d[1]}`,inputDependencies:s?["rank","rank","type"]:["rank","rank"]},getRunData:()=>({outputs:[{dims:i?i(r):r,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(o/64)},programUniforms:f}),getShaderSource:m}}}),Tc,En,Ic,Cn,oo,la,Ec,Cc,uo,A$=j(()=>{we(),C$(),z$(),au(),O$(),Zr(),su(),Tr(),Tc=(e,t,r,i,s,n)=>{let a=e[0],o=e.slice(n?1:2,n?3:4),u=o.length,d=t[0],c=t.slice(2).map((h,m)=>h+(h-1)*(r[m]-1)),f=o.map((h,m)=>h+i[m]+i[m+u]).map((h,m)=>Math.floor((h-c[m]+s[m])/s[m]));return f.splice(0,0,a),f.splice(n?3:1,0,d),f},En=[2,3,1,0],Ic=(e,t)=>{if(!e||e.length!==2&&e.length!==3)throw new Error("Conv requires 2 or 3 inputs");if(e[0].dims.length>5)throw new Error("greater than 5D is not supported");if(e[0].dims.length!==e[1].dims.length)throw new Error("filter does not have same dimension as input");let r=e[0].dims[t.format==="NHWC"?e[0].dims.length-1:1],i=e[1].dims[1]*t.group;if(r!==i)throw new Error("FILTER_IN_CHANNEL should be equal to DATA_CHANNEL");if(e.length===3&&(e[2].dims.length!==1||e[1].dims[0]!==e[2].dims[0]))throw new Error("invalid bias");let s=e[0].dims.length-2;if(t.dilations.length!==s)throw new Error(`dilations should be ${s}D`);if(t.strides.length!==s)throw new Error(`strides should be ${s}D`);if(t.pads.length!==s*2)throw new Error(`pads should be ${s*2}D`);if(t.kernelShape.length!==0&&t.kernelShape.length!==e[1].dims.length-2)throw new Error("invalid kernel shape")},Cn=(e,t)=>{let r=e.kernelShape.slice();r.length{let t=ru(e),r=e.format,i=["NOTSET","VALID","SAME_UPPER","SAME_LOWER"][e.auto_pad],s=e.dilations,n=e.group,a=e.kernel_shape,o=e.pads,u=e.strides,d=e.w_is_const();return{autoPad:i,format:r,dilations:s,group:n,kernelShape:a,pads:o,strides:u,wIsConst:d,...t,cacheKey:`${e.format};${t.activation};`}},la=(e,t,r,i)=>{let s=r.format==="NHWC",n=Tc(t[0].dims,t[1].dims,r.dilations,r.pads,r.strides,s);if(r.group!==1){let z=[t[0]];if(s){let O=e.kernelCustomData.wT??e.compute($t(t[1],En),{inputs:[1],outputs:[r.wIsConst?-2:-1]})[0];r.wIsConst&&!e.kernelCustomData.wT&&(e.kernelCustomData.wT=O),z.push(O)}else z.push(t[1]);t.length===3&&z.push(t[2]),!e.adapterInfo.isArchitecture("ampere")&&s&&t[1].dims[0]===r.group&&t[1].dims[1]===1&&r.dilations[0]===1&&r.dilations[1]===1?e.compute(Hg(z,r,n,i),{inputs:z}):e.compute(Fg(z,r,n,i),{inputs:z});return}let a=t.length===3,o=t[0].dims[s?1:2],u=t[0].dims[s?2:3],d=t[0].dims[s?3:1],c=t[1].dims[2],f=t[1].dims[3],h=n[s?1:2],m=n[s?2:3],g=n[s?3:1],y=s&&c===o&&f===u&&r.pads[0]===0&&r.pads[1]===0;if(y||c===1&&f===1&&r.dilations[0]===1&&r.dilations[1]===1&&r.strides[0]===1&&r.strides[1]===1&&r.pads[0]===0&&r.pads[1]===0){let z=n[0],O,A,R,W=[];if(s){let ne=e.kernelCustomData.wT??e.compute($t(t[1],En),{inputs:[1],outputs:[r.wIsConst?-2:-1]})[0];if(r.wIsConst&&!e.kernelCustomData.wT&&(e.kernelCustomData.wT=ne),y){let Y=o*u*d;O=t[0].reshape([1,z,Y]),A=ne.reshape([1,Y,g]),R=[1,z,g]}else O=t[0].reshape([z,o*u,d]),A=ne.reshape([1,d,g]),R=[z,h*m,g];W.push(O),W.push(A)}else O=t[0].reshape([z,d,o*u]),A=t[1].reshape([1,g,d]),R=[z,g,h*m],W.push(A),W.push(O);a&&W.push(t[2]);let ie=R[2],X=W[0].dims[W[0].dims.length-1];ie<8&&X<8?e.compute(nu(W,r,n,R,s,i),{inputs:W}):e.compute(Zn(W,r,n,R,s,i),{inputs:W});return}let S=!0,v=e.kernelCustomData.wT??e.compute($t(t[1],En),{inputs:[1],outputs:[r.wIsConst?-2:-1]})[0];r.wIsConst&&!e.kernelCustomData.wT&&(e.kernelCustomData.wT=v);let b=[t[0],v];a&&b.push(t[2]);let k=s?h*m:g,x=s?g:h*m,I=c*f*d;e.compute(Lg(b,r,n,k,x,I,a,S,i),{inputs:b})},Ec=(e,t)=>{let r=t.format==="NHWC",i=[e.inputs[0].reshape(r?[e.inputs[0].dims[0],1,e.inputs[0].dims[1],e.inputs[0].dims[2]]:[e.inputs[0].dims[0],e.inputs[0].dims[1],1,e.inputs[0].dims[2]]),e.inputs[1].reshape([e.inputs[1].dims[0],e.inputs[1].dims[1],1,e.inputs[1].dims[2]])];e.inputs.length===3&&i.push(e.inputs[2]);let s=[0,t.pads[0],0,t.pads[1]],n=[1].concat(t.strides),a=[1].concat(t.dilations),o=[1].concat(t.kernelShape),u=Cn({...t,pads:s,strides:n,dilations:a,kernelShape:o},i);la(e,i,u,d=>r?[d[0],d[2],d[3]]:[d[0],d[1],d[3]])},Cc=(e,t,r)=>{let i=r.format==="NHWC"?"channelsLast":"channelsFirst",s=Cn(r,t),n=r.autoPad==="NOTSET"?r.pads:r.autoPad,a=qg(t[0].dims,t[1].dims,r.strides,r.dilations,n,!1,i);e.compute(Vg(t,s,a.outShape,[a.filterDepth,a.filterHeight,a.filterWidth],[a.padInfo.front,a.padInfo.top,a.padInfo.left],i))},uo=(e,t)=>{if(Ic(e.inputs,t),e.inputs[0].dims.length===3)Ec(e,t);else if(e.inputs[0].dims.length===5)Cc(e,e.inputs,t);else{let r=Cn(t,e.inputs);la(e,e.inputs,r)}}}),Gg,R$=j(()=>{_e(),pr(),we(),ve(),Gg=(e,t,r)=>{let i=e.length>2,s=t.outputShape,n=t.format==="NHWC",a=t.group,o=e[1].dims,u=o[2]/a,d=o[3],c=n?Le(u):1,f=n&&d===1&&u>=4,h=f?Math.floor(u/4)*4:Math.floor(u/c)*c,m=u-h,g=n?Le(d):1,y=n?d===1?c:g:1,S=B.size(s)/g,v=[Math.ceil(S/64),1,1];Te("verbose",()=>`[conv2d_backprop_webgpu] dispatch = ${v}`);let b=["rank","rank"],k=[t.strides[0],t.strides[1]],x=[t.kernelShape[n?1:2],t.kernelShape[n?2:3]],I=[t.dilations[0],t.dilations[1]],z=[x[0]+(t.dilations[0]<=1?0:(t.kernelShape[n?1:2]-1)*(t.dilations[0]-1)),x[1]+(t.dilations[1]<=1?0:(t.kernelShape[n?2:3]-1)*(t.dilations[1]-1))],O=[z[0]-1-Math.floor((t.pads[0]+t.pads[2])/2),z[1]-1-Math.floor((t.pads[1]+t.pads[3])/2)],A=[{type:12,data:S},{type:12,data:k},{type:12,data:x},{type:12,data:I},{type:12,data:z},{type:6,data:O},{type:12,data:h},{type:12,data:u},{type:12,data:d},...pe(e[0].dims,e[1].dims)];i&&(A.push(...pe(e[2].dims)),b.push("rank")),A.push(...pe(s));let R=W=>{let ie=[{name:"output_size",type:"u32"},{name:"strides",type:"u32",length:k.length},{name:"filter_dims",type:"u32",length:x.length},{name:"dilations",type:"u32",length:x.length},{name:"effective_filter_dims",type:"u32",length:z.length},{name:"pads",type:"i32",length:O.length},{name:"input_channels_per_group_int",type:"u32"},{name:"input_channels_per_group",type:"u32"},{name:"output_channels_per_group",type:"u32"}],X=Xe(e[0].dataType),ne=n?1:2,Y=n?2:3,oe=n?3:1,V=P("W",e[1].dataType,e[1].dims.length,y),ae=P("Dy",e[0].dataType,e[0].dims.length,c),Z=[ae,V];i&&Z.push(P("bias",e[2].dataType,[s[oe]].length,g));let le=ue("result",e[0].dataType,s.length,g),Be=()=>{let ee="";if(f)c===4?ee+=` + let xValue = ${ae.getByOffset("x_offset")}; + let wValue = ${V.getByOffset("w_offset")}; + dotProd = dotProd + dot(xValue, wValue); + x_offset += 1u; + w_offset += 1u;`:c===2?ee+=` + dotProd = dotProd + dot(vec4<${X}>(${ae.getByOffset("x_offset")}, ${ae.getByOffset("x_offset + 1u")}), vec4<${X}>(${V.getByOffset("w_offset")}, ${V.getByOffset("w_offset + 1u")})); + x_offset += 2u; + w_offset += 2u;`:c===1&&(ee+=` + dotProd = dotProd + dot(vec4<${X}>(${ae.getByOffset("x_offset")}, ${ae.getByOffset("x_offset + 1u")}, ${ae.getByOffset("x_offset + 2u")}, ${ae.getByOffset("x_offset + 3u")}), vec4<${X}>(${V.getByOffset("w_offset")}, ${V.getByOffset("w_offset + 1u")}, ${V.getByOffset("w_offset + 2u")}, ${V.getByOffset("w_offset + 3u")})); + x_offset += 4u; + w_offset += 4u;`);else if(ee+=` + let xValue = ${n?ae.getByOffset(`${ae.indicesToOffset(`${ae.type.indices}(batch, idyR, idyC, inputChannel)`)} / ${c}`):ae.get("batch","inputChannel","idyR","idyC")}; + `,c===1)ee+=` + let w_offset = ${V.indicesToOffset(`${V.type.indices}(u32(wRPerm), u32(wCPerm), inputChannel, wOutChannel)`)}; + let wValue = ${V.getByOffset(`w_offset / ${y}`)}; + dotProd = dotProd + xValue * wValue;`;else for(let ye=0;ye{if(m===0)return"";if(!f)throw new Error(`packInputAs4 ${f} is not true.`);let ee="";if(c===1){ee+="dotProd = dotProd";for(let ye=0;ye(i32(r), i32(c)) - uniforms.pads; + let dyRCorner = dyCorner.x; + let dyCCorner = dyCorner.y; + let groupId = d1 / uniforms.output_channels_per_group; + let wOutChannel = d1 - groupId * uniforms.output_channels_per_group; + // Convolve dy(?, ?, d2) with w(:, :, d1, d2) to compute dx(xR, xC, d1). + // ? = to be determined. : = across all values in that axis. + var dotProd = ${le.type.value}(0.0); + var wR: u32 = 0; + if (uniforms.dilations.x == 1) { + // Minimum wR >= 0 that satisfies (dyRCorner + wR) % (uniforms.strides.x) == 0 + wR = u32(((dyRCorner + i32(uniforms.strides.x) - 1) / i32(uniforms.strides.x)) * i32(uniforms.strides.x) - dyRCorner); + } + for (; wR < uniforms.effective_filter_dims.x; wR = wR + 1) { + if (wR % uniforms.dilations.x != 0) { + continue; + } + let dyR = (${X}(dyRCorner) + ${X}(wR)) / ${X}(uniforms.strides[0]); + let wRPerm = uniforms.filter_dims.x - 1 - wR / uniforms.dilations.x; + if (dyR < 0.0 || dyR >= ${X}(uniforms.Dy_shape[${ne}]) || fract(dyR) > 0.0 || + wRPerm < 0) { + continue; + } + let idyR: u32 = u32(dyR); + var wC: u32 = 0; + if (uniforms.dilations.y == 1) { + // Minimum wC >= 0 that satisfies (dyCCorner + wC) % (uniforms.strides.y) == 0 + wC = u32(((dyCCorner + i32(uniforms.strides.y) - 1) / i32(uniforms.strides.y)) * i32(uniforms.strides.y) - dyCCorner); + } + for (; wC < uniforms.effective_filter_dims.y; wC = wC + 1) { + if (wC % uniforms.dilations.y != 0) { + continue; + } + let dyC = (${X}(dyCCorner) + ${X}(wC)) / ${X}(uniforms.strides.y); + let wCPerm = uniforms.filter_dims.y - 1 - wC / uniforms.dilations.y; + if (dyC < 0.0 || dyC >= ${X}(uniforms.Dy_shape[${Y}]) || + fract(dyC) > 0.0 || wCPerm < 0) { + continue; + } + let idyC: u32 = u32(dyC); + var inputChannel = groupId * uniforms.input_channels_per_group; + ${f?` + var x_offset = ${ae.indicesToOffset(`${ae.type.indices}(batch, idyR, idyC, inputChannel)`)} / ${c}; + var w_offset = ${V.indicesToOffset(`${V.type.indices}(wRPerm, wCPerm, inputChannel, wOutChannel)`)} / ${y}; + `:""} + for (var d2: u32 = 0; d2 < uniforms.input_channels_per_group_int; d2 = d2 + ${f?4:c}) { + ${Be()} + inputChannel = inputChannel + ${f?4:c}; + } + ${D()} + wC = wC + uniforms.strides.y - 1; + } + wR = wR + uniforms.strides[0] - 1; + } + let value = dotProd${i?` + bias[d1 / ${g}]`:""}; + ${le.setByOffset("global_idx","value")}; + `;return` + ${W.registerUniforms(ie).declareVariables(...Z,le)} + ${W.mainStart()} + ${W.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}; + ${q}}`};return{name:"ConvTranspose2D",shaderCache:{hint:`${t.cacheKey};${c}${y}${g}${f}${m}`,inputDependencies:b},getRunData:()=>({dispatchGroup:{x:v[0],y:v[1],z:v[2]},outputs:[{dims:r?r(s):s,dataType:e[0].dataType}],programUniforms:A}),getShaderSource:R}}}),zc,Oc,Ac,da,jg,Rc,ca,Bc,Kg,B$=j(()=>{R$(),Zr(),Tr(),zc=(e,t,r,i,s,n)=>(e-1)*t+r+(i-1)*s+1-n,Oc=(e,t,r,i,s)=>{let n=Math.floor(e/2);t==="SAME_UPPER"?(r[i]=n,r[s]=e-n):t==="SAME_LOWER"&&(r[i]=e-n,r[s]=n)},Ac=(e,t,r,i,s,n,a,o,u,d)=>{let c=e.length-2,f=d.length===0;u.length{let r=e.kernelShape.slice();if(e.kernelShape.length===0||e.kernelShape.reduce((f,h)=>f*h,1)===0){r.length=0;for(let f=2;ff+h,0)===0){let f=t[0].dims.length-2;u=new Array(f).fill(1)}let d=e.strides.slice();if(d.reduce((f,h)=>f+h,0)===0){let f=t[0].dims.length-2;d=new Array(f).fill(1)}Ac(o,r,u,e.autoPad,e.group,s,d,i,a,n);let c=Object.assign({},e);return Object.assign(c,{kernelShape:r,pads:s,outputPadding:a,outputShape:n,dilations:u,strides:d}),c},jg=e=>{let t=ru(e),r=e.format,i=["NOTSET","VALID","SAME_UPPER","SAME_LOWER"][typeof e.autoPad>"u"?0:e.autoPad],s=e.dilations,n=e.group,a=e.kernelShape,o=e.pads,u=e.strides,d=e.wIsConst(),c=e.outputPadding,f=e.outputShape;return{autoPad:i,format:r,dilations:s,group:n,kernelShape:a,outputPadding:c,outputShape:f,pads:o,strides:u,wIsConst:d,...t,cacheKey:`${e.format};${t.activation};`}},Rc=(e,t)=>{if(!e||e.length!==2&&e.length!==3)throw new Error("Conv requires 2 or 3 inputs");if(e[0].dims.length!==4&&e[0].dims.length!==3)throw new Error("currently only support 2-dimensional conv");if(e[0].dims.length!==e[1].dims.length)throw new Error("filter does not have same dimension as input");let r=e[0].dims[t.format==="NHWC"?e[0].dims.length-1:1],i=e[1].dims[0];if(r!==i)throw new Error("FILTER_IN_CHANNEL should be equal to DATA_CHANNEL");let s=e[1].dims[1]*t.group;if(e.length===3&&(e[2].dims.length!==1||e[2].dims[0]!==s))throw new Error("invalid bias");let n=e[0].dims.length-2;if(t.dilations.reduce((a,o)=>a+o,0)>0&&t.dilations.length!==n)throw new Error(`dilations should be ${n}D`);if(t.strides.reduce((a,o)=>a+o,0)>0&&t.strides.length!==n)throw new Error(`strides should be ${n}D`);if(t.pads.reduce((a,o)=>a+o,0)>0&&t.pads.length!==n*2)throw new Error(`pads should be ${n*2}D`);if(t.outputPadding.length!==n&&t.outputPadding.length!==0)throw new Error(`output_padding should be ${n}D`);if(t.kernelShape.reduce((a,o)=>a+o,0)>0&&t.kernelShape.length!==0&&t.kernelShape.length!==e[1].dims.length-2)throw new Error("invalid kernel shape");if(t.outputShape.length!==0&&t.outputShape.length!==e[0].dims.length-2)throw new Error("invalid output shape")},ca=(e,t,r,i)=>{let s=e.kernelCustomData.wT??e.compute($t(t[1],[2,3,0,1]),{inputs:[1],outputs:[r.wIsConst?-2:-1]})[0];r.wIsConst&&!e.kernelCustomData.wT&&(e.kernelCustomData.wT=s);let n=[t[0],s];t.length===3&&n.push(t[2]),e.compute(Gg(n,r,i),{inputs:n})},Bc=(e,t)=>{let r=t.format==="NHWC",i=[e.inputs[0].reshape(r?[e.inputs[0].dims[0],1,e.inputs[0].dims[1],e.inputs[0].dims[2]]:[e.inputs[0].dims[0],e.inputs[0].dims[1],1,e.inputs[0].dims[2]]),e.inputs[1].reshape([e.inputs[1].dims[0],e.inputs[1].dims[1],1,e.inputs[1].dims[2]])];e.inputs.length===3&&i.push(e.inputs[2]);let s=t.kernelShape;(s.length===0||s[0]===0)&&(s=[e.inputs[1].dims[2]]);let n=t.dilations;(n.length===0||n[0]===0)&&(n=[1]);let a=t.strides;(a.length===0||a[0]===0)&&(a=[1]);let o=t.pads;o.length===0&&(o=[0,0]),o=[0,o[0],0,o[1]],a=[1].concat(a),n=[1].concat(n),s=[1].concat(s);let u=t.outputPadding;u=[0].concat(u);let d=da({...t,pads:o,strides:a,dilations:n,kernelShape:s,outputPadding:u},i);ca(e,i,d,c=>r?[c[0],c[2],c[3]]:[c[0],c[1],c[3]])},Kg=(e,t)=>{if(Rc(e.inputs,t),e.inputs[0].dims.length===3)Bc(e,t);else{let r=da(t,e.inputs);ca(e,e.inputs,r)}}}),Mc,Zg,Xg,M$=j(()=>{_e(),we(),qe(),ve(),Mc=(e,t,r,i)=>{let s=B.size(t),n=t.length,a=P("input",e,n),o=ue("output",e,n),u=r.dataType===6?r.getInt32Array()[0]:Number(r.getBigInt64Array()[0]),d=B.normalizeAxis(u,n),c=f=>{let h=` i32(${a.indicesGet("inputIndices","uniforms.axis")}) `,m=de("uniforms.input_shape","uniforms.axis",n),g=i.reverse?h+(i.exclusive?" + 1":""):"0",y=i.reverse?m:h+(i.exclusive?"":" + 1");return` + ${f.registerUniform("outputSize","u32").registerUniform("axis","u32").declareVariables(a,o)} + ${f.mainStart()} + ${f.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")} + var inputIndices = ${o.offsetToIndices("global_idx")}; + var sum = ${o.type.value}(0); + let first : i32 = ${g}; + let last : i32 = ${y}; + for (var i : i32 = first; i < last; i++) { + ${a.indicesSet("inputIndices","uniforms.axis","u32(i)")}; + sum = sum + ${a.getByIndices("inputIndices")}; + } + ${o.setByOffset("global_idx","sum")}; + }`};return{name:"CumSum",shaderCache:{hint:i.cacheKey,inputDependencies:["rank"]},getRunData:()=>({outputs:[{dims:t,dataType:e}],dispatchGroup:{x:Math.ceil(s/64)},programUniforms:[{type:12,data:s},{type:12,data:d},...pe(t,t)]}),getShaderSource:c}},Zg=(e,t)=>{let r=e.inputs[0].dims,i=e.inputs[0].dataType,s=e.inputs[1];e.compute(Mc(i,r,s,t),{inputs:[0]})},Xg=e=>{let t=e.exclusive===1,r=e.reverse===1;return Oe({exclusive:t,reverse:r})}}),Nc,Dc,Pc,Yg,Qg,N$=j(()=>{_e(),we(),qe(),ve(),Nc=e=>{if(!e||e.length!==1)throw new Error("DepthToSpace requires 1 input.");if(e[0].dims.length!==4)throw new Error("DepthToSpace requires 4D input.")},Dc=(e,t,r,i)=>{let s=[];s.push(`fn perm(i: ${i.type.indices}) -> ${r.type.indices} { + var a: ${r.type.indices};`);for(let n=0;n{let r,i,s,n,a,o,u=t.format==="NHWC",d=t.blocksize,c=t.mode==="DCR";u?([r,i,s,n]=e.dims,a=c?[r,i,s,d,d,n/d**2]:[r,i,s,n/d**2,d,d],o=c?[0,1,3,2,4,5]:[0,1,4,2,5,3]):([r,i,s,n]=[e.dims[0],e.dims[2],e.dims[3],e.dims[1]],a=c?[r,d,d,n/d**2,i,s]:[r,n/d**2,d,d,i,s],o=c?[0,3,4,1,5,2]:[0,1,4,2,5,3]);let f=e.reshape(a),h=f.dims.length,m=e.dataType,g=P("a",m,h),y=ue("output",m,h),S=v=>` + ${v.registerUniform("output_size","u32").declareVariables(g,y)} + + ${Dc(o,h,g,y)} + + ${v.mainStart()} + ${v.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + + let indices = ${y.offsetToIndices("global_idx")}; + let aIndices = perm(indices); + + ${y.setByOffset("global_idx",g.getByIndices("aIndices"))} + }`;return{name:"DepthToSpace",shaderCache:{hint:`${e.dims};${t.blocksize};${t.mode}`,inputDependencies:["rank"]},getRunData:v=>{let b=u?[r,i*d,s*d,n/d**2]:[r,n/d**2,i*d,s*d],k=B.size(b),x=f.dims,I=B.sortBasedOnPerm(x,o);return{outputs:[{dims:b,dataType:v[0].dataType}],dispatchGroup:{x:Math.ceil(k/64)},programUniforms:[{type:12,data:k},...pe(x,I)]}},getShaderSource:S}},Yg=(e,t)=>{Nc(e.inputs),e.compute(Pc(e.inputs[0],t))},Qg=e=>Oe({blocksize:e.blocksize,mode:e.mode,format:e.format})}),zn,Si,pa,Uc,Wc,Lc,qc,fa,Vc,Jg,e_,D$=j(()=>{_e(),we(),qe(),ve(),zn="[a-zA-Z]|\\.\\.\\.",Si="("+zn+")+",pa="^"+Si+"$",Uc="("+Si+",)*"+Si,Wc="^"+Uc+"$",Lc=class{constructor(e=-1){this.symbolToIndices=new Map,this.inputIndex=e}addSymbol(e,t){let r=this.symbolToIndices.get(e);r===void 0?r=[t]:r.push(t),this.symbolToIndices.set(e,r)}},qc=class{constructor(e,t){var s;this.equation=t,this.hasEllipsis=!1,this.symbolToInfo=new Map,this.lhs=new Array,this.outputDims=[];let[r,i]=t.includes("->")?t.split("->",2):[t,""];if(!r.match(RegExp(Wc)))throw new Error("Invalid LHS term");if(r.split(",").forEach((n,a)=>{let o=e[a].dims.slice();if(!n.match(RegExp(pa)))throw new Error("Invalid LHS term");let u=this.processTerm(n,!0,o,a);this.lhs.push(u)}),i==="")i+=[...this.symbolToInfo.entries()].filter(([n,a])=>a.count===1||n==="...").map(([n])=>n).join("");else if(!i.match(RegExp(Si)))throw new Error("Invalid RHS");(s=i.match(RegExp(zn,"g")))==null||s.forEach(n=>{if(n==="...")this.outputDims=this.outputDims.concat(this.ellipsisDims);else{let a=this.symbolToInfo.get(n);if(a===void 0)throw new Error("Invalid RHS symbol");this.outputDims.push(a.dimValue)}}),this.rhs=this.processTerm(i,!1,this.outputDims)}addSymbol(e,t,r){let i=this.symbolToInfo.get(e);if(i!==void 0){if(i.dimValue!==t&&i.count!==1)throw new Error("Dimension mismatch");i.count++,i.inputIndices.push(r)}else i={count:1,dimValue:t,inputIndices:[r]};this.symbolToInfo.set(e,i)}processTerm(e,t,r,i=-1){let s=r.length,n=!1,a=[],o=0;if(!e.match(RegExp(pa))&&!t&&e!=="")throw new Error("Invalid LHS term");let u=e.match(RegExp(zn,"g")),d=new Lc(i);return u==null||u.forEach((c,f)=>{if(c==="..."){if(n)throw new Error("Only one ellipsis is allowed per input term");n=!0;let h=s-u.length+1;if(h<0)throw new Error("Ellipsis out of bounds");if(a=r.slice(o,o+h),this.hasEllipsis){if(this.ellipsisDims.length!==a.length||this.ellipsisDims.toString()!==a.toString())throw new Error("Ellipsis dimensions mismatch")}else if(t)this.hasEllipsis=!0,this.ellipsisDims=a;else throw new Error("Ellipsis must be specified in the LHS");for(let m=0;me+"_max",Vc=(e,t,r,i)=>{let s=e.map(d=>d.length).map((d,c)=>P(`input${c}`,t,d)),n=B.size(i),a=ue("output",t,i.length),o=[...r.symbolToInfo.keys()].filter(d=>!r.rhs.symbolToIndices.has(d)),u=d=>{let c=[],f="var prod = 1.0;",h="var sum = 0.0;",m="sum += prod;",g=[],y=[],S=[],v=[],b=r.symbolToInfo.size===r.rhs.symbolToIndices.size;r.symbolToInfo.forEach((x,I)=>{var z;if(r.rhs.symbolToIndices.has(I)){let O=(z=r.rhs.symbolToIndices.get(I))==null?void 0:z[0];O!==void 0&&r.lhs.forEach((A,R)=>{if(x.inputIndices.includes(R)){let W=A.symbolToIndices.get(I);if(W===void 0)throw new Error("Invalid symbol error");W.forEach(ie=>{c.push(`${s[R].indicesSet(`input${R}Indices`,ie,a.indicesGet("outputIndices",O))}`)})}})}else r.lhs.forEach((O,A)=>{if(x.inputIndices.includes(A)){let R=O.symbolToIndices.get(I);if(R===void 0)throw new Error("Invalid symbol error");R.forEach(W=>{g.push(`${s[A].indicesSet(`input${A}Indices`,W,`${I}`)}`)}),v.push(`prod *= ${s[A].getByIndices(`input${A}Indices`)};`)}}),y.push(`for(var ${I}: u32 = 0; ${I} < uniforms.${fa(I)}; ${I}++) {`),S.push("}")});let k=b?[...c,`let sum = ${s.map((x,I)=>x.getByIndices(`input${I}Indices`)).join(" * ")};`]:[...c,h,...y,...g,f,...v,m,...S];return` + ${d.registerUniforms(o.map(x=>({name:`${fa(x)}`,type:"u32"}))).registerUniform("outputSize","u32").declareVariables(...s,a)} + + ${d.mainStart()} + ${d.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")} + var outputIndices = ${a.offsetToIndices("global_idx")}; + ${s.map((x,I)=>`var input${I}Indices: ${s[I].type.indices};`).join(` +`)} + ${k.join(` +`)}; + ${a.setByOffset("global_idx","sum")}; + }`};return{name:"Einsum",shaderCache:{hint:r.equation,inputDependencies:e.map(()=>"rank")},getRunData:()=>{let d=o.filter(f=>r.symbolToInfo.has(f)).map(f=>{var h;return{type:12,data:((h=r.symbolToInfo.get(f))==null?void 0:h.dimValue)||0}});d.push({type:12,data:n});let c=e.map((f,h)=>[...pe(f)]).reduce((f,h)=>f.concat(h),d);return c.push(...pe(i)),{outputs:[{dims:i,dataType:t}],dispatchGroup:{x:Math.ceil(n/64)},programUniforms:c}},getShaderSource:u}},Jg=(e,t)=>{let r=new qc(e.inputs,t.equation),i=r.outputDims,s=e.inputs.map((n,a)=>n.dims);e.compute(Vc(s,e.inputs[0].dataType,r,i))},e_=e=>{let t=e.equation.replace(/\s+/g,"");return Oe({equation:t})}}),Fc,ha,Hc,Gc,t_,P$=j(()=>{_e(),we(),ve(),Fc=e=>{if(!e||e.length!==2)throw new Error("Expand requires 2 input.");let t=e[0].dims,r=Array.from(e[1].getBigInt64Array(),Number),i=r.length{let r=e.length-t.length,i=[];for(let s=0;se.length>t.length?ha(e,t):ha(t,e),Gc=e=>{let t=e[0].dims,r=Array.from(e[1].getBigInt64Array(),Number),i=Hc(t,r),s=e[0].dataType,n=s===9||B.size(t)===1,a=s===9||t.length>0&&t[t.length-1]%4===0?4:1,o=n||i.length>0&&i[i.length-1]%4===0?4:1,u=Math.ceil(B.size(i)/o),d=f=>{let h=P("input",s,t.length,a),m=ue("output",s,i.length,o),g;if(s===9){let y=(S,v,b="")=>` + let outputIndices${v} = ${m.offsetToIndices(`outputOffset + ${v}u`)}; + let offset${v} = ${h.broadcastedIndicesToOffset(`outputIndices${v}`,m)}; + let index${v} = offset${v} / 4u; + let component${v} = offset${v} % 4u; + ${S}[${v}] = ${b}(${h.getByOffset(`index${v}`)}[component${v}]); + `;g=` + let outputOffset = global_idx * ${o}; + var data = vec4(0); + ${y("data",0,"u32")} + ${y("data",1,"u32")} + ${y("data",2,"u32")} + ${y("data",3,"u32")} + ${m.setByOffset("global_idx","data")} + }`}else g=` + let outputIndices = ${m.offsetToIndices(`global_idx * ${o}`)}; + let inputOffset = ${h.broadcastedIndicesToOffset("outputIndices",m)}; + let data = ${m.type.value}(${h.getByOffset(`inputOffset / ${a}`)}); + ${m.setByOffset("global_idx","data")} + }`;return` + ${f.registerUniform("vec_size","u32").declareVariables(h,m)} + ${f.mainStart()} + ${f.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.vec_size")} + ${g}`},c=[{type:12,data:u},...pe(t,i)];return{name:"Expand",shaderCache:{hint:`${i.length};${a}${o}`,inputDependencies:["rank"]},getShaderSource:d,getRunData:()=>({outputs:[{dims:i,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(u/64)},programUniforms:c})}},t_=e=>{Fc(e.inputs),e.compute(Gc(e.inputs),{inputs:[0]})}}),jc,r_,U$=j(()=>{_e(),we(),ve(),tu(),jc=e=>{let t=e[0].dataType,r=B.size(e[0].dims),i=B.size(e[1].dims),s=i%4===0,n=a=>{let o=P("x",t,[1],4),u=P("bias",t,[1],4),d=ue("y",t,[1],4),c=[{name:"output_vec_size",type:"u32"},{name:"bias_size",type:"u32"}],f=m=>` + let bias${m}_offset: u32 = (global_idx * 4 + ${m}) % uniforms.bias_size; + let bias${m} = ${u.getByOffset(`bias${m}_offset / 4`)}[bias${m}_offset % 4];`,h=s?` + let bias = ${u.getByOffset("global_idx % (uniforms.bias_size / 4)")};`:`${f(0)}${f(1)}${f(2)}${f(3)} + let bias = ${o.type.value}(bias0, bias1, bias2, bias3);`;return`${a.registerUniforms(c).declareVariables(o,u,d)} + + ${io(st(t))} + + ${a.mainStart(oi)} + ${a.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_vec_size")} + + let x = ${o.getByOffset("global_idx")}; + ${h} + let x_in = x + bias; + ${d.setByOffset("global_idx",no("x_in"))} + }`};return{name:"FastGeluWithBias",shaderCache:{hint:`${s}`,inputDependencies:["type","type"]},getShaderSource:n,getRunData:a=>({outputs:[{dims:a[0].dims,dataType:a[0].dataType}],programUniforms:[{type:12,data:Math.ceil(r/4)},{type:12,data:i}],dispatchGroup:{x:Math.ceil(r/oi/4)}})}},r_=e=>{e.inputs.length<2||B.size(e.inputs[1].dims)===0?$g(e):e.compute(jc(e.inputs))}}),Kc,Zc,i_,n_,W$=j(()=>{_e(),we(),qe(),ve(),Kc=e=>{if(!e||e.length!==2)throw new Error("Gather requires 2 inputs.")},Zc=(e,t)=>{let r=e[0].dims,i=e[1].dims,s=r.length,n=B.normalizeAxis(t.axis,s),a=r.slice(0);a.splice(n,1,...i);let o=r[n],u=e[0].dataType===9?4:1,d=Math.ceil(B.size(a)/u),c=[{type:12,data:d},{type:6,data:o},{type:12,data:n},...pe(e[0].dims,e[1].dims,a)],f=h=>{let m=P("data",e[0].dataType,e[0].dims.length,u),g=P("inputIndices",e[1].dataType,e[1].dims.length),y=ue("output",e[0].dataType,a.length,u),S=b=>{let k=i.length,x=`var indicesIndices${b} = ${g.type.indices}(0);`;for(let I=0;I1?`indicesIndices${b}[${I}]`:`indicesIndices${b}`} = ${a.length>1?`outputIndices${b}[uniforms.axis + ${I}]`:`outputIndices${b}`};`;x+=` + var idx${b} = ${g.getByIndices(`indicesIndices${b}`)}; + if (idx${b} < 0) { + idx${b} = idx${b} + uniforms.axisDimLimit; + } + var dataIndices${b} : ${m.type.indices}; + `;for(let I=0,z=0;I1?`dataIndices${b}[${I}]`:`dataIndices${b}`} = u32(idx${b});`,z+=k):(x+=`${s>1?`dataIndices${b}[${I}]`:`dataIndices${b}`} = ${a.length>1?`outputIndices${b}[${z}]`:`outputIndices${b}`};`,z++);return x},v;if(e[0].dataType===9){let b=(k,x,I="")=>` + let outputIndices${x} = ${y.offsetToIndices(`outputOffset + ${x}u`)}; + ${S(x)}; + let offset${x} = ${m.indicesToOffset(`dataIndices${x}`)}; + let index${x} = offset${x} / 4u; + let component${x} = offset${x} % 4u; + ${k}[${x}] = ${I}(${m.getByOffset(`index${x}`)}[component${x}]); + `;v=` + let outputOffset = global_idx * ${u}; + var value = vec4(0); + ${b("value",0,"u32")} + ${b("value",1,"u32")} + ${b("value",2,"u32")} + ${b("value",3,"u32")} + ${y.setByOffset("global_idx","value")} + `}else v=` + let outputIndices = ${y.offsetToIndices("global_idx")}; + ${S("")}; + let value = ${m.getByIndices("dataIndices")}; + ${y.setByOffset("global_idx","value")}; + `;return` + ${h.registerUniform("outputSize","u32").registerUniform("axisDimLimit","i32").registerUniform("axis","u32").declareVariables(m,g,y)} + ${h.mainStart()} + ${h.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")} + ${v} + }`};return{name:"Gather",shaderCache:{hint:t.cacheKey,inputDependencies:["rank","rank"]},getRunData:()=>({outputs:[{dims:a,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(d/64)},programUniforms:c}),getShaderSource:f}},i_=e=>Oe({axis:e.axis}),n_=(e,t)=>{let r=e.inputs;Kc(r),e.compute(Zc(e.inputs,t))}}),Xc,s_,a_,L$=j(()=>{_e(),we(),ve(),Xc=(e,t,r,i,s,n,a,o,u)=>{let d=[{type:12,data:n},{type:12,data:i},{type:12,data:s},{type:12,data:r},{type:12,data:a},{type:12,data:o},{type:12,data:u}],c=[n];d.push(...pe(t.dims,c));let f=h=>{let m=P("indices_data",t.dataType,t.dims.length),g=ue("input_slice_offsets_data",12,1,1),y=[m,g],S=[{name:"output_size",type:"u32"},{name:"batch_dims",type:"u32"},{name:"input_dims",type:"u32",length:s.length},{name:"sizes_from_slice_dims_data",type:"u32",length:r.length},{name:"num_slices_per_batch",type:"u32"},{name:"input_batch_stride",type:"u32"},{name:"num_slice_dims",type:"u32"}];return` + ${h.registerUniforms(S).declareVariables(...y)} + ${h.mainStart()} + ${h.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + let batch_idx = global_idx / uniforms.num_slices_per_batch; + let base_offset = batch_idx * uniforms.input_batch_stride; + + let slice_indices_base_offset = global_idx * uniforms.num_slice_dims; + var relative_slice_offset = 0; + for (var dim_idx = 0u; dim_idx < uniforms.num_slice_dims; dim_idx ++) { + var index = i32(indices_data[dim_idx + slice_indices_base_offset].x); + let input_dim_idx = uniforms.batch_dims + dim_idx; + if (index < 0) { + ${s.length===1?"index += i32(uniforms.input_dims);":"index += i32(uniforms.input_dims[input_dim_idx]);"} + } + ${r.length===1?"relative_slice_offset += index * i32(uniforms.sizes_from_slice_dims_data);":"relative_slice_offset += index * i32(uniforms.sizes_from_slice_dims_data[dim_idx]);"} + } + + input_slice_offsets_data[global_idx] = base_offset + u32(relative_slice_offset); + }`};return e.compute({name:"computeSliceOffsets",shaderCache:{hint:`${s.length}_${r.length}`,inputDependencies:["rank"]},getRunData:()=>({outputs:[{dims:c,dataType:e.inputs[1].dataType}],dispatchGroup:{x:Math.ceil(n/64)},programUniforms:d}),getShaderSource:f},{inputs:[t],outputs:[-1]})[0]},s_=(e,t)=>{let r=e.inputs,i=r[0].dims,s=r[0].dataType,n=r[1].dims,a=n[n.length-1],o=B.sizeToDimension(n,n.length-1),u=B.sizeFromDimension(i,t.batchDims+a),d=B.sizeToDimension(i,t.batchDims),c=B.sizeFromDimension(i,t.batchDims),f=o/d,h=new Array(a),m=u;for(let x=0;xi.length)throw new Error("last dimension of indices must not be larger than rank of input tensor");let S=n.slice(0,-1).concat(i.slice(y)),v=B.size(S),b=[{type:12,data:v},{type:12,data:u},...pe(r[0].dims,g.dims,S)],k=x=>{let I=P("data",r[0].dataType,r[0].dims.length),z=P("slice_offsets",12,g.dims.length),O=ue("output",r[0].dataType,S.length);return` + ${x.registerUniform("output_size","u32").registerUniform("slice_size","u32").declareVariables(I,z,O)} + ${x.mainStart()} + ${x.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + let slice_offset = slice_offsets[global_idx / uniforms.slice_size]; + output[global_idx] = data[u32(slice_offset) + global_idx % uniforms.slice_size]; + }`};e.compute({name:"GatherND",shaderCache:{hint:t.cacheKey,inputDependencies:["rank","rank"]},getRunData:()=>({outputs:[{dims:S,dataType:s}],dispatchGroup:{x:Math.ceil(v/64)},programUniforms:b}),getShaderSource:k},{inputs:[r[0],g]})},a_=e=>({batchDims:e.batch_dims,cacheKey:""})}),Yc,Qc,o_,u_,q$=j(()=>{_e(),we(),qe(),ve(),Yc=(e,t)=>{if(e.length<3||e.length>4)throw new Error("GatherBlockQuantized requires 3 or 4 inputs.");let r=B.normalizeAxis(t.quantizeAxis,e[0].dims.length),i=t.blockSize,s=e[0],n=e[2],a=e.length===4?e[3]:void 0;if(n.dims.length!==s.dims.length||!s.dims.map((o,u)=>u===r?Math.ceil(o/i)===n.dims[u]:o===n.dims[u]).reduce((o,u)=>o&&u,!0))throw new Error("Scales must have the same rank as the input tensor and the dims should match except on gatherAxis.");if(a){if(a.dataType!==s.dataType)throw new Error("Zero point must have the same data type as the input tensor.");if(a.dims.length!==n.dims.length||!a.dims.map((o,u)=>o===n.dims[u]).reduce((o,u)=>o&&u,!0))throw new Error("Zero point must have the same rank as the input tensor and the dims should match except on quantizeAxis.")}},Qc=(e,t)=>{let r=e[0].dims,i=e[1].dims,s=r.length,n=B.normalizeAxis(t.gatherAxis,s),a=B.normalizeAxis(t.quantizeAxis,s),o=r.slice(0);o.splice(n,1,...i);let u=B.size(o),d=e[2].dataType,c=e[0].dataType===22,f=[{type:12,data:u},{type:12,data:a},{type:12,data:n},{type:12,data:t.blockSize},...pe(...e.map((m,g)=>m.dims),o)],h=m=>{let g=P("data",e[0].dataType,e[0].dims.length),y=P("inputIndices",e[1].dataType,e[1].dims.length),S=P("scales",e[2].dataType,e[2].dims.length),v=e.length>3?P("zeroPoint",e[3].dataType,e[3].dims.length):void 0,b=ue("output",d,o.length),k=[g,y,S];v&&k.push(v);let x=[{name:"output_size",type:"u32"},{name:"quantize_axis",type:"u32"},{name:"gather_axis",type:"u32"},{name:"block_size",type:"u32"}];return` + ${m.registerUniforms(x).declareVariables(...k,b)} + ${m.mainStart()} + let output_indices = ${b.offsetToIndices("global_idx")}; + var indices_indices = ${y.type.indices}(0); + ${i.length>1?` + for (var i: u32 = 0; i < ${i.length}; i++) { + let index = ${b.indicesGet("output_indices","uniforms.gather_axis + i")}; + ${y.indicesSet("indices_indices","i","index")}; + }`:`indices_indices = ${b.indicesGet("output_indices","uniforms.gather_axis")};`}; + var data_indices = ${g.type.indices}(0); + for (var i: u32 = 0; i < uniforms.gather_axis; i++) { + let index = ${b.indicesGet("output_indices","i")}; + ${g.indicesSet("data_indices","i","index")}; + } + var index_from_indices = ${y.getByIndices("indices_indices")}; + if (index_from_indices < 0) { + index_from_indices += ${r[n]}; + } + ${g.indicesSet("data_indices","uniforms.gather_axis","u32(index_from_indices)")}; + for (var i = uniforms.gather_axis + 1; i < ${o.length}; i++) { + let index = ${b.indicesGet("output_indices",`i + ${i.length} - 1`)}; + ${g.indicesSet("data_indices","i","index")}; + } + let data_offset = ${g.indicesToOffset("data_indices")}; + let data_index = data_offset % 8; + // Convert 4-bit packed data to 8-bit packed data. + let packed_4bit_quantized_data = ${g.getByOffset("data_offset / 8")}; + let packed_8bit_quantized_data = (packed_4bit_quantized_data >> (4 * (data_index % 2))) & 0x0f0f0f0f; + let quantized_data_vec = ${c?"unpack4xI8":"unpack4xU8"}(u32(packed_8bit_quantized_data)); + let quantized_data = quantized_data_vec[data_index / 2]; + var scale_indices = data_indices; + let quantize_axis_index = ${S.indicesGet("data_indices","uniforms.quantize_axis")} / uniforms.block_size; + ${S.indicesSet("scale_indices","uniforms.quantize_axis","quantize_axis_index")}; + var scale = ${S.getByIndices("scale_indices")}; + ${v?` + let zero_point_indices = scale_indices; + let zero_point_offset = ${v.indicesToOffset("zero_point_indices")}; + let zero_point_index = zero_point_offset % 8; + let packed_4bit_zero_points = ${v.getByOffset("zero_point_offset / 8")}; + let packed_8bit_zero_points = (packed_4bit_zero_points >> (4 * (zero_point_index % 2))) & 0x0f0f0f0f; + let zero_point_vec = ${c?"unpack4xI8":"unpack4xU8"}(u32(packed_8bit_zero_points)); + let zero_point = zero_point_vec[zero_point_index / 2];`:"var zero_point = 0"}; + let dequantized_data = ${st(d)}(quantized_data - zero_point) * scale; + ${b.setByOffset("global_idx","dequantized_data")}; + }`};return{name:"GatherBlockQuantized",shaderCache:{hint:`${t.cacheKey};${e.filter((m,g)=>g!==1).map(m=>m.dims.join("_")).join(";")}`,inputDependencies:Array.from({length:e.length},(m,g)=>"rank")},getRunData:()=>({outputs:[{dims:o,dataType:d}],dispatchGroup:{x:Math.ceil(u/64)},programUniforms:f}),getShaderSource:h}},o_=(e,t)=>{let r=e.inputs;Yc(r,t),e.compute(Qc(e.inputs,t))},u_=e=>Oe({blockSize:e.blockSize,gatherAxis:e.gatherAxis,quantizeAxis:e.quantizeAxis})}),Jc,ep,l_,d_,V$=j(()=>{_e(),we(),qe(),ve(),Jc=e=>{if(!e||e.length!==2)throw new Error("GatherElements requires 2 inputs.");if(e[0].dims.length<1)throw new Error("GatherElements requires that the data input be rank >= 1.");if(e[0].dims.length!==e[1].dims.length)throw new Error(`GatherElements requires that the data input and + indices input tensors be of same rank.`)},ep=(e,t)=>{let r=e[0].dims,i=e[0].dataType,s=r.length,n=e[1].dims,a=e[1].dataType,o=B.normalizeAxis(t.axis,s),u=r[o],d=n.slice(0),c=B.size(d),f=P("input",i,s),h=P("indicesInput",a,n.length),m=ue("output",i,d.length),g=[{type:12,data:c},{type:6,data:u},{type:12,data:o}];return g.push(...pe(r,n,d)),{name:"GatherElements",shaderCache:{inputDependencies:["rank","rank"]},getRunData:()=>({outputs:[{dims:d,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(c/64)},programUniforms:g}),getShaderSource:y=>` + ${y.registerUniform("outputSize","u32").registerUniform("axisDimLimit","i32").registerUniform("axis","u32").declareVariables(f,h,m)} + ${y.mainStart()} + ${y.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")} + + let outputIndices = ${m.offsetToIndices("global_idx")}; + + var idx = ${h.getByOffset("global_idx")}; + if (idx < 0) { + idx = idx + uniforms.axisDimLimit; + } + var inputIndices = ${f.type.indices}(outputIndices); + ${f.indicesSet("inputIndices","uniforms.axis","u32(idx)")}; + let value = ${f.getByIndices("inputIndices")}; + + ${m.setByOffset("global_idx","value")}; + }`}},l_=e=>Oe({axis:e.axis}),d_=(e,t)=>{let r=e.inputs;Jc(r),e.compute(ep(e.inputs,t))}}),tp,rp,c_,p_,F$=j(()=>{_e(),we(),ve(),tp=e=>{if(!e)throw new Error("Input is missing");if(e.length<2||e.length>3)throw new Error("Invaid input number.");if(e.length===3&&e[2].dims.length>2)throw new Error("Invalid input shape of C");if(e[0].dataType!==e[1].dataType||e.length===3&&e[0].dataType!==e[2].dataType)throw new Error("Input types are mismatched")},rp=(e,t)=>{let r=e[0].dims.slice(),i=e[1].dims.slice(),[s,n,a]=lm.getShapeOfGemmResult(r,t.transA,i,t.transB,e.length===3?e[2].dims:void 0),o=[s,n];if(!o)throw new Error("Can't use gemm on the given tensors");let u=16,d=Math.ceil(n/u),c=Math.ceil(s/u),f=!0,h=B.size(o),m=[{type:12,data:f?d:h},{type:12,data:s},{type:12,data:n},{type:12,data:a},{type:1,data:t.alpha},{type:1,data:t.beta}],g=["type","type"];e.length===3&&(m.push(...pe(e[2].dims)),g.push("rank")),m.push(...pe(o));let y=v=>{let b="";t.transA&&t.transB?b="value += a[k * uniforms.M + m] * b[n * uniforms.K + k];":t.transA&&!t.transB?b="value += a[k * uniforms.M + m] * b[k * uniforms.N + n];":!t.transA&&t.transB?b="value += a[m * uniforms.K + k] * b[n * uniforms.K + k];":!t.transA&&!t.transB&&(b="value += a[m * uniforms.K + k] * b[k * uniforms.N + n];");let k=t.alpha===1?"":"value *= uniforms.alpha;",x=P("a",e[0].dataType,e[0].dims),I=P("b",e[1].dataType,e[1].dims),z=x.type.value,O=null,A=[x,I];e.length===3&&(O=P("c",e[2].dataType,e[2].dims.length),A.push(O));let R=ue("output",e[0].dataType,o.length);A.push(R);let W=[{name:"output_size",type:"u32"},{name:"M",type:"u32"},{name:"N",type:"u32"},{name:"K",type:"u32"},{name:"alpha",type:"f32"},{name:"beta",type:"f32"}];return` + ${v.registerUniforms(W).declareVariables(...A)} + + ${v.mainStart()} + ${v.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + + let m = global_idx / uniforms.N; + let n = global_idx % uniforms.N; + + var value = ${z}(0); + for (var k: u32 = 0u; k < uniforms.K; k++) { + ${b} + } + + ${k} + ${O!=null?`let cOffset = ${O.broadcastedIndicesToOffset("vec2(m, n)",R)}; value += ${z}(uniforms.beta) * ${O.getByOffset("cOffset")};`:""} + output[global_idx] = value; + }`},S=v=>{let b=P("a",e[0].dataType,e[0].dims),k=P("b",e[1].dataType,e[1].dims),x=null,I=[b,k];e.length===3&&(x=P("c",e[2].dataType,e[2].dims.length),I.push(x));let z=ue("output",e[0].dataType,o.length);I.push(z);let O=[{name:"num_tile_n",type:"u32"},{name:"M",type:"u32"},{name:"N",type:"u32"},{name:"K",type:"u32"},{name:"alpha",type:"f32"},{name:"beta",type:"f32"}],A="",R="";t.transA&&t.transB?(R=` + var col = tile_row_start + local_id.x; + var row = k_start + local_id.y; + if (col < uniforms.M && row < uniforms.K) { + tile_a[local_id.y][local_id.x] = a[row * uniforms.M + col]; + } else { + tile_a[local_id.y][local_id.x] = ${b.type.value}(0); + } + + col = k_start + local_id.x; + row = tile_col_start + local_id.y; + if (col < uniforms.K && row < uniforms.N) { + tile_b[local_id.y][local_id.x] = b[row * uniforms.K + col]; + } else { + tile_b[local_id.y][local_id.x] = ${k.type.value}(0); + } + `,A="value += tile_a[k][local_id.y] * tile_b[local_id.x][k];"):t.transA&&!t.transB?(R=` + var col = tile_row_start + local_id.x; + var row = k_start + local_id.y; + if (col < uniforms.M && row < uniforms.K) { + tile_a[local_id.y][local_id.x] = a[row * uniforms.M + col]; + } else { + tile_a[local_id.y][local_id.x] = ${b.type.value}(0); + } + + col = tile_col_start + local_id.x; + row = k_start + local_id.y; + if (col < uniforms.N && row < uniforms.K) { + tile_b[local_id.y][local_id.x] = b[row * uniforms.N + col]; + } else { + tile_b[local_id.y][local_id.x] = ${k.type.value}(0); + } + `,A="value += tile_a[k][local_id.y] * tile_b[k][local_id.x];"):!t.transA&&t.transB?(R=` + var col = k_start + local_id.x; + var row = tile_row_start + local_id.y; + if (col < uniforms.K && row < uniforms.M) { + tile_a[local_id.y][local_id.x] = a[row * uniforms.K + col]; + } else { + tile_a[local_id.y][local_id.x] = ${b.type.value}(0); + } + + col = k_start + local_id.x; + row = tile_col_start + local_id.y; + if (col < uniforms.K && row < uniforms.N) { + tile_b[local_id.y][local_id.x] = b[row * uniforms.K + col]; + } else { + tile_b[local_id.y][local_id.x] = ${k.type.value}(0); + } + `,A="value += tile_a[local_id.y][k] * tile_b[local_id.x][k];"):!t.transA&&!t.transB&&(R=` + var col = k_start + local_id.x; + var row = tile_row_start + local_id.y; + if (col < uniforms.K && row < uniforms.M) { + tile_a[local_id.y][local_id.x] = a[row * uniforms.K + col]; + } else { + tile_a[local_id.y][local_id.x] = ${b.type.value}(0); + } + + col = tile_col_start + local_id.x; + row = k_start + local_id.y; + if (col < uniforms.N && row < uniforms.K) { + tile_b[local_id.y][local_id.x] = b[row * uniforms.N + col]; + } else { + tile_b[local_id.y][local_id.x] = ${k.type.value}(0); + } + `,A="value += tile_a[local_id.y][k] * tile_b[k][local_id.x];");let W=t.alpha===1?"":"value *= uniforms.alpha;";return` + ${v.registerUniforms(O).declareVariables(...I)} + var tile_a: array, ${u}>; + var tile_b: array, ${u}>; + ${v.mainStart([u,u,1])} + let tile_col_start = (workgroup_index % uniforms.num_tile_n) * ${u}; + let tile_row_start = (workgroup_index / uniforms.num_tile_n) * ${u}; + let num_tiles = (uniforms.K - 1) / ${u} + 1; + var k_start = 0u; + var value = ${z.type.value}(0); + for (var t: u32 = 0u; t < num_tiles; t++) { + ${R} + k_start = k_start + ${u}; + workgroupBarrier(); + + for (var k: u32 = 0u; k < ${u}; k++) { + ${A} + } + workgroupBarrier(); + } + + ${W} + let m = tile_row_start + local_id.y; + let n = tile_col_start + local_id.x; + ${x!=null?`let cOffset = ${x.broadcastedIndicesToOffset("vec2(m, n)",z)}; value += ${z.type.value}(uniforms.beta) * ${x.getByOffset("cOffset")};`:""} + if (m < uniforms.M && n < uniforms.N) { + output[m * uniforms.N + n] = value; + } + }`};return f?{name:"GemmShared",shaderCache:{hint:`${t.cacheKey}`,inputDependencies:g},getRunData:()=>({outputs:[{dims:o,dataType:e[0].dataType}],dispatchGroup:{x:d*c},programUniforms:m}),getShaderSource:S}:{name:"Gemm",shaderCache:{hint:`${t.cacheKey}`,inputDependencies:g},getRunData:()=>({outputs:[{dims:o,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(h/64)},programUniforms:m}),getShaderSource:y}},c_=e=>{let t=e.transA,r=e.transB,i=e.alpha,s=e.beta;return{transA:t,transB:r,alpha:i,beta:s,cacheKey:`${e.transA};${e.transB};${e.alpha===1}`}},p_=(e,t)=>{tp(e.inputs),e.compute(rp(e.inputs,t))}}),Gt,rr,Cr,zr,ip,np,sp,ap,op,up,lp,dp,f_,h_,H$=j(()=>{_e(),we(),qe(),ve(),[Gt,rr,Cr,zr]=[0,1,2,3],ip=e=>{if(e[0].dims.length!==4)throw new Error("only 4-D tensor is supported.");if(e[0].dims.length!==e[1].dims.length)throw new Error("input dimensions must be equal to grid dimensions");if(e[0].dims.length-2!==e[1].dims[e[1].dims.length-1])throw new Error(`last dimension of grid must be equal to ${e[0].dims.length-2}`);if(e[0].dims[0]!==e[1].dims[0])throw new Error("grid batch size must match input batch size")},np=` + fn gs_get_cubic_coeffs(x: f32) -> vec4 { + let cubic_alpha = -0.75f; + let x_abs = abs(x); + var coeffs: vec4; + coeffs[0] = (((cubic_alpha * (x_abs + 1) - 5 * cubic_alpha) * (x_abs + 1) + 8 * cubic_alpha) * (x_abs + 1) - 4 * cubic_alpha); + coeffs[1] = (((cubic_alpha + 2) * x_abs - (cubic_alpha + 3)) * x_abs * x_abs + 1); + coeffs[2] = (((cubic_alpha + 2) * (1 - x_abs) - (cubic_alpha + 3)) * (1 - x_abs) * (1 - x_abs) + 1); + coeffs[3] = (((cubic_alpha * (2 - x_abs) - 5 * cubic_alpha) * (2 - x_abs) + 8 * cubic_alpha) * (2 - x_abs) - 4 * cubic_alpha); + return coeffs; + } +`,sp=e=>` + fn gs_bicubic_interpolate(p: mat4x4<${e}>, x: f32, y: f32) -> ${e} { + var v: vec4; + var coeffs = gs_get_cubic_coeffs(x); + for (var i = 0; i < 4; i++) { + v[i] = coeffs[0] * p[i][0] + coeffs[1] * p[i][1] + coeffs[2] * p[i][2] + coeffs[3] * p[i][3]; + } + coeffs = gs_get_cubic_coeffs(y); + let pixel = ${e}(coeffs[0] * v[0] + coeffs[1] * v[1] + coeffs[2] * v[2] + coeffs[3] * v[3]); + return pixel; + } +`,ap=e=>` + fn gs_denormalize(n: f32, length: i32) -> f32 { + ${e.alignCorners===0?` + // alignCorners: false => [-1, 1] to [-0.5, length - 0.5] + return ((n + 1.0) * f32(length) - 1.0) / 2.0; + `:` + // alignCorners: true => [-1, 1] to [0, length - 1] + return (n + 1.0) / 2.0 * (f32(length - 1)); + `} + } +`,op=e=>` + ${e.paddingMode==="reflection"?` + fn gs_reflect(x: i32, x_min: f32, x_max: f32) -> u32 { + var dx = 0.0; + var fx = f32(x); + let range = x_max - x_min; + if (fx < x_min) { + dx = x_min - fx; + let n = u32(dx / range); + let r = dx - f32(n) * range; + if (n % 2 == 0) { + fx = x_min + r; + } else { + fx = x_max - r; + } + } else if (fx > x_max) { + dx = fx - x_max; + let n = u32(dx / range); + let r = dx - f32(n) * range; + if (n % 2 == 0) { + fx = x_max - r; + } else { + fx = x_min + r; + } + } + return u32(fx); + }`:""} +`,up=(e,t,r)=>` + fn pixel_at_grid(r: i32, c: i32, H: i32, W: i32, batch: u32, channel: u32, border: vec4) -> ${t} { + var pixel = ${t}(0); + var indices = vec4(0); + indices[${Gt}] = batch; + indices[${rr}] = channel;`+(()=>{switch(r.paddingMode){case"zeros":return` + if (r >= 0 && r < H && c >=0 && c < W) { + indices[${Cr}] = u32(r); + indices[${zr}] = u32(c); + } else { + return ${t}(0); + } + `;case"border":return` + indices[${Cr}] = u32(clamp(r, 0, H - 1)); + indices[${zr}] = u32(clamp(c, 0, W - 1)); + `;case"reflection":return` + indices[${Cr}] = gs_reflect(r, border[1], border[3]); + indices[${zr}] = gs_reflect(c, border[0], border[2]); + `;default:throw new Error(`padding mode ${r.paddingMode} is not supported`)}})()+` + return ${e.getByIndices("indices")}; + } +`,lp=(e,t,r)=>(()=>{switch(r.mode){case"nearest":return` + let result = pixel_at_grid(i32(round(y)), i32(round(x)), H_in, W_in, indices[${Gt}], indices[${rr}], border); + `;case"bilinear":return` + let x1 = i32(floor(x)); + let y1 = i32(floor(y)); + let x2 = x1 + 1; + let y2 = y1 + 1; + + let p11 = pixel_at_grid(y1, x1, H_in, W_in, indices[${Gt}], indices[${rr}], border); + let p12 = pixel_at_grid(y1, x2, H_in, W_in, indices[${Gt}], indices[${rr}], border); + let p21 = pixel_at_grid(y2, x1, H_in, W_in, indices[${Gt}], indices[${rr}], border); + let p22 = pixel_at_grid(y2, x2, H_in, W_in, indices[${Gt}], indices[${rr}], border); + + let dx2 = ${t}(f32(x2) - x); + let dx1 = ${t}(x - f32(x1)); + let dy2 = ${t}(f32(y2) - y); + let dy1 = ${t}(y - f32(y1)); + let result = dy2 * (dx2 * p11 + dx1 * p12) + dy1 * (dx2 * p21 + dx1 * p22); + `;case"bicubic":return` + let x0 = i32(floor(x)) - 1; + let y0 = i32(floor(y)) - 1; + var p: mat4x4<${t}>; + for (var h = 0; h < 4; h++) { + for (var w = 0; w < 4; w++) { + p[h][w] = pixel_at_grid(h + y0, w + x0, H_in, W_in, indices[${Gt}], indices[${rr}], border); + } + } + + let dx = x - f32(x0 + 1); + let dy = y - f32(y0 + 1); + let result = gs_bicubic_interpolate(p, dx, dy); + `;default:throw new Error(`mode ${r.mode} is not supported`)}})()+`${e.setByOffset("global_idx","result")}`,dp=(e,t)=>{let r=P("x",e[0].dataType,e[0].dims.length),i=[e[1].dims[0],e[1].dims[1],e[1].dims[2]],s=P("grid",e[1].dataType,i.length,2),n=[e[0].dims[0],e[0].dims[1],e[1].dims[1],e[1].dims[2]];t.format==="NHWC"&&(n=[e[0].dims[0],e[1].dims[1],e[1].dims[2],e[0].dims[3]],[Gt,rr,Cr,zr]=[0,3,1,2]);let a=ue("output",e[0].dataType,n.length),o=r.type.value,u=B.size(n),d=[{type:12,data:u},...pe(e[0].dims,i,n)],c=f=>` + ${f.registerUniform("output_size","u32").declareVariables(r,s,a)} + ${np} + ${sp(o)} + ${ap(t)} + ${op(t)} + ${up(r,o,t)} + + ${f.mainStart()} + ${f.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + let H_in = i32(uniforms.x_shape[${Cr}]); + let W_in = i32(uniforms.x_shape[${zr}]); + + ${t.alignCorners===0?` + let x_min = -0.5; + let x_max = f32(W_in) - 0.5; + let y_min = -0.5; + let y_max = f32(H_in) - 0.5; + `:` + let x_min = 0.0; + let x_max = f32(W_in) - 1.0; + let y_min = 0.0; + let y_max = f32(H_in) - 1.0; + `}; + let border = vec4(x_min, y_min, x_max, y_max); + + let indices = ${a.offsetToIndices("global_idx")}; + var grid_indices = vec3(indices[${Gt}], indices[${Cr}], indices[${zr}]); + let nxy = ${s.getByIndices("grid_indices")}; + var x = gs_denormalize(f32(nxy[0]), W_in); + var y = gs_denormalize(f32(nxy[1]), H_in); + + ${lp(a,o,t)} + }`;return{name:"GridSample",shaderCache:{hint:`${t.cacheKey}`,inputDependencies:["type","type"]},getRunData:f=>{let h=B.size(n);return{outputs:[{dims:n,dataType:f[0].dataType}],dispatchGroup:{x:Math.ceil(h/64)},programUniforms:d}},getShaderSource:c}},f_=(e,t)=>{ip(e.inputs),e.compute(dp(e.inputs,t))},h_=e=>Oe({alignCorners:e.align_corners,mode:e.mode,paddingMode:e.padding_mode,format:e.format})}),lt,cp,m_,ma,pp,Bi,g_,__=j(()=>{_e(),we(),qe(),Yo(),eu(),ve(),Tr(),lt=(e,t)=>e.length>t&&e[t].dims.length>0?e[t]:void 0,cp=(e,t)=>{let r=e[0],i=lt(e,1),s=lt(e,2),n=lt(e,3),a=lt(e,4),o=lt(e,5),u=lt(e,6),d=lt(e,7);if(r.dims.length!==3&&r.dims.length!==5)throw new Error("Input query is expected to have 3 or 5 dimensions");let c=r.dims[0],f=r.dims[1],h=r.dims.length===3?r.dims[2]:t.numHeads*r.dims[4],m=f,g=0,y=0,S=Math.floor(h/t.numHeads);if(u&&d&&B.size(u.dims)&&B.size(d.dims)){if(u.dims.length!==4)throw new Error('Input "past_key" is expected to have 4 dimensions');if(u.dims[0]!==c||u.dims[1]!==t.numHeads||u.dims[3]!==S)throw new Error('Input "past_key" shape (batch_size, num_heads, past_sequence_length, head_size)');if(d.dims[0]!==c||d.dims[1]!==t.numHeads||d.dims[3]!==S)throw new Error('Input "past_value" shape (batch_size, num_heads, past_sequence_length, head_size)');if(u.dims[2]!==d.dims[2])throw new Error('Input "past_key" and "past_value" shall have same dim 2 (past_sequence_length)');if(d.dims.length!==4)throw new Error('Input "past_value" is expected to have 4 dimensions');g=u.dims[2],y=u.dims[2]}else if(u&&B.size(u.dims)||d&&B.size(d.dims))throw new Error('Input "past_key" and "past_value" shall be both present or both absent');let v;if(i&&B.size(i.dims)>0){if(r.dims.length!==3)throw new Error('Input "query" is expected to have 3 dimensions when key is given');if(i.dims.length<3||i.dims.length>5)throw new Error('Input "key" is expected to have 3, 4, or 5 dimensions');if(r.dims[0]!==i.dims[0])throw new Error('Input "query" and "key" shall have same dim 0 (batch size)');if(i.dims.length===3){if(i.dims[2]!==r.dims[2])throw new Error('Input "query" and "key" shall have same dim 2 (hidden_size)');v=2,m=i.dims[1]}else if(i.dims.length===5){if(i.dims[2]!==t.numHeads||i.dims[3]!==2||i.dims[4]!==S)throw new Error('Expect "key" shape (batch_size, kv_sequence_length, num_heads, 2, head_size) for packed kv');if(s)throw new Error('Expect "value" be none when "key" has packed kv format.');v=5,m=i.dims[1]}else{if(i.dims[1]!==t.numHeads||i.dims[3]!==S)throw new Error('Expect "key" shape (batch_size, num_heads, kv_sequence_length, head_size) for past_key');v=0,m=i.dims[2]}}else{if(r.dims.length!==5)throw new Error('Input "query" is expected to have 5 dimensions when key is empty');if(r.dims[2]!==t.numHeads||r.dims[3]!==3)throw new Error('Expect "query" shape (batch_size, kv_sequence_length, num_heads, 3, head_size) for packed kv');v=3}if(n&&B.size(n.dims)>0){if(n.dims.length!==1)throw new Error('Input "bias" is expected to have 1 dimension');if(i&&i.dims.length===5&&i.dims[3]===2)throw new Error("bias is not allowed for packed kv.")}let b=g+m,k=0;if(a&&B.size(a.dims)>0){k=8;let O=a.dims;throw O.length===1?O[0]===c?k=1:O[0]===3*c+2&&(k=3):O.length===2&&O[0]===c&&O[1]===b&&(k=5),k===8?new Error('Input "key_padding_mask" shape shall be (batch_size) or (batch_size, total_sequence_length)'):new Error("Mask not supported")}let x=!1,I=h;if(s&&B.size(s.dims)>0){if(s.dims.length!==3&&s.dims.length!==4)throw new Error('Input "value" is expected to have 3 or 4 dimensions');if(r.dims[0]!==s.dims[0])throw new Error('Input "query" and "value" shall have same dim 0 (batch_size)');if(s.dims.length===3){if(m!==s.dims[1])throw new Error('Input "key" and "value" shall have the same dim 1 (kv_sequence_length)');I=s.dims[2]}else{if(m!==s.dims[2])throw new Error('Input "key" and "value" shall have the same dim 2 (kv_sequence_length)');I=s.dims[1]*s.dims[3],x=!0}}let z=!1;if(a&&B.size(a.dims)>0)throw new Error("Key padding mask is not supported");if(o&&B.size(o.dims)>0){if(o.dims.length!==4)throw new Error('Input "attention_bias" is expected to have 4 dimensions');if(o.dims[0]!==c||o.dims[1]!==t.numHeads||o.dims[2]!==f||o.dims[3]!==b)throw new Error('Expect "attention_bias" shape (batch_size, num_heads, sequence_length, total_sequence_length)')}return{batchSize:c,sequenceLength:f,pastSequenceLength:g,kvSequenceLength:m,totalSequenceLength:b,maxSequenceLength:y,inputHiddenSize:0,hiddenSize:h,vHiddenSize:I,headSize:S,vHeadSize:Math.floor(I/t.numHeads),numHeads:t.numHeads,isUnidirectional:!1,pastPresentShareBuffer:!1,maskFilterValue:t.maskFilterValue,maskType:k,scale:t.scale,broadcastResPosBias:z,passPastInKv:x,qkvFormat:v}},m_=e=>Oe({...e}),ma=Oe({perm:[0,2,1,3]}),pp=(e,t,r,i,s,n,a)=>{let o=[i,s,n],u=B.size(o),d=[{type:12,data:u},{type:12,data:a},{type:12,data:n}],c=f=>{let h=ue("qkv_with_bias",t.dataType,o),m=P("qkv",t.dataType,o),g=P("bias",r.dataType,o),y=[{name:"output_size",type:"u32"},{name:"bias_offset",type:"u32"},{name:"hidden_size",type:"u32"}];return` + ${f.registerUniforms(y).declareVariables(m,g,h)} + ${f.mainStart()} + ${f.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + let bias_offset_idx = (global_idx % uniforms.hidden_size) + uniforms.bias_offset; + + qkv_with_bias[global_idx] = qkv[global_idx] + bias[bias_offset_idx]; + }`};return e.compute({name:"MultiHeadAttentionAddBias",shaderCache:{inputDependencies:["type","type"]},getRunData:()=>({outputs:[{dims:o,dataType:t.dataType,gpuDataType:0}],dispatchGroup:{x:Math.ceil(u/64)},programUniforms:d}),getShaderSource:c},{inputs:[t,r],outputs:[-1]})[0]},Bi=(e,t,r,i,s,n,a,o)=>{let u=n;if(a&&B.size(a.dims)>0){if(i===1)throw new Error("AddBiasReshape is not implemented. Please export your model with packed QKV or KV");return u=pp(e,n,a,t,i,r*s,o),u=u.reshape([t,i,r,s]),r===1||i===1?u:e.compute($t(u,ma.perm),{inputs:[u],outputs:[-1]})[0]}else return n.dims.length===3&&(u=n.reshape([t,i,r,s])),r===1||i===1?u:e.compute($t(u,ma.perm),{inputs:[u],outputs:[-1]})[0]},g_=(e,t)=>{let r=cp(e.inputs,t),i=e.inputs[0],s=lt(e.inputs,1),n=lt(e.inputs,2),a=lt(e.inputs,3),o=lt(e.inputs,4),u=lt(e.inputs,5),d=lt(e.inputs,6),c=lt(e.inputs,7);if(i.dims.length===5)throw new Error("Packed QKV is not implemented");if((s==null?void 0:s.dims.length)===5)throw new Error("Packed KV is not implemented");let f=s&&n&&s.dims.length===4&&n.dims.length===4,h=Bi(e,r.batchSize,r.numHeads,r.sequenceLength,r.headSize,i,a,0);if(f)return Ji(e,h,s,n,o,void 0,d,c,u,r);if(!s||!n)throw new Error("key and value must be provided");let m=Bi(e,r.batchSize,r.numHeads,r.kvSequenceLength,r.headSize,s,a,r.hiddenSize),g=Bi(e,r.batchSize,r.numHeads,r.kvSequenceLength,r.vHeadSize,n,a,2*r.hiddenSize);Ji(e,h,m,g,o,void 0,d,c,u,r)}}),fp,hp,mp,gp,lo,y_,b_,w_=j(()=>{_e(),we(),qe(),ve(),fp=e=>{if(!e||e.length<1)throw new Error("too few inputs")},hp=(e,t)=>{let r=[],i=t.numOutputs;return e[1].dims[0]>0&&(e[1].getBigInt64Array().forEach(s=>r.push(Number(s))),i=r.length),Oe({numOutputs:i,axis:t.axis,splitSizes:r})},mp=e=>` +fn calculateOutputIndex(index: u32) -> u32 { + for (var i: u32 = 0u; i < ${e}u; i += 1u ) { + if (index < ${de("uniforms.size_in_split_axis","i",e)}) { + return i; + } + } + return ${e}u; +}`,gp=e=>{let t=e.length,r=[];for(let i=0;i{let r=e[0].dims,i=B.size(r),s=e[0].dataType,n=B.normalizeAxis(t.axis,r.length),a=new Array(t.numOutputs),o=P("input",s,r.length),u=new Array(t.numOutputs),d=[],c=[],f=0,h=[{type:12,data:i}];for(let g=0;g` + ${g.registerUniform("input_size","u32").registerUniform("size_in_split_axis","u32",u.length).declareVariables(o,...a)} + ${mp(u.length)} + ${gp(a)} + + ${g.mainStart()} + ${g.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.input_size")} + + var indices = ${o.offsetToIndices("global_idx")}; + var index = ${o.indicesGet("indices",n)}; + let output_number = calculateOutputIndex(index); + if (output_number != 0) { + index -= ${de("uniforms.size_in_split_axis","output_number - 1u",u.length)}; + ${o.indicesSet("indices",n,"index")}; + } + writeBufferData(output_number, indices, global_idx); + }`;return{name:"Split",shaderCache:{hint:t.cacheKey,inputDependencies:["rank"]},getShaderSource:m,getRunData:()=>({outputs:d,dispatchGroup:{x:Math.ceil(i/64)},programUniforms:h})}},y_=(e,t)=>{fp(e.inputs);let r=e.inputs.length===1?t:hp(e.inputs,t);e.compute(lo(e.inputs,r),{inputs:[0]})},b_=e=>{let t=e.axis,r=e.splitSizes,i=e.numOutputs<0?r.length:e.numOutputs;if(i!==r.length)throw new Error("numOutputs and splitSizes lengh must be equal");return Oe({axis:t,numOutputs:i,splitSizes:r})}}),_p,Xn,v_,$_=j(()=>{_e(),we(),qe(),ve(),_p=(e,t)=>{let[r,i,s,n]=e,{numHeads:a,rotaryEmbeddingDim:o}=t;if(r.dims.length!==3&&r.dims.length!==4)throw new Error(`Input 'x' is expected to have 3 or 4 dimensions, got ${r.dims.length}`);if(!B.areEqual(i.dims,[])&&!B.areEqual(i.dims,[1])&&i.dims.length!==2)throw new Error(`Input 'position_ids' is expected to have 0, 1, or 2 dimensions, got ${i.dims.length}`);if(s.dims.length!==2)throw new Error(`Input 'cos_cache' is expected to have 2 dimensions, got ${s.dims.length}`);if(n.dims.length!==2)throw new Error(`Input 'sin_cache' is expected to have 2 dimensions, got ${n.dims.length}`);if(!B.areEqual(s.dims,n.dims))throw new Error("Inputs 'cos_cache' and 'sin_cache' are expected to have the same shape");if(o>0&&a===0)throw new Error("num_heads must be provided if rotary_embedding_dim is specified");let u=r.dims[0],d=r.dims[r.dims.length-2],c=s.dims[0],f=B.sizeFromDimension(r.dims,1)/d,h=o===0?s.dims[1]*2:f/a;if(o>h)throw new Error("rotary_embedding_dim must be less than or equal to head_size");if(i.dims.length===2){if(u!==i.dims[0])throw new Error(`Input 'position_ids' dimension 0 should be of size batch_size, got ${i.dims[0]}`);if(d!==i.dims[1])throw new Error(`Input 'position_ids' dimension 1 should be of size sequence_length, got ${i.dims[1]}`)}if(h/2!==s.dims[1]&&o/2!==s.dims[1])throw new Error(`Input 'cos_cache' dimension 1 should be same as head_size / 2 or rotary_embedding_dim / 2, got ${s.dims[1]}`);if(d>c)throw new Error("Updating cos_cache and sin_cache in RotaryEmbedding is not currently supported")},Xn=(e,t)=>{let{interleaved:r,numHeads:i,rotaryEmbeddingDim:s,scale:n}=t,a=e[0].dims[0],o=B.sizeFromDimension(e[0].dims,1),u=e[0].dims[e[0].dims.length-2],d=o/u,c=e[2].dims[1],f=s===0?c*2:d/i,h=new Array(a,u,d/f,f-c),m=B.computeStrides(h),g=[{type:1,data:n},{type:12,data:h},{type:12,data:m},...e[0].dims.length===3?new Array({type:12,data:[o,d,f,1]}):[],...e[0].dims.length===4?new Array({type:12,data:[o,f,u*f,1]}):[],...pe(e[0].dims,e[1].dims,e[2].dims,e[3].dims,e[0].dims)],y=S=>{let v=P("input",e[0].dataType,e[0].dims.length),b=P("position_ids",e[1].dataType,e[1].dims.length),k=P("cos_cache",e[2].dataType,e[2].dims.length),x=P("sin_cache",e[3].dataType,e[3].dims.length),I=ue("output",e[0].dataType,e[0].dims.length);return S.registerUniforms([{name:"scale",type:"f32"},{name:"global_shape",type:"u32",length:h.length},{name:"global_strides",type:"u32",length:m.length},{name:"input_output_strides",type:"u32",length:m.length}]),` + ${S.declareVariables(v,b,k,x,I)} + + ${S.mainStart(oi)} + let half_rotary_emb_dim = uniforms.${k.name}_shape[1]; + let bsnh = global_idx / uniforms.global_strides % uniforms.global_shape; + let size = uniforms.global_shape[0] * uniforms.global_strides[0]; + ${S.guardAgainstOutOfBoundsWorkgroupSizes("size")} + + if (bsnh[3] < half_rotary_emb_dim) { + let position_ids_idx = + ${b.broadcastedIndicesToOffset("bsnh.xy",ue("",b.type.tensor,2))}; + let position_id = + u32(${b.getByOffset("position_ids_idx")}) + select(0, bsnh[1], position_ids_idx == 0); + let i = dot(bsnh, uniforms.input_output_strides) + select(0, bsnh[3], ${r}); + let j = i + select(half_rotary_emb_dim, 1, ${r}); + let re = ${v.getByOffset("i")} * ${k.get("position_id","bsnh[3]")} - + ${v.getByOffset("j")} * ${x.get("position_id","bsnh[3]")}; + ${I.setByOffset("i","re")} + let im = ${v.getByOffset("i")} * ${x.get("position_id","bsnh[3]")} + + ${v.getByOffset("j")} * ${k.get("position_id","bsnh[3]")}; + ${I.setByOffset("j","im")} + } else { + let k = dot(bsnh, uniforms.input_output_strides) + half_rotary_emb_dim; + ${I.setByOffset("k",v.getByOffset("k"))} + } + }`};return{name:"RotaryEmbedding",shaderCache:{hint:Oe({interleaved:r}).cacheKey,inputDependencies:["rank","rank","rank","rank"]},getShaderSource:y,getRunData:()=>({outputs:[{dims:e[0].dims,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(B.size(h)/oi)},programUniforms:g})}},v_=(e,t)=>{_p(e.inputs,t),e.compute(Xn(e.inputs,t))}}),yp,bp,ga,wp,x_,G$=j(()=>{qe(),_e(),eu(),__(),w_(),Tr(),$_(),ve(),yp=(e,t)=>{if(t.doRotary&&e.length<=7)throw new Error("cos_cache and sin_cache inputs are required if do_rotary is specified");let r=e[0],i=e[1],s=e[2],n=e[3],a=e[4];if(t.doRotary!==0&&e.length<=7)throw new Error("cos_cast and sin_cache are expected if do_rotary attribute is non-zero");if(t.localWindowSize!==-1)throw new Error("Local attention is not supported");if(t.softcap!==0)throw new Error("Softcap is not supported");if(t.rotaryInterleaved!==0)throw new Error("Rotary interleaved is not supported");if(t.smoothSoftmax)throw new Error("Smooth softmax is not supported");if(r.dims.length!==3&&r.dims.length!==5)throw new Error("Input query is expected to have 3 or 5 dimensions");let o=!1,u=r.dims[0],d=r.dims[1],c=r.dims.length===3?o?r.dims[2]/3:r.dims[2]:t.numHeads*r.dims[4],f=d,h=0,m=!i||i.dims.length===0,g=Math.floor(m?c/(t.numHeads+2*t.kvNumHeads):c/t.numHeads);m&&(c=g*t.numHeads);let y=n&&n.dims.length!==0,S=a&&a.dims.length!==0;if(y&&n.dims.length===4&&n.dims[0]===u&&n.dims[1]!==t.kvNumHeads&&n.dims[2]===t.kvNumHeads&&n.dims[3]===g)throw new Error("BSNH pastKey/pastValue is not supported");if(y&&S){if(n.dims.length!==4)throw new Error('Input "past_key" is expected to have 4 dimensions');if(a.dims.length!==4)throw new Error('Input "past_value" is expected to have 4 dimensions');h=n.dims[2]}else if(y||S)throw new Error('Input "past_key" and "past_value" shall be both present or both absent');let v=1;if(i&&i.dims.length>0){if(r.dims.length!==3)throw new Error('Input "query" is expected to have 3 dimensions when key is given');if(i.dims.length<3||i.dims.length>5)throw new Error('Input "key" is expected to have 3, 4, or 5 dimensions');if(r.dims[0]!==i.dims[0])throw new Error('Input "query" and "key" shall have same dim 0 (batch size)');if(i.dims.length===3){if(r.dims[2]%i.dims[2]!==0)throw new Error('Dimension 2 of "query" should be a multiple of "key"');f=i.dims[1]}else if(i.dims.length===5){if(i.dims[2]!==t.numHeads||i.dims[3]!==2||i.dims[4]!==g)throw new Error('Expect "key" shape (batch_size, kv_sequence_length, num_heads, 2, head_size) for packed kv');if(s)throw new Error('Expect "value" be none when "key" has packed kv format.');f=i.dims[1]}else{if(i.dims[1]!==t.numHeads||i.dims[3]!==g)throw new Error('Expect "key" shape (batch_size, num_heads, kv_sequence_length, head_size) for past_key');f=i.dims[2]}}else{if(r.dims.length!==3&&r.dims.length!==5)throw new Error('Input "query" is expected to have 3 or 5 dimensions when key is empty');if(r.dims.length===5&&(r.dims[2]!==t.numHeads||r.dims[3]!==3))throw new Error('Expect "query" shape (batch_size, kv_sequence_length, num_heads, 3, head_size) for packed kv');v=3}let b=0,k=!1,x=t.kvNumHeads?g*t.kvNumHeads:c;if(s&&s.dims.length>0){if(s.dims.length!==3&&s.dims.length!==4)throw new Error('Input "value" is expected to have 3 or 4 dimensions');if(r.dims[0]!==s.dims[0])throw new Error('Input "query" and "value" shall have same dim 0 (batch_size)');if(s.dims.length===3){if(f!==s.dims[1])throw new Error('Input "key" and "value" shall have the same dim 1 (kv_sequence_length)');x=s.dims[2]}else{if(f!==s.dims[2])throw new Error('Input "past_key" and "past_value" shall have the same dim 2 (kv_sequence_length)');x=s.dims[1]*s.dims[3],k=!0}}let I=e.length>4?e[5]:void 0;if(I&&I.dims.length!==1&&I.dims[0]!==u)throw new Error('Input "seqlens" is expected to have 1 dimension and the same dim 0 as batch_size');return{batchSize:u,sequenceLength:d,pastSequenceLength:h,kvSequenceLength:f,totalSequenceLength:-1,maxSequenceLength:-1,inputHiddenSize:0,hiddenSize:c,vHiddenSize:x,headSize:g,vHeadSize:Math.floor(x/t.kvNumHeads),numHeads:t.numHeads,kvNumHeads:t.kvNumHeads,nReps:t.numHeads/t.kvNumHeads,pastPresentShareBuffer:!1,maskType:b,scale:t.scale,broadcastResPosBias:!1,passPastInKv:k,qkvFormat:v}},bp=Oe({perm:[0,2,1,3]}),ga=(e,t,r)=>{let i=t,s=r.kvNumHeads;return t.dims.length===3&&r.kvSequenceLength!==0&&(i=t.reshape([r.batchSize,r.kvSequenceLength,s,r.headSize]),i=e.compute($t(i,bp.perm),{inputs:[i],outputs:[-1]})[0]),i},wp=(e,t,r,i)=>{let s=7,n=["type","type"],a=[e*t],o=e*t,u=[{type:12,data:o},{type:12,data:t},{type:12,data:e}],d=c=>{let f=P("seq_lens",r.dataType,r.dims),h=P("total_seq_lens",i.dataType,i.dims),m=ue("pos_ids",s,a),g=[{name:"output_size",type:"u32"},{name:"sequence_length",type:"u32"},{name:"batch_size",type:"u32"}];return` + ${c.registerUniforms(g).declareVariables(f,h,m)} + ${c.mainStart()} + ${c.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + let total_sequence_length = u32(${h.getByOffset("0")}); + let is_subsequent_prompt = uniforms.sequence_length > 1 && uniforms.sequence_length != total_sequence_length; + let is_first_prompt = !is_subsequent_prompt && uniforms.sequence_length == total_sequence_length; + let batch_idx = global_idx / uniforms.sequence_length; + let sequence_idx = i32(global_idx % uniforms.sequence_length); + var pos_id: i32 = 0; + let seqlen = ${f.getByOffset("batch_idx")}; + let total_seqlen = seqlen + 1; + if (is_first_prompt) { + if (sequence_idx < total_seqlen) { + pos_id = sequence_idx; + } else { + pos_id = 1; + } + ${m.setByOffset("global_idx","pos_id")} + } else if (is_subsequent_prompt) { + let past_seqlen = total_seqlen - i32(uniforms.sequence_length); + if (past_seqlen + sequence_idx < total_seqlen) { + pos_id = past_seqlen + sequence_idx; + } else { + pos_id = 1; + } + ${m.setByOffset("global_idx","pos_id")} + } else if (global_idx < uniforms.batch_size) { + ${m.setByOffset("global_idx","seqlen")} + }; + } + `};return{name:"GeneratePositionIds",shaderCache:{hint:`${e};${t}`,inputDependencies:n},getRunData:()=>({outputs:[{dims:a,dataType:s}],dispatchGroup:{x:Math.ceil(o/64)},programUniforms:u}),getShaderSource:d}},x_=(e,t)=>{var x;let r=yp(e.inputs,t);if(e.inputs[0].dims.length===5)throw new Error("Packed QKV is not implemented");if(((x=e.inputs[1])==null?void 0:x.dims.length)===5)throw new Error("Packed KV is not implemented");let i=e.inputs[0],s=e.inputs[1]&&e.inputs[1].dims.length>0?e.inputs[1]:void 0,n=e.inputs[2]&&e.inputs[2].dims.length>0?e.inputs[2]:void 0,a=e.inputs[3]&&e.inputs[3].dims.length!==0?e.inputs[3]:void 0,o=e.inputs[4]&&e.inputs[4].dims.length!==0?e.inputs[4]:void 0,u=e.inputs.length>4?e.inputs[5]:void 0,d=e.inputs.length>5?e.inputs[6]:void 0,c=r.kvNumHeads?r.kvNumHeads:r.numHeads,f=Oe({axis:2,numOutputs:3,splitSizes:[r.numHeads*r.headSize,c*r.headSize,c*r.headSize]}),[h,m,g]=!s&&!n?e.compute(lo([i],f),{inputs:[i],outputs:[-1,-1,-1]}):[i,s,n],y,S;if(t.doRotary){let I=e.compute(wp(r.batchSize,r.sequenceLength,u,d),{inputs:[u,d],outputs:[-1]})[0],z=e.inputs[7],O=e.inputs[8],A=Oe({interleaved:t.rotaryInterleaved!==0,numHeads:r.numHeads,rotaryEmbeddingDim:0,scale:t.scale}),R=[h,I,z,O],W=[-1];y=e.compute(Xn(R,A),{inputs:R,outputs:W})[0],R.splice(0,1,m);let ie=Oe({interleaved:t.rotaryInterleaved!==0,numHeads:r.kvNumHeads,rotaryEmbeddingDim:0,scale:t.scale});S=e.compute(Xn(R,ie),{inputs:R,outputs:W})[0]}let v=Bi(e,r.batchSize,r.numHeads,r.sequenceLength,r.headSize,t.doRotary?y:h,void 0,0),b=ga(e,t.doRotary?S:m,r),k=ga(e,g,r);Ji(e,v,b,k,void 0,void 0,a,o,void 0,r,u,d)}}),_a,vp,$p,S_,j$=j(()=>{_e(),we(),Tr(),ve(),_a=(e,t,r,i,s,n,a,o)=>{let u=Le(n),d=u===1?"f32":`vec${u}f`,c=u===1?"vec2f":`mat2x${u}f`,f=s*a,h=64;f===1&&(h=256);let m=[s,a,n/u],g=[s,a,2],y=["rank","type","type"],S=[];S.push(...pe(m,g));let v=b=>{let k=P("x",t.dataType,3,u),x=P("scale",r.dataType,r.dims),I=P("bias",i.dataType,i.dims),z=ue("output",1,3,2),O=[k,x,I,z];return` + var workgroup_shared : array<${c}, ${h}>; + const workgroup_size = ${h}u; + ${b.declareVariables(...O)} + ${b.mainStart(h)} + let batch = workgroup_index / uniforms.x_shape[1]; + let channel = workgroup_index % uniforms.x_shape[1]; + let hight = uniforms.x_shape[2]; + // initialize workgroup memory + var sum = ${d}(0); + var squared_sum = ${d}(0); + for (var h = local_idx; h < hight; h += workgroup_size) { + let value = ${d}(${k.get("batch","channel","h")}); + sum += value; + squared_sum += value * value; + } + workgroup_shared[local_idx] = ${c}(sum, squared_sum); + workgroupBarrier(); + + for (var currSize = workgroup_size >> 1; currSize > 0; currSize = currSize >> 1) { + if (local_idx < currSize) { + workgroup_shared[local_idx] = workgroup_shared[local_idx] + workgroup_shared[local_idx + currSize]; + } + workgroupBarrier(); + } + if (local_idx == 0) { + let sum_final = ${kr("workgroup_shared[0][0]",u)} / f32(hight * ${u}); + let squared_sum_final = ${kr("workgroup_shared[0][1]",u)} / f32(hight * ${u}); + + let inv_std_dev = inverseSqrt(squared_sum_final - sum_final * sum_final + f32(${o})); + let channel_scale = inv_std_dev * f32(scale[channel]); + let channel_shift = f32(bias[channel]) - sum_final * channel_scale; + output[workgroup_index] = vec2f(channel_scale, channel_shift); + } + }`};return e.compute({name:"InstanceNormComputeChannelScaleShift",shaderCache:{hint:`${u};${o};${h}`,inputDependencies:y},getRunData:()=>({outputs:[{dims:g,dataType:1}],dispatchGroup:{x:f},programUniforms:S}),getShaderSource:v},{inputs:[t,r,i],outputs:[-1]})[0]},vp=(e,t,r)=>{let i=t[0].dims,s=i,n=2,a=i[0],o=i[1],u=B.sizeFromDimension(i,n),d=Le(u),c=B.size(s)/d,f=_a(e,t[0],t[1],t[2],a,u,o,r.epsilon),h=[a,o,u/d],m=[a,o],g=["type","none"],y=S=>{let v=P("x",t[0].dataType,h.length,d),b=P("scale_shift",1,m.length,2),k=ue("output",t[0].dataType,h.length,d),x=[v,b,k];return` + ${S.registerUniform("output_size","u32").declareVariables(...x)} + ${S.mainStart()} + ${S.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + let outputIndices = ${k.offsetToIndices("global_idx")}; + let batch = outputIndices[0]; + let channel = outputIndices[1]; + let scale_shift = ${b.getByIndices("vec2(batch, channel)")}; + let value = ${v.getByOffset("global_idx")} * ${k.type.value}(scale_shift.x) + ${k.type.value}(scale_shift.y); + ${k.setByOffset("global_idx","value")}; + }`};e.compute({name:"InstanceNormalization",shaderCache:{hint:`${d}`,inputDependencies:g},getRunData:()=>({outputs:[{dims:s,dataType:t[0].dataType}],dispatchGroup:{x:Math.ceil(c/64)},programUniforms:[{type:12,data:c},...pe(h,m,h)]}),getShaderSource:y},{inputs:[t[0],f]})},$p=(e,t,r)=>{let i=t[0].dims,s=i,n=i[0],a=i[i.length-1],o=B.sizeFromDimension(i,1)/a,u=Le(a),d=B.size(s)/u,c=[{type:12,data:o},{type:12,data:Math.floor(a/u)}],f=["type","type"],h=!1,m=[0,i.length-1];for(let v=0;vi[m[b]])),y=_a(e,g,t[1],t[2],n,o,a,r.epsilon),S=v=>{let b=Xe(t[0].dataType),k=u===1?"vec2f":`mat${u}x2f`,x=O=>{let A=O===0?"x":"y",R=u===1?"f32":`vec${u}f`;switch(u){case 1:return`${b}(${R}(scale.${A}))`;case 2:return`vec2<${b}>(${R}(scale[0].${A}, scale[1].${A}))`;case 4:return`vec4<${b}>(${R}(scale[0].${A}, scale[1].${A}, scale[2].${A}, scale[3].${A}))`;default:throw new Error(`Not supported compoents ${u}`)}},I=P("input",t[0].dataType,t[0].dims,u),z=ue("output",t[0].dataType,s,u);return` + @group(0) @binding(0) var input : array<${I.type.storage}>; + @group(0) @binding(1) var scale_input : array<${k}>; + @group(0) @binding(2) var output : array<${z.type.storage}>; + struct Uniforms {H: u32, C : u32}; + @group(0) @binding(3) var uniforms: Uniforms; + + ${v.mainStart()} + let current_image_number = global_idx / (uniforms.C * uniforms.H); + let current_channel_number = global_idx % uniforms.C; + + let scale_offset = current_image_number * uniforms.C + current_channel_number; + let scale = scale_input[scale_offset]; + output[global_idx] = fma(input[global_idx], ${x(0)}, ${x(1)}); + }`};e.compute({name:"InstanceNormalizationNHWC",shaderCache:{hint:`${u}`,inputDependencies:f},getRunData:()=>({outputs:[{dims:s,dataType:t[0].dataType}],dispatchGroup:{x:Math.ceil(d/64)},programUniforms:c}),getShaderSource:S},{inputs:[t[0],y]})},S_=(e,t)=>{t.format==="NHWC"?$p(e,e.inputs,t):vp(e,e.inputs,t)}}),xp,Sp,k_,K$=j(()=>{_e(),we(),ve(),xp=e=>{if(!e||e.length<2)throw new Error("layerNorm requires at least 2 inputs.")},Sp=(e,t,r)=>{let i=t.simplified,s=e[0].dims,n=e[1],a=!i&&e[2],o=s,u=B.normalizeAxis(t.axis,s.length),d=B.sizeToDimension(s,u),c=B.sizeFromDimension(s,u),f=B.size(n.dims),h=a?B.size(a.dims):0;if(f!==c||a&&h!==c)throw new Error(`Size of X.shape()[axis:] == ${c}. + Size of scale and bias (if provided) must match this. + Got scale size of ${f} and bias size of ${h}`);let m=[];for(let I=0;I1,b=r>2,k=I=>{let z=Xe(e[0].dataType),O=[P("x",e[0].dataType,e[0].dims,g),P("scale",n.dataType,n.dims,g)];a&&O.push(P("bias",a.dataType,a.dims,g)),O.push(ue("output",e[0].dataType,o,g)),v&&O.push(ue("mean_data_output",1,m)),b&&O.push(ue("inv_std_output",1,m));let A=[{name:"norm_count",type:"u32"},{name:"norm_size",type:"f32"},{name:"norm_size_vectorized",type:"u32"},{name:"epsilon",type:"f32"}];return` + ${I.registerUniforms(A).declareVariables(...O)} + ${I.mainStart()} + ${I.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.norm_count")} + let offset = global_idx * uniforms.norm_size_vectorized; + var mean_vector = ${eo("f32",g)}; + var mean_square_vector = ${eo("f32",g)}; + + for (var h: u32 = 0u; h < uniforms.norm_size_vectorized; h++) { + let value = ${ri(z,g,"x[h + offset]")}; + mean_vector += value; + mean_square_vector += value * value; + } + let mean = ${kr("mean_vector",g)} / uniforms.norm_size; + let inv_std_dev = inverseSqrt(${kr("mean_square_vector",g)} / uniforms.norm_size ${i?"":"- mean * mean"} + uniforms.epsilon); + + for (var j: u32 = 0; j < uniforms.norm_size_vectorized; j++) { + let f32input = ${ri(z,g,"x[j + offset]")}; + let f32scale = ${ri(z,g,"scale[j]")}; + output[j + offset] = ${O[0].type.value}((f32input ${i?"":"- mean"}) * inv_std_dev * f32scale + ${a?`+ ${ri(z,g,"bias[j]")}`:""} + ); + } + + ${v?"mean_data_output[global_idx] = mean":""}; + ${b?"inv_std_output[global_idx] = inv_std_dev":""}; + }`},x=[{dims:o,dataType:e[0].dataType}];return v&&x.push({dims:m,dataType:1}),b&&x.push({dims:m,dataType:1}),{name:"LayerNormalization",shaderCache:{hint:`${g};${r};${i}`,inputDependencies:y},getRunData:()=>({outputs:x,dispatchGroup:{x:Math.ceil(d/64)},programUniforms:S}),getShaderSource:k}},k_=(e,t)=>{xp(e.inputs),e.compute(Sp(e.inputs,t,e.outputCount))}}),kp,T_,Z$=j(()=>{we(),su(),au(),kp=e=>{if(!e||e.length!==2)throw new Error("MatMul requires 2 inputs.");if(e[0].dims[e[0].dims.length-1]!==e[1].dims[e[1].dims.length-2])throw new Error("shared dimension does not match.")},T_=e=>{kp(e.inputs);let t=ai.calcShape(e.inputs[0].dims,e.inputs[1].dims,!0);if(!t)throw new Error("Can't use matmul on the given tensors");let r=t[t.length-1],i=e.inputs[0].dims[e.inputs[0].dims.length-1];if(r<8&&i<8)e.compute(nu(e.inputs,{activation:""},t));else{let s=t[t.length-2],n=B.size(e.inputs[0].dims.slice(0,-2)),a=B.size(e.inputs[1].dims.slice(0,-2));if(n!==1&&s===1&&a===1){let o=e.inputs[0].reshape([1,n,i]),u=e.inputs[1].reshape([1,i,r]),d=[1,n,r],c=[o,u];e.compute(Zn(c,{activation:""},t,d),{inputs:c})}else e.compute(Zn(e.inputs,{activation:""},t))}}}),Tp,Ip,Ep,I_,E_,X$=j(()=>{_e(),we(),qe(),ve(),Tp=(e,t)=>{if(e.length<3||e.length>4)throw new Error("MatMulNBits requires 3 or 4 inputs");let r=e[0],i=r.dims.length;if(r.dims[i-1]!==t.k)throw new Error("The last dim of input shape does not match the k value");let s=Math.floor((t.k+t.blockSize-1)/t.blockSize),n=t.blockSize/8*t.bits,a=e[1];if(!B.areEqual(a.dims,[t.n,s,n]))throw new Error("The second inputs must be 3D tensor with shape N X nBlocksPerCol X blobSize");let o=e[2].dims;if(B.size(o)!==t.n*s)throw new Error("scales input size error.");if(e.length===4){let u=e[3].dims,d=t.bits>4?t.n*s:t.n*Math.floor((s+1)/2);if(B.size(u)!==d)throw new Error("zeroPoints input size error.")}},Ip=(e,t)=>{let r=e[0].dims,i=r.length,s=r[i-2],n=t.k,a=t.n,o=r.slice(0,i-2),u=B.size(o),d=e[1].dims[2]/4,c=e[0].dataType,f=Le(t.k),h=Le(d),m=Le(a),g=o.concat([s,a]),y=s>1&&a/m%2===0?2:1,S=B.size(g)/m/y,v=64,b=[],k=[u,s,n/f],x=B.convertShape(e[1].dims).slice();x.splice(-1,1,d/h),b.push(...pe(k)),b.push(...pe(x)),b.push(...pe(e[2].dims)),e.length===4&&b.push(...pe(B.convertShape(e[3].dims)));let I=[u,s,a/m];b.push(...pe(I));let z=O=>{let A=k.length,R=P("a",e[0].dataType,A,f),W=P("b",12,x.length,h),ie=P("scales",e[2].dataType,e[2].dims.length),X=[R,W,ie],ne=e.length===4?P("zero_points",12,e[3].dims.length):void 0;ne&&X.push(ne);let Y=I.length,oe=ue("output",e[0].dataType,Y,m),V=Xe(e[0].dataType),ae=(()=>{switch(f){case 1:return`array<${V}, 8>`;case 2:return`mat4x2<${V}>`;case 4:return`mat2x4<${V}>`;default:throw new Error(`${f}-component is not supported.`)}})(),Z=()=>{let D=` + // reuse a data + var input_offset = ${R.indicesToOffset(`${R.type.indices}(batch, row, word_offset)`)}; + var a_data: ${ae}; + for (var j: u32 = 0; j < ${8/f}; j++) { + a_data[j] = ${R.getByOffset("input_offset")}; + input_offset++; + } + `;for(let q=0;q> 4) & b_mask); + b_quantized_values = ${ae}(${Array.from({length:4},(ee,ye)=>`${V}(b_value_lower[${ye}]), ${V}(b_value_upper[${ye}])`).join(", ")}); + b_dequantized_values = ${f===1?`${ae}(${Array.from({length:8},(ee,ye)=>`(b_quantized_values[${ye}] - ${ne?`zero_point${q}`:"zero_point"}) * scale${q}`).join(", ")});`:`(b_quantized_values - ${ae}(${Array(8).fill(`${ne?`zero_point${q}`:"zero_point"}`).join(",")})) * scale${q};`}; + workgroup_shared[local_id.x * ${y} + ${Math.floor(q/m)}]${m>1?`[${q%m}]`:""} += ${Array.from({length:8/f},(ee,ye)=>`${f===1?`a_data[${ye}] * b_dequantized_values[${ye}]`:`dot(a_data[${ye}], b_dequantized_values[${ye}])`}`).join(" + ")}; + `;return D},le=()=>{let D=` + var col_index = col * ${m}; + ${ne?` + let zero_point_bytes_per_col = (nBlocksPerCol + 1) / 2; + var zero_point_byte_count: u32; + var zero_point_word_index: u32; + var zero_point_byte_offset: u32; + let zero_point_nibble_offset: u32 = block & 0x1u; + var zero_point_bits_offset: u32; + var zero_point_word: u32;`:` + // The default zero point is 8 for unsigned 4-bit quantization. + let zero_point = ${V}(8);`} + `;for(let q=0;q> 0x1u); + zero_point_word_index = zero_point_byte_count >> 0x2u; + zero_point_byte_offset = zero_point_byte_count & 0x3u; + zero_point_bits_offset = (zero_point_byte_offset << 3) + (zero_point_nibble_offset << 2); + zero_point_word = ${ne.getByOffset("zero_point_word_index")} >> zero_point_bits_offset; + let zero_point${q} = ${V}((zero_point_word) & 0xFu);`:""} + col_index += 1;`;return D},Be=()=>{let D=`col_index = col * ${m};`;for(let q=0;q; + var b_value_upper: vec4; + var b_quantized_values: ${ae}; + var b_dequantized_values: ${ae};`,D};return` + var workgroup_shared: array<${oe.type.value}, ${y*v}>; + ${O.declareVariables(...X,oe)} + ${O.mainStart([v,1,1])} + let output_indices = ${oe.offsetToIndices(`(global_idx / ${v}) * ${y}`)}; + let col = output_indices[2]; + let row = output_indices[1]; + let batch = output_indices[0]; + let nBlocksPerCol = uniforms.b_shape[1]; + + for (var block = local_id.x; block < nBlocksPerCol; block += ${v}) { + //process one block + var word_offset: u32 = block * ${t.blockSize/f}; + ${le()} + for (var word: u32 = 0; word < ${d}; word += ${h}) { + ${Be()} + for (var i: u32 = 0; i < ${h}; i++) { + ${Z()} + word_offset += ${8/f}; + } + } + } + workgroupBarrier(); + + if (local_id.x < ${y}) { + var output_value: ${oe.type.value} = ${oe.type.value}(0); + var workgroup_shared_offset: u32 = local_id.x; + for (var b: u32 = 0u; b < ${v}u; b++) { + output_value += workgroup_shared[workgroup_shared_offset]; + workgroup_shared_offset += ${y}; + } + ${oe.setByIndices(`${oe.type.indices}(batch, row, col + local_id.x)`,"output_value")}; + } + }`};return{name:"MatMulNBits",shaderCache:{hint:`${t.blockSize};${t.bits};${f};${h};${m};${y};${v}`,inputDependencies:Array(e.length).fill("rank")},getRunData:()=>({outputs:[{dims:g,dataType:c}],dispatchGroup:{x:S},programUniforms:b}),getShaderSource:z}},Ep=(e,t)=>{let r=e[0].dims,i=r.length,s=r[i-2],n=t.k,a=t.n,o=r.slice(0,i-2),u=B.size(o),d=e[1].dims[2]/4,c=e[0].dataType,f=Le(t.k),h=Le(d),m=o.concat([s,a]),g=128,y=a%8===0?8:a%4===0?4:1,S=g/y,v=S*h*8,b=v/f,k=v/t.blockSize,x=B.size(m)/y,I=[],z=[u,s,n/f],O=B.convertShape(e[1].dims).slice();O.splice(-1,1,d/h),I.push(...pe(z)),I.push(...pe(O)),I.push(...pe(e[2].dims)),e.length===4&&I.push(...pe(B.convertShape(e[3].dims)));let A=[u,s,a];I.push(...pe(A));let R=W=>{let ie=z.length,X=P("a",e[0].dataType,ie,f),ne=P("b",12,O.length,h),Y=P("scales",e[2].dataType,e[2].dims.length),oe=[X,ne,Y],V=e.length===4?P("zero_points",12,e[3].dims.length):void 0;V&&oe.push(V);let ae=A.length,Z=ue("output",e[0].dataType,ae),le=Xe(e[0].dataType),Be=()=>{switch(f){case 1:return` + let a_data0 = vec4<${le}>(sub_a[word_offset], sub_a[word_offset + 1], sub_a[word_offset + 2], sub_a[word_offset + 3]); + let a_data1 = vec4<${le}>(sub_a[word_offset + 4], sub_a[word_offset + 5], sub_a[word_offset + 6], sub_a[word_offset + 7]);`;case 2:return` + let a_data0 = vec4<${le}>(sub_a[word_offset], sub_a[word_offset + 1]); + let a_data1 = vec4<${le}>(sub_a[word_offset + 2], sub_a[word_offset + 3]);`;case 4:return` + let a_data0 = sub_a[word_offset]; + let a_data1 = sub_a[word_offset + 1];`;default:throw new Error(`${f}-component is not supported.`)}};return` + var sub_a: array<${X.type.value}, ${b}>; + var inter_results: array, ${y}>; + ${W.declareVariables(...oe,Z)} + ${W.mainStart([S,y,1])} + let output_indices = ${Z.offsetToIndices(`workgroup_index * ${y}`)}; + let col = output_indices[2]; + let row = output_indices[1]; + let batch = output_indices[0]; + let n_blocks_per_col = uniforms.b_shape[1]; + let num_tiles = (n_blocks_per_col - 1) / ${k} + 1; + + // Loop over shared dimension. + for (var tile: u32 = 0; tile < num_tiles; tile += 1) { + let a_col_start = tile * ${b}; + // load one tile A data into shared memory. + for (var a_offset = local_idx; a_offset < ${b}; a_offset += ${g}) + { + let a_col = a_col_start + a_offset; + if (a_col < uniforms.a_shape[2]) + { + sub_a[a_offset] = ${X.getByIndices(`${X.type.indices}(batch, row, a_col)`)}; + } else { + sub_a[a_offset] = ${X.type.value}(0); + } + } + workgroupBarrier(); + + // each thread process one block + let b_row = col + local_id.y; + let block = tile * ${k} + local_id.x; + ${V?` + let zero_point_bytes_per_col = (n_blocks_per_col + 1) / 2; + let zero_point_byte_count = b_row * zero_point_bytes_per_col + (block >> 0x1u); + let zero_point_word_index = zero_point_byte_count >> 0x2u; + let zero_point_byte_offset = zero_point_byte_count & 0x3u; + let zero_point_nibble_offset: u32 = block & 0x1u; + let zero_point_bits_offset = (zero_point_byte_offset << 3) + (zero_point_nibble_offset << 2); + let zero_point_word = ${V.getByOffset("zero_point_word_index")} >> zero_point_bits_offset; + let zero_point = ${le}((zero_point_word) & 0xFu);`:` + // The default zero point is 8 for unsigned 4-bit quantization. + let zero_point = ${le}(8);`} + let scale = ${Y.getByOffset("b_row * n_blocks_per_col + block")}; + let b_data = ${ne.getByIndices(`${ne.type.indices}(b_row, block, 0)`)}; + var word_offset = local_id.x * ${t.blockSize/f}; + for (var i: u32 = 0; i < ${h}; i++) { + ${Be()} + let b_value = ${h===1?"b_data":"b_data[i]"}; + let b_value_lower = unpack4xU8(b_value & 0x0F0F0F0Fu); + let b_value_upper = unpack4xU8((b_value >> 4) & 0x0F0F0F0Fu); + let b_quantized_values = mat2x4<${le}>(${Array.from({length:4},(D,q)=>`${le}(b_value_lower[${q}]), ${le}(b_value_upper[${q}])`).join(", ")}); + let b_dequantized_values = (b_quantized_values - mat2x4<${le}>(${Array(8).fill("zero_point").join(",")})) * scale; + inter_results[local_id.y][local_id.x] += ${Array.from({length:2},(D,q)=>`${`dot(a_data${q}, b_dequantized_values[${q}])`}`).join(" + ")}; + word_offset += ${8/f}; + } + workgroupBarrier(); + } + + if (local_idx < ${y}) { + var output_value: ${Z.type.value} = ${Z.type.value}(0); + for (var b = 0u; b < ${S}; b++) { + output_value += inter_results[local_idx][b]; + } + if (col + local_idx < uniforms.output_shape[2]) + { + ${Z.setByIndices(`${Z.type.indices}(batch, row, col + local_idx)`,"output_value")} + } + } + }`};return{name:"BlockwiseMatMulNBits32",shaderCache:{hint:`${t.blockSize};${f};${h};${S};${y}`,inputDependencies:Array(e.length).fill("rank")},getRunData:()=>({outputs:[{dims:m,dataType:c}],dispatchGroup:{x},programUniforms:I}),getShaderSource:R}},I_=(e,t)=>{Tp(e.inputs,t),t.blockSize===32&&e.adapterInfo.isVendor("intel")&&e.adapterInfo.isArchitecture("gen-12lp")?e.compute(Ep(e.inputs,t)):e.compute(Ip(e.inputs,t))},E_=e=>Oe(e)}),Cp,zp,Op,Ap,Rp,Bp,Mp,Np,C_,Y$=j(()=>{_e(),we(),ve(),Cp=e=>{if(!e||e.length<1)throw new Error("Too few inputs");if(e[0].dataType!==1&&e[0].dataType!==10)throw new Error("Input type must be float or float16.");if(e.length>=2){let t=e[0].dims.length*2===e[1].dims[0];if(e.length===4&&(t=e[3].dims[0]*2===e[1].dims[0]),!t)throw new Error("The pads should be a 1D tensor of shape [2 * input_rank] or [2 * num_axes].")}},zp=(e,t,r)=>{let i="";for(let s=t-1;s>=0;--s)i+=` + k = i32(${e.indicesGet("indices",s)}) - ${de("uniforms.pads",s,r)}; + if (k < 0) { + break; + } + if (k >= i32(${de("uniforms.x_shape",s,t)})) { + break; + } + offset += k * i32(${de("uniforms.x_strides",s,t)}); + `;return` + value = ${e.type.value}(uniforms.constant_value); + for (var i = 0; i < 1; i++) { + var offset = 0; + var k = 0; + ${i} + value = x[offset]; + } + `},Op=(e,t,r)=>{let i="";for(let s=t-1;s>=0;--s)i+=` + k = i32(${e.indicesGet("indices",s)}) - ${de("uniforms.pads",s,r)}; + if (k < 0) { + k = -k; + } + { + let _2n_1 = 2 * (i32(${de("uniforms.x_shape",s,t)}) - 1); + k = k % _2n_1; + if(k >= i32(${de("uniforms.x_shape",s,t)})) { + k = _2n_1 - k; + } + } + offset += k * i32(${de("uniforms.x_strides",s,t)}); + `;return` + var offset = 0; + var k = 0; + ${i} + value = x[offset]; + `},Ap=(e,t,r)=>{let i="";for(let s=t-1;s>=0;--s)i+=` + k = i32(${e.indicesGet("indices",s)}) - ${de("uniforms.pads",s,r)}; + if (k < 0) { + k = 0; + } + if (k >= i32(${de("uniforms.x_shape",s,t)})) { + k = i32(${de("uniforms.x_shape",s,t)}) - 1; + } + offset += k * i32(${de("uniforms.x_strides",s,t)}); + `;return` + var offset = 0; + var k = 0; + ${i} + value = x[offset]; + `},Rp=(e,t,r)=>{let i="";for(let s=t-1;s>=0;--s)i+=` + k = i32(${e.indicesGet("indices",s)}) - ${de("uniforms.pads",s,r)}; + if (k < 0) { + k += i32(${de("uniforms.x_shape",s,t)}]); + } + if (k >= i32(${de("uniforms.x_shape",s,t)})) { + k -= i32(${de("uniforms.x_shape",s,t)}); + } + offset += k * i32(${de("uniforms.x_strides",s,t)}); + `;return` + var offset = 0; + var k = 0; + ${i} + value = x[offset]; + `},Bp=(e,t,r)=>{switch(r.mode){case 0:return zp(e,t,r.pads.length);case 1:return Op(e,t,r.pads.length);case 2:return Ap(e,t,r.pads.length);case 3:return Rp(e,t,r.pads.length);default:throw new Error("Invalid mode")}},Mp=(e,t)=>{let r=B.padShape(e[0].dims.slice(),t.pads),i=e[0].dims,s=B.size(r),n=[{type:12,data:s},{type:6,data:t.pads}],a=e.length>=3&&e[2].data;t.mode===0&&n.push({type:a?e[2].dataType:1,data:t.value}),n.push(...pe(e[0].dims,r));let o=["rank"],u=d=>{let c=ue("output",e[0].dataType,r.length),f=P("x",e[0].dataType,i.length),h=f.type.value,m=Bp(c,i.length,t),g=[{name:"output_size",type:"u32"},{name:"pads",type:"i32",length:t.pads.length}];return t.mode===0&&g.push({name:"constant_value",type:a?h:"f32"}),` + ${d.registerUniforms(g).declareVariables(f,c)} + ${d.mainStart()} + ${d.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + + let indices = ${c.offsetToIndices("global_idx")}; + + var value = ${h}(0); + ${m} + output[global_idx] = value; + }`};return{name:"Pad",shaderCache:{hint:`${t.mode}${a}`,inputDependencies:o},getRunData:()=>({outputs:[{dims:r,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(B.size(r)/64)},programUniforms:n}),getShaderSource:u}},Np=(e,t)=>{if(e.length>1){let r=e[1].getBigInt64Array(),i=e.length>=3&&e[2].data?e[2].dataType===10?e[2].getUint16Array()[0]:e[2].getFloat32Array()[0]:0,s=e[0].dims.length,n=new Int32Array(2*s).fill(0);if(e.length>=4){let o=e[3].getBigInt64Array();for(let u=0;un[Number(u)]=Number(o));let a=[];return n.forEach(o=>a.push(o)),{mode:t.mode,value:i,pads:a}}else return t},C_=(e,t)=>{Cp(e.inputs);let r=Np(e.inputs,t);e.compute(Mp(e.inputs,r),{inputs:[0]})}}),ki,ya,ba,wa,va,Dp,Pp,$a,xa,z_,O_,Sa,A_,R_,ka,B_,M_,N_,D_,Q$=j(()=>{Ut(),_e(),we(),ve(),ki=e=>{if(We.webgpu.validateInputContent&&(!e||e.length!==1))throw new Error("Pool ops requires 1 input.")},ya=(e,t,r)=>{let i=t.format==="NHWC",s=e.dims.slice();i&&s.splice(1,0,s.pop());let n=Object.hasOwnProperty.call(t,"dilations"),a=t.kernelShape.slice(),o=t.strides.slice(),u=n?t.dilations.slice():[],d=t.pads.slice();jn.adjustPoolAttributes(r,s,a,o,u,d);let c=jn.computePoolOutputShape(r,s,o,u,a,d,t.autoPad),f=Object.assign({},t);n?Object.assign(f,{kernelShape:a,strides:o,pads:d,dilations:u,cacheKey:t.cacheKey}):Object.assign(f,{kernelShape:a,strides:o,pads:d,cacheKey:t.cacheKey});let h=c.slice();return h.push(h.splice(1,1)[0]),[f,i?h:c]},ba=(e,t)=>{let r=t.format==="NHWC",i=B.size(e),s=B.size(t.kernelShape),n=[{type:12,data:i},{type:12,data:s}],a=[{name:"outputSize",type:"u32"},{name:"kernelSize",type:"u32"}];if(t.kernelShape.length<=2){let o=t.kernelShape[t.kernelShape.length-1],u=t.strides[t.strides.length-1],d=t.pads[t.pads.length/2-1],c=t.pads[t.pads.length-1],f=!!(d+c);n.push({type:12,data:o},{type:12,data:u},{type:12,data:d},{type:12,data:c}),a.push({name:"kw",type:"u32"},{name:"sw",type:"u32"},{name:"pwStart",type:"u32"},{name:"pwEnd",type:"u32"});let h=!1;if(t.kernelShape.length===2){let m=t.kernelShape[t.kernelShape.length-2],g=t.strides[t.strides.length-2],y=t.pads[t.pads.length/2-2],S=t.pads[t.pads.length-2];h=!!(y+S),n.push({type:12,data:m},{type:12,data:g},{type:12,data:y},{type:12,data:S}),a.push({name:"kh",type:"u32"},{name:"sh",type:"u32"},{name:"phStart",type:"u32"},{name:"phEnd",type:"u32"})}return[n,a,!0,f,h]}else{if(r)throw new Error("Pooling with kernelShape.length > 2 is not supported for NHWC format.");let o=B.computeStrides(t.kernelShape);n.push({type:12,data:o},{type:12,data:t.pads},{type:12,data:t.strides}),a.push({name:"kernelStrides",type:"u32",length:o.length},{name:"pads",type:"u32",length:t.pads.length},{name:"strides",type:"u32",length:t.strides.length});let u=t.pads.reduce((d,c)=>d+c);return[n,a,!!u,!1,!1]}},wa=(e,t,r,i,s,n,a,o,u,d,c,f)=>{let h=s.format==="NHWC",m=t.type.value,g=ue("output",t.type.tensor,i);if(s.kernelShape.length<=2){let y="",S="",v="",b=r-(h?2:1);if(c?y=` + for (var i: u32 = 0u; i < uniforms.kw; i++) { + xIndices[${b}] = indices[${b}] * uniforms.sw - uniforms.pwStart + i; + if (xIndices[${b}] < 0 || xIndices[${b}] + >= uniforms.x_shape[${b}]) { + pad++; + continue; + } + let x_val = x[${t.indicesToOffset("xIndices")}]; + ${n} + }`:y=` + for (var i: u32 = 0u; i < uniforms.kw; i++) { + xIndices[${b}] = indices[${b}] * uniforms.sw - uniforms.pwStart + i; + let x_val = x[${t.indicesToOffset("xIndices")}]; + ${n} + }`,s.kernelShape.length===2){let k=r-(h?3:2);f?S=` + for (var j: u32 = 0u; j < uniforms.kh; j++) { + xIndices[${k}] = indices[${k}] * uniforms.sh - uniforms.phStart + j; + if (xIndices[${k}] < 0 || xIndices[${k}] >= uniforms.x_shape[${k}]) { + pad += i32(uniforms.kw); + continue; + } + `:S=` + for (var j: u32 = 0u; j < uniforms.kh; j++) { + xIndices[${k}] = indices[${k}] * uniforms.sh - uniforms.phStart + j; + `,v=` + } + `}return` + ${e.registerUniforms(u).declareVariables(t,g)} + + ${e.mainStart()} + ${e.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")} + + let indices = ${g.offsetToIndices("global_idx")}; + var xIndices = ${g.offsetToIndices("global_idx")}; + + var value = ${m}(${o}); + var pad = 0; + ${S} + ${y} + ${v} + ${a} + + output[global_idx] = value; + }`}else{if(h)throw new Error("Pooling with kernelShape.length > 2 is not supported for NHWC format.");let y=s.kernelShape.length,S=s.pads.length,v="";return d?v=` + if (xIndices[j] >= uniforms.x_shape[j]) { + pad++; + isPad = true; + break; + } + } + if (!isPad) { + let x_val = x[${t.indicesToOffset("xIndices")}]; + ${n} + }`:v=` + } + let x_val = x[${t.indicesToOffset("xIndices")}]; + ${n} + `,` + ${e.registerUniforms(u).declareVariables(t,g)} + + ${e.mainStart()} + ${e.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")} + let indices = ${g.offsetToIndices("global_idx")}; + var xIndices = ${g.offsetToIndices("global_idx")}; + + var offsets: array; + + var value = ${m}(${o}); + var pad = 0; + var isPad = false; + + for (var i: u32 = 0u; i < uniforms.kernelSize; i++) { + var offset = i; + for (var j = 0u; j < ${y-1}u; j++) { + offsets[j] = offset / ${de("uniforms.kernelStrides","j",y)}; + offset -= offsets[j] * ${de("uniforms.kernelStrides","j",y)}; + } + offsets[${y-1}] = offset; + + isPad = false; + for (var j = ${r-y}u; j < ${r}u; j++) { + xIndices[j] = indices[j] * ${de("uniforms.strides",`j - ${r-y}u`,y)} + + offsets[j - ${r-y}u] - ${de("uniforms.pads","j - 2u",S)}; + ${v} + } + ${a} + + output[global_idx] = value; + }`}},va=e=>`${e.format};${e.ceilMode};${e.autoPad};${e.kernelShape.length}`,Dp=e=>`${va(e)};${e.countIncludePad}`,Pp=e=>`${va(e)};${e.storageOrder};${e.dilations}`,$a=e=>({format:e.format,autoPad:["NOTSET","VALID","SAME_UPPER","SAME_LOWER"][e.auto_pad],ceilMode:e.ceil_mode,kernelShape:e.kernel_shape,strides:e.strides,pads:e.pads}),xa=(e,t,r,i)=>{let[s,n]=ya(t,i,r),a=P("x",t.dataType,t.dims.length),o=a.type.value,u="value += x_val;",d="";s.countIncludePad?d+=`value /= ${o}(uniforms.kernelSize);`:d+=`value /= ${o}(i32(uniforms.kernelSize) - pad);`;let[c,f,h,m,g]=ba(n,s);c.push(...pe(t.dims,n));let y=["rank"];return{name:e,shaderCache:{hint:`${i.cacheKey};${h};${m};${g}`,inputDependencies:y},getRunData:()=>({outputs:[{dims:n,dataType:t.dataType}],dispatchGroup:{x:Math.ceil(B.size(n)/64)},programUniforms:c}),getShaderSource:S=>wa(S,a,t.dims.length,n.length,s,u,d,0,f,h,m,g)}},z_=e=>{let t=e.count_include_pad!==0,r=$a(e);if(r.ceilMode!==0)throw new Error("using ceil() in shape computation is not yet supported for AveragePool");let i={countIncludePad:t,...r,cacheKey:""};return{...i,cacheKey:Dp(i)}},O_=(e,t)=>{ki(e.inputs),e.compute(xa("AveragePool",e.inputs[0],!1,t))},Sa={autoPad:"",ceilMode:0,countIncludePad:!1,kernelShape:[],strides:[],pads:[],storageOrder:0,dilations:[]},A_=e=>{let t=e.format;return{format:t,...Sa,cacheKey:t}},R_=(e,t)=>{ki(e.inputs),e.compute(xa("GlobalAveragePool",e.inputs[0],!0,t))},ka=(e,t,r,i)=>{let[s,n]=ya(t,i,r),a=` + value = max(x_val, value); + `,o="",u=P("x",t.dataType,t.dims.length),d=["rank"],[c,f,h,m,g]=ba(n,s);return c.push(...pe(t.dims,n)),{name:e,shaderCache:{hint:`${i.cacheKey};${h};${m};${g}`,inputDependencies:d},getRunData:()=>({outputs:[{dims:n,dataType:t.dataType}],dispatchGroup:{x:Math.ceil(B.size(n)/64)},programUniforms:c}),getShaderSource:y=>wa(y,u,t.dims.length,n.length,s,a,o,t.dataType===10?-65504:-1e5,f,h,m,g)}},B_=(e,t)=>{ki(e.inputs),e.compute(ka("MaxPool",e.inputs[0],!1,t))},M_=e=>{let t=e.storage_order,r=e.dilations,i=$a(e);if(t!==0)throw new Error("column major storage order is not yet supported for MaxPool");if(i.ceilMode!==0)throw new Error("using ceil() in shape computation is not yet supported for MaxPool");let s={storageOrder:t,dilations:r,...i,cacheKey:""};return{...s,cacheKey:Pp(s)}},N_=e=>{let t=e.format;return{format:t,...Sa,cacheKey:t}},D_=(e,t)=>{ki(e.inputs),e.compute(ka("GlobalMaxPool",e.inputs[0],!0,t))}}),Up,Wp,P_,U_,J$=j(()=>{_e(),we(),qe(),ve(),Up=(e,t)=>{if(e.length<2||e.length>3)throw new Error("DequantizeLinear requires 2 or 3 inputs.");if(e.length===3&&e[1].dims===e[2].dims)throw new Error("x-scale and x-zero-point must have the same shape.");if(e.length===3&&e[0].dataType!==e[2].dataType)throw new Error("x and x-zero-point must have the same data type.");if(e[0].dataType===6&&e.length>2)throw new Error("In the case of dequantizing int32 there is no zero point.");if(e[1].dims.length!==0&&e[1].dims.length!==1&&e[1].dims.length!==e[0].dims.length)throw new Error("scale input must be a scalar, a 1D tensor, or have the same rank as the input tensor.");if(e.length>2){if(e[0].dataType!==e[2].dataType)throw new Error("x and x-zero-point must have the same data type.");if(e[1].dims.length!==e[2].dims.length)throw new Error("scale and zero-point inputs must have the same rank.");if(!e[1].dims.map((r,i)=>r===e[2].dims[i]).reduce((r,i)=>r&&i,!0))throw new Error("scale and zero-point inputs must have the same shape.")}if(t.blockSize>0){if(e[1].dims.length===0||e[1].dims.length===1&&e[1].dims[0]===1)throw new Error("blockSize must be set only for block quantization.");if(!e[1].dims.map((s,n)=>n===t.axis||s===e[0].dims[n]).reduce((s,n)=>s&&n,!0))throw new Error("For block qunatization, scale input shape to match the input shape except for the axis");if(e[1].dims.length!==e[0].dims.length)throw new Error("For block qunatization the scale input rank must be the same as the x rank.");let r=e[0].dims[t.axis],i=e[1].dims[t.axis];if(t.blockSizeMath.ceil(r/(i-1)-1))throw new Error("blockSize must be with in the range [ceil(dI / Si), ceil(dI / (Si - 1) - 1)].")}},Wp=(e,t)=>{let r=B.normalizeAxis(t.axis,e[0].dims.length),i=e[0].dataType,s=i===3,n=e[0].dims,a=e[1].dataType,o=B.size(n),u=i===3||i===2,d=u?[Math.ceil(B.size(e[0].dims)/4)]:e[0].dims,c=e[1].dims,f=e.length>2?e[2]:void 0,h=f?u?[Math.ceil(B.size(f.dims)/4)]:f.dims:void 0,m=c.length===0||c.length===1&&c[0]===1,g=m===!1&&c.length===1,y=Le(o),S=m&&(!u||y===4),v=S?y:1,b=S&&!u?y:1,k=P("input",u?12:i,d.length,b),x=P("scale",a,c.length),I=f?P("zero_point",u?12:i,h.length):void 0,z=ue("output",a,n.length,v),O=[k,x];I&&O.push(I);let A=[d,c];f&&A.push(h);let R=[{type:12,data:o/v},{type:12,data:r},{type:12,data:t.blockSize},...pe(...A,n)],W=ie=>{let X=[{name:"output_size",type:"u32"},{name:"axis",type:"u32"},{name:"block_size",type:"u32"}];return` + ${ie.registerUniforms(X).declareVariables(...O,z)} + ${ie.mainStart()} + ${ie.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + let output_indices = ${z.offsetToIndices("global_idx")}; + + // Set input x + ${u?` + let input = ${k.getByOffset("global_idx / 4")}; + let x_vec = ${s?"unpack4xI8(input)":"unpack4xU8(input)"}; + let x_value = ${v===1?"x_vec[global_idx % 4]":"x_vec"};`:`let x_value = ${k.getByOffset("global_idx")};`}; + + // Set scale input + ${m?`let scale_value= ${x.getByOffset("0")}`:g?` + let scale_index = ${z.indicesGet("output_indices","uniforms.axis")}; + let scale_value= ${x.getByOffset("scale_index")};`:` + var scale_indices: ${x.type.indices} = output_indices; + let index = ${x.indicesGet("scale_indices","uniforms.axis")} / uniforms.block_size; + ${x.indicesSet("scale_indices","uniforms.axis","index")}; + let scale_value= ${x.getByIndices("scale_indices")};`}; + + // Set zero-point input + ${I?m?u?` + let zero_point_input = ${I.getByOffset("0")}; + let zero_point_vec = ${s?"unpack4xI8(zero_point_input)":"unpack4xU8(zero_point_input)"}; + let zero_point_value= zero_point_vec[0]`:`let zero_point_value = ${I.getByOffset("0")}`:g?u?` + let zero_point_index = ${z.indicesGet("output_indices","uniforms.axis")}; + let zero_point_input = ${I.getByOffset("zero_point_index / 4")}; + let zero_point_vec = ${s?"unpack4xI8(zero_point_input)":"unpack4xU8(zero_point_input)"}; + let zero_point_value = zero_point_vec[zero_point_index % 4]`:` + let zero_point_index = ${z.indicesGet("output_indices","uniforms.axis")}; + let zero_point_value = ${I.getByOffset("zero_point_index")};`:u?` + let zero_point_offset = ${x.indicesToOffset("scale_indices")}; + let zero_point_input = ${I.getByOffset("zero_point_offset / 4")}; + let zero_point_vec = ${s?"unpack4xI8(zero_point_input)":"unpack4xU8(zero_point_input)"}; + let zero_point_value = zero_point_vec[zero_point_offset % 4];`:`let zero_point_value = ${I.getByIndices("scale_indices")};`:`let zero_point_value = ${u?s?"i32":"u32":k.type.value}(0);`}; + // Compute and write output + ${z.setByOffset("global_idx",`${z.type.value}(x_value - zero_point_value) * scale_value`)}; + }`};return{name:"DequantizeLinear",shaderCache:{hint:t.cacheKey,inputDependencies:I?["rank","rank","rank"]:["rank","rank"]},getShaderSource:W,getRunData:()=>({outputs:[{dims:n,dataType:a}],dispatchGroup:{x:Math.ceil(o/v/64),y:1,z:1},programUniforms:R})}},P_=(e,t)=>{Up(e.inputs,t),e.compute(Wp(e.inputs,t))},U_=e=>Oe({axis:e.axis,blockSize:e.blockSize})}),Lp,qp,W_,e2=j(()=>{Ut(),_e(),ve(),Lp=(e,t,r)=>{let i=e===t,s=et&&r>0;if(i||s||n)throw new Error("Range these inputs' contents are invalid.")},qp=(e,t,r,i)=>{let s=Math.abs(Math.ceil((t-e)/r)),n=[s],a=s,o=[{type:12,data:a},{type:i,data:e},{type:i,data:r},...pe(n)],u=d=>{let c=ue("output",i,n.length),f=c.type.value,h=[{name:"outputSize",type:"u32"},{name:"start",type:f},{name:"delta",type:f}];return` + ${d.registerUniforms(h).declareVariables(c)} + ${d.mainStart()} + ${d.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")} + output[global_idx] = uniforms.start + ${f}(global_idx) * uniforms.delta; + }`};return{name:"Range",shaderCache:{hint:`${i}`},getShaderSource:u,getRunData:()=>({outputs:[{dims:n,dataType:i}],dispatchGroup:{x:Math.ceil(a/64)},programUniforms:o})}},W_=e=>{let t=0,r=0,i=0;e.inputs[0].dataType===6?(t=e.inputs[0].getInt32Array()[0],r=e.inputs[1].getInt32Array()[0],i=e.inputs[2].getInt32Array()[0]):e.inputs[0].dataType===1&&(t=e.inputs[0].getFloat32Array()[0],r=e.inputs[1].getFloat32Array()[0],i=e.inputs[2].getFloat32Array()[0]),We.webgpu.validateInputContent&&Lp(t,r,i),e.compute(qp(t,r,i,e.inputs[0].dataType),{inputs:[]})}}),Vp,Ta,Ia,Fp,L_,q_,t2=j(()=>{_e(),we(),qe(),ve(),Vp=(e,t,r,i)=>{if(e!=="none"&&i!=="i32"&&i!=="u32"&&i!=="f32")throw new Error(`Input ${i} is not supported with reduction ${e}.`);let s=`{ + var oldValue = 0; + loop { + let newValueF32 =`,n=`; + let newValue = bitcast(newValueF32); + let res = atomicCompareExchangeWeak(&${t}, oldValue, newValue); + if res.exchanged { + break; + } + oldValue = res.old_value; + } + }`;switch(e){case"none":return`${t}=${r};`;case"add":return i==="i32"||i==="u32"?`atomicAdd(&${t}, bitcast<${i}>(${r}));`:` + ${s}bitcast<${i}>(oldValue) + (${r})${n}`;case"max":return i==="i32"||i==="u32"?`atomicMax(&${t}, bitcast<${i}>(${r}));`:` + ${s}max(bitcast(oldValue), (${r}))${n}`;case"min":return i==="i32"||i==="u32"?`atomicMin(&${t}, bitcast<${i}>(${r}));`:`${s}min(bitcast<${i}>(oldValue), (${r}))${n}`;case"mul":return`${s}(bitcast<${i}>(oldValue) * (${r}))${n}`;default:throw new Error(`Reduction ${e} is not supported.`)}},Ta=(e,t)=>`${e===1?` + let element_count_dim = uniforms.output_strides; + let dim_value = uniforms.output_shape;`:` + let element_count_dim = uniforms.output_strides[${t?"i - indices_start":"i"}]; + let dim_value = uniforms.output_shape[${t?"i - indices_start":"i"} + uniforms.last_index_dimension];`} + + if (index >= 0) { + if (index >= i32(dim_value)) { + index = i32(dim_value - 1); + } + } else { + if (index < -i32(dim_value)) { + index = 0; + } else { + index += i32(dim_value); + } + } + data_offset += u32((u32(index) * element_count_dim));`,Ia=(e,t,r)=>`for (var i = 0u; i < uniforms.num_updates_elements; i++) { + let value = updates[uniforms.num_updates_elements * ${r?"global_idx":"idx"} + i]; + ${Vp(e.reduction,"output[data_offset + i]","value",t)} + }`,Fp=(e,t)=>{let r=e[0].dims,i=e[1].dims,s=r,n=1,a=Math.ceil(B.size(i)/n),o=i[i.length-1],u=B.sizeFromDimension(r,o),d=B.sizeFromDimension(i,0)/o,c=[{type:12,data:a},{type:12,data:o},{type:12,data:u},...pe(e[1].dims,e[2].dims,s)],f=h=>{let m=P("indices",e[1].dataType,e[1].dims.length),g=P("updates",e[2].dataType,e[2].dims.length,n),y=t.reduction!=="none"&&t.reduction!==""?gm("output",e[0].dataType,s.length):ue("output",e[0].dataType,s.length,n);return` + ${h.registerUniform("output_size","u32").registerUniform("last_index_dimension","u32").registerUniform("num_updates_elements","u32").declareVariables(m,g,y)} + ${h.mainStart()} + ${h.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + var hasDuplicates = false; + if (${t.reduction==="none"}) { + for (var i = 0; i < ${d}; i = i + 1) { + for (var j = i + 1; j < ${d}; j = j + 1) { + var index_i = i32(indices[i].x); + var index_j = i32(indices[j].x); + if (index_i == index_j) { + hasDuplicates = true; + break; + } + } + if (hasDuplicates) { + break; + } + } + } + + if (${t.reduction==="none"} && hasDuplicates) { + if (global_idx != 0u) { + return; + } + // Process each index-update pair individually when duplicates exist + for (var idx = 0u; idx < ${d}u; idx++) { + var data_offset = 0u; + for (var i = 0u; i < uniforms.last_index_dimension; i++) { + var index = i32(indices[idx * uniforms.last_index_dimension + i].x); + ${Ta(r.length,!1)} + } + ${Ia(t,y.type.value,!1)} + } + return; + } + + var data_offset = 0u; + var indices_start = uniforms.last_index_dimension * global_idx; + var indices_end = indices_start + uniforms.last_index_dimension; + for (var i = indices_start; i < indices_end; i++) { + var index = i32(indices[i].x); + ${Ta(r.length,!0)} + } + ${Ia(t,y.type.value,!0)} + }`};return{name:"ScatterND",shaderCache:{hint:`${t.cacheKey}_${t.reduction}`,inputDependencies:["rank","rank"]},getRunData:()=>({outputs:[{dims:s,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(a/64)},programUniforms:c}),getShaderSource:f}},L_=e=>Oe({reduction:e.reduction}),q_=(e,t)=>{e.compute(Fp(e.inputs,t),{inputs:[e.inputs[1],e.inputs[2]],outputs:[]})}}),Hp,Gp,jp,Ea,Kp,Zp,Xp,Yp,Qp,Jp,ef,tf,Ca,rf,nf,sf,af,of,V_,F_,r2=j(()=>{_e(),we(),qe(),ve(),Hp=(e,t)=>{if(e.every(r=>r>0||(()=>{throw new Error("Resize requires scales input values to be positive")})),e.length>0){if(t.mode==="linear"){if(!(e.length===2||e.length===3||e.length===4&&e[0]===1&&e[1]===1||e.length===4&&e[0]===1&&e[3]===1||e.length===5&&e[0]===1&&e[1]===1))throw new Error(`For linear mode, Resize requires scales to be 2D, 3D, 4D with either two outermost or one innermost and + one outermost scale values equal to 1, or 5D with two outermost scale values equal to 1`)}else if(t.mode==="cubic"&&!(e.length===2||e.length===4&&e[0]===1&&e[1]===1||e.length===4&&e[0]===1&&e[3]===1))throw new Error("Resize requires scales input size to be 2 or 4 for cubic mode")}},Gp=(e,t,r)=>{t.every(s=>s>=0&&s{throw new Error("Resize requires axes input values to be positive and less than rank")}));let i=new Array(r).fill(1);return t.forEach((s,n)=>i[s]=e[n]),i},jp=(e,t,r,i,s,n)=>{let[a,o,u]=r>10?[1,2,3]:[-1,e.length>1?1:-1,-1],d=e[0].dims.length;if(a>0&&e.length>a&&e[a].dims.length>0)e[a].getFloat32Array().forEach(c=>n.push(c));else if(t.coordinateTransformMode==="tf_crop_and_resize")throw new Error("Resize requires RoI input to be specified when coordinateTransformMode is tfCropAndResize");if(o>0&&e.length>o&&e[o].dims.length===1&&e[o].dims[0]>0){if(e[o].getFloat32Array().forEach(c=>i.push(c)),i.length!==0&&i.length!==d&&r>=18&&i.length!==t.axes.length)throw new Error("Resize requires scales input size to be same as input rank or axes size for opset 18 and up");Hp(i,t),t.axes.length>0&&Gp(i,t.axes,d).forEach((c,f)=>i[f]=c)}if(u>0&&e.length>u&&e[u].dims.length===1&&e[u].dims[0]>0&&(e[u].getBigInt64Array().forEach(c=>s.push(Number(c))),s.length!==0&&s.length!==d&&r>=18&&s.length!==t.axes.length))throw new Error("Resize requires sizes input size to be same as input rank or axes size for opset 18 and up");if(t.axes.length>0){if(i.length!==0&&i.length!==t.axes.length)throw new Error('Resize requires "scales" input size to be of axes rank when axes attributes is specified');if(s.length!==0&&s.length!==t.axes.length)throw new Error('Resize requires "sizes" input size to be of rank axes rank when axes attributes is specified')}if(typeof i<"u"&&typeof s<"u"&&i.length>0&&s.length>d)throw new Error("Resize requires only of scales or sizes to be specified")},Ea=(e,t,r,i)=>` + // The whole part and the fractional part are calculated separately due to inaccuracy of floating + // point division. As an example, f32(21) / f32(7) may evaluate to 2.99... instead of 3, causing an + // offset-by-one error later in floor(). + let big = (${e}) * (${t}); + let whole = ${i}(big / (${r})); + let fract = ${i}(big % (${r})) / ${i}(${r}); + return whole + fract; +`,Kp=(e,t)=>`fn getOriginalCoordinateFromResizedCoordinate(xResized: u32, xScale: f32, lengthResized: u32, + lengthOriginal: u32, roiStart: f32, roiEnd: f32) -> ${t} { `+(()=>{switch(e){case"asymmetric":return` + if (xScale < 1.0 || floor(xScale) != xScale) { + return ${t}(xResized) / ${t}(xScale); + } else { + ${Ea("xResized","lengthOriginal","lengthResized",t)} + } + `;case"pytorch_half_pixel":return`if (lengthResized > 1) { + return (${t}(xResized) + 0.5) / ${t}(xScale) - 0.5; + } else { + return 0.0; + }`;case"tf_half_pixel_for_nn":return`return (${t}(xResized) + 0.5) / ${t}(xScale);`;case"align_corners":return`if (lengthResized == 1) { + return 0.0; + } else { + ${Ea("xResized","lengthOriginal - 1","lengthResized - 1",t)} + }`;case"tf_crop_and_resize":return`if (lengthResized > 1) { + return ${t}(roiStart) * ${t}(lengthOriginal - 1) + + (${t}(xResized) * ${t}(roiEnd - roiStart) * ${t}(lengthOriginal - 1)) / + ${t}(lengthResized - 1); + } else { + return 0.5 * ${t}(roiStart + roiEnd) * ${t}(lengthOriginal - 1); + }`;case"half_pixel_symmetric":return`const outputWidth = ${t}xScale * ${t}(lengthResized); + const adjustment = ${t}(lengthResized) / outputWidth; + const center = ${t}(lengthOriginal) / 2; + const offset = center * (1 - adjustment); + return offset + ((${t}(xResized) + 0.5) / ${t}(xScale)) - 0.5;`;case"half_pixel":return`return ((${t}(xResized) + 0.5) / ${t}(xScale)) - 0.5;`;default:throw new Error(`Coordinate transform mode ${e} is not supported`)}})()+"}",Zp=(e,t,r)=>`fn getNearestPixelFromOriginal(xOriginal: ${r}, isDownSample: bool) -> ${r} {`+(()=>{switch(e){case"round_prefer_ceil":return"if (fract(xOriginal) == 0.5) { return ceil(xOriginal); } else { return round(xOriginal); }";case"floor":return"return floor(xOriginal);";case"ceil":return"return ceil(xOriginal);";case"round_prefer_floor":return"if (fract(xOriginal) == 0.5) { return floor(xOriginal); } else { return round(xOriginal); }";case"simple":default:if(t<11)return"if (isDownSample) { return ceil(xOriginal); } else { return xOriginal; }";throw new Error(`Nearest mode ${e} is not supported`)}})()+"}",Xp=(e,t,r)=>{let i=new Array(r).fill(0).concat(new Array(r).fill(1)),s=e.length===0?i:e.slice();return t.length>0?(t.forEach((n,a)=>{i[n]=s[a],i[a+r]=s[t.length+a]}),i):s},Yp=(e,t,r,i)=>{let s=[];if(r.length>0)if(i.length>0){if(e.forEach(n=>s.push(n)),Math.max(...i)>e.length)throw new Error("axes is out of bound");i.forEach((n,a)=>s[n]=r[a])}else r.forEach(n=>s.push(n));else{if(t.length===0)throw new Error("Resize requires either scales or sizes.");s=e.map((n,a)=>Math.round(n*t[a]))}return s},Qp=(e,t,r)=>{let i=(()=>{switch(r.keepAspectRatioPolicy){case"not_larger":return r.axes.length>0?Math.min(...r.axes.map(n=>t[n]),Number.MAX_VALUE):Math.min(...t,Number.MAX_VALUE);case"not_smaller":return r.axes.length>0?Math.max(...r.axes.map(n=>t[n]),Number.MIN_VALUE):Math.max(...t,Number.MIN_VALUE);default:throw new Error(`Keep aspect ratio policy ${r.keepAspectRatioPolicy} is not supported`)}})();t.fill(1,0,t.length);let s=e.slice();return r.axes.length>0?(r.axes.forEach(n=>t[n]=i),r.axes.forEach(n=>s[n]=Math.round(e[n]*t[n]))):(t.fill(i,0,t.length),s.forEach((n,a)=>s[a]=Math.round(n*t[a]))),s},Jp=(e,t,r,i,s)=>` + fn calculateOriginalIndicesFromOutputIndices(output_indices: ${e.type.indices}) -> array<${e.type.value}, ${r.length}> { + var original_indices: array<${e.type.value}, ${r.length}>; + for (var i:u32 = 0; i < ${r.length}; i++) { + var output_index = ${e.indicesGet("output_indices","i")}; + var scale = ${de("uniforms.scales","i",i)}; + var roi_low = ${de("uniforms.roi","i",s)}; + var roi_hi = ${de("uniforms.roi",`i + ${t.length}`,s)}; + if (scale == 1.0) { + original_indices[i] = ${e.type.value}(output_index); + } else { + var input_shape_i = ${de("uniforms.input_shape","i",t.length)}; + var output_shape_i = ${de("uniforms.output_shape","i",r.length)}; + original_indices[i] = getOriginalCoordinateFromResizedCoordinate(output_index, scale, output_shape_i, + input_shape_i, roi_low, roi_hi); + } + } + return original_indices; + }`,ef=(e,t,r,i,s,n,a)=>` + fn calculateInputIndicesFromOutputIndices(output_indices: ${t.type.indices}) -> ${e.type.indices} { + var input_indices: ${e.type.indices}; + for (var i:u32 = 0; i < ${i.length}; i++) { + var output_index = ${t.indicesGet("output_indices","i")}; + var input_index: u32; + var scale = ${de("uniforms.scales","i",s)}; + if (scale == 1.0) { + input_index = output_index; + } else { + var roi_low = ${de("uniforms.roi","i",n)}; + var roi_hi = ${de("uniforms.roi",`i + ${r.length}`,n)}; + var input_shape_i = ${de("uniforms.input_shape","i",r.length)}; + var output_shape_i = ${de("uniforms.output_shape","i",i.length)}; + var original_idx = getOriginalCoordinateFromResizedCoordinate(output_index, scale, output_shape_i, + input_shape_i, roi_low, roi_hi); + if (!${a} || (original_idx >= 0 && original_idx < ${t.type.value}(input_shape_i))) { + if (original_idx < 0) { + input_index = 0; + } else if (original_idx > ${t.type.value}(input_shape_i - 1)) { + input_index = input_shape_i - 1; + } else { + input_index = u32(getNearestPixelFromOriginal(original_idx, scale < 1)); + } + } else { + input_index = u32(original_idx); + } + } + ${e.indicesSet("input_indices","i","input_index")} + } + return input_indices; + }`,tf=(e,t)=>` + fn checkInputIndices(input_indices: ${e.type.indices}) -> bool { + for (var i:u32 = 0; i < ${t.length}; i++) { + var input_index = ${e.indicesGet("input_indices","i")}; + if (input_index < 0 || input_index >= ${de("uniforms.input_shape","i",t.length)}) { + return false; + } + } + return true; + }`,Ca=(e,t,r,i)=>e.rank>i?` + ${e.indicesSet("input_indices",t,"channel")}; + ${e.indicesSet("input_indices",r,"batch")}; +`:"",rf=(e,t,r,i,s)=>{let[n,a,o,u]=r.length===2?[-1,0,1,-1]:[0,2,3,1],d=e.type.value;return` + fn getInputValue(batch: u32, channel: u32, row: u32, col: u32) -> ${d} { + var input_indices: ${e.type.indices}; + ${e.indicesSet("input_indices",a,`max(0, min(row, ${r[a]} - 1))`)}; + ${e.indicesSet("input_indices",o,`max(0, min(col, ${r[o]} - 1))`)}; + ${Ca(e,u,n,2)} + return ${e.getByIndices("input_indices")}; + } + + fn bilinearInterpolation(output_indices: ${t.type.indices}) -> ${d} { + var originalIndices = calculateOriginalIndicesFromOutputIndices(output_indices); + var row:${d} = originalIndices[${a}]; + var col:${d} = originalIndices[${o}]; + ${i?`if (row < 0 || row > (${r[a]} - 1) || col < 0 || col > (${r[o]} - 1)) { + return ${s}; + }`:""}; + row = max(0, min(row, ${r[a]} - 1)); + col = max(0, min(col, ${r[o]} - 1)); + var row1: u32 = u32(row); + var col1: u32 = u32(col); + var row2: u32 = u32(row + 1); + var col2: u32 = u32(col + 1); + var channel: u32 = ${r.length>2?`u32(originalIndices[${u}])`:"0"}; + var batch: u32 = ${r.length>2?`u32(originalIndices[${n}])`:"0"}; + var x11: ${d} = getInputValue(batch, channel, row1, col1); + var x12: ${d} = getInputValue(batch, channel, row1, col2); + var x21: ${d} = getInputValue(batch, channel, row2, col1); + var x22: ${d} = getInputValue(batch, channel, row2, col2); + var dx1: ${d} = abs(row - ${d}(row1)); + var dx2: ${d} = abs(${d}(row2) - row); + var dy1: ${d} = abs(col - ${d}(col1)); + var dy2: ${d} = abs(${d}(col2) - col); + if (row1 == row2) { + dx1 = 0.5; + dx2 = 0.5; + } + if (col1 == col2) { + dy1 = 0.5; + dy2 = 0.5; + } + return (x11 * dx2 * dy2 + x12 * dx2 * dy1 + x21 * dx1 * dy2 + x22 * dx1 * dy1); + }`},nf=(e,t,r,i,s,n,a,o,u,d)=>{let c=r.length===2,[f,h]=c?[0,1]:[2,3],m=e.type.value,g=y=>{let S=y===f?"row":"col";return` + fn ${S}CubicInterpolation(input_indices: ${e.type.indices}, output_indices: ${t.type.indices}) -> ${m} { + var output_index = ${t.indicesGet("output_indices",y)}; + var originalIdx: ${m} = getOriginalCoordinateFromResizedCoordinate(output_index, ${s[y]}, + ${i[y]}, ${r[y]}, ${n[y]}, ${n[y]} + ${r.length}); + var fractOriginalIdx: ${m} = originalIdx - floor(originalIdx); + var coefs = getCubicInterpolationCoefs(fractOriginalIdx); + + if (${o} && (originalIdx < 0 || originalIdx > (${r[y]} - 1))) { + return ${u}; + } + var data: array<${m}, 4> = array<${m}, 4>(0.0, 0.0, 0.0, 0.0); + for (var i: i32 = -1; i < 3; i++) { + var ${S}: ${m} = originalIdx + ${m}(i); + if (${S} < 0 || ${S} >= ${r[y]}) { + ${d?`coefs[i + 1] = 0.0; + continue;`:o?`return ${u};`:`${S} = max(0, min(${S}, ${r[y]} - 1));`}; + } + var input_indices_copy: ${e.type.indices} = input_indices; + ${e.indicesSet("input_indices_copy",y,`u32(${S})`)}; + data[i + 1] = ${y===f?e.getByIndices("input_indices_copy"):"rowCubicInterpolation(input_indices_copy, output_indices)"}; + } + return cubicInterpolation1D(data, coefs); + }`};return` + ${g(f)}; + ${g(h)}; + fn getCubicInterpolationCoefs(s: ${m}) -> array<${m}, 4> { + var absS = abs(s); + var coeffs: array<${m}, 4> = array<${m}, 4>(0.0, 0.0, 0.0, 0.0); + var oneMinusAbsS: ${m} = 1.0 - absS; + var twoMinusAbsS: ${m} = 2.0 - absS; + var onePlusAbsS: ${m} = 1.0 + absS; + coeffs[0] = ((${a} * onePlusAbsS - 5 * ${a}) * onePlusAbsS + 8 * ${a}) * onePlusAbsS - 4 * ${a}; + coeffs[1] = ((${a} + 2) * absS - (${a} + 3)) * absS * absS + 1; + coeffs[2] = ((${a} + 2) * oneMinusAbsS - (${a} + 3)) * oneMinusAbsS * oneMinusAbsS + 1; + coeffs[3] = ((${a} * twoMinusAbsS - 5 * ${a}) * twoMinusAbsS + 8 * ${a}) * twoMinusAbsS - 4 * ${a}; + return coeffs; + } + + fn cubicInterpolation1D(x: array<${m}, 4>, coefs: array<${m}, 4>) -> ${m} { + var coefsSum: ${m} = coefs[0] + coefs[1] + coefs[2] + coefs[3]; + return (x[0] * coefs[0] + x[1] * coefs[1]+ x[2] * coefs[2]+ x[3] * coefs[3]) / coefsSum; + } + + fn bicubicInterpolation(output_indices: ${t.type.indices}) -> ${m} { + var input_indices: ${e.type.indices} = output_indices; + return colCubicInterpolation(input_indices, output_indices); + } + `},sf=(e,t,r,i,s)=>{let[n,a,o,u,d]=r.length===3?[-1,0,1,2,-1]:[0,2,3,4,1],c=e.type.value;return` + fn getInputValue(batch: u32, channel: u32, depth:u32, height: u32, width: u32) -> ${c} { + var input_indices: ${e.type.indices}; + ${e.indicesSet("input_indices",a,`max(0, min(depth, ${r[a]} - 1))`)}; + ${e.indicesSet("input_indices",o,`max(0, min(height, ${r[o]} - 1))`)}; + ${e.indicesSet("input_indices",u,`max(0, min(width, ${r[u]} - 1))`)}; + ${Ca(e,d,n,3)} + return ${e.getByIndices("input_indices")}; + } + + fn trilinearInterpolation(output_indices: ${t.type.indices}) -> ${c} { + var originalIndices = calculateOriginalIndicesFromOutputIndices(output_indices); + var depth:${c} = originalIndices[${a}]; + var height:${c} = originalIndices[${o}]; + var width:${c} = originalIndices[${u}]; + ${i?`if (depth < 0 || depth > (${r[a]} - 1) || height < 0 || height > (${r[o]} - 1) || width < 0 || (width > ${r[u]} - 1)) { + return ${s}; + }`:""}; + + depth = max(0, min(depth, ${r[a]} - 1)); + height = max(0, min(height, ${r[o]} - 1)); + width = max(0, min(width, ${r[u]} - 1)); + var depth1: u32 = u32(depth); + var height1: u32 = u32(height); + var width1: u32 = u32(width); + var depth2: u32 = u32(depth + 1); + var height2: u32 = u32(height + 1); + var width2: u32 = u32(width + 1); + var channel: u32 = ${r.length>3?`u32(originalIndices[${d}])`:"0"}; + var batch: u32 = ${r.length>3?`u32(originalIndices[${n}])`:"0"}; + + var x111: ${c} = getInputValue(batch, channel, depth1, height1, width1); + var x112: ${c} = getInputValue(batch, channel, depth1, height1, width2); + var x121: ${c} = getInputValue(batch, channel, depth1, height2, width1); + var x122: ${c} = getInputValue(batch, channel, depth1, height2, width2); + var x211: ${c} = getInputValue(batch, channel, depth2, height1, width1); + var x212: ${c} = getInputValue(batch, channel, depth2, height1, width2); + var x221: ${c} = getInputValue(batch, channel, depth2, height2, width1); + var x222: ${c} = getInputValue(batch, channel, depth2, height2, width2); + var dx1: ${c} = abs(depth - ${c}(depth1)); + var dx2: ${c} = abs(${c}(depth2) - depth); + var dy1: ${c} = abs(height - ${c}(height1)); + var dy2: ${c} = abs(${c}(height2) - height); + var dz1: ${c} = abs(width - ${c}(width1)); + var dz2: ${c} = abs(${c}(width2) - width); + if (depth1 == depth2) { + dx1 = 0.5; + dx2 = 0.5; + } + if (height1 == height2) { + dy1 = 0.5; + dy2 = 0.5; + } + if (width1 == width2) { + dz1 = 0.5; + dz2 = 0.5; + } + return (x111 * dx2 * dy2 * dz2 + x112 * dx2 * dy2 * dz1 + x121 * dx2 * dy1 *dz2 + x122 * dx2 * dy1 * dz1 + + x211 * dx1 * dy2 * dz2 + x212 * dx1 * dy2 * dz1 + x221 * dx1 * dy1 *dz2 + x222 * dx1 * dy1 * dz1); + }`},af=(e,t,r,i,s,n)=>{let a=e.dims,o=Xp(n,t.axes,a.length),u=Yp(a,i,s,t.axes),d=i.slice();i.length===0&&(d=a.map((b,k)=>b===0?1:u[k]/b),t.keepAspectRatioPolicy!=="stretch"&&(u=Qp(a,d,t)));let c=ue("output",e.dataType,u.length),f=P("input",e.dataType,a.length),h=B.size(u),m=a.length===u.length&&a.every((b,k)=>b===u[k]),g=t.coordinateTransformMode==="tf_crop_and_resize",y=t.extrapolationValue,S=f.type.value,v=b=>` + ${m?"":` + ${Kp(t.coordinateTransformMode,S)}; + ${(()=>{switch(t.mode){case"nearest":return` + ${tf(f,a)}; + ${Zp(t.nearestMode,r,S)}; + ${ef(f,c,a,u,d.length,o.length,g)}; + `;case"linear":return` + ${Jp(c,a,u,d.length,o.length)}; + ${(()=>{if(a.length===2||a.length===4)return`${rf(f,c,a,g,y)}`;if(a.length===3||a.length===5)return`${sf(f,c,a,g,y)}`;throw Error("Linear mode only supports input dims 2, 3, 4 and 5 are supported in linear mode.")})()}; + `;case"cubic":return` + ${(()=>{if(a.length===2||a.length===4)return`${nf(f,c,a,u,d,o,t.cubicCoeffA,g,t.extrapolationValue,t.excludeOutside)}`;throw Error("Cubic mode only supports input dims 2 and 4 are supported in linear mode.")})()}; + `;default:throw Error("Invalid resize mode")}})()}; + `} + ${b.registerUniform("output_size","u32").registerUniform("scales","f32",d.length).registerUniform("roi","f32",o.length).declareVariables(f,c)} + ${b.mainStart()} + ${b.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + ${m?"output[global_idx] = input[global_idx];":` + let output_indices = ${c.offsetToIndices("global_idx")}; + var input_indices: ${f.type.indices}; + ${(()=>{switch(t.mode){case"nearest":return`input_indices = calculateInputIndicesFromOutputIndices(output_indices); + if (checkInputIndices(input_indices)) { + output[global_idx] = ${f.getByIndices("input_indices")}; + } else { + output[global_idx] = ${t.extrapolationValue}; + }`;case"linear":return`output[global_idx] = ${a.length===2||a.length===4?"bilinearInterpolation":"trilinearInterpolation"}(output_indices);`;case"cubic":return"output[global_idx] = bicubicInterpolation(output_indices);";default:throw Error(`Unsupported resize mode: ${t.mode}`)}})()}; +`} + }`;return{name:"Resize",shaderCache:{hint:`${t.cacheKey}|${r}|${d.length>0?t.mode==="cubic"?d:d.length:""}|${s.length>0?s:""}|${o.length>0?o:""}|${m}|${t.mode==="nearest"?a.length:a}`,inputDependencies:["rank"]},getShaderSource:v,getRunData:()=>({outputs:[{dims:u,dataType:e.dataType}],dispatchGroup:{x:Math.ceil(h/64)},programUniforms:[{type:12,data:h},{type:1,data:d},{type:1,data:o},...pe(a,u)]})}},of=e=>{let t=e.customDataBuffer;return new Uint32Array(t,t.byteOffset,1)[0]},V_=(e,t)=>{let r=[],i=[],s=[],n=of(e);if(t.antialias!==0)throw Error("Only default value (0) for Antialias attribute is supported");jp(e.inputs,t,n,r,i,s),e.compute(af(e.inputs[0],t,n,r,i,s),{inputs:[0]})},F_=e=>{let t=e.antialias,r=e.axes,i=e.coordinateTransformMode,s=e.cubicCoeffA,n=e.excludeOutside!==0,a=e.extrapolationValue,o=e.keepAspectRatioPolicy,u=e.mode,d=e.nearestMode===""?"simple":e.nearestMode;return Oe({antialias:t,axes:r,coordinateTransformMode:i,cubicCoeffA:s,excludeOutside:n,extrapolationValue:a,keepAspectRatioPolicy:o,mode:u,nearestMode:d})}}),uf,lf,H_,i2=j(()=>{_e(),we(),ve(),uf=e=>{if(!e||e.length<3)throw new Error("layerNorm requires at least 3 inputs.");let t=e[0],r=e[1],i=e[2];if(t.dataType!==r.dataType||t.dataType!==i.dataType)throw new Error("All inputs must have the same data type");if(t.dims.length!==3&&t.dims.length!==2)throw new Error("Input must be 2D or 3D");if(r.dims.length!==3&&r.dims.length!==2)throw new Error("Skip must be 2D or 3D");let s=t.dims[t.dims.length-1],n=t.dims[t.dims.length-2];if(r.dims[r.dims.length-1]!==s)throw new Error("Skip must have the same hidden size as input");if(r.dims[r.dims.length-2]!==n)throw new Error("Skip must have the same sequence length as input");if(i.dims.length!==1)throw new Error("Gamma must be 1D");if(i.dims[i.dims.length-1]!==s)throw new Error("Gamma must have the same hidden size as input");if(e.length>3){let a=e[3];if(a.dims.length!==1)throw new Error("Beta must be 1D");if(a.dims[a.dims.length-1]!==s)throw new Error("Beta must have the same hidden size as input")}if(e.length>4){let a=e[4];if(a.dims.length!==1)throw new Error("Bias must be 1D");if(a.dims[a.dims.length-1]!==s)throw new Error("Bias must have the same hidden size as input")}},lf=(e,t,r,i)=>{let s=t.simplified,n=e[0].dims,a=B.size(n),o=n,u=a,d=n.slice(-1)[0],c=i?n.slice(0,-1).concat(1):[],f=!s&&e.length>3,h=e.length>4,m=i&&r>1,g=i&&r>2,y=r>3,S=64,v=Le(d),b=[{type:12,data:u},{type:12,data:v},{type:12,data:d},{type:1,data:t.epsilon}],k=I=>{let z=[{name:"output_size",type:"u32"},{name:"components",type:"u32"},{name:"hidden_size",type:"u32"},{name:"epsilon",type:"f32"}],O=[P("x",e[0].dataType,e[0].dims,v),P("skip",e[1].dataType,e[1].dims,v),P("gamma",e[2].dataType,e[2].dims,v)];f&&O.push(P("beta",e[3].dataType,e[3].dims,v)),h&&O.push(P("bias",e[4].dataType,e[4].dims,v)),O.push(ue("output",e[0].dataType,o,v)),m&&O.push(ue("mean_output",1,c)),g&&O.push(ue("inv_std_output",1,c)),y&&O.push(ue("input_skip_bias_sum",e[0].dataType,o,v));let A=Xe(e[0].dataType),R=Xe(1,v);return` + + ${I.registerUniforms(z).declareVariables(...O)} + var sum_shared : array<${R}, ${S}>; + var sum_squared_shared : array<${R}, ${S}>; + + ${I.mainStart([S,1,1])} + let ix = local_id.x; + let iy = global_id.x / ${S}; + + let hidden_size_vectorized: u32 = uniforms.hidden_size / uniforms.components; + var stride = hidden_size_vectorized / ${S}; + let offset = ix * stride + iy * hidden_size_vectorized; + let offset1d = stride * ix; + if (ix == ${S-1}) { + stride = hidden_size_vectorized - stride * ix; + } + for (var i: u32 = 0; i < stride; i++) { + let skip_value = skip[offset + i]; + let bias_value = ${h?"bias[offset1d + i]":A+"(0.0)"}; + let input_value = x[offset + i]; + let value = input_value + skip_value + bias_value; + ${y?"input_skip_bias_sum[offset + i] = value;":""} + output[offset + i] = value; + let f32_value = ${ri(A,v,"value")}; + sum_shared[ix] += f32_value; + sum_squared_shared[ix] += f32_value * f32_value; + } + workgroupBarrier(); + + var reduce_size : u32 = ${S}; + for (var curr_size = reduce_size >> 1; curr_size > 0; curr_size = reduce_size >> 1) { + reduce_size = curr_size + (reduce_size & 1); + if (ix < curr_size) { + sum_shared[ix] += sum_shared[ix + reduce_size]; + sum_squared_shared[ix] += sum_squared_shared[ix + reduce_size]; + } + workgroupBarrier(); + } + + let sum = sum_shared[0]; + let square_sum = sum_squared_shared[0]; + let mean = ${kr("sum",v)} / f32(uniforms.hidden_size); + let inv_std_dev = inverseSqrt(${kr("square_sum",v)} / f32(uniforms.hidden_size) ${s?"":"- mean * mean"} + uniforms.epsilon); + ${m?"mean_output[global_idx] = mean;":""} + ${g?"inv_std_output[global_idx] = inv_std_dev;":""} + + for (var i: u32 = 0; i < stride; i++) { + output[offset + i] = (output[offset + i] ${s?"":`- ${A}(mean)`}) * + ${A}(inv_std_dev) * gamma[offset1d + i] + ${f?"+ beta[offset1d + i]":""}; + } + }`},x=[{dims:o,dataType:e[0].dataType}];return r>1&&x.push({dims:c,dataType:1}),r>2&&x.push({dims:c,dataType:1}),r>3&&x.push({dims:n,dataType:e[0].dataType}),{name:"SkipLayerNormalization",shaderCache:{hint:`${v};${m};${g};${y}`,inputDependencies:e.map((I,z)=>"type")},getShaderSource:k,getRunData:()=>({outputs:x,dispatchGroup:{x:Math.ceil(u/d)},programUniforms:b})}},H_=(e,t)=>{uf(e.inputs);let r=[0];e.outputCount>1&&r.push(-3),e.outputCount>2&&r.push(-3),e.outputCount>3&&r.push(3),e.compute(lf(e.inputs,t,e.outputCount,!1),{outputs:r})}}),df,Ti,cf,za,pf,ff,G_,j_,n2=j(()=>{_e(),we(),qe(),ve(),df=(e,t)=>{if(!e||e.length<1)throw new Error("too few inputs");if(t.axes.length!==0){if(t.axes.length!==t.starts.length||t.axes.length!==t.ends.length)throw new Error("axes, starts and ends must have the same length")}else if(t.starts.length!==t.ends.length)throw new Error("starts and ends must have the same length");e.slice(1).forEach((r,i)=>{if(e[i+1].dataType!==6&&e[i+1].dataType!==7)throw new Error(`Input ${i} must be an array of int32 or int64`)})},Ti=(e,t)=>{let r=[];if(e.length>t)if(e[t].dataType===7)e[t].getBigInt64Array().forEach(i=>r.push(Number(i)));else if(e[t].dataType===6)e[t].getInt32Array().forEach(i=>r.push(Number(i)));else throw new Error(`Input ${t} must be an array of int32 or int64`);return r},cf=(e,t)=>{if(e.length>1){let r=Ti(e,1),i=Ti(e,2),s=Ti(e,3);return s.length===0&&(s=[...Array(e[0].dims.length).keys()]),Oe({starts:r,ends:i,axes:s})}else return t},za=(e,t,r,i,s)=>{let n=e;return e<0&&(n+=r[i[t]]),s[t]<0?Math.max(0,Math.min(n,r[i[t]]-1)):Math.max(0,Math.min(n,r[i[t]]))},pf=(e,t,r)=>`fn calculateInputIndices(output_indices: ${t.type.indices}) -> ${e.type.indices} { + var input_indices: ${e.type.indices}; + var carry = 0u; + for (var i = ${r.length}; i >= 0; i--) { + let input_shape_i = ${de("uniforms.input_shape","i",r.length)}; + let steps_i = ${de("uniforms.steps","i",r.length)}; + let signs_i = ${de("uniforms.signs","i",r.length)}; + let starts_i = ${de("uniforms.starts","i",r.length)}; + var output_index = ${t.indicesGet("output_indices","i")}; + var input_index = output_index * steps_i + starts_i + carry; + carry = input_index / input_shape_i; + input_index = input_index % input_shape_i; + if (signs_i < 0) { + input_index = input_shape_i - input_index - 1u + starts_i; + } + ${e.indicesSet("input_indices","i","input_index")}; + } + return input_indices; + }`,ff=(e,t)=>{let r=e[0].dims,i=B.size(r),s=t.axes.length>0?B.normalizeAxes(t.axes,r.length):[...Array(r.length).keys()],n=Ti(e,4);n.forEach(v=>v!==0||(()=>{throw new Error("step cannot be 0")})),n.length===0&&(n=Array(s.length).fill(1));let a=t.starts.map((v,b)=>za(v,b,r,s,n)),o=t.ends.map((v,b)=>za(v,b,r,s,n));if(s.length!==a.length||s.length!==o.length)throw new Error("start, ends and axes should have the same number of elements");if(s.length!==r.length)for(let v=0;vMath.sign(v));n.forEach((v,b,k)=>{if(v<0){let x=(o[b]-a[b])/v,I=a[b],z=I+x*n[b];a[b]=z,o[b]=I,k[b]=-v}});let d=r.slice(0);s.forEach((v,b)=>{d[v]=Math.ceil((o[v]-a[v])/n[v])});let c={dims:d,dataType:e[0].dataType},f=ue("output",e[0].dataType,d.length),h=P("input",e[0].dataType,e[0].dims.length),m=B.size(d),g=[{name:"outputSize",type:"u32"},{name:"starts",type:"u32",length:a.length},{name:"signs",type:"i32",length:u.length},{name:"steps",type:"u32",length:n.length}],y=[{type:12,data:m},{type:12,data:a},{type:6,data:u},{type:12,data:n},...pe(e[0].dims,d)],S=v=>` + ${v.registerUniforms(g).declareVariables(h,f)} + ${pf(h,f,r)} + ${v.mainStart()} + ${v.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")} + let output_indices = ${f.offsetToIndices("global_idx")}; + let input_indices = calculateInputIndices(output_indices); + ${f.setByOffset("global_idx",h.getByIndices("input_indices"))} + }`;return{name:"Slice",shaderCache:{hint:`${u.length}_${a.length}_${n.length}`,inputDependencies:["rank"]},getShaderSource:S,getRunData:()=>({outputs:[c],dispatchGroup:{x:Math.ceil(i/64)},programUniforms:y})}},G_=(e,t)=>{df(e.inputs,t);let r=cf(e.inputs,t);e.compute(ff(e.inputs,r),{inputs:[0]})},j_=e=>{let t=e.starts,r=e.ends,i=e.axes;return Oe({starts:t,ends:r,axes:i})}}),hf,mf,K_,Z_,s2=j(()=>{_e(),we(),qe(),Tr(),ve(),hf=e=>{if(!e||e.length!==1)throw new Error("Softmax op requires 1 input.")},mf=(e,t)=>{let r=e.inputs[0],i=r.dims,s=B.size(i),n=i.length,a=B.normalizeAxis(t.axis,n),o=aA),d[a]=n-1,d[n-1]=a,u=e.compute($t(r,d),{inputs:[r],outputs:[-1]})[0]):u=r;let c=u.dims,f=c[n-1],h=s/f,m=Le(f),g=f/m,y=64;h===1&&(y=256);let S=(O,A)=>A===4?`max(max(${O}.x, ${O}.y), max(${O}.z, ${O}.w))`:A===2?`max(${O}.x, ${O}.y)`:A===3?`max(max(${O}.x, ${O}.y), ${O}.z)`:O,v=P("x",u.dataType,u.dims,m),b=ue("result",u.dataType,u.dims,m),k=v.type.value,x=Xe(u.dataType)==="f32"?`var threadMax = ${k}(-3.402823e+38f);`:`var threadMax = ${k}(-65504.0h);`,I=O=>` + var rowMaxShared : ${k}; + var rowSumShared : ${k}; + var threadShared : array<${k}, ${y}>; + + fn getValue(row: i32, col: i32, row_stride: i32) -> ${k} { + let index = row * row_stride + col; + return x[index]; + } + + fn setValue(row: i32, col: i32, row_stride: i32, value: ${k}) { + let index = row * row_stride + col; + result[index] = value; + } + ${O.registerUniform("packedCols","i32").declareVariables(v,b)} + ${O.mainStart(y)} + let gindex = i32(global_idx); + let lindex = i32(local_idx); + const wg = ${y}; + let row = gindex / wg; + let cols = uniforms.packedCols; + let row_stride : i32 = uniforms.packedCols; + + // find the rows max + ${x} + for (var col = lindex; col < cols; col += wg) { + let value = getValue(row, col, row_stride); + threadMax = max(threadMax, value); + } + if (lindex < cols) { + threadShared[lindex] = threadMax; + } + workgroupBarrier(); + + var reduceSize = min(cols, wg); + for (var currSize = reduceSize >> 1; currSize > 0; currSize = reduceSize >> 1) { + reduceSize = currSize + (reduceSize & 1); + if (lindex < currSize) { + threadShared[lindex] = max(threadShared[lindex], threadShared[lindex + reduceSize]); + } + workgroupBarrier(); + } + if (lindex == 0) { + rowMaxShared = ${k}(${S("threadShared[0]",m)}); + } + workgroupBarrier(); + + // find the rows sum + var threadSum = ${k}(0.0); + for (var col = lindex; col < cols; col += wg) { + let subExp = exp(getValue(row, col, row_stride) - rowMaxShared); + threadSum += subExp; + } + threadShared[lindex] = threadSum; + workgroupBarrier(); + + for (var currSize = wg >> 1; currSize > 0; currSize = currSize >> 1) { + if (lindex < currSize) { + threadShared[lindex] = threadShared[lindex] + threadShared[lindex + currSize]; + } + workgroupBarrier(); + } + if (lindex == 0) { + rowSumShared = ${k}(${kr("threadShared[0]",m)}); + } + workgroupBarrier(); + + // calculate final value for each element in the row + for (var col = lindex; col < cols; col += wg) { + let value = exp(getValue(row, col, row_stride) - rowMaxShared) / rowSumShared; + setValue(row, col, row_stride, value); + } + }`,z=e.compute({name:"Softmax",shaderCache:{hint:`${m};${y}`,inputDependencies:["type"]},getRunData:()=>({outputs:[{dims:c,dataType:u.dataType}],dispatchGroup:{x:h},programUniforms:[{type:6,data:g}]}),getShaderSource:I},{inputs:[u],outputs:[o?-1:0]})[0];o&&e.compute($t(z,d),{inputs:[z]})},K_=(e,t)=>{hf(e.inputs),mf(e,t)},Z_=e=>Oe({axis:e.axis})}),Oa,gf,_f,yf,X_,a2=j(()=>{_e(),we(),ve(),Oa=e=>Array.from(e.getBigInt64Array(),Number),gf=e=>{if(!e||e.length!==2)throw new Error("Tile requires 2 inputs.");if(e[0].dataType!==1&&e[0].dataType!==10&&e[0].dataType!==6&&e[0].dataType!==12)throw new Error("Tile only support float, float16, int32, and uint32 data types");if(e[1].dataType!==7)throw new Error("Tile `repeats` input should be of int64 data type");if(e[1].dims.length!==1)throw new Error("Tile `repeats` input should be 1-D");if(Oa(e[1]).length!==e[0].dims.length)throw new Error("Tile `repeats` input should have same number of elements as rank of input data tensor")},_f=(e,t)=>{let r=[];for(let i=0;i{let r=e[0].dims,i=t??Oa(e[1]),s=_f(r,i),n=B.size(s),a=e[0].dataType,o=P("input",a,r.length),u=ue("output",a,s.length),d=c=>` + const inputShape = ${o.indices(...r)}; + ${c.registerUniform("output_size","u32").declareVariables(o,u)} + ${c.mainStart()} + ${c.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + let output_indices = ${u.offsetToIndices("global_idx")}; + var input_indices: ${o.type.indices}; + for (var i = 0; i < ${r.length}; i++) { + let input_dim_i = ${o.indicesGet("uniforms.input_shape","i")}; + let input_dim_value = ${u.indicesGet("output_indices","i")} % input_dim_i; + + ${o.indicesSet("input_indices","i","input_dim_value")} + } + ${u.setByOffset("global_idx",o.getByIndices("input_indices"))} + }`;return{name:"Tile",shaderCache:{hint:`${i}`,inputDependencies:["rank"]},getRunData:()=>({outputs:[{dims:s,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(n/64)},programUniforms:[{type:12,data:n},...pe(e[0].dims,s)]}),getShaderSource:d}},X_=e=>{gf(e.inputs),e.compute(yf(e.inputs),{inputs:[0]})}}),bf,wf,Y_,o2=j(()=>{_e(),we(),ve(),bf=(e,t,r,i,s)=>{let n=ue("output_data",s,r.length,4),a=P("a_data",t[1].dataType,t[1].dims.length,4),o=P("b_data",t[2].dataType,t[2].dims.length,4),u=P("c_data",t[0].dataType,t[0].dims.length,4),d,c=(f,h,m)=>`select(${h}, ${f}, ${m})`;if(!i)d=n.setByOffset("global_idx",c(a.getByOffset("global_idx"),o.getByOffset("global_idx"),u.getByOffset("global_idx")));else{let f=(h,m,g="")=>{let y=`a_data[index_a${m}][component_a${m}]`,S=`b_data[index_b${m}][component_b${m}]`,v=`bool(c_data[index_c${m}] & (0xffu << (component_c${m} * 8)))`;return` + let output_indices${m} = ${n.offsetToIndices(`global_idx * 4u + ${m}u`)}; + let offset_a${m} = ${a.broadcastedIndicesToOffset(`output_indices${m}`,n)}; + let offset_b${m} = ${o.broadcastedIndicesToOffset(`output_indices${m}`,n)}; + let offset_c${m} = ${u.broadcastedIndicesToOffset(`output_indices${m}`,n)}; + let index_a${m} = offset_a${m} / 4u; + let index_b${m} = offset_b${m} / 4u; + let index_c${m} = offset_c${m} / 4u; + let component_a${m} = offset_a${m} % 4u; + let component_b${m} = offset_b${m} % 4u; + let component_c${m} = offset_c${m} % 4u; + ${h}[${m}] = ${g}(${c(y,S,v)}); + `};s===9?d=` + var data = vec4(0); + ${f("data",0,"u32")} + ${f("data",1,"u32")} + ${f("data",2,"u32")} + ${f("data",3,"u32")} + output_data[global_idx] = dot(vec4(0x1, 0x100, 0x10000, 0x1000000), vec4(data));`:d=` + ${f("output_data[global_idx]",0)} + ${f("output_data[global_idx]",1)} + ${f("output_data[global_idx]",2)} + ${f("output_data[global_idx]",3)} + `}return` + ${e.registerUniform("vec_size","u32").declareVariables(u,a,o,n)} + ${e.mainStart()} + ${e.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.vec_size")} + ${d} + }`},wf=e=>{let t=e[1].dims,r=e[2].dims,i=e[0].dims,s=e[1].dataType,n=!(B.areEqual(t,r)&&B.areEqual(r,i)),a=t,o=B.size(t);if(n){let d=ai.calcShape(ai.calcShape(t,r,!1),i,!1);if(!d)throw new Error("Can't perform where op on the given tensors");a=d,o=B.size(a)}let u=Math.ceil(o/4);return{name:"Where",shaderCache:{inputDependencies:["rank","rank","rank"]},getShaderSource:d=>bf(d,e,a,n,s),getRunData:()=>({outputs:[{dims:a,dataType:s}],dispatchGroup:{x:Math.ceil(o/64/4)},programUniforms:[{type:12,data:u},...pe(i,t,r,a)]})}},Y_=e=>{e.compute(wf(e.inputs))}}),Q_,u2=j(()=>{$$(),eu(),x$(),S$(),k$(),T$(),I$(),A$(),B$(),M$(),N$(),D$(),P$(),U$(),W$(),L$(),q$(),V$(),F$(),H$(),G$(),j$(),K$(),Z$(),X$(),__(),Y$(),Q$(),J$(),e2(),t2(),Jo(),r2(),$_(),i2(),n2(),s2(),w_(),a2(),Tr(),tu(),o2(),Q_=new Map([["Abs",[Hm]],["Acos",[Gm]],["Acosh",[jm]],["Add",[Ig]],["ArgMax",[Lm,ro]],["ArgMin",[Wm,ro]],["Asin",[Km]],["Asinh",[Zm]],["Atan",[Xm]],["Atanh",[Ym]],["Attention",[qm]],["AveragePool",[O_,z_]],["BatchNormalization",[Vm]],["BiasAdd",[Fm]],["BiasSplitGelu",[Tg]],["Cast",[Jm,Qm]],["Ceil",[tg]],["Clip",[eg]],["Concat",[Dg,Pg]],["Conv",[uo,oo]],["ConvTranspose",[Kg,jg]],["Cos",[rg]],["Cosh",[ig]],["CumSum",[Zg,Xg]],["DepthToSpace",[Yg,Qg]],["DequantizeLinear",[P_,U_]],["Div",[Eg]],["Einsum",[Jg,e_]],["Elu",[ng,Ri]],["Equal",[Cg]],["Erf",[sg]],["Exp",[ag]],["Expand",[t_]],["FastGelu",[r_]],["Floor",[og]],["FusedConv",[uo,oo]],["Gather",[n_,i_]],["GatherElements",[d_,l_]],["GatherBlockQuantized",[o_,u_]],["GatherND",[s_,a_]],["Gelu",[ug]],["Gemm",[p_,c_]],["GlobalAveragePool",[R_,A_]],["GlobalMaxPool",[D_,N_]],["Greater",[Rg]],["GreaterOrEqual",[Mg]],["GridSample",[f_,h_]],["GroupQueryAttention",[x_]],["HardSigmoid",[gg,mg]],["InstanceNormalization",[S_]],["LayerNormalization",[k_]],["LeakyRelu",[lg,Ri]],["Less",[Bg]],["LessOrEqual",[Ng]],["Log",[Sg]],["MatMul",[T_]],["MatMulNBits",[I_,E_]],["MaxPool",[B_,M_]],["Mul",[zg]],["MultiHeadAttention",[g_,m_]],["Neg",[cg]],["Not",[dg]],["Pad",[C_]],["Pow",[Og]],["QuickGelu",[kg,Ri]],["Range",[W_]],["Reciprocal",[pg]],["ReduceMin",[Mm]],["ReduceMean",[zm]],["ReduceMax",[Bm]],["ReduceSum",[Dm]],["ReduceProd",[Nm]],["ReduceL1",[Om]],["ReduceL2",[Am]],["ReduceLogSum",[Um]],["ReduceLogSumExp",[Rm]],["ReduceSumSquare",[Pm]],["Relu",[fg]],["Resize",[V_,F_]],["RotaryEmbedding",[v_]],["ScatterND",[q_,L_]],["Sigmoid",[hg]],["Sin",[_g]],["Sinh",[yg]],["Slice",[G_,j_]],["SkipLayerNormalization",[H_]],["Split",[y_,b_]],["Sqrt",[bg]],["Softmax",[K_,Z_]],["Sub",[Ag]],["Tan",[wg]],["Tanh",[vg]],["ThresholdedRelu",[xg,Ri]],["Tile",[X_]],["Transpose",[ym,bm]],["Where",[Y_]]])}),J_,l2=j(()=>{Ut(),pr(),ve(),J_=class{constructor(e){this.backend=e,this.repo=new Map,this.attributesBound=!1}getArtifact(e){return this.repo.get(e)}setArtifact(e,t){this.repo.set(e,t)}run(e,t,r,i,s){Qt(e.programInfo.name);let n=this.backend.device,a=this.backend.getComputePassEncoder();this.backend.writeTimestamp(this.backend.pendingDispatchNumber*2);let o=[];for(let d of t)o.push({binding:o.length,resource:{buffer:d.buffer}});for(let d of r)o.push({binding:o.length,resource:{buffer:d.buffer}});s&&o.push({binding:o.length,resource:s});let u=n.createBindGroup({layout:e.computePipeline.getBindGroupLayout(0),entries:o,label:e.programInfo.name});if(this.backend.sessionStatus==="capturing"){let d={kernelId:this.backend.currentKernelId,computePipeline:e.computePipeline,bindGroup:u,dispatchGroup:i};this.backend.capturedCommandList.get(this.backend.currentSessionId).push(d)}a.setPipeline(e.computePipeline),a.setBindGroup(0,u),a.dispatchWorkgroups(...i),this.backend.writeTimestamp(this.backend.pendingDispatchNumber*2+1),this.backend.pendingDispatchNumber++,(this.backend.pendingDispatchNumber>=this.backend.maxDispatchNumber||this.backend.queryType==="at-passes")&&this.backend.endComputePass(),this.backend.pendingDispatchNumber>=this.backend.maxDispatchNumber&&this.backend.flush(),Pt(e.programInfo.name)}dispose(){}build(e,t){Qt(e.name);let r=this.backend.device,i=[];[{feature:"shader-f16",extension:"f16"},{feature:"subgroups",extension:"subgroups"}].forEach(d=>{r.features.has(d.feature)&&i.push(`enable ${d.extension};`)});let s=_m(t,this.backend.device.limits),n=e.getShaderSource(s),a=`${i.join(` +`)} +${s.additionalImplementations} +${n}`,o=r.createShaderModule({code:a,label:e.name});Te("verbose",()=>`[WebGPU] ${e.name} shader code: ${a}`);let u=r.createComputePipeline({compute:{module:o,entryPoint:"main"},layout:"auto",label:e.name});return Pt(e.name),{programInfo:e,computePipeline:u,uniformVariablesInfo:s.variablesInfo}}normalizeDispatchGroupSize(e){let t=typeof e=="number"?e:e.x,r=typeof e=="number"?1:e.y||1,i=typeof e=="number"?1:e.z||1,s=this.backend.device.limits.maxComputeWorkgroupsPerDimension;if(t<=s&&r<=s&&i<=s)return[t,r,i];let n=t*r*i,a=Math.ceil(Math.sqrt(n));if(a>s){if(a=Math.ceil(Math.cbrt(n)),a>s)throw new Error("Total dispatch size exceeds WebGPU maximum.");return[a,a,a]}else return[a,a,1]}}}),ey={};di(ey,{WebGpuBackend:()=>ty});var vf,$f,xf,ty,d2=j(()=>{Ut(),_e(),pr(),pm(),w$(),u2(),l2(),vf=(e,t)=>{if(t.length!==e.length)throw new Error(`inputDependencies length ${t.length} is not equal to inputTensors length ${e.length}.`);let r=[];for(let i=0;i{var s,n;let i=e.name;return(s=e.shaderCache)!=null&&s.hint&&(i+="["+e.shaderCache.hint+"]"),i+=":"+r+`:${vf(t,((n=e.shaderCache)==null?void 0:n.inputDependencies)??new Array(t.length).fill("dims"))}`,i},xf=class{constructor(e){e&&(this.architecture=e.architecture,this.vendor=e.vendor)}isArchitecture(e){return this.architecture===e}isVendor(e){return this.vendor===e}},ty=class{constructor(){this.currentSessionId=null,this.currentKernelId=null,this.commandEncoder=null,this.computePassEncoder=null,this.maxDispatchNumber=16,this.pendingDispatchNumber=0,this.pendingKernels=[],this.pendingQueries=new Map,this.sessionStatus="default",this.capturedCommandList=new Map,this.capturedPendingKernels=new Map,this.sessionExternalDataMapping=new Map}get currentKernelCustomData(){if(this.currentKernelId===null)throw new Error("currentKernelCustomData(): currentKernelId is null. (should not happen)");let e=this.kernelCustomData.get(this.currentKernelId);return e||(e={},this.kernelCustomData.set(this.currentKernelId,e)),e}async initialize(e,t){this.env=e;let r=[],i={requiredLimits:{maxComputeWorkgroupStorageSize:t.limits.maxComputeWorkgroupStorageSize,maxComputeWorkgroupsPerDimension:t.limits.maxComputeWorkgroupsPerDimension,maxStorageBufferBindingSize:t.limits.maxStorageBufferBindingSize,maxBufferSize:t.limits.maxBufferSize,maxComputeInvocationsPerWorkgroup:t.limits.maxComputeInvocationsPerWorkgroup,maxComputeWorkgroupSizeX:t.limits.maxComputeWorkgroupSizeX,maxComputeWorkgroupSizeY:t.limits.maxComputeWorkgroupSizeY,maxComputeWorkgroupSizeZ:t.limits.maxComputeWorkgroupSizeZ},requiredFeatures:r},s=n=>t.features.has(n)&&r.push(n)&&!0;s("chromium-experimental-timestamp-query-inside-passes")||s("timestamp-query"),s("shader-f16"),s("subgroups"),this.device=await t.requestDevice(i),this.adapterInfo=new xf(t.info||await t.requestAdapterInfo()),this.gpuDataManager=mm(this),this.programManager=new J_(this),this.kernels=new Map,this.kernelPersistentData=new Map,this.kernelCustomData=new Map,Zo(e.logLevel,!!e.debug),this.device.onuncapturederror=n=>{n.error instanceof GPUValidationError&&console.error(`An uncaught WebGPU validation error was raised: ${n.error.message}`)},Object.defineProperty(this.env.webgpu,"device",{value:this.device,writable:!1,enumerable:!0,configurable:!1}),Object.defineProperty(this.env.webgpu,"adapter",{value:t,writable:!1,enumerable:!0,configurable:!1}),this.setQueryType()}dispose(){typeof this.querySet<"u"&&this.querySet.destroy(),this.gpuDataManager.dispose()}getCommandEncoder(){return this.commandEncoder||(this.commandEncoder=this.device.createCommandEncoder()),this.commandEncoder}getComputePassEncoder(){if(!this.computePassEncoder){let e=this.getCommandEncoder(),t={};this.queryType==="at-passes"&&(t.timestampWrites={querySet:this.querySet,beginningOfPassWriteIndex:this.pendingDispatchNumber*2,endOfPassWriteIndex:this.pendingDispatchNumber*2+1}),this.computePassEncoder=e.beginComputePass(t)}return this.computePassEncoder}endComputePass(){this.computePassEncoder&&(this.computePassEncoder.end(),this.computePassEncoder=null)}flush(){if(!this.commandEncoder)return;Qt(),this.endComputePass();let e;this.queryType!=="none"&&(this.commandEncoder.resolveQuerySet(this.querySet,0,this.pendingDispatchNumber*2,this.queryResolveBuffer,0),e=this.device.createBuffer({size:this.pendingDispatchNumber*2*8,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST}),this.pendingQueries.set(e,this.pendingKernels),this.pendingKernels=[],this.commandEncoder.copyBufferToBuffer(this.queryResolveBuffer,0,e,0,this.pendingDispatchNumber*2*8)),this.device.queue.submit([this.commandEncoder.finish()]),this.gpuDataManager.refreshPendingBuffers(),this.commandEncoder=null,this.pendingDispatchNumber=0,this.queryType!=="none"&&e.mapAsync(GPUMapMode.READ).then(()=>{var i;let t=new BigUint64Array(e.getMappedRange()),r=this.pendingQueries.get(e);for(let s=0;s"u"&&(this.queryTimeBase=m);let y=Number(m-this.queryTimeBase),S=Number(g-this.queryTimeBase);if(!Number.isSafeInteger(y)||!Number.isSafeInteger(S))throw new RangeError("incorrect timestamp range");if((i=this.env.webgpu.profiling)!=null&&i.ondata)this.env.webgpu.profiling.ondata({version:1,inputsMetadata:f.map(v=>({dims:v.dims,dataType:ar(v.dataType)})),outputsMetadata:h.map(v=>({dims:v.dims,dataType:ar(v.dataType)})),kernelId:a,kernelType:u,kernelName:d,programName:c,startTime:y,endTime:S});else{let v="";f.forEach((k,x)=>{v+=`input[${x}]: [${k.dims}] | ${ar(k.dataType)}, `});let b="";h.forEach((k,x)=>{b+=`output[${x}]: [${k.dims}] | ${ar(k.dataType)}, `}),console.log(`[profiling] kernel "${a}|${u}|${d}|${c}" ${v}${b}execution time: ${S-y} ns`)}Fn("GPU",`${c}::${m}::${g}`)}e.unmap(),this.pendingQueries.delete(e)}),Pt()}run(e,t,r,i,s,n){Qt(e.name);let a=[];for(let b=0;bk):r;if(c.length!==o.length)throw new Error(`Output size ${c.length} must be equal to ${o.length}.`);let f=[],h=[];for(let b=0;b=n)throw new Error(`Invalid output index: ${c[b]}`);if(c[b]===-3)continue;let k=c[b]===-1,x=c[b]===-2,I=k||x?s(o[b].dataType,o[b].dims):i(c[b],o[b].dataType,o[b].dims);if(f.push(I),I.data===0)continue;let z=this.gpuDataManager.get(I.data);if(!z)throw new Error(`no GPU data for output: ${I.data}`);if(k&&this.temporaryData.push(z),x){let O=this.kernelPersistentData.get(this.currentKernelId);O||(O=[],this.kernelPersistentData.set(this.currentKernelId,O)),O.push(z)}h.push(z)}if(a.length!==t.length||h.length!==f.length){if(h.length===0)return Pt(e.name),f;throw new Error(`Program ${e.name} has zero-sized tensor(s) in inputs or outputs. This is not supported now.`)}let m;if(d){let b=0,k=[];d.forEach(O=>{let A=typeof O.data=="number"?[O.data]:O.data;if(A.length===0)return;let R=O.type===10?2:4,W,ie;O.type===10?(ie=A.length>4?16:A.length>2?8:A.length*R,W=A.length>4?16:R*A.length):(ie=A.length<=2?A.length*R:16,W=16),b=Math.ceil(b/ie)*ie,k.push(b);let X=O.type===10?8:4;b+=A.length>4?Math.ceil(A.length/X)*W:A.length*R});let x=16;b=Math.ceil(b/x)*x;let I=new ArrayBuffer(b);d.forEach((O,A)=>{let R=k[A],W=typeof O.data=="number"?[O.data]:O.data;if(O.type===6)new Int32Array(I,R,W.length).set(W);else if(O.type===12)new Uint32Array(I,R,W.length).set(W);else if(O.type===10)new Uint16Array(I,R,W.length).set(W);else if(O.type===1)new Float32Array(I,R,W.length).set(W);else throw new Error(`Unsupported uniform type: ${ar(O.type)}`)});let z=this.gpuDataManager.create(b,GPUBufferUsage.COPY_DST|GPUBufferUsage.UNIFORM);this.device.queue.writeBuffer(z.buffer,0,I,0,b),this.gpuDataManager.release(z.id),m={offset:0,size:b,buffer:z.buffer}}let g=this.programManager.normalizeDispatchGroupSize(u),y=g[1]===1&&g[2]===1,S=$f(e,t,y),v=this.programManager.getArtifact(S);if(v||(v=this.programManager.build(e,g),this.programManager.setArtifact(S,v),Te("info",()=>`[artifact] key: ${S}, programName: ${e.name}`)),d&&v.uniformVariablesInfo){if(d.length!==v.uniformVariablesInfo.length)throw new Error(`Uniform variables count mismatch: expect ${v.uniformVariablesInfo.length}, got ${d.length} in program "${v.programInfo.name}".`);for(let b=0;b`[ProgramManager] run "${e.name}" (key=${S}) with ${g[0]}x${g[1]}x${g[2]}`),this.queryType!=="none"||this.sessionStatus==="capturing"){let b={kernelId:this.currentKernelId,programName:v.programInfo.name,inputTensorViews:t,outputTensorViews:f};this.pendingKernels.push(b),this.sessionStatus==="capturing"&&this.capturedPendingKernels.get(this.currentSessionId).push(b)}return this.programManager.run(v,a,h,g,m),Pt(e.name),f}upload(e,t){this.gpuDataManager.upload(e,t)}memcpy(e,t){this.gpuDataManager.memcpy(e,t)}async download(e,t){await this.gpuDataManager.download(e,t)}alloc(e){return this.gpuDataManager.create(e).id}free(e){return this.gpuDataManager.release(e)}createKernel(e,t,r,i){let s=Q_.get(e);if(!s)throw new Error(`kernel not implemented: ${e}`);let n={kernelType:e,kernelName:i,kernelEntry:s[0],attributes:[s[1],r]};this.kernels.set(t,n)}releaseKernel(e){let t=this.kernelPersistentData.get(e);if(t){for(let r of t)this.gpuDataManager.release(r.id);this.kernelPersistentData.delete(e)}this.kernelCustomData.delete(e),this.kernels.delete(e)}computeKernel(e,t,r){let i=this.kernels.get(e);if(!i)throw new Error(`kernel not created: ${e}`);let s=i.kernelType,n=i.kernelName,a=i.kernelEntry,o=i.attributes;if(this.currentKernelId!==null)throw new Error(`kernel "[${s}] ${n}" is not allowed to be called recursively`);this.currentKernelId=e,o[0]&&(o[1]=o[0](o[1]),o[0]=void 0),Te("info",()=>`[WebGPU] Start to run kernel "[${s}] ${n}"...`);let u=this.env.debug;this.temporaryData=[];try{return u&&this.device.pushErrorScope("validation"),a(t,o[1]),0}catch(d){return r.push(Promise.resolve(`[WebGPU] Kernel "[${s}] ${n}" failed. ${d}`)),1}finally{u&&r.push(this.device.popErrorScope().then(d=>d?`GPU validation error for kernel "[${s}] ${n}": ${d.message}`:null));for(let d of this.temporaryData)this.gpuDataManager.release(d.id);this.temporaryData=[],this.currentKernelId=null}}registerBuffer(e,t,r,i){let s=this.sessionExternalDataMapping.get(e);s||(s=new Map,this.sessionExternalDataMapping.set(e,s));let n=s.get(t),a=this.gpuDataManager.registerExternalBuffer(r,i,n);return s.set(t,[a,r]),a}unregisterBuffers(e){let t=this.sessionExternalDataMapping.get(e);t&&(t.forEach(r=>this.gpuDataManager.unregisterExternalBuffer(r[0])),this.sessionExternalDataMapping.delete(e))}getBuffer(e){let t=this.gpuDataManager.get(e);if(!t)throw new Error(`no GPU data for buffer: ${e}`);return t.buffer}createDownloader(e,t,r){return async()=>{let i=await Ja(this,e,t);return Xo(i.buffer,r)}}writeTimestamp(e){this.queryType==="inside-passes"&&this.computePassEncoder.writeTimestamp(this.querySet,e)}setQueryType(){var e;this.queryType="none",(((e=this.env.webgpu.profiling)==null?void 0:e.mode)==="default"||(typeof this.env.trace>"u"?this.env.wasm.trace:this.env.trace))&&(this.device.features.has("chromium-experimental-timestamp-query-inside-passes")?this.queryType="inside-passes":this.device.features.has("timestamp-query")&&(this.queryType="at-passes"),this.queryType!=="none"&&typeof this.querySet>"u"&&(this.querySet=this.device.createQuerySet({type:"timestamp",count:this.maxDispatchNumber*2}),this.queryResolveBuffer=this.device.createBuffer({size:this.maxDispatchNumber*2*8,usage:GPUBufferUsage.COPY_SRC|GPUBufferUsage.QUERY_RESOLVE})))}captureBegin(){Te("info","captureBegin"),this.capturedCommandList.get(this.currentSessionId)||this.capturedCommandList.set(this.currentSessionId,[]),this.capturedPendingKernels.get(this.currentSessionId)||this.capturedPendingKernels.set(this.currentSessionId,[]),this.flush(),this.sessionStatus="capturing"}captureEnd(){Te("info","captureEnd"),this.flush(),this.sessionStatus="default"}replay(){Te("info","replay"),this.sessionStatus="replaying";let e=this.capturedCommandList.get(this.currentSessionId),t=this.capturedPendingKernels.get(this.currentSessionId),r=e.length;this.pendingKernels=[];for(let i=0;i=this.maxDispatchNumber||this.queryType==="at-passes")&&this.endComputePass(),this.pendingDispatchNumber>=this.maxDispatchNumber&&this.flush()}this.flush(),this.sessionStatus="default"}onCreateSession(){this.gpuDataManager.onCreateSession()}onReleaseSession(e){this.unregisterBuffers(e),this.capturedCommandList.has(e)&&this.capturedCommandList.delete(e),this.capturedPendingKernels.has(e)&&this.capturedPendingKernels.delete(e),this.gpuDataManager.onReleaseSession(e)}onRunStart(e){this.currentSessionId=e,this.setQueryType()}}}),ry={};di(ry,{init:()=>iy});var On,Sf,iy,c2=j(()=>{_e(),pr(),we(),b$(),On=class ny{constructor(t,r,i,s){this.module=t,this.dataType=r,this.data=i,this.dims=s}getFloat32Array(){if(this.dataType!==1)throw new Error("Invalid data type");let t=B.size(this.dims);return t===0?new Float32Array:new Float32Array(this.module.HEAP8.buffer,this.data,t)}getBigInt64Array(){if(this.dataType!==7)throw new Error("Invalid data type");let t=B.size(this.dims);return t===0?new BigInt64Array:new BigInt64Array(this.module.HEAP8.buffer,this.data,t)}getInt32Array(){if(this.dataType!==6)throw new Error("Invalid data type");let t=B.size(this.dims);return t===0?new Int32Array:new Int32Array(this.module.HEAP8.buffer,this.data,t)}getUint16Array(){if(this.dataType!==10&&this.dataType!==4)throw new Error("Invalid data type");let t=B.size(this.dims);return t===0?new Uint16Array:new Uint16Array(this.module.HEAP8.buffer,this.data,t)}reshape(t){if(B.size(t)!==B.size(this.dims))throw new Error("Invalid new shape");return new ny(this.module,this.dataType,this.data,t)}},Sf=class{constructor(e,t,r){this.module=e,this.backend=t,this.customDataOffset=0,this.customDataSize=0,this.adapterInfo=t.adapterInfo;let i=e.PTR_SIZE,s=r/e.PTR_SIZE,n=i===4?"i32":"i64";this.opKernelContext=Number(e.getValue(i*s++,n));let a=Number(e.getValue(i*s++,n));this.outputCount=Number(e.getValue(i*s++,n)),this.customDataOffset=Number(e.getValue(i*s++,"*")),this.customDataSize=Number(e.getValue(i*s++,n));let o=[];for(let u=0;utypeof o=="number"?this.inputs[o]:o))??this.inputs,i=(t==null?void 0:t.outputs)??[],s=(o,u,d)=>new On(this.module,u,this.output(o,d),d),n=(o,u)=>{let d=Dr(o,u);if(!d)throw new Error(`Unsupported data type: ${o}`);let c=d>0?this.backend.gpuDataManager.create(d).id:0;return new On(this.module,o,c,u)};return this.backend.run(e,r,i,s,n,this.outputCount)}output(e,t){let r=this.module.stackSave();try{let i=this.module.PTR_SIZE,s=i===4?"i32":"i64",n=this.module.stackAlloc((1+t.length)*i);this.module.setValue(n,t.length,s);for(let a=0;a{let s=t.jsepInit;if(!s)throw new Error("Failed to initialize JSEP. The WebAssembly module is not built with JSEP support.");if(e==="webgpu"){let n=(d2(),Qi(ey)).WebGpuBackend,a=new n;await a.initialize(r,i),s("webgpu",[a,o=>a.alloc(Number(o)),o=>a.free(o),(o,u,d,c=!1)=>{if(c)Te("verbose",()=>`[WebGPU] jsepCopyGpuToGpu: src=${Number(o)}, dst=${Number(u)}, size=${Number(d)}`),a.memcpy(Number(o),Number(u));else{Te("verbose",()=>`[WebGPU] jsepCopyCpuToGpu: dataOffset=${Number(o)}, gpuDataId=${Number(u)}, size=${Number(d)}`);let f=t.HEAPU8.subarray(Number(o>>>0),Number(o>>>0)+Number(d));a.upload(Number(u),f)}},async(o,u,d)=>{Te("verbose",()=>`[WebGPU] jsepCopyGpuToCpu: gpuDataId=${o}, dataOffset=${u}, size=${d}`),await a.download(Number(o),()=>t.HEAPU8.subarray(Number(u)>>>0,Number(u+d)>>>0))},(o,u,d)=>a.createKernel(o,Number(u),d,t.UTF8ToString(t._JsepGetNodeName(Number(u)))),o=>a.releaseKernel(o),(o,u,d,c)=>{Te("verbose",()=>`[WebGPU] jsepRun: sessionHandle=${d}, kernel=${o}, contextDataOffset=${u}`);let f=new Sf(t,a,Number(u));return a.computeKernel(Number(o),f,c)},()=>a.captureBegin(),()=>a.captureEnd(),()=>a.replay()])}else{let n=new hm(r);s("webnn",[n,()=>n.reserveTensorId(),a=>n.releaseTensorId(a),async(a,o,u,d,c)=>n.ensureTensor(a,o,u,d,c),(a,o)=>{n.uploadTensor(a,o)},async(a,o)=>n.downloadTensor(a,o)])}}}),kf,ou,uu,_r,Tf,Aa,Yn,lu,du,Ra,cu,pu,fu,sy=j(()=>{g$(),_$(),_e(),Kr(),Ho(),um(),kf=(e,t)=>{De()._OrtInit(e,t)!==0&&Ae("Can't initialize onnxruntime.")},ou=async e=>{kf(e.wasm.numThreads,Gn(e.logLevel))},uu=async(e,t)=>{var r,i;(i=(r=De()).asyncInit)==null||i.call(r);{let s=(c2(),Qi(ry)).init;if(t==="webgpu"){if(typeof navigator>"u"||!navigator.gpu)throw new Error("WebGPU is not supported in current environment");let n=e.webgpu.adapter;if(n){if(typeof n.limits!="object"||typeof n.features!="object"||typeof n.requestDevice!="function")throw new Error("Invalid GPU adapter set in `env.webgpu.adapter`. It must be a GPUAdapter object.")}else{let a=e.webgpu.powerPreference;if(a!==void 0&&a!=="low-power"&&a!=="high-performance")throw new Error(`Invalid powerPreference setting: "${a}"`);let o=e.webgpu.forceFallbackAdapter;if(o!==void 0&&typeof o!="boolean")throw new Error(`Invalid forceFallbackAdapter setting: "${o}"`);if(n=await navigator.gpu.requestAdapter({powerPreference:a,forceFallbackAdapter:o}),!n)throw new Error('Failed to get GPU adapter. You may need to enable flag "--enable-unsafe-webgpu" if you are using Chrome.')}await s("webgpu",De(),e,n)}if(t==="webnn"){if(typeof navigator>"u"||!navigator.ml)throw new Error("WebNN is not supported in current environment");await s("webnn",De(),e)}}},_r=new Map,Tf=e=>{let t=De(),r=t.stackSave();try{let i=t.PTR_SIZE,s=t.stackAlloc(2*i);t._OrtGetInputOutputCount(e,s,s+i)!==0&&Ae("Can't get session input/output count.");let n=i===4?"i32":"i64";return[Number(t.getValue(s,n)),Number(t.getValue(s+i,n))]}finally{t.stackRestore(r)}},Aa=(e,t)=>{let r=De(),i=r.stackSave(),s=0;try{let n=r.PTR_SIZE,a=r.stackAlloc(2*n);r._OrtGetInputOutputMetadata(e,t,a,a+n)!==0&&Ae("Can't get session input/output metadata.");let o=Number(r.getValue(a,"*"));s=Number(r.getValue(a+n,"*"));let u=r.HEAP32[s/4];if(u===0)return[o,0];let d=r.HEAPU32[s/4+1],c=[];for(let f=0;f{let t=De(),r=t._malloc(e.byteLength);if(r===0)throw new Error(`Can't create a session. failed to allocate a buffer of size ${e.byteLength}.`);return t.HEAPU8.set(e,r),[r,e.byteLength]},lu=async(e,t)=>{var f,h,m,g;let r,i,s=De();Array.isArray(e)?[r,i]=e:e.buffer===s.HEAPU8.buffer?[r,i]=[e.byteOffset,e.byteLength]:[r,i]=Yn(e);let n=0,a=0,o=0,u=[],d=[],c=[];try{if([a,u]=await om(t),(t==null?void 0:t.externalData)&&s.mountExternalData){let A=[];for(let R of t.externalData){let W=typeof R=="string"?R:R.path;A.push(Ko(typeof R=="string"?R:R.data).then(ie=>{s.mountExternalData(W,ie)}))}await Promise.all(A)}for(let A of(t==null?void 0:t.executionProviders)??[])if((typeof A=="string"?A:A.name)==="webnn"){if(s.shouldTransferToMLTensor=!1,typeof A!="string"){let R=A,W=R==null?void 0:R.context,ie=R==null?void 0:R.gpuDevice,X=R==null?void 0:R.deviceType,ne=R==null?void 0:R.powerPreference;W?s.currentContext=W:ie?s.currentContext=await s.webnnCreateMLContext(ie):s.currentContext=await s.webnnCreateMLContext({deviceType:X,powerPreference:ne})}else s.currentContext=await s.webnnCreateMLContext();break}n=await s._OrtCreateSession(r,i,a),(f=s.webgpuOnCreateSession)==null||f.call(s,n),n===0&&Ae("Can't create a session."),(h=s.jsepOnCreateSession)==null||h.call(s),s.currentContext&&(s.webnnRegisterMLContext(n,s.currentContext),s.currentContext=void 0,s.shouldTransferToMLTensor=!0);let[y,S]=Tf(n),v=!!(t!=null&&t.enableGraphCapture),b=[],k=[],x=[],I=[],z=[];for(let A=0;AA==="gpu-buffer"||A==="ml-tensor"||A==="ml-tensor-cpu-output")&&(o=s._OrtCreateBinding(n),o===0&&Ae("Can't create IO binding."),O={handle:o,outputPreferredLocations:z,outputPreferredLocationsEncoded:z.map(A=>A==="ml-tensor-cpu-output"?"ml-tensor":A).map(A=>Ya(A))}),_r.set(n,[n,d,c,O,v,!1]),[n,b,k,x,I]}catch(y){throw d.forEach(S=>s._OrtFree(S)),c.forEach(S=>s._OrtFree(S)),o!==0&&s._OrtReleaseBinding(o)!==0&&Ae("Can't release IO binding."),n!==0&&s._OrtReleaseSession(n)!==0&&Ae("Can't release session."),y}finally{s._free(r),a!==0&&s._OrtReleaseSessionOptions(a)!==0&&Ae("Can't release session options."),u.forEach(y=>s._free(y)),(g=s.unmountExternalData)==null||g.call(s)}},du=e=>{var u,d,c;let t=De(),r=_r.get(e);if(!r)throw new Error(`cannot release session. invalid session id: ${e}`);let[i,s,n,a,o]=r;a&&(o&&t._OrtClearBoundOutputs(a.handle)!==0&&Ae("Can't clear bound outputs."),t._OrtReleaseBinding(a.handle)!==0&&Ae("Can't release IO binding.")),(u=t.jsepOnReleaseSession)==null||u.call(t,e),(d=t.webnnOnReleaseSession)==null||d.call(t,e),(c=t.webgpuOnReleaseSession)==null||c.call(t,e),s.forEach(f=>t._OrtFree(f)),n.forEach(f=>t._OrtFree(f)),t._OrtReleaseSession(i)!==0&&Ae("Can't release session."),_r.delete(e)},Ra=async(e,t,r,i,s,n,a=!1)=>{if(!e){t.push(0);return}let o=De(),u=o.PTR_SIZE,d=e[0],c=e[1],f=e[3],h=f,m,g;if(d==="string"&&(f==="gpu-buffer"||f==="ml-tensor"))throw new Error("String tensor is not supported on GPU.");if(a&&f!=="gpu-buffer")throw new Error(`External buffer must be provided for input/output index ${n} when enableGraphCapture is true.`);if(f==="gpu-buffer"){let v=e[2].gpuBuffer;g=Dr(Nr(d),c);{let b=o.jsepRegisterBuffer;if(!b)throw new Error('Tensor location "gpu-buffer" is not supported without using WebGPU.');m=b(i,n,v,g)}}else if(f==="ml-tensor"){let v=e[2].mlTensor;g=Dr(Nr(d),c);let b=o.webnnRegisterMLTensor;if(!b)throw new Error('Tensor location "ml-tensor" is not supported without using WebNN.');m=b(i,v,Nr(d),c)}else{let v=e[2];if(Array.isArray(v)){g=u*v.length,m=o._malloc(g),r.push(m);for(let b=0;bo.setValue(S+k*u,b,u===4?"i32":"i64"));let v=o._OrtCreateTensor(Nr(d),m,g,S,c.length,Ya(h));v===0&&Ae(`Can't create tensor for input/output. session=${i}, index=${n}.`),t.push(v)}finally{o.stackRestore(y)}},cu=async(e,t,r,i,s,n)=>{var ie,X,ne,Y;let a=De(),o=a.PTR_SIZE,u=_r.get(e);if(!u)throw new Error(`cannot run inference. invalid session id: ${e}`);let d=u[0],c=u[1],f=u[2],h=u[3],m=u[4],g=u[5],y=t.length,S=i.length,v=0,b=[],k=[],x=[],I=[],z=a.stackSave(),O=a.stackAlloc(y*o),A=a.stackAlloc(y*o),R=a.stackAlloc(S*o),W=a.stackAlloc(S*o);try{[v,b]=am(n);for(let Z=0;ZT*E,1);ee=ar(U);let Lt=h==null?void 0:h.outputPreferredLocations[i[Z]];if(ee==="string"){if(Lt==="gpu-buffer"||Lt==="ml-tensor")throw new Error("String tensor is not supported on GPU.");let T=[];for(let E=0;E0){let T=a.jsepGetBuffer;if(!T)throw new Error('preferredLocation "gpu-buffer" is not supported without using WebGPU.');let E=T(ye),M=Dr(U,tt);if(M===void 0||!Go(ee))throw new Error(`Unsupported data type: ${ee}`);q=!0,V.push([ee,Ye,{gpuBuffer:E,download:a.jsepCreateDownloader(E,M,ee),dispose:()=>{a._OrtReleaseTensor(le)!==0&&Ae("Can't release tensor.")}},"gpu-buffer"])}else if(Lt==="ml-tensor"&&tt>0){let T=a.webnnEnsureTensor,E=a.webnnIsGraphInputOutputTypeSupported;if(!T||!E)throw new Error('preferredLocation "ml-tensor" is not supported without using WebNN.');if(Dr(U,tt)===void 0||!jo(ee))throw new Error(`Unsupported data type: ${ee}`);if(!E(e,ee,!1))throw new Error(`preferredLocation "ml-tensor" for ${ee} output is not supported by current WebNN Context.`);let M=await T(e,ye,U,Ye,!1);q=!0,V.push([ee,Ye,{mlTensor:M,download:a.webnnCreateMLTensorDownloader(ye,ee),dispose:()=>{a.webnnReleaseTensorId(ye),a._OrtReleaseTensor(le)}},"ml-tensor"])}else if(Lt==="ml-tensor-cpu-output"&&tt>0){let T=a.webnnCreateMLTensorDownloader(ye,ee)(),E=V.length;q=!0,ae.push((async()=>{let M=[E,await T];return a.webnnReleaseTensorId(ye),a._OrtReleaseTensor(le),M})()),V.push([ee,Ye,[],"cpu"])}else{let T=os(ee),E=new T(tt);new Uint8Array(E.buffer,E.byteOffset,E.byteLength).set(a.HEAPU8.subarray(ye,ye+E.byteLength)),V.push([ee,Ye,E,"cpu"])}}finally{a.stackRestore(Be),ee==="string"&&ye&&a._free(ye),q||a._OrtReleaseTensor(le)}}h&&!m&&(a._OrtClearBoundOutputs(h.handle)!==0&&Ae("Can't clear bound outputs."),_r.set(e,[d,c,f,h,m,!1]));for(let[Z,le]of await Promise.all(ae))V[Z][2]=le;return V}finally{(Y=a.webnnOnRunEnd)==null||Y.call(a,d),a.stackRestore(z),k.forEach(oe=>a._OrtReleaseTensor(oe)),x.forEach(oe=>a._OrtReleaseTensor(oe)),I.forEach(oe=>a._free(oe)),v!==0&&a._OrtReleaseRunOptions(v),b.forEach(oe=>a._free(oe))}},pu=e=>{let t=De(),r=_r.get(e);if(!r)throw new Error("invalid session id");let i=r[0],s=t._OrtEndProfiling(i);s===0&&Ae("Can't get an profile file name."),t._OrtFree(s)},fu=e=>{let t=[];for(let r of e){let i=r[2];!Array.isArray(i)&&"buffer"in i&&t.push(i.buffer)}return t}}),yr,gt,Jr,Ii,Ei,An,Ba,Rn,Or,Ar,If,ay,oy,uy,ly,dy,cy,py,fy=j(()=>{Ut(),sy(),Kr(),Vo(),yr=()=>!!We.wasm.proxy&&typeof document<"u",Jr=!1,Ii=!1,Ei=!1,Rn=new Map,Or=(e,t)=>{let r=Rn.get(e);r?r.push(t):Rn.set(e,[t])},Ar=()=>{if(Jr||!Ii||Ei||!gt)throw new Error("worker not ready")},If=e=>{switch(e.data.type){case"init-wasm":Jr=!1,e.data.err?(Ei=!0,Ba[1](e.data.err)):(Ii=!0,Ba[0]()),An&&(URL.revokeObjectURL(An),An=void 0);break;case"init-ep":case"copy-from":case"create":case"release":case"run":case"end-profiling":{let t=Rn.get(e.data.type);e.data.err?t.shift()[1](e.data.err):t.shift()[0](e.data.out);break}}},ay=async()=>{if(!Ii){if(Jr)throw new Error("multiple calls to 'initWasm()' detected.");if(Ei)throw new Error("previous call to 'initWasm()' failed.");if(Jr=!0,yr())return new Promise((e,t)=>{gt==null||gt.terminate(),nm().then(([r,i])=>{try{gt=i,gt.onerror=n=>t(n),gt.onmessage=If,Ba=[e,t];let s={type:"init-wasm",in:We};!s.in.wasm.wasmPaths&&(r||Xa)&&(s.in.wasm.wasmPaths={wasm:new URL("/assets/ort-wasm-simd-threaded.jsep-CLPRrI3A.wasm",import.meta.url).href}),gt.postMessage(s),An=r}catch(s){t(s)}},t)});try{await Fo(We.wasm),await ou(We),Ii=!0}catch(e){throw Ei=!0,e}finally{Jr=!1}}},oy=async e=>{if(yr())return Ar(),new Promise((t,r)=>{Or("init-ep",[t,r]);let i={type:"init-ep",in:{epName:e,env:We}};gt.postMessage(i)});await uu(We,e)},uy=async e=>yr()?(Ar(),new Promise((t,r)=>{Or("copy-from",[t,r]);let i={type:"copy-from",in:{buffer:e}};gt.postMessage(i,[e.buffer])})):Yn(e),ly=async(e,t)=>{if(yr()){if(t!=null&&t.preferredOutputLocation)throw new Error('session option "preferredOutputLocation" is not supported for proxy.');return Ar(),new Promise((r,i)=>{Or("create",[r,i]);let s={type:"create",in:{model:e,options:{...t}}},n=[];e instanceof Uint8Array&&n.push(e.buffer),gt.postMessage(s,n)})}else return lu(e,t)},dy=async e=>{if(yr())return Ar(),new Promise((t,r)=>{Or("release",[t,r]);let i={type:"release",in:e};gt.postMessage(i)});du(e)},cy=async(e,t,r,i,s,n)=>{if(yr()){if(r.some(a=>a[3]!=="cpu"))throw new Error("input tensor on GPU is not supported for proxy.");if(s.some(a=>a))throw new Error("pre-allocated output tensor is not supported for proxy.");return Ar(),new Promise((a,o)=>{Or("run",[a,o]);let u=r,d={type:"run",in:{sessionId:e,inputIndices:t,inputs:u,outputIndices:i,options:n}};gt.postMessage(d,fu(u))})}else return cu(e,t,r,i,s,n)},py=async e=>{if(yr())return Ar(),new Promise((t,r)=>{Or("end-profiling",[t,r]);let i={type:"end-profiling",in:e};gt.postMessage(i)});pu(e)}}),Ma,Ef,hy,p2=j(()=>{Ut(),fy(),_e(),qo(),um(),Ma=(e,t)=>{switch(e.location){case"cpu":return[e.type,e.dims,e.data,"cpu"];case"gpu-buffer":return[e.type,e.dims,{gpuBuffer:e.gpuBuffer},"gpu-buffer"];case"ml-tensor":return[e.type,e.dims,{mlTensor:e.mlTensor},"ml-tensor"];default:throw new Error(`invalid data location: ${e.location} for ${t()}`)}},Ef=e=>{switch(e[3]){case"cpu":return new Bt(e[0],e[2],e[1]);case"gpu-buffer":{let t=e[0];if(!Go(t))throw new Error(`not supported data type: ${t} for deserializing GPU tensor`);let{gpuBuffer:r,download:i,dispose:s}=e[2];return Bt.fromGpuBuffer(r,{dataType:t,dims:e[1],download:i,dispose:s})}case"ml-tensor":{let t=e[0];if(!jo(t))throw new Error(`not supported data type: ${t} for deserializing MLTensor tensor`);let{mlTensor:r,download:i,dispose:s}=e[2];return Bt.fromMLTensor(r,{dataType:t,dims:e[1],download:i,dispose:s})}default:throw new Error(`invalid data location: ${e[3]}`)}},hy=class{async fetchModelAndCopyToWasmMemory(e){return uy(await Ko(e))}async loadModel(e,t){Qt();let r;typeof e=="string"?r=await this.fetchModelAndCopyToWasmMemory(e):r=e,[this.sessionId,this.inputNames,this.outputNames,this.inputMetadata,this.outputMetadata]=await ly(r,t),Pt()}async dispose(){return dy(this.sessionId)}async run(e,t,r){Qt();let i=[],s=[];Object.entries(e).forEach(f=>{let h=f[0],m=f[1],g=this.inputNames.indexOf(h);if(g===-1)throw new Error(`invalid input '${h}'`);i.push(m),s.push(g)});let n=[],a=[];Object.entries(t).forEach(f=>{let h=f[0],m=f[1],g=this.outputNames.indexOf(h);if(g===-1)throw new Error(`invalid output '${h}'`);n.push(m),a.push(g)});let o=i.map((f,h)=>Ma(f,()=>`input "${this.inputNames[s[h]]}"`)),u=n.map((f,h)=>f?Ma(f,()=>`output "${this.outputNames[a[h]]}"`):null),d=await cy(this.sessionId,s,o,a,u,r),c={};for(let f=0;fpo,initializeFlags:()=>co,wasmBackend:()=>gy});var co,po,gy,f2=j(()=>{Ut(),fy(),p2(),co=()=>{(typeof We.wasm.initTimeout!="number"||We.wasm.initTimeout<0)&&(We.wasm.initTimeout=0);let e=We.wasm.simd;if(typeof e!="boolean"&&e!==void 0&&e!=="fixed"&&e!=="relaxed"&&(console.warn(`Property "env.wasm.simd" is set to unknown value "${e}". Reset it to \`false\` and ignore SIMD feature checking.`),We.wasm.simd=!1),typeof We.wasm.proxy!="boolean"&&(We.wasm.proxy=!1),typeof We.wasm.trace!="boolean"&&(We.wasm.trace=!1),typeof We.wasm.numThreads!="number"||!Number.isInteger(We.wasm.numThreads)||We.wasm.numThreads<=0)if(typeof self<"u"&&!self.crossOriginIsolated)We.wasm.numThreads=1;else{let t=typeof navigator>"u"?Qv("node:os").cpus().length:navigator.hardwareConcurrency;We.wasm.numThreads=Math.min(4,Math.ceil((t||1)/2))}},po=class{async init(e){co(),await ay(),await oy(e)}async createInferenceSessionHandler(e,t){let r=new hy;return await r.loadModel(e,t),r}},gy=new po});Ut();Ut();Ut();var h2="1.22.0";{let e=(f2(),Qi(my)).wasmBackend;ti("webgpu",e,5),ti("webnn",e,5),ti("cpu",e,10),ti("wasm",e,10)}Object.defineProperty(We.versions,"web",{value:h2,enumerable:!0});/** +* @license +* Copyright 2021 Google LLC. All Rights Reserved. +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +* ============================================================================= +*//** + * @license + * Copyright 2020 Google LLC. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================================= + *//** + * @license + * Copyright 2019 Google LLC. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================================= + */let fo=1;var m2=class{constructor(){Ke(this,"subscribers");Ke(this,"toasts");Ke(this,"dismissedToasts");Ke(this,"subscribe",e=>(this.subscribers.push(e),()=>{const t=this.subscribers.indexOf(e);this.subscribers.splice(t,1)}));Ke(this,"publish",e=>{this.subscribers.forEach(t=>t(e))});Ke(this,"addToast",e=>{this.publish(e),this.toasts=[...this.toasts,e]});Ke(this,"create",e=>{var a;const{message:t,...r}=e,i=typeof e.id=="number"||e.id&&((a=e.id)==null?void 0:a.length)>0?e.id:fo++,s=this.toasts.find(o=>o.id===i),n=e.dismissible===void 0?!0:e.dismissible;return this.dismissedToasts.has(i)&&this.dismissedToasts.delete(i),s?this.toasts=this.toasts.map(o=>o.id===i?(this.publish({...o,...e,id:i,title:t}),{...o,...e,id:i,dismissible:n,title:t}):o):this.addToast({title:t,...r,dismissible:n,id:i}),i});Ke(this,"dismiss",e=>(e?(this.dismissedToasts.add(e),requestAnimationFrame(()=>this.subscribers.forEach(t=>t({id:e,dismiss:!0})))):this.toasts.forEach(t=>{this.subscribers.forEach(r=>r({id:t.id,dismiss:!0}))}),e));Ke(this,"message",(e,t)=>this.create({...t,message:e,type:"default"}));Ke(this,"error",(e,t)=>this.create({...t,type:"error",message:e}));Ke(this,"success",(e,t)=>this.create({...t,type:"success",message:e}));Ke(this,"info",(e,t)=>this.create({...t,type:"info",message:e}));Ke(this,"warning",(e,t)=>this.create({...t,type:"warning",message:e}));Ke(this,"loading",(e,t)=>this.create({...t,type:"loading",message:e}));Ke(this,"promise",(e,t)=>{if(!t)return;let r;t.loading!==void 0&&(r=this.create({...t,promise:e,type:"loading",message:t.loading,description:typeof t.description!="function"?t.description:void 0}));const i=Promise.resolve(e instanceof Function?e():e);let s=r!==void 0,n;const a=i.then(async u=>{if(n=["resolve",u],Br(u))s=!1,this.create({id:r,type:"default",message:u});else if(_2(u)&&!u.ok){s=!1;const c=typeof t.error=="function"?await t.error(`HTTP error! status: ${u.status}`):t.error,f=typeof t.description=="function"?await t.description(`HTTP error! status: ${u.status}`):t.description,m=typeof c=="object"&&!Br(c)?c:{message:c||"",id:r||""};this.create({id:r,type:"error",description:f,...m})}else if(u instanceof Error){s=!1;const c=typeof t.error=="function"?await t.error(u):t.error,f=typeof t.description=="function"?await t.description(u):t.description,m=typeof c=="object"&&!Br(c)?c:{message:c||"",id:r||""};this.create({id:r,type:"error",description:f,...m})}else if(t.success!==void 0){s=!1;const c=typeof t.success=="function"?await t.success(u):t.success,f=typeof t.description=="function"?await t.description(u):t.description,m=typeof c=="object"&&!Br(c)?c:{message:c||"",id:r||""};this.create({id:r,type:"success",description:f,...m})}}).catch(async u=>{if(n=["reject",u],t.error!==void 0){s=!1;const d=typeof t.error=="function"?await t.error(u):t.error,c=typeof t.description=="function"?await t.description(u):t.description,h=typeof d=="object"&&!Br(d)?d:{message:d||"",id:r||""};this.create({id:r,type:"error",description:c,...h})}}).finally(()=>{var u;s&&(this.dismiss(r),r=void 0),(u=t.finally)==null||u.call(t)}),o=()=>new Promise((u,d)=>a.then(()=>n[0]==="reject"?d(n[1]):u(n[1])).catch(d));return typeof r!="string"&&typeof r!="number"?{unwrap:o}:Object.assign(r,{unwrap:o})});Ke(this,"custom",(e,t)=>{const r=(t==null?void 0:t.id)||fo++;return this.publish({component:e,id:r,...t}),r});Ke(this,"getActiveToasts",()=>this.toasts.filter(e=>!this.dismissedToasts.has(e.id)));this.subscribers=[],this.toasts=[],this.dismissedToasts=new Set}};const kt=new m2;function g2(e,t){const r=(t==null?void 0:t.id)||fo++;return kt.create({message:e,id:r,type:"default",...t}),r}const _2=e=>e&&typeof e=="object"&&"ok"in e&&typeof e.ok=="boolean"&&"status"in e&&typeof e.status=="number",y2=g2,b2=()=>kt.toasts,w2=()=>kt.getActiveToasts(),Na=Object.assign(y2,{success:kt.success,info:kt.info,warning:kt.warning,error:kt.error,custom:kt.custom,message:kt.message,promise:kt.promise,dismiss:kt.dismiss,loading:kt.loading},{getHistory:b2,getToasts:w2}),v2="/assets/best-DRyaABFw.onnx",_y=Hv(),Cf=Cv(_y),$2={class:"w-full h-full flex flex-col justify-center items-center"},x2=["width","height"],S2={key:0,absolute:"","top-0":"","left-0":"","w-full":"","h-full":"",flex:"","justify-center":"","items-center":"","text-gray-500":"","text-sm":""},k2={class:"flex gap-4 text-gray-500 text-lg"},T2=6,I2=.6,E2=Wb({__name:"App",setup(e){const t=or(null),r=or([]),i=or(640),s=Lb("canvasRef"),n=or(["assembling-machine-1","assembling-machine-2","assembling-machine-3","transport-belt","fast-transport-belt","express-transport-belt"]);function a(m,g,y){return m>g?[y,Math.round(y*g/m)]:[Math.round(y*m/g),y]}async function o(m){return new Promise(g=>{m.onload=()=>g()})}function u(m,g){const y=Math.max(m.topLeftX,g.topLeftX),S=Math.max(m.topLeftY,g.topLeftY),v=Math.min(m.bottomRightX,g.bottomRightX),b=Math.min(m.bottomRightY,g.bottomRightY),k=Math.max(0,v-y)*Math.max(0,b-S),x=(m.bottomRightX-m.topLeftX)*(m.bottomRightY-m.topLeftY),I=(g.bottomRightX-g.topLeftX)*(g.bottomRightY-g.topLeftY),z=x+I-k;return k/z}function d(m){if(!s.value)return;const g=s.value.getContext("2d");if(g){g.strokeStyle="rgb(0, 255, 0)",g.fillStyle="rgb(0, 255, 0)",g.font="20px Arial",g.lineWidth=2;for(const y of m)g.strokeRect(y.topLeftX,y.topLeftY,y.bottomRightX-y.topLeftX,y.bottomRightY-y.topLeftY),g.fillText(n.value[y.classId],y.topLeftX,y.topLeftY)}}function c(m,g){const y=[];for(let b=0;bO&&(O=oe,A=Y)}const R=k-I/2,W=x-z/2,ie=k+I/2,X=x+z/2,ne={topLeftX:R,topLeftY:W,bottomRightX:ie,bottomRightY:X,classId:A,confidence:O};Ok.confidence-b.confidence);const S=.6,v=[];for(;y.length>0;){const b=y.shift();v.push(b);for(let k=y.length-1;k>=0;k--)u(b,y[k])>=S&&y.splice(k,1)}return v}async function f(m){if(!m){Na.error("No file selected");return}if(!s.value){Na.error("Failed to get canvas element");return}const g=await createImageBitmap(m),y=document.createElement("img");y.src=URL.createObjectURL(m),r.value.push(y.src),await o(y);const[S,v]=a(g.width,g.height,i.value);y.width=S,y.height=v;const b=s.value.getContext("2d");if(!b){Na.error("Failed to get canvas context");return}b.clearRect(0,0,i.value,i.value),b.fillStyle="rgb(114, 114, 114)",b.fillRect(0,0,i.value,i.value);const k=(i.value-S)/2,x=(i.value-v)/2;b.drawImage(y,k,x,S,v);const I=b.getImageData(k,x,i.value,i.value).data,z=[],O=[],A=[];for(let Y=0;Y{m&&f(m[0])}),Ao(()=>{r.value.forEach(m=>URL.revokeObjectURL(m))}),(m,g)=>(zi(),yn("div",$2,[g[6]||(g[6]=At("div",{class:"flex flex-col gap-2 mb-8"},[At("div",{class:"text-3xl font-bold text-center"}," Factorio YOLO v0 "),At("div",{class:"text-gray-500 text-sm text-center"}," The playground for the Factorio YOLO v0 model. ")],-1)),At("div",{w:"80 md:120 lg:160",h:"80 md:120 lg:160","overflow-hidden":"","rounded-lg":"",relative:"","cursor-pointer":"",border:"1 gray-200 dark:gray-700","transition-all":"","duration-300":"","mb-4":"",onClick:g[0]||(g[0]=y=>Pr(h).open())},[At("canvas",{ref_key:"canvasRef",ref:s,width:i.value,height:i.value,class:"origin-top-left",scale:"50 md:75 lg:100","transition-all":"","duration-300":""},null,8,x2),r.value.length?Rw("",!0):(zi(),yn("div",S2,g[3]||(g[3]=[Ga(" Click to select a image ",-1),At("br",null,null,-1),Ga(" (Square image required) ",-1)])))]),At("div",k2,[Pr(_y)?(zi(),yn("div",{key:0,"cursor-pointer":"","i-solar-cloudy-moon-bold-duotone":"",onClick:g[1]||(g[1]=y=>Pr(Cf)())})):(zi(),yn("div",{key:1,"cursor-pointer":"","i-solar-sun-2-bold":"",onClick:g[2]||(g[2]=y=>Pr(Cf)())})),g[4]||(g[4]=At("a",{"i-carbon-logo-github":"",href:"https://github.com/moeru-ai/airi-factorio/tree/main/models/factorio-yolo-v0",target:"_blank"},null,-1)),g[5]||(g[5]=At("a",{"i-simple-icons-huggingface":"",href:"https://huggingface.co/spaces/proj-airi/factorio-yolo-v0-playground",target:"_blank"},null,-1))])]))}}),yy=pv(E2);Object.values([yv]).forEach(e=>{var t;return(t=e.install)==null?void 0:t.call(e,yy)});yy.mount("#app"); +//# sourceMappingURL=index-7glZF3tM.js.map