PNG  IHDRQgAMA a cHRMz&u0`:pQ<bKGDgmIDATxwUﹻ& ^CX(J I@ "% (** BX +*i"]j(IH{~R)[~>h{}gy)I$Ij .I$I$ʊy@}x.: $I$Ii}VZPC)I$IF ^0ʐJ$I$Q^}{"r=OzI$gRZeC.IOvH eKX $IMpxsk.쒷/&r[޳<v| .I~)@$updYRa$I |M.e JaֶpSYR6j>h%IRز if&uJ)M$I vLi=H;7UJ,],X$I1AҒJ$ XY XzI@GNҥRT)E@;]K*Mw;#5_wOn~\ DC&$(A5 RRFkvIR}l!RytRl;~^ǷJj اy뷦BZJr&ӥ8Pjw~vnv X^(I;4R=P[3]J,]ȏ~:3?[ a&e)`e*P[4]T=Cq6R[ ~ޤrXR Հg(t_HZ-Hg M$ãmL5R uk*`%C-E6/%[t X.{8P9Z.vkXŐKjgKZHg(aK9ڦmKjѺm_ \#$5,)-  61eJ,5m| r'= &ڡd%-]J on Xm|{ RҞe $eڧY XYrԮ-a7RK6h>n$5AVڴi*ֆK)mѦtmr1p| q:흺,)Oi*ֺK)ܬ֦K-5r3>0ԔHjJئEZj,%re~/z%jVMڸmrt)3]J,T K֦OvԒgii*bKiNO~%PW0=dii2tJ9Jݕ{7"I P9JKTbu,%r"6RKU}Ij2HKZXJ,妝 XYrP ެ24c%i^IK|.H,%rb:XRl1X4Pe/`x&P8Pj28Mzsx2r\zRPz4J}yP[g=L) .Q[6RjWgp FIH*-`IMRaK9TXcq*I y[jE>cw%gLRԕiFCj-ďa`#e~I j,%r,)?[gp FI˨mnWX#>mʔ XA DZf9,nKҲzIZXJ,L#kiPz4JZF,I,`61%2s $,VOϚ2/UFJfy7K> X+6 STXIeJILzMfKm LRaK9%|4p9LwJI!`NsiazĔ)%- XMq>pk$-$Q2x#N ؎-QR}ᶦHZډ)J,l#i@yn3LN`;nڔ XuX5pF)m|^0(>BHF9(cզEerJI rg7 4I@z0\JIi䵙RR0s;$s6eJ,`n 䂦0a)S)A 1eJ,堌#635RIgpNHuTH_SԕqVe ` &S)>p;S$魁eKIuX`I4춒o}`m$1":PI<[v9^\pTJjriRŭ P{#{R2,`)e-`mgj~1ϣLKam7&U\j/3mJ,`F;M'䱀 .KR#)yhTq;pcK9(q!w?uRR,n.yw*UXj#\]ɱ(qv2=RqfB#iJmmL<]Y͙#$5 uTU7ӦXR+q,`I}qL'`6Kͷ6r,]0S$- [RKR3oiRE|nӦXR.(i:LDLTJjY%o:)6rxzҒqTJjh㞦I.$YR.ʼnGZ\ֿf:%55 I˼!6dKxm4E"mG_ s? .e*?LRfK9%q#uh$)i3ULRfK9yxm܌bj84$i1U^@Wbm4uJ,ҪA>_Ij?1v32[gLRD96oTaR׿N7%L2 NT,`)7&ƝL*꽙yp_$M2#AS,`)7$rkTA29_Iye"|/0t)$n XT2`YJ;6Jx".e<`$) PI$5V4]29SRI>~=@j]lp2`K9Jaai^" Ԋ29ORI%:XV5]JmN9]H;1UC39NI%Xe78t)a;Oi Ҙ>Xt"~G>_mn:%|~ޅ_+]$o)@ǀ{hgN;IK6G&rp)T2i୦KJuv*T=TOSV>(~D>dm,I*Ɛ:R#ۙNI%D>G.n$o;+#RR!.eU˽TRI28t)1LWϚ>IJa3oFbu&:tJ*(F7y0ZR ^p'Ii L24x| XRI%ۄ>S1]Jy[zL$adB7.eh4%%누>WETf+3IR:I3Xה)3אOۦSRO'ٺ)S}"qOr[B7ϙ.edG)^ETR"RtRݜh0}LFVӦDB^k_JDj\=LS(Iv─aTeZ%eUAM-0;~˃@i|l @S4y72>sX-vA}ϛBI!ݎߨWl*)3{'Y|iSlEڻ(5KtSI$Uv02,~ԩ~x;P4ցCrO%tyn425:KMlD ^4JRxSهF_}شJTS6uj+ﷸk$eZO%G*^V2u3EMj3k%)okI]dT)URKDS 7~m@TJR~荪fT"֛L \sM -0T KfJz+nإKr L&j()[E&I ߴ>e FW_kJR|!O:5/2跌3T-'|zX ryp0JS ~^F>-2< `*%ZFP)bSn"L :)+pʷf(pO3TMW$~>@~ū:TAIsV1}S2<%ޟM?@iT ,Eūoz%i~g|`wS(]oȤ8)$ ntu`өe`6yPl IzMI{ʣzʨ )IZ2= ld:5+請M$-ї;U>_gsY$ÁN5WzWfIZ)-yuXIfp~S*IZdt;t>KūKR|$#LcԀ+2\;kJ`]YǔM1B)UbG"IRߊ<xܾӔJ0Z='Y嵤 Leveg)$znV-º^3Ւof#0Tfk^Zs[*I꯳3{)ˬW4Ւ4 OdpbZRS|*I 55#"&-IvT&/윚Ye:i$ 9{LkuRe[I~_\ؠ%>GL$iY8 9ܕ"S`kS.IlC;Ҏ4x&>u_0JLr<J2(^$5L s=MgV ~,Iju> 7r2)^=G$1:3G< `J3~&IR% 6Tx/rIj3O< ʔ&#f_yXJiގNSz; Tx(i8%#4 ~AS+IjerIUrIj362v885+IjAhK__5X%nV%Iͳ-y|7XV2v4fzo_68"S/I-qbf; LkF)KSM$ Ms>K WNV}^`-큧32ŒVؙGdu,^^m%6~Nn&͓3ŒVZMsRpfEW%IwdǀLm[7W&bIRL@Q|)* i ImsIMmKmyV`i$G+R 0tV'!V)֏28vU7͒vHꦼtxꗞT ;S}7Mf+fIRHNZUkUx5SAJㄌ9MqμAIRi|j5)o*^'<$TwI1hEU^c_j?Е$%d`z cyf,XO IJnTgA UXRD }{H}^S,P5V2\Xx`pZ|Yk:$e ~ @nWL.j+ϝYb퇪bZ BVu)u/IJ_ 1[p.p60bC >|X91P:N\!5qUB}5a5ja `ubcVxYt1N0Zzl4]7­gKj]?4ϻ *[bg$)+À*x쳀ogO$~,5 زUS9 lq3+5mgw@np1sso Ӻ=|N6 /g(Wv7U;zωM=wk,0uTg_`_P`uz?2yI!b`kĸSo+Qx%!\οe|އԁKS-s6pu_(ֿ$i++T8=eY; צP+phxWQv*|p1. ά. XRkIQYP,drZ | B%wP|S5`~́@i޾ E;Չaw{o'Q?%iL{u D?N1BD!owPHReFZ* k_-~{E9b-~P`fE{AܶBJAFO wx6Rox5 K5=WwehS8 (JClJ~ p+Fi;ŗo+:bD#g(C"wA^ r.F8L;dzdIHUX݆ϞXg )IFqem%I4dj&ppT{'{HOx( Rk6^C٫O.)3:s(۳(Z?~ٻ89zmT"PLtw䥈5&b<8GZ-Y&K?e8,`I6e(֍xb83 `rzXj)F=l($Ij 2*(F?h(/9ik:I`m#p3MgLaKjc/U#n5S# m(^)=y=đx8ŬI[U]~SцA4p$-F i(R,7Cx;X=cI>{Km\ o(Tv2vx2qiiDJN,Ҏ!1f 5quBj1!8 rDFd(!WQl,gSkL1Bxg''՞^ǘ;pQ P(c_ IRujg(Wz bs#P­rz> k c&nB=q+ؔXn#r5)co*Ũ+G?7< |PQӣ'G`uOd>%Mctz# Ԫڞ&7CaQ~N'-P.W`Oedp03C!IZcIAMPUۀ5J<\u~+{9(FbbyAeBhOSܳ1 bÈT#ŠyDžs,`5}DC-`̞%r&ڙa87QWWp6e7 Rϫ/oY ꇅ Nܶըtc!LA T7V4Jsū I-0Pxz7QNF_iZgúWkG83 0eWr9 X]㾮݁#Jˢ C}0=3ݱtBi]_ &{{[/o[~ \q鯜00٩|cD3=4B_b RYb$óBRsf&lLX#M*C_L܄:gx)WΘsGSbuL rF$9';\4Ɍq'n[%p.Q`u hNb`eCQyQ|l_C>Lb꟟3hSb #xNxSs^ 88|Mz)}:](vbۢamŖ࿥ 0)Q7@0=?^k(*J}3ibkFn HjB׻NO z x}7p 0tfDX.lwgȔhԾŲ }6g E |LkLZteu+=q\Iv0쮑)QٵpH8/2?Σo>Jvppho~f>%bMM}\//":PTc(v9v!gոQ )UfVG+! 35{=x\2+ki,y$~A1iC6#)vC5^>+gǵ@1Hy٪7u;p psϰu/S <aʸGu'tD1ԝI<pg|6j'p:tպhX{o(7v],*}6a_ wXRk,O]Lܳ~Vo45rp"N5k;m{rZbΦ${#)`(Ŵg,;j%6j.pyYT?}-kBDc3qA`NWQū20/^AZW%NQ MI.X#P#,^Ebc&?XR tAV|Y.1!؅⨉ccww>ivl(JT~ u`ٵDm q)+Ri x/x8cyFO!/*!/&,7<.N,YDŽ&ܑQF1Bz)FPʛ?5d 6`kQձ λc؎%582Y&nD_$Je4>a?! ͨ|ȎWZSsv8 j(I&yj Jb5m?HWp=g}G3#|I,5v珿] H~R3@B[☉9Ox~oMy=J;xUVoj bUsl_35t-(ՃɼRB7U!qc+x4H_Qo֮$[GO<4`&č\GOc[.[*Af%mG/ ňM/r W/Nw~B1U3J?P&Y )`ѓZ1p]^l“W#)lWZilUQu`-m|xĐ,_ƪ|9i:_{*(3Gѧ}UoD+>m_?VPۅ15&}2|/pIOʵ> GZ9cmíتmnz)yߐbD >e}:) r|@R5qVSA10C%E_'^8cR7O;6[eKePGϦX7jb}OTGO^jn*媓7nGMC t,k31Rb (vyܴʭ!iTh8~ZYZp(qsRL ?b}cŨʊGO^!rPJO15MJ[c&~Z`"ѓޔH1C&^|Ш|rʼ,AwĴ?b5)tLU)F| &g٣O]oqSUjy(x<Ϳ3 .FSkoYg2 \_#wj{u'rQ>o;%n|F*O_L"e9umDds?.fuuQbIWz |4\0 sb;OvxOSs; G%T4gFRurj(֍ڑb uԖKDu1MK{1^ q; C=6\8FR艇!%\YÔU| 88m)֓NcLve C6z;o&X x59:q61Z(T7>C?gcļxѐ Z oo-08jہ x,`' ҔOcRlf~`jj".Nv+sM_]Zk g( UOPyεx%pUh2(@il0ݽQXxppx-NS( WO+轾 nFߢ3M<;z)FBZjciu/QoF 7R¥ ZFLF~#ȣߨ^<쩡ݛкvџ))ME>ώx4m#!-m!L;vv#~Y[đKmx9.[,UFS CVkZ +ߟrY٧IZd/ioi$%͝ب_ֶX3ܫhNU ZZgk=]=bbJS[wjU()*I =ώ:}-蹞lUj:1}MWm=̛ _ ¾,8{__m{_PVK^n3esw5ӫh#$-q=A̟> ,^I}P^J$qY~Q[ Xq9{#&T.^GVj__RKpn,b=`żY@^՝;z{paVKkQXj/)y TIc&F;FBG7wg ZZDG!x r_tƢ!}i/V=M/#nB8 XxЫ ^@CR<{䤭YCN)eKOSƟa $&g[i3.C6xrOc8TI;o hH6P&L{@q6[ Gzp^71j(l`J}]e6X☉#͕ ׈$AB1Vjh㭦IRsqFBjwQ_7Xk>y"N=MB0 ,C #o6MRc0|$)ف"1!ixY<B9mx `,tA>)5ػQ?jQ?cn>YZe Tisvh# GMމȇp:ԴVuږ8ɼH]C.5C!UV;F`mbBk LTMvPʍϤj?ԯ/Qr1NB`9s"s TYsz &9S%U԰> {<ؿSMxB|H\3@!U| k']$U+> |HHMLޢ?V9iD!-@x TIî%6Z*9X@HMW#?nN ,oe6?tQwڱ.]-y':mW0#!J82qFjH -`ѓ&M0u Uγmxϵ^-_\])@0Rt.8/?ٰCY]x}=sD3ojަЫNuS%U}ԤwHH>ڗjܷ_3gN q7[q2la*ArǓԖ+p8/RGM ]jacd(JhWko6ڎbj]i5Bj3+3!\j1UZLsLTv8HHmup<>gKMJj0@H%,W΃7R) ">c, xixј^ aܖ>H[i.UIHc U1=yW\=S*GR~)AF=`&2h`DzT󑓶J+?W+}C%P:|0H܆}-<;OC[~o.$~i}~HQ TvXΈr=b}$vizL4:ȰT|4~*!oXQR6Lk+#t/g lԁߖ[Jڶ_N$k*". xsxX7jRVbAAʯKҎU3)zSNN _'s?f)6X!%ssAkʱ>qƷb hg %n ~p1REGMHH=BJiy[<5 ǁJҖgKR*倳e~HUy)Ag,K)`Vw6bRR:qL#\rclK/$sh*$ 6덤 KԖc 3Z9=Ɣ=o>X Ώ"1 )a`SJJ6k(<c e{%kϊP+SL'TcMJWRm ŏ"w)qc ef꒵i?b7b('"2r%~HUS1\<(`1Wx9=8HY9m:X18bgD1u ~|H;K-Uep,, C1 RV.MR5άh,tWO8WC$ XRVsQS]3GJ|12 [vM :k#~tH30Rf-HYݺ-`I9%lIDTm\ S{]9gOڒMNCV\G*2JRŨ;Rҏ^ڽ̱mq1Eu?To3I)y^#jJw^Ńj^vvlB_⋌P4x>0$c>K†Aļ9s_VjTt0l#m>E-,,x,-W)سo&96RE XR.6bXw+)GAEvL)͞K4$p=Ũi_ѱOjb HY/+@θH9޼]Nԥ%n{ &zjT? Ty) s^ULlb,PiTf^<À] 62R^V7)S!nllS6~͝V}-=%* ʻ>G DnK<y&>LPy7'r=Hj 9V`[c"*^8HpcO8bnU`4JȪAƋ#1_\ XϘHPRgik(~G~0DAA_2p|J묭a2\NCr]M_0 ^T%e#vD^%xy-n}-E\3aS%yN!r_{ )sAw ڼp1pEAk~v<:`'ӭ^5 ArXOI驻T (dk)_\ PuA*BY]yB"l\ey hH*tbK)3 IKZ򹞋XjN n *n>k]X_d!ryBH ]*R 0(#'7 %es9??ښFC,ՁQPjARJ\Ρw K#jahgw;2$l*) %Xq5!U᢯6Re] |0[__64ch&_}iL8KEgҎ7 M/\`|.p,~`a=BR?xܐrQ8K XR2M8f ?`sgWS%" Ԉ 7R%$ N}?QL1|-эټwIZ%pvL3Hk>,ImgW7{E xPHx73RA @RS CC !\ȟ5IXR^ZxHл$Q[ŝ40 (>+ _C >BRt<,TrT {O/H+˟Pl6 I B)/VC<6a2~(XwV4gnXR ϱ5ǀHٻ?tw똤Eyxp{#WK qG%5],(0ӈH HZ])ג=K1j&G(FbM@)%I` XRg ʔ KZG(vP,<`[ Kn^ SJRsAʠ5xՅF`0&RbV tx:EaUE/{fi2;.IAwW8/tTxAGOoN?G}l L(n`Zv?pB8K_gI+ܗ #i?ޙ.) p$utc ~DžfՈEo3l/)I-U?aԅ^jxArA ΧX}DmZ@QLےbTXGd.^|xKHR{|ΕW_h] IJ`[G9{).y) 0X YA1]qp?p_k+J*Y@HI>^?gt.06Rn ,` ?);p pSF9ZXLBJPWjgQ|&)7! HjQt<| ؅W5 x W HIzYoVMGP Hjn`+\(dNW)F+IrS[|/a`K|ͻ0Hj{R,Q=\ (F}\WR)AgSG`IsnAR=|8$}G(vC$)s FBJ?]_u XRvύ6z ŨG[36-T9HzpW̞ú Xg큽=7CufzI$)ki^qk-) 0H*N` QZkk]/tnnsI^Gu't=7$ Z;{8^jB% IItRQS7[ϭ3 $_OQJ`7!]W"W,)Iy W AJA;KWG`IY{8k$I$^%9.^(`N|LJ%@$I}ֽp=FB*xN=gI?Q{٥4B)mw $Igc~dZ@G9K X?7)aK%݅K$IZ-`IpC U6$I\0>!9k} Xa IIS0H$I H ?1R.Чj:4~Rw@p$IrA*u}WjWFPJ$I➓/6#! LӾ+ X36x8J |+L;v$Io4301R20M I$-E}@,pS^ޟR[/s¹'0H$IKyfŸfVOπFT*a$I>He~VY/3R/)>d$I>28`Cjw,n@FU*9ttf$I~<;=/4RD~@ X-ѕzἱI$: ԍR a@b X{+Qxuq$IЛzo /~3\8ڒ4BN7$IҀj V]n18H$IYFBj3̵̚ja pp $Is/3R Ӻ-Yj+L;.0ŔI$Av? #!5"aʄj}UKmɽH$IjCYs?h$IDl843.v}m7UiI=&=0Lg0$I4: embe` eQbm0u? $IT!Sƍ'-sv)s#C0:XB2a w I$zbww{."pPzO =Ɔ\[ o($Iaw]`E).Kvi:L*#gР7[$IyGPI=@R 4yR~̮´cg I$I/<tPͽ hDgo 94Z^k盇΄8I56^W$I^0̜N?4*H`237}g+hxoq)SJ@p|` $I%>-hO0eO>\ԣNߌZD6R=K ~n($I$y3D>o4b#px2$yڪtzW~a $I~?x'BwwpH$IZݑnC㧄Pc_9sO gwJ=l1:mKB>Ab<4Lp$Ib o1ZQ@85b̍ S'F,Fe,^I$IjEdù{l4 8Ys_s Z8.x m"+{~?q,Z D!I$ϻ'|XhB)=…']M>5 rgotԎ 獽PH$IjIPhh)n#cÔqA'ug5qwU&rF|1E%I$%]!'3AFD/;Ck_`9 v!ٴtPV;x`'*bQa w I$Ix5 FC3D_~A_#O݆DvV?<qw+I$I{=Z8".#RIYyjǪ=fDl9%M,a8$I$Ywi[7ݍFe$s1ՋBVA?`]#!oz4zjLJo8$I$%@3jAa4(o ;p,,dya=F9ً[LSPH$IJYЉ+3> 5"39aZ<ñh!{TpBGkj}Sp $IlvF.F$I z< '\K*qq.f<2Y!S"-\I$IYwčjF$ w9 \ߪB.1v!Ʊ?+r:^!I$BϹB H"B;L'G[ 4U#5>੐)|#o0aڱ$I>}k&1`U#V?YsV x>{t1[I~D&(I$I/{H0fw"q"y%4 IXyE~M3 8XψL}qE$I[> nD?~sf ]o΁ cT6"?'_Ἣ $I>~.f|'!N?⟩0G KkXZE]ޡ;/&?k OۘH$IRۀwXӨ<7@PnS04aӶp.:@\IWQJ6sS%I$e5ڑv`3:x';wq_vpgHyXZ 3gЂ7{{EuԹn±}$I$8t;b|591nءQ"P6O5i }iR̈́%Q̄p!I䮢]O{H$IRϻ9s֧ a=`- aB\X0"+5"C1Hb?߮3x3&gşggl_hZ^,`5?ߎvĸ%̀M!OZC2#0x LJ0 Gw$I$I}<{Eb+y;iI,`ܚF:5ܛA8-O-|8K7s|#Z8a&><a&/VtbtLʌI$I$I$I$I$I$IRjDD%tEXtdate:create2022-05-31T04:40:26+00:00!Î%tEXtdate:modify2022-05-31T04:40:26+00:00|{2IENDB`Mini Shell

HOME


Mini Shell 1.0
DIR:/lib/gcc/x86_64-redhat-linux/4.8.5/include/
Upload File :
Current File : //lib/gcc/x86_64-redhat-linux/4.8.5/include/avxintrin.h
/* Copyright (C) 2008-2013 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* Implemented from the specification included in the Intel C++ Compiler
   User Guide and Reference, version 11.0.  */

#ifndef _IMMINTRIN_H_INCLUDED
# error "Never use <avxintrin.h> directly; include <immintrin.h> instead."
#endif

/* Internal data types for implementing the intrinsics.  */
typedef double __v4df __attribute__ ((__vector_size__ (32)));
typedef float __v8sf __attribute__ ((__vector_size__ (32)));
typedef long long __v4di __attribute__ ((__vector_size__ (32)));
typedef int __v8si __attribute__ ((__vector_size__ (32)));
typedef short __v16hi __attribute__ ((__vector_size__ (32)));
typedef char __v32qi __attribute__ ((__vector_size__ (32)));

/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  */
typedef float __m256 __attribute__ ((__vector_size__ (32),
				     __may_alias__));
typedef long long __m256i __attribute__ ((__vector_size__ (32),
					  __may_alias__));
typedef double __m256d __attribute__ ((__vector_size__ (32),
				       __may_alias__));

/* Compare predicates for scalar and packed compare intrinsics.  */

/* Equal (ordered, non-signaling)  */
#define _CMP_EQ_OQ	0x00
/* Less-than (ordered, signaling)  */
#define _CMP_LT_OS	0x01
/* Less-than-or-equal (ordered, signaling)  */
#define _CMP_LE_OS	0x02
/* Unordered (non-signaling)  */
#define _CMP_UNORD_Q	0x03
/* Not-equal (unordered, non-signaling)  */
#define _CMP_NEQ_UQ	0x04
/* Not-less-than (unordered, signaling)  */
#define _CMP_NLT_US	0x05
/* Not-less-than-or-equal (unordered, signaling)  */
#define _CMP_NLE_US	0x06
/* Ordered (nonsignaling)   */
#define _CMP_ORD_Q	0x07
/* Equal (unordered, non-signaling)  */
#define _CMP_EQ_UQ	0x08
/* Not-greater-than-or-equal (unordered, signaling)  */
#define _CMP_NGE_US	0x09
/* Not-greater-than (unordered, signaling)  */
#define _CMP_NGT_US	0x0a
/* False (ordered, non-signaling)  */
#define _CMP_FALSE_OQ	0x0b
/* Not-equal (ordered, non-signaling)  */
#define _CMP_NEQ_OQ	0x0c
/* Greater-than-or-equal (ordered, signaling)  */
#define _CMP_GE_OS	0x0d
/* Greater-than (ordered, signaling)  */
#define _CMP_GT_OS	0x0e
/* True (unordered, non-signaling)  */
#define _CMP_TRUE_UQ	0x0f
/* Equal (ordered, signaling)  */
#define _CMP_EQ_OS	0x10
/* Less-than (ordered, non-signaling)  */
#define _CMP_LT_OQ	0x11
/* Less-than-or-equal (ordered, non-signaling)  */
#define _CMP_LE_OQ	0x12
/* Unordered (signaling)  */
#define _CMP_UNORD_S	0x13
/* Not-equal (unordered, signaling)  */
#define _CMP_NEQ_US	0x14
/* Not-less-than (unordered, non-signaling)  */
#define _CMP_NLT_UQ	0x15
/* Not-less-than-or-equal (unordered, non-signaling)  */
#define _CMP_NLE_UQ	0x16
/* Ordered (signaling)  */
#define _CMP_ORD_S	0x17
/* Equal (unordered, signaling)  */
#define _CMP_EQ_US	0x18
/* Not-greater-than-or-equal (unordered, non-signaling)  */
#define _CMP_NGE_UQ	0x19
/* Not-greater-than (unordered, non-signaling)  */
#define _CMP_NGT_UQ	0x1a
/* False (ordered, signaling)  */
#define _CMP_FALSE_OS	0x1b
/* Not-equal (ordered, signaling)  */
#define _CMP_NEQ_OS	0x1c
/* Greater-than-or-equal (ordered, non-signaling)  */
#define _CMP_GE_OQ	0x1d
/* Greater-than (ordered, non-signaling)  */
#define _CMP_GT_OQ	0x1e
/* True (unordered, signaling)  */
#define _CMP_TRUE_US	0x1f

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_add_pd (__m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_addpd256 ((__v4df)__A, (__v4df)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_add_ps (__m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_addps256 ((__v8sf)__A, (__v8sf)__B);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_addsub_pd (__m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_addsubpd256 ((__v4df)__A, (__v4df)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_addsub_ps (__m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_addsubps256 ((__v8sf)__A, (__v8sf)__B);
}


extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_and_pd (__m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_andpd256 ((__v4df)__A, (__v4df)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_and_ps (__m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_andps256 ((__v8sf)__A, (__v8sf)__B);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_andnot_pd (__m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_andnpd256 ((__v4df)__A, (__v4df)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_andnot_ps (__m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_andnps256 ((__v8sf)__A, (__v8sf)__B);
}

/* Double/single precision floating point blend instructions - select
   data from 2 sources using constant/variable mask.  */

#ifdef __OPTIMIZE__
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_blend_pd (__m256d __X, __m256d __Y, const int __M)
{
  return (__m256d) __builtin_ia32_blendpd256 ((__v4df)__X,
					      (__v4df)__Y,
					      __M);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_blend_ps (__m256 __X, __m256 __Y, const int __M)
{
  return (__m256) __builtin_ia32_blendps256 ((__v8sf)__X,
					     (__v8sf)__Y,
					     __M);
}
#else
#define _mm256_blend_pd(X, Y, M)					\
  ((__m256d) __builtin_ia32_blendpd256 ((__v4df)(__m256d)(X),		\
					(__v4df)(__m256d)(Y), (int)(M)))

#define _mm256_blend_ps(X, Y, M)					\
  ((__m256) __builtin_ia32_blendps256 ((__v8sf)(__m256)(X),		\
				       (__v8sf)(__m256)(Y), (int)(M)))
#endif

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_blendv_pd (__m256d __X, __m256d __Y, __m256d __M)
{
  return (__m256d) __builtin_ia32_blendvpd256 ((__v4df)__X,
					       (__v4df)__Y,
					       (__v4df)__M);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_blendv_ps (__m256 __X, __m256 __Y, __m256 __M)
{
  return (__m256) __builtin_ia32_blendvps256 ((__v8sf)__X,
					      (__v8sf)__Y,
					      (__v8sf)__M);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_div_pd (__m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_divpd256 ((__v4df)__A, (__v4df)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_div_ps (__m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_divps256 ((__v8sf)__A, (__v8sf)__B);
}

/* Dot product instructions with mask-defined summing and zeroing parts
   of result.  */

#ifdef __OPTIMIZE__
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_dp_ps (__m256 __X, __m256 __Y, const int __M)
{
  return (__m256) __builtin_ia32_dpps256 ((__v8sf)__X,
					  (__v8sf)__Y,
					  __M);
}
#else
#define _mm256_dp_ps(X, Y, M)						\
  ((__m256) __builtin_ia32_dpps256 ((__v8sf)(__m256)(X),		\
				    (__v8sf)(__m256)(Y), (int)(M)))
#endif

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_hadd_pd (__m256d __X, __m256d __Y)
{
  return (__m256d) __builtin_ia32_haddpd256 ((__v4df)__X, (__v4df)__Y);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_hadd_ps (__m256 __X, __m256 __Y)
{
  return (__m256) __builtin_ia32_haddps256 ((__v8sf)__X, (__v8sf)__Y);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_hsub_pd (__m256d __X, __m256d __Y)
{
  return (__m256d) __builtin_ia32_hsubpd256 ((__v4df)__X, (__v4df)__Y);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_hsub_ps (__m256 __X, __m256 __Y)
{
  return (__m256) __builtin_ia32_hsubps256 ((__v8sf)__X, (__v8sf)__Y);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_max_pd (__m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_maxpd256 ((__v4df)__A, (__v4df)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_max_ps (__m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_maxps256 ((__v8sf)__A, (__v8sf)__B);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_min_pd (__m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_minpd256 ((__v4df)__A, (__v4df)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_min_ps (__m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_minps256 ((__v8sf)__A, (__v8sf)__B);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mul_pd (__m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_mulpd256 ((__v4df)__A, (__v4df)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mul_ps (__m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_mulps256 ((__v8sf)__A, (__v8sf)__B);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_or_pd (__m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_orpd256 ((__v4df)__A, (__v4df)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_or_ps (__m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_orps256 ((__v8sf)__A, (__v8sf)__B);
}

#ifdef __OPTIMIZE__
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shuffle_pd (__m256d __A, __m256d __B, const int __mask)
{
  return (__m256d) __builtin_ia32_shufpd256 ((__v4df)__A, (__v4df)__B,
					     __mask);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shuffle_ps (__m256 __A, __m256 __B, const int __mask)
{
  return (__m256) __builtin_ia32_shufps256 ((__v8sf)__A, (__v8sf)__B,
					    __mask);
}
#else
#define _mm256_shuffle_pd(A, B, N)					\
  ((__m256d)__builtin_ia32_shufpd256 ((__v4df)(__m256d)(A),		\
				      (__v4df)(__m256d)(B), (int)(N)))

#define _mm256_shuffle_ps(A, B, N)					\
  ((__m256) __builtin_ia32_shufps256 ((__v8sf)(__m256)(A),		\
				      (__v8sf)(__m256)(B), (int)(N)))
#endif

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sub_pd (__m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_subpd256 ((__v4df)__A, (__v4df)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sub_ps (__m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_subps256 ((__v8sf)__A, (__v8sf)__B);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_xor_pd (__m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_xorpd256 ((__v4df)__A, (__v4df)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_xor_ps (__m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_xorps256 ((__v8sf)__A, (__v8sf)__B);
}

#ifdef __OPTIMIZE__
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_pd (__m128d __X, __m128d __Y, const int __P)
{
  return (__m128d) __builtin_ia32_cmppd ((__v2df)__X, (__v2df)__Y, __P);
}

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_ps (__m128 __X, __m128 __Y, const int __P)
{
  return (__m128) __builtin_ia32_cmpps ((__v4sf)__X, (__v4sf)__Y, __P);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmp_pd (__m256d __X, __m256d __Y, const int __P)
{
  return (__m256d) __builtin_ia32_cmppd256 ((__v4df)__X, (__v4df)__Y,
					    __P);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmp_ps (__m256 __X, __m256 __Y, const int __P)
{
  return (__m256) __builtin_ia32_cmpps256 ((__v8sf)__X, (__v8sf)__Y,
					   __P);
}

extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_sd (__m128d __X, __m128d __Y, const int __P)
{
  return (__m128d) __builtin_ia32_cmpsd ((__v2df)__X, (__v2df)__Y, __P);
}

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_ss (__m128 __X, __m128 __Y, const int __P)
{
  return (__m128) __builtin_ia32_cmpss ((__v4sf)__X, (__v4sf)__Y, __P);
}
#else
#define _mm_cmp_pd(X, Y, P)						\
  ((__m128d) __builtin_ia32_cmppd ((__v2df)(__m128d)(X),		\
				   (__v2df)(__m128d)(Y), (int)(P)))

#define _mm_cmp_ps(X, Y, P)						\
  ((__m128) __builtin_ia32_cmpps ((__v4sf)(__m128)(X),			\
				  (__v4sf)(__m128)(Y), (int)(P)))

#define _mm256_cmp_pd(X, Y, P)						\
  ((__m256d) __builtin_ia32_cmppd256 ((__v4df)(__m256d)(X),		\
				      (__v4df)(__m256d)(Y), (int)(P)))

#define _mm256_cmp_ps(X, Y, P)						\
  ((__m256) __builtin_ia32_cmpps256 ((__v8sf)(__m256)(X),		\
				     (__v8sf)(__m256)(Y), (int)(P)))

#define _mm_cmp_sd(X, Y, P)						\
  ((__m128d) __builtin_ia32_cmpsd ((__v2df)(__m128d)(X),		\
				   (__v2df)(__m128d)(Y), (int)(P)))

#define _mm_cmp_ss(X, Y, P)						\
  ((__m128) __builtin_ia32_cmpss ((__v4sf)(__m128)(X),			\
				  (__v4sf)(__m128)(Y), (int)(P)))
#endif

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi32_pd (__m128i __A)
{
  return (__m256d)__builtin_ia32_cvtdq2pd256 ((__v4si) __A);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi32_ps (__m256i __A)
{
  return (__m256)__builtin_ia32_cvtdq2ps256 ((__v8si) __A);
}

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtpd_ps (__m256d __A)
{
  return (__m128)__builtin_ia32_cvtpd2ps256 ((__v4df) __A);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtps_epi32 (__m256 __A)
{
  return (__m256i)__builtin_ia32_cvtps2dq256 ((__v8sf) __A);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtps_pd (__m128 __A)
{
  return (__m256d)__builtin_ia32_cvtps2pd256 ((__v4sf) __A);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttpd_epi32 (__m256d __A)
{
  return (__m128i)__builtin_ia32_cvttpd2dq256 ((__v4df) __A);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtpd_epi32 (__m256d __A)
{
  return (__m128i)__builtin_ia32_cvtpd2dq256 ((__v4df) __A);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttps_epi32 (__m256 __A)
{
  return (__m256i)__builtin_ia32_cvttps2dq256 ((__v8sf) __A);
}

#ifdef __OPTIMIZE__
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extractf128_pd (__m256d __X, const int __N)
{
  return (__m128d) __builtin_ia32_vextractf128_pd256 ((__v4df)__X, __N);
}

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extractf128_ps (__m256 __X, const int __N)
{
  return (__m128) __builtin_ia32_vextractf128_ps256 ((__v8sf)__X, __N);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extractf128_si256 (__m256i __X, const int __N)
{
  return (__m128i) __builtin_ia32_vextractf128_si256 ((__v8si)__X, __N);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extract_epi32 (__m256i __X, int const __N)
{
  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 2);
  return _mm_extract_epi32 (__Y, __N % 4);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extract_epi16 (__m256i __X, int const __N)
{
  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 3);
  return _mm_extract_epi16 (__Y, __N % 8);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extract_epi8 (__m256i __X, int const __N)
{
  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 4);
  return _mm_extract_epi8 (__Y, __N % 16);
}

#ifdef __x86_64__
extern __inline long long  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extract_epi64 (__m256i __X, const int __N)
{
  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 1);
  return _mm_extract_epi64 (__Y, __N % 2);
}
#endif
#else
#define _mm256_extractf128_pd(X, N)					\
  ((__m128d) __builtin_ia32_vextractf128_pd256 ((__v4df)(__m256d)(X),	\
						(int)(N)))

#define _mm256_extractf128_ps(X, N)					\
  ((__m128) __builtin_ia32_vextractf128_ps256 ((__v8sf)(__m256)(X),	\
					       (int)(N)))

#define _mm256_extractf128_si256(X, N)					\
  ((__m128i) __builtin_ia32_vextractf128_si256 ((__v8si)(__m256i)(X),	\
						(int)(N)))

#define _mm256_extract_epi32(X, N)					\
  (__extension__							\
   ({									\
      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 2);		\
      _mm_extract_epi32 (__Y, (N) % 4);					\
    }))

#define _mm256_extract_epi16(X, N)					\
  (__extension__							\
   ({									\
      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 3);		\
      _mm_extract_epi16 (__Y, (N) % 8);					\
    }))

#define _mm256_extract_epi8(X, N)					\
  (__extension__							\
   ({									\
      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 4);		\
      _mm_extract_epi8 (__Y, (N) % 16);					\
    }))

#ifdef __x86_64__
#define _mm256_extract_epi64(X, N)					\
  (__extension__							\
   ({									\
      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 1);		\
      _mm_extract_epi64 (__Y, (N) % 2);					\
    }))
#endif
#endif

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_zeroall (void)
{
  __builtin_ia32_vzeroall ();
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_zeroupper (void)
{
  __builtin_ia32_vzeroupper ();
}

extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_permutevar_pd (__m128d __A, __m128i __C)
{
  return (__m128d) __builtin_ia32_vpermilvarpd ((__v2df)__A,
						(__v2di)__C);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutevar_pd (__m256d __A, __m256i __C)
{
  return (__m256d) __builtin_ia32_vpermilvarpd256 ((__v4df)__A,
						   (__v4di)__C);
}

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_permutevar_ps (__m128 __A, __m128i __C)
{
  return (__m128) __builtin_ia32_vpermilvarps ((__v4sf)__A,
					       (__v4si)__C);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutevar_ps (__m256 __A, __m256i __C)
{
  return (__m256) __builtin_ia32_vpermilvarps256 ((__v8sf)__A,
						  (__v8si)__C);
}

#ifdef __OPTIMIZE__
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_permute_pd (__m128d __X, const int __C)
{
  return (__m128d) __builtin_ia32_vpermilpd ((__v2df)__X, __C);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute_pd (__m256d __X, const int __C)
{
  return (__m256d) __builtin_ia32_vpermilpd256 ((__v4df)__X, __C);
}

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_permute_ps (__m128 __X, const int __C)
{
  return (__m128) __builtin_ia32_vpermilps ((__v4sf)__X, __C);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute_ps (__m256 __X, const int __C)
{
  return (__m256) __builtin_ia32_vpermilps256 ((__v8sf)__X, __C);
}
#else
#define _mm_permute_pd(X, C)						\
  ((__m128d) __builtin_ia32_vpermilpd ((__v2df)(__m128d)(X), (int)(C)))

#define _mm256_permute_pd(X, C)						\
  ((__m256d) __builtin_ia32_vpermilpd256 ((__v4df)(__m256d)(X),	(int)(C)))

#define _mm_permute_ps(X, C)						\
  ((__m128) __builtin_ia32_vpermilps ((__v4sf)(__m128)(X), (int)(C)))

#define _mm256_permute_ps(X, C)						\
  ((__m256) __builtin_ia32_vpermilps256 ((__v8sf)(__m256)(X), (int)(C)))
#endif

#ifdef __OPTIMIZE__
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute2f128_pd (__m256d __X, __m256d __Y, const int __C)
{
  return (__m256d) __builtin_ia32_vperm2f128_pd256 ((__v4df)__X,
						    (__v4df)__Y,
						    __C);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute2f128_ps (__m256 __X, __m256 __Y, const int __C)
{
  return (__m256) __builtin_ia32_vperm2f128_ps256 ((__v8sf)__X,
						   (__v8sf)__Y,
						   __C);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute2f128_si256 (__m256i __X, __m256i __Y, const int __C)
{
  return (__m256i) __builtin_ia32_vperm2f128_si256 ((__v8si)__X,
						    (__v8si)__Y,
						    __C);
}
#else
#define _mm256_permute2f128_pd(X, Y, C)					\
  ((__m256d) __builtin_ia32_vperm2f128_pd256 ((__v4df)(__m256d)(X),	\
					      (__v4df)(__m256d)(Y),	\
					      (int)(C)))

#define _mm256_permute2f128_ps(X, Y, C)					\
  ((__m256) __builtin_ia32_vperm2f128_ps256 ((__v8sf)(__m256)(X),	\
					     (__v8sf)(__m256)(Y),	\
					     (int)(C)))

#define _mm256_permute2f128_si256(X, Y, C)				\
  ((__m256i) __builtin_ia32_vperm2f128_si256 ((__v8si)(__m256i)(X),	\
					      (__v8si)(__m256i)(Y),	\
					      (int)(C)))
#endif

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_broadcast_ss (float const *__X)
{
  return (__m128) __builtin_ia32_vbroadcastss (__X);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcast_sd (double const *__X)
{
  return (__m256d) __builtin_ia32_vbroadcastsd256 (__X);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcast_ss (float const *__X)
{
  return (__m256) __builtin_ia32_vbroadcastss256 (__X);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcast_pd (__m128d const *__X)
{
  return (__m256d) __builtin_ia32_vbroadcastf128_pd256 (__X);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcast_ps (__m128 const *__X)
{
  return (__m256) __builtin_ia32_vbroadcastf128_ps256 (__X);
}

#ifdef __OPTIMIZE__
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_insertf128_pd (__m256d __X, __m128d __Y, const int __O)
{
  return (__m256d) __builtin_ia32_vinsertf128_pd256 ((__v4df)__X,
						     (__v2df)__Y,
						     __O);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_insertf128_ps (__m256 __X, __m128 __Y, const int __O)
{
  return (__m256) __builtin_ia32_vinsertf128_ps256 ((__v8sf)__X,
						    (__v4sf)__Y,
						    __O);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_insertf128_si256 (__m256i __X, __m128i __Y, const int __O)
{
  return (__m256i) __builtin_ia32_vinsertf128_si256 ((__v8si)__X,
						     (__v4si)__Y,
						     __O);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_insert_epi32 (__m256i __X, int __D, int const __N)
{
  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 2);
  __Y = _mm_insert_epi32 (__Y, __D, __N % 4);
  return _mm256_insertf128_si256 (__X, __Y, __N >> 2);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_insert_epi16 (__m256i __X, int __D, int const __N)
{
  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 3);
  __Y = _mm_insert_epi16 (__Y, __D, __N % 8);
  return _mm256_insertf128_si256 (__X, __Y, __N >> 3);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_insert_epi8 (__m256i __X, int __D, int const __N)
{
  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 4);
  __Y = _mm_insert_epi8 (__Y, __D, __N % 16);
  return _mm256_insertf128_si256 (__X, __Y, __N >> 4);
}

#ifdef __x86_64__
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_insert_epi64 (__m256i __X, long long __D, int const __N)
{
  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 1);
  __Y = _mm_insert_epi64 (__Y, __D, __N % 2);
  return _mm256_insertf128_si256 (__X, __Y, __N >> 1);
}
#endif
#else
#define _mm256_insertf128_pd(X, Y, O)					\
  ((__m256d) __builtin_ia32_vinsertf128_pd256 ((__v4df)(__m256d)(X),	\
					       (__v2df)(__m128d)(Y),	\
					       (int)(O)))

#define _mm256_insertf128_ps(X, Y, O)					\
  ((__m256) __builtin_ia32_vinsertf128_ps256 ((__v8sf)(__m256)(X),	\
					      (__v4sf)(__m128)(Y),  	\
					      (int)(O)))

#define _mm256_insertf128_si256(X, Y, O)				\
  ((__m256i) __builtin_ia32_vinsertf128_si256 ((__v8si)(__m256i)(X),	\
					       (__v4si)(__m128i)(Y),	\
					       (int)(O)))

#define _mm256_insert_epi32(X, D, N)					\
  (__extension__							\
   ({									\
      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 2);		\
      __Y = _mm_insert_epi32 (__Y, (D), (N) % 4);			\
      _mm256_insertf128_si256 ((X), __Y, (N) >> 2);			\
    }))

#define _mm256_insert_epi16(X, D, N)					\
  (__extension__							\
   ({									\
      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 3);		\
      __Y = _mm_insert_epi16 (__Y, (D), (N) % 8);			\
      _mm256_insertf128_si256 ((X), __Y, (N) >> 3);			\
    }))

#define _mm256_insert_epi8(X, D, N)					\
  (__extension__							\
   ({									\
      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 4);		\
      __Y = _mm_insert_epi8 (__Y, (D), (N) % 16);			\
      _mm256_insertf128_si256 ((X), __Y, (N) >> 4);			\
    }))

#ifdef __x86_64__
#define _mm256_insert_epi64(X, D, N)					\
  (__extension__							\
   ({									\
      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 1);		\
      __Y = _mm_insert_epi64 (__Y, (D), (N) % 2);			\
      _mm256_insertf128_si256 ((X), __Y, (N) >> 1);			\
    }))
#endif
#endif

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_load_pd (double const *__P)
{
  return *(__m256d *)__P;
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_store_pd (double *__P, __m256d __A)
{
  *(__m256d *)__P = __A;
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_load_ps (float const *__P)
{
  return *(__m256 *)__P;
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_store_ps (float *__P, __m256 __A)
{
  *(__m256 *)__P = __A;
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_loadu_pd (double const *__P)
{
  return (__m256d) __builtin_ia32_loadupd256 (__P);
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_storeu_pd (double *__P, __m256d __A)
{
  __builtin_ia32_storeupd256 (__P, (__v4df)__A);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_loadu_ps (float const *__P)
{
  return (__m256) __builtin_ia32_loadups256 (__P);
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_storeu_ps (float *__P, __m256 __A)
{
  __builtin_ia32_storeups256 (__P, (__v8sf)__A);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_load_si256 (__m256i const *__P)
{
  return *__P;
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_store_si256 (__m256i *__P, __m256i __A)
{
  *__P = __A;
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_loadu_si256 (__m256i const *__P)
{
  return (__m256i) __builtin_ia32_loaddqu256 ((char const *)__P);
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_storeu_si256 (__m256i *__P, __m256i __A)
{
  __builtin_ia32_storedqu256 ((char *)__P, (__v32qi)__A);
}

extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskload_pd (double const *__P, __m128i __M)
{
  return (__m128d) __builtin_ia32_maskloadpd ((const __v2df *)__P,
					      (__v2di)__M);
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskstore_pd (double *__P, __m128i __M, __m128d __A)
{
  __builtin_ia32_maskstorepd ((__v2df *)__P, (__v2di)__M, (__v2df)__A);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskload_pd (double const *__P, __m256i __M)
{
  return (__m256d) __builtin_ia32_maskloadpd256 ((const __v4df *)__P,
						 (__v4di)__M);
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskstore_pd (double *__P, __m256i __M, __m256d __A)
{
  __builtin_ia32_maskstorepd256 ((__v4df *)__P, (__v4di)__M, (__v4df)__A);
}

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskload_ps (float const *__P, __m128i __M)
{
  return (__m128) __builtin_ia32_maskloadps ((const __v4sf *)__P,
					     (__v4si)__M);
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskstore_ps (float *__P, __m128i __M, __m128 __A)
{
  __builtin_ia32_maskstoreps ((__v4sf *)__P, (__v4si)__M, (__v4sf)__A);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskload_ps (float const *__P, __m256i __M)
{
  return (__m256) __builtin_ia32_maskloadps256 ((const __v8sf *)__P,
						(__v8si)__M);
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskstore_ps (float *__P, __m256i __M, __m256 __A)
{
  __builtin_ia32_maskstoreps256 ((__v8sf *)__P, (__v8si)__M, (__v8sf)__A);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movehdup_ps (__m256 __X)
{
  return (__m256) __builtin_ia32_movshdup256 ((__v8sf)__X);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_moveldup_ps (__m256 __X)
{
  return (__m256) __builtin_ia32_movsldup256 ((__v8sf)__X);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movedup_pd (__m256d __X)
{
  return (__m256d) __builtin_ia32_movddup256 ((__v4df)__X);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_lddqu_si256 (__m256i const *__P)
{
  return (__m256i) __builtin_ia32_lddqu256 ((char const *)__P);
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_stream_si256 (__m256i *__A, __m256i __B)
{
  __builtin_ia32_movntdq256 ((__v4di *)__A, (__v4di)__B);
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_stream_pd (double *__A, __m256d __B)
{
  __builtin_ia32_movntpd256 (__A, (__v4df)__B);
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_stream_ps (float *__P, __m256 __A)
{
  __builtin_ia32_movntps256 (__P, (__v8sf)__A);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_rcp_ps (__m256 __A)
{
  return (__m256) __builtin_ia32_rcpps256 ((__v8sf)__A);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_rsqrt_ps (__m256 __A)
{
  return (__m256) __builtin_ia32_rsqrtps256 ((__v8sf)__A);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sqrt_pd (__m256d __A)
{
  return (__m256d) __builtin_ia32_sqrtpd256 ((__v4df)__A);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sqrt_ps (__m256 __A)
{
  return (__m256) __builtin_ia32_sqrtps256 ((__v8sf)__A);
}

#ifdef __OPTIMIZE__
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_round_pd (__m256d __V, const int __M)
{
  return (__m256d) __builtin_ia32_roundpd256 ((__v4df)__V, __M);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_round_ps (__m256 __V, const int __M)
{
  return (__m256) __builtin_ia32_roundps256 ((__v8sf)__V, __M);
}
#else
#define _mm256_round_pd(V, M) \
  ((__m256d) __builtin_ia32_roundpd256 ((__v4df)(__m256d)(V), (int)(M)))

#define _mm256_round_ps(V, M) \
  ((__m256) __builtin_ia32_roundps256 ((__v8sf)(__m256)(V), (int)(M)))
#endif

#define _mm256_ceil_pd(V)	_mm256_round_pd ((V), _MM_FROUND_CEIL)
#define _mm256_floor_pd(V)	_mm256_round_pd ((V), _MM_FROUND_FLOOR)
#define _mm256_ceil_ps(V)	_mm256_round_ps ((V), _MM_FROUND_CEIL)
#define _mm256_floor_ps(V)	_mm256_round_ps ((V), _MM_FROUND_FLOOR)

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpackhi_pd (__m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_unpckhpd256 ((__v4df)__A, (__v4df)__B);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpacklo_pd (__m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_unpcklpd256 ((__v4df)__A, (__v4df)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpackhi_ps (__m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_unpckhps256 ((__v8sf)__A, (__v8sf)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpacklo_ps (__m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_unpcklps256 ((__v8sf)__A, (__v8sf)__B);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_testz_pd (__m128d __M, __m128d __V)
{
  return __builtin_ia32_vtestzpd ((__v2df)__M, (__v2df)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_testc_pd (__m128d __M, __m128d __V)
{
  return __builtin_ia32_vtestcpd ((__v2df)__M, (__v2df)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_testnzc_pd (__m128d __M, __m128d __V)
{
  return __builtin_ia32_vtestnzcpd ((__v2df)__M, (__v2df)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_testz_ps (__m128 __M, __m128 __V)
{
  return __builtin_ia32_vtestzps ((__v4sf)__M, (__v4sf)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_testc_ps (__m128 __M, __m128 __V)
{
  return __builtin_ia32_vtestcps ((__v4sf)__M, (__v4sf)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_testnzc_ps (__m128 __M, __m128 __V)
{
  return __builtin_ia32_vtestnzcps ((__v4sf)__M, (__v4sf)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testz_pd (__m256d __M, __m256d __V)
{
  return __builtin_ia32_vtestzpd256 ((__v4df)__M, (__v4df)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testc_pd (__m256d __M, __m256d __V)
{
  return __builtin_ia32_vtestcpd256 ((__v4df)__M, (__v4df)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testnzc_pd (__m256d __M, __m256d __V)
{
  return __builtin_ia32_vtestnzcpd256 ((__v4df)__M, (__v4df)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testz_ps (__m256 __M, __m256 __V)
{
  return __builtin_ia32_vtestzps256 ((__v8sf)__M, (__v8sf)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testc_ps (__m256 __M, __m256 __V)
{
  return __builtin_ia32_vtestcps256 ((__v8sf)__M, (__v8sf)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testnzc_ps (__m256 __M, __m256 __V)
{
  return __builtin_ia32_vtestnzcps256 ((__v8sf)__M, (__v8sf)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testz_si256 (__m256i __M, __m256i __V)
{
  return __builtin_ia32_ptestz256 ((__v4di)__M, (__v4di)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testc_si256 (__m256i __M, __m256i __V)
{
  return __builtin_ia32_ptestc256 ((__v4di)__M, (__v4di)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testnzc_si256 (__m256i __M, __m256i __V)
{
  return __builtin_ia32_ptestnzc256 ((__v4di)__M, (__v4di)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movemask_pd (__m256d __A)
{
  return __builtin_ia32_movmskpd256 ((__v4df)__A);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movemask_ps (__m256 __A)
{
  return __builtin_ia32_movmskps256 ((__v8sf)__A);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setzero_pd (void)
{
  return __extension__ (__m256d){ 0.0, 0.0, 0.0, 0.0 };
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setzero_ps (void)
{
  return __extension__ (__m256){ 0.0, 0.0, 0.0, 0.0,
				 0.0, 0.0, 0.0, 0.0 };
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setzero_si256 (void)
{
  return __extension__ (__m256i)(__v4di){ 0, 0, 0, 0 };
}

/* Create the vector [A B C D].  */
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set_pd (double __A, double __B, double __C, double __D)
{
  return __extension__ (__m256d){ __D, __C, __B, __A };
}

/* Create the vector [A B C D E F G H].  */
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set_ps (float __A, float __B, float __C, float __D,
	       float __E, float __F, float __G, float __H)
{
  return __extension__ (__m256){ __H, __G, __F, __E,
				 __D, __C, __B, __A };
}

/* Create the vector [A B C D E F G H].  */
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set_epi32 (int __A, int __B, int __C, int __D,
		  int __E, int __F, int __G, int __H)
{
  return __extension__ (__m256i)(__v8si){ __H, __G, __F, __E,
					  __D, __C, __B, __A };
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set_epi16 (short __q15, short __q14, short __q13, short __q12,
		  short __q11, short __q10, short __q09, short __q08,
		  short __q07, short __q06, short __q05, short __q04,
		  short __q03, short __q02, short __q01, short __q00)
{
  return __extension__ (__m256i)(__v16hi){
    __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
    __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15
  };
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set_epi8  (char __q31, char __q30, char __q29, char __q28,
		  char __q27, char __q26, char __q25, char __q24,
		  char __q23, char __q22, char __q21, char __q20,
		  char __q19, char __q18, char __q17, char __q16,
		  char __q15, char __q14, char __q13, char __q12,
		  char __q11, char __q10, char __q09, char __q08,
		  char __q07, char __q06, char __q05, char __q04,
		  char __q03, char __q02, char __q01, char __q00)
{
  return __extension__ (__m256i)(__v32qi){
    __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
    __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15,
    __q16, __q17, __q18, __q19, __q20, __q21, __q22, __q23,
    __q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31
  };
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set_epi64x (long long __A, long long __B, long long __C,
		   long long __D)
{
  return __extension__ (__m256i)(__v4di){ __D, __C, __B, __A };
}

/* Create a vector with all elements equal to A.  */
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set1_pd (double __A)
{
  return __extension__ (__m256d){ __A, __A, __A, __A };
}

/* Create a vector with all elements equal to A.  */
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set1_ps (float __A)
{
  return __extension__ (__m256){ __A, __A, __A, __A,
				 __A, __A, __A, __A };
}

/* Create a vector with all elements equal to A.  */
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set1_epi32 (int __A)
{
  return __extension__ (__m256i)(__v8si){ __A, __A, __A, __A,
					  __A, __A, __A, __A };
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set1_epi16 (short __A)
{
  return _mm256_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A,
			   __A, __A, __A, __A, __A, __A, __A, __A);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set1_epi8 (char __A)
{
  return _mm256_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A,
			  __A, __A, __A, __A, __A, __A, __A, __A,
			  __A, __A, __A, __A, __A, __A, __A, __A,
			  __A, __A, __A, __A, __A, __A, __A, __A);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set1_epi64x (long long __A)
{
  return __extension__ (__m256i)(__v4di){ __A, __A, __A, __A };
}

/* Create vectors of elements in the reversed order from the
   _mm256_set_XXX functions.  */

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setr_pd (double __A, double __B, double __C, double __D)
{
  return _mm256_set_pd (__D, __C, __B, __A);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setr_ps (float __A, float __B, float __C, float __D,
		float __E, float __F, float __G, float __H)
{
  return _mm256_set_ps (__H, __G, __F, __E, __D, __C, __B, __A);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setr_epi32 (int __A, int __B, int __C, int __D,
		   int __E, int __F, int __G, int __H)
{
  return _mm256_set_epi32 (__H, __G, __F, __E, __D, __C, __B, __A);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setr_epi16 (short __q15, short __q14, short __q13, short __q12,
		   short __q11, short __q10, short __q09, short __q08,
		   short __q07, short __q06, short __q05, short __q04,
		   short __q03, short __q02, short __q01, short __q00)
{
  return _mm256_set_epi16 (__q00, __q01, __q02, __q03,
			   __q04, __q05, __q06, __q07,
			   __q08, __q09, __q10, __q11,
			   __q12, __q13, __q14, __q15);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setr_epi8  (char __q31, char __q30, char __q29, char __q28,
		   char __q27, char __q26, char __q25, char __q24,
		   char __q23, char __q22, char __q21, char __q20,
		   char __q19, char __q18, char __q17, char __q16,
		   char __q15, char __q14, char __q13, char __q12,
		   char __q11, char __q10, char __q09, char __q08,
		   char __q07, char __q06, char __q05, char __q04,
		   char __q03, char __q02, char __q01, char __q00)
{
  return _mm256_set_epi8 (__q00, __q01, __q02, __q03,
			  __q04, __q05, __q06, __q07,
			  __q08, __q09, __q10, __q11,
			  __q12, __q13, __q14, __q15,
			  __q16, __q17, __q18, __q19,
			  __q20, __q21, __q22, __q23,
			  __q24, __q25, __q26, __q27,
			  __q28, __q29, __q30, __q31);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setr_epi64x (long long __A, long long __B, long long __C,
		    long long __D)
{
  return _mm256_set_epi64x (__D, __C, __B, __A);
}

/* Casts between various SP, DP, INT vector types.  Note that these do no
   conversion of values, they just change the type.  */
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castpd_ps (__m256d __A)
{
  return (__m256) __A;
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castpd_si256 (__m256d __A)
{
  return (__m256i) __A;
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castps_pd (__m256 __A)
{
  return (__m256d) __A;
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castps_si256(__m256 __A)
{
  return (__m256i) __A;
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castsi256_ps (__m256i __A)
{
  return (__m256) __A;
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castsi256_pd (__m256i __A)
{
  return (__m256d) __A;
}

extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castpd256_pd128 (__m256d __A)
{
  return (__m128d) __builtin_ia32_pd_pd256 ((__v4df)__A);
}

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castps256_ps128 (__m256 __A)
{
  return (__m128) __builtin_ia32_ps_ps256 ((__v8sf)__A);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castsi256_si128 (__m256i __A)
{
  return (__m128i) __builtin_ia32_si_si256 ((__v8si)__A);
}

/* When cast is done from a 128 to 256-bit type, the low 128 bits of
   the 256-bit result contain source parameter value and the upper 128
   bits of the result are undefined.  Those intrinsics shouldn't
   generate any extra moves.  */

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castpd128_pd256 (__m128d __A)
{
  return (__m256d) __builtin_ia32_pd256_pd ((__v2df)__A);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castps128_ps256 (__m128 __A)
{
  return (__m256) __builtin_ia32_ps256_ps ((__v4sf)__A);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castsi128_si256 (__m128i __A)
{
  return (__m256i) __builtin_ia32_si256_si ((__v4si)__A);
}