题解 P3372 【【模板】线段树 1】

2019-06-25 22:24:23


指令集大法好!

直接用指令集优化暴力就行了。时间复杂度 $O(\frac{nm}{4})$ 。

关于指令集可以去看我写的 blog

// ===================================
//   author: M_sea
//   website: http://m-sea-blog.com/
// ===================================
#pragma GCC optimize("Ofast,no-stack-protector,unroll-loops,fast-math")
#pragma GCC target("avx,avx2")
#include <immintrin.h>
#include <algorithm>
#include <iostream>
#include <cstdlib>
#include <cstring>
#include <cstdio>
#include <cmath>
#define re register
using namespace std;
typedef long long ll;
using int_t=long long int;

inline int read() {
    int X=0,w=1; char c=getchar();
    while (c<'0'||c>'9') { if (c=='-') w=-1; c=getchar(); }
    while (c>='0'&&c<='9') X=X*10+c-'0',c=getchar();
    return X*w;
}

const int N=100000+10;

int n,m; ll a[N];

inline void modify(ll* l,ll* r,ll x) {
    __m256i add=_mm256_set1_epi64x(x);
    while (int_t(l)%64&&l<=r) *l+=x,++l;
    while (l+3<=r) {
        __m256i p=*(__m256i*)l;
        *(__m256i*)l=_mm256_add_epi64(p,add);
        l+=4;
    }
    while (l<=r) *l+=x,++l;
}

inline ll query(ll* l,ll* r) {
    __m256i sum=_mm256_set1_epi64x(0);
    ll res=0;
    while (int_t(l)%64&&l<=r) res+=*l,++l;
    while (l+3<=r) {
        sum=_mm256_add_epi64(sum,*(__m256i*)l);
        l+=4;
    }
    while (l<=r) res+=*l,++l;
    ll* arr=(ll*)&sum;
    for (re int i=0;i<4;++i) res+=arr[i];
    return res;
}

int main() {
    n=read(),m=read();
    for (re int i=1;i<=n;++i) a[i]=read();
    for (re int i=1;i<=m;++i) {
        int op=read();
        if (op==1) {
            int l=read(),r=read(),k=read();
            modify(a+l,a+r,(ll)k);
        } else {
            int l=read(),r=read();
            printf("%lld\n",query(a+l,a+r));
        }
    }
    return 0;
}