Big-O of log base b of n + 1 - runtime

I have a question about runtime: what is the runtime of the following code in terms of big-O?
int* convert(int n, int b) {
    int d = (log2(n)) / (log2(b)) + 1;
    int* array = new int[d];
    int i = d;
    while (n > 0) {
        array[--i] = n % b;
        n = n / b;
    }
    return array;
}
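As a quick sanity check (my own worked example, not from the original post): for n = 255 and b = 2, d = ⌊log2(255) / log2(2)⌋ + 1 = 8, and the while loop runs 8 times (255 → 127 → 63 → 31 → 15 → 7 → 3 → 1 → 0), one iteration per digit written. In general the loop executes ⌊log_b(n)⌋ + 1 times, so the runtime is Θ(log_b n) = Θ(log n / log b), i.e. O(log n) for any fixed base b.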

Using heap sort, append array elements

I am given an array int A[] = {12,10,9,2,11,8,14,3,5};
In this array, the first 4 elements (index 0 to index 3) satisfy the max-heap condition, but the last 5 elements (index 4 to index 8) don't. So I have to write code so that the whole array satisfies the max-heap condition.
I am given the function call max_heap_append(A,3,8); and I have to use it in my code to write the program. It is an assignment, so I have to follow the instructions.
I have written the code below, but when I run the program, nothing happens.
#include <stdio.h>
#include <stdlib.h>

void swap(int *a, int *b)
{
    int temp;
    temp = *a;
    *a = *b;
    *b = temp;
}

void heapify(int A[], int q, int i)
{
    int largest = i;
    int l = 2 * i + 1;
    int r = 2 * i + 2;
    if (l < q && A[l] > A[largest])
    {
        largest = l;
    }
    if (r < q && A[r] > A[largest])
    {
        largest = r;
    }
    if (largest != i)
    {
        swap(&A[i], &A[largest]);
        heapify(A, q, largest);
    }
}

void max_heap_append(int A[], int p, int q)
{
    int i;
    for (i = q / 2 - 1; i >= 0; i--)
    {
        heapify(A, q, i);
    }
    // sort the heap
    for (i = q; i >= 0; i--)
    {
        swap(&A[0], &A[i]);
        heapify(A, i, 0);
    }
}

void printA(int A[], int q)
{
    int i;
    for (i = 0; i <= q; i++)
    {
        printf("%d", A[i]);
    }
    printf("%d\n");
}

int main()
{
    int A[] = {12,10,9,2,11,8,14,3};
    max_heap_append(A, 3, 8);
    printf("Sorted: ");
    printA(A, 8);
    return 0;
}
Your code does not heapify indexes 0 to 3, so you need to heapify the whole array. There is another mistake: if your array size is 8, you cannot access A[8]; you can only access A[0] to A[7], so you need to iterate from 0 to 7.
Try with this:
#include <stdio.h>
#include <stdlib.h>

void swap(int *a, int *b)
{
    int temp;
    temp = *a;
    *a = *b;
    *b = temp;
}

void heapify(int A[], int q, int i)
{
    int largest = i;
    int l = 2 * i + 1;
    int r = 2 * i + 2;
    if (l < q && A[l] > A[largest])
    {
        largest = l;
    }
    if (r < q && A[r] > A[largest])
    {
        largest = r;
    }
    if (largest != i)
    {
        swap(&A[i], &A[largest]);
        heapify(A, q, largest);
    }
}

void max_heap_append(int A[], int p, int q)
{
    int i;
    for (i = q - 1; i >= 0; i--)
    {
        heapify(A, q, i);
    }
    // sort the heap
    for (i = q - 1; i >= 0; i--)
    {
        swap(&A[0], &A[i]);
        heapify(A, i, 0);
    }
}

void printA(int A[], int q)
{
    int i;
    for (i = 0; i < q; i++)
    {
        printf("%d ", A[i]);
    }
    printf("\n");
}

int main()
{
    int A[] = {12,10,9,2,11,8,14,3};
    max_heap_append(A, 3, 8);
    printf("Sorted: ");
    printA(A, 8);
    return 0;
}
You have several problems in your code.
printA
One is indicated by the compiler, in printA:
printf("%d\n");
‘%d’ expects a matching ‘int’ argument, but there is no argument.
It is easy to guess you just wanted to print a newline, so that line can be replaced by
putchar('\n');
Still in printA, you print the numbers without a separator, so the result is not usable; for instance do
printf("%d ", A[i]);
When I look at the call of printA in main, the parameter q is the number of elements in A, so the end test of the for loop is invalid because you try to print a value outside of the array. The loop must be:
for( i = 0; i < q; i++)
max_heap_append
In the second for loop, the index i can take the value 0; in that case you swap the first element of the array with itself, which makes no sense, and the same goes for the call of heapify with the last 2 arguments both 0.
When you call that function in main, the parameter q receives the number of elements in the array, which is also the first value of i in that second for loop, so &A[i] is out of the array. You need to replace that line by
for( i = q-1; i> 0; i--)
If I do all these changes, compilation and execution:
bruno@bruno-XPS-8300:/tmp$ gcc -g -Wall h.c
bruno@bruno-XPS-8300:/tmp$ ./a.out
Sorted: 2 3 8 9 10 11 12 14
bruno@bruno-XPS-8300:/tmp$

Mapping a large number to a unique number using MOD operation

Let the root of a first-degree polynomial Q(x) be x0 = -b/a. Since the ranges of the variables a and b are large, x0 can be too large to be stored in a variable, so it is converted to a unique number using an operation with mod:
int x0 = mul(mod - b, rev(a));
Problem link: hackerank problem
Can someone please explain how this line of code works and the math behind this operation?
The whole code:
#include <bits/stdc++.h>
using namespace std;

#define forn(i,n) for (int i = 0; i < int(n); ++i)

typedef long long ll;

const int inf = int(1e9) + int(1e5);
const ll infl = ll(2e18) + ll(1e10);
const int mod = 1e9 + 7;

int udd(int &a, int b) {
    a += b;
    if (a >= mod)
        a -= mod;
    return a;
}

int add(int a, int b) {
    return udd(a, b);
}

int mul(ll a, ll b) {
    return a * b % mod;
}

//============ didn't understand this step
int bin(int a, int d) {
    int b = 1;
    while (d) {
        if (d & 1)
            b = mul(b, a);
        d >>= 1;
        a = mul(a, a);
    }
    return b;
}

int rev(int a) {
    assert(a != 0);
    return bin(a, mod - 2);
}

const int maxn = 100100;
int px[maxn];
int c[maxn];

struct Fenwick {
    int a[maxn];
    int t[maxn];

    void set(int pos, int val) {
        int delta = add(val, mod - a[pos]);
        a[pos] = val;
        delta = mul(delta, px[pos]);
        for (int i = pos; i < maxn; i |= i + 1) {
            udd(t[i], delta);
        }
    }

    int get(int r) {
        int res = 0;
        for (int i = r - 1; i >= 0; i = (i & (i + 1)) - 1)
            udd(res, t[i]);
        return res;
    }

    int get(int l, int r) {
        return add(get(r), mod - get(l));
    }
} fw;

int main() {
#ifdef LOCAL
    assert(freopen("test.in", "r", stdin));
#endif
    int n, a, b, q;
    cin >> n >> a >> b >> q;
    //======== what does this line do?
    int x0 = mul(mod - b, rev(a));
    px[0] = 1;
    for (int i = 1; i < n; ++i)
        px[i] = mul(px[i - 1], x0);
    forn (i, n) {
        cin >> c[i];
        fw.set(i, c[i]);
    }
    forn (i, q) {
        int t, a, b;
        cin >> t >> a >> b;
        if (t == 1) {
            fw.set(a, b);
        } else {
            ++b;
            int s = fw.get(a, b);
            if (x0 == 0)
                s = fw.a[a];
            cout << (s == 0 ? "Yes" : "No") << '\n';
        }
    }
}
bin is the halving-and-squaring (square-and-multiply) implementation of the modular power function a^d % mod, so that the modular inverse in rev can be computed via Fermat's little theorem: since mod is prime, a^(mod-1) ≡ 1 (mod mod), hence a^(mod-2) ≡ a^(-1) (mod mod). The line x0 = mul(mod - b, rev(a)) therefore computes (-b) · a^(-1) ≡ -b/a (mod mod); mod - b is used instead of -b to keep the value non-negative.
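As a self-contained sketch of that idea (my own test harness; only mul, bin, and rev come from the posted code, the rest is illustrative):
#include <cassert>
#include <cstdio>

const int mod = 1e9 + 7;                  // prime modulus, as in the post

int mul(long long a, long long b) { return a * b % mod; }

// square-and-multiply: a^d % mod in O(log d) multiplications
int bin(int a, int d) {
    int b = 1;
    while (d) {
        if (d & 1) b = mul(b, a);         // this bit of d is set: multiply in a^(2^k)
        d >>= 1;
        a = mul(a, a);                    // square the base each step
    }
    return b;
}

// Fermat: a^(mod-2) ≡ a^(-1) (mod mod) because mod is prime
int rev(int a) { return bin(a, mod - 2); }

int main() {
    int a = 123456789, b = 42;
    assert(mul(a, rev(a)) == 1);          // a * a^(-1) ≡ 1 (mod mod)
    // x0 = (-b) * a^(-1); "mod - b" keeps the representative non-negative
    int x0 = mul(mod - b, rev(a));
    assert(mul(a, x0) == mod - b);        // a * x0 ≡ -b (mod mod)
    puts("ok");
    return 0;
}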

Algorithm: how to find the longest subsequence of integers in an array such that the gcd of any two consecutive numbers in the sequence is greater than 1?

Given an array of integers, we have to find the length of the longest subsequence of integers such that the gcd of any two consecutive elements in the sequence is greater than 1.
For example, if array = [12, 8, 2, 3, 6, 9],
then one such subsequence can be {12, 8, 2, 6, 9};
another can be {12, 3, 6, 9}.
I tried to solve this problem by dynamic programming. Assume that maxCount is the array such that maxCount[i] will have the length of such a longest subsequence ending at index i.
maxCount[0] = 1;
for (i = 1; i < N; i++)
{
    max = 1;
    for (j = i - 1; j >= 0; j--)
    {
        if (gcd(arr[i], arr[j]) > 1)
        {
            temp = maxCount[j] + 1;
            if (temp > max)
                max = temp;
        }
    }
    maxCount[i] = max;
}
max = 0;
for (i = 0; i < N; i++)
{
    if (maxCount[i] > max)
        max = maxCount[i];
}
cout << max << endl;
But this approach gets a timeout, as its time complexity is O(N^2). Can we improve the time complexity?
The condition "gcd is greater than 1" means that the numbers share at least one common prime divisor. So let dp[i] equal the length of the longest sequence finishing on a number divisible by i.
int n;
cin >> n;
const int MAX_NUM = 100 * 1000;
static int dp[MAX_NUM];
for (int i = 0; i < n; ++i)
{
    int x;
    cin >> x;
    int cur = 1;
    vector<int> d;
    for (int j = 2; j * j <= x; ++j)
    {
        if (x % j == 0)
        {
            cur = max(cur, dp[j] + 1);
            cur = max(cur, dp[x / j] + 1);
            d.push_back(j);
            d.push_back(x / j);
        }
    }
    if (x > 1)
    {
        cur = max(cur, dp[x] + 1);
        d.push_back(x);
    }
    for (int j : d)
    {
        dp[j] = cur;
    }
}
cout << *max_element(dp, dp + MAX_NUM) << endl;
This solution has O(N * sqrt(MAX_NUM)) complexity. Actually, you can calculate dp values only for prime numbers. To implement this you should be able to get the prime factorization in less than O(N^0.5) time (this method, for example). That optimization should bring the complexity down to O(N * factorization + N log N). As a memory optimization, you can replace the dp array with a map or unordered_map.
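For reference, here is a minimal sketch of a smallest-prime-factor sieve of the kind that gives sub-O(sqrt(x)) factorization (my own code; the function names and the choice of sieve are assumptions, since the original answer only linked to "this method"):
#include <vector>
using std::vector;

// Smallest-prime-factor table up to `limit`; spf[x] is the smallest prime dividing x.
vector<int> build_spf(int limit) {
    vector<int> spf(limit + 1, 0);
    for (int i = 2; i <= limit; ++i)
        if (spf[i] == 0)                          // i is prime
            for (long long j = i; j <= limit; j += i)
                if (spf[j] == 0) spf[j] = i;
    return spf;
}

// Distinct prime factors of x in O(log x) divisions using the table.
vector<int> prime_factors(int x, const vector<int> &spf) {
    vector<int> primes;
    while (x > 1) {
        int p = spf[x];
        primes.push_back(p);
        while (x % p == 0) x /= p;
    }
    return primes;
}
With this, each array element contributes dp transitions only through its distinct prime divisors, which is what brings the solution down to the stated complexity.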
GCD takes O(log m) time, where m is the maximum number in the array. Therefore, using a segment tree and binary search, one can reduce the time complexity to O(n log(m² * n)) (with O(n log m) preprocessing). This list details other data structures that can be used for RMQ-type queries and to reduce the complexity further.
Here is one possible implementation of this:
#include <bits/stdc++.h>
using namespace std;

struct SegTree {
    using ftype = function<int(int, int)>;
    vector<int> vec;
    int l, og, dummy;
    ftype f;

    template<typename T> SegTree(const vector<T> &v, const T &x, const ftype &func) : og(v.size()), f(func), l(1), dummy(x) {
        assert(og >= 1);
        while (l < og) l *= 2;
        vec = vector<int>(l*2);
        for (int i = l; i < l+og; i++) vec[i] = v[i-l];
        for (int i = l+og; i < 2*l; i++) vec[i] = dummy;
        for (int i = l-1; i >= 1; i--) {
            if (vec[2*i] == dummy && vec[2*i+1] == dummy) vec[i] = dummy;
            else if (vec[2*i] == dummy) vec[i] = vec[2*i+1];
            else if (vec[2*i+1] == dummy) vec[i] = vec[2*i];
            else vec[i] = f(vec[2*i], vec[2*i+1]);
        }
    }
    SegTree() {}

    void valid(int x) {assert(x >= 0 && x < og);}

    int get(int a, int b) {
        valid(a); valid(b); assert(b >= a);
        a += l; b += l;
        int s = vec[a];
        a++;
        while (a <= b) {
            if (a % 2 == 1) {
                if (vec[a] != dummy) s = f(s, vec[a]);
                a++;
            }
            if (b % 2 == 0) {
                if (vec[b] != dummy) s = f(s, vec[b]);
                b--;
            }
            a /= 2; b /= 2;
        }
        return s;
    }

    void add(int x, int c) {
        valid(x);
        x += l;
        vec[x] += c;
        for (x /= 2; x >= 1; x /= 2) {
            if (vec[2*x] == dummy && vec[2*x+1] == dummy) vec[x] = dummy;
            else if (vec[2*x] == dummy) vec[x] = vec[2*x+1];
            else if (vec[2*x+1] == dummy) vec[x] = vec[2*x];
            else vec[x] = f(vec[2*x], vec[2*x+1]);
        }
    }

    void update(int x, int c) {add(x, c-vec[x+l]);}
};

// Constructor (where val is something that an element in the array is
// guaranteed to never reach):
// SegTree st(vec, val, func);

// finds longest subsequence where GCD is greater than 1
int longest(const vector<int> &vec) {
    int l = vec.size();
    SegTree st(vec, -1, [](int a, int b){return __gcd(a, b);});
    // checks if a certain length is valid in O(n log (m² * n)) time
    auto valid = [&](int n) -> bool {
        for (int i = 0; i <= l-n; i++) {
            if (st.get(i, i+n-1) != 1) {
                return true;
            }
        }
        return false;
    };
    int length = 0;
    // do a "binary search" on the best possible length
    for (int i = l; i >= 1; i /= 2) {
        while (length+i <= l && valid(length+i)) {
            length += i;
        }
    }
    return length;
}
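For completeness, a possible driver (my own illustration; the expected output assumes the contiguous-range reading of "subsequence" that the code above implements):
int main() {
    vector<int> v = {12, 8, 2, 3, 6, 9};
    // {12, 8, 2} (gcd 2) and {3, 6, 9} (gcd 3) are the longest ranges
    // whose overall gcd exceeds 1, so this prints 3
    cout << longest(v) << '\n';
    return 0;
}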

Dijkstra running slow

There is a problem on SPOJ called HIGHWAYS, which is basically to find the shortest path between 2 given cities.
The first time I solved it, I used Dijkstra's algorithm. I got it accepted, although the code was kind of big, so I decided to redo it with smaller code (that should obviously behave the same way), but it's getting Time Limit Exceeded.
I'd like to know what difference between them is causing the TLE.
The input is like this:
n //number of test cases
c e s e //number of cities (from 1 to c), number of edges, start and end cities
c1 c2 w //e lines, each with connection between c1 and c2 with weight w
Here is the long code (Accepted):
#include <bits/stdc++.h>
using namespace std;

#define si(n) scanf("%d", &n)
#define INF 99999

int d[100010];

struct edge {
    int v, weight;
    edge(int a, int w) {
        v = a;
        weight = w;
    }
    bool operator < (const edge & o) const {
        return weight > o.weight;
    }
};

struct vertex {
    int value;
    vector <edge> adj;
    vertex() {
        adj.clear();
    }
    vertex(int val) {
        value = val;
        adj.clear();
    }
    void add(edge a) {
        adj.push_back(a);
    }
};

struct graph {
    vertex v[100010];
    void add_v(int val) {
        vertex a(val);
        a.adj.clear();
        v[val] = a;
    }
    void add_a(int v1, int v2, int p) {
        v[v1].add(edge(v2, p));
        v[v2].add(edge(v1, p));
    }
    void dijkstra(int n, int f) {
        for (int i = 0; i <= f; i++) d[i] = INF;
        priority_queue <edge> Q;
        d[n] = 0;
        int current;
        Q.push(edge(n, 0));
        while (!Q.empty()) {
            current = Q.top().v;
            Q.pop();
            for (int i = 0; i < v[current].adj.size(); i++) {
                edge a = v[current].adj[i];
                if (d[a.v] > d[current] + a.weight) {
                    d[a.v] = d[current] + a.weight;
                    Q.push(edge(a.v, d[a.v]));
                }
            }
        }
    }
};

int main() {
    int cases;
    si(cases);
    int v, a, ini, fim;
    int v1, v2, w;
    while (cases--) {
        si(v); si(a);
        si(ini); si(fim);
        graph g;
        for (int i = 1; i <= v; i++) {
            g.add_v(i);
        }
        for (int i = 0; i < a; i++) {
            si(v1); si(v2); si(w);
            g.add_a(v1, v2, w);
        }
        g.dijkstra(ini, v+1);
        int dist = d[fim];
        if (dist < 0 || dist >= INF) printf("NONE\n");
        else printf("%d\n", dist);
    }
}
Here is the short one (Time Limit Exceeded):
#include <bits/stdc++.h>
using namespace std;

struct edge {
    int v, w;
    edge() {}
    edge(int a, int b) {v = a; w = b;}
};

bool operator < (edge a, edge b) {return a.w < b.w;}

const int INF = INT_MAX;
typedef vector<vector<edge> > graph;
typedef priority_queue<edge> heap;

int d[100020];

void Dijkstra(graph G, int length, int s) {
    for (int i = 1; i <= length; i++) d[i] = INF;
    edge base;
    base.v = s;
    base.w = d[s] = 0;
    heap H;
    H.push(base);
    while (!H.empty()) {
        int current = H.top().v;
        H.pop();
        for (int i = 0; i < G[current].size(); i++) {
            edge a = G[current][i];
            if (d[a.v] > d[current] + a.w) {
                d[a.v] = d[current] + a.w;
                H.push(edge(a.v, d[a.v]));
            }
        }
    }
}

int main() {
    int cases;
    int n, m, s, e;
    int v1, v2, w;
    scanf("%d", &cases);
    while (cases--) {
        scanf("%d %d %d %d", &n, &m, &s, &e);
        graph G(n + 1);
        for (int i = 0; i < m; i++) {
            scanf("%d %d %d", &v1, &v2, &w);
            G[v1].push_back(edge(v2, w));
            G[v2].push_back(edge(v1, w));
        }
        Dijkstra(G, n, s);
        if (d[e] != INF) printf("%d\n", d[e]);
        else printf("NONE\n");
    }
}
The difference is in how you control the priority queue. In the long version, you take the edges with a small weight first, which enables you to find the optimum earlier and cut many possible paths short:
bool operator < (const edge & o) const {
    return weight > o.weight;
}
In the short version, you have the behaviour (accidentally?) reversed and always take the edge with the greatest weight first, which means that you effectively probe all possible paths.
bool operator < (edge a, edge b) {return a.w < b.w;}
Change the inequality operator and both versions will run equally fast.
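Concretely, the suggested one-line fix for the short version is to flip the comparison so the queue pops the smallest tentative distance first:
// min-heap behaviour: smaller weight = higher priority
bool operator < (edge a, edge b) {return a.w > b.w;}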
The STL containers are slow. Avoid using vector where you can.
Here is my Dijkstra:
#include <bits/stdc++.h>
using namespace std;

const int N = 100010;        // max number of vertices (added; not in the original snippet)
const int M = 200020;        // max number of directed edge slots (added)
#define CLR(a, v) memset(a, v, sizeof(a))  // added: the original relied on this macro

class graph
{
public:
    int head[N], next[M], node[M];
    int dist[M];
    int tot;
    void init()
    {
        tot = 0;
        CLR(head, -1);
    }
    void add(int x, int y, int z = 1)
    {
        node[tot] = y;
        dist[tot] = z;
        next[tot] = head[x];
        head[x] = tot++;
    }
    graph() {init();}
} g;

int dist[N]; /// the distance
/// src means source. ter is optional, it means terminal
void dij(int src, graph &g, int ter = -1)
{
    memset(dist, 0x3f, sizeof(dist)); /// init dist[i] as a very large value
    dist[src] = 0;
    priority_queue<pair<int,int>, vector<pair<int,int> >, greater<pair<int,int> > > pq;
    pq.push(make_pair(dist[src], src));
    while (!pq.empty())
    {
        int x = pq.top().second;
        int d = pq.top().first;
        pq.pop();                   // added: the original never popped, which loops forever
        if (d != dist[x]) continue; // stale entry: a shorter path was already found
        if (x == ter) return;
        for (int i = g.head[x]; ~i; i = g.next[i])
        {
            int y = g.node[i];
            if (d + g.dist[i] < dist[y])
            {
                dist[y] = d + g.dist[i];
                pq.push(make_pair(dist[y], y));
            }
        }
    }
}
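A hypothetical driver for the snippet above (my own; vertex numbers, weights, and the expected output are illustrative):
int main()
{
    g.init();
    g.add(1, 2, 5);  g.add(2, 1, 5);   // undirected edge 1-2, weight 5
    g.add(2, 3, 2);  g.add(3, 2, 2);   // undirected edge 2-3, weight 2
    dij(1, g);
    printf("%d\n", dist[3]);           // shortest 1 -> 3 distance: prints 7
    return 0;
}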

(ACM) How to use a segment tree to count how many elements in [a,b] are smaller than a given constant?

I am quite new to segment trees and would like to keep myself busy by doing some more segment tree exercises.
The problem is actually more ACM-like and has the following conditions:
There are n numbers and m operations, n, m <= 10,000; each operation can be one of the following:
1. Update an interval by subtracting a number x from every element; x can be different each time
2. Query an interval to find how many numbers in the interval are <= 0
Building the segment tree can obviously be done in O(n log n) and an update in O(log n).
But I cannot figure out how to make a query in O(log n). Can anyone give me some suggestions / hints?
Any suggestions would be helpful! Thanks!
TL;DR:
Given n numbers, and 2 types of operations:
1. add x to all elements in [a,b]; x can be different each time
2. query the number of elements in [a,b] that are < C, where C is a given constant
How can operations 1 & 2 both be done in O(log n)?
Nice problem :)
I thought about it for a while but still couldn't work it out with a segment tree, so I tried using the "bucket method" to solve this problem.
We can divide the initial n numbers into B buckets, sort the numbers in each bucket, and maintain the total added value of each bucket. Then for each operation:
"Add": update interval [a, b] with c. We only need to rebuild at most two partially covered buckets and add c to the roughly (b - a) / BUCKET_SIZE fully covered buckets.
"Query": count the elements in interval [a, b] that are <= c. We only need to scan at most two partially covered buckets element by element, and can handle the roughly (b - a) / BUCKET_SIZE fully covered buckets quickly with binary search.
Each query should run in O(N / BUCKET_SIZE * log2(BUCKET_SIZE)), which is smaller than the brute-force method (O(N)). Though it's bigger than O(log N), it may be sufficient in most cases.
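To put concrete numbers on that (my own arithmetic, using the BUCKET_SIZE = 200 chosen in the code below): with N = 100,000 there are about 500 buckets, so a query costs roughly 500 * log2(200) ≈ 500 * 7.6 ≈ 3,800 operations plus two partial-bucket scans of at most 200 elements each, versus up to 100,000 operations for the brute-force scan.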
Here is the test code:
#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <string>
#include <cstring>
#include <cmath>
#include <algorithm>
#include <vector>
#include <set>
#include <map>
#include <ctime>
#include <cassert>
using namespace std;

struct Query {
    // A a b c: add c in [a, b] of arr
    // Q a b c: query number of i in [a, b] with arr[i] <= c
    char ty;
    int a, b, c;
    Query(char _ty, int _a, int _b, int _c) : ty(_ty), a(_a), b(_b), c(_c) {}
};

int n, m;
vector<int> arr;
vector<Query> queries;

vector<int> bruteforce() {
    vector<int> ret;
    vector<int> numbers = arr;
    for (int i = 0; i < m; i++) {
        Query q = queries[i];
        if (q.ty == 'A') {
            for (int i = q.a; i <= q.b; i++) {
                numbers[i] += q.c;
            }
            ret.push_back(-1);
        } else {
            int tmp = 0;
            for (int i = q.a; i <= q.b; i++) {
                tmp += numbers[i] <= q.c;
            }
            ret.push_back(tmp);
        }
    }
    return ret;
}

struct Bucket {
    vector<int> numbers;
    vector<int> numbers_sorted;
    int add;
    Bucket() {
        add = 0;
        numbers_sorted.clear();
        numbers.clear();
    }
    int query(int pos) {
        return numbers[pos] + add;
    }
    void add_pos(int pos, int val) {
        numbers[pos] += val;
    }
    void build() {
        numbers_sorted = numbers;
        sort(numbers_sorted.begin(), numbers_sorted.end());
    }
};

vector<int> bucket_count(int bucket_size) {
    vector<int> ret;
    vector<Bucket> buckets;
    buckets.resize(int(n / bucket_size) + 5);
    for (int i = 0; i < n; i++) {
        buckets[i / bucket_size].numbers.push_back(arr[i]);
    }
    for (int i = 0; i <= n / bucket_size; i++) {
        buckets[i].build();
    }
    for (int i = 0; i < m; i++) {
        Query q = queries[i];
        char ty = q.ty;
        int a, b, c;
        a = q.a, b = q.b, c = q.c;
        if (ty == 'A') {
            set<int> affect_buckets;
            while (a < b && a % bucket_size != 0) buckets[a / bucket_size].add_pos(a % bucket_size, c), affect_buckets.insert(a / bucket_size), a++;
            while (a < b && b % bucket_size != 0) buckets[b / bucket_size].add_pos(b % bucket_size, c), affect_buckets.insert(b / bucket_size), b--;
            while (a < b) {
                buckets[a / bucket_size].add += c;
                a += bucket_size;
            }
            buckets[a / bucket_size].add_pos(a % bucket_size, c), affect_buckets.insert(a / bucket_size);
            for (set<int>::iterator it = affect_buckets.begin(); it != affect_buckets.end(); it++) {
                int id = *it;
                buckets[id].build();
            }
            ret.push_back(-1);
        } else {
            int tmp = 0;
            while (a < b && a % bucket_size != 0) tmp += (buckets[a / bucket_size].query(a % bucket_size) <= c), a++;
            while (a < b && b % bucket_size != 0) tmp += (buckets[b / bucket_size].query(b % bucket_size) <= c), b--;
            while (a < b) {
                int pos = a / bucket_size;
                tmp += upper_bound(buckets[pos].numbers_sorted.begin(), buckets[pos].numbers_sorted.end(), c - buckets[pos].add) - buckets[pos].numbers_sorted.begin();
                a += bucket_size;
            }
            tmp += (buckets[a / bucket_size].query(a % bucket_size) <= c);
            ret.push_back(tmp);
        }
    }
    return ret;
}

void process(int cas) {
    clock_t begin_t = clock();
    vector<int> bf_ans = bruteforce();
    clock_t bf_end_t = clock();
    double bf_sec = ((1.0 * bf_end_t - begin_t)) / CLOCKS_PER_SEC;
    // bucket_size is important
    int bucket_size = 200;
    vector<int> ans = bucket_count(bucket_size);
    clock_t bucket_end_t = clock();
    double bucket_sec = ((1.0 * bucket_end_t - bf_end_t)) / CLOCKS_PER_SEC;
    bool correct = true;
    for (int i = 0; i < ans.size(); i++) {
        if (ans[i] != bf_ans[i]) {
            cout << "query " << i + 1 << " bf = " << bf_ans[i] << " bucket = " << ans[i] << " bucket size = " << bucket_size << " " << n << " " << m << endl;
            correct = false;
        }
    }
    printf("Case #%d:%s bf_sec = %.9lf, bucket_sec = %.9lf\n", cas, correct ? "YES" : "NO", bf_sec, bucket_sec);
}

void read() {
    cin >> n >> m;
    arr.clear();
    for (int i = 0; i < n; i++) {
        int val;
        cin >> val;
        arr.push_back(val);
    }
    queries.clear();
    for (int i = 0; i < m; i++) {
        char ty;
        int a, b, c;
        // a, b, c in [0, n - 1], a <= b
        cin >> ty >> a >> b >> c;
        queries.push_back(Query(ty, a, b, c));
    }
}

void run(int cas) {
    read();
    process(cas);
}

int main() {
    freopen("bucket.in", "r", stdin);
    //freopen("bucket.out", "w", stdout);
    int T;
    scanf("%d", &T);
    for (int cas = 1; cas <= T; cas++) {
        run(cas);
    }
    return 0;
}
And here is the data generation code:
#coding=utf8
import random
import math

def gen_buckets(f):
    t = random.randint(10, 20)
    print >> f, t
    nlimit = 100000
    mlimit = 10000
    limit = 100000
    for i in xrange(t):
        n = random.randint(1, nlimit)
        m = random.randint(1, mlimit)
        print >> f, n, m
        for i in xrange(n):
            val = random.randint(1, limit)
            print >> f, val ,
        print >> f
        for i in xrange(m):
            ty = random.randint(1, 2)
            a = random.randint(0, n - 1)
            b = random.randint(a, n - 1)
            #a = 0
            #b = n - 1
            c = random.randint(-limit, limit)
            print >> f, 'A' if ty == 1 else 'Q', a, b, c

f = open("bucket.in", "w")
gen_buckets(f)
Try applying a Binary Indexed Tree (BIT) instead of a segment tree. Here's the link to the tutorial.
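For reference, a minimal point-update / prefix-sum BIT looks like the sketch below (my own code, not taken from the linked tutorial; extending it to range updates and the "count elements <= C" query is the harder part such a tutorial would have to cover):
#include <vector>
using std::vector;

struct BIT {
    vector<long long> t;
    BIT(int n) : t(n + 1, 0) {}
    void update(int i, long long val) {            // add val at 1-based position i
        for (; i < (int)t.size(); i += i & (-i)) t[i] += val;
    }
    long long query(int i) const {                 // sum of positions 1..i
        long long s = 0;
        for (; i > 0; i -= i & (-i)) s += t[i];
        return s;
    }
};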
