## solution

それでは間に合わないので適当に定数倍高速化する。 具体的には区間min更新/点取得のsegment木を用いて、漸化式中の$\mathrm{dp}(N + k) \gets \min \{ \mathrm{dp}(N), f(N, k) \}$で$f$が単調減少な部分をいい感じにする。 計算量の上では変わらないが、十分速くなって通る。

## implementation

#include <cassert>
#include <climits>
#include <cstdio>
#include <numeric>
#include <vector>
#define repeat(i, n) for (int i = 0; (i) < int(n); ++(i))
#define whole(f, x, ...) ([&](decltype((x)) whole) { return (f)(begin(whole), end(whole), ## __VA_ARGS__); })(x)
using ll = long long;
using namespace std;
template <class T> inline void setmin(T & a, T const & b) { a = min(a, b); }

template <typename UnaryPredicate>
ll binsearch(ll l, ll r, UnaryPredicate p) { // [l, r), p is monotone
assert (l < r);
-- l;
while (r - l > 1) {
ll m = (l + r) / 2;
(p(m) ? r : l) = m;
}
return r; // = min { x in [l, r) | p(x) }, or r
}

template <class OperatorMonoid>
struct dual_segment_tree {
typedef OperatorMonoid monoid_type;
typedef typename OperatorMonoid::underlying_type operator_type;
typedef typename OperatorMonoid::target_type underlying_type;
int n;
vector<operator_type> f;
vector<underlying_type> a;
OperatorMonoid op;
dual_segment_tree() = default;
dual_segment_tree(int a_n, underlying_type initial_value, OperatorMonoid const & a_op = OperatorMonoid()) : op(a_op) {
n = 1; while (n < a_n) n *= 2;
a.resize(n, initial_value);
f.resize(n-1, op.unit());
}
underlying_type point_get(int i) { // 0-based
underlying_type acc = a[i];
for (i = (i+n)/2; i > 0; i /= 2) { // 1-based
acc = op.apply(f[i-1], acc);
}
return acc;
}
void range_apply(int l, int r, operator_type z) { // 0-based, [l, r)
assert (0 <= l and l <= r and r <= n);
range_apply(0, 0, n, l, r, z);
}
void range_apply(int i, int il, int ir, int l, int r, operator_type z) {
if (l <= il and ir <= r) { // 0-based
if (i < f.size()) {
f[i] = op.append(z, f[i]);
} else {
a[i-n+1] = op.apply(z, a[i-n+1]);
}
} else if (ir <= l or r <= il) {
// nop
} else {
range_apply(2*i+1, il, (il+ir)/2, 0, n, f[i]);
range_apply(2*i+2, (il+ir)/2, ir, 0, n, f[i]);
f[i] = op.unit();
range_apply(2*i+1, il, (il+ir)/2, l, r, z);
range_apply(2*i+2, (il+ir)/2, ir, l, r, z);
}
}
};

struct min_operator_t {
typedef int underlying_type;
typedef int target_type;
int unit() const { return INT_MAX; }
int append(int a, int b) const { return min(a, b); }
int apply(int a, int b) const { return min(a, b); }
};

int main() {
while (true) {
// input
int w, n; scanf("%d%d", &w, &n);
if (w == 0) break;
vector<ll> x(n); repeat (i, n) scanf("%lld", &x[i]);
// solve
vector<ll> acc(n + 1); whole(partial_sum, x, acc.begin() + 1);
const int inf = w;
vector<int> dp(n + 1, inf);
dual_segment_tree<min_operator_t> segtree(n + 1, inf);
dp[0] = 1;
repeat (l, n) {
setmin(dp[l], segtree.point_get(l));
int r = l + 2;
for (; r < n + 1; ++ r) {
int used = acc[r] - acc[l];
int k = r - l - 1;
if (w < used + k) break;
int it = (w - used + (k - 1)) / k;
if (it <= dp[l]) break;
setmin(dp[r], max(dp[l], it));
}
if (r < n + 1) {
int rr = binsearch(r, n + 1, [&](int r) {
int used = acc[r] - acc[l];
int k = r - l - 1;
return w < used + k;
});
segtree.range_apply(r, rr, dp[l]);
}
if (acc[n] - acc[l] + n - l - 1 <= w) {
setmin(dp[n], dp[l]);
}
}
// output
printf("%d\n", dp[n]);
}
return 0;
}