#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <iterator>
#include <numeric>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

namespace {

/// Width of one aggregation bucket: raw timestamps are integer-divided by this
/// value before volumes are summed per bucket.
/// NOTE(review): presumably timestamps are nanoseconds, making a bucket one
/// second wide -- confirm against the input format.
constexpr std::uint64_t BUCKET_SIZE = 1000000000;

/// Arithmetic mean of the values in [begin, end).
///
/// Precondition: the range is non-empty (an empty range divides by zero and
/// yields NaN).
template <typename It>
double mean(It begin, It end) {
  const double sum = std::accumulate(begin, end, 0.0);
  return sum / static_cast<double>(std::distance(begin, end));
}

/// Population standard deviation of the values in [begin, end), computed as
/// sqrt(E[x^2] - E[x]^2).
///
/// Precondition: the range is non-empty.
template <typename It>
double stdev(It begin, It end) {
  const auto count = static_cast<double>(std::distance(begin, end));
  const double avg = std::accumulate(begin, end, 0.0) / count;
  const double sq_sum = std::inner_product(begin, end, begin, 0.0);
  // Floating-point rounding can push the variance of a (near-)constant window
  // slightly below zero; clamp so std::sqrt never returns NaN.
  const double variance = std::max(0.0, sq_sum / count - avg * avg);
  return std::sqrt(variance);
}

/// Flags samples of `y` that deviate strongly from a trailing baseline.
///
/// For every index i >= lag, y[i] is compared with the mean/stdev of a
/// trailing window of `lag` filtered samples. If it differs from the mean by
/// more than `threshold` standard deviations the signal is +1 (above) or -1
/// (below); otherwise it is 0. Flagged samples enter the filtered series
/// damped by `influence`, so a peak only partially shifts the baseline.
///
/// @param y  input samples.
/// @return   one signal per input sample; all zeros when y.size() <= lag.
std::vector<int> find_peaks(const std::vector<double> &y) {
  constexpr std::size_t lag = 100;
  constexpr double threshold = 10.0;
  constexpr double influence = 0.5;

  const std::size_t t = y.size();
  std::vector<int> signals(t);
  if (t <= lag) return signals;

  // The first `lag` samples are taken verbatim; the rest are filled in below.
  std::vector<double> filtered_y(t, 0.0);
  const auto seed_end = y.begin() + static_cast<std::ptrdiff_t>(lag);
  std::copy(y.begin(), seed_end, filtered_y.begin());

  std::vector<double> avg_filter(t);
  std::vector<double> std_filter(t);
  // Seed the baseline at index lag-1, which is what the first loop iteration
  // (i == lag) reads. The previous code wrote both statistics to
  // avg_filter[lag], leaving the seed mean and stdev at zero.
  avg_filter[lag - 1] = mean(y.begin(), seed_end);
  std_filter[lag - 1] = stdev(y.begin(), seed_end);

  for (std::size_t i = lag; i < t; ++i) {
    const double baseline = avg_filter[i - 1];
    if (std::abs(y[i] - baseline) > threshold * std_filter[i - 1]) {
      signals[i] = y[i] > baseline ? +1 : -1;
      filtered_y[i] = influence * y[i] + (1 - influence) * filtered_y[i - 1];
    } else {
      signals[i] = 0;
      filtered_y[i] = y[i];
    }

    // NOTE(review): the window is [i - lag, i), i.e. it does not include the
    // filtered_y[i] just computed. Kept as-is to preserve results; confirm
    // whether (i - lag, i] was intended.
    const auto window_begin =
        filtered_y.begin() + static_cast<std::ptrdiff_t>(i - lag);
    const auto window_end = filtered_y.begin() + static_cast<std::ptrdiff_t>(i);
    avg_filter[i] = mean(window_begin, window_end);
    std_filter[i] = stdev(window_begin, window_end);
  }
  return signals;
}

/// Aggregates (bucket, volume) samples and runs peak detection on the result.
///
/// Volumes sharing a bucket are summed, buckets are ordered ascending, and the
/// per-bucket totals form the series handed to find_peaks().
///
/// @param series  (bucket, volume) pairs in any order; buckets may repeat.
/// @return        {per-bucket totals in ascending bucket order, their signals}.
std::pair<std::vector<double>, std::vector<int>> process_data(
    const std::vector<std::pair<std::uint64_t, long>> &series) {
  std::unordered_map<std::uint64_t, long> grouped;
  for (const auto &sample : series) grouped[sample.first] += sample.second;

  std::vector<std::pair<std::uint64_t, long>> ordered(grouped.begin(),
                                                      grouped.end());
  std::sort(ordered.begin(), ordered.end());

  std::vector<double> y;
  y.reserve(ordered.size());
  for (const auto &entry : ordered) {
    y.push_back(static_cast<double>(entry.second));
  }

  // Compute the signals before moving `y` into the result.
  std::vector<int> signals = find_peaks(y);
  return {std::move(y), std::move(signals)};
}

}  // namespace

/// Reads a '|'-delimited file (path in argv[1]), groups volumes per symbol and
/// time bucket, and prints "<symbol> <sum of signals>" for each symbol followed
/// by the elapsed wall time in seconds.
///
/// Per record: field 0 is the timestamp, field 2 the symbol, field 4 the
/// volume. The first line is treated as a header and skipped, as are records
/// whose field 0 is "END" or whose field 4 is "ENDP". Non-numeric timestamp or
/// volume fields make std::stoull / std::stol throw.
int main(int argc, char *argv[]) {
  using clock = std::chrono::high_resolution_clock;
  using std::chrono::duration_cast;
  using std::chrono::milliseconds;

  const auto start = clock::now();

  if (argc < 2) {
    std::cerr << "usage: " << (argc > 0 ? argv[0] : "peaks")
              << " <input-file>\n";
    return 1;
  }

  std::ifstream file(argv[1]);
  if (!file) {
    std::cerr << "error: cannot open " << argv[1] << '\n';
    return 1;
  }

  std::unordered_map<std::string, std::vector<std::pair<std::uint64_t, long>>>
      data;

  bool header = true;
  std::vector<std::string> fields;  // reused across lines to keep capacity
  for (std::string line; std::getline(file, line);) {
    if (header) {
      header = false;
      continue;
    }

    fields.clear();
    std::stringstream ss(line);
    for (std::string field; std::getline(ss, field, '|');) {
      fields.push_back(field);
    }

    // Skip blank lines and trailer records; a record with fewer than five
    // fields cannot carry a volume and is skipped instead of being indexed
    // out of bounds.
    if (fields.empty() || fields[0] == "END") continue;
    if (fields.size() < 5 || fields[4] == "ENDP") continue;

    const std::uint64_t timestamp = std::stoull(fields[0]);
    const long volume = std::stol(fields[4]);
    data[fields[2]].emplace_back(timestamp / BUCKET_SIZE, volume);
  }

  for (const auto &[symbol, series] : data) {
    const auto result = process_data(series);
    const auto &signals = result.second;
    std::cout << symbol << " " << std::reduce(signals.begin(), signals.end())
              << '\n';
  }

  std::cout << (duration_cast<milliseconds>(clock::now() - start).count() / 1e3)
            << std::endl;
}