1
0
mirror of https://github.com/exaloop/codon.git synced 2025-06-03 15:03:52 +08:00
codon/bench/taq/taq.cpp
Ibrahim Numanagić 5de233a64e
Dynamic Polymorphism (#58)
* Use Static[] for static inheritance

* Support .seq extension

* Fix #36

* Polymorphic typechecking; vtables [wip]

* v-table dispatch [wip]

* vtable routing [wip; bug]

* vtable routing [MVP]

* Fix texts

* Add union type support

* Update FAQs

* Clarify

* Add BSL license

* Add makeUnion

* Add IR UnionType

* Update union representation in LLVM

* Update README

* Update README.md

* Update README

* Update README.md

* Add benchmarks

* Add more benchmarks and README

* Add primes benchmark

* Update benchmarks

* Fix cpp

* Clean up list

* Update faq.md

* Add binary trees benchmark

* Add fannkuch benchmark

* Fix paths

* Add PyPy

* Abort on fail

* More benchmarks

* Add cpp word_count

* Update set_partition cpp

* Add nbody cpp

* Add TAQ cpp; fix word_count timing

* Update CODEOWNERS

* Update README

* Update README.md

* Update CODEOWNERS

* Fix bench script

* Update binary_trees.cpp

* Update taq.cpp

* Fix primes benchmark

* Add mandelbrot benchmark

* Fix OpenMP init

* Add Module::unsafeGetUnionType

* UnionType [wip] [skip ci]

* Integrate IR unions and Union

* UnionType refactor [skip ci]

* Update README.md

* Update docs

* UnionType [wip] [skip ci]

* UnionType and automatic unions

* Add Slack

* Update faq.md

* Refactor types

* New error reporting [wip]

* New error reporting [wip]

* peglib updates [wip] [skip_ci]

* Fix parsing issues

* Fix parsing issues

* Fix error reporting issues

* Make sure random module matches Python

* Update releases.md

* Fix tests

* Fix #59

* Fix #57

* Fix #50

* Fix #49

* Fix #26; Fix #51; Fix #47; Fix #49

* Fix collection extension methods

* Fix #62

* Handle *args/**kwargs with Callable[]; Fix #43

* Fix #43

* Fix Ptr.__sub__; Fix polymorphism issues

* Add typeinfo

* clang-format

* Upgrade fmtlib to v9; Use CPM for fmtlib; format spec support; __format__ support

* Use CPM for semver and toml++

* Remove extension check

* Revamp str methods

* Update str.zfill

* Fix thunk crashes [wip] [skip_ci]

* Fix str.__reversed__

* Fix count_with_max

* Fix vtable memory allocation issues

* Add poly AST tests

* Use PDQsort when stability does not matter

* Fix dotted imports; Fix  issues

* Fix kwargs passing to Python

* Fix #61

* Fix #37

* Add isinstance support for unions; Union methods return Union type if different

* clang-format

* Nicely format error tracebacks

* Fix build issues; clang-format

* Fix OpenMP init

* Fix OpenMP init

* Update README.md

* Fix tests

* Update license [skip ci]

* Update license [ci skip]

* Add copyright header to all source files

* Fix super(); Fix error recovery in ClassStmt

* Clean up whitespace [ci skip]

* Use Python 3.9 on CI

* Print info in random test

* Fix single unions

* Update random_test.codon

* Fix polymorphic thunk instantiation

* Fix random test

* Add operator.attrgetter and operator.methodcaller

* Add code documentation

* Update documentation

* Update README.md

* Fix tests

* Fix random init

Co-authored-by: A. R. Shajii <ars@ars.me>
2022-12-04 19:45:21 -05:00

132 lines
3.5 KiB
C++

#include <algorithm>
#include <chrono>
#include <cmath>
#include <fstream>
#include <iostream>
#include <numeric>
#include <sstream>
#include <string>
#include <unordered_map>
#include <vector>
namespace {
/// Arithmetic mean of the values in [begin, end), accumulated as double.
///
/// @param begin Iterator to the first value.
/// @param end   Iterator one past the last value.
/// @return Sum of the values divided by the number of values.
template <typename It> double mean(It begin, It end) {
  const auto count = std::distance(begin, end);
  const double total = std::accumulate(begin, end, 0.0);
  return total / count;
}
/// Population standard deviation of the values in [begin, end).
///
/// Computed as sqrt(E[x^2] - E[x]^2), with both expectations accumulated as
/// double over the same range.
///
/// @param begin Iterator to the first value.
/// @param end   Iterator one past the last value.
/// @return Square root of (mean of squares minus squared mean).
template <typename It> double stdev(It begin, It end) {
  const auto count = std::distance(begin, end);
  const double avg = std::accumulate(begin, end, 0.0) / count;
  // inner_product of the range with itself yields the sum of squares.
  const double mean_of_squares =
      std::inner_product(begin, end, begin, 0.0) / count;
  return std::sqrt(mean_of_squares - avg * avg);
}
/// Marks outliers in `y` using a moving-window z-score test.
///
/// Every sample at index i >= lag is compared with the mean and standard
/// deviation recorded for index i - 1. A sample farther than `threshold`
/// standard deviations from that mean is flagged +1 (above the mean) or -1
/// (otherwise) and enters the smoothed series damped by `influence`; any other
/// sample is flagged 0 and enters the smoothed series unchanged.
///
/// @param y Input series.
/// @return One entry per sample of `y`, each -1, 0 or +1. All entries are 0
///         when `y` holds at most `lag` (100) samples.
std::vector<int> find_peaks(const std::vector<double> &y) {
  const int lag = 100;
  const double threshold = 10.0;
  const double influence = 0.5;
  const int t = static_cast<int>(y.size());

  std::vector<int> signals(t);
  if (t <= lag)
    return signals;

  // Smoothed copy of the series: the first `lag` samples are taken verbatim,
  // the remaining slots are filled in by the loop below.
  std::vector<double> filtered_y;
  filtered_y.reserve(t);
  for (int i = 0; i < t; i++)
    filtered_y.push_back(i < lag ? y[i] : 0.0);

  std::vector<double> avg_filter(t);
  std::vector<double> std_filter(t);
  // Seed the mean and the deviation from the first `lag` samples, each in its
  // own filter.
  avg_filter[lag] = mean(y.begin(), y.begin() + lag);
  std_filter[lag] = stdev(y.begin(), y.begin() + lag);

  // NOTE: the i == lag iteration reads the filters at index lag - 1 (still
  // zero-initialised) and then recomputes index lag from the same window, so
  // the first comparison is made against zero statistics. The indexing is
  // kept as is because shifting it would change the reported signals.
  for (int i = lag; i < t; i++) {
    if (std::abs(y[i] - avg_filter[i - 1]) > threshold * std_filter[i - 1]) {
      signals[i] = y[i] > avg_filter[i - 1] ? +1 : -1;
      // Damp the outlier so it only partially shifts later statistics.
      filtered_y[i] = influence * y[i] + (1 - influence) * filtered_y[i - 1];
    } else {
      signals[i] = 0;
      filtered_y[i] = y[i];
    }
    // Statistics over the `lag` smoothed samples in [i - lag, i).
    avg_filter[i] = mean(filtered_y.begin() + (i - lag), filtered_y.begin() + i);
    std_filter[i] = stdev(filtered_y.begin() + (i - lag), filtered_y.begin() + i);
  }
  return signals;
}
/// Collapses `series` into per-bucket totals and runs peak detection on them.
///
/// Entries that share the same key (pair.first) have their values
/// (pair.second) summed. The totals are then ordered by ascending key and
/// converted to double.
///
/// @param series (bucket, volume) pairs in any order.
/// @return {totals ordered by bucket, find_peaks(totals)}.
std::pair<std::vector<double>, std::vector<int>>
process_data(const std::vector<std::pair<uint64_t, long>> &series) {
  std::unordered_map<uint64_t, long> totals;
  for (const auto &[bucket, volume] : series)
    totals[bucket] += volume;

  // The hash map has no useful order; copy it out and sort by bucket.
  std::vector<std::pair<uint64_t, long>> ordered(totals.begin(), totals.end());
  std::sort(ordered.begin(), ordered.end());

  std::vector<double> y(ordered.size());
  std::transform(ordered.begin(), ordered.end(), y.begin(),
                 [](const std::pair<uint64_t, long> &entry) {
                   return static_cast<double>(entry.second);
                 });
  return {y, find_peaks(y)};
}
// Divisor applied to raw timestamps to obtain a bucket index. Presumably the
// number of nanoseconds per second (one-second buckets) -- confirm against
// the input format.
constexpr uint64_t BUCKET_SIZE = 1'000'000'000;
} // namespace
/// Reads a '|'-delimited file, groups volumes per symbol into time buckets and
/// prints, for every symbol, the sum of its peak signals, followed by the
/// elapsed wall time in seconds.
///
/// Usage: <program> <input-file>
///
/// The first line of the file is skipped as a header. Of each remaining line,
/// field 0 is parsed as the timestamp, field 2 is the symbol and field 4 is
/// parsed as the volume. Lines with fewer than five fields, or whose first
/// field is "END" or fifth field is "ENDP", are skipped. Numeric fields that
/// fail to parse make std::stoull / std::stol throw; that is not caught here.
///
/// @return 0 on success, 1 when the input path is missing or cannot be opened.
int main(int argc, char *argv[]) {
  using clock = std::chrono::high_resolution_clock;
  using std::chrono::duration_cast;
  using std::chrono::milliseconds;
  const auto start = clock::now();

  // argv[1] is only valid when an argument was actually supplied.
  if (argc < 2) {
    std::cerr << "usage: " << (argc > 0 ? argv[0] : "taq") << " <input-file>"
              << std::endl;
    return 1;
  }
  std::ifstream file(argv[1]);
  if (!file) {
    std::cerr << "error: cannot open " << argv[1] << std::endl;
    return 1;
  }

  std::unordered_map<std::string, std::vector<std::pair<uint64_t, long>>> data;
  bool header = true;
  for (std::string line; std::getline(file, line);) {
    if (header) {
      header = false;
      continue;
    }
    std::stringstream ss(line);
    std::vector<std::string> x;
    for (std::string field; std::getline(ss, field, '|');)
      x.push_back(field);
    // Fields 0, 2 and 4 are read below, so at least five must be present;
    // blank or truncated lines are skipped instead of indexing out of range.
    if (x.size() < 5 || x[0] == "END" || x[4] == "ENDP")
      continue;
    const uint64_t timestamp = std::stoull(x[0]);
    const std::string &symbol = x[2];
    const long volume = std::stol(x[4]);
    data[symbol].emplace_back(timestamp / BUCKET_SIZE, volume);
  }

  for (const auto &[symbol, series] : data) {
    const auto result = process_data(series);
    const auto &signals = result.second;
    std::cout << symbol << " " << std::reduce(signals.begin(), signals.end())
              << '\n';
  }
  // Elapsed time since program start, in seconds; std::endl flushes the
  // buffered per-symbol lines as well.
  std::cout << (duration_cast<milliseconds>(clock::now() - start).count() / 1e3)
            << std::endl;
  return 0;
}