Belirli bir noktaya 10 en yakın bulmak için Nasıl?3D milyonlarca puan:
3-d bir nokta (x,y,z) olarak tanımlanır. Mesafe d arasında herhangi iki nokta (X,Y,Z) ve (x,y,z) d= Karekök[(X-x)^2 (Y-y)^2 (Z-z)^2]. Şimdi her giriş uzayda bir noktada, belirli bir dosya bir milyon girdi, orada yok. Herhangi bir nokta (a,b,c) verilen en yakın 10 puan bulun. Kaç milyon puan saklamak istiyorsunuz ve nasıl bir veri yapısı bu 10 puan almak istiyorsunuz.
CEVAP
Milyon puan küçük bir sayı. En basit yaklaşım burada (KDTree göre kod daha yavaş (sadece bir puan sorgulama için) çalışır.
Kaba kuvvet (~1 saniye) yaklaşım
#!/usr/bin/env python
import numpy
NDIM = 3 # number of dimensions
# read points into array
a = numpy.fromfile('million_3D_points.txt', sep=' ')
a.shape = a.size / NDIM, NDIM
point = numpy.random.uniform(0, 100, NDIM) # choose random point
print 'point:', point
d = ((a-point)**2).sum(axis=1) # compute distances
ndx = d.argsort() # indirect sort
# print 10 nearest points to the chosen one
import pprint
pprint.pprint(zip(a[ndx[:10]], d[ndx[:10]]))
Çalıştırın:
$ time python nearest.py
point: [ 69.06310224 2.23409409 50.41979143]
[(array([ 69., 2., 50.]), 0.23500677815852947),
(array([ 69., 2., 51.]), 0.39542392750839772),
(array([ 69., 3., 50.]), 0.76681859086988302),
(array([ 69., 3., 50.]), 0.76681859086988302),
(array([ 69., 3., 51.]), 0.9272357402197513),
(array([ 70., 2., 50.]), 1.1088022980015722),
(array([ 70., 2., 51.]), 1.2692194473514404),
(array([ 70., 2., 51.]), 1.2692194473514404),
(array([ 70., 3., 51.]), 1.801031260062794),
(array([ 69., 1., 51.]), 1.8636121147970444)]
real 0m1.122s
user 0m1.010s
sys 0m0.120s
İşte milyon 3B noktaları oluşturur komut dosyası:
#!/usr/bin/env python
import random
for _ in xrange(10**6):
print ' '.join(str(random.randrange(100)) for _ in range(3))
Çıkış:
$ head million_3D_points.txt
18 56 26
19 35 74
47 43 71
82 63 28
43 82 0
34 40 16
75 85 69
88 58 3
0 63 90
81 78 98
Bu kodu daha karmaşık veri yapıları ve algoritmalar (aslında daha az bellek kullanmak ister örneğin, ya da daha hızlı olarak yukarıda en basit yaklaşım) test etmek için kullanabilirsiniz. Şu anda çalışan bir kod içeren tek cevap olduğunu fazlalaştı.
Çözüm KDTree dayalı (~1.4 saniye)
#!/usr/bin/env python
import numpy
NDIM = 3 # number of dimensions
# read points into array
a = numpy.fromfile('million_3D_points.txt', sep=' ')
a.shape = a.size / NDIM, NDIM
point = [ 69.06310224, 2.23409409, 50.41979143] # use the same point as above
print 'point:', point
from scipy.spatial import KDTree
# find 10 nearest points
tree = KDTree(a, leafsize=a.shape[0] 1)
distances, ndx = tree.query([point], k=10)
# print 10 nearest points to the chosen one
print a[ndx]
Çalıştırın:
$ time python nearest_kdtree.py
point: [69.063102240000006, 2.2340940900000001, 50.419791429999997]
[[[ 69. 2. 50.]
[ 69. 2. 51.]
[ 69. 3. 50.]
[ 69. 3. 50.]
[ 69. 3. 51.]
[ 70. 2. 50.]
[ 70. 2. 51.]
[ 70. 2. 51.]
[ 70. 3. 51.]
[ 69. 1. 51.]]]
real 0m1.359s
user 0m1.280s
sys 0m0.080s
C kısmi sıralama (~1.1 saniye)
// $ g nearest.cc && (time ./a.out < million_3D_points.txt )
#include <algorithm>
#include <iostream>
#include <vector>
#include <boost/lambda/lambda.hpp> // _1
#include <boost/lambda/bind.hpp> // bind()
#include <boost/tuple/tuple_io.hpp>
namespace {
typedef double coord_t;
typedef boost::tuple<coord_t,coord_t,coord_t> point_t;
coord_t distance_sq(const point_t& a, const point_t& b) { // or boost::geometry::distance
coord_t x = a.get<0>() - b.get<0>();
coord_t y = a.get<1>() - b.get<1>();
coord_t z = a.get<2>() - b.get<2>();
return x*x y*y z*z;
}
}
int main() {
using namespace std;
using namespace boost::lambda; // _1, _2, bind()
// read array from stdin
vector<point_t> points;
cin.exceptions(ios::badbit); // throw exception on bad input
while(cin) {
coord_t x,y,z;
cin >> x >> y >> z;
points.push_back(boost::make_tuple(x,y,z));
}
// use point value from previous examples
point_t point(69.06310224, 2.23409409, 50.41979143);
cout << "point: " << point << endl; // 1.14s
// find 10 nearest points using partial_sort()
// Complexity: O(N)*log(m) comparisons (O(N)*log(N) worst case for the implementation)
const size_t m = 10;
partial_sort(points.begin(), points.begin() m, points.end(),
bind(less<coord_t>(), // compare by distance to the point
bind(distance_sq, _1, point),
bind(distance_sq, _2, point)));
for_each(points.begin(), points.begin() m, cout << _1 << "\n"); // 1.16s
}
Çalıştırın:
g -O3 nearest.cc && (time ./a.out < million_3D_points.txt )
point: (69.0631 2.23409 50.4198)
(69 2 50)
(69 2 51)
(69 3 50)
(69 3 50)
(69 3 51)
(70 2 50)
(70 2 51)
(70 2 51)
(70 3 51)
(69 1 51)
real 0m1.152s
user 0m1.140s
sys 0m0.010s
Öncelik C (~1.2 saniye) Sıra
#include <algorithm> // make_heap
#include <functional> // binary_function<>
#include <iostream>
#include <boost/range.hpp> // boost::begin(), boost::end()
#include <boost/tr1/tuple.hpp> // get<>, tuple<>, cout <<
namespace {
typedef double coord_t;
typedef std::tr1::tuple<coord_t,coord_t,coord_t> point_t;
// calculate distance (squared) between points `a` & `b`
coord_t distance_sq(const point_t& a, const point_t& b) {
// boost::geometry::distance() squared
using std::tr1::get;
coord_t x = get<0>(a) - get<0>(b);
coord_t y = get<1>(a) - get<1>(b);
coord_t z = get<2>(a) - get<2>(b);
return x*x y*y z*z;
}
// read from input stream `in` to the point `point_out`
std::istream& getpoint(std::istream& in, point_t& point_out) {
using std::tr1::get;
return (in >> get<0>(point_out) >> get<1>(point_out) >> get<2>(point_out));
}
// Adaptable binary predicate that defines whether the first
// argument is nearer than the second one to given reference point
template<class T>
class less_distance : public std::binary_function<T, T, bool> {
const T& point;
public:
less_distance(const T& reference_point) : point(reference_point) {}
bool operator () (const T& a, const T& b) const {
return distance_sq(a, point) < distance_sq(b, point);
}
};
}
int main() {
using namespace std;
// use point value from previous examples
point_t point(69.06310224, 2.23409409, 50.41979143);
cout << "point: " << point << endl;
const size_t nneighbours = 10; // number of nearest neighbours to find
point_t points[nneighbours 1];
// populate `points`
for (size_t i = 0; getpoint(cin, points[i]) && i < nneighbours; i)
;
less_distance<point_t> less_distance_point(point);
make_heap (boost::begin(points), boost::end(points), less_distance_point);
// Complexity: O(N*log(m))
while(getpoint(cin, points[nneighbours])) {
// add points[-1] to the heap; O(log(m))
push_heap(boost::begin(points), boost::end(points), less_distance_point);
// remove (move to last position) the most distant from the
// `point` point; O(log(m))
pop_heap (boost::begin(points), boost::end(points), less_distance_point);
}
// print results
push_heap (boost::begin(points), boost::end(points), less_distance_point);
// O(m*log(m))
sort_heap (boost::begin(points), boost::end(points), less_distance_point);
for (size_t i = 0; i < nneighbours; i) {
cout << points[i] << ' ' << distance_sq(points[i], point) << '\n';
}
}
Çalıştırın:
$ g -O3 nearest.cc && (time ./a.out < million_3D_points.txt )
point: (69.0631 2.23409 50.4198)
(69 2 50) 0.235007
(69 2 51) 0.395424
(69 3 50) 0.766819
(69 3 50) 0.766819
(69 3 51) 0.927236
(70 2 50) 1.1088
(70 2 51) 1.26922
(70 2 51) 1.26922
(70 3 51) 1.80103
(69 1 51) 1.86361
real 0m1.174s
user 0m1.180s
sys 0m0.000s
Doğrusal Arama -tabanlı yaklaşım (zaman ~1.15 saniye)
// $ g -O3 nearest.cc && (time ./a.out < million_3D_points.txt )
#include <algorithm> // sort
#include <functional> // binary_function<>
#include <iostream>
#include <boost/foreach.hpp>
#include <boost/range.hpp> // begin(), end()
#include <boost/tr1/tuple.hpp> // get<>, tuple<>, cout <<
#define foreach BOOST_FOREACH
namespace {
typedef double coord_t;
typedef std::tr1::tuple<coord_t,coord_t,coord_t> point_t;
// calculate distance (squared) between points `a` & `b`
coord_t distance_sq(const point_t& a, const point_t& b);
// read from input stream `in` to the point `point_out`
std::istream& getpoint(std::istream& in, point_t& point_out);
// Adaptable binary predicate that defines whether the first
// argument is nearer than the second one to given reference point
class less_distance : public std::binary_function<point_t, point_t, bool> {
const point_t& point;
public:
explicit less_distance(const point_t& reference_point)
: point(reference_point) {}
bool operator () (const point_t& a, const point_t& b) const {
return distance_sq(a, point) < distance_sq(b, point);
}
};
}
int main() {
using namespace std;
// use point value from previous examples
point_t point(69.06310224, 2.23409409, 50.41979143);
cout << "point: " << point << endl;
less_distance nearer(point);
const size_t nneighbours = 10; // number of nearest neighbours to find
point_t points[nneighbours];
// populate `points`
foreach (point_t& p, points)
if (! getpoint(cin, p))
break;
// Complexity: O(N*m)
point_t current_point;
while(cin) {
getpoint(cin, current_point); //NOTE: `cin` fails after the last
//point, so one can't lift it up to
//the while condition
// move to the last position the most distant from the
// `point` point; O(m)
foreach (point_t& p, points)
if (nearer(current_point, p))
// found point that is nearer to the `point`
//NOTE: could use insert (on sorted sequence) & break instead
//of swap but in that case it might be better to use
//heap-based algorithm altogether
std::swap(current_point, p);
}
// print results; O(m*log(m))
sort(boost::begin(points), boost::end(points), nearer);
foreach (point_t p, points)
cout << p << ' ' << distance_sq(p, point) << '\n';
}
namespace {
coord_t distance_sq(const point_t& a, const point_t& b) {
// boost::geometry::distance() squared
using std::tr1::get;
coord_t x = get<0>(a) - get<0>(b);
coord_t y = get<1>(a) - get<1>(b);
coord_t z = get<2>(a) - get<2>(b);
return x*x y*y z*z;
}
std::istream& getpoint(std::istream& in, point_t& point_out) {
using std::tr1::get;
return (in >> get<0>(point_out) >> get<1>(point_out) >> get<2>(point_out));
}
}
Ölçümler çoğu zaman dosya, gerçek hesaplamaları büyüklük sırası daha az zaman alır dizi okuma harcandığını gösterir.
Bunları nasıl belirli sütun adları ile...
Nasıl belirli bir sayıda karakter için...
eğer bulmak için nasıl bir WordPress k...
Nasıl belirli bir kullanıcı ne anlama ...
Nasıl Haftanın gününü bulmak için, bel...