- Timestamp:
- 07/10/15 18:33:26 (10 months ago)
- Location:
- mystic
- Files:
-
- 1 added
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
mystic/_math/measures.py
r792 r803 504 504 """impose a standard deviation on a list of points by reweighting weights""" 505 505 return impose_reweighted_variance(s**2, samples, weights, solver) 506 507 ##### sampling statistics methods ##### 508 def _sort(samples, weights=None): 509 "sort (weighted) samples; returns 2-D array of samples and weights" 510 import numpy as np 511 if weights is None: 512 x = np.ones((2,len(samples))) #XXX: casts to a float 513 x[0] = np.sort(samples) 514 return x 515 x = np.vstack([samples,weights]).T 516 return x[x[:,0].argsort()].T 517 518 519 def median(samples, weights=None): 520 """calculate the (weighted) median for a list of points 521 522 Inputs: 523 samples -- a list of sample points 524 weights -- a list of sample weights 525 """ 526 import numpy as np 527 x,w = _sort(samples,weights) 528 s = sum(w) 529 return np.mean(x[s/2. - np.cumsum(w) <= 0][0:2-x.size%2]) 530 531 532 def mad(samples, weights=None): #, scale=1.4826): 533 """calculate the (weighted) median absolute deviation for a list of points 534 535 Inputs: 536 samples -- a list of sample points 537 weights -- a list of sample weights 538 """ 539 s = sarray(samples) 540 return median(abs(s - median(samples,weights)),weights) # * scale 541 542 543 #XXX: impose median by ensuring equal # samples < & >... (and not shifting) ? 544 def impose_median(m, samples, weights=None): 545 """impose a median on a list of (weighted) points 546 (this function is 'range-preserving' and 'mad-preserving') 547 548 Inputs: 549 m -- the target median 550 samples -- a list of sample points 551 weights -- a list of sample weights 552 """ 553 s = asarray(samples) 554 return (s + (m - median(samples, weights))).tolist() 555 556 557 def impose_mad(s, samples, weights=None): 558 """impose a median absolute deviation on a list of (weighted) points 559 (this function is 'median-preserving') 560 561 Inputs: 562 s -- the target median absolute deviation 563 samples -- a list of sample points 564 weights -- a list of sample weights 565 """ 566 import numpy as np 567 m = median(samples, weights) 568 samples = np.asarray(list(samples)) 569 _mad = mad(samples,weights) 570 if not _mad: # protect against ZeroDivision when mad = 0 571 return [np.nan]*len(samples) 572 scale = float(s) / _mad 573 samples = samples * scale #NOTE: not "median-preserving" until next line 574 return impose_median(m, samples, weights) #NOTE: not "range-preserving" 575 576 577 def _k(weights, k=0, clip=False, norm=False, eps=15): #XXX: better 9 ? 578 "trim weights at k%; if clip is True, winsorize instead of trim" 579 #NOTE: eps is tolerance for cancellation of similar values 580 import numpy as np 581 try: 582 klo,khi = k 583 except TypeError: 584 klo = khi = k 585 if klo + khi > 100: 586 msg = "cannot crop '%s + %s > 100' percent" % (klo,khi) 587 raise ValueError(msg) 588 elif klo < 0 or khi < 0: 589 msg = "cannot crop negative percent '%s + %s'" % (klo,khi) 590 raise ValueError(msg) 591 else: 592 klo,khi = .01*klo,.01*khi 593 w = np.array(weights, dtype=float)/sum(weights) #XXX: no dtype? 594 w_lo, w_hi = np.cumsum(w), np.cumsum(w[::-1]) 595 # calculate the cropped indicies 596 lo = len(w) - sum((w_lo - klo).round(eps) > 0) 597 hi = sum((w_hi - khi).round(eps) > 0) - 1 598 # flip indices if flipped 599 if lo > hi: 600 lo,hi = hi,lo 601 if not clip: 602 # find the values at k% 603 w_lo = w_lo[lo] 604 w_hi = w_hi[len(w)-1-hi] 605 # reset the values at k% 606 if klo + khi == 1: 607 w[lo] = w[hi] = 0 608 elif lo == hi: 609 w[lo] = max(1 - khi - klo,0) 610 else: 611 w[lo] = max(w_lo - klo,0) 612 w[hi] = max(w_hi - khi,0) 613 else: 614 # reset the values at k% 615 if lo == hi: 616 w[lo] = sum(w) 617 else: 618 w[lo] += sum(w[:lo]) 619 w[hi] += sum(w[hi+1:]) 620 # trim the remaining weights 621 w[:lo] = 0 622 w[hi+1:] = 0 623 if not norm: w *= sum(weights) 624 return w.tolist() 625 626 627 def tmean(samples, weights=None, k=0, clip=False): 628 """calculate the (weighted) trimmed mean for a list of points 629 630 Inputs: 631 samples -- a list of sample points 632 weights -- a list of sample weights 633 k -- percent samples to trim (k%) [tuple (lo,hi) or float if lo=hi] 634 clip -- if True, winsorize instead of trimming k% of samples 635 636 NOTE: if all samples are excluded, will return nan 637 """ 638 samples,weights = _sort(samples,weights) 639 weights = _k(weights,k,clip) 640 return sum(samples * weights)/sum(weights) 641 642 643 def tvariance(samples, weights=None, k=0, clip=False): 644 """calculate the (weighted) trimmed variance for a list of points 645 646 Inputs: 647 samples -- a list of sample points 648 weights -- a list of sample weights 649 k -- percent samples to trim (k%) [tuple (lo,hi) or float if lo=hi] 650 clip -- if True, winsorize instead of trimming k% of samples 651 652 NOTE: if all samples are excluded, will return nan 653 """ 654 samples,weights = _sort(samples,weights) 655 weights = _k(weights,k,clip) 656 trim_mean = sum(samples * weights)/sum(weights) 657 return mean(abs(samples - trim_mean)**2, weights) #XXX: correct ? 658 659 660 def tstd(samples, weights=None, k=0, clip=False): 661 """calculate the (weighted) trimmed standard deviation for a list of points 662 663 Inputs: 664 samples -- a list of sample points 665 weights -- a list of sample weights 666 k -- percent samples to trim (k%) [tuple (lo,hi) or float if lo=hi] 667 clip -- if True, winsorize instead of trimming k% of samples 668 669 NOTE: if all samples are excluded, will return nan 670 """ 671 import numpy as np 672 return np.sqrt(tvariance(samples, weights, k, clip)) 673 674 675 #XXX: use reweighting to impose tmean, tvariance, tstd, median, & mad ? 676 def impose_tmean(m, samples, weights=None, k=0, clip=False): 677 """impose a trimmed mean (at k%) on a list of (weighted) points 678 (this function is 'range-preserving' and 'tvariance-preserving') 679 680 Inputs: 681 m -- the target trimmed mean 682 samples -- a list of sample points 683 weights -- a list of sample weights 684 k -- percent samples to be trimmed (k%) [tuple (lo,hi) or float if lo=hi] 685 clip -- if True, winsorize instead of trimming k% of samples 686 """ 687 s = asarray(samples) 688 return (s + (m - tmean(samples, weights, k=k, clip=clip))).tolist() 689 690 691 def impose_tvariance(v, samples, weights=None, k=0, clip=False): 692 """impose a trimmed variance (at k%) on a list of (weighted) points 693 (this function is 'tmean-preserving') 694 695 Inputs: 696 v -- the target trimmed variance 697 samples -- a list of sample points 698 weights -- a list of sample weights 699 k -- percent samples to be trimmed (k%) [tuple (lo,hi) or float if lo=hi] 700 clip -- if True, winsorize instead of trimming k% of samples 701 """ 702 import numpy as np 703 m = tmean(samples, weights, k=k, clip=clip) 704 samples = np.asarray(list(samples)) 705 706 tvar = tvariance(samples,weights,k=k,clip=clip) 707 if not tvar: # protect against ZeroDivision when tvar = 0 708 return [np.nan]*len(samples) #XXX: k? 709 scale = np.sqrt(float(v) / tvar) 710 samples = samples * scale #NOTE: not "tmean-preserving" until next line 711 return impose_tmean(m, samples, weights, k=k, clip=clip) #NOTE: not "range-preserving" 712 713 714 def impose_tstd(s, samples, weights=None, k=0, clip=False): 715 """impose a trimmed std (at k%) on a list of (weighted) points 716 (this function is 'tmean-preserving') 717 718 Inputs: 719 s -- the target trimmed standard deviation 720 samples -- a list of sample points 721 weights -- a list of sample weights 722 k -- percent samples to be trimmed (k%) [tuple (lo,hi) or float if lo=hi] 723 clip -- if True, winsorize instead of trimming k% of samples 724 """ 725 return impose_tvariance(s**2, samples, weights, k=k, clip=clip) 726 506 727 507 728 ##### misc methods #####
Note: See TracChangeset
for help on using the changeset viewer.