Carrot2 circle chart - user-interface
Does anyone know how to create a circle chart like the one used in Carrot2?
The mbostock/d3 gallery has good visualizations for Carrot2 output.
The carrot2-rb Ruby client for Carrot2 returns an object with a clusters array. The score and phrases attributes of each cluster can be used in a simple doughnut chart.
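For instance, here is a minimal sketch (my own, not from the carrot2-rb docs) that feeds those attributes into a matplotlib doughnut chart; the clusters list below mirrors the shape of the client's output shown later in this thread:

import matplotlib.pyplot as plt

# Cluster objects shaped like the carrot2-rb output (phrases + size)
clusters = [
    {"phrases": ["Coupon"],       "score": 0.064, "size": 3},
    {"phrases": ["Exclusive"],    "score": 0.704, "size": 2},
    {"phrases": ["Other Topics"], "score": 0.0,   "size": 1},
]

labels = [c["phrases"][0] for c in clusters]
sizes = [c["size"] for c in clusters]

# A wedge width below 1.0 turns matplotlib's pie into a doughnut
plt.pie(sizes, labels=labels, wedgeprops={"width": 0.4})
plt.show()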
More dynamic visualizations like expandable dendrograms are possible with tree structures like flare.json.
Here is a zoomable wheel based on Carrot2 results.
This is the CoffeeScript code I wrote to create flare.json using the documents elements of the clusters.
clusters = [{"id":0,"size":3,"phrases":["Coupon"],"score":0.06441151442396735,"documents":["0","1","2"],"attributes":{"score":0.06441151442396735}},{"id":1,"size":2,"phrases":["Exclusive"],"score":0.7044284368639101,"documents":["0","1"],"attributes":{"score":0.7044284368639101}},{"id":2,"size":1,"phrases":["Other Topics"],"score":0.0,"documents":["3"],"attributes":{"other-topics":true,"score":0.0}}]
get_children = (index, index2, clusters, documents) ->
  unless index == (clusters.length - 1) # If not the last cluster
    orphans = {'name': ''}
    intr = _.intersection(documents, clusters[index2].documents)
    if intr.length > 0 # Continue drilling
      if index2 < (clusters.length - 1) # Up until the last element
        # Get the next layer of orphans
        orphan_docs = _.difference(intr, clusters[index2 + 1].documents)
        if orphan_docs.length > 0
          orphans = {'name': orphan_docs, 'size': orphan_docs.length}
        if _.intersection(intr, clusters[index2 + 1].documents).length > 0
          return [orphans, {'name': clusters[index2 + 1].phrases[0], 'children': get_children(index, (index2 + 1), clusters, intr)}]
        else
          return [orphans]
      else
        # At the second-to-last cluster, so terminate here
        return [{'name': intr}]
    else # No intersection, so return a bundle of the current documents
      return [{'name': documents}]
  return [{'name': _.intersection(clusters[index].documents, clusters[index2].documents)}]
get_flare = (clusters) ->
  # Make the root object
  flare =
    name: "root"
    children: []
  children = flare.children
  # All clusters but the last (it has already been compared to the previous ones)
  _.each(clusters[0..(clusters.length - 2)], (cluster, index) ->
    # All documents for all remaining clusters in the array
    remaining_documents = _.flatten(_.map clusters[(index + 1)..clusters.length], (c) ->
      c.documents
    )
    root_child = {'name': cluster.phrases[0], 'children': []}
    # Get the first layer of orphans
    orphan_docs = _.difference(cluster.documents, remaining_documents)
    if orphan_docs.length > 0
      root_child.children.push {'name': orphan_docs, size: orphan_docs.length}
    for index2 in [(index + 1)..(clusters.length - 1)] by 1
      if _.intersection(cluster.documents, clusters[index2].documents).length > 0
        root_child.children.push {'name': clusters[index2].phrases[0], 'children': get_children(index, index2, clusters, cluster.documents)}
    children.push root_child
  )
  flare

flare = get_flare clusters
You can buy their Circles JavaScript component: http://carrotsearch.com/circles-overview
Related
How do I get historical candlestick data or kline from Phemex Public API?
I need to be able to extract historical candlestick data (Open, Close, High, Low, and Volume) in differing intervals (1m, 3m, 5m, 1H, etc.) at specified timestamps from Phemex. Other exchanges, such as Binance or FTX, seem to provide REST/WebSocket APIs for this, yet I can't seem to find one for Phemex. Mind helping me resolve this issue? Thank you so much.

Steps I have taken, yet found no resolution:

- Went to https://phemex.com/user-guides/api-overview
- Went to https://github.com/phemex/phemex-api-docs/blob/master/Public-Contract-API-en.md
- None of the items listed in 'Market Data API List' seem to do the task
This code will get the candles and save them to a CSV file. Hope this helps :)

import csv
import time

import ccxt
import numpy as np

exchange = ccxt.phemex({
    'options': {'defaultType': 'swap'},
    'enableRateLimit': True
})

# Load the markets
markets = exchange.load_markets()

current_time = int(time.time() * 1000)
one_min = 60000

def get_all_candles(symbol, start_time, stop_time):
    counter = 0
    candle_counter = 0
    data_set = []
    t = 0
    while t < stop_time:
        if data_set == []:
            # First request: start from the requested start time
            block = exchange.fetch_ohlcv(symbol, '1m', start_time)
            for candle in block:
                if candle == []:
                    break
                data_set.append(candle)
            last_time_in_block = block[-1][0]
            counter += 1
            candle_counter += len(block)
            print(f'{counter} - {block[0]} - {candle_counter} - {last_time_in_block}')
        if data_set != []:
            # Subsequent requests: continue one minute after the last candle
            t = last_time_in_block + one_min
            block = exchange.fetch_ohlcv(symbol, '1m', t)
            if block == []:
                break
            for candle in block:
                if candle == []:
                    break
                data_set.append(candle)
            last_time_in_block = block[-1][0]
            candle_counter += len(block)
            counter += 1
            print(f'{counter} - {block[0]} - {candle_counter} - {last_time_in_block}')
            time.sleep(1)
    return data_set

data_set = get_all_candles('BTCUSD', 1574726400000, current_time)
print(np.shape(data_set))

with open('raw.csv', 'w', newline='') as csv_file:
    column_names = ['time', 'open', 'high', 'low', 'close', 'volume']
    csv_writer = csv.DictWriter(csv_file, fieldnames=column_names)
    csv_writer.writeheader()
    for candle in data_set:
        csv_writer.writerow({
            'time': candle[0],
            'open': candle[1],
            'high': candle[2],
            'low': candle[3],
            'close': candle[4],
            'volume': candle[5]
        })
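As an aside (my addition, not part of the original answer): ccxt's fetch_ohlcv takes the timeframe as its second argument, so the same loop should also cover the 3m/5m/1H intervals from the question, assuming Phemex supports them:

# Reuses the exchange object created above
candles_5m = exchange.fetch_ohlcv('BTCUSD', '5m', 1574726400000)
candles_1h = exchange.fetch_ohlcv('BTCUSD', '1h', 1574726400000)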
Multisearch gives different total hits in different runs
I'm using Elasticsearch 6.8 and Python 3. I'm running on my laptop with one node, and there are no threads/processes that insert/update/delete docs in the index while I'm running the multi search. I'm running the following multi search command:

import json
from elasticsearch import Elasticsearch

es = Elasticsearch()
search_arr = []

# search-1
search_arr.append({'index': 'test1', 'type': 'type1'})
search_arr.append({"query": {"term": {"confidence": "1"}}})

# search-2
search_arr.append({'index': 'test1', 'type': 'type1'})
search_arr.append({"query": {"match_all": {}}, 'from': 0, 'size': 2})

request = ''
for each in search_arr:
    request += '%s \n' % json.dumps(each)

res = es.msearch(body=request)
print("First Query, num of results = ", res['responses'][0]['hits']['total'])
print("Second Query, num of results = ", res['responses'][1]['hits']['total'])

Each time I run this code I get different results (as I wrote above, there are no processes that insert/update/delete documents). Why am I getting different results each time, and what do I need to do to get consistent results?
I found the cause of the problem: I needed to add refresh=True after the first time I added new data. (I had bulk-inserted thousands of new documents and ran the multi search right afterwards.)
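A minimal sketch of what that looks like with the elasticsearch-py client from the question (actions is a placeholder for the documents being bulk-inserted):

from elasticsearch import Elasticsearch, helpers

es = Elasticsearch()
actions = []  # placeholder: the documents to bulk-insert

# Refresh as part of the bulk call so the docs are searchable immediately...
helpers.bulk(es, actions, refresh=True)

# ...or refresh the index explicitly afterwards
es.indices.refresh(index='test1')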
Clarification needed for children_ attribute of sklearn's AgglomerativeClustering
To start with, it seems to me that I am making a silly mistake somewhere. Hence, your help is much appreciated to locate this mistake. The documentation for sklearn's AgglomerativeClustering model states this: children_ : array-like, shape (n_nodes-1, 2) The children of each non-leaf node. Values less than n_samples correspond to leaves of the tree which are the original samples. A node i greater than or equal to n_samples is a non-leaf node and has children children_[i - n_samples]. Alternatively at the i-th iteration, children[i][0] and children[i][1] are merged to form node n_samples + i Now, here is my model.children_ output, with 1093 being the biggest index, which is (n_samples-1)*2 -1): agglo_children = [[67, 256],[13, 400],[70, 309],[15, 35],[181, 416],[391, 546],[311, 486],[420, 516],[80, 240],[75, 314],[26, 254],[76, 338],[134, 536],[258, 455],[152, 411],[481, 500],[170, 243],[24, 37],[428, 514],[8, 453],[93, 301],[163, 214],[74, 498],[206, 318],[17, 217],[121, 395],[306, 394],[12, 362],[169, 287],[16, 408],[224, 426],[61, 263],[136, 187],[0, 18],[21, 22],[89, 361],[175, 531],[105, 465],[294, 547],[31, 68],[308, 468],[417, 430],[266, 456],[278, 449],[127, 549],[36, 373],[151, 448],[213, 365],[100, 343],[180, 567],[77, 216],[220, 555],[159, 435],[146, 445],[54, 503],[57, 336],[42, 452],[399, 472],[49, 562],[564, 565],[274, 487],[235, 412],[247, 540],[162, 522],[1, 538],[207, 497],[79, 480],[125, 528],[85, 345],[478, 485],[447, 603],[160, 592],[535, 544],[303, 319],[132, 282],[55, 457],[4, 509],[479, 588],[227, 470],[2, 402],[117, 469],[95, 580],[120, 142],[401, 576],[52, 316],[591, 594],[380, 575],[339, 467],[233, 600],[48, 458],[38, 346],[246, 568],[53, 265],[221, 355],[335, 560],[196, 344],[135, 226],[201, 238],[72, 190],[239, 518],[248, 459],[150, 586],[414, 515],[520, 629],[143, 272],[58, 307],[477, 604],[543, 574],[204, 450],[328, 405],[237, 621],[164, 496],[101, 442],[323, 376],[444, 570],[495, 512],[33, 333],[464, 473],[441, 471],[398, 460],[633, 649],[418, 609],[82, 174],[103, 182],[374, 572],[577, 639],[88, 261],[25, 291],[69, 466],[6, 563],[371, 566],[385, 505],[504, 524],[492, 608],[168, 578],[474, 598],[255, 488],[558, 634],[137, 491],[228, 357],[155, 403],[192, 556],[87, 624],[610, 626],[34, 605],[147, 529],[183, 622],[178, 384],[84, 209],[45, 647],[386, 490],[130, 409],[62, 98],[312, 661],[44, 407],[203, 589],[148, 584],[140, 635],[210, 606],[527, 653],[73, 425],[363, 534],[310, 667],[358, 625],[288, 298],[599, 623],[351, 397],[297, 382],[43, 245],[10, 324],[602, 673],[99, 230],[299, 415],[23, 451],[611, 678],[561, 677],[614, 681],[277, 356],[111, 421],[499, 654],[65, 617],[259, 484],[585, 630],[40, 352],[569, 657],[131, 637],[212, 669],[454, 613],[205, 273],[50, 628],[108, 462],[5, 114],[177, 387],[59, 185],[81, 662],[521, 674],[198, 275],[331, 571],[30, 537],[341, 672],[123, 691],[3, 129],[250, 429],[315, 593],[63, 64],[251, 293],[284, 632],[119, 141],[153, 436],[194, 597],[86, 573],[636, 682],[78, 393],[173, 727],[184, 651],[262, 718],[27, 532],[195, 197],[253, 286],[66, 327],[11, 377],[381, 513],[138, 552],[139, 257],[413, 687],[71, 730],[541, 659],[313, 694],[47, 502],[369, 664],[419, 734],[28, 229],[507, 640],[236, 583],[106, 686],[551, 668],[124, 305],[724, 754],[158, 188],[218, 279],[9, 102],[582, 697],[326, 349],[354, 508],[51, 696],[545, 698],[643, 646],[41, 671],[104, 378],[260, 601],[360, 790],[711, 783],[20, 705],[422, 706],[359, 704],[292, 751],[701, 755],[332, 763],[161, 320],[431, 707],[208, 439],[383, 690],[97, 443],[501, 778],[109, 252],[716, 
746],[348, 767],[234, 761],[438, 728],[122, 638],[115, 590],[539, 619],[225, 595],[631, 759],[553, 642],[302, 322],[242, 396],[94, 717],[133, 709],[771, 774],[249, 267],[530, 735],[432, 792],[364, 812],[559, 785],[641, 776],[612, 679],[476, 542],[525, 652],[660, 749],[317, 353],[695, 756],[329, 685],[368, 607],[334, 375],[46, 179],[244, 482],[116, 281],[280, 721],[379, 693],[112, 742],[410, 722],[424, 811],[342, 743],[533, 791],[390, 782],[475, 680],[714, 740],[285, 713],[773, 807],[14, 167],[202, 489],[655, 839],[437, 463],[91, 337],[92, 330],[264, 741],[32, 232],[748, 829],[616, 840],[699, 794],[145, 703],[60, 200],[189, 554],[725, 823],[423, 815],[813, 837],[366, 670],[404, 676],[29, 855],[832, 871],[627, 758],[290, 787],[579, 806],[171, 644],[154, 805],[388, 523],[268, 656],[96, 581],[90, 276],[692, 702],[193, 739],[789, 830],[506, 700],[295, 769],[596, 689],[803, 831],[289, 645],[762, 866],[283, 764],[128, 864],[325, 897],[738, 857],[493, 801],[304, 620],[118, 885],[511, 587],[367, 849],[222, 715],[144, 172],[708, 780],[744, 777],[733, 848],[615, 726],[271, 843],[215, 321],[176, 766],[223, 347],[156, 890],[186, 300],[684, 851],[269, 510],[434, 440],[820, 884],[650, 795],[149, 519],[340, 389],[19, 39],[406, 446],[658, 836],[550, 886],[860, 865],[113, 665],[56, 392],[802, 804],[881, 891],[370, 752],[816, 906],[712, 900],[548, 796],[648, 846],[427, 483],[350, 833],[107, 165],[719, 921],[231, 800],[784, 877],[683, 750],[157, 675],[720, 896],[788, 905],[710, 911],[770, 874],[557, 918],[372, 876],[850, 934],[241, 914],[879, 947],[745, 882],[461, 852],[737, 927],[817, 936],[110, 858],[270, 880],[841, 904],[166, 723],[757, 901],[7, 898],[126, 826],[663, 862],[753, 818],[736, 892],[732, 929],[875, 966],[781, 920],[747, 913],[845, 945],[731, 808],[834, 853],[868, 895],[870, 968],[935, 949],[899, 915],[219, 902],[433, 517],[211, 952],[872, 959],[526, 786],[688, 859],[296, 950],[910, 933],[838, 847],[922, 979],[775, 976],[618, 760],[199, 893],[869, 981],[797, 955],[956, 973],[842, 867],[768, 908],[765, 931],[835, 919],[827, 984],[917, 946],[793, 974],[814, 819],[948, 990],[798, 825],[729, 854],[883, 888],[930, 987],[903, 943],[856, 964],[844, 993],[83, 666],[821, 977],[809, 928],[907, 967],[822, 1004],[940, 951],[958, 971],[894, 960],[970, 978],[963, 1014],[954, 989],[828, 861],[191, 988],[972, 1012],[772, 975],[986, 1008],[941, 1013],[992, 994],[799, 1022],[938, 999],[996, 1011],[925, 942],[953, 1021],[1000, 1010],[957, 985],[494, 983],[962, 1019],[932, 1027],[1005, 1028],[889, 909],[824, 1001],[1006, 1023],[980, 1016],[923, 926],[1026, 1031],[873, 1041],[1003, 1025],[1007, 1043],[944, 1009],[937, 995],[1002, 1048],[887, 916],[998, 1018],[1020, 1038],[1047, 1053],[779, 1033],[991, 1045],[939, 961],[1040, 1042],[997, 1036],[1024, 1052],[965, 1039],[912, 1035],[1032, 1056],[982, 1051],[878, 1015],[1059, 1066],[1017, 1067],[1030, 1037],[1044, 1065],[1050, 1064],[1054, 1057],[1071, 1074],[924, 1063],[1069, 1075],[1072, 1077],[1055, 1070],[1029, 1060],[1068, 1076],[1046, 1061],[810, 969],[1078, 1083],[1062, 1084],[1080, 1082],[1073, 1079],[863, 1058],[1085, 1086],[1081, 1088],[1087, 1089],[1034, 1091],[1049, 1092],[1090, 1093]] My problem is that I cannot map these to the original data. The end goal is to have a Newick parse of this and build a hierarchy from it. But once I do that, I have indexes in the hierarchy that are > n_samples, which makes no sense. 
Here is my parsing, on the front end:

var agglo_data = d3.stratify()
    .id((d, i) => i + numSamples)
    .parentId((d, i) => {
        var parIndex = agglo_data.findIndex(e => e.includes && e.includes(i + numSamples));
        if (parIndex < 0) { return; }
        return parIndex + numSamples;
    })(agglo_children);

Here is what I get: As you can see, there are indexes that are bigger than n_samples, which makes no sense to me. Can anyone spot the mistake?
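For reference, a minimal sketch on toy data (my own, not the poster's dataset) of the indexing rule quoted from the docs above: row i of children_ creates node n_samples + i, and any id below n_samples is an original sample:

import numpy as np
from sklearn.cluster import AgglomerativeClustering

X = np.random.rand(6, 2)  # 6 samples -> 5 merges, new node ids 6..10
model = AgglomerativeClustering().fit(X)

n_samples = X.shape[0]
for i, (left, right) in enumerate(model.children_):
    # left/right below n_samples are leaves; otherwise they refer to
    # the node created at merge (left - n_samples) or (right - n_samples)
    print("merge %d: node %d = (%d, %d)" % (i, n_samples + i, left, right))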
Breadth-first algorithm implementation
I am trying to implement a breadth-first algorithm as a variation of something I've seen in a book. My issue is that the algorithm is not adding the elements of every node into the queue. For instance, if I search for "black lab" starting from the name 'mariela' in the search() function, I get the correct output: "simon is a black lab". However, I ought to be able to look for "black lab" starting from "walter", which is connected to "mariela", which is connected to "simon", who is a "black lab". This is not working. Have I made a rookie mistake in my implementation of this algorithm, or have I set up my graph wrong? As always, any/all help is much appreciated!

from collections import deque

# TEST GRAPH -------------
graph = {}
graph['walter'] = ['luci', 'kaiser', 'andrea', 'mariela']
graph['andrea'] = ['echo', 'dante', 'walter', 'mariela']
graph['mariela'] = ['ginger', 'simon', 'walter', 'andrea']
graph['kaiser'] = 'german shepherd'
graph['luci'] = 'black cat'
graph['echo'] = 'pitbull'
graph['dante'] = 'pitbull'
graph['ginger'] = 'orange cat'
graph['simon'] = 'black lab'

def condition_met(name):
    if graph[name] == 'black lab':
        return name

def search(name):
    search_queue = deque()
    search_queue += graph[name]  # add all elements of "name" to queue
    searchedAlready = []  # holding array for people already searched through
    while search_queue:  # while queue not empty...
        person = search_queue.popleft()  # pull 1st person from queue
        if person not in searchedAlready:  # if person hasn't been searched through yet...
            if condition_met(person):
                print person + ' is a black labrador'
                return True
            else:
                search_queue += graph[person]
                searchedAlready.append(person)
    return False

search('walter')
#search('mariela')
You have lots of problems in your implementation, both Python- and algorithm-wise. Rewrite it as:

# @param graph graph to search
# @param start the node to start at
# @param value the value to search for
def search(graph, start, value):
    explored = []
    queue = [start]
    while len(queue) > 0:
        # next node to explore (pop from the front so the search stays breadth-first)
        node = queue.pop(0)
        # only explore if not already explored
        if node not in explored:
            # node found, search complete
            if node == value:
                return True
            # add children of node to queue
            else:
                explored.append(node)
                # leaves such as 'black lab' have no entry of their own,
                # so fall back to an empty child list
                queue.extend(graph.get(node, []))  # extend is faster than concat (+=)
    return False

graph = {}
graph['walter'] = ['luci', 'kaiser', 'andrea', 'mariela']
graph['andrea'] = ['echo', 'dante', 'walter', 'mariela']
graph['mariela'] = ['ginger', 'simon', 'walter', 'andrea']
# children should be a list
graph['kaiser'] = ['german shepherd']
graph['luci'] = ['black cat']
graph['echo'] = ['pitbull']
graph['dante'] = ['pitbull']
graph['ginger'] = ['orange cat']
graph['simon'] = ['black lab']

print search(graph, 'mariela', 'walter')

Here is a demo: https://repl.it/IkRA/0
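A small variant of the same idea (my addition, not part of the linked demo): using collections.deque keeps the FIFO pop O(1), whereas list.pop(0) is O(n):

from collections import deque

def search_bfs(graph, start, value):
    explored = set()
    queue = deque([start])
    while queue:
        node = queue.popleft()  # FIFO pop keeps the traversal breadth-first
        if node in explored:
            continue
        if node == value:
            return True
        explored.add(node)
        queue.extend(graph.get(node, []))  # leaves have no children
    return False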
Creating a nested dictionary from a flat list with Python
I have a list of files in this form:

base/images/graphs/one.png
base/images/tikz/two.png
base/refs/images/three.png
base/one.txt
base/chapters/two.txt

I would like to convert them to a nested dictionary of this sort:

{"name": "base",
 "contents": [
     {"name": "images",
      "contents": [
          {"name": "graphs", "contents": [{"name": "one.png"}]},
          {"name": "tikz", "contents": [{"name": "two.png"}]}
      ]},
     {"name": "refs",
      "contents": [
          {"name": "images", "contents": [{"name": "three.png"}]}
      ]},
     {"name": "one.txt"},
     {"name": "chapters", "contents": [{"name": "two.txt"}]}
 ]}

The trouble is that in my attempted solution, given input like "images/datasetone/grapha.png" and "images/datasetone/graphb.png", each of them ends up in a different dictionary named "datasetone". However, I'd like both to be in the same parent dictionary, since they are in the same directory. How do I create this nested structure without duplicating parent dictionaries when there is more than one file in a common path? Here is what I came up with, which fails:

def path_to_tree(params):
    start = {}
    for item in params:
        parts = item.split('/')
        depth = len(parts)
        if depth > 1:
            if "contents" in start.keys():
                start["contents"].append(create_base_dir(parts[0], parts[1:]))
            else:
                start["contents"] = [create_base_dir(parts[0], parts[1:])]
        else:
            if "contents" in start.keys():
                start["contents"].append(create_leaf(parts[0]))
            else:
                start["contents"] = [create_leaf(parts[0])]
    return start

def create_base_dir(base, parts):
    l = {}
    if len(parts) >= 1:
        l["name"] = base
        l["contents"] = [create_base_dir(parts[0], parts[1:])]
    elif len(parts) == 0:
        l = create_leaf(base)
    return l

def create_leaf(base):
    l = {}
    l["name"] = base
    return l

b = ["base/images/graphs/one.png", "base/images/graphs/oneb.png",
     "base/images/tikz/two.png", "base/refs/images/three.png",
     "base/one.txt", "base/chapters/two.txt"]

d = path_to_tree(b)

from pprint import pprint
pprint(d)

In this example you can see we end up with as many dictionaries named "base" as there are files in the list, but only one is necessary; the subdirectories should be listed in the "contents" array.
This does not assume that all paths start with the same thing, so we need a list for it:

from pprint import pprint

def addBits2Tree(bits, tree):
    if len(bits) == 1:
        tree.append({'name': bits[0]})
    else:
        for t in tree:
            if t['name'] == bits[0]:
                addBits2Tree(bits[1:], t['contents'])
                return
        newTree = []
        addBits2Tree(bits[1:], newTree)
        t = {'name': bits[0], 'contents': newTree}
        tree.append(t)

def addPath2Tree(path, tree):
    bits = path.split("/")
    addBits2Tree(bits, tree)

tree = []
for p in b:  # b is the list of paths from the question
    print p
    addPath2Tree(p, tree)

pprint(tree)

Which produces the following for your example path list:

[{'contents': [{'contents': [{'contents': [{'name': 'one.png'},
                                           {'name': 'oneb.png'}],
                              'name': 'graphs'},
                             {'contents': [{'name': 'two.png'}],
                              'name': 'tikz'}],
               'name': 'images'},
              {'contents': [{'contents': [{'name': 'three.png'}],
                             'name': 'images'}],
               'name': 'refs'},
              {'name': 'one.txt'},
              {'contents': [{'name': 'two.txt'}],
               'name': 'chapters'}],
  'name': 'base'}]
Omitting the redundant name tags, you can go on with:

import json

result = {}
records = ["base/images/graphs/one.png", "base/images/tikz/two.png",
           "base/refs/images/three.png", "base/one.txt", "base/chapters/two.txt"]
recordsSplit = map(lambda x: x.split("/"), records)

for record in recordsSplit:
    here = result
    # walk/create the directory part of the path
    for item in record[:-1]:
        if not item in here:
            here[item] = {}
        here = here[item]
    # store the file name under a key that cannot clash with a folder name
    if "###content###" not in here:
        here["###content###"] = []
    here["###content###"].append(record[-1])

print json.dumps(result, indent=4)

The # characters are used for uniqueness (there could be a folder named "content" somewhere in the hierarchy). Just run it and see the result.

EDIT: Fixed a few typos, added the output.
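If you do want the name/contents shape from the question instead, here is a small conversion sketch (my addition, not part of the answer above; to_name_contents is a made-up helper name) that walks the nested dict built by that code:

def to_name_contents(name, node):
    # Convert {'dir': {...}, '###content###': [files]} into the
    # {'name': ..., 'contents': [...]} shape from the question
    entry = {"name": name}
    contents = []
    for child_name, child in node.items():
        if child_name == "###content###":
            contents.extend({"name": f} for f in child)
        else:
            contents.append(to_name_contents(child_name, child))
    if contents:
        entry["contents"] = contents
    return entry

tree = to_name_contents("base", result["base"])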