benfulcher · benfulcher · Dec 9, 2021 · Dec 13, 2021 · Dec 16, 2021
diff --git a/AllGenes.py b/AllGenes.py
@@ -24,13 +24,13 @@ def QueryAPI(model,criteriaString,includeString="",optionsString="",writeOut=[])
     api = RmaApi()
     # Settings for retrieval
     rows = []
-    blockSize = 2000
+    blockSize = 5000
     done = False
     startRow = 0
     # for i in range(0, total_rows, blockSize):
 
     while not done:
-        print "Row %d, attempting to retrieve %d rows..." % (startRow, blockSize)
+        print("Row %d, attempting to retrieve %d rows..." % (startRow, blockSize))
 
         tot_rows = len(rows)
         if len(includeString)==0:
@@ -50,16 +50,16 @@ def QueryAPI(model,criteriaString,includeString="",optionsString="",writeOut=[])
         numRows = len(rows) - tot_rows # additional rows retrieved on running the query
         startRow += numRows
 
-        print "%d rows retrieved." % numRows
+        print("%d rows retrieved." % numRows)
 
         # Check if we're at the end of the road
         if numRows == 0 or numRows < blockSize:
             done = True
 
         # Write out the results as they come in, if requested
-        if isinstance(writeOut, basestring):
+        if isinstance(writeOut, str):
             json_utilities.write(json_file_name, rows)
-            print "Wrote to %s" % json_file_name
+            print("Wrote to %s" % json_file_name)
 
     return rows
 #-------------------------------------------------------------------------------
@@ -106,7 +106,7 @@ def SaveSectionsToCSV(sections):
                 })
     # To dataframe:
     df_sections = pd.DataFrame.from_records(sectionList) #,index='section_id')
-    df_sections = df_sections.sort('section_id')
+    df_sections = df_sections.sort_values(by=['section_id'])
 
     # Save as a .csv file:
     df_sections.to_csv(sectionDatasetFilename)
@@ -123,19 +123,19 @@ def SaveSectionsToCSV(sections):
                 })
     # To dataframe:
     df_genes = pd.DataFrame.from_records(geneList) #,index='entrez_id')
-    df_genes = df_genes.sort('entrez_id')
+    df_genes = df_genes.sort_values(by=['entrez_id'])
     numGenesFull = df_genes.shape[0]
     df_genes = df_genes.drop_duplicates()
     numGenesFiltered = df_genes.shape[0]
-    print "Genes filtered from %u to %u" % (numGenesFull, numGenesFiltered)
+    print("Genes filtered from %u to %u" % (numGenesFull, numGenesFiltered))
     # Save as a .csv file:
     df_genes.to_csv(geneInfoFilename)
 #-------------------------------------------------------------------------------
 def SaveListCSV(stringList,fileName):
     # Outputs a csv from a given list of strings
-    resultFile = open(fileName,'wb')
-    wr = csv.writer(resultFile, dialect='excel')
-    wr.writerow(stringList)
+    with open(file=fileName, mode='w', newline='') as resultFile:  # newline='' lets csv control line endings (avoids blank rows on Windows)
+        wr = csv.writer(resultFile, dialect='excel')
+        wr.writerow(stringList)
 
 #-------------------------------------------------------------------------------
 
@@ -154,7 +154,7 @@ def SaveListCSV(stringList,fileName):
 entrezSet = set(geneEntrezList)
 geneEntrezList = list(entrezSet)
 geneEntrezList.sort()
-print "There are %u unique genes in section datasets" % len(entrezSet)
+print("There are %u unique genes in section datasets" % len(entrezSet))
 SaveListCSV(geneEntrezList,entrezIDFilename)
 
 # Saves to:

diff --git a/README.md b/README.md
@@ -1,5 +1,7 @@
 # AllenSDK
 
+This is a Python 3 port of [https://github.com/benfulcher/AllenSDK](https://github.com/benfulcher/AllenSDK), with some steps parallelized using `joblib` and other tools.
+
 [![DOI](https://zenodo.org/badge/104984017.svg)](https://zenodo.org/badge/latestdoi/104984017)
 [![Twitter](https://img.shields.io/twitter/url/https/twitter.com/bendfulcher.svg?style=social&label=Follow%20%40bendfulcher)](https://twitter.com/bendfulcher)
 
@@ -10,6 +12,13 @@ This repository contains code for:
 Requires Matlab and python.
 The [AllenSDK package](http://alleninstitute.github.io/AllenSDK/install.html) for python must be installed.
 
+Commands to install AllenSDK (pinned to Python 3.7, as it is incompatible with newer versions):
+```
+conda create -n allensdk python=3.7
+conda activate allensdk
+pip install allensdk
+```
+
 If anything is unclear or needs improvement, please send questions by [raising an Issue](https://docs.github.com/en/github/managing-your-work-on-github/creating-an-issue) or [sending me an email](mailto:ben.d.fulcher@gmail.com).
 
 This pipeline is based on code developed for [Fulcher and Fornito, _PNAS_ (2016)](https://doi.org/10.1073/pnas.1513302113), and used for [Fulcher et al., _PNAS_ (2019)](https://doi.org/10.1073/pnas.1814144116).