diff --git a/cmi2csv.xsl b/cmi2csv.xsl
index c79041c..0b2aec9 100644
--- a/cmi2csv.xsl
+++ b/cmi2csv.xsl
@@ -1,6 +1,6 @@
-
+
@@ -33,6 +33,8 @@
"edition"
"key"
+
+ "notes"
@@ -71,8 +73,49 @@
+
+
+
+
+
+
+
+
+ ?
+
+
+
+
+
+
+
+
+
+
+
+
+
+ [
+
+
+
+ ..
+
+
+
+ ]
+
+
+
+
+
+
+
+
+
+
@@ -82,14 +125,6 @@
-
-
- [
-
-
-
- ]
-
-
+
diff --git a/codemeta.json b/codemeta.json
index f699c87..2d23f89 100644
--- a/codemeta.json
+++ b/codemeta.json
@@ -4,7 +4,7 @@
"identifier": "CSV2CMI",
"name": "CSV2CMI",
"description": "converts a table of letters into CMI format",
- "softwareVersion": "1.5.2",
+ "softwareVersion": "1.6.0",
"license": "OSI Approved :: MIT License",
"programmingLanguage": "Python :: 3",
"maintainer": "https://orcid.org/0000-0003-1898-2543",
diff --git a/csv2cmi.py b/csv2cmi.py
index 8e53cef..70da0e5 100755
--- a/csv2cmi.py
+++ b/csv2cmi.py
@@ -18,7 +18,7 @@
from xml.etree.ElementTree import Element, SubElement, Comment, ElementTree
__license__ = "MIT"
-__version__ = '1.5.2'
+__version__ = '1.6.0'
# define log output
logging.basicConfig(format='%(levelname)s: %(message)s')
@@ -33,10 +33,12 @@
parser.add_argument('filename', help='input file (.csv)')
parser.add_argument('-a', '--all',
help='include unedited letters', action='store_true')
-parser.add_argument('--line-numbers',
- help='add line numbers', action='store_true')
+parser.add_argument('-n', '--notes', help='transfer notes',
+ action='store_true')
parser.add_argument('-v', '--verbose',
help='increase output verbosity', action='store_true')
+parser.add_argument('--line-numbers',
+ help='add line numbers', action='store_true')
parser.add_argument('--version', action='version',
version='%(prog)s ' + __version__)
args = parser.parse_args()
@@ -168,6 +170,29 @@ def createCorrespondent(namestring):
logging.warning(
'%sID in line %s links to undifferentiated Person', namestring, table.line_num)
authID = ''
+ elif 'loc' in authID:
+ try:
+ locrdf = ElementTree(
+ file=urllib.request.urlopen(authID + '.rdf'))
+ except urllib.error.HTTPError:
+ logging.error(
+ 'Authority file not found for %sID in line %s', namestring, table.line_num)
+ correspondent = Element('persName')
+ authID = ''
+ except urllib.error.URLError:
+ logging.error('Failed to reach LOC')
+ correspondent = Element('persName')
+ else:
+ locrdf_root = locrdf.getroot()
+ if locrdf_root.find('.//rdf:type[@rdf:resource="http://id.loc.gov/ontologies/bibframe/Organization"]', rdf) is not None:
+ correspondent = Element('orgName')
+ elif locrdf_root.find('.//rdf:type[@rdf:resource="http://id.loc.gov/ontologies/bibframe/Person"]', rdf) is not None:
+ correspondent = Element('persName')
+ else:
+ logging.warning(
+ '%sID in line %s links to unprocessable authority file', namestring, table.line_num)
+ correspondent = Element('persName')
+ authID = ''
else:
logging.error(
'No proper authority record in line %s for %s', table.line_num, namestring)
@@ -188,6 +213,27 @@ def createCorrespondent(namestring):
return correspondent
+def createDate(dateString):
+ date = Element('date')
+ normalized_date = dateString.translate(
+ dateString.maketrans('', '', '[]()?~'))
+ if normalized_date != dateString:
+ date.set('cert', 'medium')
+ logging.info(
+ 'Added @cert for in line %s', table.line_num)
+ date_list = normalized_date.split('/')
+ if len(date_list) == 2:
+ if checkIsodate(date_list[0]):
+ date.set('from', str(date_list[0]))
+ if checkIsodate(date_list[1]):
+ date.set('to', str(date_list[1]))
+ elif checkIsodate(normalized_date):
+ date.set('when', str(normalized_date))
+ else:
+ return None
+ return date
+
+
def createPlaceName(placestring):
# creates a placeName element
placeName = Element('placeName')
@@ -203,7 +249,7 @@ def createPlaceName(placestring):
if 'http://www.geonames.org/' in letter[placestring + 'ID']:
placeName.set('ref', str(letter[placestring + 'ID']))
else:
- logging.warning("no standardized %sID in line %s",
+ logging.warning("No standardized %sID in line %s",
placestring, table.line_num)
else:
logging.warning('ID for %s missing in line %s', letter[
@@ -305,7 +351,7 @@ def createID(id_prefix):
editionID = createID('edition')
sourceDesc.append(createEdition(edition, editionID))
entry = Element('correspDesc')
- if (args.line_numbers):
+ if args.line_numbers:
entry.set('n', str(table.line_num))
entry.set('xml:id', createID('letter'))
if edition:
@@ -332,20 +378,14 @@ def createID(id_prefix):
if ('senderPlace' in table.fieldnames) and letter['senderPlace']:
action.append(createPlaceName('senderPlace'))
# add date
- if 'senderDate' in table.fieldnames:
- if checkIsodate(letter['senderDate']) or checkIsodate(letter['senderDate'][1:-1]):
- senderDate = SubElement(action, 'date')
- if letter['senderDate'].startswith('[') and letter['senderDate'].endswith(']'):
- senderDate.set('cert', 'medium')
- letter['senderDate'] = letter['senderDate'][1:-1]
- logging.info(
- 'Added @cert for in line %s', table.line_num)
- senderDate.set('when', str(letter['senderDate']))
- else:
+ if 'senderDate' in table.fieldnames and letter['senderDate']:
+ try:
+ action.append(createDate(letter['senderDate']))
+ except TypeError:
logging.warning(
- 'senderDate in line %s not set (no ISO)', table.line_num)
+ 'Could not parse senderDate in line %s', table.line_num)
else:
- logging.info('no information on sender in line %s', table.line_num)
+ logging.info('No information on sender in line %s', table.line_num)
# addressee info block
if letter['addressee'] or ('addresseePlace' in table.fieldnames and letter['addresseePlace']) or ('addresseeDate' in table.fieldnames and letter['addresseeDate']):
@@ -360,21 +400,20 @@ def createID(id_prefix):
if ('addresseePlace' in table.fieldnames) and letter['addresseePlace']:
action.append(createPlaceName('addresseePlace'))
# add date
- if 'addresseeDate' in table.fieldnames:
- if checkIsodate(letter['addresseeDate']) or checkIsodate(letter['addresseeDate'][1:-1]):
- addresseeDate = SubElement(action, 'date')
- if letter['addresseeDate'].startswith('[') and letter['addresseeDate'].endswith(']'):
- senderDate.set('cert', 'medium')
- letter['addresseeDate'] = letter['addresseeDate'][1:-1]
- logging.info(
- 'Added @cert for in line %s', table.line_num)
- senderDate.set('when', str(letter['addresseeDate']))
- else:
+ if 'addresseeDate' in table.fieldnames and letter['addresseeDate']:
+ try:
+ action.append(createDate(letter['addresseeDate']))
+ except TypeError:
logging.warning(
- 'addresseeDate in line %s not set (no ISO)', table.line_num)
+ 'Could not parse addresseeDate in line %s', table.line_num)
else:
- logging.info('no information on addressee in line %s',
+ logging.info('No information on addressee in line %s',
table.line_num)
+ if args.notes:
+ if ('note' in table.fieldnames) and letter['note']:
+ note = SubElement(entry, 'note')
+ note.set('xml:id', createID('note'))
+ note.text = str(letter['note'])
if entry.find('*'):
profileDesc.append(entry)