diff --git a/cmi2csv.xsl b/cmi2csv.xsl index c79041c..0b2aec9 100644 --- a/cmi2csv.xsl +++ b/cmi2csv.xsl @@ -1,6 +1,6 @@ - + @@ -33,6 +33,8 @@ "edition" "key" + + "notes" @@ -71,8 +73,49 @@ + + + + + + + + + ? + + + + + + + + + + + + + + [ + + + + .. + + + + ] + + + + + + + + + + @@ -82,14 +125,6 @@ - - - [ - - - - ] - - + diff --git a/codemeta.json b/codemeta.json index f699c87..2d23f89 100644 --- a/codemeta.json +++ b/codemeta.json @@ -4,7 +4,7 @@ "identifier": "CSV2CMI", "name": "CSV2CMI", "description": "converts a table of letters into CMI format", - "softwareVersion": "1.5.2", + "softwareVersion": "1.6.0", "license": "OSI Approved :: MIT License", "programmingLanguage": "Python :: 3", "maintainer": "https://orcid.org/0000-0003-1898-2543", diff --git a/csv2cmi.py b/csv2cmi.py index 8e53cef..70da0e5 100755 --- a/csv2cmi.py +++ b/csv2cmi.py @@ -18,7 +18,7 @@ from xml.etree.ElementTree import Element, SubElement, Comment, ElementTree __license__ = "MIT" -__version__ = '1.5.2' +__version__ = '1.6.0' # define log output logging.basicConfig(format='%(levelname)s: %(message)s') @@ -33,10 +33,12 @@ parser.add_argument('filename', help='input file (.csv)') parser.add_argument('-a', '--all', help='include unedited letters', action='store_true') -parser.add_argument('--line-numbers', - help='add line numbers', action='store_true') +parser.add_argument('-n', '--notes', help='transfer notes', + action='store_true') parser.add_argument('-v', '--verbose', help='increase output verbosity', action='store_true') +parser.add_argument('--line-numbers', + help='add line numbers', action='store_true') parser.add_argument('--version', action='version', version='%(prog)s ' + __version__) args = parser.parse_args() @@ -168,6 +170,29 @@ def createCorrespondent(namestring): logging.warning( '%sID in line %s links to undifferentiated Person', namestring, table.line_num) authID = '' + elif 'loc' in authID: + try: + locrdf = ElementTree( + file=urllib.request.urlopen(authID + '.rdf')) + except urllib.error.HTTPError: + logging.error( + 'Authority file not found for %sID in line %s', namestring, table.line_num) + correspondent = Element('persName') + authID = '' + except urllib.error.URLError: + logging.error('Failed to reach LOC') + correspondent = Element('persName') + else: + locrdf_root = locrdf.getroot() + if locrdf_root.find('.//rdf:type[@rdf:resource="http://id.loc.gov/ontologies/bibframe/Organization"]', rdf) is not None: + correspondent = Element('orgName') + elif locrdf_root.find('.//rdf:type[@rdf:resource="http://id.loc.gov/ontologies/bibframe/Person"]', rdf) is not None: + correspondent = Element('persName') + else: + logging.warning( + '%sID in line %s links to unprocessable authority file', namestring, table.line_num) + correspondent = Element('persName') + authID = '' else: logging.error( 'No proper authority record in line %s for %s', table.line_num, namestring) @@ -188,6 +213,27 @@ def createCorrespondent(namestring): return correspondent +def createDate(dateString): + date = Element('date') + normalized_date = dateString.translate( + dateString.maketrans('', '', '[]()?~')) + if normalized_date != dateString: + date.set('cert', 'medium') + logging.info( + 'Added @cert for in line %s', table.line_num) + date_list = normalized_date.split('/') + if len(date_list) == 2: + if checkIsodate(date_list[0]): + date.set('from', str(date_list[0])) + if checkIsodate(date_list[1]): + date.set('to', str(date_list[1])) + elif checkIsodate(normalized_date): + date.set('when', str(normalized_date)) + else: + return None + return date + + def createPlaceName(placestring): # creates a placeName element placeName = Element('placeName') @@ -203,7 +249,7 @@ def createPlaceName(placestring): if 'http://www.geonames.org/' in letter[placestring + 'ID']: placeName.set('ref', str(letter[placestring + 'ID'])) else: - logging.warning("no standardized %sID in line %s", + logging.warning("No standardized %sID in line %s", placestring, table.line_num) else: logging.warning('ID for %s missing in line %s', letter[ @@ -305,7 +351,7 @@ def createID(id_prefix): editionID = createID('edition') sourceDesc.append(createEdition(edition, editionID)) entry = Element('correspDesc') - if (args.line_numbers): + if args.line_numbers: entry.set('n', str(table.line_num)) entry.set('xml:id', createID('letter')) if edition: @@ -332,20 +378,14 @@ def createID(id_prefix): if ('senderPlace' in table.fieldnames) and letter['senderPlace']: action.append(createPlaceName('senderPlace')) # add date - if 'senderDate' in table.fieldnames: - if checkIsodate(letter['senderDate']) or checkIsodate(letter['senderDate'][1:-1]): - senderDate = SubElement(action, 'date') - if letter['senderDate'].startswith('[') and letter['senderDate'].endswith(']'): - senderDate.set('cert', 'medium') - letter['senderDate'] = letter['senderDate'][1:-1] - logging.info( - 'Added @cert for in line %s', table.line_num) - senderDate.set('when', str(letter['senderDate'])) - else: + if 'senderDate' in table.fieldnames and letter['senderDate']: + try: + action.append(createDate(letter['senderDate'])) + except TypeError: logging.warning( - 'senderDate in line %s not set (no ISO)', table.line_num) + 'Could not parse senderDate in line %s', table.line_num) else: - logging.info('no information on sender in line %s', table.line_num) + logging.info('No information on sender in line %s', table.line_num) # addressee info block if letter['addressee'] or ('addresseePlace' in table.fieldnames and letter['addresseePlace']) or ('addresseeDate' in table.fieldnames and letter['addresseeDate']): @@ -360,21 +400,20 @@ def createID(id_prefix): if ('addresseePlace' in table.fieldnames) and letter['addresseePlace']: action.append(createPlaceName('addresseePlace')) # add date - if 'addresseeDate' in table.fieldnames: - if checkIsodate(letter['addresseeDate']) or checkIsodate(letter['addresseeDate'][1:-1]): - addresseeDate = SubElement(action, 'date') - if letter['addresseeDate'].startswith('[') and letter['addresseeDate'].endswith(']'): - senderDate.set('cert', 'medium') - letter['addresseeDate'] = letter['addresseeDate'][1:-1] - logging.info( - 'Added @cert for in line %s', table.line_num) - senderDate.set('when', str(letter['addresseeDate'])) - else: + if 'addresseeDate' in table.fieldnames and letter['addresseeDate']: + try: + action.append(createDate(letter['addresseeDate'])) + except TypeError: logging.warning( - 'addresseeDate in line %s not set (no ISO)', table.line_num) + 'Could not parse addresseeDate in line %s', table.line_num) else: - logging.info('no information on addressee in line %s', + logging.info('No information on addressee in line %s', table.line_num) + if args.notes: + if ('note' in table.fieldnames) and letter['note']: + note = SubElement(entry, 'note') + note.set('xml:id', createID('note')) + note.text = str(letter['note']) if entry.find('*'): profileDesc.append(entry)