Skip to content

Commit

Permalink
Merge pull request #20 from rettinghaus/master
Browse files Browse the repository at this point in the history
update to version 1.6
  • Loading branch information
rettinghaus authored Sep 16, 2018
2 parents 0d33f10 + c2b5245 commit b77c1cd
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 40 deletions.
55 changes: 45 additions & 10 deletions cmi2csv.xsl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- * cmi2csv * -->
<!-- 2.1.1 -->
<!-- 2.2.0 -->
<!-- * programmed by * -->
<!-- * Klaus Rettinghaus * -->
<xsl:stylesheet xmlns:tei="http://www.tei-c.org/ns/1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0" exclude-result-prefixes="tei">
Expand Down Expand Up @@ -33,6 +33,8 @@
<xsl:text>"edition"</xsl:text>
<xsl:value-of select="$sep"/>
<xsl:text>"key"</xsl:text>
<xsl:value-of select="$sep"/>
<xsl:text>"notes"</xsl:text>
<xsl:value-of select="'&#10;'"/>
<xsl:apply-templates/>
</xsl:template>
Expand Down Expand Up @@ -71,8 +73,49 @@
<xsl:value-of select="@ref"/>
</xsl:when>
</xsl:choose>
<xsl:value-of select="$sep"/>
<xsl:apply-templates select="tei:note"/>
<xsl:value-of select="'&#10;'"/>
</xsl:template>
<!-- Writes one double-quoted CSV field for a tei:date element.
     @when               -> the value, followed by "?" if @cert or @evidence is present
     @from and/or @to    -> "from/to"; with @cert or @evidence each side is followed by "?"
     @notBefore/@notAfter -> "[notBefore..notAfter]" (a missing bound is left blank)
     none of the above   -> an empty quoted field -->
<xsl:template match="tei:date">
    <!-- "?" when the date carries @cert or @evidence, otherwise empty -->
    <xsl:variable name="mark">
        <xsl:if test="@cert or @evidence">?</xsl:if>
    </xsl:variable>
    <xsl:text>"</xsl:text>
    <xsl:choose>
        <xsl:when test="@when">
            <xsl:value-of select="concat(@when,$mark)"/>
        </xsl:when>
        <xsl:when test="@from or @to">
            <xsl:value-of select="concat(@from,$mark,'/',@to,$mark)"/>
        </xsl:when>
        <xsl:when test="@notBefore or @notAfter">
            <!-- an absent attribute contributes an empty string, so no guard is needed -->
            <xsl:value-of select="concat('[',@notBefore,'..',@notAfter,']')"/>
        </xsl:when>
    </xsl:choose>
    <xsl:text>"</xsl:text>
</xsl:template>
<!-- Writes the note as one double-quoted CSV field.
     Only the first text-node child of the note is used; the value is
     copied as-is (no whitespace normalisation, no quote escaping). -->
<xsl:template match="tei:note">
    <xsl:value-of select="concat('&quot;',text(),'&quot;')"/>
</xsl:template>
<xsl:template match="tei:correspAction">
<xsl:value-of select="concat('&quot;',normalize-space(tei:persName),'&quot;')"/>
<xsl:value-of select="$sep"/>
Expand All @@ -82,14 +125,6 @@
<xsl:value-of select="$sep"/>
<xsl:value-of select="concat('&quot;',tei:placeName/@ref,'&quot;')"/>
<xsl:value-of select="$sep"/>
<xsl:if test="tei:date/@when">
<xsl:if test="tei:date/@cert">
<xsl:text>[</xsl:text>
</xsl:if>
<xsl:value-of select="concat('&quot;',tei:date/@when,'&quot;')"/>
<xsl:if test="tei:date/@cert">
<xsl:text>]</xsl:text>
</xsl:if>
</xsl:if>
<xsl:apply-templates select="tei:date[1]"/>
</xsl:template>
</xsl:stylesheet>
2 changes: 1 addition & 1 deletion codemeta.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"identifier": "CSV2CMI",
"name": "CSV2CMI",
"description": "converts a table of letters into CMI format",
"softwareVersion": "1.5.2",
"softwareVersion": "1.6.0",
"license": "OSI Approved :: MIT License",
"programmingLanguage": "Python :: 3",
"maintainer": "https://orcid.org/0000-0003-1898-2543",
Expand Down
97 changes: 68 additions & 29 deletions csv2cmi.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from xml.etree.ElementTree import Element, SubElement, Comment, ElementTree

__license__ = "MIT"
__version__ = '1.5.2'
__version__ = '1.6.0'

# define log output
logging.basicConfig(format='%(levelname)s: %(message)s')
Expand All @@ -33,10 +33,12 @@
parser.add_argument('filename', help='input file (.csv)')
parser.add_argument('-a', '--all',
help='include unedited letters', action='store_true')
parser.add_argument('--line-numbers',
help='add line numbers', action='store_true')
parser.add_argument('-n', '--notes', help='transfer notes',
action='store_true')
parser.add_argument('-v', '--verbose',
help='increase output verbosity', action='store_true')
parser.add_argument('--line-numbers',
help='add line numbers', action='store_true')
parser.add_argument('--version', action='version',
version='%(prog)s ' + __version__)
args = parser.parse_args()
Expand Down Expand Up @@ -168,6 +170,29 @@ def createCorrespondent(namestring):
logging.warning(
'%sID in line %s links to undifferentiated Person', namestring, table.line_num)
authID = ''
elif 'loc' in authID:
try:
locrdf = ElementTree(
file=urllib.request.urlopen(authID + '.rdf'))
except urllib.error.HTTPError:
logging.error(
'Authority file not found for %sID in line %s', namestring, table.line_num)
correspondent = Element('persName')
authID = ''
except urllib.error.URLError:
logging.error('Failed to reach LOC')
correspondent = Element('persName')
else:
locrdf_root = locrdf.getroot()
if locrdf_root.find('.//rdf:type[@rdf:resource="http://id.loc.gov/ontologies/bibframe/Organization"]', rdf) is not None:
correspondent = Element('orgName')
elif locrdf_root.find('.//rdf:type[@rdf:resource="http://id.loc.gov/ontologies/bibframe/Person"]', rdf) is not None:
correspondent = Element('persName')
else:
logging.warning(
'%sID in line %s links to unprocessable authority file', namestring, table.line_num)
correspondent = Element('persName')
authID = ''
else:
logging.error(
'No proper authority record in line %s for %s', table.line_num, namestring)
Expand All @@ -188,6 +213,27 @@ def createCorrespondent(namestring):
return correspondent


def createDate(dateString):
    """Create a <date> element from a date string of the input table.

    The characters ``[ ] ( ) ? ~`` are removed from *dateString* before it
    is evaluated; if any of them was present, the resulting element gets
    ``cert="medium"``.

    A string containing exactly one ``/`` is treated as a period: each half
    that passes ``checkIsodate`` is stored as ``@from`` / ``@to``
    respectively (so a period may be open on one side). Any other string is
    stored as ``@when`` if it passes ``checkIsodate`` as a whole.

    Returns the ``Element``, or ``None`` when no date attribute could be
    set at all. Callers rely on ``None`` to report an unparsable date
    (appending ``None`` to an element raises ``TypeError``).
    """
    # strip the markers used to flag an uncertain date
    normalized_date = dateString.translate(
        str.maketrans('', '', '[]()?~'))
    # collect (attribute, value) pairs first, so that nothing is created
    # or logged for a string that turns out to be unusable
    attributes = []
    date_list = normalized_date.split('/')
    if len(date_list) == 2:
        if checkIsodate(date_list[0]):
            attributes.append(('from', str(date_list[0])))
        if checkIsodate(date_list[1]):
            attributes.append(('to', str(date_list[1])))
    elif checkIsodate(normalized_date):
        attributes.append(('when', str(normalized_date)))
    if not attributes:
        # neither a valid single date nor a period with at least one
        # valid boundary: do not emit an empty <date/>
        return None
    date = Element('date')
    if normalized_date != dateString:
        date.set('cert', 'medium')
        logging.info(
            'Added @cert for <date> in line %s', table.line_num)
    for name, value in attributes:
        date.set(name, value)
    return date


def createPlaceName(placestring):
# creates a placeName element
placeName = Element('placeName')
Expand All @@ -203,7 +249,7 @@ def createPlaceName(placestring):
if 'http://www.geonames.org/' in letter[placestring + 'ID']:
placeName.set('ref', str(letter[placestring + 'ID']))
else:
logging.warning("no standardized %sID in line %s",
logging.warning("No standardized %sID in line %s",
placestring, table.line_num)
else:
logging.warning('ID for %s missing in line %s', letter[
Expand Down Expand Up @@ -305,7 +351,7 @@ def createID(id_prefix):
editionID = createID('edition')
sourceDesc.append(createEdition(edition, editionID))
entry = Element('correspDesc')
if (args.line_numbers):
if args.line_numbers:
entry.set('n', str(table.line_num))
entry.set('xml:id', createID('letter'))
if edition:
Expand All @@ -332,20 +378,14 @@ def createID(id_prefix):
if ('senderPlace' in table.fieldnames) and letter['senderPlace']:
action.append(createPlaceName('senderPlace'))
# add date
if 'senderDate' in table.fieldnames:
if checkIsodate(letter['senderDate']) or checkIsodate(letter['senderDate'][1:-1]):
senderDate = SubElement(action, 'date')
if letter['senderDate'].startswith('[') and letter['senderDate'].endswith(']'):
senderDate.set('cert', 'medium')
letter['senderDate'] = letter['senderDate'][1:-1]
logging.info(
'Added @cert for <date> in line %s', table.line_num)
senderDate.set('when', str(letter['senderDate']))
else:
if 'senderDate' in table.fieldnames and letter['senderDate']:
try:
action.append(createDate(letter['senderDate']))
except TypeError:
logging.warning(
'senderDate in line %s not set (no ISO)', table.line_num)
'Could not parse senderDate in line %s', table.line_num)
else:
logging.info('no information on sender in line %s', table.line_num)
logging.info('No information on sender in line %s', table.line_num)

# addressee info block
if letter['addressee'] or ('addresseePlace' in table.fieldnames and letter['addresseePlace']) or ('addresseeDate' in table.fieldnames and letter['addresseeDate']):
Expand All @@ -360,21 +400,20 @@ def createID(id_prefix):
if ('addresseePlace' in table.fieldnames) and letter['addresseePlace']:
action.append(createPlaceName('addresseePlace'))
# add date
if 'addresseeDate' in table.fieldnames:
if checkIsodate(letter['addresseeDate']) or checkIsodate(letter['addresseeDate'][1:-1]):
addresseeDate = SubElement(action, 'date')
if letter['addresseeDate'].startswith('[') and letter['addresseeDate'].endswith(']'):
senderDate.set('cert', 'medium')
letter['addresseeDate'] = letter['addresseeDate'][1:-1]
logging.info(
'Added @cert for <date> in line %s', table.line_num)
senderDate.set('when', str(letter['addresseeDate']))
else:
if 'addresseeDate' in table.fieldnames and letter['addresseeDate']:
try:
action.append(createDate(letter['addresseeDate']))
except TypeError:
logging.warning(
'addresseeDate in line %s not set (no ISO)', table.line_num)
'Could not parse addresseeDate in line %s', table.line_num)
else:
logging.info('no information on addressee in line %s',
logging.info('No information on addressee in line %s',
table.line_num)
if args.notes:
if ('note' in table.fieldnames) and letter['note']:
note = SubElement(entry, 'note')
note.set('xml:id', createID('note'))
note.text = str(letter['note'])
if entry.find('*'):
profileDesc.append(entry)

Expand Down

0 comments on commit b77c1cd

Please sign in to comment.