@@ -120,12 +120,14 @@ class Base(Block):
120
120
121
121
# pylint: disable=too-many-arguments
122
122
def __init__(self, model=None, model_alias=None, online=False,
             tokenize=True, tag=True, parse=True, resegment=False,
             delete_nodes=False, **kwargs):
    """Create the UDPipe block object.

    Args:
        model: stored as ``self.model``.
            NOTE(review): presumably a path to a UDPipe model file -- confirm
            against the ``tool`` property, which is not fully visible here.
        model_alias: stored as ``self.model_alias``.
            NOTE(review): presumably a symbolic name of a known model -- confirm.
        online: stored as ``self.online``.
            NOTE(review): presumably selects an online service instead of a
            local model -- confirm.
        tokenize: passed on by ``process_document``; when true, trees are
            processed with ``tool.tokenize_tag_parse_tree``.
        tag: whether tagging is requested (forwarded to the tool as ``tag=``).
        parse: whether parsing is requested (forwarded to the tool as ``parse=``).
        resegment: whether sentence re-segmentation is requested.
        delete_nodes: when true, ``process_document`` removes every child of
            a tree's root before processing that tree.
        **kwargs: forwarded unchanged to the superclass constructor.
    """
    super().__init__(**kwargs)
    self.model, self.model_alias, self.online = model, model_alias, online
    # The tool starts unset; the ``tool`` property returns this attribute.
    self._tool = None
    self.tokenize, self.tag, self.parse, self.resegment = tokenize, tag, parse, resegment
    self.delete_nodes = delete_nodes
129
131
130
132
@property
131
133
def tool (self ):
@@ -146,16 +148,19 @@ def tool(self):
146
148
return self ._tool
147
149
148
150
def process_document (self , doc ):
149
- tok , tag , par = self .tokenize , self .tag , self .parse
151
+ tok , tag , par , reseg = self .tokenize , self .tag , self .parse , self . resegment
150
152
old_bundles = doc .bundles
151
153
new_bundles = []
152
154
for bundle in old_bundles :
153
155
for tree in bundle :
154
156
new_bundles .append (bundle )
155
157
if self ._should_process_tree (tree ):
158
+ if self .delete_nodes :
159
+ for subroot in tree .children :
160
+ subroot .remove ()
156
161
if tok :
157
- new_trees = self .tool .tokenize_tag_parse_tree (tree , resegment = self . resegment ,
158
- tag = self . tag , parse = self . parse )
162
+ new_trees = self .tool .tokenize_tag_parse_tree (tree , resegment = reseg ,
163
+ tag = tag , parse = par )
159
164
if self .resegment and len (new_trees ) > 1 :
160
165
orig_bundle_id = bundle .bundle_id
161
166
bundle .bundle_id = orig_bundle_id + '-1'
@@ -164,9 +169,9 @@ def process_document(self, doc):
164
169
new_tree .zone = tree .zone
165
170
new_bundle .add_tree (new_tree )
166
171
new_bundles .append (new_bundle )
167
- elif not tok and tag and par :
168
- self .tool .tag_parse_tree (tree )
169
- elif not tok and not tag and not par and self . resegment :
172
+ elif not tok and not reseg and ( tag or par ) :
173
+ self .tool .tag_parse_tree (tree , tag = tag , parse = par )
174
+ elif not tok and reseg and not tag and not par :
170
175
sentences = self .tool .segment_text (tree .text )
171
176
if len (sentences ) > 1 :
172
177
orig_bundle_id = bundle .bundle_id
@@ -178,7 +183,7 @@ def process_document(self, doc):
178
183
new_tree .text = sentence
179
184
new_bundles .append (new_bundle )
180
185
else :
181
- raise ValueError ("Unimplemented tokenize=%s tag=%s parse=%s" % ( tok , tag , par ) )
186
+ raise ValueError (f "Unimplemented tokenize={ tok } tag={ tag } parse={ par } resegment= { reseg } " )
182
187
doc .bundles = new_bundles
183
188
184
189
'''
0 commit comments