| Home | Trees | Indices | Help | 
 | 
|---|
|  | 
  1  # -*- coding: utf8 -*- 
  2  """LOINC handling code. 
  3   
  4  http://loinc.org 
  5   
  6  license: GPL v2 or later 
  7  """ 
  8  #============================================================ 
  9  __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>" 
 10   
 11  import sys 
 12  import codecs 
 13  import logging 
 14  import csv 
 15  import re as regex 
 16   
 17   
 18  if __name__ == '__main__': 
 19          sys.path.insert(0, '../../') 
 20  from Gnumed.pycommon import gmPG2 
 21  from Gnumed.pycommon import gmTools 
 22  from Gnumed.pycommon import gmMatchProvider 
 23   
 24   
 25  _log = logging.getLogger('gm.loinc') 
 26   
 27   
 28  origin_url = u'http://loinc.org' 
 29  file_encoding = 'latin1'                        # encoding is empirical 
 30  license_delimiter = u'Clip Here for Data' 
 31  version_tag = u'LOINC(R) Database Version' 
 32  name_long = u'LOINC® (Logical Observation Identifiers Names and Codes)' 
 33  name_short = u'LOINC' 
 34   
 35  loinc_fields = u"LOINC_NUM COMPONENT PROPERTY TIME_ASPCT SYSTEM SCALE_TYP METHOD_TYP RELAT_NMS CLASS SOURCE DT_LAST_CH CHNG_TYPE COMMENTS ANSWERLIST STATUS MAP_TO SCOPE NORM_RANGE IPCC_UNITS REFERENCE EXACT_CMP_SY MOLAR_MASS CLASSTYPE FORMULA SPECIES EXMPL_ANSWERS ACSSYM BASE_NAME FINAL NAACCR_ID CODE_TABLE SETROOT PANELELEMENTS SURVEY_QUEST_TEXT SURVEY_QUEST_SRC UNITSREQUIRED SUBMITTED_UNITS RELATEDNAMES2 SHORTNAME ORDER_OBS CDISC_COMMON_TESTS HL7_FIELD_SUBFIELD_ID EXTERNAL_COPYRIGHT_NOTICE EXAMPLE_UNITS INPC_PERCENTAGE LONG_COMMON_NAME".split() 
 36   
 37  #============================================================ 
 38   
 39  LOINC_creatinine_quantity = ['2160-0', '14682-9', '40264-4', '40248-7'] 
 40  LOINC_gfr_quantity = ['33914-3', '45066-8', '48642-3', '48643-1', '50044-7', '50210-4', '50384-7', '62238-1', '69405-9', '70969-1'] 
 41  LOINC_height = ['3137-7', '3138-5', '8301-4', '8302-2', '8305-5', '8306-3', '8307-1', '8308-9'] 
 42  LOINC_weight = ['18833-4', '29463-7', '3141-9', '3142-7', '8335-2', '8339-4', '8344-4', '8346-9', '8351-9'] 
 43   
 44  #============================================================ 
 46   
 47          # NOTE: will return [NULL] on no-match due to the coalesce() 
 48          cmd = u""" 
 49  SELECT coalesce ( 
 50          (SELECT term 
 51          FROM ref.v_coded_terms 
 52          WHERE 
 53                  coding_system = 'LOINC' 
 54                          AND 
 55                  code = %(loinc)s 
 56                          AND 
 57                  lang = i18n.get_curr_lang() 
 58          ), 
 59          (SELECT term 
 60          FROM ref.v_coded_terms 
 61          WHERE 
 62                  coding_system = 'LOINC' 
 63                          AND 
 64                  code = %(loinc)s 
 65                          AND 
 66                  lang = 'en_EN' 
 67          ), 
 68          (SELECT term 
 69          FROM ref.v_coded_terms 
 70          WHERE 
 71                  coding_system = 'LOINC' 
 72                          AND 
 73                  code = %(loinc)s 
 74          ) 
 75  )""" 
 76          args = {'loinc': loinc} 
 77          rows, idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd, 'args': args}], get_col_idx = False) 
 78   
 79          if rows[0][0] is None: 
 80                  return [] 
 81   
 82          return [ r[0] for r in rows ] 
 83  #============================================================ 
 85   
 86          _log.debug('splitting LOINC source file [%s]', input_fname) 
 87   
 88          if license_fname is None: 
 89                  license_fname = gmTools.get_unique_filename(prefix = 'loinc_license-', suffix = '.txt') 
 90          _log.debug('LOINC header: %s', license_fname) 
 91   
 92          if data_fname is None: 
 93                  data_fname = gmTools.get_unique_filename(prefix = 'loinc_data-', suffix = '.csv') 
 94          _log.debug('LOINC data: %s', data_fname) 
 95   
 96          loinc_file = codecs.open(input_fname, 'rU', encoding = file_encoding, errors = 'replace') 
 97          out_file = codecs.open(license_fname, 'w', encoding = 'utf8', errors = 'replace') 
 98   
 99          for line in loinc_file: 
100   
101                  if license_delimiter in line: 
102                          out_file.write(line) 
103                          out_file.close() 
104                          out_file = codecs.open(data_fname, 'w', encoding = 'utf8', errors = 'replace') 
105                          continue 
106   
107                  out_file.write(line) 
108   
109          out_file.close() 
110   
111          return data_fname, license_fname 
112  #============================================================ 
114   
115          csv_file = codecs.open(data_fname, 'rU', 'utf8', 'replace') 
116          first_line = csv_file.readline() 
117          sniffer = csv.Sniffer() 
118          if sniffer.has_header(first_line): 
119                  pass 
120  #============================================================ 
122   
123          in_file = codecs.open(license_fname, 'rU', encoding = 'utf8', errors = 'replace') 
124   
125          version = None 
126          for line in in_file: 
127                  if line.startswith(version_tag): 
128                          version = line[len(version_tag):].strip() 
129                          break 
130   
131          in_file.close() 
132          return version 
133  #============================================================ 
135   
136          if version is None: 
137                  version = get_version(license_fname = license_fname) 
138   
139          if version is None: 
140                  raise ValueError('cannot detect LOINC version') 
141   
142          _log.debug('importing LOINC version [%s]', version) 
143   
144          # clean out staging area 
145          curs = conn.cursor() 
146          cmd = u"""DELETE FROM ref.loinc_staging""" 
147          gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}]) 
148          curs.close() 
149          conn.commit() 
150          _log.debug('staging table emptied') 
151   
152          # import data from csv file into staging table 
153          csv_file = codecs.open(data_fname, 'rU', 'utf8', 'replace') 
154          loinc_reader = gmTools.unicode_csv_reader(csv_file, delimiter = "\t", quotechar = '"') 
155          curs = conn.cursor() 
156          cmd = u"""INSERT INTO ref.loinc_staging values (%s%%s)""" % (u'%s, ' * (len(loinc_fields) - 1)) 
157          first = False 
158          for loinc_line in loinc_reader: 
159                  if not first: 
160                          first = True 
161                          continue 
162                  gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': loinc_line}]) 
163          curs.close() 
164          conn.commit() 
165          csv_file.close() 
166          _log.debug('staging table loaded') 
167   
168          # create data source record 
169          in_file = codecs.open(license_fname, 'rU', encoding = 'utf8', errors = 'replace') 
170          desc = in_file.read() 
171          in_file.close() 
172          args = {'ver': version, 'desc': desc, 'url': origin_url, 'name_long': name_long, 'name_short': name_short, 'lang': lang} 
173          queries = [ 
174                  # insert if not existing 
175                  {'args': args, 'cmd': u""" 
176                          INSERT INTO ref.data_source (name_long, name_short, version) SELECT 
177                                  %(name_long)s, 
178                                  %(name_short)s, 
179                                  %(ver)s 
180                          WHERE NOT EXISTS ( 
181                                  SELECT 1 FROM ref.data_source WHERE 
182                                          name_long = %(name_long)s 
183                                                  AND 
184                                          name_short = %(name_short)s 
185                                                  AND 
186                                          version = %(ver)s 
187                          )""" 
188                  }, 
189                  # update non-unique fields 
190                  {'args': args, 'cmd': u""" 
191                          UPDATE ref.data_source SET 
192                                  description = %(desc)s, 
193                                  source = %(url)s, 
194                                  lang = %(lang)s 
195                          WHERE 
196                                  name_long = %(name_long)s 
197                                          AND 
198                                  name_short = %(name_short)s 
199                                          AND 
200                                  version = %(ver)s 
201                          """ 
202                  }, 
203                  # retrieve PK of data source 
204                  {'args': args, 'cmd': u"""SELECT pk FROM ref.data_source WHERE name_short = %(name_short)s AND version = %(ver)s"""} 
205          ] 
206          curs = conn.cursor() 
207          rows, idx = gmPG2.run_rw_queries(link_obj = curs, queries = queries, return_data = True) 
208          data_src_pk = rows[0][0] 
209          curs.close() 
210          _log.debug('data source record created or updated, pk is #%s', data_src_pk) 
211   
212          # import from staging table to real table 
213          args = {'src_pk': data_src_pk} 
214          queries = [] 
215          queries.append ({ 
216                  'args': args, 
217                  'cmd': u""" 
218                          INSERT INTO ref.loinc ( 
219                                  fk_data_source, term, code 
220                          ) 
221                          SELECT 
222                                  %(src_pk)s, 
223                                  coalesce ( 
224                                          nullif(long_common_name, ''), 
225                                          ( 
226                                                  coalesce(nullif(component, '') || ':', '') || 
227                                                  coalesce(nullif(property, '') || ':', '') || 
228                                                  coalesce(nullif(time_aspect, '') || ':', '') || 
229                                                  coalesce(nullif(system, '') || ':', '') || 
230                                                  coalesce(nullif(scale_type, '') || ':', '') || 
231                                                  coalesce(nullif(method_type, '') || ':', '') 
232                                          ) 
233                                  ), 
234                                  nullif(loinc_num, '') 
235                          FROM 
236                                  ref.loinc_staging r_ls 
237                          WHERE NOT EXISTS ( 
238                                  SELECT 1 FROM ref.loinc r_l WHERE 
239                                          r_l.fk_data_source = %(src_pk)s 
240                                                  AND 
241                                          r_l.code = nullif(r_ls.loinc_num, '') 
242                                                  AND 
243                                          r_l.term =      coalesce ( 
244                                                  nullif(r_ls.long_common_name, ''), 
245                                                  ( 
246                                                          coalesce(nullif(r_ls.component, '') || ':', '') || 
247                                                          coalesce(nullif(r_ls.property, '') || ':', '') || 
248                                                          coalesce(nullif(r_ls.time_aspect, '') || ':', '') || 
249                                                          coalesce(nullif(r_ls.system, '') || ':', '') || 
250                                                          coalesce(nullif(r_ls.scale_type, '') || ':', '') || 
251                                                          coalesce(nullif(r_ls.method_type, '') || ':', '') 
252                                                  ) 
253                                          ) 
254                          )""" 
255          }) 
256          queries.append ({ 
257                  'args': args, 
258                  'cmd': u""" 
259                          UPDATE ref.loinc SET 
260                                  comment = nullif(r_ls.comments, ''), 
261                                  component = nullif(r_ls.component, ''), 
262                                  property = nullif(r_ls.property, ''), 
263                                  time_aspect = nullif(r_ls.time_aspect, ''), 
264                                  system = nullif(r_ls.system, ''), 
265                                  scale_type = nullif(r_ls.scale_type, ''), 
266                                  method_type = nullif(r_ls.method_type, ''), 
267                                  related_names_1_old = nullif(r_ls.related_names_1_old, ''), 
268                                  grouping_class = nullif(r_ls.class, ''), 
269                                  loinc_internal_source = nullif(r_ls.source, ''), 
270                                  dt_last_change = nullif(r_ls.dt_last_change, ''), 
271                                  change_type = nullif(r_ls.change_type, ''), 
272                                  answer_list = nullif(r_ls.answer_list, ''), 
273                                  code_status = nullif(r_ls.status, ''), 
274                                  maps_to = nullif(r_ls.map_to, ''), 
275                                  scope = nullif(r_ls.scope, ''), 
276                                  normal_range = nullif(r_ls.normal_range, ''), 
277                                  ipcc_units = nullif(r_ls.ipcc_units, ''), 
278                                  reference = nullif(r_ls.reference, ''), 
279                                  exact_component_synonym = nullif(r_ls.exact_component_synonym, ''), 
280                                  molar_mass = nullif(r_ls.molar_mass, ''), 
281                                  grouping_class_type = nullif(r_ls.class_type, '')::smallint, 
282                                  formula = nullif(r_ls.formula, ''), 
283                                  species = nullif(r_ls.species, ''), 
284                                  example_answers = nullif(r_ls.example_answers, ''), 
285                                  acs_synonyms = nullif(r_ls.acs_synonyms, ''), 
286                                  base_name = nullif(r_ls.base_name, ''), 
287                                  final = nullif(r_ls.final, ''), 
288                                  naa_ccr_id = nullif(r_ls.naa_ccr_id, ''), 
289                                  code_table = nullif(r_ls.code_table, ''), 
290                                  is_set_root = nullif(r_ls.is_set_root, '')::boolean, 
291                                  panel_elements = nullif(r_ls.panel_elements, ''), 
292                                  survey_question_text = nullif(r_ls.survey_question_text, ''), 
293                                  survey_question_source = nullif(r_ls.survey_question_source, ''), 
294                                  units_required = nullif(r_ls.units_required, ''), 
295                                  submitted_units = nullif(r_ls.submitted_units, ''), 
296                                  related_names_2 = nullif(r_ls.related_names_2, ''), 
297                                  short_name = nullif(r_ls.short_name, ''), 
298                                  order_obs = nullif(r_ls.order_obs, ''), 
299                                  cdisc_common_tests = nullif(r_ls.cdisc_common_tests, ''), 
300                                  hl7_field_subfield_id = nullif(r_ls.hl7_field_subfield_id, ''), 
301                                  external_copyright_notice = nullif(r_ls.external_copyright_notice, ''), 
302                                  example_units = nullif(r_ls.example_units, ''), 
303                                  inpc_percentage = nullif(r_ls.inpc_percentage, ''), 
304                                  long_common_name = nullif(r_ls.long_common_name, '') 
305                          FROM 
306                                  ref.loinc_staging r_ls 
307                          WHERE 
308                                  fk_data_source = %(src_pk)s 
309                                          AND 
310                                  code = nullif(r_ls.loinc_num, '') 
311                                          AND 
312                                  term = coalesce ( 
313                                          nullif(r_ls.long_common_name, ''), 
314                                          ( 
315                                                  coalesce(nullif(r_ls.component, '') || ':', '') || 
316                                                  coalesce(nullif(r_ls.property, '') || ':', '') || 
317                                                  coalesce(nullif(r_ls.time_aspect, '') || ':', '') || 
318                                                  coalesce(nullif(r_ls.system, '') || ':', '') || 
319                                                  coalesce(nullif(r_ls.scale_type, '') || ':', '') || 
320                                                  coalesce(nullif(r_ls.method_type, '') || ':', '') 
321                                          ) 
322                                  ) 
323                  """ 
324          }) 
325          curs = conn.cursor() 
326          gmPG2.run_rw_queries(link_obj = curs, queries = queries) 
327          curs.close() 
328          conn.commit() 
329          _log.debug('transfer from staging table to real table done') 
330   
331          # clean out staging area 
332          curs = conn.cursor() 
333          cmd = u"""DELETE FROM ref.loinc_staging""" 
334          gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}]) 
335          curs.close() 
336          conn.commit() 
337          _log.debug('staging table emptied') 
338   
339          return True 
340   
341  #============================================================ 
# Query templates for finding LOINC codes.
#
# Each template selects the columns data, field_label, and list_label
# and carries one or more %(fragment_condition)s placeholders which
# must be filled in by dictionary-based string formatting before
# the query can be run.

# LOINC codes attached to test types in clin.test_type
_SQL_LOINC_from_test_type = u"""
        -- from test type
        SELECT
                loinc AS data,
                loinc AS field_label,
                (loinc || ': ' || abbrev || ' (' || name || ')') AS list_label
        FROM clin.test_type
        WHERE loinc %(fragment_condition)s
"""

# LOINC codes or terms in the current database language
# NOTE(review): only referenced from disabled code within this part of the file
_SQL_LOINC_from_i18n_coded_term = u"""
        -- from coded term, in user language
        SELECT
                code AS data,
                code AS field_label,
                (code || ': ' || term) AS list_label
        FROM ref.v_coded_terms
        WHERE
                coding_system = 'LOINC'
                        AND
                lang = i18n.get_curr_lang()
                        AND
                (code %(fragment_condition)s
                        OR
                term %(fragment_condition)s)
"""

# LOINC codes or terms with lang 'en_EN'
# NOTE(review): only referenced from disabled code within this part of the file
_SQL_LOINC_from_en_EN_coded_term = u"""
        -- from coded term, in English
        SELECT
                code AS data,
                code AS field_label,
                (code || ': ' || term) AS list_label
        FROM ref.v_coded_terms
        WHERE
                coding_system = 'LOINC'
                        AND
                lang = 'en_EN'
                        AND
                (code %(fragment_condition)s
                        OR
                term %(fragment_condition)s)
"""

# LOINC codes or terms regardless of language
_SQL_LOINC_from_any_coded_term = u"""
        -- from coded term, in any language
        SELECT
                code AS data,
                code AS field_label,
                (code || ': ' || term) AS list_label
        FROM ref.v_coded_terms
        WHERE
                coding_system = 'LOINC'
                        AND
                (code %(fragment_condition)s
                        OR
                term %(fragment_condition)s)
"""
400   
402   
403          _pattern = regex.compile(r'^\D+\s+\D+$', regex.UNICODE | regex.LOCALE) 
404   
405          _normal_query = u""" 
406                  SELECT DISTINCT ON (list_label) 
407                          data, 
408                          field_label, 
409                          list_label 
410                  FROM ( 
411                          (%s) UNION ALL ( 
412                          %s) 
413                  ) AS all_known_loinc""" % ( 
414                          _SQL_LOINC_from_test_type, 
415                          _SQL_LOINC_from_any_coded_term 
416                  ) 
417  #--                     %s) UNION ALL ( 
418  #--                     %s) UNION ALL ( 
419  #               % 
420  #                       _SQL_LOINC_from_i18n_coded_term, 
421  #                       _SQL_LOINC_from_en_EN_coded_term, 
422          #-------------------------------------------------------- 
424                  """Return matches for aFragment at start of phrases.""" 
425   
426                  self._queries = [cLOINCMatchProvider._normal_query + u'\nORDER BY list_label\nLIMIT 75'] 
427                  return gmMatchProvider.cMatchProvider_SQL2.getMatchesByPhrase(self, aFragment) 
428          #-------------------------------------------------------- 
430                  """Return matches for aFragment at start of words inside phrases.""" 
431   
432                  if cLOINCMatchProvider._pattern.match(aFragment): 
433                          fragmentA, fragmentB = aFragment.split(u' ', 1) 
434                          query1 = cLOINCMatchProvider._normal_query % {'fragment_condition': u'~* %%(fragmentA)s'} 
435                          self._args['fragmentA'] = u"( %s)|(^%s)" % (fragmentA, fragmentA) 
436                          query2 = cLOINCMatchProvider._normal_query % {'fragment_condition': u'~* %%(fragmentB)s'} 
437                          self._args['fragmentB'] = u"( %s)|(^%s)" % (fragmentB, fragmentB) 
438                          self._queries = [u"SELECT * FROM (\n(%s\n) INTERSECT (%s)\n) AS intersected_matches\nORDER BY list_label\nLIMIT 75" % (query1, query2)] 
439                          return self._find_matches(u'dummy') 
440   
441                  self._queries = [cLOINCMatchProvider._normal_query + u'\nORDER BY list_label\nLIMIT 75'] 
442                  return gmMatchProvider.cMatchProvider_SQL2.getMatchesByWord(self, aFragment) 
443          #-------------------------------------------------------- 
445                  """Return matches for aFragment as a true substring.""" 
446   
447                  if cLOINCMatchProvider._pattern.match(aFragment): 
448                          fragmentA, fragmentB = aFragment.split(u' ', 1) 
449                          query1 = cLOINCMatchProvider._normal_query % {'fragment_condition': u"ILIKE %%(fragmentA)s"} 
450                          self._args['fragmentA'] = u'%%%s%%' % fragmentA 
451                          query2 = cLOINCMatchProvider._normal_query % {'fragment_condition': u"ILIKE %%(fragmentB)s"} 
452                          self._args['fragmentB'] = u'%%%s%%' % fragmentB 
453                          self._queries = [u"SELECT * FROM (\n(%s\n) INTERSECT (%s)\n) AS intersected_matches\nORDER BY list_label\nLIMIT 75" % (query1, query2)] 
454                          return self._find_matches(u'dummy') 
455   
456                  self._queries = [cLOINCMatchProvider._normal_query + u'\nORDER BY list_label\nLIMIT 75'] 
457                  return gmMatchProvider.cMatchProvider_SQL2.getMatchesBySubstr(self, aFragment) 
458  #============================================================ 
459  # main 
460  #------------------------------------------------------------ 
461  if __name__ == "__main__": 
462   
463          if len(sys.argv) < 2: 
464                  sys.exit() 
465   
466          if sys.argv[1] != 'test': 
467                  sys.exit() 
468   
469          from Gnumed.pycommon import gmLog2 
470          from Gnumed.pycommon import gmI18N 
471   
472          gmI18N.activate_locale() 
473  #       gmDateTime.init() 
474   
475          #-------------------------------------------------------- 
477                  print split_LOINCDBTXT(input_fname = sys.argv[2]) 
478          #-------------------------------------------------------- 
481          #-------------------------------------------------------- 
485          #-------------------------------------------------------- 
486          test_loinc_split() 
487          #test_loinc_import() 
488          #test_loinc2term() 
489   
490  #============================================================ 
491   
| Home | Trees | Indices | Help | 
 | 
|---|
| Generated by Epydoc 3.0.1 on Sat Oct 5 03:57:28 2013 | http://epydoc.sourceforge.net |