1   
  2  """ATC handling code. 
  3   
  4  http://who.no 
  5   
  6  There is no DDD handling because DDD explicitly
  7  does not carry clinical meaning. 
  8  """ 
  9   
 10  __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>" 
 11  __license__ = "GPL v2 or later" 
 12   
 13  import sys 
 14  import io 
 15  import logging 
 16  import csv 
 17  import os.path 
 18  import re as regex 
 19   
 20   
 21  if __name__ == '__main__': 
 22          sys.path.insert(0, '../../') 
 23  from Gnumed.pycommon import gmPG2 
 24  from Gnumed.pycommon import gmTools 
 25  from Gnumed.pycommon import gmCfg2 
 26   
 27   
 28  _log = logging.getLogger('gm.atc') 
 29  _cfg = gmCfg2.gmCfgData() 
 30   
 31   
 32  ATC_NICOTINE = 'N07BA01' 
 33  ATC_ETHANOL  = 'V03AB16' 
 34   
 35   
 37   
 38          _log.debug('substance <%s>, ATC <%s>', substance, atc) 
 39   
 40          if atc is not None: 
 41                  if atc.strip() == '': 
 42                          atc = None 
 43   
 44          if atc is None: 
 45                  atcs = text2atc(text = substance, fuzzy = False, link_obj = link_obj) 
 46                  if len(atcs) == 0: 
 47                          _log.debug('no ATC found, aborting') 
 48                          return atc 
 49                  if len(atcs) > 1: 
 50                          _log.debug('non-unique ATC mapping, aborting') 
 51                          return atc 
 52                  atc = atcs[0][0].strip() 
 53   
 54          args = {'atc': atc, 'term': substance.strip()} 
 55          queries = [ 
 56                  {'cmd': "UPDATE ref.substance SET atc = %(atc)s WHERE lower(description) = lower(%(term)s) AND atc IS NULL", 
 57                   'args': args}, 
 58                  {'cmd': "UPDATE ref.drug_product SET atc_code = %(atc)s WHERE lower(description) = lower(%(term)s) AND atc_code IS NULL", 
 59                   'args': args} 
 60          ] 
 61          gmPG2.run_rw_queries(link_obj = link_obj, queries = queries) 
 62   
 63          return atc 
  64   
 65   
 66 -def text2atc(text=None, fuzzy=False, link_obj=None): 
  67   
 68          text = text.strip() 
 69   
 70          if fuzzy: 
 71                  args = {'term': '%%%s%%' % text} 
 72                  cmd = """ 
 73                          SELECT DISTINCT ON (atc_code) * 
 74                          FROM ( 
 75                                  SELECT atc as atc_code, is_group_code, pk_data_source 
 76                                  FROM ref.v_atc 
 77                                  WHERE term ilike %(term)s AND atc IS NOT NULL 
 78                                          UNION 
 79                                  SELECT atc as atc_code, null, null 
 80                                  FROM ref.substance 
 81                                  WHERE description ilike %(term)s AND atc IS NOT NULL 
 82                                          UNION 
 83                                  SELECT atc_code, null, null 
 84                                  FROM ref.drug_product 
 85                                  WHERE description ilike %(term)s AND atc_code IS NOT NULL 
 86                          ) as tmp 
 87                          ORDER BY atc_code 
 88                  """ 
 89          else: 
 90                  args = {'term': text.lower()} 
 91                  cmd = """ 
 92                          SELECT DISTINCT ON (atc_code) * 
 93                          FROM ( 
 94                                  SELECT atc as atc_code, is_group_code, pk_data_source 
 95                                  FROM ref.v_atc 
 96                                  WHERE lower(term) = lower(%(term)s) AND atc IS NOT NULL 
 97                                          UNION 
 98                                  SELECT atc as atc_code, null, null 
 99                                  FROM ref.substance 
100                                  WHERE lower(description) = lower(%(term)s) AND atc IS NOT NULL 
101                                          UNION 
102                                  SELECT atc_code, null, null 
103                                  FROM ref.drug_product 
104                                  WHERE lower(description) = lower(%(term)s) AND atc_code IS NOT NULL 
105                          ) as tmp 
106                          ORDER BY atc_code 
107                  """ 
108   
109          rows, idx = gmPG2.run_ro_queries(link_obj = link_obj, queries = [{'cmd': cmd, 'args': args}], get_col_idx = False) 
110   
111          _log.debug('term: %s => ATCs: %s (fuzzy: %s)', text, rows, fuzzy) 
112   
113          return rows 
 114   
115   
117          args = {'term': substance} 
118          cmd = 'SELECT EXISTS (SELECT 1 FROM ref.atc WHERE lower(term) = lower(%(term)s))' 
119          rows, idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd, 'args': args}], get_col_idx = False) 
120          return rows[0][0] 
 121   
122   
124          cmd = 'SELECT * FROM ref.v_atc ORDER BY %s' % order_by 
125          rows, idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd}], get_col_idx = False) 
126          return rows 
 127   
128   
130   
131           
132          _cfg.add_file_source(source = 'atc', file = cfg_fname, encoding = 'utf8') 
133   
134          data_fname = os.path.join ( 
135                  os.path.dirname(cfg_fname), 
136                  _cfg.get(group = 'atc', option = 'data file', source_order = [('atc', 'return')]) 
137          )                        
138          version = _cfg.get(group = 'atc', option = 'version', source_order = [('atc', 'return')]) 
139          lang = _cfg.get(group = 'atc', option = 'language', source_order = [('atc', 'return')]) 
140          desc = _cfg.get(group = 'atc', option = 'description', source_order = [('atc', 'return')]) 
141          url = _cfg.get(group = 'atc', option = 'url', source_order = [('atc', 'return')]) 
142          name_long = _cfg.get(group = 'atc', option = 'long name', source_order = [('atc', 'return')]) 
143          name_short = _cfg.get(group = 'atc', option = 'short name', source_order = [('atc', 'return')]) 
144   
145          _cfg.remove_source(source = 'atc') 
146   
147          _log.debug('importing ATC version [%s] (%s) from [%s]', version, lang, data_fname) 
148   
149          args = {'ver': version, 'desc': desc, 'url': url, 'name_long': name_long, 'name_short': name_short, 'lang': lang} 
150   
151           
152          cmd = u"select pk from ref.data_source where name_short = %(name_short)s and version = %(ver)s" 
153          rows, idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd, 'args': args}]) 
154          if len(rows) > 0: 
155                  data_src_pk = rows[0][0] 
156                  _log.debug('ATC data source record existed, pk is #%s, refreshing fields', data_src_pk) 
157                   
158                  args['pk'] = data_src_pk 
159                  cmd = u"""UPDATE ref.data_source SET 
160                                  name_long = %(name_long)s, 
161                                  description = %(desc)s, 
162                                  lang = %(lang)s, 
163                                  source = %(url)s 
164                          WHERE 
165                                  pk = %(pk)s 
166                  """ 
167                  rows, idx = gmPG2.run_rw_queries(queries = [{'cmd': cmd, 'args': args}]) 
168          else: 
169                  _log.debug('ATC data source record not found, creating') 
170                   
171                  cmd = u"""insert into ref.data_source (name_long, name_short, version, description, lang, source) values ( 
172                          %(name_long)s, 
173                          %(name_short)s, 
174                          %(ver)s, 
175                          %(desc)s, 
176                          %(lang)s, 
177                          %(url)s 
178                  ) returning pk""" 
179                  rows, idx = gmPG2.run_rw_queries(queries = [{'cmd': cmd, 'args': args}], return_data = True) 
180                  data_src_pk = rows[0][0] 
181                  _log.debug('ATC data source record created, pk is #%s', data_src_pk) 
182   
183           
184          csv_file = io.open(data_fname, mode = 'rt', encoding = 'utf8', errors = 'replace') 
185          atc_reader = gmTools.unicode_csv_reader(csv_file, delimiter = ",", quotechar = '"') 
186   
187           
188          curs = conn.cursor() 
189          cmd = """delete from ref.atc_staging""" 
190          gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}]) 
191          curs.close() 
192          conn.commit() 
193          _log.debug('ATC staging table emptied') 
194   
195           
196          curs = conn.cursor() 
197          cmd = """insert into ref.atc_staging values (%s, %s, %s, %s, %s)""" 
198          first = False 
199          for atc_line in atc_reader: 
200                   
201                  if not first: 
202                          first = True 
203                          continue 
204   
205                   
206                  if atc_line[0] + atc_line[1] + atc_line[2] + atc_line[3] + atc_line[4] == '': 
207                          continue 
208   
209                  comment = '' 
210                  unit = '' 
211                  adro = '' 
212   
213                   
214                  if regex.match('\d{,3},\d{,3}\s.{1,2}\s.(,.)*$', atc_line[4]): 
215                          tmp, unit, adro = regex.split('\s', atc_line[4]) 
216                   
217                  elif regex.match('\d{,3},\d{,3}\s.{1,2}\s.(,.)*\s.+$', atc_line[4]): 
218                          tmp, unit, adro, comment = regex.split('\s', atc_line[4], 3) 
219                   
220                  elif regex.match('\d{,3}\s.{1,2}\s.(,.)*$', atc_line[4]): 
221                          tmp, unit, adro = regex.split('\s', atc_line[4]) 
222                   
223                  elif regex.match('\d{,3}\s.{1,2}\s.(,.)*\s.+$', atc_line[4]): 
224                          tmp, unit, adro, comment = regex.split('\s', atc_line[4], 3) 
225                   
226                  else: 
227                          comment = atc_line[4] 
228   
229                  args = [ 
230                          atc_line[0].strip(), 
231                          atc_line[2], 
232                          unit, 
233                          adro, 
234                          comment 
235                  ] 
236   
237                  gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': args}]) 
238   
239          curs.close() 
240          conn.commit() 
241          csv_file.close() 
242          _log.debug('ATC staging table loaded') 
243   
244           
245          args = {'src_pk': data_src_pk} 
246          queries = [] 
247           
248          cmd = u""" 
249                  insert into ref.atc ( 
250                          fk_data_source, 
251                          code, 
252                          term, 
253                          comment, 
254                          administration_route 
255                  ) select 
256                          %(src_pk)s, 
257                          atc, 
258                          name, 
259                          nullif(comment, ''), 
260                          nullif(adro, '') 
261                  FROM 
262                          ref.atc_staging 
263                  WHERE 
264                          not exists ( 
265                                  select 1 FROM ref.atc WHERE fk_data_source = %(src_pk)s AND code = ref.atc_staging.atc 
266                          ) 
267          """ 
268          queries.append({'cmd': cmd, 'args': args}) 
269           
270          cmd = u""" 
271                  UPDATE ref.atc SET 
272                          code = r_as.atc, 
273                          term = r_as.name, 
274                          comment = nullif(r_as.comment, ''), 
275                          administration_route = nullif(r_as.adro, '') 
276                  FROM 
277                          (SELECT atc, name, comment, adro FROM ref.atc_staging) AS r_as 
278                  WHERE 
279                          fk_data_source = %(src_pk)s 
280          """ 
281          queries.append({'cmd': cmd, 'args': args}) 
282          curs = conn.cursor() 
283          gmPG2.run_rw_queries(link_obj = curs, queries = queries) 
284          curs.close() 
285          conn.commit() 
286          _log.debug('transfer from ATC staging table to real ATC table done') 
287   
288           
289          curs = conn.cursor() 
290          cmd = """delete from ref.atc_staging""" 
291          gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}]) 
292          curs.close() 
293          conn.commit() 
294          _log.debug('ATC staging table emptied') 
295   
296          return True 
 297   
298   
299   
300   
301  if __name__ == "__main__": 
302   
303          if len(sys.argv) == 1: 
304                  sys.exit() 
305   
306          if sys.argv[1] != 'test': 
307                  sys.exit() 
308   
309          from Gnumed.pycommon import gmLog2 
310          from Gnumed.pycommon import gmI18N 
311   
312          gmI18N.activate_locale() 
313   
314   
315           
318           
320                  print('searching ATC code for:', sys.argv[2]) 
321                  print(' ', text2atc(sys.argv[2])) 
322                  print(' ', text2atc(sys.argv[2], True)) 
 323           
328           
329           
330           
331          test_get_reference_atcs() 
332   
333   
334