1   
  2  """ATC handling code. 
  3   
  4  http://who.no 
  5   
  6  There is no DDD handling because DDD explicitly
  7  does not carry clinical meaning. 
  8   
  9  license: GPL v2 or later 
 10  """ 
 11   
 12  __version__ = "$Revision: 1.7 $" 
 13  __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>" 
 14   
 15  import sys, codecs, logging, csv, re as regex, os.path 
 16   
 17   
 18  if __name__ == '__main__': 
 19          sys.path.insert(0, '../../') 
 20  from Gnumed.pycommon import gmPG2, gmTools, gmCfg2 
 21   
 22   
 23  _log = logging.getLogger('gm.atc') 
 24  _log.info(__version__) 
 25   
 26  _cfg = gmCfg2.gmCfgData() 
 27   
 29   
 30          _log.debug('substance <%s>, ATC <%s>', substance, atc) 
 31   
 32          if atc is not None: 
 33                  if atc.strip() == u'': 
 34                          atc = None 
 35   
 36          if atc is None: 
 37                  atcs = text2atc(text = substance, fuzzy = False) 
 38                  if len(atcs) == 0: 
 39                          _log.debug(u'no ATC found, aborting') 
 40                          return atc 
 41                  if len(atcs) > 1: 
 42                          _log.debug(u'non-unique ATC mapping, aborting') 
 43                          return atc 
 44                  atc = atcs[0][0].strip() 
 45   
 46          args = {'atc': atc, 'term': substance.strip()} 
 47          queries = [ 
 48                  {'cmd': u"UPDATE ref.consumable_substance SET atc_code = %(atc)s WHERE lower(description) = lower(%(term)s) AND atc_code IS NULL", 
 49                   'args': args}, 
 50                  {'cmd': u"UPDATE ref.branded_drug SET atc_code = %(atc)s WHERE lower(description) = lower(%(term)s) AND atc_code IS NULL", 
 51                   'args': args} 
 52          ] 
 53          gmPG2.run_rw_queries(queries = queries) 
 54   
 55          return atc 
  56   
 57 -def text2atc(text=None, fuzzy=False): 
  58   
 59          text = text.strip() 
 60   
 61          if fuzzy: 
 62                  args = {'term': u'%%%s%%' % text} 
 63                  cmd = u""" 
 64                          SELECT DISTINCT ON (atc_code) * 
 65                          FROM ( 
 66                                  SELECT atc as atc_code, is_group_code, pk_data_source 
 67                                  FROM ref.v_atc 
 68                                  WHERE term ilike %(term)s AND atc IS NOT NULL 
 69                                          UNION 
 70                                  SELECT atc_code, null, null 
 71                                  FROM ref.consumable_substance 
 72                                  WHERE description ilike %(term)s AND atc_code IS NOT NULL 
 73                                          UNION 
 74                                  SELECT atc_code, null, null 
 75                                  FROM ref.branded_drug 
 76                                  WHERE description ilike %(term)s AND atc_code IS NOT NULL 
 77                          ) as tmp 
 78                          ORDER BY atc_code 
 79                  """ 
 80          else: 
 81                  args = {'term': text.lower()} 
 82                  cmd = u""" 
 83                          SELECT DISTINCT ON (atc_code) * 
 84                          FROM ( 
 85                                  SELECT atc as atc_code, is_group_code, pk_data_source 
 86                                  FROM ref.v_atc 
 87                                  WHERE lower(term) = lower(%(term)s) AND atc IS NOT NULL 
 88                                          UNION 
 89                                  SELECT atc_code, null, null 
 90                                  FROM ref.consumable_substance 
 91                                  WHERE lower(description) = lower(%(term)s) AND atc_code IS NOT NULL 
 92                                          UNION 
 93                                  SELECT atc_code, null, null 
 94                                  FROM ref.branded_drug 
 95                                  WHERE lower(description) = lower(%(term)s) AND atc_code IS NOT NULL 
 96                          ) as tmp 
 97                          ORDER BY atc_code 
 98                  """ 
 99   
100          rows, idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd, 'args': args}], get_col_idx = False) 
101   
102          _log.debug(u'term: %s => ATCs: %s (fuzzy: %s)', text, rows, fuzzy) 
103   
104          return rows 
 105   
106   
108          cmd = u'SELECT * FROM ref.v_atc ORDER BY %s' % order_by 
109          rows, idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd}], get_col_idx = False) 
110          return rows 
 111   
112   
114   
115           
116          _cfg.add_file_source(source = 'atc', file = cfg_fname, encoding = 'utf8') 
117   
118          data_fname = os.path.join ( 
119                  os.path.dirname(cfg_fname), 
120                  _cfg.get(group = 'atc', option = 'data file', source_order = [('atc', 'return')]) 
121          )                        
122          version = _cfg.get(group = 'atc', option = 'version', source_order = [('atc', 'return')]) 
123          lang = _cfg.get(group = 'atc', option = 'language', source_order = [('atc', 'return')]) 
124          desc = _cfg.get(group = 'atc', option = 'description', source_order = [('atc', 'return')]) 
125          url = _cfg.get(group = 'atc', option = 'url', source_order = [('atc', 'return')]) 
126          name_long = _cfg.get(group = 'atc', option = 'long name', source_order = [('atc', 'return')]) 
127          name_short = _cfg.get(group = 'atc', option = 'short name', source_order = [('atc', 'return')]) 
128   
129          _cfg.remove_source(source = 'atc') 
130   
131          _log.debug('importing ATC version [%s] (%s) from [%s]', version, lang, data_fname) 
132   
133          args = {'ver': version, 'desc': desc, 'url': url, 'name_long': name_long, 'name_short': name_short, 'lang': lang} 
134   
135           
136          queries = [ 
137                  { 
138                  'cmd': u"""delete from ref.data_source where name_short = %(name_short)s and version = %(ver)s""", 
139                  'args': args 
140                  }, { 
141                  'cmd': u""" 
142  insert into ref.data_source (name_long, name_short, version, description, lang, source) values ( 
143          %(name_long)s, 
144          %(name_short)s, 
145          %(ver)s, 
146          %(desc)s, 
147          %(lang)s, 
148          %(url)s 
149  )""", 
150                  'args': args 
151                  }, { 
152                  'cmd': u"""select pk from ref.data_source where name_short = %(name_short)s and version = %(ver)s""", 
153                  'args': args 
154                  } 
155          ] 
156          rows, idx = gmPG2.run_rw_queries(queries = queries, return_data = True) 
157          data_src_pk = rows[0][0] 
158          _log.debug('ATC data source record created, pk is #%s', data_src_pk) 
159   
160           
161          csv_file = codecs.open(data_fname, 'rU', 'utf8', 'replace') 
162          atc_reader = gmTools.unicode_csv_reader(csv_file, delimiter = ",", quotechar = '"') 
163   
164           
165          curs = conn.cursor() 
166          cmd = u"""delete from ref.atc_staging""" 
167          gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}]) 
168          curs.close() 
169          conn.commit() 
170          _log.debug('ATC staging table emptied') 
171   
172           
173          curs = conn.cursor() 
174          cmd = u"""insert into ref.atc_staging values (%s, %s, %s, %s, %s)""" 
175          first = False 
176          for atc_line in atc_reader: 
177                   
178                  if not first: 
179                          first = True 
180                          continue 
181   
182                   
183                  if atc_line[0] + atc_line[1] + atc_line[2] + atc_line[3] + atc_line[4] == u'': 
184                          continue 
185   
186                  comment = u'' 
187                  unit = u'' 
188                  adro = u'' 
189   
190                   
191                  if regex.match('\d{,3},\d{,3}\s.{1,2}\s.(,.)*$', atc_line[4]): 
192                          tmp, unit, adro = regex.split('\s', atc_line[4]) 
193                   
194                  elif regex.match('\d{,3},\d{,3}\s.{1,2}\s.(,.)*\s.+$', atc_line[4]): 
195                          tmp, unit, adro, comment = regex.split('\s', atc_line[4], 3) 
196                   
197                  elif regex.match('\d{,3}\s.{1,2}\s.(,.)*$', atc_line[4]): 
198                          tmp, unit, adro = regex.split('\s', atc_line[4]) 
199                   
200                  elif regex.match('\d{,3}\s.{1,2}\s.(,.)*\s.+$', atc_line[4]): 
201                          tmp, unit, adro, comment = regex.split('\s', atc_line[4], 3) 
202                   
203                  else: 
204                          comment = atc_line[4] 
205   
206                  args = [ 
207                          atc_line[0].strip(), 
208                          atc_line[2], 
209                          unit, 
210                          adro, 
211                          comment 
212                  ] 
213   
214                  gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': args}]) 
215   
216          curs.close() 
217          conn.commit() 
218          csv_file.close() 
219          _log.debug('ATC staging table loaded') 
220   
221           
222          curs = conn.cursor() 
223          args = {'src_pk': data_src_pk} 
224          cmd = u""" 
225  insert into ref.atc ( 
226          fk_data_source, 
227          code, 
228          term, 
229          comment, 
230          unit, 
231          administration_route 
232  ) select 
233          %(src_pk)s, 
234          atc, 
235          name, 
236          nullif(comment, ''), 
237          nullif(unit, ''), 
238          nullif(adro, '') 
239   
240  from 
241          ref.atc_staging 
242  """ 
243   
244          gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': args}]) 
245   
246          curs.close() 
247          conn.commit() 
248          _log.debug('transfer from ATC staging table to real ATC table done') 
249   
250           
251          curs = conn.cursor() 
252          cmd = u"""delete from ref.atc_staging""" 
253          gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}]) 
254          curs.close() 
255          conn.commit() 
256          _log.debug('ATC staging table emptied') 
257   
258          return True 
 259   
260   
261   
262  if __name__ == "__main__": 
263   
264          if len(sys.argv) == 1: 
265                  sys.exit() 
266   
267          if sys.argv[1] != 'test': 
268                  sys.exit() 
269   
270          from Gnumed.pycommon import gmLog2 
271          from Gnumed.pycommon import gmI18N 
272   
273          gmI18N.activate_locale() 
274   
275   
276           
279           
281                  print 'searching ATC code for:', sys.argv[2] 
282                  print ' ', text2atc(sys.argv[2]) 
283                  print ' ', text2atc(sys.argv[2], True) 
 284           
289           
290           
291           
292          test_get_reference_atcs() 
293   
294   
295