# frozen_string_literal: true

# Converts a tab-separated statistics export into a marshalled array of Person
# objects.
#
# The input is read as one or more "series". A series starts with a header
# line consisting only of tab-prefixed person numbers ("\t1\t2\t3") and is
# followed by attribute lines whose first field is the column name
# ('col1' .. 'col5') and whose remaining tab-separated fields hold one value
# per person, in header order. Lines that match neither are skipped.
#
# Usage: ruby <this script> STATISTICS_FILE
# Output: data.dta (Marshal format) in the current working directory.
#
# The Ruby 1.8 `generator` library this script used to require was removed in
# Ruby 1.9; the built-in external Enumerator (IO#each_line + #next) is used
# instead, so no require is needed.

# Header line of a series: one or more tab-prefixed person numbers.
SERIES_HEADER = /^(\t\d+)+\s*$/.freeze

# Attribute lines of a series, in file order: [Person member, column name,
# optional converter applied to the raw string value].
COLUMNS = [
  [:is_male,   'col1', ->(gender) { gender == 'M' }],
  [:age,       'col2', nil],
  [:job,       'col3', nil],
  [:state,     'col4', nil],
  [:education, 'col5', nil]
].freeze

# Linux-only source of the free-memory diagnostic printed while parsing.
MEMINFO_PATH = '/proc/meminfo'

# Prints +msg+ to stdout and terminates the process with exit status 1.
def usage(msg)
  puts msg
  exit 1
end

# Reads forward in +lines+ to the first line starting with +col_name+ and
# assigns its tab-separated values to +member+ of each person, positionally.
#
# @param persons [Array<Person>] receivers, in the order of the series header
# @param lines [#next] external iterator over the remaining input lines
# @param member [Symbol] attribute name; both +member+ and +member=+ are called
# @param col_name [String] literal prefix identifying the wanted line
# @yield [value] optional converter for each raw String value (nil when the
#   line has fewer values than there are persons)
# @raise [EOFError] if the input ends before a matching line is found
def parse_line(persons, lines, member, col_name)
  prefix = /^#{Regexp.escape(col_name)}/
  begin
    line = lines.next.chomp
    line = lines.next.chomp until line.match?(prefix)
  rescue StopIteration
    # Same exception class the old Generator#next raised at end of input.
    raise EOFError, "no line starting with #{col_name.inspect} found"
  end

  values = line.split(/\t/)
  values.shift # drop the column name itself

  persons.zip(values).each do |person, value|
    # Keep anything already recorded. Checking nil? (not truthiness) so that a
    # stored +false+, e.g. is_male for a woman, is not overwritten.
    next unless person.public_send(member).nil?

    converted = block_given? ? yield(value) : value
    person.public_send("#{member}=", converted)
  end
end

# One row of the statistics: a person number plus the parsed attributes.
class Person
  attr_accessor :person_id, :is_male, :age, :job, :state, :education

  # @param person_id [Integer] number taken from the series header line
  def initialize(person_id)
    @person_id = person_id
  end
end

# Advances +lines+ to the next series header.
#
# @param lines [#next] external iterator raising StopIteration when exhausted
# @return [String, nil] the chomped header line, or nil at end of input
def next_series_header(lines)
  loop do
    line = lines.next.chomp
    return line if line.match?(SERIES_HEADER)
  end
  nil # Kernel#loop rescues the StopIteration raised at end of input
end

# Parses every series found in +lines+.
#
# @param lines [#next] external iterator over the input lines
# @yield [count] after each series, with the number of persons parsed so far
# @return [Array<Person>] all persons, in file order
# @raise [EOFError] if a series is cut short (see #parse_line)
def read_persons(lines)
  all_persons = []
  while (header = next_series_header(lines))
    persons = header.scan(/\d+/).map { |id| Person.new(id.to_i) }
    COLUMNS.each do |member, col_name, converter|
      parse_line(persons, lines, member, col_name, &converter)
    end
    all_persons.concat(persons)
    yield all_persons.size if block_given?
  end
  all_persons
end

# Prints the running total and, on Linux, the MemFree line(s) of
# /proc/meminfo. The memory read-out is skipped when the file is unreadable,
# so a missing procfs cannot abort the run.
def report_progress(count)
  puts "parsed #{count} people"
  return unless RUBY_PLATFORM.include?('linux') && File.readable?(MEMINFO_PATH)

  puts File.foreach(MEMINFO_PATH).grep(/MemFree/)
end

if $PROGRAM_NAME == __FILE__
  stats_path = ARGV.shift || usage('missing filename of statistics file')

  # Block form closes the input even if parsing raises.
  all_persons = File.open(stats_path) do |stats|
    read_persons(stats.each_line) { |count| report_progress(count) }
  end

  # Marshal output is binary, hence 'wb'.
  File.open('data.dta', 'wb') { |out| Marshal.dump(all_persons, out) }
end