[prev in list] [next in list] [prev in thread] [next in thread]
List: sas-l
Subject: SAS-L: Determinating gender from firstname AI sas r and python
From: Roger Deangelis <roger_deangelis () COMCAST ! NET>
Date: 2024-04-08 21:47:57
Message-ID: 2790327075296100.WA.rogerdeangeliscomcast.net () listserv ! uga ! edu
[Download RAW message or body]
%let pgm=utl-determinating-gender-from-firstname-AI-sas-r-and-python;
TOO LONG TO POST HERE, IF INTERESTED SEE GITHUB
Determinating gender from firstname AI sas r and python
Two solutions
1 manually download nam2sex.sas7bdat and class.sas7bdat from github
2 programatically download nam2sex.csv and class.csv from github and \
convert to sas datasets then run step 1 (only after problem statement)
github
https://tinyurl.com/yhyntbnt
https://github.com/rogerjdeangelis/utl-determinating-gender-from-firstname-AI-sas-r-and-python
file in github you can use the csvs or the sas datasets(created by stattrasnfer)
class.csv ( I provide code to programatically download the csv.so you don't \
have to open github nam2sex.csv
class.sas7bat (nterestingly you canot create a panda dataframe call class using \
class=class,meta pyreadstat) nam2sex.sas7bdat
/* _ _
_ __ _ __ ___ | |__ | | ___ _ __ ___
> `_ \| `__/ _ \| `_ \| |/ _ \ `_ ` _ \
> > _) | | | (_) | |_) | | __/ | | | | |
> .__/|_| \___/|_.__/|_|\___|_| |_| |_|
> _|
*/
You have to run all the processes left justified,because of parmcards or use the \
programs after the problem section.
/***************************************************************************************************************************** \
*/ /* | \
| */ /* ___ __ _ ___ | \
| */ /* / __|/ _` / __| | \
| */ /* \__ \ (_| \__ \ | \
| */ /* |___/\__,_|___/ | \
| */ /* | \
| */ /* INPUT | \
PROCESS | OUTPUT */ /* \
| | LOOKUP \
*/ /* libname tmp "d:/temp"; | You will need to condition your first \
names | NAME FIRSTNAME SEX GENDER MATCH */ /* data tmp.class; \
| | \
*/ /* set sashelp.class(keep=name sex); | upcase and keep \
'ABCDEFGHIJKLMNOPQRSTUVWXYZ'| Alfred M M 1 */ /* run;quit; \
| firstname must be between 3 and 11 letters | Alice ALICE F F 1 \
*/ /* | Conditioning not shown \
| Barbara BARBARA F F 1 */ /* SD1.CLASS total obs=19 | \
| Carol CAROL F F 1 */ /* | \
proc sql; | Henry M M 1 \
*/ /* NAME SEX | create \
| James M M 1 */ /* | \
table want as | Jane JANE F F 1 */ /* \
Alfred M | select | \
Janet JANET F F 1 */ /* Alice F | \
l.name | Jeffrey M M 1 */ /* \
Barbara F | ,l.sex | \
John M M 1 */ /* Carol F | \
,r.firstname | Joyce JOYCE F F 1 */ /* \
Henry M | ,case (r.sex) | \
Judy JUDY F F 1 */ /* James M | \
when ( ' ' ) then 'M' | Louise LOUISE F F 1 */ /* \
Jane F | else 'F' | \
Mary MARY F F 1 */ /* Janet F | \
end as sex_match | Philip M M 1 */ /* \
Jeffrey M | ,(l.sex = calculated sex_match) as match| \
Robert M M 1 */ /* John M | \
from | Ronald M M 1 */ \
/* Joyce F | tmp.class as l \
| Thomas M M 1 */ /* Judy F | \
left | */ \
/* Louise F | join tmp.nam2sex as r \
| */ /* Mary F | \
on | William WILLIAM M F 0** */ \
/* Philip M | upcase(l.name) = r.firstname \
| */ /* Robert M | \
;quit; | * SAS HAS GENDER WRONG? \
*/ /* Ronald M | \
| */ /* Thomas M | \
| */ /* William M | \
| */ /* | \
| */ /* tmp.nam2sex obs=5,053 | \
| */ /* FIRSTNAME | \
| */ /* | \
| */ /* ABAGAIL F | \
| */ /* ABAGIL F | \
| */ /* ABBEY F | \
| */ /* ABBIE F | \
| */ /* ABBIGALE F | \
| */ /* .... | \
| */ /* ZOIE F | \
| */ /* ZOLA F | \
| */ /* ZOLLIE F | \
| */ /* ZONA F | \
| */ /* ZONIA F | \
| */ /* ZORA F | \
| */ /* ZORAH F | \
| */ /* | \
| */ \
/*--------------------------------------|----------------------------------------------|-------------------------------------- \
*/ /* | \
| */ /* _ __ | \
| */ /* | `__| | \
%utl_rbegin; | \
*/ /* | | | parmcards4; \
| NAME FIRSTNAME SEX MAT MATCHED */ /* |_| | \
library(haven) | \
*/ /* | library(sqldf) \
| Alfred M M 1 */ /* | \
source("c:/temp/fn_tosas9.R"); | Alice ALICE F F 1 \
*/ /* SAMEINPUT | \
nam2sex<-read_sas("c:/temp/nam2sex.sas7bdat")| Barbara BARBARA F F 1 \
*/ /* | class <- \
read_sas("c:/temp/class.sas7bdat") | Carol CAROL F F 1 */ /* \
| want<-sqldf(' | Henry M M 1 \
*/ /* | select \
| James M M 1 */ /* | \
l.name | Jane JANE F F 1 */ /* \
| ,r.firstname | Janet JANET F F 1 \
*/ /* | ,l.sex \
| Jeffrey M M 1 */ /* | \
,case | John M M 1 */ \
/* | when (r.sex = "F") then "F" \
| Joyce JOYCE F F 1 */ /* | \
else "M" | Judy JUDY F F 1 */ /* \
| end as mat | Louise LOUISE F F 1 \
*/ /* | ,l.sex = \
| Mary MARY F F 1 */ /* | \
case | Philip M M 1 */ /* \
| when (r.sex = "F") then "F" | Robert M M 1 \
*/ /* | else "M" \
| Ronald M M 1 */ /* | \
end as matched | Thomas M M 1 */ /* \
| from | William WILLIAM M F 0 \
*/ /* | class as l \
| */ /* | \
left | */ \
/* | join nam2sex as r \
| */ /* | \
on | */ \
/* | upper(l.name) = r.firstname \
| */ /* | \
'); | */ \
/* | want; \
| */ /* | \
fn_tosas9(dataf=want); | \
*/ /* | ;;;; \
| */ /* | \
%utl_rend; | \
*/ /* | \
| */ /* | \
libname tmp "c:/temp"; | \
*/ /* | proc print data=tmp.want; \
| */ /* | \
run;quit; | \
*/ /* | \
| */ \
/*--------------------------------------|------------------------------------------------------------------------------------- \
*/ /* | \
| */ /* _ _ | \
| NAME SEX MAT EST */ /* _ __ _ _| |_| |__ ___ _ __ | \
| */ /* | `_ \| | | | __| `_ \ / _ \| `_ \ | %utl_pybegin; \
| Alfred M M 1 */ /* | |_) | |_| | |_| | | | (_) | | | | | parmcards4; \
| Alice F F 1 */ /* | .__/ \__, |\__|_| |_|\___/|_| |_| | import os; \
| Barbara F F 1 */ /* |_| |___/ | import subprocess \
| Carol F F 1 */ /* | import time \
| Henry M M 1 */ /* SAME INPUT | import pandas as \
pd; | James M M 1 */ /* SAME SQL \
CODE AS R | import numpy as np; \
| Jane F F 1 */ /* | from pandasql \
import sqldf; | Janet F F 1 */ /* \
| mysql = lambda q: sqldf(q, globals()); | Jeffrey M M 1 \
*/ /* | from pandasql import PandaSQL; \
| John M M 1 */ /* | pdsql = \
PandaSQL(persist=True); | Joyce F F 1 */ /* \
| sqlite3conn = next(pdsql.conn.gen).connection.connection; | Judy F F 1 \
*/ /* | sqlite3conn.enable_load_extension(True); \
| Louise F F 1 */ /* | \
sqlite3conn.load_extension('c:/temp/libsqlitefunctions.dll'); | Mary F F 1 \
*/ /* | mysql = lambda q: sqldf(q, globals()); \
| Philip M M 1 */ /* | import pyreadstat \
as ps | Robert M M 1 */ /* \
| clas,meta=ps.read_sas7bdat("c:/temp/class.sas7bdat"); | Ronald M M 1 \
*/ /* | \
nam2sex,meta=ps.read_sas7bdat("c:/temp/nam2sex.sas7bdat"); | Thomas M M 1 \
*/ /* | want = pdsql(''' \
| William M F 0 */ /* | select \
| */ /* | l.name \
| */ /* | ,r.firstname \
| */ /* | ,l.sex \
| */ /* | ,case \
| */ /* | when \
(r.sex = "F") then "F" | */ /* \
| else "M" | \
*/ /* | end as mat \
| */ /* | ,l.sex = \
| */ /* | case \
| */ /* | when \
(r.sex = "F") then "F" | */ /* \
| else "M" | \
*/ /* | end as matched \
| */ /* | from \
| */ /* | clas as l \
| */ /* | left \
| */ /* | join nam2sex \
as r | */ /* \
| on | \
*/ /* | upper(l.name) = r.firstname \
| */ /* | '''); \
| */ /* | want.info(); \
| */ /* | print(want); \
| */ /* | \
exec(open('c:/temp/fn_tosas9.py').read()) | \
*/ /* | fn_tosas9( \
| */ /* | want \
| */ /* | ,dfstr="want" \
| */ /* | ,timeest=4 \
| */ /* | ); \
| */ /* | ;;;; \
| */ /* | %utl_pyend; \
| */ /* | \
| */ /* | libname tmp \
"c:/temp"; | */ /* \
| proc print data=tmp.want; | \
*/ /* | run;quit; \
| */ /* | \
| */ \
/*******************************************************************************************************|***********************/
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic