[prev in list] [next in list] [prev in thread] [next in thread] 

List:       sas-l
Subject:    StackOverflow R: Locating the html node and web scrape the html table
From:       Roger DeAngelis <rogerjdeangelis () GMAIL ! COM>
Date:       2019-09-30 21:55:26
Message-ID: 3073079737923331.WA.rogerjdeangelisgmail.com () listserv ! uga ! edu
[Download RAW message or body]

StackOverflow R: Locating the html node and web scrape the html table                 \
                
                                                                                      \
 github                                                                               \
 https://tinyurl.com/y3fsza4n                                                         \
 https://github.com/rogerjdeangelis/utl-locating-the-html-node-and-web-scrape-the-html-table \
                
                                                                                      \
 StackOverflow                                                                        \
 https://tinyurl.com/y4ylqq8f                                                         \
 https://stackoverflow.com/questions/58168419/scraping-a-table-from-a-website-using-rvest \
                
                                                                                      \
 Yifyan                                                                               \
 https://stackoverflow.com/users/6037369/yifyan                                       \
                
                                                                                      \
                
*_                   _                                                                \
 (_)_ __  _ __  _   _| |_                                                             \
 | | '_ \| '_ \| | | | __|                                                            \
 | | | | | |_) | |_| | |_                                                             \
 |_|_| |_| .__/ \__,_|\__|                                                            \
                
        |_|                                                                           \
 ;                                                                                    \
                
                                                                                      \
 The html table at                                                                    \
                
                                                                                      \
                
                                                                                      \
 https://tinyurl.com/y2er2q3r                                                         \
 https://www.treasury.gov/resource-center/data-chart-center/interest-rates/Pages/TextView.aspx?data=yieldAll \
                
                                                                                      \
 _____                                      ____       _                              \
 |_   _| __ ___  __ _ ___ _   _ _ __ _   _  |  _ \ __ _| |_ ___  ___                  \
  | || '__/ _ \/ _` / __| | | | '__| | | | | |_) / _` | __/ _ \/ __|                  \
  | || | |  __/ (_| \__ \ |_| | |  | |_| | |  _ < (_| | ||  __/\__ \                  \
  |_||_|  \___|\__,_|___/\__,_|_|   \__, | |_| \_\__,_|\__\___||___/                  \
                
                                    |___/                                             \
                
                                                                                      \
  DATE    X1MO X2MO  X3MO X6MO X1YR X2YR X3YR X5YR X7YR X10YR X20YR X30YR             \
                
                                                                                      \
  01/02/90  N/A   N/A  7.83 7.89 7.81 7.87 7.90 7.87 7.98 7.94   N/A  8.00            \
  01/03/90  N/A   N/A  7.89 7.94 7.85 7.94 7.96 7.92 8.04 7.99   N/A  8.04            \
  01/04/90  N/A   N/A  7.84 7.90 7.82 7.92 7.93 7.91 8.02 7.98   N/A  8.04            \
  01/05/90  N/A   N/A  7.79 7.85 7.79 7.90 7.94 7.92 8.03 7.99   N/A  8.06            \
  01/08/90  N/A   N/A  7.79 7.88 7.81 7.90 7.95 7.92 8.05 8.02   N/A  8.09            \
  01/09/90  N/A   N/A  7.80 7.82 7.78 7.91 7.94 7.92 8.05 8.02   N/A  8.10            \
  01/10/90  N/A   N/A  7.75 7.78 7.77 7.91 7.95 7.92 8.00 8.03   N/A  8.11            \
  01/11/90  N/A   N/A  7.80 7.80 7.77 7.91 7.95 7.94 8.01 8.04   N/A  8.11            \
  01/12/90  N/A   N/A  7.74 7.81 7.76 7.93 7.98 7.99 8.07 8.10   N/A  8.17            \
                
 ....                                                                                 \
                
                                                                                      \
                
*            _               _                                                        \
  ___  _   _| |_ _ __  _   _| |_                                                      \
  / _ \| | | | __| '_ \| | | | __|                                                    \
 | (_) | |_| | |_| |_) | |_| | |_                                                     \
  \___/ \__,_|\__| .__/ \__,_|\__|                                                    \
                
                |_|                                                                   \
 ;                                                                                    \
                
                                                                                      \
 WORK.WANT total obs=7,445                                                            \
                
                                                                                      \
  DATE       X1_MO   X2_MO   X3_MO   X6_MO   X1_YR   X2_YR   X3_YR   X5_YR   X7_YR  X10_YR  X20_YR  X30_YR

  01/02/90     N/A     N/A    7.83    7.89    7.81    7.87    7.90    7.87    7.98    7.94     N/A    8.00
  01/03/90     N/A     N/A    7.89    7.94    7.85    7.94    7.96    7.92    8.04    7.99     N/A    8.04
  01/04/90     N/A     N/A    7.84    7.90    7.82    7.92    7.93    7.91    8.02    7.98     N/A    8.04
  01/05/90     N/A     N/A    7.79    7.85    7.79    7.90    7.94    7.92    8.03    7.99     N/A    8.06
  01/08/90     N/A     N/A    7.79    7.88    7.81    7.90    7.95    7.92    8.05    8.02     N/A    8.09
  01/09/90     N/A     N/A    7.80    7.82    7.78    7.91    7.94    7.92    8.05    8.02     N/A    8.10
  01/10/90     N/A     N/A    7.75    7.78    7.77    7.91    7.95    7.92    8.00    8.03     N/A    8.11
                                                                                      \
                
*                                                                                     \
  _ __  _ __ ___   ___ ___  ___ ___                                                   \
 | '_ \| '__/ _ \ / __/ _ \/ __/ __|                                                  \
 | |_) | | | (_) | (_|  __/\__ \__ \                                                  \
 | .__/|_|  \___/ \___\___||___/___/                                                  \
 |_|                                                                                  \
 ;                                                                                    \
                
                                                                                      \
 Go to                                                                                \
 https://www.treasury.gov/resource-center/data-chart-center/interest-rates/Pages/TextView.aspx?data=yieldAll \
                
                                                                                      \
 In Chrome highlight the first column heading, 'Date' in the html table               \
 Right click and select 'inspect'.                                                    \
                
                                                                                      \
 The html source will appear.                                                         \
                
                                                                                      \
 <table class="t-chart" xmlns:fmt="urn:treasury-xslt-fmt"                             \
 xmlns:msxml="urn:schemas-microsoft-com:xslt"                                         \
 xmlns:d="http://schemas.microsoft.com/ado/2007/08/dataservices"                      \
 xmlns:m="http://schemas.microsoft.com/ado/2007/08/dataservices/metadata"             \
                
...                                                                                   \
                
                                                                                      \
 The node we are interested in is "table.t-chart"                                     \
                
                                                                                      \
                
                                                                                      \
 /* Scrape the Treasury yield table with R (rvest + httr) and write it  */
 /* to a SAS transport (xport) file that the SAS step below reads back. */
 /* The request is made through html_session with a browser-style user  */
 /* agent, and the table is selected by the CSS node table.t-chart.     */
 /* NOTE(review): every R statement ends with a semicolon and no R      */
 /* comments are placed inside the quoted program - presumably the      */
 /* utl_submit_r64 macro relies on that; confirm against the macro.     */
 %utl_submit_r64('
 library(rvest);
 library(httr);
 library(SASxport);
 url  <- "https://www.treasury.gov/resource-center/data-chart-center/interest-rates/Pages/TextView.aspx?data=yieldAll";
 ua   <- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36";
 data <- html_session(url,user_agent(ua));
 want <- data %>%
  html_node("table.t-chart") %>%
  html_table();
 write.xport(want,file="d:/xpt/want.xpt");
 ');

 /* Read the transport file written by R into WORK.WANT, then release   */
 /* the libref so the file is no longer held by the SAS session.        */
 libname xpt xport "d:/xpt/want.xpt";
 data want;
  set xpt.want;
 run;quit;
 libname xpt clear;
                
                                                                                      \
                
                                                                                      \



[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic