I'm trying to log into YouTube with chromedp; the login redirects to accounts.google.com/ServiceLogin?service=youtube... For some reason it times out trying to retrieve anything from that page. Most of the selectors have randomly generated classes, but that shouldn't prevent fetching body, which it does. A screenshot shows the login page, so it's navigating there correctly, but it can't find anything on the page. There are iframes, but the login form/input isn't in one, and the #identifierId element can't be found.
Here is a code snippet:
func main() {
    // create chrome instance
    var buf []byte
    ctx, cancel := chromedp.NewContext(
        context.Background(),
        // chromedp.WithDebugf(log.Printf),
    )
    defer cancel()

    // create a timeout
    ctx, cancel = context.WithTimeout(ctx, 15*time.Second)
    defer cancel()

    // navigate to a page, wait for an element, click
    var u string
    err := chromedp.Run(ctx,
        chromedp.Navigate(`https://youtube.com`),
        chromedp.Click(`#buttons > ytd-button-renderer > a`),
        chromedp.SendKeys(`#identifierId`, "bobboshmo#gmail.com", chromedp.ByID),
        chromedp.Location(&u),
    )
    if err != nil {
        log.Fatal(err)
    }
    log.Printf(u)

    // capture entire browser viewport, returning png with quality=90
    if err := chromedp.Run(ctx, fullScreenshot(u, 90, &buf)); err != nil {
        log.Fatal(err)
    }
    if err := ioutil.WriteFile("fullScreenshot.png", buf, 0o644); err != nil {
        log.Fatal(err)
    }
}

func fullScreenshot(urlstr string, quality int, res *[]byte) chromedp.Tasks {
    return chromedp.Tasks{
        chromedp.Navigate(urlstr),
        chromedp.FullScreenshot(res, quality),
    }
}
It turns out the Google accounts page was responding differently based on the User-Agent, so the XPaths/selectors I grabbed while browsing were different from what my Go program was seeing. Fixed by adding the same user agent as the first action passed to Run:
emulation.SetUserAgentOverride(`Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Mobile Safari/537.36`),
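In context, the start of Run then looks roughly like this (same snippet as above, with the override as the first action; the emulation package here is github.com/chromedp/cdproto/emulation):
err := chromedp.Run(ctx,
    // set the UA before navigating so Google serves the same markup
    // that the selectors were copied from
    emulation.SetUserAgentOverride(`Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Mobile Safari/537.36`),
    chromedp.Navigate(`https://youtube.com`),
    chromedp.Click(`#buttons > ytd-button-renderer > a`),
    chromedp.SendKeys(`#identifierId`, "bobboshmo#gmail.com", chromedp.ByID),
    chromedp.Location(&u),
)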
I am trying to convert the Node.js code below to Go. I have to establish a keep-alive HTTP request to a PouchDB server's _changes?feed=continuous endpoint. However, I'm not able to achieve it in Go.
var http = require('http')

var agent = new http.Agent({
    keepAlive: true
});

var options = {
    host: 'localhost',
    port: '3030',
    method: 'GET',
    path: '/downloads/_changes?feed=continuous&include_docs=true',
    agent
};

var req = http.request(options, function(response) {
    response.on('data', function(data) {
        let val = data.toString()
        if(val == '\n')
            console.log('newline')
        else {
            console.log(JSON.parse(val))
            //to close the connection
            //agent.destroy()
        }
    });
    response.on('end', function() {
        // Data received completely.
        console.log('end');
    });
    response.on('error', function(err) {
        console.log(err)
    })
});
req.end();
Below is the Go code
client := &http.Client{}
data := url.Values{}
req, err := http.NewRequest("GET", "http://localhost:3030/downloads/_changes?feed=continuous&include_docs=true", strings.NewReader(data.Encode()))
req.Header.Set("Connection", "keep-alive")
resp, err := client.Do(req)
fmt.Println(resp.Status)
if err != nil {
    fmt.Println(err)
}
defer resp.Body.Close()

result, err := ioutil.ReadAll(resp.Body)
if err != nil {
    fmt.Println(err)
}
fmt.Println(result)
I am getting status 200 OK, but no data gets printed; it's stuck. On the other hand, if I use the longpoll option, i.e. http://localhost:3030/downloads/_changes?feed=longpoll, then I receive data.
Your code is working "as expected": what you wrote in Go is not equivalent to the Node.js code. The Go code blocks on ioutil.ReadAll(resp.Body) because the connection is kept open by the CouchDB server. Once the server closes the connection, your client code will print the result, as ioutil.ReadAll() will then be able to read all data down to EOF.
From the CouchDB documentation about the continuous feed:
A continuous feed stays open and connected to the database until explicitly closed and changes are sent to the client as they happen, i.e. in near real-time. As with the longpoll feed type you can set both the timeout and heartbeat intervals to ensure that the connection is kept open for new changes and updates.
You can experiment by adding &timeout=1 to the URL, which will force CouchDB to close the connection almost immediately (the timeout value is in milliseconds). Your Go code should then print the whole response.
The Node.js code works differently: the data event handler is called every time the server sends some data. If you want to achieve the same and process partial updates as they come (before the connection is closed), you cannot use ioutil.ReadAll(), as that waits for EOF (and thus blocks in your case); use something like resp.Body.Read() to process partial buffers. Here is a very simplified snippet that demonstrates this and should give you the basic idea:
package main

import (
    "fmt"
    "net/http"
    "net/url"
    "strings"
)

func main() {
    client := &http.Client{}
    data := url.Values{}
    req, err := http.NewRequest("GET", "http://localhost:3030/downloads/_changes?feed=continuous&include_docs=true", strings.NewReader(data.Encode()))
    if err != nil {
        fmt.Println(err)
        return
    }
    req.Header.Set("Connection", "keep-alive")
    resp, err := client.Do(req)
    if err != nil {
        fmt.Println(err)
        return
    }
    defer resp.Body.Close()
    fmt.Println(resp.Status)

    buf := make([]byte, 1024)
    for {
        l, err := resp.Body.Read(buf)
        if l == 0 && err != nil {
            break // this is super simplified
        }
        // here you can send off data to e.g. a channel or start
        // a handler goroutine...
        fmt.Printf("%s", buf[:l])
    }
    fmt.Println()
}
In a real-world application you probably want to make sure your buf holds something that looks like a complete, valid message and then pass it to a channel or handler goroutine for further processing.
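One hedged way to do that framing, since the continuous feed emits one JSON document per line: wrap the body in a bufio.Scanner and hand complete lines to a channel. This is only a sketch (readChanges is a made-up name; it assumes the resp from the code above and imports of bufio, log and net/http):
// readChanges reads the continuous feed line by line and sends each
// complete JSON line to out; empty lines are CouchDB heartbeats.
func readChanges(resp *http.Response, out chan<- string) {
    defer resp.Body.Close()
    defer close(out)

    scanner := bufio.NewScanner(resp.Body)
    // note: very large documents may need scanner.Buffer with a bigger limit
    for scanner.Scan() {
        line := scanner.Text()
        if line == "" {
            continue // heartbeat, same as the '\n' check in the Node.js handler
        }
        out <- line // ready for json.Unmarshal into your change struct
    }
    if err := scanner.Err(); err != nil {
        log.Println("feed closed:", err)
    }
}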
Finally, I was able to resolve the issue. It was related to the DisableCompression flag; https://github.com/golang/go/issues/16488 gave me the hint.
Setting DisableCompression: true fixed the issue.
client := &http.Client{Transport: &http.Transport{
    DisableCompression: true,
}}
I am assuming client := &http.Client{} uses DisableCompression: false by default and the PouchDB server is sending compressed JSON, hence the received data was compressed and resp.Body.Read was not able to read it.
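Putting the two pieces together, the read loop from the answer above with compression disabled might look like this (a sketch only, not verified against PouchDB):
client := &http.Client{Transport: &http.Transport{
    // ask for an uncompressed stream so partial reads return data as it arrives
    DisableCompression: true,
}}
resp, err := client.Get("http://localhost:3030/downloads/_changes?feed=continuous&include_docs=true")
if err != nil {
    log.Fatal(err)
}
defer resp.Body.Close()

buf := make([]byte, 1024)
for {
    n, err := resp.Body.Read(buf)
    if n > 0 {
        fmt.Printf("%s", buf[:n])
    }
    if err != nil {
        break
    }
}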
I was using http.NewRequest to make a request.
I deliberately tampered with the API URL just to check whether my error handling works.
But it's not working as expected: no err value is returned, so I am unable to catch the failure by comparing it.
jsonData := map[string]string{"firstname": "Nic", "lastname": "Raboy"}
jsonValue, _ := json.Marshal(jsonData)
request, err := http.NewRequest("POST", "http://httpbin.org/postsdf", bytes.NewBuffer(jsonValue))
request.Header.Set("Content-Type", "application/json")
client := &http.Client{}
response, err := client.Do(request)
if err != nil {
    fmt.Println("wrong")
} else {
    data, _ := ioutil.ReadAll(response.Body)
    fmt.Println(string(data))
}
The output is as below:
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
<title>404 Not Found</title>
<h1>Not Found</h1>
<p>The requested URL was not found on the server. If you entered the URL manually please check your spelling and try again.</p>
But I am expecting it to print "wrong".
The HTTP call succeeds (the call went through to the server, and a response came back); that's why err is nil. It's just that the HTTP status code is not http.StatusOK (judging by the response document, it's http.StatusNotFound).
You should check the HTTP status code like this:
response, err := client.Do(request)
if err != nil {
    fmt.Println("HTTP call failed:", err)
    return
}
// Don't forget, you're expected to close the response body even if you don't want to read it.
defer response.Body.Close()

if response.StatusCode != http.StatusOK {
    fmt.Println("Non-OK HTTP status:", response.StatusCode)
    // You may read / inspect the response body
    return
}

// All is OK, server reported success.
Also note that certain API endpoints may return a status other than http.StatusOK for success, such as HTTP 201 Created or HTTP 202 Accepted. If you want to accept all success status codes, you can do it like this:
// Success is indicated with 2xx status codes:
statusOK := response.StatusCode >= 200 && response.StatusCode < 300
if !statusOK {
    fmt.Println("Non-OK HTTP status:", response.StatusCode)
    // You may read / inspect the response body
    return
}
You should use the status code; you can also write a small handler for every HTTP request that you create.
response, err := client.Do(request)
if err != nil {
    fmt.Println("request failed:", err)
    return
}
defer response.Body.Close()

switch response.StatusCode {
case 200:
    fmt.Println("work!")
case 404:
    fmt.Println("not found!")
default:
    fmt.Println("http", response.Status)
}
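A small reusable handler along those lines might look like this (a sketch; checkedDo is a made-up name, and it assumes fmt and net/http are imported):
// checkedDo runs the request and turns non-2xx responses into an error,
// so callers only have to check err.
func checkedDo(client *http.Client, req *http.Request) (*http.Response, error) {
    resp, err := client.Do(req)
    if err != nil {
        return nil, err // transport-level failure (DNS, connection, timeout, ...)
    }
    if resp.StatusCode < 200 || resp.StatusCode >= 300 {
        resp.Body.Close()
        return nil, fmt.Errorf("non-OK HTTP status: %s", resp.Status)
    }
    return resp, nil
}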
I've been reading up on the various timeouts that are available on an http request and they all seem to act as hard deadlines on the total time of a request.
I am running an HTTP download and I don't want to implement a hard timeout past the initial handshake, as I don't know anything about my users' connections and don't want to time out on slow ones. What I would ideally like is to time out after a period of inactivity (when nothing has been downloaded for x seconds). Is there any way to do this built in, or do I have to interrupt based on stat-ing the file?
The working code is a little hard to isolate, but I think these are the relevant parts; there is another loop that stats the file to provide progress, but I will need to refactor a bit to use it to interrupt the download:
// HttpsClientOnNetInterface returns an http client using the named network interface, (via proxy if passed)
func HttpsClientOnNetInterface(interfaceIP []byte, httpsProxy *Proxy) (*http.Client, error) {
    log.Printf("Got IP addr : %s\n", string(interfaceIP))

    // create address for the dialer
    tcpAddr := &net.TCPAddr{
        IP: interfaceIP,
    }

    // create the dialer & transport
    netDialer := net.Dialer{
        LocalAddr: tcpAddr,
    }

    var proxyURL *url.URL
    var err error
    if httpsProxy != nil {
        proxyURL, err = url.Parse(httpsProxy.String())
        if err != nil {
            return nil, fmt.Errorf("Error parsing proxy connection string: %s", err)
        }
    }

    httpTransport := &http.Transport{
        Dial:  netDialer.Dial,
        Proxy: http.ProxyURL(proxyURL),
    }
    httpClient := &http.Client{
        Transport: httpTransport,
    }
    return httpClient, nil
}
/*
StartDownloadWithProgress will initiate a download from a remote url to a local file,
providing download progress information
*/
func StartDownloadWithProgress(interfaceIP []byte, httpsProxy *Proxy, srcURL, dstFilepath string) (*Download, error) {
    // start an http client on the selected net interface
    httpClient, err := HttpsClientOnNetInterface(interfaceIP, httpsProxy)
    if err != nil {
        return nil, err
    }

    // grab the header
    headResp, err := httpClient.Head(srcURL)
    if err != nil {
        log.Printf("error on head request (download size): %s", err)
        return nil, err
    }

    // pull out total size
    size, err := strconv.Atoi(headResp.Header.Get("Content-Length"))
    if err != nil {
        headResp.Body.Close()
        return nil, err
    }
    headResp.Body.Close()

    errChan := make(chan error)
    doneChan := make(chan struct{})

    // spawn the download process
    go func(httpClient *http.Client, srcURL, dstFilepath string, errChan chan error, doneChan chan struct{}) {
        resp, err := httpClient.Get(srcURL)
        if err != nil {
            errChan <- err
            return
        }
        defer resp.Body.Close()

        // create the file
        outFile, err := os.Create(dstFilepath)
        if err != nil {
            errChan <- err
            return
        }
        defer outFile.Close()

        log.Println("starting copy")
        // copy to file as the response arrives
        _, err = io.Copy(outFile, resp.Body)

        // return err
        if err != nil {
            log.Printf("\n Download Copy Error: %s \n", err.Error())
            errChan <- err
            return
        }
        doneChan <- struct{}{}
        return
    }(httpClient, srcURL, dstFilepath, errChan, doneChan)

    // return Download
    return (&Download{
        updateFrequency: time.Microsecond * 500,
        total:           size,
        errRecieve:      errChan,
        doneRecieve:     doneChan,
        filepath:        dstFilepath,
    }).Start(), nil
}
Update
Thanks to everyone who had input into this.
I've accepted JimB's answer as it seems like a perfectly viable approach that is more generalised than the solution I chose (and probably more useful to anyone who finds their way here).
In my case I already had a loop monitoring the file size so I threw a named error when this did not change for x seconds. It was much easier for me to pick up on the named error through my existing error handling and retry the download from there.
I probably crash at least one goroutine in the background with my approach (I may fix this later with some signalling), but as this is a short-running application (it's an installer), this is acceptable (or at least tolerable).
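For anyone curious, the shape of that stat-based watchdog was roughly the following (a reconstruction rather than the exact code; ErrDownloadStalled, watchFile and the parameters are made-up names; it needs errors, os and time imported):
var ErrDownloadStalled = errors.New("download stalled: file size unchanged")

// watchFile stats the download target every interval and reports
// ErrDownloadStalled on errChan if the size hasn't grown within idle.
func watchFile(path string, interval, idle time.Duration, errChan chan<- error, done <-chan struct{}) {
    var lastSize int64
    lastChange := time.Now()
    ticker := time.NewTicker(interval)
    defer ticker.Stop()
    for {
        select {
        case <-done:
            return
        case <-ticker.C:
            fi, err := os.Stat(path)
            if err != nil {
                continue // file may not exist yet
            }
            if fi.Size() != lastSize {
                lastSize = fi.Size()
                lastChange = time.Now()
            } else if time.Since(lastChange) > idle {
                errChan <- ErrDownloadStalled
                return
            }
        }
    }
}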
Doing the copy manually is not particularly difficult. If you're unsure how to implement it properly, it's only a couple dozen lines from the io package to copy and modify to suit your needs (I only removed the ErrShortWrite clause, because we can assume that the standard library io.Writer implementations are correct).
Here is a copy work-alike function that also takes a cancelation context and an idle timeout parameter. Every time there is a successful read, it signals the cancelation goroutine to continue and start a new timer.
func idleTimeoutCopy(dst io.Writer, src io.Reader, timeout time.Duration,
    ctx context.Context, cancel context.CancelFunc) (written int64, err error) {
    read := make(chan int)
    go func() {
        for {
            select {
            case <-ctx.Done():
                return
            case <-time.After(timeout):
                cancel()
            case <-read:
            }
        }
    }()

    buf := make([]byte, 32*1024)
    for {
        nr, er := src.Read(buf)
        if nr > 0 {
            read <- nr
            nw, ew := dst.Write(buf[0:nr])
            written += int64(nw)
            if ew != nil {
                err = ew
                break
            }
        }
        if er != nil {
            if er != io.EOF {
                err = er
            }
            break
        }
    }
    return written, err
}
While I used time.After for brevity, it's more efficient to reuse the Timer. This means taking care to use the correct reset pattern, as the return value of the Reset function is broken:
t := time.NewTimer(timeout)
for {
    select {
    case <-ctx.Done():
        return
    case <-t.C:
        cancel()
    case <-read:
        if !t.Stop() {
            <-t.C
        }
        t.Reset(timeout)
    }
}
You could skip calling Stop altogether here, since in my opinion if the timer fires while calling Reset, it was close enough to cancel anyway; but it's often good to keep the code idiomatic in case it is extended in the future.
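To tie this back to the question, the download goroutine above could use the helper in place of io.Copy, roughly like this (a sketch; the 30-second idle window is arbitrary, and srcURL, httpClient, outFile, errChan and doneChan come from the question's code):
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

req, err := http.NewRequest("GET", srcURL, nil)
if err != nil {
    errChan <- err
    return
}
// tie the request to the cancelable context so cancel() also aborts the body read
resp, err := httpClient.Do(req.WithContext(ctx))
if err != nil {
    errChan <- err
    return
}
defer resp.Body.Close()

// replaces the io.Copy call: give up if nothing arrives for 30 seconds
if _, err := idleTimeoutCopy(outFile, resp.Body, 30*time.Second, ctx, cancel); err != nil {
    errChan <- err
    return
}
doneChan <- struct{}{}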
I want to create a simple script that checks whether a certain hostname:port is up. I only want a bool response telling me if that URL is live, but I'm not sure there's a straightforward way of doing it.
If you only want to see if a URL is reachable, you could use net.DialTimeout, like this:
timeout := 1 * time.Second
conn, err := net.DialTimeout("tcp", "mysyte:myport", timeout)
if err != nil {
    log.Println("Site unreachable, error: ", err)
    return
}
conn.Close()
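Since the question asks for a bool, wrapping that in a tiny helper could look like this (a sketch; isReachable is a made-up name):
// isReachable reports whether a TCP connection to addr ("host:port")
// can be established within timeout.
func isReachable(addr string, timeout time.Duration) bool {
    conn, err := net.DialTimeout("tcp", addr, timeout)
    if err != nil {
        return false
    }
    conn.Close()
    return true
}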
If you want to check if a Web server answers on a certain URL, you can invoke an HTTP GET request using net/http.
You will get a timeout if the server doesn't respond at all. You might also check the response status.
resp, err := http.Get("http://google.com/")
if err != nil {
    print(err.Error())
} else {
    print(resp.Status) // e.g. "200 OK"; use resp.StatusCode for the numeric code
}
You can change the default timeout by initializing a http.Client.
timeout := 1 * time.Second
client := http.Client{
    Timeout: timeout,
}
resp, err := client.Get("http://google.com")
Bonus:
Go generally does not rely on exceptions, and the built-in libraries generally do not panic but return an error as a second value.
See Why does Go not have exceptions?.
You can assume that something very bad happened if your call to a native function panics.
You can make a HEAD request:
package main

import "net/http"

func head(s string) bool {
    r, e := http.Head(s)
    return e == nil && r.StatusCode == 200
}

func main() {
    b := head("https://stackoverflow.com")
    println(b)
}
https://golang.org/pkg/net/http#Head
If you don't mind the port, use http.Get(web):
package main

import (
    "fmt"
    "net/http"
    "os"
)

func main() {
    web := os.Args[1]
    fmt.Println(webIsReachable(web))
}

func webIsReachable(web string) bool {
    response, errors := http.Get(web)
    if errors != nil {
        _, netErrors := http.Get("https://www.google.com")
        if netErrors != nil {
            fmt.Fprintf(os.Stderr, "no internet\n")
            os.Exit(1)
        }
        return false
    }
    if response.StatusCode == 200 {
        return true
    }
    return false
}
For testing I like to simulate signups. I get the signup page, fill in the form and post it. Apparently the session cookie that is provided by the server is not sent in the post request. If I access the server from a web browser all works fine. I can see that the response to Get contains the cookie. How can I add it to the PostForm?
func signup(name string, ret chan bool) {
    var xsrf string
    fmt.Println("Starting signup with", name)

    response, err := http.Get("http://localhost:8080/signup")
    if err != nil {
        panic(err)
    } else {
        defer response.Body.Close()
        buffer, _ := ioutil.ReadAll(response.Body)
        xsrf = regXsrf.FindStringSubmatch(string(buffer))[1]
    }

    data := url.Values{}
    data.Set("name", name)
    data.Add("password", "111222")
    data.Add("password2", "111222")
    data.Add("groupcode", "AllesWirdGut")
    data.Add("websocketstoken", xsrf)

    response, err = http.PostForm("http://localhost:8080/signup", data)
    if err != nil {
        panic(err)
    } else {
        defer response.Body.Close()
    }
}
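One way to carry the session cookie from the Get over to the PostForm is to give the requests a shared cookie jar: the package-level http.Get and http.PostForm use http.DefaultClient, which has no jar, so cookies from the first response are not replayed on the second request. A minimal sketch of that change (assuming the session cookie is an ordinary Set-Cookie header; the xsrf extraction from the code above is omitted for brevity, and data is the form built above):
jar, err := cookiejar.New(nil) // import "net/http/cookiejar"
if err != nil {
    panic(err)
}
client := &http.Client{Jar: jar}

// cookies set on this response (e.g. the session cookie) land in the jar
resp, err := client.Get("http://localhost:8080/signup")
if err != nil {
    panic(err)
}
resp.Body.Close()

// the jar automatically sends those cookies with the follow-up request
resp, err = client.PostForm("http://localhost:8080/signup", data)
if err != nil {
    panic(err)
}
defer resp.Body.Close()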